--- title: "Analyzing FBD Parameters" date: "`r Sys.Date()`" output: html_vignette: toc: yes vignette: | %\VignetteIndexEntry{Analyzing FBD Parameters} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} bibliography: references.bib link-citations: true --- This vignette explains how to extract, plot, and statistically test for differences among BD (Birth-Death) or FBD (fossilized birth-death) parameters (e.g., *net diversification*, *relative extinction (turnover)*, and *relative fossilization*) across the tree when using the Skyline BD or Skyline FBD tree models produced by the program [Mr. Bayes](https://nbisweden.github.io/MrBayes/) or [BEAST2](http://www.beast2.org/) (SA or BDSKY packages). ```{r, setup, echo=FALSE} knitr::opts_chunk$set(echo = TRUE) ``` Load the **EvoPhylo** package ```{r, eval = FALSE} library(EvoPhylo) ``` ```{r, include=FALSE} devtools::load_all(".") ``` ## FBD Parameters Statistics and Plots (Mr. Bayes) Below we demonstrate how to extract evolutionary rate summary statistics from each node from a Bayesian clock (time-calibrate) summary tree produced by Mr. Bayes, store them in a data frame, produce summary tables, and plots. ### 1. Import combined log file from all runs. This is produced by using `combine_log()`.The first argument passed to `combine_log()` should be a path to the folder containing the log files to be imported and combined. ```{r,eval=FALSE} ## Import all log (.p) files from all runs and combine them, with burn-in = 25% ## and downsampling to 2.5k trees in each log file posterior3p <- combine_log("LogFiles3p", burnin = 0.25, downsample = 1000) ``` Below, we use the posterior dataset `posterior3p` that accompanies `EvoPhylo`. ```{r, results='hide'} data(posterior3p) ## Show first 5 lines of combined log file head(posterior3p, 5) ``` The posterior data must first be transformed from wide to long to be used with the functions described below; `FBD_reshape()` accomplishes this. ```{r} ## Reshape imported combined log file from wide to long with FBD_reshape posterior3p_long <- FBD_reshape(posterior3p, variables = NULL, log.type = "MrBayes") ``` ### 2. Summarize FBD parameters by time bin Summary statistics for each FBD parameter by time bin can be quickly summarized using `FBD_summary()`: ```{r, eval = FALSE} ## Summarize parameters by time bin and analysis t3.1 <- FBD_summary(posterior3p_long) t3.1 ``` ```{r, echo = FALSE} t3.1 <- FBD_summary(posterior3p_long, digits = 2) kableExtra::kbl(t3.1, caption = "FBD parameters by time bin") |> kableExtra::kable_styling(font_size = 15, full_width = FALSE, bootstrap_options = "striped", "condensed") ``` ```{r, eval=FALSE} ## Export the table write.csv(t3.1, file = "FBD_summary.csv") ``` ### 3. Plot the distribution of each FBD parameter Each of (or all) the FBD parameter distributions can be plotted by time bin using various plotting alternatives with `FBD_dens_plot()`: ```{r, fig.width=8, fig.height=5, fig.align = "center", out.width = "70%"} ## Plot distribution of the desired FBD parameter by time bin with ## kernel density plot FBD_dens_plot(posterior3p_long, parameter = "net_speciation", type = "density", stack = FALSE) ``` ```{r, fig.width=8, fig.height=5, fig.align = "center", out.width = "70%"} ## Plot distribution of the desired FBD parameter by time bin with ## stacked kernel density plot FBD_dens_plot(posterior3p_long, parameter = "net_speciation", type = "density", stack = TRUE) ``` ```{r, fig.width=4, fig.height=4, fig.align = "center", out.width = "50%"} ## Plot distribution of the desired FBD parameter by time bin with ## a violin plot FBD_dens_plot(posterior3p_long, parameter = "net_speciation", type = "violin", stack = FALSE, color = "red") ``` ```{r, fig.width=12, fig.height=4, fig.align = "center", out.width = "100%", warning=FALSE} ## Plot distribution of all FBD parameter by time bin with a violin plot p1 <- FBD_dens_plot(posterior3p_long, parameter = "net_speciation", type = "violin", stack = FALSE, color = "red") p2 <- FBD_dens_plot(posterior3p_long, parameter = "relative_extinction", type = "violin", stack = FALSE, color = "cyan3") p3 <- FBD_dens_plot(posterior3p_long, parameter = "relative_fossilization", type = "violin", stack = FALSE, color = "green3") library(patchwork) p1 + p2 + p3 + plot_layout(nrow = 1) ``` ```{r, eval = FALSE} ## Save your plot to your working directory as a PDF ggplot2::ggsave("Plot_regs.pdf", width = 12, height = 4) ``` ### 4. Test for assumptions In this step, users can perform tests for normality and homoscedasticity in data distribution for each of the FBD parameters under consideration. The output will determine whether parametric or nonparametric tests will be performed subsequently. ```{r} ##### Tests for normality and homoscedasticity for each FBD parameter for all time bins t3.2 <- FBD_tests1(posterior3p_long) ``` ```{r, eval = FALSE} ### Export the output table for all tests write.csv(t3.2, file = "FBD_Tests1_Assum.csv") ``` The results of the Shapiro-Wilk normality test for each parameter can be output as seperate tables or as a single combined table. ```{r, eval = FALSE} # Output as separate tables t3.2$shapiro ``` ```{r, echo = FALSE} kableExtra::kbl(t3.2$shapiro, digits = 4, align = c('c','c','c','c'), caption = "Shapiro-Wilk normality test ") |> kableExtra::kable_styling(font_size = 12, full_width = FALSE, bootstrap_options = "striped", "condensed") ``` ```{r} # OR as single merged table t3.2$shapiro$net_speciation$bin <- row.names(t3.2$shapiro$net_speciation) t3.2$shapiro$relative_extinction$bin <- row.names(t3.2$shapiro$relative_extinction) t3.2$shapiro$relative_fossilization$bin <- row.names(t3.2$shapiro$relative_fossilization) k1all <- rbind(t3.2$shapiro$net_speciation, t3.2$shapiro$relative_extinction, t3.2$shapiro$relative_fossilization, make.row.names = FALSE) ``` ```{r, eval = FALSE} k1all ``` ```{r, echo=FALSE} kableExtra::kbl(k1all, digits = 4, caption = "Shapiro-Wilk normality test ") |> kableExtra::kable_styling(font_size = 12, full_width = FALSE, bootstrap_options = "striped", "condensed") ``` ```{r, eval = FALSE} ## Bartlett's test for homogeneity of variance t3.2$bartlett ``` ```{r, echo=FALSE} kableExtra::kbl(t3.2$bartlett, caption = "Bartlett's test") |> kableExtra::kable_styling(font_size = 12, full_width = FALSE, bootstrap_options = "striped", "condensed") ``` ```{r, eval = FALSE} ## Fligner-Killeen test for homogeneity of variance t3.2$fligner ``` ```{r, echo=FALSE} kableExtra::kbl(t3.2$fligner, caption = "Fligner-Killeen test") |> kableExtra::kable_styling(font_size = 12, full_width = FALSE, bootstrap_options = "striped", "condensed") ``` Deviations from normality can be displayed graphically using `FBD_normality_plot()`: ```{r, fig.width=8, fig.height=6, fig.align = "center", out.width = "100%"} ## Visualize deviations from normality and similarity of variances FBD_normality_plot(posterior3p_long) ``` ```{r, eval=FALSE} ## Save your plot to your working directory as a PDF ggplot2::ggsave("Plot_normTests.pdf", width = 8, height = 6) ``` ### 5. Test for significant FBD shifts between time bins Significant shifts in FBD parameters across time bins can be easily tested using parametric (Student's t-test) and nonparametric (Mann-Whitney test) pairwise comparisons with `FBD_tests2()`. Both are automatically calculated and the preferred pairwise comparison will be chosen by the user depending on the results of the assumption tests **step #4** (above). ```{r} ##### Test for significant differences between each time bin for each FBD parameter t3.3 <- FBD_tests2(posterior3p_long) ``` ```{r, eval=FALSE} ### Export the output table for all tests write.csv(t3.3, file = "FBD_Tests2_Sign.csv") ## Pairwise t-tests # Output as separate tables t3.3$t_tests ``` ```{r, echo=FALSE} kableExtra::kbl(t3.3$t_tests, digits = 4, align = c('c','c','c','c'), caption = "Significant tests ") |> kableExtra::kable_styling(font_size = 10, full_width = FALSE, bootstrap_options = "striped", "condensed") ``` ```{r} # OR as single merged table k3.3a <- rbind(t3.3$t_tests$net_speciation, t3.3$t_tests$relative_extinction, t3.3$t_tests$relative_fossilization, make.row.names = FALSE) ``` ```{r, eval=FALSE} k3.3a ``` ```{r, echo = FALSE} kableExtra::kbl(k3.3a, digits = 4, align = c('c','c','c','c'), caption = "Pairwise t-tests") |> kableExtra::kable_styling(font_size = 12, full_width = FALSE, bootstrap_options = "striped", "condensed") ``` ```{r, eval=FALSE} ## Mann-Whitney tests (use if Tests in step #4 fail assumptions) # Output as separate tables t3.3$mwu_tests ``` ```{r, echo=FALSE} kableExtra::kbl(t3.3$mwu_tests, digits = 4, align = c('c','c','c','c'), caption = "Mann-Whitney tests") |> kableExtra::kable_styling(font_size = 10, full_width = FALSE, bootstrap_options = "striped", "condensed") ``` ```{r} # OR as single merged table k3.3b <- rbind(t3.3$mwu_tests$net_speciation, t3.3$mwu_tests$relative_extinction, t3.3$mwu_tests$relative_fossilization, make.row.names = FALSE) ``` ```{r, eval=FALSE} k3.3b ``` ```{r, echo = FALSE} kableExtra::kbl(k3.3b, digits=4, align=c('c','c','c','c'), caption = "Mann-Whitney tests") |> kableExtra::kable_styling(font_size = 12, full_width = FALSE, bootstrap_options = "striped", "condensed") ```       ## FBD Parameters Statistics and Plots (BEAST2) ### 1. Import combined log file from all runs. The combined posterior log file from [BEAST2](https://www.beast2.org/beagle-beast-2-in-cluster/index.html) is outputted by **LogCombiner** from their software package. Our own function to combined log files `combine_log` is intended to work with Mr. Bayes posterior files only. Below, we use the posterior dataset "Penguins_log.log" that accompanies `EvoPhylo`. ```{r, results='hide'} posterior <- system.file("extdata", "Penguins_log.log", package = "EvoPhylo") posterior <- read.table(posterior, header = TRUE) ## Show first 10 lines of combined log file head(posterior, 5) ``` The posterior data must first be transformed from wide to long to be used with the functions described below; `FBD_reshape()` accomplishes this. ```{r} ## Reshape imported combined log file from wide to long with FBD_reshape posterior_long <- FBD_reshape(posterior, variables = NULL, log.type = "BEAST2") ``` ### 2. Summarize FBD parameters by time bin Summary statistics for each FBD parameter by time bin can be quickly summarized using `FBD_summary()`: ```{r, eval = FALSE} ## Summarize parameters by time bin and analysis t3.1 <- FBD_summary(posterior_long) t3.1 ``` ```{r, echo = FALSE} t3.1 <- FBD_summary(posterior_long, digits = 2) kableExtra::kbl(t3.1, caption = "FBD parameters by time bin") |> kableExtra::kable_styling(font_size = 15, full_width = FALSE, bootstrap_options = "striped", "condensed") ``` ```{r, eval=FALSE} ## Export the table write.csv(t3.1, file = "FBD_summary_BEAST2.csv") ``` ### 3. Plot the distribution of each FBD parameter Each of (or all) the FBD parameter distributions can be plotted by time bin using various plotting alternatives with `FBD_dens_plot()`: ```{r, fig.width=8, fig.height=5, fig.align = "center", out.width = "70%"} ## Plot distribution of the desired FBD parameter by time bin with ## kernel density plot FBD_dens_plot(posterior_long, parameter = "diversificationRateFBD", type = "density", stack = FALSE) ``` ```{r, fig.width=4, fig.height=4, fig.align = "center", out.width = "50%"} ## Plot distribution of the desired FBD parameter by time bin with ## a violin plot FBD_dens_plot(posterior_long, parameter = "diversificationRateFBD", type = "violin", stack = FALSE, color = "red") ``` ```{r, fig.width=12, fig.height=4, fig.align = "center", out.width = "100%", warning=FALSE} ## Plot distribution of all FBD parameter by time bin with a violin plot p1 <- FBD_dens_plot(posterior_long, parameter = "diversificationRateFBD", type = "violin", stack = FALSE, color = "red") p2 <- FBD_dens_plot(posterior_long, parameter = "turnoverFBD", type = "violin", stack = FALSE, color = "cyan3") p3 <- FBD_dens_plot(posterior_long, parameter = "samplingProportionFBD", type = "violin", stack = FALSE, color = "green3") library(patchwork) p1 + p2 + p3 + plot_layout(nrow = 1) ``` ```{r, eval = FALSE} ## Save your plot to your working directory as a PDF ggplot2::ggsave("Plot_regs.pdf", width = 12, height = 4) ``` ## References