Title: | Phylogenetic Tools for Eco-Phylogenetics |
---|---|
Description: | A collection of tools for building RAxML supermatrix using PHYLIP or aligned FASTA files. These functions will be useful for building large phylogenies using multiple markers. |
Authors: | Jinlong Zhang [aut, cre], Nancai Pei [ctb], Xiangcheng Mi [ctb] |
Maintainer: | Jinlong Zhang <[email protected]> |
License: | GPL-2 |
Version: | 0.2.5 |
Built: | 2024-11-09 04:22:33 UTC |
Source: | https://github.com/helixcn/phylotools |
A collection of a few functions for handling DNA-barcoding sequences, building PHYLIP supermatrix for RAxML etc.
Package: | phylotools |
Type: | Package |
Version: | 0.2.1 |
Date: | 2017-12-08 |
License: | GLP-2 |
LazyLoad: | yes |
Jinlong Zhang
Maintainer: Jinlong Zhang <[email protected]>
Cleaning the names of sequences for a fasta file. The punctuation characters and the white space will be replaced with "_".
clean.fasta.name(infile = NULL, outfile = "name_cleaned.fasta")
clean.fasta.name(infile = NULL, outfile = "name_cleaned.fasta")
infile |
character string representing the name of the fasta file. |
outfile |
Character string representing the file name to be generated. |
Punctuation characters and white space will be replaced by "_". More information can be found at regex
.
This is a subroutine without a return value. A fasta file with all the names of sequences renamed will be saved to the working directory.
Jinlong Zhang <[email protected]>
http://www.genomatix.de/online_help/help/sequence_formats.html
cat( ">seq_1*66", "--TTACAAATTGACTTATTATA", ">seq_2()r", "GATTACAAATTGACTTATTATA", ">seq_3:test", "GATTACAAATTGACTTATTATA", ">seq_588", "GATTACAAATTGACTTATTATA", ">seq_8$$yu", "GATTACAAATTGACTTATTATA", ">seq_10", "---TACAAATTGAATTATTATA", file = "matk.fasta", sep = "\n") clean.fasta.name(infile = "matk.fasta") get.fasta.name("name_cleaned.fasta") # Delete file unlink("matk.fasta") unlink("name_cleaned.fasta")
cat( ">seq_1*66", "--TTACAAATTGACTTATTATA", ">seq_2()r", "GATTACAAATTGACTTATTATA", ">seq_3:test", "GATTACAAATTGACTTATTATA", ">seq_588", "GATTACAAATTGACTTATTATA", ">seq_8$$yu", "GATTACAAATTGACTTATTATA", ">seq_10", "---TACAAATTGAATTATTATA", file = "matk.fasta", sep = "\n") clean.fasta.name(infile = "matk.fasta") get.fasta.name("name_cleaned.fasta") # Delete file unlink("matk.fasta") unlink("name_cleaned.fasta")
Convert and Save sequence data frame to fasta file.
dat2fasta(dat, outfile = "out.fasta")
dat2fasta(dat, outfile = "out.fasta")
dat |
data frame by |
outfile |
a character string, representing the name of the fasta file to be generated |
The column of the data frame must be: 1. seq.name, 2. seq.text, represent the name of the sequences, the content of the sequence, eg. ATCGGGAAC.
This is a routine without return value.
Jinlong Zhang <[email protected]>
http://www.genomatix.de/online_help/help/sequence_formats.html
cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") res <- read.fasta("trn1.fasta") dat2fasta(res) unlink("trn1.fasta") unlink("out.fasta")
cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") res <- read.fasta("trn1.fasta") dat2fasta(res) unlink("trn1.fasta") unlink("out.fasta")
Convert and save a data frame to sequential PHYLIP file.
dat2phylip(dat, outfile = "out.phy")
dat2phylip(dat, outfile = "out.phy")
dat |
the data frame returned by |
outfile |
character string represents the phylip file to be generated. |
The output will be in sequential PHYLIP format.
This is a subroutine, there is no return value.
The names of the sequences should not contain white space or Punctuation characters. See regex
for more details.
Jinlong Zhang <[email protected]>
http://www.genomatix.de/online_help/help/sequence_formats.html
dat2fasta
, read.fasta
, read.phylip
cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") res <- read.fasta("trn1.fasta") dat2phylip(res) unlink("trn1.fasta") unlink("out.phy")
cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") res <- read.fasta("trn1.fasta") dat2phylip(res) unlink("trn1.fasta") unlink("out.phy")
get the names of all the sequences of a fasta file, and perform cleaning of the names of the sequences
get.fasta.name(infile, clean_name = FALSE)
get.fasta.name(infile, clean_name = FALSE)
infile |
character string representing the name of the fasta file. |
clean_name |
logical, representing cleaning of the names will be performed. |
a character vector containing the names of the sequences
Punctuation characters and white space be replaced by "_". Definition of Punctuation characters can be found at regex
.
Jinlong Zhang <[email protected]>
http://www.genomatix.de/online_help/help/sequence_formats.html
cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") get.fasta.name("trn1.fasta") unlink("trn1.fasta")
cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") get.fasta.name("trn1.fasta") unlink("trn1.fasta")
get the names of sequences from a PHYLIP file.
get.phylip.name(infile, clean_name = FALSE)
get.phylip.name(infile, clean_name = FALSE)
infile |
character representing the name or path of the phylip file. |
clean_name |
logical, representing cleaning of the names will be performed. |
Punctuation characters and white space be replaced by "_". Definition of Punctuation characters can be found at regex
.
a character vector of the names of the sequences
Jinlong Zhang <[email protected]>
cat("6 22", "seq_1 --TTACAAATTGACTTATTATA", "seq_2 GATTACAAATTGACTTATTATA", "seq_3 GATTACAAATTGACTTATTATA", "seq_5 GATTACAAATTGACTTATTATA", "seq_8 GATTACAAATTGACTTATTATA", "seq_10 ---TACAAATTGAATTATTATA", file = "matk.phy", sep = "\n") get.phylip.name("matk.phy") unlink("matk.phy")
cat("6 22", "seq_1 --TTACAAATTGACTTATTATA", "seq_2 GATTACAAATTGACTTATTATA", "seq_3 GATTACAAATTGACTTATTATA", "seq_5 GATTACAAATTGACTTATTATA", "seq_8 GATTACAAATTGACTTATTATA", "seq_10 ---TACAAATTGAATTATTATA", file = "matk.phy", sep = "\n") get.phylip.name("matk.phy") unlink("matk.phy")
Read and convert the fasta file to data frame
read.fasta(file = NULL, clean_name = FALSE)
read.fasta(file = NULL, clean_name = FALSE)
file |
character string representing the name of the fasta file. |
clean_name |
logical, representing cleaning of the names will be performed. Punctuation characters and white space be replaced by "_" . See |
In this function, names of the sequences are identified by ">", and all the lines before next ">" will be concatenated.
a data frame with two columns: (1) seq.name, the names for all the sequences. (2) seq.text, the raw sequence data.
Punctuation characters and white space in the names of the sequences will be replaced by "_".
Jinlong Zhang <[email protected]>
http://www.genomatix.de/online_help/help/sequence_formats.html
read.phylip
,dat2fasta
,dat2phylip
,split_dat
cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") res <- read.fasta("trn1.fasta") unlink("trn1.fasta")
cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") res <- read.fasta("trn1.fasta") unlink("trn1.fasta")
read the phylip file, and store the sequences and their names in a dataframe.
read.phylip(infile, clean_name = TRUE, seq.name.length = NA)
read.phylip(infile, clean_name = TRUE, seq.name.length = NA)
infile |
character string for the name of the phylip file. |
clean_name |
logical, representing cleaning of the names will be performed. |
seq.name.length |
Number of characters for the sequence names, if not specified, the function will use the first white space as the identifier. All the character before the first white space will be treated as sequence names, and the rest will be treated as sequences. |
read.phylip accepts both interleaved and sequential phylip, the number of sequences is identified by parsing the first line of the file. Sequences and their names will be stored in a data frame.
If clean_name is TRUE, punctuation characters and white space be replaced by "_". Definition of punctuation characters can be found at regex
.
a data frame with two columns: (1) seq.name, the names for all the sequences; (2) seq.text, the raw sequence data.
the Punctuation characters and white space in the names of the sequences will be replaced by "_".
Jinlong Zhang <[email protected]>
cat("6 22", "seq_1 --TTACAAATTGACTTATTATA", "seq_2 GATTACAAATTGACTTATTATA", "seq_3 GATTACAAATTGACTTATTATA", "seq_5 GATTACAAATTGACTTATTATA", "seq_8 GATTACAAATTGACTTATTATA", "seq_10 ---TACAAATTGAATTATTATA", file = "matk.phy", sep = "\n") res <- read.phylip(infile = "matk.phy") unlink("matk.phy")
cat("6 22", "seq_1 --TTACAAATTGACTTATTATA", "seq_2 GATTACAAATTGACTTATTATA", "seq_3 GATTACAAATTGACTTATTATA", "seq_5 GATTACAAATTGACTTATTATA", "seq_8 GATTACAAATTGACTTATTATA", "seq_10 ---TACAAATTGAATTATTATA", file = "matk.phy", sep = "\n") res <- read.phylip(infile = "matk.phy") unlink("matk.phy")
Rename the sequences within a fasta file according to a data frame supplied.
rename.fasta(infile = NULL, ref_table, outfile = "renamed.fasta")
rename.fasta(infile = NULL, ref_table, outfile = "renamed.fasta")
infile |
character string containing the name of the fasta file. |
ref_table |
a data frame with first column for original name, second column for the new name of the sequence. |
outfile |
The name of the fasta file with sequences renamed. |
If the orginal name was not found in the ref_table, the name for the sequence will be changed into "old_name_" + orginal name.
This is a subroutine without return value.
Since whitespace and punctuation characters will be replaced with "_", name of a sequence might change. It is suggest to obtain the name of the sequences by calling read.fasta first, and save the data.frame to a csv file to obtain the "original" name for the sequences.
Jinlong Zhang <[email protected]>
http://www.genomatix.de/online_help/help/sequence_formats.html
cat( ">seq_1", "--TTACAAATTGACTTATTATA", ">seq_2", "GATTACAAATTGACTTATTATA", ">seq_3", "GATTACAAATTGACTTATTATA", ">seq_5", "GATTACAAATTGACTTATTATA", ">seq_8", "GATTACAAATTGACTTATTATA", ">seq_10", "---TACAAATTGAATTATTATA", file = "matk.fasta", sep = "\n") old_name <- get.fasta.name("matk.fasta") new_name <- c("Magnolia", "Ranunculus", "Carex", "Morus", "Ulmus", "Salix") ref2 <- data.frame(old_name, new_name) rename.fasta(infile = "matk.fasta", ref_table = ref2, outfile = "renamed.fasta") unlink("matk.fasta") unlink("renamed.fasta")
cat( ">seq_1", "--TTACAAATTGACTTATTATA", ">seq_2", "GATTACAAATTGACTTATTATA", ">seq_3", "GATTACAAATTGACTTATTATA", ">seq_5", "GATTACAAATTGACTTATTATA", ">seq_8", "GATTACAAATTGACTTATTATA", ">seq_10", "---TACAAATTGAATTATTATA", file = "matk.fasta", sep = "\n") old_name <- get.fasta.name("matk.fasta") new_name <- c("Magnolia", "Ranunculus", "Carex", "Morus", "Ulmus", "Salix") ref2 <- data.frame(old_name, new_name) rename.fasta(infile = "matk.fasta", ref_table = ref2, outfile = "renamed.fasta") unlink("matk.fasta") unlink("renamed.fasta")
Delete sequences from fasta file
rm.sequence.fasta(infile, outfile = "sequence.removed.fasta", to.rm = NULL)
rm.sequence.fasta(infile, outfile = "sequence.removed.fasta", to.rm = NULL)
infile |
Character string representing the name of the fasta file. |
outfile |
Character string representing the name of the output fasta file. |
to.rm |
Vector of character string containing the names of sequences to be deleted. |
Delete sequences from a fasta file.
This is a subroutine without return value.
Jinlong Zhang <[email protected]>
http://www.genomatix.de/online_help/help/sequence_formats.html
cat( ">seq_1", "---TCCGCCCCCCTACTCTA", ">seq_3", "CTCTCCGCCCCTCTACTCTA", ">seq_5", "---TCCGCCC-TTTACTCTA", ">seq_6", "---TCCGCCCCTCTACTCTA", ">seq_9", "---TCCGCCC-TCTACTCTA", ">seq_12", "CTCTCCGCCC-TCTACTCTA", file = "trn2.fasta", sep = "\n") rm.sequence.fasta(infile = "trn2.fasta", to.rm = c("seq_1","seq_12")) unlink("trn2.fasta") unlink("sequence.removed.fasta")
cat( ">seq_1", "---TCCGCCCCCCTACTCTA", ">seq_3", "CTCTCCGCCCCTCTACTCTA", ">seq_5", "---TCCGCCC-TTTACTCTA", ">seq_6", "---TCCGCCCCTCTACTCTA", ">seq_9", "---TCCGCCC-TCTACTCTA", ">seq_12", "CTCTCCGCCC-TCTACTCTA", file = "trn2.fasta", sep = "\n") rm.sequence.fasta(infile = "trn2.fasta", to.rm = c("seq_1","seq_12")) unlink("trn2.fasta") unlink("sequence.removed.fasta")
Splite the data frame of sequences based on the reference table of grouping.
split_dat(dat, ref_table)
split_dat(dat, ref_table)
dat |
data frame generated by |
ref_table |
data frame with first column for the name of the sequence, second column for the group the sequence belongs to. |
Each group of sequences will be saved to a fasta file. Sequences not included in the ref_table will be saved in "Ungrouped.fasta"
This is a subroutine, there is no return value.
Jinlong Zhang <[email protected]>
http://www.genomatix.de/online_help/help/sequence_formats.html
cat( ">seq_1", "--TTACAAATTGACTTATTATA", ">seq_2", "GATTACAAATTGACTTATTATA", ">seq_3", "GATTACAAATTGACTTATTATA", ">seq_5", "GATTACAAATTGACTTATTATA", ">seq_8", "GATTACAAATTGACTTATTATA", ">seq_10", "---TACAAATTGAATTATTATA", ">seq_11", "--TTACAAATTGACTTATTATA", ">seq_12", "GATTACAAATTGACTTATTATA", ">seq_13", "GATTACAAATTGACTTATTATA", ">seq_15", "GATTACAAATTGACTTATTATA", ">seq_16", "GATTACAAATTGACTTATTATA", ">seq_17", "---TACAAATTGAATTATTATA", file = "trnh.fasta", sep = "\n") sequence_name <- get.fasta.name("trnh.fasta") sequence_group <- c("group1","group1","group1","group1","group1", "group2","group2","group2","group3","group3","group3","group3") group <- data.frame(sequence_name, sequence_group) fasta <- read.fasta("trnh.fasta") split_dat(fasta, group) unlink("trnh.fasta") unlink("ungrouped.fasta") unlink("group1.fasta") unlink("group2.fasta") unlink("group3.fasta")
cat( ">seq_1", "--TTACAAATTGACTTATTATA", ">seq_2", "GATTACAAATTGACTTATTATA", ">seq_3", "GATTACAAATTGACTTATTATA", ">seq_5", "GATTACAAATTGACTTATTATA", ">seq_8", "GATTACAAATTGACTTATTATA", ">seq_10", "---TACAAATTGAATTATTATA", ">seq_11", "--TTACAAATTGACTTATTATA", ">seq_12", "GATTACAAATTGACTTATTATA", ">seq_13", "GATTACAAATTGACTTATTATA", ">seq_15", "GATTACAAATTGACTTATTATA", ">seq_16", "GATTACAAATTGACTTATTATA", ">seq_17", "---TACAAATTGAATTATTATA", file = "trnh.fasta", sep = "\n") sequence_name <- get.fasta.name("trnh.fasta") sequence_group <- c("group1","group1","group1","group1","group1", "group2","group2","group2","group3","group3","group3","group3") group <- data.frame(sequence_name, sequence_group) fasta <- read.fasta("trnh.fasta") split_dat(fasta, group) unlink("trnh.fasta") unlink("ungrouped.fasta") unlink("group1.fasta") unlink("group2.fasta") unlink("group3.fasta")
Substitute the tip labels of a phylogenetic tree according to a reference data table.
sub.taxa.label(tree, dat)
sub.taxa.label(tree, dat)
tree |
Phylogenetic tree |
dat |
A dataframe with the first column the tip labels and the second column the new names. |
A Phylogenetic tree with the tip labels substituted
Jinlong Zhang [email protected]
library(ape) data(bird.families) tips <- bird.families$tip.label abr <- paste("fam",1:length(tips), sep = "") dat <- data.frame(tips, abr) ntree <- sub.taxa.label(bird.families, dat)
library(ape) data(bird.families) tips <- bird.families$tip.label abr <- paste("fam",1:length(tips), sep = "") dat <- data.frame(tips, abr) ntree <- sub.taxa.label(bird.families, dat)
Build PHYLIP supermatrix and create RAxML partition file using aligned fasta or phylip files.
supermat(infiles, outfile = "supermat.out.fas", partition.file = "gene_partition.txt")
supermat(infiles, outfile = "supermat.out.fas", partition.file = "gene_partition.txt")
infiles |
a character string vector for phylip or aligned fasta file. |
outfile |
the name of the PHYLIP supermatrix |
partition.file |
partition data summary describing the genes. |
Supermatrix here means a phylip file with combined aligned sequences. The missing sequences should be replaced with either "?" or "-".
A list containing: (1)supermat.dat:a list containing all the data frames read by read.phylip or read.fasta (2)res.super.dat: a data frame containing the sequences and the names (3)partition.dat: summary for all the fasta or phylip files (4)partition.dat.vector: character string vector for the partition file for RAxML
Punctuation characters and white space in the names of the sequences will be replaced by "_". More information can be found at regex
.
Type of the sequence in the RAxML partition file should be changed manually according to the manual of RAxML.
Jinlong Zhang <[email protected]>
Kress, W. J., Erickson, D. L., Jones, F. A., Swenson, N. G., Perez, R., Sanjur, O., & Bermingham, E. (2009). Plant DNA barcodes and a community phylogeny of a tropical forest dynamics plot in Panama. Proceedings of the National Academy of Sciences, 106(44), 18621-18626.
de Queiroz, A.and Gatesy, J. (2007). The supermatrix approach to systematics. Trends in Ecology & Evolution, 22(1), 34-41.
https://github.com/stamatak/standard-RAxML
read.fasta
,read.phylip
,dat2phylip
,
cat("6 22", "seq_1 --TTACAAATTGACTTATTATA", "seq_2 GATTACAAATTGACTTATTATA", "seq_3 GATTACAAATTGACTTATTATA", "seq_5 GATTACAAATTGACTTATTATA", "seq_8 GATTACAAATTGACTTATTATA", "seq_10 ---TACAAATTGAATTATTATA", file = "matk.phy", sep = "\n") cat("5 15", "seq_1 GATTACAAATTGACT", "seq_3 GATTACAAATTGACT", "seq_4 GATTACAAATTGACT", "seq_5 GATTACAAATTGACT", "seq_8 GATTACAAATTGACT", file = "rbcla.phy", sep = "\n") cat("5 50", "seq_2 GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", "seq_3 GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", "seq_5 GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", "seq_8 ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", "seq_9 ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.phy", sep = "\n") supermat(infiles = c("matk.phy", "rbcla.phy", "trn1.phy")) unlink(c("matk.phy", "rbcla.phy", "trn1.phy")) unlink(c("supermat.out.phy","gene_partition.txt")) cat( ">seq_1", "--TTACAAATTGACTTATTATA", ">seq_2", "GATTACAAATTGACTTATTATA", ">seq_3", "GATTACAAATTGACTTATTATA", ">seq_5", "GATTACAAATTGACTTATTATA", ">seq_8", "GATTACAAATTGACTTATTATA", ">seq_10", "---TACAAATTGAATTATTATA", file = "matk.fasta", sep = "\n") cat( ">seq_1", "GATTACAAATTGACT", ">seq_3", "GATTACAAATTGACT", ">seq_4", "GATTACAAATTGACT", ">seq_5", "GATTACAAATTGACT", ">seq_8", "GATTACAAATTGACT", file = "rbcla.fasta", sep = "\n") cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") supermat(infiles = c("matk.fasta", "rbcla.fasta", "trn1.fasta")) unlink(c("matk.fasta", "rbcla.fasta", "trn1.fasta")) unlink(c("supermat.out.phy","gene_partition.txt"))
cat("6 22", "seq_1 --TTACAAATTGACTTATTATA", "seq_2 GATTACAAATTGACTTATTATA", "seq_3 GATTACAAATTGACTTATTATA", "seq_5 GATTACAAATTGACTTATTATA", "seq_8 GATTACAAATTGACTTATTATA", "seq_10 ---TACAAATTGAATTATTATA", file = "matk.phy", sep = "\n") cat("5 15", "seq_1 GATTACAAATTGACT", "seq_3 GATTACAAATTGACT", "seq_4 GATTACAAATTGACT", "seq_5 GATTACAAATTGACT", "seq_8 GATTACAAATTGACT", file = "rbcla.phy", sep = "\n") cat("5 50", "seq_2 GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", "seq_3 GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", "seq_5 GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", "seq_8 ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", "seq_9 ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.phy", sep = "\n") supermat(infiles = c("matk.phy", "rbcla.phy", "trn1.phy")) unlink(c("matk.phy", "rbcla.phy", "trn1.phy")) unlink(c("supermat.out.phy","gene_partition.txt")) cat( ">seq_1", "--TTACAAATTGACTTATTATA", ">seq_2", "GATTACAAATTGACTTATTATA", ">seq_3", "GATTACAAATTGACTTATTATA", ">seq_5", "GATTACAAATTGACTTATTATA", ">seq_8", "GATTACAAATTGACTTATTATA", ">seq_10", "---TACAAATTGAATTATTATA", file = "matk.fasta", sep = "\n") cat( ">seq_1", "GATTACAAATTGACT", ">seq_3", "GATTACAAATTGACT", ">seq_4", "GATTACAAATTGACT", ">seq_5", "GATTACAAATTGACT", ">seq_8", "GATTACAAATTGACT", file = "rbcla.fasta", sep = "\n") cat( ">seq_2", "GTCTTATAAGAAAGAATAAGAAAG--AAATACAAA-------AAAAAAGA", ">seq_3", "GTCTTATAAGAAAGAAATAGAAAAGTAAAAAAAAA-------AAAAAAAG", ">seq_5", "GACATAAGACATAAAATAGAATACTCAATCAGAAACCAACCCATAAAAAC", ">seq_8", "ATTCCAAAATAAAATACAAAAAGAAAAAACTAGAAAGTTTTTTTTCTTTG", ">seq_9", "ATTCTTTGTTCTTTTTTTTCTTTAATCTTTAAATAAACCTTTTTTTTTTA", file = "trn1.fasta", sep = "\n") supermat(infiles = c("matk.fasta", "rbcla.fasta", "trn1.fasta")) unlink(c("matk.fasta", "rbcla.fasta", "trn1.fasta")) unlink(c("supermat.out.phy","gene_partition.txt"))