Created
May 17, 2016 23:42
-
-
Save marekborowiec/bc5e6190bf9c9d19c9d86e6403521961 to your computer and use it in GitHub Desktop.
R script using ape to get data on clade monophyly
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("ape") | |
library("data.table") | |
### SET WORKING DIRECTORY ### | |
setwd("/home/mlb/Dropbox/Misof_et_al_Science/Supplementary_Archive_2/aa_final_alignments/trees") | |
### READ IN TREE FILES ### | |
tree_files <- dir(pattern="*bipartitions.EOG*") | |
### DEFINE CLADES ### | |
bees <- c("AMELI","Bombus_terrestris","Exoneura_robusta") | |
Cole <- c("Aleochara_curtula","Dendroctonus_ponderosae","Meloe_violaceus", | |
"TCAST","Lepicerus_sp","Priacma_serrata","Gyrinus_marinus", | |
"Carabus_granulatus") | |
Stre <- c("Mengenilla","Stylops_melittae") | |
Neur <- c("Conwentzia_psociformis","Osmylus_fulvicephalus","Dichochrysa_prasina", | |
"Euroleon_nostras") | |
Mega <- c("Corydalus_cornutus","Sialis_lutaria") | |
Raph <- c("Inocellia_crassicornis","Xanthostigma_xanthostigma") | |
Dipt <- c("AGAMB","Aede_aegy","Phlebotomus_papatasi","Trichocera_fuscata", | |
"Tipula_maxima","Bibio_marci","Bombylius_major","DMELA","Lipara_lucens", | |
"Rhagoletis_pomonella","Glossina_morsitans","Sarcophaga_crassipalpis", | |
"Triarthria_setipennis") | |
Meco <- c("Boreus_hyemalis","Nannochorista_sp","Bittacus_pilicornis", | |
"Panorpa_vulgaris") | |
Siph <- c("Ceratophyllus_gallinae","Archaeopsylla_erinacei","Ctenocephalides_felis") | |
Hyme <- c("Tenthredo_koehleri","Orussus_abietinus","Cotesia_vestalis", | |
"Leptopilina_clavipes","NVITR","Chrysis_viridula","AECHI", | |
"Harp_salt",bees) | |
Lepi <- c("Micropterix_calthella","Eriocrania_subpurpurella","Triodia_sylvina", | |
"Nemophora_deegerella","Yponomeuta_evonymella","Zygaena_fausta", | |
"Polyommatus_icarus","Parides_arcas","BMORI","Manduca_sexta") | |
Tric <- c("Rhyacophila_fasciata","Platycentropus_radiatus","Hydroptilidae_sp", | |
"Philopotamus_ludificatus","Cheumatopsyche_sp") | |
Hemi <- c("Trialeurodes_vaporariorum","Bemisia_tabaci","Acanthocasuarina_muellerianae", | |
"Planococcus_citri","Essigella_californica","APISU","Aphis_gossypii", | |
"Acanthosoma_haemorrhoidale","Notostira_elongata","Ranatra_linearis", | |
"Velia_caprai","Xenophysella_greensladeae","Nilaparvata_lugens", | |
"Cercopis_vulnerata","Okanagana_villosa") | |
Psoc <- c("Ectopsocus_briggsi","Liposcelis_bostrychophila","Menopon_gallinae", | |
"PHUMA") | |
Thys <- c("Gynaikothrips_ficorum","Frankliniella_cephalica","Thrips_palmi") | |
Isop <- c("Mastotermes_darwiniensis","Prorhinotermes_simplex","ZNEVA") | |
Blat <- c("Blaberus_atropus","Periplaneta_americana","Cryptocercus_sp") | |
BlIs <- c(Isop,Blat) | |
Mant <- c("Metallyticus_splendidus","Empusa_pennata","Mantis_religiosa") | |
Phas <- c("Timema_cristinae","Peruphasma_schultei","Aretaon_asperrimus") | |
Embi <- c("Haploembia_palaui","Aposthonia_japonica") | |
Gryl <- c("Grylloblatta_bifratrilecta","Galloisiana_yuasai") | |
Mntp <- c("Tanzaniophasma_sp") | |
Orth <- c("Gryllotalpa_sp","Ceuthophilus_sp","Tetrix_subulata", | |
"Prosarthria_teretrirostris","Stenobothrus_lineatus") | |
Plec <- c("Leuctra_sp","Perla_marginata","Cosmioperla_kuna") | |
Zora <- c("Zorotypus_caudelli") | |
Ephe <- c("Ephemera_danica","Isonychia_bicolor","Eurylophella_sp", | |
"Baetis_pumilus") | |
Odon <- c("Calopteryx_splendens","Cordulegaster_boltoni","Epiophlebia_superstes") | |
Zyge <- c("Tricholepidion_gertschi","Atelura_formicaria","Thermobia_domestica") | |
Arch <- c("Meinertellus_cundinamarcensis","Machilis_hrabei") | |
Dipl <- c("Campodea_augens","Occasjapyx_japonicus") | |
Coll <- c("Sminthurus_vir_nig","Tetrodontophora_bielanensis","Anurida_maritima", | |
"Pogonognathellus_lon_fla","Folsomia_candida") | |
Prot <- c("Acerentomon_sp") | |
#eury <- c("Eurylophella_sp","Ephemera_danica") | |
#mosq <- c("Aede_aegy","AGAMB") | |
#psoc <- c("Ectopsocus_briggsi","Liposcelis_bostrychophila","PHUMA","Menopon_gallinae") | |
CoSt <- c(Cole,Stre) | |
Neoi <- c(Neur,Mega,Raph) | |
CoNe <- c(CoSt,Neoi) | |
Amph <- c(Lepi,Tric) | |
Antl <- c(Dipt,Meco,Siph) | |
Holo <- c(CoNe,Amph,Antl,Hyme) | |
Neop <- c(Holo,Hemi,Psoc,Thys,Blis,Mant,Phas,Embi,Gryl,Mntp,Orth,Plec,Zora) | |
Pter <- c(Neop,Ephe,Odon) | |
Hexa <- c(Pter,Arch,Zyge,Dipl,Coll,Prot) | |
Clades <- list(Cole,Dipt,Lepi,Hyme,Hemi,Amph,Neur,Mega, | |
Raph,CoSt,Neoi,CoNe,Antl,Holo,Psoc,Thys, | |
BlIs,Mant,Phas,Embi,Gryl,Orth,Plec,Ephe, | |
Odon,Zyge,Arch,Dipl,Coll,Neop,Pter,Hexa) | |
header <- c("OG","Coleoptera_monophyly","Diptera_monophyly", | |
"Lepidoptera_monophyly","Hymenoptera_monophyly", | |
"Hemiptera_monophyly","Amphiesmenoptera_monophyly", | |
"Neuroptera_monophyly","Megaloptera_monophyly", | |
"Raphidioptera_monophyly","Coleoptera+Strepsiptera_monophyly", | |
"Neuropterida_monophyly","Coleoptera+Neuropterida_monophyly", | |
"Antliophora_monophyly","Holometabola_monophyly", | |
"Psocodea_monophyly","Thysanoptera_monophyly", | |
"Blattodea+Isoptera_monophyly","Mantodea_monophyly", | |
"Phasmatodea_monophyly","Embioptera_monophyly", | |
"Grylloblattodea_monophyly","Orthoptera_monophyly", | |
"Plecoptera_monophyly","Ephemeroptera_monophyly", | |
"Odonata_monophyly","Zygentoma_monophyly", | |
"Archaeognatha_monophyly","Diplura_monophyly", | |
"Collembola_monophyly","Neoptera_monophyly", | |
"Pterygota_monophyly","Hexapoda_monophyly") | |
### DEFINE ROOT TREES FUNCTION ### | |
rootNonHex <- function(tree) { | |
possible_outgroups <- c("ISCAP","Symphylella_vulgaris","Glomeris_pustulata","Cypridininae_sp","Sarsinebalia_urgorii","Litopenaeus_vannamei","Celuca_puligator","Lepeophtheirus_salmonis","DPULE","Speleonectes_tulumensis") | |
for (outgroup in possible_outgroups) { | |
taxa <- tree$tip.label | |
if (outgroup %in% taxa) { | |
tree <- root(phy=tree,outgroup=outgroup,resolve.root=T) | |
break | |
} | |
} | |
return(tree) | |
} | |
### DEFINE MONOPHYLY FUNCTION ### | |
checkMono <- function(tree,clade){ | |
taxa <- tree$tip.label | |
for (taxon in clade) { | |
if (!(taxon %in% taxa)) { | |
clade <- clade[!clade==taxon] | |
} | |
} | |
len <- length(clade) | |
if (len <= 1) { | |
monophyly <- NA | |
} else { | |
monophyly <- is.monophyletic(tree,tips=clade) | |
} | |
return(monophyly) | |
} | |
### DEFINE A FUNCTION TO CHECK FOR MONOPHYLY OF ALL CLADES ### | |
getMonophylyData <- function(tree_file,clades) { | |
tr_name <- sub("RAxML_bipartitions.(EOG[0-9A-Z]+).linsi.aa.fas-out", | |
"\\1", perl=TRUE, x=tree_file) | |
tr <- read.tree(tree_file) | |
tr <- rootNonHex(tr) | |
#plot(tr) | |
#title(tr_name) | |
data <- data.frame(tr_name,lapply(clades,checkMono,tree=tr)) | |
return(data) | |
} | |
### LOOP OVER ALL TREES ### | |
ptm <- proc.time() | |
m <- lapply(tree_files,getMonophylyData,clades=Clades) | |
proc.time() - ptm | |
d <- rbindlist(m) | |
z <- setnames(d,header) | |
write.csv(z,file="misof_monophyly.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment