Skip to content

Instantly share code, notes, and snippets.

@marekborowiec
Created May 17, 2016 23:42
Show Gist options
  • Save marekborowiec/bc5e6190bf9c9d19c9d86e6403521961 to your computer and use it in GitHub Desktop.
Save marekborowiec/bc5e6190bf9c9d19c9d86e6403521961 to your computer and use it in GitHub Desktop.
R script using ape to get data on clade monophyly
library("ape")
library("data.table")
### SET WORKING DIRECTORY ###
setwd("/home/mlb/Dropbox/Misof_et_al_Science/Supplementary_Archive_2/aa_final_alignments/trees")
### READ IN TREE FILES ###
tree_files <- dir(pattern="*bipartitions.EOG*")
### DEFINE CLADES ###
bees <- c("AMELI","Bombus_terrestris","Exoneura_robusta")
Cole <- c("Aleochara_curtula","Dendroctonus_ponderosae","Meloe_violaceus",
"TCAST","Lepicerus_sp","Priacma_serrata","Gyrinus_marinus",
"Carabus_granulatus")
Stre <- c("Mengenilla","Stylops_melittae")
Neur <- c("Conwentzia_psociformis","Osmylus_fulvicephalus","Dichochrysa_prasina",
"Euroleon_nostras")
Mega <- c("Corydalus_cornutus","Sialis_lutaria")
Raph <- c("Inocellia_crassicornis","Xanthostigma_xanthostigma")
Dipt <- c("AGAMB","Aede_aegy","Phlebotomus_papatasi","Trichocera_fuscata",
"Tipula_maxima","Bibio_marci","Bombylius_major","DMELA","Lipara_lucens",
"Rhagoletis_pomonella","Glossina_morsitans","Sarcophaga_crassipalpis",
"Triarthria_setipennis")
Meco <- c("Boreus_hyemalis","Nannochorista_sp","Bittacus_pilicornis",
"Panorpa_vulgaris")
Siph <- c("Ceratophyllus_gallinae","Archaeopsylla_erinacei","Ctenocephalides_felis")
Hyme <- c("Tenthredo_koehleri","Orussus_abietinus","Cotesia_vestalis",
"Leptopilina_clavipes","NVITR","Chrysis_viridula","AECHI",
"Harp_salt",bees)
Lepi <- c("Micropterix_calthella","Eriocrania_subpurpurella","Triodia_sylvina",
"Nemophora_deegerella","Yponomeuta_evonymella","Zygaena_fausta",
"Polyommatus_icarus","Parides_arcas","BMORI","Manduca_sexta")
Tric <- c("Rhyacophila_fasciata","Platycentropus_radiatus","Hydroptilidae_sp",
"Philopotamus_ludificatus","Cheumatopsyche_sp")
Hemi <- c("Trialeurodes_vaporariorum","Bemisia_tabaci","Acanthocasuarina_muellerianae",
"Planococcus_citri","Essigella_californica","APISU","Aphis_gossypii",
"Acanthosoma_haemorrhoidale","Notostira_elongata","Ranatra_linearis",
"Velia_caprai","Xenophysella_greensladeae","Nilaparvata_lugens",
"Cercopis_vulnerata","Okanagana_villosa")
Psoc <- c("Ectopsocus_briggsi","Liposcelis_bostrychophila","Menopon_gallinae",
"PHUMA")
Thys <- c("Gynaikothrips_ficorum","Frankliniella_cephalica","Thrips_palmi")
Isop <- c("Mastotermes_darwiniensis","Prorhinotermes_simplex","ZNEVA")
Blat <- c("Blaberus_atropus","Periplaneta_americana","Cryptocercus_sp")
BlIs <- c(Isop,Blat)
Mant <- c("Metallyticus_splendidus","Empusa_pennata","Mantis_religiosa")
Phas <- c("Timema_cristinae","Peruphasma_schultei","Aretaon_asperrimus")
Embi <- c("Haploembia_palaui","Aposthonia_japonica")
Gryl <- c("Grylloblatta_bifratrilecta","Galloisiana_yuasai")
Mntp <- c("Tanzaniophasma_sp")
Orth <- c("Gryllotalpa_sp","Ceuthophilus_sp","Tetrix_subulata",
"Prosarthria_teretrirostris","Stenobothrus_lineatus")
Plec <- c("Leuctra_sp","Perla_marginata","Cosmioperla_kuna")
Zora <- c("Zorotypus_caudelli")
Ephe <- c("Ephemera_danica","Isonychia_bicolor","Eurylophella_sp",
"Baetis_pumilus")
Odon <- c("Calopteryx_splendens","Cordulegaster_boltoni","Epiophlebia_superstes")
Zyge <- c("Tricholepidion_gertschi","Atelura_formicaria","Thermobia_domestica")
Arch <- c("Meinertellus_cundinamarcensis","Machilis_hrabei")
Dipl <- c("Campodea_augens","Occasjapyx_japonicus")
Coll <- c("Sminthurus_vir_nig","Tetrodontophora_bielanensis","Anurida_maritima",
"Pogonognathellus_lon_fla","Folsomia_candida")
Prot <- c("Acerentomon_sp")
#eury <- c("Eurylophella_sp","Ephemera_danica")
#mosq <- c("Aede_aegy","AGAMB")
#psoc <- c("Ectopsocus_briggsi","Liposcelis_bostrychophila","PHUMA","Menopon_gallinae")
CoSt <- c(Cole,Stre)
Neoi <- c(Neur,Mega,Raph)
CoNe <- c(CoSt,Neoi)
Amph <- c(Lepi,Tric)
Antl <- c(Dipt,Meco,Siph)
Holo <- c(CoNe,Amph,Antl,Hyme)
Neop <- c(Holo,Hemi,Psoc,Thys,Blis,Mant,Phas,Embi,Gryl,Mntp,Orth,Plec,Zora)
Pter <- c(Neop,Ephe,Odon)
Hexa <- c(Pter,Arch,Zyge,Dipl,Coll,Prot)
Clades <- list(Cole,Dipt,Lepi,Hyme,Hemi,Amph,Neur,Mega,
Raph,CoSt,Neoi,CoNe,Antl,Holo,Psoc,Thys,
BlIs,Mant,Phas,Embi,Gryl,Orth,Plec,Ephe,
Odon,Zyge,Arch,Dipl,Coll,Neop,Pter,Hexa)
header <- c("OG","Coleoptera_monophyly","Diptera_monophyly",
"Lepidoptera_monophyly","Hymenoptera_monophyly",
"Hemiptera_monophyly","Amphiesmenoptera_monophyly",
"Neuroptera_monophyly","Megaloptera_monophyly",
"Raphidioptera_monophyly","Coleoptera+Strepsiptera_monophyly",
"Neuropterida_monophyly","Coleoptera+Neuropterida_monophyly",
"Antliophora_monophyly","Holometabola_monophyly",
"Psocodea_monophyly","Thysanoptera_monophyly",
"Blattodea+Isoptera_monophyly","Mantodea_monophyly",
"Phasmatodea_monophyly","Embioptera_monophyly",
"Grylloblattodea_monophyly","Orthoptera_monophyly",
"Plecoptera_monophyly","Ephemeroptera_monophyly",
"Odonata_monophyly","Zygentoma_monophyly",
"Archaeognatha_monophyly","Diplura_monophyly",
"Collembola_monophyly","Neoptera_monophyly",
"Pterygota_monophyly","Hexapoda_monophyly")
### DEFINE ROOT TREES FUNCTION ###
rootNonHex <- function(tree) {
possible_outgroups <- c("ISCAP","Symphylella_vulgaris","Glomeris_pustulata","Cypridininae_sp","Sarsinebalia_urgorii","Litopenaeus_vannamei","Celuca_puligator","Lepeophtheirus_salmonis","DPULE","Speleonectes_tulumensis")
for (outgroup in possible_outgroups) {
taxa <- tree$tip.label
if (outgroup %in% taxa) {
tree <- root(phy=tree,outgroup=outgroup,resolve.root=T)
break
}
}
return(tree)
}
### DEFINE MONOPHYLY FUNCTION ###
checkMono <- function(tree,clade){
taxa <- tree$tip.label
for (taxon in clade) {
if (!(taxon %in% taxa)) {
clade <- clade[!clade==taxon]
}
}
len <- length(clade)
if (len <= 1) {
monophyly <- NA
} else {
monophyly <- is.monophyletic(tree,tips=clade)
}
return(monophyly)
}
### DEFINE A FUNCTION TO CHECK FOR MONOPHYLY OF ALL CLADES ###
getMonophylyData <- function(tree_file,clades) {
tr_name <- sub("RAxML_bipartitions.(EOG[0-9A-Z]+).linsi.aa.fas-out",
"\\1", perl=TRUE, x=tree_file)
tr <- read.tree(tree_file)
tr <- rootNonHex(tr)
#plot(tr)
#title(tr_name)
data <- data.frame(tr_name,lapply(clades,checkMono,tree=tr))
return(data)
}
### LOOP OVER ALL TREES ###
ptm <- proc.time()
m <- lapply(tree_files,getMonophylyData,clades=Clades)
proc.time() - ptm
d <- rbindlist(m)
z <- setnames(d,header)
write.csv(z,file="misof_monophyly.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment