-
-
Save danielevans/66cea8f881a036b527eb to your computer and use it in GitHub Desktop.
A script that parses the bacterial phylogeny from http://www.bacterio.net/-classifphyla.html and prints it in a table format.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Splits the LPSN classification text into genus lines and anomaly lines.
#
# The original text is from http://www.bacterio.net/-classifphyla.html and is
# expected to be saved next to this script under the hard-coded name LPSN.txt.
#
# Output files (both truncated on every run):
#   LPSN-out.txt       - the first line containing a word character that
#                        follows each "Family ..." or "Unclassified ..." header
#   LPSN-anomolies.txt - every line that matched no rule while no genus line
#                        was pending
#
# TODO: parse lines your own way if it helps separate genuses.
# TODO: grab this data directly from the website.
# TODO: use the included date to report how old the current genus table is.
# TODO: track the current phylum / class / order / family so each genus can be
#       emitted as a full table row.

# File.readlines raises Errno::ENOENT for a missing file (it never returns
# nil), so the failure has to be rescued to report it with a short message.
begin
  line_array = File.readlines("LPSN.txt")
rescue Errno::ENOENT
  abort "LPSN.txt not found."
end

# True once a Family/Unclassified header has been seen and its genus line has
# not been written yet.
genus_next = false

# Block form guarantees both handles are closed even if the loop raises.
File.open("LPSN-out.txt", "w") do |out_file|
  File.open("LPSN-anomolies.txt", "w") do |anomolies_file|
    line_array.each do |line|
      # Exhaustive chain categorising each line: some lines are rank headers,
      # some are ignored, and only the genus line is added to the genus table.
      case line
      when /^Phylum "?\w+/, /^Class "?\w+/, /^Order "?\w+/
        # Higher-rank headers are recognised but currently produce no output
        # and leave the pending-genus state untouched.
      when /^Family "?\w+/, /^Unclassified "?\w+/
        genus_next = true
      else
        if genus_next
          # Lines without any word character are skipped while waiting for
          # the genus list; they go to neither output file.
          if line =~ /\w+/
            genus_next = false
            # line is the list of genuses
            out_file.write(line)
          end
        else
          # Write anomaly lines to their own file.
          anomolies_file.write(line)
        end
      end
    end
  end
end
# TODO: save the table into a file and include the date.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment