Skip to content

Instantly share code, notes, and snippets.

@inutano
Created December 8, 2016 09:03
Show Gist options
  • Save inutano/d7123e347d863ecebd5439d8e0f3aa24 to your computer and use it in GitHub Desktop.
Save inutano/d7123e347d863ecebd5439d8e0f3aa24 to your computer and use it in GitHub Desktop.
require 'set'
# Joins a tab-separated table with taxonomic rank columns (genus, family,
# order, class, phylum) looked up from two "\t|\t"-delimited taxonomy files.
#
# NOTE(review): the delimiter and the columns used look like the NCBI taxdump
# files (names file: tax_id, name, unique name, name class; rank file: tax_id,
# parent tax_id, rank of that tax_id) -- confirm against the files actually
# passed in.
class Table
  # 0-based column of the input table holding the organism description.
  SNAME_COLUMN = 3
  # Ranks appended to every output row, in output order.
  RANK_COLUMNS = %w[genus family order class phylum].freeze
  # Rank at which the walk towards the root stops early.
  TOP_RANK = "kingdom"
  # Trailing, non-taxonomic part of a description ("... mitochondrion, complete genome").
  DESCRIPTION_SUFFIX = /(mitoch|chloroplast|clone|plastid|complete).+$/

  # @param table_path [String] tab-separated table; its first row is treated as a header
  # @param tax_name [String] "\t|\t"-separated file: taxid, name, (unique name, name class)
  # @param tax_rank [String] "\t|\t"-separated file: taxid, parent taxid, rank
  def initialize(table_path, tax_name, tax_rank)
    @table = load_table(table_path, "\t")
    index_names(load_table(tax_name, "\t|\t"))
    index_ranks(load_table(tax_rank, "\t|\t"))
  end

  # Reads +path+ and splits every chomped line on the literal string +chr+.
  #
  # File.readlines closes the file itself and, unlike Kernel#open, never
  # interprets a path starting with "|" as a command to run.
  #
  # @return [Array<Array<String>>]
  def load_table(path, chr)
    File.readlines(path).map { |line| line.chomp.split(chr) }
  end

  # @return [Array<String>] the joined data rows (header excluded) as tab-separated lines
  def join
    join_rank.map { |row| row.join("\t") }
  end

  # Appends the RANK_COLUMNS values to every data row (the header row is dropped).
  #
  # Rows are copied rather than mutated, so calling this repeatedly always
  # yields the same result.
  #
  # @return [Array<Array<String, nil>>]
  def join_rank
    rows = @table.drop(1)
    col_to_rank = snames2rank(rows.map { |row| row[SNAME_COLUMN] }.uniq).to_h
    rows.map { |row| row + col_to_rank.fetch(row[SNAME_COLUMN]) }
  end

  # Cuts the description at the first DESCRIPTION_SUFFIX keyword and strips
  # surrounding whitespace. A nil column yields an empty string.
  #
  # @return [String]
  def extract_sname_from_col(col)
    col.to_s.sub(DESCRIPTION_SUFFIX, "").strip
  end

  # Resolves each description column to its RANK_COLUMNS names.
  #
  # A description whose name is not in the names file gets nil for every rank
  # and a warning on stderr, instead of aborting the whole run.
  #
  # @return [Array<Array(String, Array<String, nil>)>] pairs of [description, rank names]
  def snames2rank(sname_col_list)
    sname_col_list.map do |sname_col|
      sname = extract_sname_from_col(sname_col)
      taxid = @name_to_id[sname]
      warn "Table: no taxonomy entry for #{sname.inspect} (from #{sname_col.inspect}); rank columns left empty" unless taxid
      lineage = taxid ? id2parents(taxid) : {}
      [sname_col, RANK_COLUMNS.map { |rank| lineage[rank] }]
    end
  end

  # Walks from +taxid+ towards the root and maps each rank met on the way to
  # the name of the taxon that holds that rank (the starting taxon included).
  #
  # The walk ends at the first TOP_RANK node, at a node missing from the rank
  # file, or as soon as an id repeats (a self-parented root or a cycle), so
  # lineages without a "kingdom" rank terminate too.
  #
  # @return [Hash{String => String, nil}] rank => taxon name
  def id2parents(taxid)
    lineage = {}
    visited = {}
    id = taxid
    while id && !visited[id]
      visited[id] = true
      parent_id, rank = @nodes[id]
      break if parent_id.nil?

      # The rank column describes +id+ itself, so it is paired with id's own name.
      lineage[rank] = id2name(id) if rank
      break if rank == TOP_RANK

      id = parent_id
    end
    lineage
  end

  # @return [String, nil] name recorded for +taxid+ (the "scientific name" row
  #   when the names file has one), or nil when the id is unknown
  def id2name(taxid)
    @id_to_name[taxid]
  end

  private

  # Builds the name => taxid and taxid => name lookup hashes in one pass.
  # For name => taxid the first row with that name wins; for taxid => name a
  # row whose 4th column starts with "scientific name" replaces an earlier
  # non-scientific row, otherwise the first row wins.
  def index_names(rows)
    @name_to_id = {}
    @id_to_name = {}
    scientific = {}
    rows.each do |taxid, name, _unique_name, name_class|
      next if taxid.nil? || name.nil?

      @name_to_id[name] = taxid unless @name_to_id.key?(name)
      is_scientific = name_class.to_s.start_with?("scientific name")
      next if @id_to_name.key?(taxid) && !(is_scientific && !scientific[taxid])

      @id_to_name[taxid] = name
      scientific[taxid] = is_scientific
    end
  end

  # Builds the taxid => [parent taxid, rank] lookup hash; the first row for a
  # taxid wins.
  def index_ranks(rows)
    @nodes = {}
    rows.each do |taxid, parent_id, rank|
      next if taxid.nil?

      @nodes[taxid] ||= [parent_id, rank]
    end
  end
end
# Command-line entry point: when this file is executed directly, build a Table
# from the command-line arguments and print each joined row on its own line.
puts Table.new(*ARGV).join if $PROGRAM_NAME == __FILE__
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment