Created
July 1, 2009 18:34
-
-
Save drio/138962 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
#
# Converts SNP probe definitions from sequence space to color space.
#
# Usage: <script> <in_file>
#
# Input, one record per whitespace-separated line.
#
# seq space Old/NEW format:
#   1 775852 rs2980300 A/G A GACTTCACTAACTCANAGAGACACAGTCATT
#   1 711153 drio1 CACCAAAGGAGGAAG CCTTGGTAGAAGATA C G
# CS space NEW format (what this script prints, tab-separated):
#   1 711153 rs12565286 10020220202 02010132202 C G 30 03
#
# WARNING!:
#   + ss: flanks are 15bp long
#   + cs: flanks are 11bp long
#
# Only records in the NEW sequence-space format (seven fields, with a single
# A/C/G/T reference and variant allele) are converted; every other line is
# silently skipped.

# Di-nucleotide -> color code lookup table.
SQ_TO_CS = {
  "AA" => 0, "AC" => 1, "AG" => 2, "AT" => 3,
  "CA" => 1, "CC" => 0, "CG" => 3, "CT" => 2,
  "GA" => 2, "GC" => 3, "GG" => 0, "GT" => 1,
  "TA" => 3, "TC" => 2, "TG" => 1, "TT" => 0
}.freeze

# Allele values accepted for the reference and variant columns.
NUCLEOTIDES = %w[A C G T].freeze

# Number of colors emitted for each flank in the color-space output.
CS_FLANK_LEN = 11

# Encodes a nucleotide string as a string of color codes, one code per pair
# of adjacent nucleotides (so the result is one character shorter than +seq+).
#
# NOTE(review): a pair that is not in SQ_TO_CS (e.g. one containing "N")
# contributes an empty string, which shortens the result. This mirrors the
# original behaviour; confirm whether such records should be rejected.
def to_color_space(seq)
  seq.each_char.each_cons(2).map { |a, b| SQ_TO_CS[a + b].to_s }.join
end

# Returns the two colors spanning the SNP position: the transition from the
# last base of the 5' flank into +allele+, and from +allele+ into the first
# base of the 3' flank.
def snp_colors(left_base, allele, right_base)
  SQ_TO_CS[left_base + allele].to_s + SQ_TO_CS[allele + right_base].to_s
end

# Converts one sequence-space input line into a tab-separated color-space
# record.
#
# Returns the record as a String (without trailing newline), or nil when the
# line should be skipped because its reference/variant columns are not each
# exactly one of A, C, G or T (this also covers short and blank lines).
def convert_line(line)
  chrm, pos, id, f_seq, t_seq, ssr, ssv = line.split
  return nil unless NUCLEOTIDES.include?(ssr) && NUCLEOTIDES.include?(ssv)

  f_cs = to_color_space(f_seq)
  t_cs = to_color_space(t_seq)

  # Keep the colors adjacent to the SNP: the tail of the 5' flank and the
  # head of the 3' flank. For 15bp flanks this is f_cs[3..13] / t_cs[0..10].
  f_out = f_cs[-CS_FLANK_LEN, CS_FLANK_LEN] || f_cs
  t_out = t_cs[0, CS_FLANK_LEN]

  rd = snp_colors(f_seq[-1], ssr, t_seq[0])
  vd = snp_colors(f_seq[-1], ssv, t_seq[0])

  [chrm, pos, id, f_out, t_out, ssr, ssv, rd, vd].join("\t")
end

if ARGV.empty?
  warn "usage: #{File.basename($PROGRAM_NAME)} <in_file>"
else
  # File.foreach closes the file once iteration finishes.
  File.foreach(ARGV[0]) do |line|
    record = convert_line(line)
    puts record if record
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment