Skip to content

Instantly share code, notes, and snippets.

@cth
Created November 20, 2015 13:05
Show Gist options
  • Save cth/309bd6e8525db23e2153 to your computer and use it in GitHub Desktop.
Save cth/309bd6e8525db23e2153 to your computer and use it in GitHub Desktop.
#!/home/fng514/bin/ruby
#$ -S /home/fng514/bin/ruby
#$ -cwd
require 'bio-samtools'
require 'zlib'
rsnumbers = {}
puts ARGV.inspect
throw "invalid numbmer of args" unless ARGV.size == 2
puts "reading vcf"
info_lines = [[ "ID", "REF", "ALT", "CHROM", "POS", "ALT_FREQ" ]]
columns = []
variants_processed=0
File.open(ARGV[0]) do |file|
file.each do |line|
if line =~ /^#CHROM.*/ then
fields = line.chomp.split("\t")
9.times { fields.shift }
columns[0] = ['particid'] + fields
end
next if line =~ /^#.*/
variants_processed = variants_processed + 1
puts "variants processed: #{variants_processed}" if variants_processed % 10 == 0
vcfline=Bio::DB::Vcf.new(line)
columns << [ vcfline.id ]
dosage_sum=0
vcfline.samples.each do |id,geno|
columns[columns.size-1] << "%.3f" % geno["DS"]
#puts geno.inspect
dosage_sum = dosage_sum + geno["DS"].to_f
end
allele_freq ="%.3f" % (dosage_sum.to_f / (2*vcfline.samples.size))
# todo: RSQ (but we do not need it yet)
info_lines << [ vcfline.id, vcfline.ref, vcfline.alt, vcfline.chrom, vcfline.pos, allele_freq ]
end
end
rows = columns.transpose
File.open(ARGV[1]+".dosage", "w") do |file|
rows.each do |r|
file.puts r.join("\t")
end
end
File.open(ARGV[1]+".info", "w") do |file|
info_lines.each do |fields|
file.puts fields.join("\t")
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment