Skip to content

Instantly share code, notes, and snippets.

@cth
Created February 22, 2016 09:01
Show Gist options
  • Save cth/03d37c136cedc5bead8e to your computer and use it in GitHub Desktop.
Save cth/03d37c136cedc5bead8e to your computer and use it in GitHub Desktop.
#!/home/fng514/bin/julia
#$ -S /home/fng514/bin/julia
#$ -cwd
# Christian Theil Have, 2016.
using HypothesisTests
using StatsBase
nucleotides = Set{AbstractString}(["A","G","C","T"])
function process_vcf_line(outfile,line)
fields = split(line)
# Find the index of genotype quality fields
gtidx = first(find(x -> x=="GT", split(fields[9],':')))
tmp=find(x -> x=="AD", split(fields[9],':'))
if length(tmp) > 0
adidx=first(tmp)
else
write(outfile,line)
return(0)
end
if !in(fields[4],nucleotides) || !in(fields[5],nucleotides)
write(outfile,line)
return(0)
end
removed = 0
i = 1
field_idx = 10
for field in fields[10:end]
field_values = split(field,':')
gt = field_values[gtidx]
ad = field_values[adidx]
if (ad != ".")
try
a1,a2 = sort(map(x -> parse(Int,x), split(ad,",")))
pval = 1
ratio = a1 / (a1+a2)
#println(string("ad="ad, " a1=",a1," a2=",a2, " ratio=", ratio))
if gt == "0/1" && a1 > 0
# Do a binomial test of the counts
pval = pvalue(BinomialTest(a1,a1+a2,0.5))
elseif gt == "1/1" && a1 > 0
pval = pvalue(BinomialTest(a1,a1+a2,0.001))
end
if (ratio < 0.2 && gt == "0/1") || pval < 5e-7
field_values[gtidx] = "./."
removed += 1
fields[field_idx] = join(field_values, ":")
end
catch e
println(e)
end
end
field_idx += 1
end
write(outfile,string(join(fields,"\t"),"\n"))
return(removed)
end
outvcf=open(ARGS[2], "w")
println(string("reading ", ARGS[1]))
open(ARGS[1]) do vcf
total_removed = 0
lines_read = 0
for line in eachline(vcf)
if lines_read % 1000 == 0
println(lines_read)
end
if ismatch(r"^chr", line)
total_removed += process_vcf_line(outvcf,line)
else
write(outvcf,line)
end
lines_read += 1
end
println(string("total amount of variants removed: ", total_removed))
end
close(outvcf)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment