Skip to content

Instantly share code, notes, and snippets.

@cth
Last active September 18, 2018 09:18
Show Gist options
  • Save cth/ca54337196cb12ed7a9e5c5a6a09941a to your computer and use it in GitHub Desktop.
Save cth/ca54337196cb12ed7a9e5c5a6a09941a to your computer and use it in GitHub Desktop.
# Basic Julia script for filtering VCF records on INFO-column values (INFO/R2 score and MAF)
using BGZFStreams
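# usage: julia filterinfo.jl input.vcf output.vcf MIN_INFO MIN_MAF
#   (both files are opened as BGZF streams; a record is kept when INFO/R2 > MIN_INFO and MAF > MIN_MAF)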
sin = BGZFStream(ARGS[1])
sout = BGZFStream(ARGS[2],"w")
min_info = parse(Float64,ARGS[3])
min_maf = parse(Float64,ARGS[4])
println(string("IN:", ARGS[1]))
println(string("OUT:", ARGS[2]))
println(string("INFO:", ARGS[3]))
println(string("MAF:", ARGS[4]))
for line in eachline(sin)
if occursin(r"^#", line)
write(sout,line)
else
fields = split(line)
info_field = fields[8]
info_subfields = split(info_field,';')
maf_ok = false
info_ok = false
for subfield in split(info_field,';')
if occursin(r"^INFO=", subfield) || occursin(r"^R2=", subfield)
if min_info < parse(Float64,(split(subfield,'=')[2]))
info_ok = true
end
elseif occursin(r"^MAF", subfield)
if min_maf < parse(Float64,(split(subfield,'=')[2]))
maf_ok = true
end
end
end
info_ok && maf_ok && write(sout,line)
end
end
close(sin)
close(sout)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment