Last active
July 24, 2018 12:36
-
-
Save tbl3rd/90a5c77d4f384f1cd1bff645ff1b6d9b to your computer and use it in GitHub Desktop.
Parse header metadata from a Variant Call Format file into a Clojure map. (See https://github.com/tbl3rd/vcf)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{"p95Red" "8992", | |
"Extension(A)" "Extension(A)|Extension|25349|405", | |
"zcallVersion" "1.0.0.0", | |
"manifestFile" "Broad_GWAS_supplemental_15061359_A1.bpm", | |
"chipWellBarcode" "200557070005_R06C01", | |
"Biotin(High)" "Biotin(High)|Staining|618|4165", | |
"analysisVersionNumber" "1", | |
"picardVersion" "07b46e26eb638116226b10df9f3f653b82b8ea95", | |
"String(MM)" "String(MM)|Stringency|896|237", | |
"arrayType" "Broad_GWAS_supplemental_15061359_A1", | |
"extendedManifestFile" "Broad_GWAS_supplemental_15061359_A1.1.2.extended.csv", | |
"p95Green" "4329", | |
"fileformat" "VCFv4.2", | |
"NSB(Bgnd)Red" "NSB(Bgnd)Red|Non-SpecificBinding|360|153", | |
"Hyb(Low)" "Hyb(Low)|Hybridization|2031|1856", | |
"Restore" "Restore|Restoration|228|305", | |
"fingerprintGender" "Unknown", | |
"fileDate" "Wed May 23 21:21:12 UTC 2018", | |
"DNP(High)" "DNP(High)|Staining|12573|355", | |
"contig" {"9" {"ID" "9", "length" "141213431", "assembly" "GRCh37"}, | |
"GL000213.1" {"ID" "GL000213.1", | |
"length" "164239", | |
"assembly" "GRCh37"}, | |
"GL000207.1" {"ID" "GL000207.1", | |
"length" "4262", | |
"assembly" "GRCh37"}, | |
"3" {"ID" "3", "length" "198022430", "assembly" "GRCh37"}, | |
"GL000248.1" {"ID" "GL000248.1", | |
"length" "39786", | |
"assembly" "GRCh37"}, | |
"GL000205.1" {"ID" "GL000205.1", | |
"length" "174588", | |
"assembly" "GRCh37"}, | |
"GL000237.1" {"ID" "GL000237.1", | |
"length" "45867", | |
"assembly" "GRCh37"}, | |
"GL000196.1" {"ID" "GL000196.1", | |
"length" "38914", | |
"assembly" "GRCh37"}, | |
"GL000212.1" {"ID" "GL000212.1", | |
"length" "186858", | |
"assembly" "GRCh37"}, | |
"22" {"ID" "22", "length" "51304566", "assembly" "GRCh37"}, | |
"GL000241.1" {"ID" "GL000241.1", | |
"length" "42152", | |
"assembly" "GRCh37"}, | |
"4" {"ID" "4", "length" "191154276", "assembly" "GRCh37"}, | |
"8" {"ID" "8", "length" "146364022", "assembly" "GRCh37"}, | |
"14" {"ID" "14", "length" "107349540", "assembly" "GRCh37"}, | |
"GL000239.1" {"ID" "GL000239.1", | |
"length" "33824", | |
"assembly" "GRCh37"}, | |
"GL000202.1" {"ID" "GL000202.1", | |
"length" "40103", | |
"assembly" "GRCh37"}, | |
"GL000234.1" {"ID" "GL000234.1", | |
"length" "40531", | |
"assembly" "GRCh37"}, | |
"21" {"ID" "21", "length" "48129895", "assembly" "GRCh37"}, | |
"GL000246.1" {"ID" "GL000246.1", | |
"length" "38154", | |
"assembly" "GRCh37"}, | |
"GL000235.1" {"ID" "GL000235.1", | |
"length" "34474", | |
"assembly" "GRCh37"}, | |
"GL000220.1" {"ID" "GL000220.1", | |
"length" "161802", | |
"assembly" "GRCh37"}, | |
"NC_007605" {"ID" "NC_007605", | |
"length" "171823", | |
"assembly" "NC_007605.1"}, | |
"20" {"ID" "20", "length" "63025520", "assembly" "GRCh37"}, | |
"GL000240.1" {"ID" "GL000240.1", | |
"length" "41933", | |
"assembly" "GRCh37"}, | |
"GL000208.1" {"ID" "GL000208.1", | |
"length" "92689", | |
"assembly" "GRCh37"}, | |
"GL000230.1" {"ID" "GL000230.1", | |
"length" "43691", | |
"assembly" "GRCh37"}, | |
"GL000238.1" {"ID" "GL000238.1", | |
"length" "39939", | |
"assembly" "GRCh37"}, | |
"GL000199.1" {"ID" "GL000199.1", | |
"length" "169874", | |
"assembly" "GRCh37"}, | |
"19" {"ID" "19", "length" "59128983", "assembly" "GRCh37"}, | |
"17" {"ID" "17", "length" "81195210", "assembly" "GRCh37"}, | |
"Y" {"ID" "Y", "length" "59373566", "assembly" "GRCh37"}, | |
"GL000221.1" {"ID" "GL000221.1", | |
"length" "155397", | |
"assembly" "GRCh37"}, | |
"GL000224.1" {"ID" "GL000224.1", | |
"length" "179693", | |
"assembly" "GRCh37"}, | |
"GL000215.1" {"ID" "GL000215.1", | |
"length" "172545", | |
"assembly" "GRCh37"}, | |
"15" {"ID" "15", "length" "102531392", "assembly" "GRCh37"}, | |
"7" {"ID" "7", "length" "159138663", "assembly" "GRCh37"}, | |
"GL000217.1" {"ID" "GL000217.1", | |
"length" "172149", | |
"assembly" "GRCh37"}, | |
"GL000236.1" {"ID" "GL000236.1", | |
"length" "41934", | |
"assembly" "GRCh37"}, | |
"5" {"ID" "5", "length" "180915260", "assembly" "GRCh37"}, | |
"18" {"ID" "18", "length" "78077248", "assembly" "GRCh37"}, | |
"12" {"ID" "12", "length" "133851895", "assembly" "GRCh37"}, | |
"GL000242.1" {"ID" "GL000242.1", | |
"length" "43523", | |
"assembly" "GRCh37"}, | |
"13" {"ID" "13", "length" "115169878", "assembly" "GRCh37"}, | |
"GL000219.1" {"ID" "GL000219.1", | |
"length" "179198", | |
"assembly" "GRCh37"}, | |
"GL000243.1" {"ID" "GL000243.1", | |
"length" "43341", | |
"assembly" "GRCh37"}, | |
"GL000195.1" {"ID" "GL000195.1", | |
"length" "182896", | |
"assembly" "GRCh37"}, | |
"GL000232.1" {"ID" "GL000232.1", | |
"length" "40652", | |
"assembly" "GRCh37"}, | |
"6" {"ID" "6", "length" "171115067", "assembly" "GRCh37"}, | |
"GL000247.1" {"ID" "GL000247.1", | |
"length" "36422", | |
"assembly" "GRCh37"}, | |
"GL000211.1" {"ID" "GL000211.1", | |
"length" "166566", | |
"assembly" "GRCh37"}, | |
"GL000231.1" {"ID" "GL000231.1", | |
"length" "27386", | |
"assembly" "GRCh37"}, | |
"GL000233.1" {"ID" "GL000233.1", | |
"length" "45941", | |
"assembly" "GRCh37"}, | |
"GL000216.1" {"ID" "GL000216.1", | |
"length" "172294", | |
"assembly" "GRCh37"}, | |
"GL000223.1" {"ID" "GL000223.1", | |
"length" "180455", | |
"assembly" "GRCh37"}, | |
"GL000222.1" {"ID" "GL000222.1", | |
"length" "186861", | |
"assembly" "GRCh37"}, | |
"GL000227.1" {"ID" "GL000227.1", | |
"length" "128374", | |
"assembly" "GRCh37"}, | |
"GL000193.1" {"ID" "GL000193.1", | |
"length" "189789", | |
"assembly" "GRCh37"}, | |
"1" {"ID" "1", "length" "249250621", "assembly" "GRCh37"}, | |
"GL000244.1" {"ID" "GL000244.1", | |
"length" "39929", | |
"assembly" "GRCh37"}, | |
"GL000229.1" {"ID" "GL000229.1", | |
"length" "19913", | |
"assembly" "GRCh37"}, | |
"GL000210.1" {"ID" "GL000210.1", | |
"length" "27682", | |
"assembly" "GRCh37"}, | |
"GL000206.1" {"ID" "GL000206.1", | |
"length" "41001", | |
"assembly" "GRCh37"}, | |
"GL000194.1" {"ID" "GL000194.1", | |
"length" "191469", | |
"assembly" "GRCh37"}, | |
"X" {"ID" "X", "length" "155270560", "assembly" "GRCh37"}, | |
"GL000204.1" {"ID" "GL000204.1", | |
"length" "81310", | |
"assembly" "GRCh37"}, | |
"11" {"ID" "11", "length" "135006516", "assembly" "GRCh37"}, | |
"GL000226.1" {"ID" "GL000226.1", | |
"length" "15008", | |
"assembly" "GRCh37"}, | |
"GL000198.1" {"ID" "GL000198.1", | |
"length" "90085", | |
"assembly" "GRCh37"}, | |
"GL000200.1" {"ID" "GL000200.1", | |
"length" "187035", | |
"assembly" "GRCh37"}, | |
"GL000228.1" {"ID" "GL000228.1", | |
"length" "129120", | |
"assembly" "GRCh37"}, | |
"GL000192.1" {"ID" "GL000192.1", | |
"length" "547496", | |
"assembly" "GRCh37"}, | |
"GL000225.1" {"ID" "GL000225.1", | |
"length" "211173", | |
"assembly" "GRCh37"}, | |
"GL000201.1" {"ID" "GL000201.1", | |
"length" "36148", | |
"assembly" "GRCh37"}, | |
"2" {"ID" "2", "length" "243199373", "assembly" "GRCh37"}, | |
"GL000245.1" {"ID" "GL000245.1", | |
"length" "36651", | |
"assembly" "GRCh37"}, | |
"GL000191.1" {"ID" "GL000191.1", | |
"length" "106433", | |
"assembly" "GRCh37"}, | |
"16" {"ID" "16", "length" "90354753", "assembly" "GRCh37"}, | |
"GL000209.1" {"ID" "GL000209.1", | |
"length" "159169", | |
"assembly" "GRCh37"}, | |
"10" {"ID" "10", "length" "135534747", "assembly" "GRCh37"}, | |
"GL000214.1" {"ID" "GL000214.1", | |
"length" "137718", | |
"assembly" "GRCh37"}, | |
"GL000218.1" {"ID" "GL000218.1", | |
"length" "161147", | |
"assembly" "GRCh37"}, | |
"GL000203.1" {"ID" "GL000203.1", | |
"length" "37498", | |
"assembly" "GRCh37"}, | |
"GL000249.1" {"ID" "GL000249.1", | |
"length" "38502", | |
"assembly" "GRCh37"}, | |
"MT" {"ID" "MT", "length" "16569", "assembly" "GRCh37"}, | |
"GL000197.1" {"ID" "GL000197.1", | |
"length" "37175", | |
"assembly" "GRCh37"}}, | |
"NSB(Bgnd)Purple" "NSB(Bgnd)Purple|Non-SpecificBinding|371|161", | |
"autocallVersion" "2.0.0.137", | |
"extendedIlluminaManifestVersion" "1.2", | |
"NSB(Bgnd)Green" "NSB(Bgnd)Green|Non-SpecificBinding|277|149", | |
"NSB(Bgnd)Blue" "NSB(Bgnd)Blue|Non-SpecificBinding|371|153", | |
"Extension(G)" "Extension(G)|Extension|2205|8950", | |
"content" "Broad_GWAS_supplemental_15061359_A1.1.2.extended.csv", | |
"NP(G)" "NP(G)|Non-Polymorphic|563|4047", | |
"autocallDate" "05/23/2018 21:15", | |
"Extension(T)" "Extension(T)|Extension|27882|316", | |
"scannerName" "N370", | |
"NP(C)" "NP(C)|Non-Polymorphic|568|4775", | |
"reference" "/cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta", | |
"NP(T)" "NP(T)|Non-Polymorphic|7892|204", | |
"source" "BPM file", | |
"DNP(Bgnd)" "DNP(Bgnd)|Staining|333|266", | |
"genomeBuild" "HG19", | |
"clusterFile" "Broad_GWAS_supplemental_15061359_A1.egt", | |
"expectedGender" "Female", | |
"FILTER" {"DUPE" {"ID" "DUPE", | |
"Description" "Duplicate assays position."}, | |
"FAIL_REF" {"ID" "FAIL_REF", | |
"Description" "Assay failed to map to reference."}, | |
"TRIALLELIC" {"ID" "TRIALLELIC", | |
"Description" "Tri-allelic assay."}, | |
"ZCALL_DIFF" {"ID" "ZCALL_DIFF", | |
"Description" "ZCALL_DIFF"}}, | |
"String(PM)" "String(PM)|Stringency|13564|257", | |
"autocallGender" "F", | |
"zcallThresholds" "thresholds.7.txt", | |
"Extension(C)" "Extension(C)|Extension|1442|8646", | |
"FORMAT" {"NORMX" {"ID" "NORMX", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Normalized X intensity"}, | |
"LRR" {"ID" "LRR", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Log R Ratio"}, | |
"Y" {"ID" "Y", | |
"Number" "1", | |
"Type" "Integer", | |
"Description" "Raw Y intensity"}, | |
"R" {"ID" "R", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Normalized R value"}, | |
"IGC" {"ID" "IGC", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Illumina GenCall Confidence Score"}, | |
"NORMY" {"ID" "NORMY", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Normalized Y intensity"}, | |
"THETA" {"ID" "THETA", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Normalized Theta value"}, | |
"X" {"ID" "X", | |
"Number" "1", | |
"Type" "Integer", | |
"Description" "Raw X intensity"}, | |
"GTA" {"ID" "GTA", | |
"Number" "1", | |
"Type" "String", | |
"Description" "Illumina Autocall Genotype"}, | |
"GT" {"ID" "GT", | |
"Number" "1", | |
"Type" "String", | |
"Description" "Genotype"}, | |
"BAF" {"ID" "BAF", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "B Allele Frequency"}, | |
"GTZ" {"ID" "GTZ", | |
"Number" "1", | |
"Type" "String", | |
"Description" "zCall Genotype"}}, | |
"Hyb(Medium)" "Hyb(Medium)|Hybridization|517|5068", | |
"INFO" {"N_AA" {"ID" "N_AA", | |
"Number" "1", | |
"Type" "Integer", | |
"Description" "Number of AA calls in training set"}, | |
"devR_AB" {"ID" "devR_AB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized R for AB cluster"}, | |
"meanX_AA" {"ID" "meanX_AA", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized X for AA cluster"}, | |
"devY_BB" {"ID" "devY_BB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized Y for BB cluster"}, | |
"SOURCE" {"ID" "SOURCE", | |
"Number" "1", | |
"Type" "String", | |
"Description" "Probe source"}, | |
"meanY_AA" {"ID" "meanY_AA", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized Y for AA cluster"}, | |
"N_AB" {"ID" "N_AB", | |
"Number" "1", | |
"Type" "Integer", | |
"Description" "Number of AB calls in training set"}, | |
"ILLUMINA_STRAND" {"ID" "ILLUMINA_STRAND", | |
"Number" "1", | |
"Type" "String", | |
"Description" "Probe strand"}, | |
"ILLUMINA_CHR" {"ID" "ILLUMINA_CHR", | |
"Number" "1", | |
"Type" "String", | |
"Description" "Chromosome in Illumina manifest"}, | |
"devY_AA" {"ID" "devY_AA", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized Y for AA cluster"}, | |
"devR_AA" {"ID" "devR_AA", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized R for AA cluster"}, | |
"ILLUMINA_BUILD" {"ID" "ILLUMINA_BUILD", | |
"Number" "1", | |
"Type" "String", | |
"Description" "Genome Build in Illumina manifest"}, | |
"PROBE_B" {"ID" "PROBE_B", | |
"Number" "1", | |
"Type" "String", | |
"Description" "Probe base pair sequence; not missing for strand-ambiguous SNPs"}, | |
"meanTHETA_BB" {"ID" "meanTHETA_BB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized THETA for BB cluster"}, | |
"devTHETA_AB" {"ID" "devTHETA_AB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized THETA for AB cluster"}, | |
"AC" {"ID" "AC", | |
"Number" "A", | |
"Type" "Integer", | |
"Description" "Allele count in genotypes, for each ALT allele, in the same order as listed"}, | |
"devY_AB" {"ID" "devY_AB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized Y for AB cluster"}, | |
"devR_BB" {"ID" "devR_BB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized R for BB cluster"}, | |
"meanR_AB" {"ID" "meanR_AB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized R for AB cluster"}, | |
"GC_SCORE" {"ID" "GC_SCORE", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Gentrain Score"}, | |
"AN" {"ID" "AN", | |
"Number" "1", | |
"Type" "Integer", | |
"Description" "Total number of alleles in called genotypes"}, | |
"PROBE_A" {"ID" "PROBE_A", | |
"Number" "1", | |
"Type" "String", | |
"Description" "Probe base pair sequence"}, | |
"AF" {"ID" "AF", | |
"Number" "A", | |
"Type" "Float", | |
"Description" "Allele Frequency, for each ALT allele, in the same order as listed"}, | |
"meanR_AA" {"ID" "meanR_AA", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized R for AA cluster"}, | |
"devTHETA_AA" {"ID" "devTHETA_AA", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized THETA for AA cluster"}, | |
"devTHETA_BB" {"ID" "devTHETA_BB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized THETA for BB cluster"}, | |
"devX_BB" {"ID" "devX_BB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized X for BB cluster"}, | |
"meanTHETA_AA" {"ID" "meanTHETA_AA", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized THETA for AA cluster"}, | |
"meanY_BB" {"ID" "meanY_BB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized Y for BB cluster"}, | |
"meanY_AB" {"ID" "meanY_AB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized Y for AB cluster"}, | |
"meanTHETA_AB" {"ID" "meanTHETA_AB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized THETA for AB cluster"}, | |
"ALLELE_A" {"ID" "ALLELE_A", | |
"Number" "1", | |
"Type" "String", | |
"Description" "A allele"}, | |
"devX_AB" {"ID" "devX_AB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized X for AB cluster"}, | |
"meanR_BB" {"ID" "meanR_BB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized R for BB cluster"}, | |
"devX_AA" {"ID" "devX_AA", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Standard deviation of normalized X for AA cluster"}, | |
"ILLUMINA_POS" {"ID" "ILLUMINA_POS", | |
"Number" "1", | |
"Type" "Integer", | |
"Description" "Position in Illumina manifest"}, | |
"ALLELE_B" {"ID" "ALLELE_B", | |
"Number" "1", | |
"Type" "String", | |
"Description" "B allele"}, | |
"refSNP" {"ID" "refSNP", | |
"Number" "1", | |
"Type" "String", | |
"Description" "dbSNP rs ID"}, | |
"zthresh_X" {"ID" "zthresh_X", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "zCall X threshold"}, | |
"meanX_AB" {"ID" "meanX_AB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized X for AB cluster"}, | |
"meanX_BB" {"ID" "meanX_BB", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "Mean of normalized X for BB cluster"}, | |
"N_BB" {"ID" "N_BB", | |
"Number" "1", | |
"Type" "Integer", | |
"Description" "Number of BB calls in training set"}, | |
"zthresh_Y" {"ID" "zthresh_Y", | |
"Number" "1", | |
"Type" "Float", | |
"Description" "zCall Y threshold"}, | |
"BEADSET_ID" {"ID" "BEADSET_ID", | |
"Number" "1", | |
"Type" "Integer", | |
"Description" "Bead set ID for normalization"}}, | |
"Hyb(High)" "Hyb(High)|Hybridization|2227|8113", | |
"sampleAlias" "NA12878", | |
"Biotin(Bgnd)" "Biotin(Bgnd)|Staining|456|292", | |
"imagingDate" "3/2/2017 4:24:38 PM", | |
"NP(A)" "NP(A)|Non-Polymorphic|7725|255", | |
"TargetRemoval" "TargetRemoval|TargetRemoval|1156|208"} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment