-
-
Save gpcr/4e4a2e89e0fa3c0d4b69 to your computer and use it in GitHub Desktop.
GEMINI Tutorial Commands
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# slide 4
# Download the trio VCF and the pedigree for the dominant analysis, then load
# both into a GEMINI database named trio.trim.vep.dominant.db.
curl https://s3.amazonaws.com/gemini-tutorials/trio.trim.vep.vcf.gz > trio.trim.vep.vcf.gz
# Fetch dominant.ped (not recessive.ped) so the saved file really holds the
# dominant pedigree that the autosomal_dominant steps below rely on.
curl https://s3.amazonaws.com/gemini-tutorials/dominant.ped > dominant.ped
gemini load --cores 4 \
    -v trio.trim.vep.vcf.gz \
    -t VEP \
    --skip-gene-tables \
    -p dominant.ped \
    trio.trim.vep.dominant.db
# Each step below runs `gemini autosomal_dominant` on the same database with a
# progressively stricter --filter; `wc -l` reports how many output lines remain.

# slide 5
# Preview the first lines of output as an aligned table.
gemini autosomal_dominant --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    trio.trim.vep.dominant.db \
    | head \
    | column -t

# slide 6
# Count output lines with no filter applied.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    trio.trim.vep.dominant.db \
    | wc -l

# slide 7
# Keep only variants whose `filter` column is NULL.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    --filter "filter is NULL" \
    trio.trim.vep.dominant.db \
    | wc -l

# slide 8
# Additionally drop variants whose impact_severity is 'LOW'.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    --filter "filter is NULL and impact_severity != 'LOW'" \
    trio.trim.vep.dominant.db \
    | wc -l

# slide 9
# Additionally require aaf_esp_ea and aaf_exac_all to be <= 0.01 or NULL.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    --filter "filter is NULL and impact_severity != 'LOW' and (aaf_esp_ea <= 0.01 or aaf_esp_ea is NULL) and (aaf_exac_all <= 0.01 or aaf_exac_all is NULL)" \
    trio.trim.vep.dominant.db \
    | wc -l

# slide 10
# Same frequency limits, but restricted to impact_severity 'HIGH'.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    --filter "filter is NULL and impact_severity == 'HIGH' and (aaf_esp_ea <= 0.01 or aaf_esp_ea is NULL) and (aaf_exac_all <= 0.01 or aaf_exac_all is NULL)" \
    trio.trim.vep.dominant.db \
    | wc -l
# The same dominant search expressed with `gemini query` and --gt-filter.

# slide 14
# Name the samples explicitly: 4805 and 1805 heterozygous, 1847 homozygous
# reference.
gemini query \
    -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) \
        FROM variants" \
    --header \
    --gt-filter "gt_types.4805 == HET \
                 and gt_types.1805 == HET \
                 and gt_types.1847 == HOM_REF" \
    trio.trim.vep.dominant.db \
    | head \
    | column -t

# slide 16
# Select samples by pedigree phenotype instead of by name: every phenotype==2
# sample must be HET and every phenotype==1 sample must be HOM_REF.
gemini query \
    -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) \
        FROM variants" \
    --header \
    --gt-filter "(gt_types).(phenotype==2).(==HET).(all) \
                 and (gt_types).(phenotype==1).(==HOM_REF).(all)" \
    trio.trim.vep.dominant.db \
    | head \
    | column -t

# slide 17
# Also report per-sample depths and require a depth of at least 20 in all
# samples.
gemini query \
    -q "SELECT chrom, start, end, ref, alt, gene, impact, \
               (gts).(*), (gt_depths).(*) \
        FROM variants" \
    --header \
    --gt-filter "(gt_types).(phenotype==2).(==HET).(all) \
                 and (gt_types).(phenotype==1).(==HOM_REF).(all) \
                 and (gt_depths).(*).(>=20).(all)" \
    trio.trim.vep.dominant.db \
    | head \
    | column -t

# slide 18
# Add the variant-level WHERE clause (NULL filter, HIGH severity, allele
# frequencies <= 0.01 or NULL) and print the full result.
gemini query \
    -q "SELECT chrom, start, end, ref, alt, gene, impact, \
               (gts).(*), (gt_depths).(*) \
        FROM variants \
        WHERE filter is NULL and impact_severity == 'HIGH' \
          and (aaf_esp_ea <= 0.01 or aaf_esp_ea is NULL) \
          and (aaf_exac_all <= 0.01 or aaf_exac_all is NULL)" \
    --header \
    --gt-filter "(gt_types).(phenotype==2).(==HET).(all) \
                 and (gt_types).(phenotype==1).(==HOM_REF).(all) \
                 and (gt_depths).(*).(>=20).(all)" \
    trio.trim.vep.dominant.db \
    | column -t
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# slide 12
# Download the trio VCF and the recessive pedigree, then load both into a
# GEMINI database named trio.trim.vep.recessive.db.
curl https://s3.amazonaws.com/gemini-tutorials/trio.trim.vep.vcf.gz > trio.trim.vep.vcf.gz
curl https://s3.amazonaws.com/gemini-tutorials/recessive.ped > recessive.ped
gemini load --cores 4 \
    -v trio.trim.vep.vcf.gz \
    -t VEP \
    --skip-gene-tables \
    -p recessive.ped \
    trio.trim.vep.recessive.db
# Each step below runs `gemini comp_hets` on the recessive database.
# `awk '$14==1'` keeps only rows whose 14th whitespace-separated field is 1.
# NOTE(review): field 14 is presumably a comp_hets ranking/priority column;
# confirm against the header of the installed GEMINI version.

# slide 13
# Default output.
gemini comp_hets trio.trim.vep.recessive.db

# slide 14
# Restrict the reported variant columns.
gemini comp_hets --columns "chrom, start, end, gene, impact, cadd_raw" trio.trim.vep.recessive.db

# slide 16
# Preview, then count, the rows that pass the field-14 test.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    trio.trim.vep.recessive.db \
    | awk '$14==1' \
    | head

gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    trio.trim.vep.recessive.db \
    | awk '$14==1' \
    | wc -l

# slide 17
# Drop variants whose impact_severity is 'LOW'.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    --filter "impact_severity != 'LOW'" \
    trio.trim.vep.recessive.db \
    | awk '$14==1' \
    | wc -l

# slide 18
# Additionally require aaf_esp_ea and aaf_exac_all to be <= 0.01 or NULL.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    --filter "impact_severity != 'LOW' \
              and ((aaf_esp_ea <= 0.01 or aaf_esp_ea is NULL) \
              and (aaf_exac_all <= 0.01 or aaf_exac_all is NULL))" \
    trio.trim.vep.recessive.db \
    | awk '$14==1' \
    | wc -l

# slide 19
# Same filter as slide 18, printing the rows instead of counting them.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    --filter "impact_severity != 'LOW' \
              and ((aaf_esp_ea <= 0.01 or aaf_esp_ea is NULL) \
              and (aaf_exac_all <= 0.01 or aaf_exac_all is NULL))" \
    trio.trim.vep.recessive.db \
    | awk '$14==1'
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# slide 16
# Download the trio VCF and the de novo pedigree, then load both into a GEMINI
# database named trio.trim.vep.denovo.db. Temporary files go to the current
# directory (--tempdir .), and several optional load steps are skipped.
curl https://s3.amazonaws.com/gemini-tutorials/trio.trim.vep.vcf.gz > trio.trim.vep.vcf.gz
curl https://s3.amazonaws.com/gemini-tutorials/denovo.ped > denovo.ped
gemini load --cores 4 \
    -v trio.trim.vep.vcf.gz \
    -t VEP \
    --tempdir . \
    --skip-gene-tables --skip-cadd --skip-gerp-bp \
    -p denovo.ped \
    trio.trim.vep.denovo.db
# Each step below runs `gemini de_novo` on the de novo database with
# progressively stricter options; `wc -l` reports how many output lines remain.

# slide 17
gemini de_novo trio.trim.vep.denovo.db
# type Ctrl+C to stop output if you'd like (should take 20 seconds to complete)

# slide 19,20
# Restrict the reported variant columns.
gemini de_novo --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" trio.trim.vep.denovo.db

# slide 21
# Count output lines with no further restriction.
gemini de_novo --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" trio.trim.vep.denovo.db | wc -l

# slide 23
# Add `-d 15`.
# NOTE(review): -d is presumably the minimum per-sample depth; confirm with
# `gemini de_novo -h`.
gemini de_novo --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 15 \
    trio.trim.vep.denovo.db | wc -l

# slide 25
# Keep only variants whose `filter` column is NULL.
gemini de_novo --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 15 \
    --filter "filter is NULL" \
    trio.trim.vep.denovo.db | wc -l

# slide 27
# Additionally drop variants whose impact_severity is 'LOW'.
gemini de_novo --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 15 \
    --filter "filter is NULL and impact_severity != 'LOW'" \
    trio.trim.vep.denovo.db | wc -l

# slide 29
# Additionally require is_coding = 1 and aaf_1kg_eur / aaf_esp_ea to be
# <= 0.005 or NULL.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, \
               filter, qual, gene, impact" \
    -d 15 \
    --filter "filter is NULL \
              and is_coding = 1 and impact_severity != 'LOW' \
              and (aaf_1kg_eur <= 0.005 or aaf_1kg_eur is NULL) \
              and (aaf_esp_ea <= 0.005 or aaf_esp_ea is NULL)" \
    trio.trim.vep.denovo.db | wc -l

# slide 30
# Same filter as slide 29, printing the rows instead of counting them.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, \
               filter, qual, gene, impact" \
    -d 15 \
    --filter "filter is NULL \
              and is_coding = 1 and impact_severity != 'LOW' \
              and (aaf_1kg_eur <= 0.005 or aaf_1kg_eur is NULL) \
              and (aaf_esp_ea <= 0.005 or aaf_esp_ea is NULL)" \
    trio.trim.vep.denovo.db
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# slide 7
# Download the two practice databases, the chr22 VEP-annotated VCF and the trio
# pedigree used by the remaining steps.
curl https://s3.amazonaws.com/gemini-tutorials/learnSQL.db > learnSQL.db
curl https://s3.amazonaws.com/gemini-tutorials/learnSQL2.db > learnSQL2.db
curl https://s3.amazonaws.com/gemini-tutorials/chr22.VEP.vcf > chr22.VEP.vcf
curl https://s3.amazonaws.com/gemini-tutorials/trio.ped > trio.ped
# Basic SELECT / WHERE practice against the learnSQL databases.

# slide 9
# List every sample name.
gemini query -q "SELECT name FROM samples" learnSQL.db

# slide 10
# Samples whose phenotype equals 2.
gemini query -q "SELECT name FROM samples WHERE phenotype == 2" learnSQL.db

# slide 11
# Samples whose phenotype differs from 2.
gemini query -q "SELECT name FROM samples WHERE phenotype <> 2" learnSQL.db

# slide 12
# Samples with sample_id below 3.
gemini query -q "SELECT name FROM samples WHERE sample_id < 3" learnSQL.db

# slide 13
# NULL must be tested with IS NULL, not with a comparison operator.
gemini query -q "SELECT name FROM samples WHERE ethnicity IS NULL" learnSQL2.db

# slide 14
gemini query -q "SELECT name FROM samples WHERE ethnicity IS NOT NULL" learnSQL2.db

# slide 15
# Show every column of the practice variant table.
gemini query -q "SELECT * FROM fakevariants" learnSQL2.db

# slide 17
# Two ways to write the same test on in_dbsnp: an explicit comparison with 1,
# and the bare column used as a truth value.
gemini query -q "SELECT chrom,start,end FROM fakevariants
                 WHERE in_dbsnp == 1" learnSQL2.db

gemini query -q "SELECT chrom,start,end FROM fakevariants
                 WHERE in_dbsnp" learnSQL2.db

# slide 18
# Count the rows on chr1.
gemini query -q "SELECT COUNT(*) FROM fakevariants
                 WHERE chrom == 'chr1' " learnSQL2.db

# slide 19
# Count the rows on chr1 whose in_dbsnp is 0.
gemini query -q "SELECT COUNT(*) FROM fakevariants
                 WHERE chrom == 'chr1'
                 AND in_dbsnp == 0 " learnSQL2.db
# slide 21
# Reference only (left commented out): a VEP invocation that writes
# chr22.VEP.vcf from chr22.vcf. The annotated file is downloaded ready-made on
# slide 7, so this does not need to be run.
#perl ~/software/variant_effect_predictor/variant_effect_predictor.pl -i chr22.vcf -o chr22.VEP.vcf --vcf \
#--cache --dir ~/software/variant_effect_predictor/references \
#--compress "gunzip -c" \
#--force_overwrite \
#--sift b --polyphen b --symbol --numbers --biotype --total_length \
#--fields Consequence,Codons,Amino_acids,Gene,SYMBOL,Feature,EXON,PolyPhen,SIFT,Protein_position,BIOTYPE

# slide 25
# Load the annotated chr22 VCF and the trio pedigree into chr22.db, writing
# temporary files to the current directory.
gemini load -v chr22.VEP.vcf \
    -p trio.ped \
    -t VEP \
    --cores 4 \
    --tempdir . \
    --no-bcolz \
    --skip-gene-tables \
    chr22.db
# slide 26
# Describe the contents of the database.
gemini db_info chr22.db

# slide 27
# Sample names only, then every column of the samples table.
gemini query -q "SELECT name FROM samples" --header chr22.db
gemini query -q "SELECT * FROM samples" --header chr22.db

# slide 28
# Count variants with in_dbsnp equal to 0, then variants with a NULL filter.
gemini query -q "SELECT COUNT(*) \
                 FROM variants \
                 WHERE in_dbsnp == 0" --header chr22.db

gemini query -q "SELECT COUNT(*) \
                 FROM variants \
                 WHERE filter is NULL" --header chr22.db

# slide 29
# NULL-filter variants in gene MLC1: first every column, then a chosen subset.
gemini query -q "SELECT * FROM variants WHERE
                 filter is NULL and gene = 'MLC1' " --header chr22.db

gemini query -q "SELECT rs_ids, aaf_esp_ea, impact, clinvar_disease_name, clinvar_sig
                 FROM variants
                 WHERE filter is NULL and gene = 'MLC1' " --header chr22.db

# slide 30
# Variants with a ClinVar disease name and aaf_esp_ea <= 0.01: count them, then
# list their genes. Rows with a NULL aaf_esp_ea fail the <= test and are
# therefore excluded.
gemini query -q "SELECT COUNT(*) from variants WHERE clinvar_disease_name is not NULL and aaf_esp_ea <= 0.01" \
    chr22.db

gemini query -q "SELECT gene from variants \
                 WHERE clinvar_disease_name is not NULL and aaf_esp_ea <= 0.01" \
    chr22.db

# slide 32
# Genotype filters on named samples. The first count includes the header line
# because --header is passed; the second does not.
gemini query -q "SELECT * from variants" \
    --gt-filter "gt_types.1805 <> HOM_REF" \
    --header \
    chr22.db \
    | wc -l

gemini query -q "SELECT * from variants" \
    --gt-filter "(gt_types.1805 <> HOM_REF and \
                  gt_types.4805 <> HOM_REF)" \
    chr22.db \
    | wc -l

# Print just the genotypes of the two samples for the variants that pass.
gemini query -q "SELECT gts.1805, gts.4805 from variants" \
    --gt-filter "(gt_types.1805 <> HOM_REF and \
                  gt_types.4805 <> HOM_REF)" \
    chr22.db
# The --gt-filter wildcards below have four parenthesised parts:
# (genotype column).(sample selector).(test).(all | any | none | count ...).

# slide 33 - wildcards
# Every sample (*) must be HET.
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(*).(==HET).(all)" \
    --header \
    chr22.db

# Every sample with sex==2 must be HOM_REF.
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(sex==2).(==HOM_REF).(all)" \
    --header \
    chr22.db

# slide 34 - the "any" wildcard
# At least one sample with sex==2 is not HOM_REF.
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(sex==2).(!=HOM_REF).(any)" \
    --header \
    chr22.db

# slide 35 - the "none" wildcard
# No sample with sex==2 is HOM_REF.
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(sex==2).(==HOM_REF).(none)" \
    --header \
    chr22.db

# slide 36 - the "count" wildcard
# At least two samples have an UNKNOWN genotype.
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(*).(==UNKNOWN).(count >= 2)" \
    --header \
    chr22.db

# slide 38 - wildcards are general to all genotype columns
# Every sample has a depth of at least 50.
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*), (gt_depths).(*) \
                 FROM variants" \
    --gt-filter "(gt_depths).(*).(>=50).(all)" \
    --header \
    chr22.db
# slide 39
# Built-in summary statistics, aligned into columns for reading.
gemini stats --gts-by-sample chr22.db | column -t
gemini stats --tstv chr22.db | column -t

# slide 40
# Summarize the variants selected by a query: first those with in_dbsnp = 0,
# then those with in_dbsnp = 1.
gemini stats --summarize \
    "SELECT * from variants WHERE in_dbsnp = 0" \
    chr22.db | column -t

gemini stats --summarize \
    "SELECT * from variants WHERE in_dbsnp = 1" \
    chr22.db | column -t
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment