Last active
November 13, 2019 17:55
-
-
Save arq5x/9e1928638397ba45da2e to your computer and use it in GitHub Desktop.
GEMINI Tutorial Commands
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# assumes you have SSH'ed and qlogin'ed
# Work inside thu/mydata. -p creates any missing directory and is a no-op
# when the directories already exist, so the step can be re-run safely.
mkdir -p thu/mydata
cd thu/mydata
# slide 5
# Download the trio VCF and the pedigree file.
# -f: exit non-zero on an HTTP error instead of saving the error page as data.
# -L: follow redirects.  -o: write to the named file.
curl -fL -o trio.trim.vep.vcf.gz https://s3.amazonaws.com/gemini-tutorials/trio.trim.vep.vcf.gz
curl -fL -o dominant.ped https://s3.amazonaws.com/gemini-tutorials/dominant.ped
# Build trio.trim.vep.dominant.db from the VCF (-v) and the pedigree (-p).
gemini load --cores 2 \
    -v trio.trim.vep.vcf.gz \
    -t VEP \
    --tempdir . \
    --skip-gene-tables \
    -p dominant.ped \
    trio.trim.vep.dominant.db
# slide 6
# Run the autosomal_dominant tool with a chosen set of columns; show only the
# first lines of output (head), aligned into a table (column -t).
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    trio.trim.vep.dominant.db \
    | head \
    | column -t
# slide 7
# Same report, but count the output lines instead of printing them.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    trio.trim.vep.dominant.db \
    | wc -l
# slide 8
# Count candidates whose filter column is NULL or equals 'SBFilter'.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    --filter "(filter is NULL or filter=='SBFilter')" \
    trio.trim.vep.dominant.db \
    | wc -l
# slide 9
# As slide 8, additionally excluding impact_severity 'LOW'.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    --filter "(filter is NULL or filter=='SBFilter') \
              and impact_severity != 'LOW'" \
    trio.trim.vep.dominant.db \
    | wc -l
# slide 10
# As slide 9, additionally requiring max_aaf_all below 0.005.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    --filter "(filter is NULL or filter=='SBFilter') \
              and impact_severity != 'LOW' \
              and max_aaf_all < 0.005" \
    trio.trim.vep.dominant.db \
    | wc -l
# slide 11
# As slide 10, but keeping only impact_severity 'HIGH'.
gemini autosomal_dominant \
    --columns "chrom, start, end, ref, alt, gene, impact, cadd_raw" \
    --filter "(filter is NULL or filter=='SBFilter') \
              and impact_severity == 'HIGH' \
              and max_aaf_all < 0.005" \
    trio.trim.vep.dominant.db \
    | wc -l
# slide 14
# Hand-written genotype filter naming each sample explicitly:
# 4805 and 1805 must be HET, 1847 must be HOM_REF.
gemini query \
    -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) \
        FROM variants" \
    --header \
    --gt-filter "gt_types.4805 == HET \
                 and gt_types.1805 == HET \
                 and gt_types.1847 == HOM_REF" \
    trio.trim.vep.dominant.db \
    | head \
    | column -t
# slide 16
# Same idea using wildcard filters that select samples by phenotype value
# rather than by name: all phenotype==2 samples HET, all phenotype==1
# samples HOM_REF.
gemini query \
    -q "SELECT chrom, start, end, ref, alt, gene, impact, (gts).(*) \
        FROM variants" \
    --header \
    --gt-filter "(gt_types).(phenotype==2).(==HET).(all) \
                 and (gt_types).(phenotype==1).(==HOM_REF).(all)" \
    trio.trim.vep.dominant.db \
    | head \
    | column -t
# slide 17
# As slide 16, also reporting gt_depths and requiring gt_depths >= 20 for
# every sample.
gemini query \
    -q "SELECT chrom, start, end, ref, alt, gene, impact, \
               (gts).(*), (gt_depths).(*) \
        FROM variants" \
    --header \
    --gt-filter "(gt_types).(phenotype==2).(==HET).(all) \
                 and (gt_types).(phenotype==1).(==HOM_REF).(all) \
                 and (gt_depths).(*).(>=20).(all)" \
    trio.trim.vep.dominant.db \
    | head \
    | column -t
# slide 18
# Combine the variant-level WHERE clause (filter status, HIGH severity,
# max_aaf_all < 0.005) with the genotype and depth wildcard filters, and
# print the full result as an aligned table.
gemini query \
    -q "SELECT chrom, start, end, ref, alt, gene, impact, \
               (gts).(*), (gt_depths).(*) \
        FROM variants \
        WHERE (filter is NULL or filter=='SBFilter') \
          and impact_severity == 'HIGH' \
          and max_aaf_all < 0.005" \
    --header \
    --gt-filter "(gt_types).(phenotype==2).(==HET).(all) \
                 and (gt_types).(phenotype==1).(==HOM_REF).(all) \
                 and (gt_depths).(*).(>=20).(all)" \
    trio.trim.vep.dominant.db \
    | column -t
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# assumes you have SSH'ed and qlogin'ed
# slide 12
# Work inside thu/mydata. -p creates any missing directory and does not fail
# when mydata already exists from an earlier session.
mkdir -p thu/mydata
cd thu/mydata
# slide 10
# Download the trio VCF and the recessive pedigree file.
# -f: exit non-zero on an HTTP error instead of saving the error page as data.
# -L: follow redirects.  -o: write to the named file.
curl -fL -o trio.trim.vep.vcf.gz https://s3.amazonaws.com/gemini-tutorials/trio.trim.vep.vcf.gz
curl -fL -o recessive.ped https://s3.amazonaws.com/gemini-tutorials/recessive.ped
# Build trio.trim.vep.recessive.db from the VCF (-v) and the pedigree (-p).
gemini load --cores 2 \
    -v trio.trim.vep.vcf.gz \
    -t VEP \
    --tempdir . \
    --skip-gene-tables \
    -p recessive.ped \
    trio.trim.vep.recessive.db
# slide 11
# Run the comp_hets tool with its default output columns.
gemini comp_hets trio.trim.vep.recessive.db
# slide 12
# Restrict the report to a chosen set of columns.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    trio.trim.vep.recessive.db
# slide 13
# Preview the first lines of the report, then count all output lines.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    trio.trim.vep.recessive.db \
    | head
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    trio.trim.vep.recessive.db \
    | wc -l
# slide 14
# Count candidates whose filter column is NULL or 'SBFilter' and whose
# impact_severity is not 'LOW'.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    --filter "(filter is NULL or filter=='SBFilter') \
              and impact_severity != 'LOW'" \
    trio.trim.vep.recessive.db \
    | wc -l
# slide 15
# As slide 14, additionally requiring max_aaf_all below 0.005.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    --filter "(filter is NULL or filter=='SBFilter') \
              and impact_severity != 'LOW' \
              and max_aaf_all < 0.005" \
    trio.trim.vep.recessive.db \
    | wc -l
# slide 16
# Same filters as slide 15, printing the rows instead of counting them.
# The command ends on the database name: no trailing backslash, so it is not
# joined onto the comment line that follows.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    --filter "(filter is NULL or filter=='SBFilter') \
              and impact_severity != 'LOW' \
              and max_aaf_all < 0.005" \
    trio.trim.vep.recessive.db
# slide 17
# As slide 16, adding -d 6 and --min-gq 20.
# NOTE(review): these look like per-sample minimum depth and minimum genotype
# quality thresholds -- confirm against the GEMINI docs for the installed version.
gemini comp_hets \
    --columns "chrom, start, end, gene, impact, cadd_raw" \
    --filter "(filter is NULL or filter=='SBFilter') \
              and impact_severity != 'LOW' \
              and max_aaf_all < 0.005" \
    -d 6 \
    --min-gq 20 \
    trio.trim.vep.recessive.db
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# assumes you have SSH'ed and qlogin'ed
# Work inside wed/mydata. -p creates any missing directory and is a no-op
# when the directories already exist.
mkdir -p wed/mydata
cd wed/mydata
# slide 17
# Left commented out: commands that would build trio.trim.vep.denovo.db
# locally from the VCF and denovo.ped. The active command below downloads a
# ready-made database instead.
# curl https://s3.amazonaws.com/gemini-tutorials/trio.trim.vep.vcf.gz > trio.trim.vep.vcf.gz
# curl https://s3.amazonaws.com/gemini-tutorials/denovo.ped > denovo.ped
# gemini load --cores 2 \
#     -v trio.trim.vep.vcf.gz \
#     -t VEP \
#     --tempdir . \
#     --skip-gene-tables --skip-cadd --skip-gerp-bp \
#     -p denovo.ped \
#     trio.trim.vep.denovo.db
# Download the database. The URL is quoted because it contains '~'.
# -f: exit non-zero on an HTTP error instead of saving the error page as the
# database file.  -L: follow redirects.  -o: write to the named file.
curl -fL -o trio.trim.vep.denovo.db "http://home.chpc.utah.edu/~u1138933/gemini_db/trio.trim.vep.denovo.db"
# slide 19
# Run the de_novo tool with its default output columns.
gemini de_novo trio.trim.vep.denovo.db
# type Ctrl+C to stop output if you'd like (should take 20 seconds to complete)
# slide 21
# Restrict the report to a chosen set of columns.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    trio.trim.vep.denovo.db
# slide 23
# Count the output lines of the same report.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    trio.trim.vep.denovo.db \
    | wc -l
# slide 25
# Count again after adding -d 6.
# NOTE(review): -d looks like a per-sample minimum depth threshold -- confirm
# against the GEMINI docs.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 6 \
    trio.trim.vep.denovo.db \
    | wc -l
# slide 26
# Count again after also adding --min-gq 20.
# NOTE(review): --min-gq looks like a minimum genotype quality -- confirm.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 6 \
    --min-gq 20 \
    trio.trim.vep.denovo.db \
    | wc -l
# slide 28
# Count candidates whose filter column is NULL.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 6 \
    --min-gq 20 \
    --filter "filter is NULL" \
    trio.trim.vep.denovo.db \
    | wc -l
# slide 30
# Also accept rows whose filter column equals 'SBFilter'.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 6 \
    --min-gq 20 \
    --filter "(filter is NULL or filter=='SBFilter')" \
    trio.trim.vep.denovo.db \
    | wc -l
# slide 32
# Additionally exclude impact_severity 'LOW'.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 6 \
    --min-gq 20 \
    --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW'" \
    trio.trim.vep.denovo.db \
    | wc -l
# slide 35
# Additionally require max_aaf_all <= 0.005.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 6 \
    --min-gq 20 \
    --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW' and max_aaf_all <= 0.005" \
    trio.trim.vep.denovo.db \
    | wc -l
# slide 35
# Same command as the previous one, printing the rows instead of counting them.
gemini de_novo \
    --columns "chrom, start, end, ref, alt, filter, qual, gene, impact" \
    -d 6 \
    --min-gq 20 \
    --filter "(filter is NULL or filter=='SBFilter') and impact_severity != 'LOW' and max_aaf_all <= 0.005" \
    trio.trim.vep.denovo.db
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# slide 7
# Create and enter wed/mydata. -p creates both levels and does not fail when
# the directories already exist, so the step can be re-run.
mkdir -p wed/mydata
cd wed/mydata
# Download the two practice databases, the annotated chr22 VCF and the
# pedigree file.
# -f: exit non-zero on an HTTP error instead of saving the error page as data.
# -L: follow redirects.  -o: write to the named file.
curl -fL -o learnSQL.db https://s3.amazonaws.com/gemini-tutorials/learnSQL.db
curl -fL -o learnSQL2.db https://s3.amazonaws.com/gemini-tutorials/learnSQL2.db
curl -fL -o chr22.VEP.vcf https://s3.amazonaws.com/gemini-tutorials/chr22.VEP.vcf
curl -fL -o trio.ped https://s3.amazonaws.com/gemini-tutorials/trio.ped
# slide 9
# Select one column from the samples table.
gemini query -q "SELECT name FROM samples" learnSQL.db
# slide 10
# Filter rows with an equality test.
gemini query -q "SELECT name FROM samples WHERE phenotype == 2" learnSQL.db
# slide 11
# Filter rows with an inequality test.
gemini query -q "SELECT name FROM samples WHERE phenotype <> 2" learnSQL.db
# slide 12
# Filter rows with a numeric comparison.
gemini query -q "SELECT name FROM samples WHERE sample_id < 3" learnSQL.db
# slide 13
# NULL values are tested with IS NULL ...
gemini query -q "SELECT name FROM samples WHERE ethnicity IS NULL" learnSQL2.db
# slide 14
# ... and IS NOT NULL.
gemini query -q "SELECT name FROM samples WHERE ethnicity IS NOT NULL" learnSQL2.db
# slide 15
# Show every column of the fakevariants table.
gemini query -q "SELECT * FROM fakevariants" learnSQL2.db
# slide 17
# Two spellings of the in_dbsnp test (presumably equivalent if in_dbsnp only
# holds 0/1 -- confirm against the table contents).
gemini query -q "SELECT chrom,start,end FROM fakevariants
                 WHERE in_dbsnp == 1" learnSQL2.db
gemini query -q "SELECT chrom,start,end FROM fakevariants
                 WHERE in_dbsnp" learnSQL2.db
# slide 18
# Count the rows on chr1.
gemini query -q "SELECT COUNT(*) FROM fakevariants
                 WHERE chrom == 'chr1' " learnSQL2.db
# slide 19
# Count the rows on chr1 whose in_dbsnp is 0.
gemini query -q "SELECT COUNT(*) FROM fakevariants
                 WHERE chrom == 'chr1'
                 AND in_dbsnp == 0 " learnSQL2.db
# slide 21
# Left commented out: a VEP invocation that reads chr22.vcf and writes
# chr22.VEP.vcf (presumably how the downloaded chr22.VEP.vcf was produced --
# confirm). It is not run as part of this tutorial.
#perl ~/software/variant_effect_predictor/variant_effect_predictor.pl -i chr22.vcf -o chr22.VEP.vcf --vcf \
#--cache --dir ~/software/variant_effect_predictor/references \
#--compress "gunzip -c" \
#--force_overwrite \
#--sift b --polyphen b --symbol --numbers --biotype --total_length \
#--fields Consequence,Codons,Amino_acids,Gene,SYMBOL,Feature,EXON,PolyPhen,SIFT,Protein_position,BIOTYPE
# slide 25
# Build chr22.db from the annotated VCF (-v) and the pedigree (-p).
gemini load -v chr22.VEP.vcf \
    -p trio.ped \
    -t VEP \
    --cores 2 \
    --tempdir . \
    --skip-gene-tables \
    chr22.db
# slide 26
# Inspect the contents of the newly built database.
gemini db_info chr22.db
# slide 27
# List the samples: names only, then every column. --header prints the
# column names as the first output line.
gemini query -q "SELECT name FROM samples" --header chr22.db
gemini query -q "SELECT * FROM samples" --header chr22.db
# slide 28
# Count variants with in_dbsnp == 0, then variants whose filter is NULL.
gemini query -q "SELECT COUNT(*) \
                 FROM variants \
                 WHERE in_dbsnp == 0" --header chr22.db
gemini query -q "SELECT COUNT(*) \
                 FROM variants \
                 WHERE filter is NULL" --header chr22.db
# slide 29
# Variants in gene MLC1 whose filter is NULL: first every column, then a
# chosen subset of annotation columns.
gemini query -q "SELECT * FROM variants WHERE
                 filter is NULL and gene = 'MLC1' " --header chr22.db
gemini query -q "SELECT rs_ids, aaf_esp_ea, impact, clinvar_disease_name, clinvar_sig
                 FROM variants
                 WHERE filter is NULL and gene = 'MLC1' " --header chr22.db
# slide 30
# Variants with a clinvar_disease_name and aaf_esp_ea <= 0.01: count them,
# then list their genes.
gemini query -q "SELECT COUNT(*) from variants WHERE clinvar_disease_name is not NULL and aaf_esp_ea <= 0.01" \
    chr22.db
gemini query -q "SELECT gene from variants \
                 WHERE clinvar_disease_name is not NULL and aaf_esp_ea <= 0.01" \
    chr22.db
# slide 32
# Count variants where sample 1805 is not HOM_REF.
# --header is deliberately omitted here: the header row would be counted by
# wc -l as if it were a variant, and the sibling count below omits it too.
gemini query -q "SELECT * from variants" \
    --gt-filter "gt_types.1805 <> HOM_REF" \
    chr22.db \
    | wc -l
# Count variants where neither 1805 nor 4805 is HOM_REF.
gemini query -q "SELECT * from variants" \
    --gt-filter "(gt_types.1805 <> HOM_REF and \
                  gt_types.4805 <> HOM_REF)" \
    chr22.db \
    | wc -l
# Same genotype filter, printing the gts columns of the two samples instead
# of counting.
gemini query -q "SELECT gts.1805, gts.4805 from variants" \
    --gt-filter "(gt_types.1805 <> HOM_REF and \
                  gt_types.4805 <> HOM_REF)" \
    chr22.db
# slide 33 - wildcards
# Wildcard filters have four parenthesised parts:
# (column).(sample selector).(condition).(aggregate).
# Here: every sample (*) must be HET.
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(*).(==HET).(all)" \
    --header \
    chr22.db
# Select samples by a samples-table condition instead of '*':
# every sample with sex==2 must be HOM_REF.
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(sex==2).(==HOM_REF).(all)" \
    --header \
    chr22.db
# slide 34 - the "any" wildcard
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(sex==2).(!=HOM_REF).(any)" \
    --header \
    chr22.db
# slide 35 - the "none" wildcard
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(sex==2).(==HOM_REF).(none)" \
    --header \
    chr22.db
# slide 36 - the "count" wildcard
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*) \
                 FROM variants" \
    --gt-filter "(gt_types).(*).(==UNKNOWN).(count >= 2)" \
    --header \
    chr22.db
# slide 38 - wildcards are general to all genotype columns
# Here the wildcard is applied to gt_depths rather than gt_types.
gemini query -q "SELECT chrom, start, end, ref, alt, \
                        gene, impact, (gts).(*), (gt_depths).(*) \
                 FROM variants" \
    --gt-filter "(gt_depths).(*).(>=50).(all)" \
    --header \
    chr22.db
# slide 39
# Built-in summary statistics, aligned into a table with column -t.
gemini stats --gts-by-sample chr22.db | column -t
gemini stats --tstv chr22.db | column -t
# slide 40
# Summarize the variants selected by a query: first those with in_dbsnp = 0,
# then those with in_dbsnp = 1.
gemini stats --summarize \
    "SELECT * from variants WHERE in_dbsnp = 0" \
    chr22.db | column -t
gemini stats --summarize \
    "SELECT * from variants WHERE in_dbsnp = 1" \
    chr22.db | column -t
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment