Skip to content

Instantly share code, notes, and snippets.

@smrgit
smrgit / find_normal_bams_in_gcs.sql
Created September 27, 2017 22:36
join several different metadata tables together to find normal BAMs in GCS, combined with their CGHub and/or GDC file identifiers (where available)
WITH
--
-- first, we grab information about the ~48650 TCGA DNA bam files that were at CGHub
-- (and are aligned to GRCh37/HG19 and were classified as "Live")
dbGaPinfo AS (
SELECT
SUBSTR(barcode,1,12) AS case_barcode,
SUBSTR(barcode,1,16) AS sample_barcode,
barcode AS aliquot_barcode,
SUBSTR(barcode,14,2) AS sample_type,
@smrgit
smrgit / join_CGHub_GDC_legacy_DNA_bams_info.sql
Created September 28, 2017 11:44
join CGHub and GDC legacy information for TCGA DNA bams
WITH
--
-- first, we grab information about the ~48650 TCGA DNA bam files that were at CGHub
-- (and are aligned to GRCh37/HG19 and were classified as "Live")
CGHubInfo AS (
SELECT
SUBSTR(barcode,1,12) AS case_barcode,
SUBSTR(barcode,1,16) AS sample_barcode,
barcode AS aliquot_barcode,
SUBSTR(barcode,14,2) AS sample_type,
@smrgit
smrgit / corr_TCGA_GTEx.sql
Created November 28, 2017 23:20
Spearman correlation between TCGA expression data and GTEx median tissue-type TPM
WITH
--
-- get the top-5000 most variable genes based on computing the standard
-- deviation on the GTEx "gene_median_tpm" table
GTEx_top5K AS (
SELECT
gene_id,
gene_description,
STDDEV(gene_exp) AS sigmaExp
FROM
@smrgit
smrgit / gist:cbba2e057211f3a05d74b0eac0b9fd5f
Created May 24, 2018 15:58
PanCancer Atlas publications -- open-access supplementary files at GDC
006ce014-e113-4d61-a944-bcc03661a84a
00a32f7a-c85f-4f86-850d-be53973cbc4d
04559f49-5066-4984-a9d0-27c27ce20458
0511725d-7522-4abc-8f07-172bba90fd13
06a124df-fa5b-4f2d-8bfa-0e73b685f222
06ba0f08-fa9b-41eb-a74d-a9225f14d47f
08096f8f-7b56-495a-be45-62d5a56f2ee8
081f7161-ae17-434d-b8e5-a3047852cb10
09f59368-470e-4311-a7b7-f665886620e4
0e526660-14cb-4742-a49b-11fe47f2f93c
@smrgit
smrgit / gist:c170b5659d095a8e01455a358c0b70ee
Created May 24, 2018 16:00
PanCancer Atlas publications -- controlled-access supplementary files at GDC
0e1bc6-22f0-424b-8a21-305c08af22a7
12003a-6135-4869-852c-a451d45bf39f
1950b4-6695-49a5-a2b1-d0c2e14b8448
19a1e4-f6b9-48bd-b1f7-160f8e720f1e
1c4fee-3e91-495d-8b5e-036c1639bdc8
1fa5dc-5e86-47ee-8eb4-695cb48169b1
24ec2c-dd64-47f9-a44d-c88a317fd88f
258859-604a-4b5c-9227-0f75089eab68
2b862d-da2b-4d51-94e3-a2c7a52ac3fb
2e54aa-4cc0-4cbd-9647-0e039d1e3da1
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.