Skip to content

Instantly share code, notes, and snippets.

@smrgit
Created April 28, 2017 00:56
Show Gist options
  • Select an option

  • Save smrgit/ba519600b62e19d5244f885ebf98b237 to your computer and use it in GitHub Desktop.

Select an option

Save smrgit/ba519600b62e19d5244f885ebf98b237 to your computer and use it in GitHub Desktop.
Find radiology and diagnostic images for selected types of TCGA cases
WITH
cases AS (
SELECT
case_barcode
FROM
`isb-cgc.TCGA_bioclin_v0.Clinical`
WHERE
project_short_name="TCGA-LUSC"
OR project_short_name="TCGA-LUAD" ),
radImg AS (
SELECT
PatientID AS case_barcode,
BodyPartExamined,
PhotometricInterpretation,
SOPClassUID,
SeriesDate,
SeriesDescription,
ZipFileName AS zipGCSurl
FROM
`isb-cgc.metadata.TCGA_radiology_images`
WHERE
PatientID IN (
SELECT
case_barcode
FROM
cases )
GROUP BY
PatientID,
BodyPartExamined,
PhotometricInterpretation,
SOPClassUID,
SeriesDate,
SeriesDescription,
ZipFileName ),
svsImg AS (
SELECT
case_barcode,
sample_barcode,
slide_barcode,
GCSurl AS svsGCSurl
FROM
`isb-cgc.metadata.TCGA_slide_images`
WHERE
slide_id LIKE '%DX%'
AND case_barcode IN (
SELECT
DISTINCT case_barcode
FROM
radImg ) ),
tmpRad AS (
SELECT
case_barcode,
ARRAY_AGG(zipGCSurl) AS radImageList
FROM
radImg
GROUP BY
case_barcode ),
tmpDx AS (
SELECT
case_barcode,
ARRAY_AGG(svsGCSurl) AS dxImageList
FROM
svsImg
GROUP BY
case_barcode )
SELECT
a.case_barcode,
a.radImageList,
b.dxImageList
FROM
tmpRad a
JOIN
tmpDx b
ON
a.case_barcode=b.case_barcode
ORDER BY
case_barcode
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment