Last active
April 14, 2017 17:44
-
-
Save smrgit/b578430edec4e8a2d3fbfee607d656ab to your computer and use it in GitHub Desktop.
GQ GDC hg19 vs hg38 miRNA correlation analysis -- at the stemloop expression level
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| WITH | |
| hg38_d1 AS ( | |
| -- we start with a table at the aliquot level, in case there are multiple aliquots | |
| -- for a single sample; | |
| SELECT | |
| sample_barcode, | |
| aliquot_barcode, | |
| mirna_id, | |
| reads_per_million_miRNA_mapped AS RPM | |
| FROM | |
| `isb-cgc.TCGA_hg38_data_v0.miRNAseq_Expression` ), | |
| hg38_d2 AS ( | |
| -- next we do a MAX() over the RPM values from the aliquots and just | |
| -- keep the sample_barcode; and we're also going to do a LOG() | |
| -- AND we are going to toss OUT (sample,mirna) pairs with ZERO reads | |
| SELECT | |
| sample_barcode, | |
| mirna_id, | |
| LOG(MAX(RPM)) AS logRPM | |
| FROM | |
| hg38_d1 | |
| WHERE | |
| RPM>0 | |
| GROUP BY | |
| sample_barcode, | |
| mirna_id ), | |
| hg19_d1 AS ( | |
| -- now we're going to do the same with the hg19 data ... | |
| SELECT | |
| sample_barcode, | |
| aliquot_barcode, | |
| mirna_id, | |
| reads_per_million_miRNA_mapped AS RPM | |
| FROM | |
| `isb-cgc.TCGA_hg19_data_v0.miRNAseq_Expression` ), | |
| hg19_d2 AS ( | |
| SELECT | |
| sample_barcode, | |
| mirna_id, | |
| LOG(MAX(RPM)) AS logRPM | |
| FROM | |
| hg19_d1 | |
| WHERE | |
| RPM>0 | |
| GROUP BY | |
| sample_barcode, | |
| mirna_id ), | |
| j1 AS ( | |
| SELECT | |
| a.logRPM AS hg19_logRPM, | |
| b.logRPM AS hg38_logRPM, | |
| b.sample_barcode AS sample_barcode, | |
| b.mirna_id AS mirna_id | |
| FROM | |
| hg19_d2 a | |
| JOIN | |
| hg38_d2 b | |
| ON | |
| a.sample_barcode=b.sample_barcode | |
| AND a.mirna_id=b.mirna_id ), | |
| ct AS ( | |
| SELECT | |
| mirna_id, | |
| CORR(hg19_logRPM, | |
| hg38_logRPM) AS corr, | |
| COUNT(*) AS n | |
| FROM | |
| j1 | |
| GROUP BY | |
| mirna_id | |
| HAVING | |
| n >= 1000 ) | |
| SELECT | |
| * | |
| FROM | |
| ct | |
| ORDER BY | |
| corr DESC |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
updated to use new ISB-CGC tables