Created
April 13, 2018 19:06
-
-
Save acthp/4fc9549dfd97dff261f8606c3a813867 to your computer and use it in GitHub Desktop.
Fetch gene expression and mutation data from the TCGA PanCanAtlas cohort
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fetch gene expression, mutation calls, and cancer type for FOXM1 and TP53
# across the "TCGA PanCanAtlas" cohort on the public Xena PanCanAtlas hub.
import xenaPython as xena

host = xena.PUBLIC_HUBS["pancanAtlasHub"]
cohort = "TCGA PanCanAtlas"
# All queries below are restricted to this sample list.
# NOTE(review): the third argument is presumably a sample limit, with None
# meaning "no limit" -- confirm against the xenaPython documentation.
samples = xena.cohort_samples(host, cohort, None)
genes = ["FOXM1", "TP53"]

# --- Gene expression --------------------------------------------------------
expression_dataset = "EB++AdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.xena"
# The result is unpacked into one entry per requested gene; this assumes the
# entries come back in the same order as `genes`.
[foxm1_expression_by_sample, tp53_expression_by_sample] = xena.dataset_fetch(
    host, expression_dataset, samples, genes)
# Backward-compatible alias for the original (misspelled) variable name.
foxm1_expresssion_by_sample = foxm1_expression_by_sample

# --- Mutations --------------------------------------------------------------
mutation_dataset = "mc3.v0.2.8.PUBLIC.xena"
mutations = xena.sparse_data(host, mutation_dataset, samples, genes)
# mutation data is returned in column orientation, i.e. ['rows'] is a dict of
# arrays, one per column, each having length N, where N is the length of the
# result set.
all_rows = range(len(mutations['rows']['sampleID']))
# Hoist the 'genes' column so it is looked up once rather than per row.
genes_by_row = mutations['rows']['genes']
# Row indices whose first listed gene is the gene of interest. Each entry of
# the 'genes' column is indexed with [0], so only the first gene is compared.
foxm1_rows = [i for i, row_genes in zip(all_rows, genes_by_row)
              if row_genes[0] == 'FOXM1']
tp53_rows = [i for i, row_genes in zip(all_rows, genes_by_row)
             if row_genes[0] == 'TP53']

# --- Phenotype (cancer type) -------------------------------------------------
phenotype_dataset = "Survival_SupplementalTable_S1_20171025_xena_sp"
subtype_field = "cancer type abbreviation"
# Only one field is requested, so take the single result.
cancer_subtype = xena.dataset_fetch(
    host, phenotype_dataset, samples, [subtype_field])[0]
# The field's codes arrive as one tab-separated string; split it into a list
# so the fetched values can be used as indices into it.
codes = xena.field_codes(
    host, phenotype_dataset, [subtype_field])[0]['code'].split('\t')
# Translate each per-sample value into its code label; the string 'NaN'
# (treated here as a missing value) becomes None.
cancer_subtype_by_sample = [
    None if v == 'NaN' else codes[int(v)] for v in cancer_subtype
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment