I hereby claim:
- I am ccwang002 on github.
- I am liang2 (https://keybase.io/liang2) on keybase.
- I have a public key ASA_ZHu4l91A5bKbiWrZkL-zyJD9mvEUNQpsaW3LjmvcqQo
To claim this, I am signing this object:
| import numpy as np | |
| rs = np.random.RandomState(seed=5566) | |
| n_conditions = 10 | |
| # Here we simulate a complex computation, for example, an analogy of the magnitude | |
| # of gradient descent, which is expected to be strictly positive. But from the result | |
| # we find that it is sometimes negative, so we wish to find out when and | |
| # under what condition our program produces bogus output. | |
| # | |
| # This is a case for using pdb with a conditional breakpoint |
| from datetime import datetime | |
| from pytz import timezone # pip install pytz | |
| # Setup remote time | |
| remote_tz = timezone('US/Pacific') # PST for example | |
| remote_dt = remote_tz.localize(datetime(2015, 5, 1, 14, 0)) # May 1, 2015, 2:00 PM Pacific time (PDT, since daylight saving is in effect) | |
| # Setup Taipei local time | |
| tpe = timezone('Asia/Taipei') |
I hereby claim:
To claim this, I am signing this object:
| import pandas as pd | |
| import os | |
| from pathlib import Path | |
| # Export Zotero library as CSV | |
| ZOTERO_LIBRARY_PTH = '/Users/liang/Desktop/My Library.csv' | |
| REFERENCES_ROOT = Path('/Users/liang/Dropbox/References/') | |
| df = pd.read_csv(ZOTERO_LIBRARY_PTH) |
| # The Snakefile that loads raw data and genome reference locally | |
| GENOME_FA = "griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/chr22_ERCC92.fa" | |
| GENOME_GTF = "griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/genes_chr22_ERCC92.gtf" | |
| HISAT2_INDEX_PREFIX = "hisat2_index/chr22_ERCC92" | |
| SAMPLES, *_ = glob_wildcards('griffithlab_brain_vs_uhr/HBR_UHR_ERCC_ds_10pc/{sample}.read1.fastq.gz') | |
| from pathlib import Path | |
| from pathlib import Path | |
| from snakemake.remote.GS import RemoteProvider as GSRemoteProvider | |
| GS = GSRemoteProvider() | |
| GS_PREFIX = "lbwang-playground/snakemake_rnaseq" | |
| GENOME_FA = GS.remote(f"{GS_PREFIX}/griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/chr22_ERCC92.fa") | |
| GENOME_GTF = GS.remote(f"{GS_PREFIX}/griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/genes_chr22_ERCC92.gtf") | |
| HISAT2_INDEX_PREFIX = "hisat2_index/chr22_ERCC92" | |
| FULL_HISAT2_INDEX_PREFIX = "dinglab/lbwang/snakemake_demo/hisat2_index/chr22_ERCC92" |
| from itertools import combinations, product | |
| def gen_pos_sets_to_sub(barcode, max_sub=1): | |
| """ | |
| Generate all the possible position combinations (sets) within | |
| the given maximal number of substitutions. | |
| Examples: |
| Gene | Cancer | Tumor suppressor or oncogene prediction (by 20/20+) | Decision | Tissue Frequency | Pancan Frequency | Consensus Score | Correlation adjusted score | Novel | Rescue Notes | Note about previous publication | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| ARID1A | CHOL | | official | 11.76% | 6.69% | 2.5 | 1.80 | 0 | Found in 28297679 | ||
| BAP1 | CHOL | tsg | official | 17.65% | 2.14% | 3.5 | 2.80 | 0 | Found in 28297679 | ||
| EPHA2 | CHOL | tsg | official | 11.76% | 1.58% | 2.5 | 2.50 | 0 | 0 | ||
| IDH1 | CHOL | oncogene | official | 14.71% | 5.56% | 4.5 | 3.80 | 0 | Found in 28297679 | ||
| PBRM1 | CHOL | tsg | official | 17.65% | 3.73% | 3.5 | 2.32 | 0 | 0 |
| """Find the possible Ensembl releases of the given IDs. | |
| The script uses Ensembl Tark APIs to subset the possible Ensembl releases | |
| that cover all the given Ensembl IDs. Usually it can pinpoint the right release | |
| using less than 30 IDs. Feeding more IDs may exceed the API call rate limit. | |
| Known issues: | |
| * The API doesn't handle ENSGR (chrY PAR genes) | |
| """ | |
| import argparse |