Liang-Bo Wang ccwang002

I hereby claim:

To claim this, I am signing this object:

Gene	Cancer	Tumor suppressor or oncogene prediction (by 20/20+)	Decision	Tissue Frequency	Pancan Frequency	Consensus Score	Correlation adusted score	Novel	Rescue Notes	Note about previous publication
ARID1A	CHOL		official	11.76%	6.69%	2.5	1.80	0		Found in 28297679
BAP1	CHOL	tsg	official	17.65%	2.14%	3.5	2.80	0		Found in 28297679
EPHA2	CHOL	tsg	official	11.76%	1.58%	2.5	2.50	0		0
IDH1	CHOL	oncogene	official	14.71%	5.56%	4.5	3.80	0		Found in 28297679
PBRM1	CHOL	tsg	official	17.65%	3.73%	3.5	2.32	0		0

	import numpy as np
	# Fixed seed so every run draws the same pseudo-random numbers.
	rs = np.random.RandomState(seed=5566)
	n_conditions = 10

	# Here we simulate a complex computation, for example, an analogy of the
	# magnitude of gradient descent, which is expected to be strictly positive.
	# But from the result we find that it is sometimes negative; we wish to find
	# out when and under what conditions our program produces bogus output.
	#
	# This is a case for using pdb and a conditional breakpoint.

	from datetime import datetime
	from pytz import timezone # pip install pytz


	# Set up the remote time.
	# localize() attaches the zone to a naive datetime, which is how pytz builds
	# a timezone-aware value.
	remote_tz = timezone('US/Pacific') # US Pacific time, for example
	remote_dt = remote_tz.localize(datetime(2015, 5, 1, 14, 0)) # May 1, 2015, 2:00 PM Pacific; daylight time (PDT) applies on this date, not PST

	# Set up Taipei local time.
	tpe = timezone('Asia/Taipei')

	import pandas as pd
	import os
	from pathlib import Path

	# Export the Zotero library as CSV first; this is the path to that export.
	ZOTERO_LIBRARY_PTH = '/Users/liang/Desktop/My Library.csv'
	# NOTE(review): presumably the folder holding the reference files; its use
	# is not visible in this part of the script -- confirm.
	REFERENCES_ROOT = Path('/Users/liang/Dropbox/References/')


	# Load the exported CSV into a DataFrame.
	df = pd.read_csv(ZOTERO_LIBRARY_PTH)

	# The Snakefile that loads raw data and genome reference locally
	GENOME_FA = "griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/chr22_ERCC92.fa"
	GENOME_GTF = "griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/genes_chr22_ERCC92.gtf"
	HISAT2_INDEX_PREFIX = "hisat2_index/chr22_ERCC92"

	# glob_wildcards() returns one list per wildcard in the pattern. Keep the
	# values matched by {sample} in the *.read1.fastq.gz file names and discard
	# anything else via the starred placeholder.
	SAMPLES, *_ = glob_wildcards('griffithlab_brain_vs_uhr/HBR_UHR_ERCC_ds_10pc/{sample}.read1.fastq.gz')

	from pathlib import Path

	from pathlib import Path
	from snakemake.remote.GS import RemoteProvider as GSRemoteProvider
	# Remote provider used to reference inputs stored on Google Cloud Storage.
	GS = GSRemoteProvider()


	# NOTE(review): presumably "<bucket>/<key prefix>" -- confirm against the bucket layout.
	GS_PREFIX = "lbwang-playground/snakemake_rnaseq"
	# The genome FASTA and GTF annotation are wrapped with GS.remote(), so they
	# are referenced as remote objects under GS_PREFIX rather than as local paths.
	GENOME_FA = GS.remote(f"{GS_PREFIX}/griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/chr22_ERCC92.fa")
	GENOME_GTF = GS.remote(f"{GS_PREFIX}/griffithlab_brain_vs_uhr/GRCh38_Ens87_chr22_ERCC/genes_chr22_ERCC92.gtf")
	# Unlike the two references above, the index prefixes are plain strings
	# (not wrapped in GS.remote()).
	# NOTE(review): how the short and the FULL prefix are each used is not
	# visible in this part of the file -- confirm.
	HISAT2_INDEX_PREFIX = "hisat2_index/chr22_ERCC92"
	FULL_HISAT2_INDEX_PREFIX = "dinglab/lbwang/snakemake_demo/hisat2_index/chr22_ERCC92"

	from itertools import combinations, product


	def gen_pos_sets_to_sub(barcode, max_sub=1):
	"""
	Generate all the possible position combinations (sets) within
	the given maximal number of substitutions.

	Examples:

	"""Find the possible Ensembl releases of the given IDs.

	The script uses Ensembl Tark APIs to subset the possible Ensembl releases
	that cover all the given Ensembl IDs. Usually it can pinpoint the right release
	using less than 30 IDs. Feeding more IDs may exceed the API call rate limit.

	Known issues:
	* The API doesn't handle ENSGR (chrY PAR genes)
	"""
	import argparse