Ming Tang crazyhottommy

🎯

Focusing

Director of Bioinformatics. Care about reproducible research and open science

crazyhottommy / Pvalue_FDR_multiple_test.r

Last active August 29, 2015 14:17

	### This part is from the edX online Harvard course
	## HarvardX: PH525.3x Advanced Statistics for the Life Sciences, week 1

	library(devtools)
	# Install the course data package from GitHub only when it is missing, so
	# re-running the script does not reinstall it or require network access.
	if (!requireNamespace("GSE5859Subset", quietly = TRUE)) {
	  install_github("genomicsclass/GSE5859Subset")
	}

	library(GSE5859Subset)
	# Load the packaged data set; presumably this is what provides the
	# geneExpression object inspected below.
	data(GSE5859Subset)
	# Print the dimensions of geneExpression.
	dim(geneExpression)

crazyhottommy / batch_convert_faidx.sh

Last active August 29, 2015 14:16

	#! /bin/bash
	# Extract the sequence of every region listed in a coordinates file from
	# ref.fasta and append the results to test.fa.
	#
	# Usage: batch_convert_faidx.sh regions.bed
	#   $1  whitespace-separated file with chromosome, start and end columns
	#
	# put the coordinates in a bed file
	# NOTE(review): start/end are passed to samtools unchanged. BED starts are
	# 0-based while samtools regions are 1-based — confirm which convention the
	# input file actually uses.

	infile=$1
	# -r keeps backslashes literal; the trailing "_" absorbs any columns after
	# the third so they are not glued onto "end". The "|| [ -n ... ]" part still
	# processes a final line that lacks a trailing newline.
	while read -r chr start end _ || [ -n "$chr" ]
	do
	  # Skip blank lines instead of sending an empty region to samtools.
	  [ -n "$chr" ] || continue
	  samtools faidx ref.fasta "$chr:$start-$end" >> test.fa
	done < "$infile"

crazyhottommy / Entrez_Direct.sh

Last active August 29, 2015 14:14

	# search PubMed for records matching "glioblastoma enhancer"
	$ esearch -db pubmed -query "glioblastoma enhancer"
	<ENTREZ_DIRECT>
	<Db>pubmed</Db>
	<WebEnv>NCID_1_539964707_130.14.18.34_9001_1422280320_2091337226_0MetA0_S_MegaStore_F_1</WebEnv>
	<QueryKey>1</QueryKey>
	<Count>97</Count>
	<Step>1</Step>
	</ENTREZ_DIRECT>

crazyhottommy / gene_sets_hypergeometric_test.py

Last active March 8, 2019 07:12

	#! /usr/bin/env python

	import sys
	import scipy.stats as stats

	# The result will be two p-values:
	# 1. the probability that, by random chance, the number of genes with both condition A and B is <= the number you observed
	# 2. the probability that, by random chance, the number of genes with both condition A and B is >= the number you observed
	# The second p-value is probably what you want.

crazyhottommy / bam2bw.sh

Last active November 18, 2015 00:38

	#! /bin/bash

	# Write a bedGraph coverage track (<name>.bdg) for every file in the current
	# directory whose name ends in "bam", using hg19.genome.info as the genome file.
	for bam in *bam
	do
	  # When nothing matches, the glob stays literal; skip it so that no junk
	  # "*bam.bdg" file is created.
	  [ -e "$bam" ] || continue
	  echo "$bam"
	  # Quoting keeps file names containing spaces intact.
	  genomeCoverageBed -ibam "$bam" -bg -g hg19.genome.info > "$(basename "$bam" .bam).bdg"
	done

	for bdg in *bdg
	do

crazyhottommy / dplyr_gff.r

Created September 26, 2014 19:22

	library(dplyr)
	setwd("/home/tommy/annotations/human/ensemble/")

	# Names and types of the nine GTF columns. Passing the types as colClasses
	# spares read.delim from guessing them, which makes reading faster.
	gtf_cols <- c(
	  seqname = "factor", source = "factor", feature = "factor",
	  start = "integer", end = "integer", score = "character",
	  strand = "factor", frame = "factor", attribute = "character"
	)

	# Read the gzipped annotation file relative to the working directory set
	# above. The file has no header row, and comment.char = "#" makes
	# read.delim ignore everything from a "#" onwards on each line.
	hs_gtf <- read.delim(
	  "Homo_sapiens.GRCh37.74.gtf.gz",
	  header = FALSE,
	  col.names = names(gtf_cols),
	  colClasses = gtf_cols,
	  comment.char = "#"
	)

crazyhottommy / convert_ids.r

Created September 10, 2014 16:52


	##### use Bioconductor annotation packages #######

	# Fetch the Bioconductor installer over HTTPS: source() executes whatever it
	# downloads, so the transfer must not be open to tampering.
	# NOTE(review): biocLite() has been superseded by BiocManager::install() in
	# newer Bioconductor releases — confirm which installer your R version needs.
	source("https://bioconductor.org/biocLite.R")
	biocLite("org.Hs.eg.db")
	biocLite(c("GenomicFeatures", "AnnotationDbi"))

	# Attach the packages installed above.
	library("org.Hs.eg.db")
	library("AnnotationDbi")
	library("GenomicFeatures")

crazyhottommy / geneSymbol2Entrez.py

Created September 3, 2014 15:09

	#! /usr/bin/env python

	# ID mapping using mygene
	# https://pypi.python.org/pypi/mygene
	# http://nbviewer.ipython.org/gist/newgene/6771106
	# http://mygene-py.readthedocs.org/en/latest/
	# 08/30/14

	__author__ = 'tommy'

crazyhottommy / rename.sh

Created August 14, 2014 14:25

rename_files

	# Rename FASTQ files in the current directory, e.g.
	#   1egg_r1_02_sub.fastq.gz -> 1egg_R2_001.fastq.gz
	for fspec1 in *.gz
	do
	  #echo $fspec1
	  # \1 = sample prefix (1egg..4egg), \2 = digit after "_r", \3 = digit after "_0".
	  fspec2=$(printf '%s\n' "${fspec1}" | sed "s/\([1-4]egg\)_r\([1-2]\)_0\([1-2]\)_sub\.fastq\.gz/\1_R\3_00\2.fastq.gz/")
	  echo "$fspec2"
	  # Leave the file alone when the pattern did not match (name unchanged);
	  # this also covers the literal "*.gz" produced by an unmatched glob.
	  [ "${fspec1}" != "${fspec2}" ] || continue
	  mv -- "${fspec1}" "${fspec2}"
	done

crazyhottommy / make_dummy_file.sh

Created August 14, 2014 14:23

make_dummy_files

	# Rename FASTQ files in the current directory, e.g.
	#   1egg_r1_02_sub.fastq.gz -> 1egg_R2_001.fastq.gz
	for fspec1 in *.gz
	do
	  #echo $fspec1
	  # \1 = sample prefix (1egg..4egg), \2 = digit after "_r", \3 = digit after "_0".
	  fspec2=$(printf '%s\n' "${fspec1}" | sed "s/\([1-4]egg\)_r\([1-2]\)_0\([1-2]\)_sub\.fastq\.gz/\1_R\3_00\2.fastq.gz/")
	  echo "$fspec2"
	  # Leave the file alone when the pattern did not match (name unchanged);
	  # this also covers the literal "*.gz" produced by an unmatched glob.
	  [ "${fspec1}" != "${fspec2}" ] || continue
	  mv -- "${fspec1}" "${fspec2}"
	done