Skip to content

Instantly share code, notes, and snippets.

View crazyhottommy's full-sized avatar
🎯
Focusing

Ming Tang crazyhottommy

🎯
Focusing
View GitHub Profile
### This part is from the edX online Harvard course
## HarvardX: PH525.3x Advanced Statistics for the Life Sciences, week 1
# devtools is attached for install_github(), which is called on the next line.
library(devtools)
# Install the GSE5859Subset package from the genomicsclass GitHub account.
install_github("genomicsclass/GSE5859Subset")
library(GSE5859Subset)
# Load the GSE5859Subset dataset into the workspace.
data(GSE5859Subset)
# NOTE(review): geneExpression is presumably one of the objects created by
# data() above -- confirm; this prints its dimensions.
dim(geneExpression)
#!/usr/bin/env bash
# Extract the sequence of every region listed in a coordinates file from
# ref.fasta and append the resulting records to test.fa.
#
# Usage: <script> regions.bed
#   $1 - whitespace-delimited file with chromosome, start and end in its
#        first three columns; any further columns are ignored.
#
# NOTE(review): start/end are handed to samtools unchanged. If the input is a
# true 0-based BED file, samtools' 1-based regions would begin one base
# early -- confirm which coordinate convention the input uses.
infile=${1:?usage: $0 regions.bed}

# -r keeps backslashes literal; the trailing "_" absorbs extra columns so they
# are not glued onto "end". The || clause handles a final line that has no
# trailing newline.
while read -r chr start end _ || [[ -n "$chr" ]]; do
  # Skip blank lines.
  [[ -n "$chr" ]] || continue
  samtools faidx ref.fasta "${chr}:${start}-${end}" >> test.fa
done < "$infile"
# search PubMed for records matching "glioblastoma enhancer"
$ esearch -db pubmed -query "glioblastoma enhancer"
<ENTREZ_DIRECT>
<Db>pubmed</Db>
<WebEnv>NCID_1_539964707_130.14.18.34_9001_1422280320_2091337226_0MetA0_S_MegaStore_F_1</WebEnv>
<QueryKey>1</QueryKey>
<Count>97</Count>
<Step>1</Step>
</ENTREZ_DIRECT>
#! /usr/bin/env python
import sys
import scipy.stats as stats
# The result will be two p-values:
# 1. the probability that, by random chance, the number of genes with both condition A and B is <= the number you observed
# 2. the probability that, by random chance, the number of genes with both condition A and B is >= the number you observed
# The second p-value is probably the one you want.
#!/bin/bash
# Write a bedGraph coverage track (<name>.bdg) for every file matching *bam in
# the current directory, using the genome file hg19.genome.info.
for bam in *bam; do
  # An unmatched glob is left as the literal string "*bam"; skip it.
  [[ -e "$bam" ]] || continue
  printf '%s\n' "$bam"
  # ${bam%.bam} strips a trailing ".bam" (if present) to build the output name.
  genomeCoverageBed -ibam "$bam" -bg -g hg19.genome.info > "${bam%.bam}.bdg"
done
for bdg in *bdg
do
library(dplyr)
setwd("/home/tommy/annotations/human/ensemble/")
# Declare the class of each of the nine GTF columns up front so read.delim
# does not have to guess them while reading (faster on a large file).
gtf_cols <- c(seqname="factor", source="factor", feature="factor",
              start="integer", end="integer", score="character",
              strand="factor", frame="factor", attribute="character")
# Pass the declared classes through colClasses; col.names alone only names the
# columns and leaves type guessing to read.delim. Lines starting with "#" are
# treated as comments and skipped.
hs_gtf <- read.delim('Homo_sapiens.GRCh37.74.gtf.gz', header=FALSE,
                     col.names=names(gtf_cols), colClasses=gtf_cols,
                     comment.char="#")
##### use bioconductor annotation packages #######
# Fetch the installer script over HTTPS: source() executes whatever it
# downloads, so plain HTTP would allow the script to be altered in transit.
# NOTE(review): biocLite() is the legacy Bioconductor installer; newer
# releases use BiocManager::install() instead -- confirm the R version in use.
source("https://bioconductor.org/biocLite.R")
biocLite("org.Hs.eg.db")
biocLite(c("GenomicFeatures", "AnnotationDbi"))
library("org.Hs.eg.db")
library("AnnotationDbi")
library("GenomicFeatures")
#! /usr/bin/env python
# ID mapping using mygene
# https://pypi.python.org/pypi/mygene
# http://nbviewer.ipython.org/gist/newgene/6771106
# http://mygene-py.readthedocs.org/en/latest/
# 08/30/14
__author__ = 'tommy'
@crazyhottommy
crazyhottommy / rename.sh
Created August 14, 2014 14:25
rename_files
# Rename FASTQ files in the current directory from
#   <N>egg_r<A>_0<B>_sub.fastq.gz   to   <N>egg_R<B>_00<A>.fastq.gz
# (N in 1-4, A and B in 1-2). The new name of every *.gz file is printed;
# files whose name does not match the pattern are left untouched.

# Print the new name for the file name given in $1. A name that does not match
# the pattern is printed unchanged.
fastq_new_name() {
  printf '%s\n' "$1" |
    sed 's/\([1-4]egg\)_r\([1-2]\)_0\([1-2]\)_sub.fastq.gz/\1_R\3_00\2.fastq.gz/'
}

for fspec1 in *.gz; do
  # An unmatched glob is left as the literal string "*.gz"; skip it.
  [ -e "$fspec1" ] || continue
  fspec2=$(fastq_new_name "$fspec1")
  printf '%s\n' "$fspec2"
  # Moving a file onto itself makes mv fail, so skip names that did not change.
  [ "$fspec1" != "$fspec2" ] || continue
  mv -- "$fspec1" "$fspec2"
done
@crazyhottommy
crazyhottommy / make_dummy_file.sh
Created August 14, 2014 14:23
make_dummy_files
#!/usr/bin/env bash
# Create an empty file (or update the timestamp of an existing one) for every
# name listed, one per line, in the file given as $1.
#
# Usage: make_dummy_file.sh names.txt
names_file=${1:?usage: $0 names.txt}

# -r keeps backslashes literal; read still trims leading/trailing blanks.
# The || clause handles a final line that has no trailing newline.
while read -r name || [[ -n "$name" ]]; do
  # Skip blank lines: touch with an empty name would fail.
  [[ -n "$name" ]] || continue
  echo "Name read from file - $name"
  # Quoting keeps names containing spaces as a single file; "--" protects
  # names that begin with a dash.
  touch -- "$name"
done < "$names_file"