Ming Tang crazyhottommy

🎯

Focusing

Director of Bioinformatics. I care about reproducible research and open science.

crazyhottommy / GEOquery_gene_expression.r

Created July 4, 2014 20:03

	# Attach the packages this script uses; getGEO() below comes from GEOquery.
	library("Biobase")
	library("GEOquery")

	# Fetch series GSE34412 from GEO (series-matrix form), then keep only the
	# first element of the returned list as the working object.
	gset <- getGEO(GEO = "GSE34412", GSEMatrix = TRUE)
	gset <- gset[[1L]]

	# make proper column names to match toptable

crazyhottommy / change_fasta_header.py

Last active August 29, 2015 14:04

	# Build a lookup table from the annotation file: for every non-blank line,
	# the first whitespace-separated token is the key and the remaining tokens
	# (as a list) are the value.
	with open("C:/Users/Tang Ming/Desktop/anotation.txt", "r") as annotation:
	    anotation_dict = {}
	    for line in annotation:
	        fields = line.split()
	        if fields:  # blank / whitespace-only lines split to [] and are skipped
	            anotation_dict[fields[0]] = fields[1:]

	# really should not parse the fasta file by myself. there are

crazyhottommy / ngs2014_linux_basics.sh

Last active October 13, 2016 18:01

linux basics

	# Linux command basics: a quick-reference list of everyday shell commands.
	# NOTE(review): several entries below omit their required arguments, so this
	# reads as a cheat sheet rather than a script meant to be executed as-is.
	# Tutorial: http://software-carpentry.org/v5/novice/shell/index.html
	# practise, practise, practise, google, google, google and you will get it :)

	pwd # print the current working directory
	cd # change directory
	sudo # run a command with super user privilege
	chmod 775 # change file permissions (775: rwx for owner and group, r-x for others) http://en.wikipedia.org/wiki/Chmod
	git clone # version control! get to know git and github! http://git-scm.com/
	sudo bash # opens a shell as the super user; a bad habit

crazyhottommy / ngs2014_R_basics.r

Last active August 29, 2015 14:05

	#2014 MSU NGS R basics tutorial
	#http://angus.readthedocs.org/en/2014/R_Introductory_tutorial_2014.html
	#https://github.com/jrherr/quick_basic_R_tutorial/blob/master/R_tutorial.md

	#pick one language, and learn it well!

	#pick up a dataset, play with it!

	#object-oriented programming
	#functional programming

crazyhottommy / make_dummy_file.sh

Created August 14, 2014 14:23

make_dummy_files

crazyhottommy / rename.sh

Created August 14, 2014 14:25

rename_files

	# Rename FASTQ files named "<N>egg_r<A>_0<B>_sub.fastq.gz" to
	# "<N>egg_R<B>_00<A>.fastq.gz" (N in 1-4, A and B in 1-2).
	for fspec1 in *.gz
	do
		# the glob stays literal when nothing matches; skip that non-existent name
		[ -e "${fspec1}" ] || continue
		#echo $fspec1
		# \1 = "<N>egg", \2 = digit after "_r", \3 = digit after "_0"
		fspec2=$(echo "${fspec1}" | sed "s/\([1-4]egg\)_r\([1-2]\)_0\([1-2]\)_sub.fastq.gz/\1_R\3_00\2.fastq.gz/")
		echo "${fspec2}"
		# names that do not match the pattern come back unchanged; nothing to move
		[ "${fspec1}" = "${fspec2}" ] && continue
		mv -- "${fspec1}" "${fspec2}"
	done

crazyhottommy / geneSymbol2Entrez.py

Created September 3, 2014 15:09

	#! /usr/bin/env python

	# ID mapping using mygene
	# https://pypi.python.org/pypi/mygene
	# http://nbviewer.ipython.org/gist/newgene/6771106
	# http://mygene-py.readthedocs.org/en/latest/
	# 08/30/14

	__author__ = 'tommy'

crazyhottommy / convert_ids.r

Created September 10, 2014 16:52


	##### use bioconductor annotation packages #######

	# Load the biocLite installer script and use it to install the three
	# packages attached below.
	# NOTE(review): biocLite() appears to have been superseded by
	# BiocManager::install() in current Bioconductor releases, and the script is
	# fetched over plain http -- confirm and migrate before relying on this.
	source("http://Bioconductor.org/biocLite.R")
	biocLite("org.Hs.eg.db")
	biocLite(c("GenomicFeatures", "AnnotationDbi"))

	# Attach the packages installed above.
	library("org.Hs.eg.db")
	library("AnnotationDbi")
	library("GenomicFeatures")

crazyhottommy / dplyr_gff.r

Created September 26, 2014 19:22

	library(dplyr)
	# NOTE(review): setwd() ties the script to one machine's directory layout; it
	# is kept because code outside this view may rely on the working directory.
	setwd("/home/tommy/annotations/human/ensemble/")

	# Names and classes of the nine GTF columns. Passing them as colClasses spares
	# read.delim() from guessing the types (faster, and the result is predictable).
	gtf_cols <- c(
	  seqname = "factor", source = "factor", feature = "factor",
	  start = "integer", end = "integer", score = "character",
	  strand = "factor", frame = "factor", attribute = "character"
	)

	# Read the gzipped, header-less GTF; text after a "#" is treated as a comment.
	# colClasses is a named vector, so classes are matched to columns by name.
	hs_gtf <- read.delim(
	  "Homo_sapiens.GRCh37.74.gtf.gz",
	  header = FALSE,
	  col.names = names(gtf_cols),
	  colClasses = gtf_cols,
	  comment.char = "#"
	)

crazyhottommy / bam2bw.sh

Last active November 18, 2015 00:38

	#! /bin/bash

	# For every entry matching *bam in the current directory, write a bedGraph
	# coverage track named after the file with its ".bam" suffix replaced by ".bdg".
	for bam in *bam
	do
		# the glob stays literal when nothing matches; skip that non-existent name
		[ -e "$bam" ] || continue
		echo "$bam"
		# quoting keeps file names containing spaces intact
		genomeCoverageBed -ibam "$bam" -bg -g hg19.genome.info > "$(basename "$bam" .bam).bdg"
	done

	for bdg in *bdg
	do

	# Rename FASTQ files named "<N>egg_r<A>_0<B>_sub.fastq.gz" to
	# "<N>egg_R<B>_00<A>.fastq.gz" (N in 1-4, A and B in 1-2).
	for fspec1 in *.gz
	do
		# the glob stays literal when nothing matches; skip that non-existent name
		[ -e "${fspec1}" ] || continue
		#echo $fspec1
		# the pipe must be unescaped, otherwise echo just prints "| sed ..." as text
		# \1 = "<N>egg", \2 = digit after "_r", \3 = digit after "_0"
		fspec2=$(echo "${fspec1}" | sed "s/\([1-4]egg\)_r\([1-2]\)_0\([1-2]\)_sub.fastq.gz/\1_R\3_00\2.fastq.gz/")
		echo "${fspec2}"
		# names that do not match the pattern come back unchanged; nothing to move
		[ "${fspec1}" = "${fspec2}" ] && continue
		mv -- "${fspec1}" "${fspec2}"
	done