Ming Tang crazyhottommy

A dummy example for testing

cat DATA.tsv 
ID	head1	head2	head3	head4
1	25.5	1364.0	22.5	13.2
2	10.1	215.56	1.15	22.2

cat LIST.TXT 
ID

Strip the trailing version suffix (the `.N` after the gene ID) from the gene names (GENCODE v19):

sed -E 's/\.[0-9]+//' STAR_WT-30393468_htseq.cnt > WT_htseq.cnt

transcript to gene mapping file:

library(EnsDb.Hsapiens.v75)

	```r
	## UCSC hg19 knownGene annotation: alias the TxDb object, pull its genes,
	## and list transcripts grouped by gene (result is printed, not stored).
	library(TxDb.Hsapiens.UCSC.hg19.knownGene)
	UCSC.hg19 <- TxDb.Hsapiens.UCSC.hg19.knownGene
	hg19.genes <- genes(UCSC.hg19)
	transcriptsBy(UCSC.hg19, by = "gene")

	library("org.Hs.eg.db")

	## Note: dplyr and AnnotationDbi both export a function called select().
	## Call dplyr::select() explicitly when you mean the dplyr version.

	### Define intronic, exonic and intergenic regions

	```{r}
	## Search AnnotationHub for GRCh37 human GTF records and fetch one by hub ID.
	library(AnnotationHub)
	library(dplyr) ## for %>%
	ah <- AnnotationHub()
	possibleDates(ah)
	AnnotationHub::query(ah, c("gtf", "Homo_sapiens", "GRCh37"))

	## AH10684: presumably picked from the query listing above -- confirm the ID
	GRCh37.gtf <- ah[["AH10684"]]

	#! /bin/bash

	# exit on error, on unset variables, and on a failure anywhere in a pipeline
	set -euo pipefail

	#### Author: Ming Tang (Tommy)
	#### Date 09/29/2016
	#### I got the idea from this stackOverflow post http://stackoverflow.com/questions/11098189/awk-extract-columns-from-file-based-on-header-selected-from-2nd-file


	Make a heatmap with a colored dendrogram using `ComplexHeatmap` and `dendsort`.
	See help [here](https://bioconductor.org/packages/release/bioc/vignettes/ComplexHeatmap/inst/doc/s2.single_heatmap.html)
	```r
	##### a make_hc function to receive different distance_measure and linkage_method
	make_hc<- function(x, distance_measure, linkage_method){
	if (distance_measure == "pearson"){
	## cor calculate for columns, needs to transpose x first
	distance <- as.dist(1-cor(t(x), method = "pearson"))
	hc<- hclust(distance, method = linkage_method)

	## devtools::install_github("stephenturner/msigdf")
	library(msigdf)
	library(dplyr)
	library(clusterProfiler)

	## Keep only the "c2" collection of msigdf.human as a plain two-column
	## data.frame (geneset, entrez).
	## The dplyr verbs are namespace-qualified because other attached packages
	## (e.g. stats::filter, AnnotationDbi::select) export the same names and can
	## mask them depending on load order.
	c2 <- msigdf.human %>%
	  dplyr::filter(collection == "c2") %>%
	  dplyr::select(geneset, entrez) %>%
	  as.data.frame()

	## geneList is an example dataset shipped with DOSE; name the package
	## explicitly so it is found even when DOSE is not attached.
	data(geneList, package = "DOSE")
	## use the names of the first 100 entries as the example gene set
	de <- names(geneList)[1:100]

	---
	title: "lncRNA_heatmap"
	author: "Ming Tang"
	date: "July 28, 2016"
	output: html_document
	---

	Read in the bigWig files for each mark. The bigWig files were generated from BAM files with deepTools.
	```{r}
	library(EnrichedHeatmap)

	aDict = {"B":"inputG1", "A":"inputG1", "C":"inputG2"}

	rule all:
	input: ["C.bed", "A.bed", "B.bed"]

	def get_files(wildcards):
	    """Return the sorted BAM paths for a case sample and its control.

	    The control name is looked up in ``aDict`` by ``wildcards.case``;
	    a case missing from ``aDict`` raises ``KeyError``.
	    Returns ``[<case>.sorted.bam, <control>.sorted.bam]``.
	    """
	    case = wildcards.case
	    control = aDict[case]
	    return [case + ".sorted.bam", control + ".sorted.bam"]

	## http://stackoverflow.com/questions/19876505/boxplot-show-the-value-of-mean
	## plot adding mean value
	## Jittered points per NLR group with a near-transparent box plot on top,
	## plus the group mean drawn as a black point and labelled in red.
	## Requires ggplot2 >= 3.3.0: `fun` and after_stat() replace the deprecated
	## `fun.y` argument and `..y..` notation.
	ggplot(NLR.tidy, aes(x = NLR, y = ratio_value, color = NLR, fill = NLR)) +
	  geom_point(position = position_jitterdodge(dodge.width = 0.9)) +
	  geom_boxplot(
	    fill = "white", alpha = 0.1, outlier.colour = NA,
	    position = position_dodge(width = 0.9)
	  ) +
	  ## restrict the visible y range; coord_cartesian zooms rather than filters
	  coord_cartesian(ylim = c(-0.5, 15)) +
	  ## group mean as a black point
	  stat_summary(
	    fun = mean, geom = "point", colour = "black", size = 3,
	    show.legend = FALSE
	  ) +
	  ## group mean as a red text label, rounded to one decimal
	  stat_summary(
	    fun = mean, colour = "red", geom = "text", show.legend = FALSE,
	    vjust = -0.7, aes(label = round(after_stat(y), digits = 1))
	  )