A dummy example for testing
cat DATA.tsv
ID head1 head2 head3 head4
1 25.5 1364.0 22.5 13.2
2 10.1 215.56 1.15 22.2
cat LIST.TXT
ID
```r | |
# Load the UCSC hg19 knownGene TxDb package and keep a shorter alias for it.
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
UCSC.hg19 <- TxDb.Hsapiens.UCSC.hg19.knownGene

# Extract the genes from the TxDb object.
hg19.genes <- genes(UCSC.hg19)

# Transcripts grouped by gene; the result is only printed, not stored.
transcriptsBy(UCSC.hg19, by = "gene")

library(org.Hs.eg.db)
# dplyr and AnnotationDbi both provide a function called `select`;
# write dplyr::select() explicitly whenever the dplyr version is intended.
### Define intronic, exonic and intergenic regions
```{r}
library(AnnotationHub)
library(dplyr) ## for %>%

## Create the AnnotationHub object.
ah <- AnnotationHub()
## List the snapshot dates available for this hub.
possibleDates(ah)
## Search the hub for records matching all three terms.
AnnotationHub::query(ah, c("gtf", "Homo_sapiens", "GRCh37"))
## Retrieve the record with hub ID AH10684.
GRCh37.gtf <- ah[["AH10684"]]
A dummy example for testing
cat DATA.tsv
ID head1 head2 head3 head4
1 25.5 1364.0 22.5 13.2
2 10.1 215.56 1.15 22.2
cat LIST.TXT
ID
#!/bin/bash
# Abort on any failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

#### Author: Ming Tang (Tommy)
#### Date 09/29/2016
#### I got the idea from this stackOverflow post http://stackoverflow.com/questions/11098189/awk-extract-columns-from-file-based-on-header-selected-from-2nd-file
Make a heatmap with a colored dendrogram using `ComplexHeatmap` and `dendsort`.
See the ComplexHeatmap vignette [here](https://bioconductor.org/packages/release/bioc/vignettes/ComplexHeatmap/inst/doc/s2.single_heatmap.html) for help.
```r
##### a make_hc function to receive different distance_measure and linkage_method | |
make_hc<- function(x, distance_measure, linkage_method){ | |
if (distance_measure == "pearson"){ | |
## cor calculate for columns, needs to transpose x first | |
distance <- as.dist(1-cor(t(x), method = "pearson")) | |
hc<- hclust(distance, method = linkage_method) |
## devtools::install_github("stephenturner/msigdf")
library(msigdf)
library(dplyr)
library(clusterProfiler)

## Keep the rows of msigdf.human whose collection is "c2" and reduce them to a
## plain data.frame with the geneset and entrez columns.
## The dplyr verbs are namespace-qualified so that another attached package
## exporting `filter`/`select` (e.g. AnnotationDbi) cannot mask them.
c2 <- msigdf.human %>%
  dplyr::filter(collection == "c2") %>%
  dplyr::select(geneset, entrez) %>%
  as.data.frame()

## `geneList` is a dataset shipped with DOSE; name the package explicitly
## because library(clusterProfiler) does not necessarily attach DOSE.
data(geneList, package = "DOSE")
## names of the first 100 entries of geneList
de <- names(geneList)[1:100]
--- | |
title: "lncRNA_heatmap" | |
author: "Ming Tang" | |
date: "July 28, 2016" | |
output: html_document | |
--- | |
Read in the bigWig files for each mark. The bigWig files were generated from BAM files with deepTools.
```{r}
library(EnrichedHeatmap) |
Strip the trailing version digits from the gene names (GENCODE v19):
# Remove the first ".<digits>" occurrence on each line of the htseq count file.
sed -E 's/\.[0-9]+//' STAR_WT-30393468_htseq.cnt > WT_htseq.cnt
Transcript-to-gene mapping file:
library(EnsDb.Hsapiens.v75)
# Lookup table keyed by sample name; values are the paired input sample names.
aDict = {
    "B": "inputG1",
    "A": "inputG1",
    "C": "inputG2",
}

# Default target: request the three .bed outputs.
rule all:
    input:
        ["C.bed", "A.bed", "B.bed"]
def get_files(wildcards):
    """Return the sorted-BAM file names for a case and its paired control.

    The control name is looked up in ``aDict`` using ``wildcards.case``.
    The returned list is ``[<case>.sorted.bam, <control>.sorted.bam]``.
    """
    sample = wildcards.case
    return [name + ".sorted.bam" for name in (sample, aDict[sample])]
## http://stackoverflow.com/questions/19876505/boxplot-show-the-value-of-mean
## Jittered points plus a near-transparent boxplot per NLR group; the group
## mean is drawn as a black point and printed in red, rounded to one digit.
## Uses `fun` and `after_stat()` in place of the deprecated `fun.y` and
## `..y..` (requires ggplot2 >= 3.3.0).
ggplot(NLR.tidy, aes(x = NLR, y = ratio_value, color = NLR, fill = NLR)) +
  geom_point(position = position_jitterdodge(dodge.width = 0.9)) +
  ## outliers are hidden here because every observation is already drawn
  ## by geom_point()
  geom_boxplot(
    fill = "white", alpha = 0.1, outlier.colour = NA,
    position = position_dodge(width = 0.9)
  ) +
  ## restrict the visible y range to [-0.5, 15]
  coord_cartesian(ylim = c(-0.5, 15)) +
  ## mean of each group as a black point
  stat_summary(
    fun = mean, geom = "point", colour = "black", size = 3,
    show.legend = FALSE
  ) +
  ## mean of each group as a red text label placed just above the point
  stat_summary(
    fun = mean, colour = "red", geom = "text", show.legend = FALSE,
    vjust = -0.7, aes(label = round(after_stat(y), digits = 1))
  )