A dummy example for testing
cat DATA.tsv
ID head1 head2 head3 head4
1 25.5 1364.0 22.5 13.2
2 10.1 215.56 1.15 22.2
cat LIST.TXT
ID| ```r | |
| library(TxDb.Hsapiens.UCSC.hg19.knownGene) | |
| UCSC.hg19<- TxDb.Hsapiens.UCSC.hg19.knownGene | |
| hg19.genes<- genes(UCSC.hg19) | |
| transcriptsBy(UCSC.hg19, "gene") | |
| library("org.Hs.eg.db") | |
| ## note that dplyr and AnnotationDbi both have a function called select | |
| ## use dplyr::select when use dplyr |
| ### Define intronic, exonic and intergenic regions | |
| ```{r} | |
| library(AnnotationHub) | |
| library(dplyr) ## for %>% | |
| ah = AnnotationHub() | |
| possibleDates(ah) | |
| AnnotationHub::query(ah, c("gtf", "Homo_sapiens", "GRCh37")) | |
| GRCh37.gtf<- ah[['AH10684']] |
A dummy example for testing
cat DATA.tsv
ID head1 head2 head3 head4
1 25.5 1364.0 22.5 13.2
2 10.1 215.56 1.15 22.2
cat LIST.TXT
ID| #! /bin/bash | |
| set -e | |
| set -u | |
| set -o pipefail | |
| #### Author: Ming Tang (Tommy) | |
| #### Date 09/29/2016 | |
| #### I got the idea from this stackOverflow post http://stackoverflow.com/questions/11098189/awk-extract-columns-from-file-based-on-header-selected-from-2nd-file |
| Make a heatmap with a colored dendrogram using `ComplexHeatmap` and `dendsort`. | |
| See help [here](https://bioconductor.org/packages/release/bioc/vignettes/ComplexHeatmap/inst/doc/s2.single_heatmap.html) | |
| ```r | |
| ##### a make_hc function to receive different distance_measure and linkage_method | |
| make_hc<- function(x, distance_measure, linkage_method){ | |
| if (distance_measure == "pearson"){ | |
| ## cor calculate for columns, needs to transpose x first | |
| distance <- as.dist(1-cor(t(x), method = "pearson")) | |
| hc<- hclust(distance, method = linkage_method) |
| ## devtools::install_github("stephenturner/msigdf") | |
| library(msigdf) | |
| library(dplyr) | |
| library(clusterProfiler) | |
| c2 <- msigdf.human %>% | |
| filter(collection == "c2") %>% select(geneset, entrez) %>% as.data.frame | |
| data(geneList) | |
| de <- names(geneList)[1:100] |
| --- | |
| title: "lncRNA_heatmap" | |
| author: "Ming Tang" | |
| date: "July 28, 2016" | |
| output: html_document | |
| --- | |
| Read in the bigwig files for each mark. bigwig files were generated by Deeptools from bam files. | |
| ```{r} | |
| library(EnrichedHeatmap) |
Get rid of the digits (gene version) at the end of the gene names (gencode v19)
cat STAR_WT-30393468_htseq.cnt| sed -E 's/\.[0-9]+//' > WT_htseq.cnt
transcript to gene mapping file:
library(EnsDb.Hsapiens.v75)| aDict = {"B":"inputG1", "A":"inputG1", "C":"inputG2"} | |
| rule all: | |
| input: ["C.bed", "A.bed", "B.bed"] | |
| def get_files(wildcards): | |
| case = wildcards.case | |
| control = aDict[case] | |
| return [case + ".sorted.bam", control + ".sorted.bam"] |
| ## http://stackoverflow.com/questions/19876505/boxplot-show-the-value-of-mean | |
| ## Box plot of ratio_value per NLR group with jittered points; the group mean is | |
| ## drawn as a black point and printed as a red text label rounded to 1 digit. | |
| ## `fun.y` and `..y..` are deprecated in ggplot2 (>= 3.3.0): use `fun` and after_stat(). | |
| ggplot(NLR.tidy, aes(x = NLR, y = ratio_value, color = NLR, fill = NLR)) + | |
| geom_point(position = position_jitterdodge(dodge.width = 0.9)) + | |
| geom_boxplot(fill = "white", alpha = 0.1, outlier.colour = NA, | |
| position = position_dodge(width = 0.9)) + | |
| coord_cartesian(ylim = c(-0.5, 15)) + | |
| stat_summary(fun = mean, geom = "point", colour = "black", size = 3, show.legend = FALSE) + | |
| stat_summary(fun = mean, colour = "red", geom = "text", show.legend = FALSE, | |
| vjust = -0.7, aes(label = round(after_stat(y), digits = 1))) |