Skip to content

Instantly share code, notes, and snippets.

View ATpoint's full-sized avatar

Alexander Bender ATpoint

  • Germany
  • 10:38 (UTC +01:00)
View GitHub Profile
@ATpoint
ATpoint / EnrichedHeatmap_example.R
Last active October 15, 2020 15:49
EnrichedHeatmap example code
#/ A minimal example on how to use EnrichedHeatmap together with rtracklayer.
#/ Required data are at https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE111902
#/ Go for the files :
#/ "GSM3045250_N_ATAC_Kaech_1_peaks.broadPeak.gz" and (genomic regions in BED-like format)
#/ "GSM3045250_N_ATAC_Kaech_1.bw" (the bigwig files with read counts)
require(EnrichedHeatmap)
require(rtracklayer)
require(circlize)
require(data.table)
require(data.table)
require(microbenchmark)
require(GenomicRanges)
x <- fread("
chrom start end hgnc
1 100 200 MYC
1 150 300 MYC
1 400 500 MYC
1 150 230 TP53
library(biomaRt)
## get mouse transcript coordinates
mmusculus_genes <- getBM(attributes = c("ensembl_gene_id", "chromosome_name",
"transcript_start", "transcript_end"),
mart = useMart("ensembl", dataset = "mmusculus_gene_ensembl"))
## put in the exact same format as the example of the toplevel question,
## not caring about the fact that hgnc is not the correct name for mouse genes:
large.input <- data.frame(chrom = mmusculus_genes$chromosome_name,
##
library(rtracklayer)
## Use rtracklayer to import a subset of a bigwig file from disk into R.
## reference.gr is a GRanges object with the ranges that will be used for subsetting
## selected.gr is a GRanges object with the count data for each interval stored in elementMetadata(selected.gr)
selected.gr <- rtracklayer::import(Path.To.BigWig, format = "BigWig", selection = BigWigSelection(reference.gr))
library(biomaRt)
## How to get gene coordinates starting from Entrez gene IDs:
## Example for human:
GeneName <- "hgnc_symbol" ## for mouse use "mgi_symbol"
DataSet <- "hsapiens_gene_ensembl" ## for mouse use "mmusculus_gene_ensembl
## Create a look-up table using biomaRt:
Coords <- mmusculus_genes <- getBM(attributes = c("entrezgene_id",
## Script to create a transcriptome fasta file that contains both spliced and unspliced
## transcripts based on a GTF file from GENCODE.
## Also see the preprint https://doi.org/10.1101/2020.03.13.990069 from Charlotte Soneson on the matter of
## how preprocessing influences the outcome of velocity analysis.
## The below steps are inspired by the preprint and personal correspondence with CS.
ANALYSISDIR="./"
dir.create(paste0(ANALYSISDIR, "/Alevin_IDX"))
library(GenomicRanges)
library(matrixStats)
ranges1 <- GRanges(seqnames = c("chr1", "chr2", "chr3"),
ranges = IRanges(start=c(1,100,1000),
end = c(10,150,5000)))
ranges2 <- GRanges(seqnames = c("chr1", "chr2", "chr3"),
ranges = IRanges(start=c(11,110,2000),
end = c(20,130,3000)))
@ATpoint
ATpoint / getData.R
Last active December 18, 2022 10:06
# install.packages("data.table", "dplyr")
library(data.table)
library(dplyr)
parseProperly <- function(x){
x <- x[,-c(ncol(x)-1,ncol(x))]
x
}
metaBigBam.c:485:12: warning: 'cigar_tab' is deprecated: Use bam_cigar_table[] instead [-Wdeprecated-declarations]
if (h->cigar_tab == 0)
^
/usr/local/include/htslib/sam.h:75:29: note: 'cigar_tab' has been explicitly marked deprecated here
const int8_t *cigar_tab HTS_DEPRECATED("Use bam_cigar_table[] instead");
^
/usr/local/include/htslib/hts_defs.h:75:49: note: expanded from macro 'HTS_DEPRECATED'
#define HTS_DEPRECATED(message) __attribute__ ((__deprecated__ (message)))
^
metaBigBam.c:487:5: warning: 'cigar_tab' is deprecated: Use bam_cigar_table[] instead [-Wdeprecated-declarations]
@ATpoint
ATpoint / guides_angles.R
Last active September 24, 2021 10:35
No more messing with h/vjust when rotating labels
library(ggplot2)
library(reshape2)
library(egg)
# thanks to https://stackoverflow.com/questions/1330989/rotating-and-spacing-axis-labels-in-ggplot2/60650595#60650595
dat<-reshape2::melt(data.frame(groupA=rnorm(20),
groupB=rnorm(20),
groupC=rnorm(20)))