Skip to content

Instantly share code, notes, and snippets.

View mikelove's full-sized avatar

Michael Love mikelove

View GitHub Profile
@mikelove
mikelove / test_mpra_seqs.R
Created June 14, 2022 18:57
Testing MPRA sequences can be computed from ID
library(Biostrings)
dna <- readDNAStringSet("test_sequences.fasta")
ids <- names(dna)
library(tidyverse)
library(plyranges)
dat <- data.frame(ids)
into <- c("seqnames","start","end","pos","relpos",
"ref","alt","allele","rsid","RE")
dat <- dat %>%
separate(ids, sep="_", into=into, convert=TRUE)
@mikelove
mikelove / glucocorticoid.R
Created May 19, 2022 02:06
Example code for running motif analysis
# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSM2095218
#url <- "ftp://ftp.ncbi.nlm.nih.gov/geo/samples/GSM2095nnn/GSM2095218/suppl/GSM2095218%5FDEX%5F3hr%5FGR%5FChIPseq%2ERep1%5Fpeaks%2Ebed%2Egz"
#download.file(url, destfile="GSM2095218_DEX_3hr_GR_ChIPseq.Rep1_peaks.bed.gz")
library(plyranges)
x <- read_bed("GSM2095218_DEX_3hr_GR_ChIPseq.Rep1_peaks.bed.gz")
genome(x) <- "hg19"
options(Biostrings.coloring=FALSE)
@mikelove
mikelove / deseq2_edger_QC_RUV.R
Last active April 8, 2022 14:18
DESeq2 and edgeR with basic QC and RUV
x <- read.csv("GSE91061_BMS038109Sample.hg19KnownGene.raw.csv", row.names=1)
condition <- factor(sub(".+_(.+)_.+", "\\1", colnames(x)))
library(DESeq2)
dds <- DESeqDataSetFromMatrix(x, colData=data.frame(condition), ~condition)
vsd <- vst(dds, blind=FALSE)
plotPCA(vsd)
rv <- rowVars(assay(vsd))
pc <- prcomp(t(assay(vsd)[head(order(-rv),1000),]))
@mikelove
mikelove / airpart_fly.R
Last active March 26, 2022 15:36
airpart demo code for fly cross
library(SingleCellExperiment)
sce <- readRDS("sce_metaCells.rds")
dim(sce)
assayNames(sce)
a1 <- as.matrix( round(assay(sce, "dmel_counts")) )
a2 <- as.matrix( round(assay(sce, "counts") - a1) )
a1[is.na(a1)] <- 0
a2[is.na(a2)] <- 0
@mikelove
mikelove / variant_peaks.R
Created March 11, 2022 13:14
Variant overlapping peaks example
# Variants and peaks coverage
# Michael Love
# March 11, 2022
library(readr)
library(dplyr)
library(tidyr)
library(plyranges)
# read_narrowpeaks -> just works
@mikelove
mikelove / gene_plot_ideas.R
Last active April 13, 2023 13:11
gene plot ideas
# Gviz library uses TxDb to make gene models
# this is convenient bc we can build TxDb from any source
library(Gviz)
library(TxDb.Dmelanogaster.UCSC.dm6.ensGene)
txdb <- TxDb.Dmelanogaster.UCSC.dm6.ensGene
# load our TSS-summarized data, now has GRanges on rows
suppressPackageStartupMessages(library(SummarizedExperiment))
load("se_tss.rda")
@mikelove
mikelove / osca_plots.R
Created February 16, 2022 01:33
OSCA examples for plotting counts and allelic counts
library(SingleCellExperiment)
sce <- readRDS("sce.rds")
labels <- readRDS("cellLabels.rds")
sce <- sce[,names(labels)]
colLabels(sce) <- labels
table(labels)
assayNames(sce)
total_count <- rowSums(assay(sce))
@mikelove
mikelove / Liang_2021_inpeak_caQTL_ASCA_eQTL_neurons.csv
Last active June 27, 2022 22:56
Processing data from Dan Liang Nature Neuroscience paper
snp_hg38 rsid refAllele altAllele lfcASCA lfcQTL lfcEQTL
chr2:208557023:C:G rs1036014 C G 0.679909666011761 0.2499407877 0.2194325508
chr4:9924989:T:C rs10939614 T C -2.21500762508123 -0.355952769 -0.2118339332
chr7:138187033:C:T rs17603855 C T -1.73696246880911 -0.4433277822 -0.4808215973
chr7:139495451:C:G rs2530731 C G -1.77729493517346 -0.6283644361 -0.3881908065
chr11:86515916:G:C rs2125358 G C 1.36463250810021 0.4783681025 0.4154173162
chr14:22848616:A:G rs17882077 A G -1.34268551797455 -0.2681226196 -0.2210454793
chr18:25019730:G:A rs10502466 G A 0.716207533070227 0.2821604048 0.2185292588
chr21:43270976:A:G rs2838227 A G 0.8181622133663 0.304950793 0.4107778657
@mikelove
mikelove / overlap_correlation.R
Last active June 13, 2023 18:28
correlation by overlap
# Michael Love
# Dec 25 2021
# make some sim data
a <- rnorm(10)
b <- rnorm(10)
dat_x <- rbind(a+rnorm(10,0,2),
a+rnorm(10,0,.5),
rnorm(10),
b+rnorm(10))
@mikelove
mikelove / plyranges_nullranges_big_blocks.R
Last active December 13, 2021 22:28
plyranges + nullranges for permuting large blocks
# Michael Love
# Dec 12 2021
# read in data -- plyranges is dplyr for GRanges
# I've been moving towards using this in my lab
library(dplyr)
library(plyranges)
a <- read_bed("featuresA.hg19.bed")
b <- read_bed("featuresB.hg19.bed")
a <- a %>% mutate(id=seq_along(a)) # ID for regions in 'a'