Skip to content

Instantly share code, notes, and snippets.

@JakeConway
Last active March 7, 2017 04:59
Show Gist options
  • Save JakeConway/aaff4da603881149aeeb2611d7bd800d to your computer and use it in GitHub Desktop.
Save JakeConway/aaff4da603881149aeeb2611d7bd800d to your computer and use it in GitHub Desktop.
binData <- function(nBins, data, cohort, binSize) {
counts <- c()
for(i in seq(nBins)) {
start <- (i-1)*binSize
end <- i*binSize
count <- c(which(data$start > start & data$start < end))
count <- c(count, which(data$end > start & data$end < end))
count <- c(count, which(data$start < start & data$end > end))
count <- c(count, which(data$start > start & data$end < end))
count <- length(unique(count))
counts[i] <- count
}
fractions <- counts/nrow(data)
return(data.frame(bin=seq(nBins), count=counts, fraction=fractions, cohort=rep(cohort, length(counts))))
}
makePeakPlot <- function(dd, scz, controls, binSize, chr, chrLength) {
chrSizes <- c(249250621, 243199373, 198022430, 191154276, 180915260, 171115067, 159138663,
146364022, 141213431, 135534747, 135006516, 133851895, 115169878, 107349540,
102531392, 90354753, 81195210, 78077248, 59128983, 63025520, 48129895, 51304566)
chrLength <- chrSizes[chr]
chrDD <- dd[which(dd$chr == chr),]
chrSCZ <- scz[which(scz$chr == chr),]
chrControls <- controls[which(controls$chr == chr),]
nBins <- round(chrLength / binSize)
binnedDD <- binData(nBins, chrDD, "DD", binSize)
binnedSCZ <- binData(nBins, chrSCZ, "SCZ", binSize)
binnedControls <- binData(nBins, chrControls, "Controls", binSize)
allChr <- rbind(binnedDD, binnedSCZ, binnedControls)
plot <- ggplot(allChr) + geom_line(aes(x=bin, y=fraction, group=cohort, colour=cohort))
print(plot)
}
# set directory to where bed files are
setwd("/Users/jakeconway/Desktop/Beta Test")
require(ggplot2)
# read in case and control files
controls <- read.table("CTRL.CNV.GRCh37.bed")
scz <- read.table("SCZ.CNV.GRCh37.bed")
dd <- read.table("DD.CNV.GRCh37.bed")
# add column names to data.frames to make data manipulation easier
column_names <-c("chr", "start", "end", "VID", "CNV", "Pheno", "Source_PMID")
names(controls) <- column_names
names(scz) <- column_names
names(dd) <- column_names
makePeakPlot(dd, scz, controls, 1000000, 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment