Skip to content

Instantly share code, notes, and snippets.

@mbk0asis
Created July 25, 2017 01:45
Show Gist options
  • Save mbk0asis/e019d93eafe4c7ef4a49049352e28770 to your computer and use it in GitHub Desktop.
Save mbk0asis/e019d93eafe4c7ef4a49049352e28770 to your computer and use it in GitHub Desktop.
dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/27k_Set2/liver/liver.set2.boxplot..csv",
header = T)
dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.boxplot.2.csv", header = T)
dim(dta)
##################
types <- data.frame(c(rep("breast.C",793),rep("breast.N",97),
rep("colon.C",313),rep("colon.N",38),
rep("kidney.C",324),rep("kidney.N",160),
rep("liver.C",377),rep("liver.N",50),
rep("lung.C",473),rep("lung.N",32),
rep("prostate.C",502),rep("prostate.N",50)))
colnames(types) <- "types"
types <- data.frame(c(rep("breast.C",793),rep("colon.C",313),
rep("kidney.C",324),rep("liver.C",377),
rep("lung.C",473),rep("prostate.C",502)))
colnames(types) <- "types"
head(types)
tissues <- data.frame(c(rep("breast",890),rep("colon",351),
rep("kidney",484),rep("liver",427),
rep("lung",505), rep("prostate.C",552)))
colnames(tissues) <- "tissues"
hm <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.heatmap.csv",
header=F, row.names=1)
dim(hm)
#dta_hm <- cbind(tissues,types,t(hm))
dta_hm <- cbind(types,t(hm))
library(ggplot2)
setwd("E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/without_normal")
colNames <- names(dta_hm)[2:424]
for(i in colNames){
print(i)
ggplot(dta_hm, aes_string(x=dta_hm[,1], y=i)) +
scale_fill_brewer(palette="Dark2") + geom_violin(alpha=0.6, scale="width",aes(fill=types)) +
geom_boxplot(width=0.4, alpha=0.9,outlier.shape = NA) +
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) +
guides(fill=FALSE) + theme_bw() + xlab("") + ylim(0,1)
ggsave(filename=paste(i,".png",sep=""), width = 5.5, height = 4, dpi = 600)
graphics.off()
}
#+ geom_jitter(width=0.2,alpha=0.3)
#aes(fill=types)
#geom_hline(aes(yintercept=mean(, na.rm=T))) +
for(i in colNames){
print(i)
m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean,na.rm=T)
cm<-colMeans(as.matrix(m[,2]))
dev<-apply(m,2, function(x) abs(m[,2]-cm))
sd<-sd(as.matrix(m[,2]))
ggplot(m, aes(as.factor(Group.1),x)) + theme_bw() +
geom_hline(yintercept = cm, colour="grey10", linetype="dotted", size = 1.2) +
geom_hline(yintercept = sd+cm, colour="blue", linetype="dotted", size = 1.2) +
geom_hline(yintercept = cm-sd, colour="blue", linetype="dotted", size = 1.2) +
geom_point(aes(Group.1,m[,2]), size=5) +
xlab("") + ylab("beta-values") + ylim(0,1)
ggsave(filename=paste(i,".mean.png",sep=""), width = 5.5, height = 4, dpi = 600)
graphics.off()
}
#############################
# calculate SD of each CpG &
# select CpGs with SD >= 0.2
mydata <- list()
for(i in colNames){
print(i)
m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean,na.rm=T)
mydata[[i]] <- m[,2]
}
tdta<- t(data.frame(mydata))
sd <- data.frame(apply(tdta, 1, sd))
colnames(sd) = "stdev"
sd <- sd[order(-sd$stdev), , drop = FALSE]
nr <- nrow(data.frame(sd[sd$stdev >= 0.2, ]))
rNames <- rownames(head(sd, nr))
#####################################
# print outliers
outLiers<-list()
for(i in colNames){
print(i)
m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean, na.rm=T)
boxplot.stats(m$x)
outLiers[[i]] <- boxplot.stats(m$x)$out
}
ol <- as.matrix(outLiers)
head(ol)
ol[,grep("Numeric",ol)]
grep("Numeric", ol, value = T)
#############################
# plot means and SD of each CpG
for(i in rNames){
print(i)
m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean, na.rm=T)
cm<-colMeans(as.matrix(m[,2]))
dev<-apply(m,2, function(x) abs(m[,2]-cm))
sd<-sd(as.matrix(m[,2]))
ggplot(m, aes(as.factor(Group.1),x)) + theme_bw() +
geom_hline(yintercept = cm, colour="blue", linetype="dotted") +
geom_hline(yintercept = sd+cm, colour="red", linetype="dotted") +
geom_hline(yintercept = cm-sd, colour="red", linetype="dotted") +
geom_point(aes(Group.1,m[,2]), size=5) +
xlab("") + ylab("beta-values") + ylim(0,1)
ggsave(filename=paste(i,".sd0.2.png",sep=""), width = 5.5, height = 4, dpi = 600)
graphics.off()
}
dev.off()
dev.off()
#################################
m<- aggregate(dta_hm$cg02264238, list(dta_hm$types), mean, na.rm=T)
cm<-colMeans(as.matrix(m[,2]))
dev<-apply(m,2, function(x) abs(m[,2]-cm))
sd<-sd(as.matrix(m[,2]))
ggplot(m, aes(as.factor(Group.1),x)) + theme_bw() +
geom_hline(yintercept = cm, colour="grey10", linetype="dotted", size = 1.2) +
geom_hline(yintercept = sd+cm, colour="blue", linetype="dotted", size = 1.2) +
geom_hline(yintercept = cm-sd, colour="blue", linetype="dotted", size = 1.2) +
geom_point(aes(Group.1,m[,2]), size=5) +
geom_point(aes(Group.1,dev[,2]+cm), size=5, colour = "red",shape = "X") +
xlab("") + ylab("beta-values") + ylim(0,1)
#################
#######################################
dta1 <- dta[,grep("cg00359087",colnames(dta))]
head(dta1)
par(mar=c(8,3,3,3))
stripchart(dta1, method='jitter', vertical=T, pch=20,cex=.9,
col=(c(rgb(.8,.8,.3,.7),rgb(.3,.8,.8,.7))),axes = F,
jitter=.2, ylim=c(-0.1, 1.1))
b<-boxplot(dta1, las=2, col=(c(rgb(.8,.3,.3,.5),rgb(.3,.3,.8,.5))), add = T,
main="cg00000029 in KIRC", outline=F)#, xaxt="n",)
lines(b$stats[3,], lwd = 2, lty = 5, type = "o")
b$stats[3,]
#########################################
setwd("E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/")
dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.boxplot.2.csv", header = T)
file <- "cpg.list.csv"
close(con)
dev.off()
close(con)
dev.off()
con <- file(description=file, open="r")
while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) {
print(line)
dta1 <- dta[,grep(line,colnames(dta))]
tiff(paste(line,'.tif',sep = ""),units = "px",width = 500, height = 400)
par(mar=c(3,3,3,3))
stripchart(dta1, method='jitter', vertical=T, pch=20,cex=.9,
col=(c(rgb(.8,.8,.3,.7),rgb(.3,.8,.8,.7))),axes = F,
jitter=.2, ylim=c(-0.1, 1.1))
b<-boxplot(dta1, las=2, col=(c(rgb(.8,.3,.3,.5),rgb(.3,.3,.8,.5))), add = T,
main=paste(line), outline=F, xaxt="n")
lines(b$stats[3,], lwd = 2, lty = 5, type = "o")
dev.off()
}
close(con)
dev.off()
##########
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment