Created
July 25, 2017 01:45
-
-
Save mbk0asis/e019d93eafe4c7ef4a49049352e28770 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/27k_Set2/liver/liver.set2.boxplot..csv", | |
header = T) | |
dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.boxplot.2.csv", header = T) | |
dim(dta) | |
################## | |
types <- data.frame(c(rep("breast.C",793),rep("breast.N",97), | |
rep("colon.C",313),rep("colon.N",38), | |
rep("kidney.C",324),rep("kidney.N",160), | |
rep("liver.C",377),rep("liver.N",50), | |
rep("lung.C",473),rep("lung.N",32), | |
rep("prostate.C",502),rep("prostate.N",50))) | |
colnames(types) <- "types" | |
types <- data.frame(c(rep("breast.C",793),rep("colon.C",313), | |
rep("kidney.C",324),rep("liver.C",377), | |
rep("lung.C",473),rep("prostate.C",502))) | |
colnames(types) <- "types" | |
head(types) | |
tissues <- data.frame(c(rep("breast",890),rep("colon",351), | |
rep("kidney",484),rep("liver",427), | |
rep("lung",505), rep("prostate.C",552))) | |
colnames(tissues) <- "tissues" | |
hm <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.heatmap.csv", | |
header=F, row.names=1) | |
dim(hm) | |
#dta_hm <- cbind(tissues,types,t(hm)) | |
dta_hm <- cbind(types,t(hm)) | |
library(ggplot2) | |
setwd("E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/without_normal") | |
colNames <- names(dta_hm)[2:424] | |
for(i in colNames){ | |
print(i) | |
ggplot(dta_hm, aes_string(x=dta_hm[,1], y=i)) + | |
scale_fill_brewer(palette="Dark2") + geom_violin(alpha=0.6, scale="width",aes(fill=types)) + | |
geom_boxplot(width=0.4, alpha=0.9,outlier.shape = NA) + | |
theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + | |
guides(fill=FALSE) + theme_bw() + xlab("") + ylim(0,1) | |
ggsave(filename=paste(i,".png",sep=""), width = 5.5, height = 4, dpi = 600) | |
graphics.off() | |
} | |
#+ geom_jitter(width=0.2,alpha=0.3) | |
#aes(fill=types) | |
#geom_hline(aes(yintercept=mean(, na.rm=T))) + | |
for(i in colNames){ | |
print(i) | |
m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean,na.rm=T) | |
cm<-colMeans(as.matrix(m[,2])) | |
dev<-apply(m,2, function(x) abs(m[,2]-cm)) | |
sd<-sd(as.matrix(m[,2])) | |
ggplot(m, aes(as.factor(Group.1),x)) + theme_bw() + | |
geom_hline(yintercept = cm, colour="grey10", linetype="dotted", size = 1.2) + | |
geom_hline(yintercept = sd+cm, colour="blue", linetype="dotted", size = 1.2) + | |
geom_hline(yintercept = cm-sd, colour="blue", linetype="dotted", size = 1.2) + | |
geom_point(aes(Group.1,m[,2]), size=5) + | |
xlab("") + ylab("beta-values") + ylim(0,1) | |
ggsave(filename=paste(i,".mean.png",sep=""), width = 5.5, height = 4, dpi = 600) | |
graphics.off() | |
} | |
############################# | |
# calculate SD of each CpG & | |
# select CpGs with SD >= 0.2 | |
mydata <- list() | |
for(i in colNames){ | |
print(i) | |
m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean,na.rm=T) | |
mydata[[i]] <- m[,2] | |
} | |
tdta<- t(data.frame(mydata)) | |
sd <- data.frame(apply(tdta, 1, sd)) | |
colnames(sd) = "stdev" | |
sd <- sd[order(-sd$stdev), , drop = FALSE] | |
nr <- nrow(data.frame(sd[sd$stdev >= 0.2, ])) | |
rNames <- rownames(head(sd, nr)) | |
##################################### | |
# print outliers | |
outLiers<-list() | |
for(i in colNames){ | |
print(i) | |
m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean, na.rm=T) | |
boxplot.stats(m$x) | |
outLiers[[i]] <- boxplot.stats(m$x)$out | |
} | |
ol <- as.matrix(outLiers) | |
head(ol) | |
ol[,grep("Numeric",ol)] | |
grep("Numeric", ol, value = T) | |
############################# | |
# plot means and SD of each CpG | |
for(i in rNames){ | |
print(i) | |
m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean, na.rm=T) | |
cm<-colMeans(as.matrix(m[,2])) | |
dev<-apply(m,2, function(x) abs(m[,2]-cm)) | |
sd<-sd(as.matrix(m[,2])) | |
ggplot(m, aes(as.factor(Group.1),x)) + theme_bw() + | |
geom_hline(yintercept = cm, colour="blue", linetype="dotted") + | |
geom_hline(yintercept = sd+cm, colour="red", linetype="dotted") + | |
geom_hline(yintercept = cm-sd, colour="red", linetype="dotted") + | |
geom_point(aes(Group.1,m[,2]), size=5) + | |
xlab("") + ylab("beta-values") + ylim(0,1) | |
ggsave(filename=paste(i,".sd0.2.png",sep=""), width = 5.5, height = 4, dpi = 600) | |
graphics.off() | |
} | |
dev.off() | |
dev.off() | |
################################# | |
m<- aggregate(dta_hm$cg02264238, list(dta_hm$types), mean, na.rm=T) | |
cm<-colMeans(as.matrix(m[,2])) | |
dev<-apply(m,2, function(x) abs(m[,2]-cm)) | |
sd<-sd(as.matrix(m[,2])) | |
ggplot(m, aes(as.factor(Group.1),x)) + theme_bw() + | |
geom_hline(yintercept = cm, colour="grey10", linetype="dotted", size = 1.2) + | |
geom_hline(yintercept = sd+cm, colour="blue", linetype="dotted", size = 1.2) + | |
geom_hline(yintercept = cm-sd, colour="blue", linetype="dotted", size = 1.2) + | |
geom_point(aes(Group.1,m[,2]), size=5) + | |
geom_point(aes(Group.1,dev[,2]+cm), size=5, colour = "red",shape = "X") + | |
xlab("") + ylab("beta-values") + ylim(0,1) | |
################# | |
####################################### | |
dta1 <- dta[,grep("cg00359087",colnames(dta))] | |
head(dta1) | |
par(mar=c(8,3,3,3)) | |
stripchart(dta1, method='jitter', vertical=T, pch=20,cex=.9, | |
col=(c(rgb(.8,.8,.3,.7),rgb(.3,.8,.8,.7))),axes = F, | |
jitter=.2, ylim=c(-0.1, 1.1)) | |
b<-boxplot(dta1, las=2, col=(c(rgb(.8,.3,.3,.5),rgb(.3,.3,.8,.5))), add = T, | |
main="cg00000029 in KIRC", outline=F)#, xaxt="n",) | |
lines(b$stats[3,], lwd = 2, lty = 5, type = "o") | |
b$stats[3,] | |
######################################### | |
setwd("E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/") | |
dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.boxplot.2.csv", header = T) | |
file <- "cpg.list.csv" | |
close(con) | |
dev.off() | |
close(con) | |
dev.off() | |
con <- file(description=file, open="r") | |
while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) { | |
print(line) | |
dta1 <- dta[,grep(line,colnames(dta))] | |
tiff(paste(line,'.tif',sep = ""),units = "px",width = 500, height = 400) | |
par(mar=c(3,3,3,3)) | |
stripchart(dta1, method='jitter', vertical=T, pch=20,cex=.9, | |
col=(c(rgb(.8,.8,.3,.7),rgb(.3,.8,.8,.7))),axes = F, | |
jitter=.2, ylim=c(-0.1, 1.1)) | |
b<-boxplot(dta1, las=2, col=(c(rgb(.8,.3,.3,.5),rgb(.3,.3,.8,.5))), add = T, | |
main=paste(line), outline=F, xaxt="n") | |
lines(b$stats[3,], lwd = 2, lty = 5, type = "o") | |
dev.off() | |
} | |
close(con) | |
dev.off() | |
########## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment