Created
July 25, 2017 01:45
-
-
Save mbk0asis/e019d93eafe4c7ef4a49049352e28770 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/27k_Set2/liver/liver.set2.boxplot..csv", | |
| header = T) | |
| dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.boxplot.2.csv", header = T) | |
| dim(dta) | |
| ################## | |
| types <- data.frame(c(rep("breast.C",793),rep("breast.N",97), | |
| rep("colon.C",313),rep("colon.N",38), | |
| rep("kidney.C",324),rep("kidney.N",160), | |
| rep("liver.C",377),rep("liver.N",50), | |
| rep("lung.C",473),rep("lung.N",32), | |
| rep("prostate.C",502),rep("prostate.N",50))) | |
| colnames(types) <- "types" | |
| types <- data.frame(c(rep("breast.C",793),rep("colon.C",313), | |
| rep("kidney.C",324),rep("liver.C",377), | |
| rep("lung.C",473),rep("prostate.C",502))) | |
| colnames(types) <- "types" | |
| head(types) | |
| tissues <- data.frame(c(rep("breast",890),rep("colon",351), | |
| rep("kidney",484),rep("liver",427), | |
| rep("lung",505), rep("prostate.C",552))) | |
| colnames(tissues) <- "tissues" | |
| hm <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.heatmap.csv", | |
| header=F, row.names=1) | |
| dim(hm) | |
| #dta_hm <- cbind(tissues,types,t(hm)) | |
| dta_hm <- cbind(types,t(hm)) | |
| library(ggplot2) | |
| setwd("E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/without_normal") | |
| colNames <- names(dta_hm)[2:424] | |
| for(i in colNames){ | |
| print(i) | |
| ggplot(dta_hm, aes_string(x=dta_hm[,1], y=i)) + | |
| scale_fill_brewer(palette="Dark2") + geom_violin(alpha=0.6, scale="width",aes(fill=types)) + | |
| geom_boxplot(width=0.4, alpha=0.9,outlier.shape = NA) + | |
| theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5)) + | |
| guides(fill=FALSE) + theme_bw() + xlab("") + ylim(0,1) | |
| ggsave(filename=paste(i,".png",sep=""), width = 5.5, height = 4, dpi = 600) | |
| graphics.off() | |
| } | |
| #+ geom_jitter(width=0.2,alpha=0.3) | |
| #aes(fill=types) | |
| #geom_hline(aes(yintercept=mean(, na.rm=T))) + | |
| for(i in colNames){ | |
| print(i) | |
| m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean,na.rm=T) | |
| cm<-colMeans(as.matrix(m[,2])) | |
| dev<-apply(m,2, function(x) abs(m[,2]-cm)) | |
| sd<-sd(as.matrix(m[,2])) | |
| ggplot(m, aes(as.factor(Group.1),x)) + theme_bw() + | |
| geom_hline(yintercept = cm, colour="grey10", linetype="dotted", size = 1.2) + | |
| geom_hline(yintercept = sd+cm, colour="blue", linetype="dotted", size = 1.2) + | |
| geom_hline(yintercept = cm-sd, colour="blue", linetype="dotted", size = 1.2) + | |
| geom_point(aes(Group.1,m[,2]), size=5) + | |
| xlab("") + ylab("beta-values") + ylim(0,1) | |
| ggsave(filename=paste(i,".mean.png",sep=""), width = 5.5, height = 4, dpi = 600) | |
| graphics.off() | |
| } | |
| ############################# | |
| # calculate SD of each CpG & | |
| # select CpGs with SD >= 0.2 | |
| mydata <- list() | |
| for(i in colNames){ | |
| print(i) | |
| m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean,na.rm=T) | |
| mydata[[i]] <- m[,2] | |
| } | |
| tdta<- t(data.frame(mydata)) | |
| sd <- data.frame(apply(tdta, 1, sd)) | |
| colnames(sd) = "stdev" | |
| sd <- sd[order(-sd$stdev), , drop = FALSE] | |
| nr <- nrow(data.frame(sd[sd$stdev >= 0.2, ])) | |
| rNames <- rownames(head(sd, nr)) | |
| ##################################### | |
| # print outliers | |
| outLiers<-list() | |
| for(i in colNames){ | |
| print(i) | |
| m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean, na.rm=T) | |
| boxplot.stats(m$x) | |
| outLiers[[i]] <- boxplot.stats(m$x)$out | |
| } | |
| ol <- as.matrix(outLiers) | |
| head(ol) | |
| ol[,grep("Numeric",ol)] | |
| grep("Numeric", ol, value = T) | |
| ############################# | |
| # plot means and SD of each CpG | |
| for(i in rNames){ | |
| print(i) | |
| m<- aggregate(dta_hm[[i]], list(dta_hm$types), mean, na.rm=T) | |
| cm<-colMeans(as.matrix(m[,2])) | |
| dev<-apply(m,2, function(x) abs(m[,2]-cm)) | |
| sd<-sd(as.matrix(m[,2])) | |
| ggplot(m, aes(as.factor(Group.1),x)) + theme_bw() + | |
| geom_hline(yintercept = cm, colour="blue", linetype="dotted") + | |
| geom_hline(yintercept = sd+cm, colour="red", linetype="dotted") + | |
| geom_hline(yintercept = cm-sd, colour="red", linetype="dotted") + | |
| geom_point(aes(Group.1,m[,2]), size=5) + | |
| xlab("") + ylab("beta-values") + ylim(0,1) | |
| ggsave(filename=paste(i,".sd0.2.png",sep=""), width = 5.5, height = 4, dpi = 600) | |
| graphics.off() | |
| } | |
| dev.off() | |
| dev.off() | |
| ################################# | |
| m<- aggregate(dta_hm$cg02264238, list(dta_hm$types), mean, na.rm=T) | |
| cm<-colMeans(as.matrix(m[,2])) | |
| dev<-apply(m,2, function(x) abs(m[,2]-cm)) | |
| sd<-sd(as.matrix(m[,2])) | |
| ggplot(m, aes(as.factor(Group.1),x)) + theme_bw() + | |
| geom_hline(yintercept = cm, colour="grey10", linetype="dotted", size = 1.2) + | |
| geom_hline(yintercept = sd+cm, colour="blue", linetype="dotted", size = 1.2) + | |
| geom_hline(yintercept = cm-sd, colour="blue", linetype="dotted", size = 1.2) + | |
| geom_point(aes(Group.1,m[,2]), size=5) + | |
| geom_point(aes(Group.1,dev[,2]+cm), size=5, colour = "red",shape = "X") + | |
| xlab("") + ylab("beta-values") + ylim(0,1) | |
| ################# | |
| ####################################### | |
| dta1 <- dta[,grep("cg00359087",colnames(dta))] | |
| head(dta1) | |
| par(mar=c(8,3,3,3)) | |
| stripchart(dta1, method='jitter', vertical=T, pch=20,cex=.9, | |
| col=(c(rgb(.8,.8,.3,.7),rgb(.3,.8,.8,.7))),axes = F, | |
| jitter=.2, ylim=c(-0.1, 1.1)) | |
| b<-boxplot(dta1, las=2, col=(c(rgb(.8,.3,.3,.5),rgb(.3,.3,.8,.5))), add = T, | |
| main="cg00000029 in KIRC", outline=F)#, xaxt="n",) | |
| lines(b$stats[3,], lwd = 2, lty = 5, type = "o") | |
| b$stats[3,] | |
| ######################################### | |
| setwd("E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/") | |
| dta <- read.csv("file:///E:/LAB_DATA/00-LabData/Lab/00--Archive/TCGA_DNA_met/423CpGs_set12357/423CpGs.boxplot.2.csv", header = T) | |
| file <- "cpg.list.csv" | |
| close(con) | |
| dev.off() | |
| close(con) | |
| dev.off() | |
| con <- file(description=file, open="r") | |
| while (length(line <- readLines(con, n = 1, warn = FALSE)) > 0) { | |
| print(line) | |
| dta1 <- dta[,grep(line,colnames(dta))] | |
| tiff(paste(line,'.tif',sep = ""),units = "px",width = 500, height = 400) | |
| par(mar=c(3,3,3,3)) | |
| stripchart(dta1, method='jitter', vertical=T, pch=20,cex=.9, | |
| col=(c(rgb(.8,.8,.3,.7),rgb(.3,.8,.8,.7))),axes = F, | |
| jitter=.2, ylim=c(-0.1, 1.1)) | |
| b<-boxplot(dta1, las=2, col=(c(rgb(.8,.3,.3,.5),rgb(.3,.3,.8,.5))), add = T, | |
| main=paste(line), outline=F, xaxt="n") | |
| lines(b$stats[3,], lwd = 2, lty = 5, type = "o") | |
| dev.off() | |
| } | |
| close(con) | |
| dev.off() | |
| ########## |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment