Last active
March 9, 2016 14:57
-
-
Save nickpettican/255e830bc63d241539a1 to your computer and use it in GitHub Desktop.
RP1shortcuts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# write merged_data out as a tab-separated file in the working directory;
# quote = FALSE leaves character values unquoted
write.table(
  x = merged_data,
  file = "merged_data.tsv",
  sep = "\t",
  quote = FALSE
)
# open multiple datasets
# list.files() takes a regular expression (not a shell glob), so match the
# literal ".tsv" extension at the end of the file name
temp <- list.files(pattern = "\\.tsv$")
if (length(temp) == 0) {
  stop("no .tsv files found in ", getwd(), call. = FALSE)
}
# preview the file names with the extension removed (printed, not stored)
gsub("\\.tsv$", "", temp)
# one abundance column name per file: "<name>_output.tsv" -> "<name>_abundance"
temp2 <- gsub("_output\\.tsv$", "_abundance", temp)
# read the first file on its own, naming its four columns
df <- read.table(
  temp[1],
  header = TRUE,
  col.names = c("ID", "T", "P", temp2[1])
)
# read every file into a variable named after the file itself; the fourth
# column of each table is named after the matching entry of temp2
for (i in seq_along(temp)) {
  assign(
    temp[i],
    read.table(
      temp[i],
      header = TRUE,
      col.names = c("GeneID", "TCount", "PCount", temp2[i])
    )
  )
}
# merge different datasets
# df1 and df2 are joined first, then df3 is joined onto that result; both
# joins match on PaxID and ENSPID and keep unmatched rows (all = TRUE)
df_all <- merge(
  x = merge(
    x = df1,
    y = df2,
    by = c("PaxID", "ENSPID"),
    all = TRUE
  ),
  y = df3,
  by = c("PaxID", "ENSPID"),
  all = TRUE
)
# change column names
# replaces the column names of ensembl_export02 with these three, in order
# NOTE(review): "ProtIDe" may be a typo for "ProtID" - confirm before use
colnames(ensembl_export02) <- c(
  "GeneID",
  "TransID",
  "ProtIDe"
)
# sort the rows by gene ID, so long as the gene ID column is called GeneID
# the column must be referenced through the data frame: a bare GeneID is only
# found if a variable of that name exists in the workspace
sort.merged_data <- merged_data[order(merged_data$GeneID), ]
# assign random rows from one data frame to another data frame
# draws 100 row numbers at random (without replacement, the sample() default)
# and keeps those rows with all of their columns
sampling_all_int02 <- all_int_analysis[
  sample(x = nrow(all_int_analysis), size = 100),
]
# import a tab-delimited table: no header row (header = FALSE), and anything
# after a "#" is treated as a comment and skipped
WHOLE_ORGANISM_integrated <- read.delim(
  file = "C:/MSc/RP1/WHOLE_ORGANISM_integrated.txt",
  header = FALSE,
  comment.char = "#"
)
# trim the text in a column: only the characters from position start to
# position finish of each element are kept, the rest is dropped
dataframe$column1 <- substr(
  x = dataframe$column1,
  start = start,
  stop = finish
)
# row-wise average of every column except the first (usually where the
# gene IDs are), ignoring NA values; the first column is carried over as ID
newdataname <- data.frame(
  ID = dataname[, 1],
  Means = rowMeans(dataname[, -1], na.rm = TRUE)
)
# removes every object listed by ls() from the current environment - not
# only data frames; names starting with "." are not listed and so are kept
rm(list = ls(all.names = FALSE))
# plot graph with red points (pch = 21 is a circle filled with the bg colour)
plot(
  PCount,
  PAbundance,
  pch = 21,
  bg = "red"
)
# plot graph with transparency: solid points (pch = 16) in dark green with an
# alpha of 50 on a 0-255 scale
plot(
  PCount,
  CellLineAbundance,
  pch = 16,
  col = rgb(red = 0, green = 100, blue = 0, alpha = 50, maxColorValue = 255)
)
# plot 3D
# scatter3D() is not part of base R (presumably from the plot3D package -
# confirm it is loaded); same semi-transparent green points as the 2D plot
scatter3D(
  temp_calc_morethan19$PCount,
  temp_calc_morethan19$range_div_median,
  temp_calc_morethan19$range_div_range,
  phi = 40,
  pch = 16,
  col = rgb(red = 0, green = 100, blue = 0, alpha = 50, maxColorValue = 255),
  main = "quartRange median PCount \n>=19 is_data",
  zlab = "intQuartRange/maxminRange",
  ylab = "intQuartRange/median",
  xlab = "PCount"
)
# make three graphs next to each other
# remember the current graphical parameters so they can be restored afterwards
old_par <- par(no.readonly = TRUE)
par(mfrow = c(1, 3)) # 1 row, 3 columns of panels
# ... one plot() call per panel goes here ...
par(old_par) # restore the parameters saved above
par("mfrow") # confirm the layout is back to what it was
# draw regression line
# fits BPAbundance as a linear function of BPCount and adds the fitted line,
# in blue, to the plot that is currently open
abline(
  reg = lm(BPAbundance ~ BPCount),
  col = "blue"
)
# Pearson's product-moment correlation (the cor.test() default method)
cor.test(PCount, PAbundance, method = "pearson")
# Spearman's rank correlation
cor.test(PCount, PAbundance, method = "spearman")
# calculate max min of each row (gene)
# pmax() compares its arguments element by element, so pass the different
# columns to compare (pmin() is the minimum counterpart)
pmax(dataframe$column1, dataframe$column2)
# apply() needs something with rows and columns, so select a block of columns
# with [ , ] rather than a single column with $
apply(dataframe[, columntostartfrom:lastone], 1, max)
# or, with a literal first column and NA values ignored
apply(dataframe[, 4:lastone], 1, max, na.rm = TRUE)
# apply is for matrices, the number 1 is for rows and 2 is for columns
# boxplot for non-parametric data, like the one we have
boxplot(x = columnname)
# transposing first turns rows into columns, so the boxes describe the rows
boxplot(x = t(dataframe))
# fivenum gives, in this order: min, lower-hinge, median, upper-hinge, max
fivenum(x = columnname)
# quantile gives the 0%, 25%, 50%, 75% and 100% quantiles by default
quantile(x = rows)
# to calculate quantiles of each row:
# upper quartile (75%) across columns 4 to 22 of all_int_merged_new, ignoring
# NA values, stored as a new column of all_int_analysis
all_int_analysis$upper_quart <- apply(
  all_int_merged_new[, 4:22],
  1,
  quantile,
  probs = 0.75,
  na.rm = TRUE
)
# fit a random forest and keep the fitted model so it can be inspected
# NOTE(review): response and training_data are placeholders - substitute the
# real outcome column and data frame; randomForest() and varImpPlot() are not
# base R (presumably the randomForest package - confirm it is loaded)
the_model <- randomForest(response ~ ., data = training_data)
varImpPlot(the_model) # will show the importance of each tissue for predicting
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment