Skip to content

Instantly share code, notes, and snippets.

@nickpettican
Last active March 9, 2016 14:57
Show Gist options
  • Save nickpettican/255e830bc63d241539a1 to your computer and use it in GitHub Desktop.
Save nickpettican/255e830bc63d241539a1 to your computer and use it in GitHub Desktop.
RP1shortcuts
# write merged_data to a tab-separated file, leaving strings unquoted
write.table(
  merged_data,
  file = 'merged_data.tsv',
  quote = FALSE,
  sep = '\t'
)
# open multiple datasets
# list.files() expects a regular expression, not a shell glob, so "*.tsv" is
# only loosely interpreted and can pick up names that do not end in ".tsv";
# anchor on the literal extension instead
temp <- list.files(pattern = "\\.tsv$")
# preview only: file names with the ".tsv" extension stripped (printed, not stored)
gsub(".tsv", "", temp, fixed = TRUE)
# one column name per file: "<name>_output.tsv" becomes "<name>_abundance";
# fixed = TRUE makes the dot match a literal dot rather than any character
temp2 <- gsub("_output.tsv", "_abundance", temp, fixed = TRUE)
# read only the first file, treating its first line as a header and
# replacing the column names with our own
df <- read.table(
  temp[1],
  header = TRUE,
  col.names = c("ID", "T", "P", temp2[1])
)
# read every file into a variable named after the file itself
# (drop header/col.names to read with read.table's defaults instead);
# seq_along() keeps the loop from running at all when no file matched
for (i in seq_along(temp)) {
  assign(
    temp[i],
    read.table(
      temp[i],
      header = TRUE,
      col.names = c("GeneID", "TCount", "PCount", temp2[i])
    )
  )
}
# merge different datasets
# fold a full outer join (all = TRUE) over the three data frames, left to
# right, always matching on the PaxID and ENSPID columns
df_all <- Reduce(
  function(left, right) {
    merge(left, right, by = c('PaxID', 'ENSPID'), all = TRUE)
  },
  list(df1, df2, df3)
)
# assign three new column names to ensembl_export02
# NOTE(review): "ProtIDe" looks like a typo for "ProtID" — confirm before relying on it
colnames(ensembl_export02) <- c(
  "GeneID",
  "TransID",
  "ProtIDe"
)
# sort the rows by gene ID, so long as the gene ID column is called GeneID
# the column is looked up inside merged_data; a bare GeneID would only work
# if a variable of that name happened to exist outside the data frame
sort.merged_data <- merged_data[order(merged_data$GeneID), ]
# draw 100 rows at random from all_int_analysis into a new data frame
# (change size to pick a different number of rows)
sampling_all_int02 <- all_int_analysis[
  sample(x = nrow(all_int_analysis), size = 100),
]
# import a tab-delimited table with no header row; anything after a "#" is
# treated as a comment and skipped
WHOLE_ORGANISM_integrated <- read.delim(
  file = "C:/MSc/RP1/WHOLE_ORGANISM_integrated.txt",
  header = FALSE,
  comment.char = "#"
)
# trim each element of column1, keeping only the characters from position
# start to position finish
dataframe$column1 <- substr(dataframe$column1, start = start, stop = finish)
# average all columns except the first (usually where the geneIDs are), ignoring NA
# drop = FALSE keeps a two-dimensional object even when only one value column
# is left; without it the selection collapses to a vector and rowMeans() fails
newdataname <- data.frame(
  ID = dataname[, 1],
  Means = rowMeans(dataname[, -1, drop = FALSE], na.rm = TRUE)
)
# removes every object that ls() lists in the current environment, not just data frames
# (ls() skips names starting with a dot by default, so those are kept)
rm(list = ls())
# scatter plot with red-filled points (pch 21 takes its fill colour from bg)
plot(
  PCount,
  PAbundance,
  pch = 21,
  bg = "red"
)
# scatter plot with semi-transparent points: the fourth rgb() value is alpha
plot(
  PCount,
  CellLineAbundance,
  pch = 16,
  col = rgb(0, 100, 0, 50, maxColorValue = 255)
)
# 3D scatter plot of three columns of temp_calc_morethan19
# (scatter3D is not base R; it must come from a package loaded elsewhere)
scatter3D(
  temp_calc_morethan19$PCount,
  temp_calc_morethan19$range_div_median,
  temp_calc_morethan19$range_div_range,
  phi = 40,
  pch = 16,
  col = rgb(0, 100, 0, 50, maxColorValue = 255),
  main = "quartRange median PCount \n>=19 is_data",
  zlab = "intQuartRange/maxminRange",
  ylab = "intQuartRange/median",
  xlab = "PCount"
)
# make three graphs next to each other
# par() returns the previous settings when it changes them, so keep them
# in order to restore the layout afterwards
old_par <- par(mfrow = c(1, 3))
# ... put the three plot() calls here, one per panel ...
par(old_par) ## reset the pars to what they were before
par("mfrow") ## query only: shows that mfrow is back to its previous value
# add the fitted line of a linear model (BPAbundance explained by BPCount)
# to the current plot, in blue
abline(
  lm(BPAbundance ~ BPCount),
  col = "blue"
)
# Pearson's product-moment correlation (cor.test's default, spelled out here)
cor.test(PCount, PAbundance, method = "pearson")
# Spearman's rank correlation
cor.test(PCount, PAbundance, method = "spearman")
# calculate max min of each row (gene)
# pmax() compares its arguments element by element; replace the two
# placeholder arguments with the different columns to compare
pmax(dataframe$column, dataframe$column)
# row-wise max from a named column through to the last column
# (a single dataframe$column is a plain vector without dimensions, which
# apply() rejects, so select a column range instead)
# NOTE(review): presumes "columntostartfrom" names the first column to include — confirm
apply(
  dataframe[, match("columntostartfrom", names(dataframe)):ncol(dataframe), drop = FALSE],
  1,
  max
)
#or
apply(dataframe[, 4:lastone], 1, max, na.rm = TRUE)
# apply is for matrices, the number 1 is for rows and 2 is for columns
# boxplot for non-parametric data, like the one we have
boxplot(columnname)
boxplot(t(dataframe)) # transposed first, so the boxes are drawn per original row
# fivenum gives, in this order: min, lower-hinge, median, upper-hinge, max
fivenum(columnname)
# quantile with its default probs gives the 0%, 25%, 50%, 75% and 100% quantiles
quantile(rows)
# upper quartile (75% quantile) of each row over columns 4 to 22, ignoring NA
# note: the values are read from all_int_merged_new but stored as a new
# column of all_int_analysis
all_int_analysis$upper_quart <- apply(
  all_int_merged_new[, 4:22],
  1,
  quantile,
  probs = 0.75,
  na.rm = TRUE
)
# fit a random forest and plot variable importance
# (both functions come from the randomForest package, which must be loaded)
# response and dataframe are placeholders: put the outcome column on the left
# of the formula and pass the data frame holding it as data
model <- randomForest(response ~ ., data = dataframe)
varImpPlot(model) # will show the importance of each tissue for predicting
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment