Skip to content

Instantly share code, notes, and snippets.

@Velrok
Created September 20, 2013 13:43
Show Gist options
  • Save Velrok/6637746 to your computer and use it in GitHub Desktop.
Save Velrok/6637746 to your computer and use it in GitHub Desktop.
My R graphs.
source("helpers.R")
# Plot precision/recall curves comparing the DE data set with and without
# bot filtering, for one similarity measure.
#
# Args:
#   filename: base name of the CSV to load (passed to load.dataset); the same
#             name is handed to plot.pr.init for the output PDF.
#   title:    plot title.
#   colors:   colour vector; elements 1..4 are used, in legend order.
#
# Four series are drawn: {with bots, without bots} x {given-1, all-but-1}.
# plot.pr.init() opens the PDF device and plot.pr.legend() closes it again.
filter.effects.DE <- function(filename, title, colors) {
  data <- load.dataset(filename)

  # "with bots" = the data set on which no bot filtering was applied.
  bots.with <- filter(data, "data.set", "DE 2013-05 without bot filtering")
  bots.with.given1 <- only.given.1(bots.with)
  bots.with.allbut1 <- only.all.but.1(bots.with)

  bots.without <- filter(data, "data.set", "DE extra bot filtering")
  bots.without.given1 <- only.given.1(bots.without)
  bots.without.allbut1 <- only.all.but.1(bots.without)

  plot.colors <- colors
  # Fixed the legend typo "withoutout bots" in the fourth label.
  plot.names <- c("with bots; given-1", "with bots, all-but-1",
                  "without bots; given-1", "without bots, all-but-1")

  plot.pr.init(title, filename, data)
  plot.pr.series(bots.with.given1, color = plot.colors[1], symbol = plot.symbols[1])
  plot.pr.series(bots.with.allbut1, color = plot.colors[2], symbol = plot.symbols[2])
  plot.pr.series(bots.without.given1, color = plot.colors[3], symbol = plot.symbols[3])
  plot.pr.series(bots.without.allbut1, color = plot.colors[4], symbol = plot.symbols[4])
  plot.pr.legend(plot.names, plot.colors)
}
# DE bot-filtering comparison for the Tanimoto similarity.
filter.effects.DE.tanimoto <- function() {
  filter.effects.DE(
    filename = "2013-05_DE_bot_filtering_effect_-Tanimoto-",
    title    = "2013-05 DE bot filtering effect (Tanimoto)",
    colors   = color.tanimoto
  )
}
# DE bot-filtering comparison for the log-likelihood similarity.
filter.effects.DE.loglikely <- function() {
  filter.effects.DE(
    filename = "2013-05_DE_bot_filtering_effect_-LogLikelihood-",
    title    = "2013-05 DE bot filtering effect (Loglikelyhood)",
    colors   = color.loglikely
  )
}
# DE bot-filtering comparison for SLIM.
filter.effects.DE.slim <- function() {
  filter.effects.DE(
    filename = "2013-05_DE_bot_filtering_effect_-SLIM-",
    title    = "2013-05 DE bot filtering effect (SLIM)",
    colors   = color.slim
  )
}
# DE bot-filtering comparison for the most-popular recommender.
filter.effects.DE.popular <- function() {
  filter.effects.DE(
    filename = "2013-05_DE_bot_filtering_effect_-MostPopular-",
    title    = "2013-05 DE bot filtering effect (MostPopular)",
    colors   = color.popular
  )
}
# Plot precision/recall for all four recommenders on the random data set.
#
# For each recommender (tanimoto, loglikely, slim, most-popular) a given-1 and
# an all-but-1 series is drawn, i.e. eight series in total; the legend is
# placed bottom-right.
random.dataset <- function() {
  filename <- "Random_Dataset_2"
  data <- load.dataset(filename)
  #colnames(data) <- c("data.set", "similarity", "recall", "precision", "topN",
  # "split.type")

  # All subsets are extracted before the PDF device is opened.
  selectors <- list(only.tanimoto, only.loglikely, only.slim, only.popular)
  series <- list()
  for (select in selectors) {
    by.measure <- select(data)
    given1 <- only.given.1(by.measure)
    allbut1 <- only.all.but.1(by.measure)
    series <- c(series, list(given1, allbut1))
  }

  plot.colors <- c(color.tanimoto[1:2], color.loglikely[1:2],
                   color.slim[1:2], color.popular[1:2])
  plot.names <- c("tanimoto; given-1", "tanimoto; all-but-1",
                  "loglikely; given-1", "loglikely; all-but-1",
                  "slim; given-1", "slim; all-but-1",
                  "most-popular; given-1", "most-popular; all-but-1")

  plot.pr.init("Random Dataset", filename, data)
  for (k in seq_along(series)) {
    plot.pr.series(series[[k]], color = plot.colors[k], symbol = plot.symbols[k])
  }
  plot.pr.legend(plot.names, plot.colors, pos = "bottomright")
}
# Plot precision/recall with and without self-similarity for one measure.
#
# Args:
#   filename: base name of the CSV to load; also passed on to plot.pr.init.
#   title:    plot title.
#   colors:   colour vector; elements 1..4 are used, in legend order.
#
# Rows are selected on the "code.version" column, then split into given-1 and
# all-but-1 series.
self.recommendation <- function(filename, title, colors) {
  data <- load.dataset(filename)

  # NOTE(review): "with-self-similartiy" is spelled exactly as in the original
  # script; presumably it matches the CSV contents -- verify before "fixing".
  versions <- c("with-self-similartiy", "no-self-similarity")
  series <- list()
  for (version in versions) {
    rows <- filter(data, "code.version", version)
    given1 <- only.given.1(rows)
    allbut1 <- only.all.but.1(rows)
    series <- c(series, list(given1, allbut1))
  }

  plot.names <- c("self-similarity; given-1", "self-similarity; all-but-1",
                  "NO-self-similarity; given-1", "NO-self-similarity; all-but-1")

  plot.pr.init(title, filename, data)
  for (k in seq_along(series)) {
    plot.pr.series(series[[k]], color = colors[k], symbol = plot.symbols[k])
  }
  plot.pr.legend(plot.names, colors)
}
# Self-similarity comparison on ES for the Tanimoto similarity.
self.recommendation.tanimoto <- function() {
  self.recommendation(
    filename = "2013-05_ES_self_similarty_effects_-Tanimoto-",
    title    = "2013-05 ES self similarty effects (Tanimoto)",
    colors   = color.tanimoto
  )
}
# Self-similarity comparison on ES for the log-likelihood similarity.
self.recommendation.loglikely <- function() {
  self.recommendation(
    filename = "2013-05_ES_self_similartiy_effects_-LogLikelihood-",
    title    = "2013-05 ES self similarty effects (Loglikelyhood)",
    colors   = color.loglikely
  )
}
# Plot precision/recall of SLIM for alpha = 0.0, 0.1, ..., 1.0.
#
# Each alpha value gets one colour and one symbol, shared by its given-1 and
# all-but-1 series; the legend lists the alpha values.
slim.alpha.compare <- function() {
  filename <- "2013-05_ES_stric_bot_filtering_SLIM_alpha_compare"
  data <- load.dataset(filename)

  plot.names <- c("0.0", "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8",
                  "0.9", "1.0")
  plot.colors <- rainbow(length(plot.names))

  plot.pr.init("2013-05 ES extra bot filtering automatic lambda",
               filename, data)

  # R vectors are 1-based. The previous counter started at 0, so the first
  # alpha got an empty colour/symbol and every following series was shifted
  # one position away from its legend entry.
  for (i in seq_along(plot.names)) {
    alpha <- plot.names[i]

    # There are more alpha values than entries in plot.symbols; wrap around
    # instead of indexing past the end (which yields NA, i.e. no marker).
    symbol <- plot.symbols[(i - 1) %% length(plot.symbols) + 1]

    # NOTE(review): alpha is compared as a string ("0.0", "1.0", ...). If the
    # CSV column is read as numeric, 0 and 1 will not match "0.0"/"1.0" --
    # confirm against the data file.
    data.alpha <- filter(data, "alpha", alpha)
    data.alpha.given1 <- only.given.1(data.alpha)
    data.alpha.allbut1 <- only.all.but.1(data.alpha)

    plot.pr.series(data.alpha.given1, color = plot.colors[i], symbol = symbol)
    plot.pr.series(data.alpha.allbut1, color = plot.colors[i], symbol = symbol)
  }

  plot.pr.legend(plot.names, plot.colors)
}
# Plot precision/recall for all four recommenders on the ES data set.
#
# For every similarity value a given-1 and an all-but-1 series is drawn, using
# consecutive colour/symbol slots (1,2), (3,4), (5,6), (7,8).
all.ES <- function() {
  filename <- "2013-05_ES_all"
  data <- load.dataset(filename)

  plot.names <- c("tanimoto; given 1", "tanimoto; all but 1",
                  "loglikely; given 1", "loglikely; all but 1",
                  "slim; given 1", "slim; all but 1",
                  "most-popular; given 1", "most-popular; all but 1")
  plot.colors <- c(color.tanimoto[1:2], color.loglikely[1:2],
                   color.slim[1:2], color.popular[1:2])

  plot.pr.init("2013-05 ES advanced bot filtering",
               filename, data)

  # NOTE(review): the short names "loglikely"/"popular" are matched directly
  # here, without any relabelling of the column -- confirm the CSV uses them.
  measures <- c("tanimoto", "loglikely", "slim", "popular")
  for (k in seq_along(measures)) {
    rows <- filter(data, "similarity", measures[k])
    given1 <- only.given.1(rows)
    allbut1 <- only.all.but.1(rows)

    slot <- 2 * k - 1  # first of the two slots belonging to this measure
    plot.pr.series(given1, color = plot.colors[slot], symbol = plot.symbols[slot])
    plot.pr.series(allbut1, color = plot.colors[slot + 1], symbol = plot.symbols[slot + 1])
  }

  plot.pr.legend(plot.names, plot.colors)
}
# Plot precision/recall for DE versus ES for a set of similarity measures.
#
# Args:
#   filename:      base name of the CSV to load; also passed to plot.pr.init.
#   title:         plot title.
#   similarities:  similarity names to plot, using the short spellings
#                  ("popular", "loglikely", ...) that the column is
#                  normalised to below.
#   color.schemas: list of colour vectors, one per similarity; the first four
#                  colours of each are used (DE given-1, DE all-but-1,
#                  ES given-1, ES all-but-1).
#
# Series are drawn in the order similarity -> country -> split type, which is
# also the order of the legend entries.
DEvsES <- function(filename, title, similarities, color.schemas) {
  data <- load.dataset(filename)

  # Normalise the long spellings to the short ones. The column is coerced to
  # character first: assigning a new label into a factor column (what
  # read.csv produced by default before R 4.0) yields NA plus a warning.
  # which() keeps NA entries from breaking the replacement.
  similarity <- as.character(data$similarity)
  similarity[which(similarity == "most-popular")] <- "popular"
  similarity[which(similarity == "loglikelyhood")] <- "loglikely"
  data$similarity <- similarity

  countries <- c("DE", "ES")

  # expand.grid varies its first factor fastest, giving the order
  # similarity -> country -> split type.
  combos <- expand.grid(split = c("given 1", "all but 1"),
                        country = countries,
                        sim = similarities,
                        stringsAsFactors = FALSE)
  plot.names <- paste(combos$country, combos$sim, combos$split, sep = "; ")

  plot.colors <- unlist(lapply(color.schemas, function(schema) schema[1:4]),
                        use.names = FALSE)

  plot.pr.init(title, filename, data)

  i <- 1
  for (sim in similarities) {
    data.sim <- filter(data, "similarity", sim)
    for (country in countries) {
      data.country <- filter(data.sim, "data.set",
                             paste0(country, " extra bot filtering"))
      data.country.given1 <- only.given.1(data.country)
      data.country.allbut1 <- only.all.but.1(data.country)
      plot.pr.series(data.country.given1, color = plot.colors[i],
                     symbol = plot.symbols[i])
      plot.pr.series(data.country.allbut1, color = plot.colors[i + 1],
                     symbol = plot.symbols[i + 1])
      i <- i + 2
    }
  }

  plot.pr.legend(plot.names, plot.colors)
}
# DE vs ES comparison for Tanimoto and most-popular.
DEvsES.tanimoto.popular <- function() {
  DEvsES(
    filename      = "2013-05_DE_vs_ES_extra-bot-filtering",
    title         = "2013-05 DE vs ES advanced bot filtering",
    similarities  = c("tanimoto", "popular"),
    color.schemas = list(color.tanimoto, color.popular)
  )
}
# DE vs ES comparison for log-likelihood and SLIM.
DEvsES.loglikely.slim <- function() {
  DEvsES(
    filename      = "2013-05_DE_vs_ES_-LogLikelihood-SLIM-",
    title         = "2013-05 DE vs ES advanced bot filtering Loglikelyhood vs SLIM",
    similarities  = c("loglikely", "slim"),
    color.schemas = list(color.loglikely, color.slim)
  )
}
# Plot the boost curve p'(i) = 85 + 2 * 85 * 0.85^i for positions 0..25 into
# the PDF "boost_85".
boost <- function() {
  boost.fn <- function(x) {
    85 + 2 * 85 * 0.85^x
  }

  positions <- 0:25
  priorities <- boost.fn(positions)  # arithmetic is vectorised over positions

  pdf.init("boost_85")
  par(mar = c(4.3, 4, 2, 1))
  plot(positions, priorities,
       xlab = "position i",
       ylab = "effective priority p'",
       type = "b")
  dev.off()
}
# Regenerate every plot defined in this file, in a fixed order.
# Returns whatever the last plot function (boost) returns.
go <- function() {
  steps <- list(
    filter.effects.DE.tanimoto,
    filter.effects.DE.loglikely,
    filter.effects.DE.slim,
    filter.effects.DE.popular,
    random.dataset,
    self.recommendation.tanimoto,
    self.recommendation.loglikely,
    slim.alpha.compare,
    all.ES,
    DEvsES.tanimoto.popular,
    DEvsES.loglikely.slim,
    boost
  )

  result <- NULL
  for (step in steps) {
    result <- step()
  }
  result
}
# Colour schemes: four colours per recommender, each taken from its own
# sixth of the rainbow hue range.
color.loglikely <- rainbow(n = 4, start = 2 / 6, end = 3 / 6)
color.tanimoto  <- rainbow(n = 4, start = 3 / 6, end = 4 / 6)
color.slim      <- rainbow(n = 4, start = 4 / 6, end = 5 / 6)
color.popular   <- rainbow(n = 4, start = 5 / 6, end = 6 / 6)

# Directory (relative to the working directory) the PDFs are written to.
plots.dir <- "../../thesis/plots/"

# Plotting symbols (pch codes), indexed by series position.
plot.symbols <- c(1, 2, 3, 5, 6, 7, 8, 9)
# Read "../data/<filename>.csv" (relative to the working directory) into a
# data frame. The first line is the header; quoting is disabled.
load.dataset <- function(filename) {
  csv.path <- paste0("../data/", filename, ".csv")
  read.csv(csv.path, header = TRUE, quote = "")
}
# Return the rows of `dataset` whose column `columnname` equals `value`.
#
# Args:
#   dataset:    data frame to select from.
#   columnname: name of the column to compare.
#   value:      value the column must be equal to.
#
# Returns a data frame with the matching rows (possibly zero rows).
#
# Rows where the column is NA never match. With a plain logical index an NA
# comparison result would inject an all-NA row into the output; which() drops
# those. drop = FALSE keeps the result a data frame even when `dataset` has a
# single column.
filter <- function(dataset, columnname, value) {
  matches <- which(dataset[, columnname] == value)
  dataset[matches, , drop = FALSE]
}
# Return `dataset` with its rows sorted ascending by column `columnname`.
ordered <- function(dataset, columnname) {
  sort.key <- dataset[, columnname]
  dataset[order(sort.key), ]
}
# Open a 9 x 5 inch PDF device writing to "<plots.dir><filename>.pdf".
# The caller is responsible for closing the device with dev.off().
pdf.init <- function(filename) {
  target <- paste0(plots.dir, filename, ".pdf")
  pdf(file = target, width = 9.0, height = 5.0)
}
# Start a new precision/recall plot in a fresh PDF.
#
# Args:
#   title:    plot title.
#   filename: base name of the PDF (see pdf.init).
#   data:     data frame with `recall` and `precision` columns; their ranges
#             become the x and y axis limits.
plot.pr.init <- function(title, filename, data) {
  pdf.init(filename)

  recall.range <- minmax(data$recall)
  precision.range <- minmax(data$precision)
  plot.pr.start(recall.range, precision.range)

  # NOTE(review): the margins are changed after the axes have been drawn --
  # confirm this ordering is intentional.
  par(mar = c(5, 1, 1.2, 1))
  title(title)
}
# Keep only the rows whose similarity is "tanimoto".
only.tanimoto <- function(data) {
  filter(data, columnname = "similarity", value = "tanimoto")
}
# Keep only the rows whose similarity is "slim".
only.slim <- function(data) {
  filter(data, columnname = "similarity", value = "slim")
}
# Keep only the rows whose similarity is "loglikelyhood" (spelling as used in
# the comparison value).
only.loglikely <- function(data) {
  filter(data, columnname = "similarity", value = "loglikelyhood")
}
# Keep only the rows whose similarity is "most-popular".
only.popular <- function(data) {
  filter(data, columnname = "similarity", value = "most-popular")
}
# Set up the recall (x) / precision (y) axes with the given limits.
# A single point at (1, 1) is plotted to create the axes -- presumably just a
# placeholder.
plot.pr.start <- function(xlim, ylim) {
  plot(
    1, 1,
    xlab = "recall",
    ylab = "precision",
    xlim = xlim,
    ylim = ylim
  )
}
# Add one precision/recall series to the current plot.
#
# Args:
#   data:   data frame with `topN`, `recall` and `precision` columns.
#   color:  colour used for both the line and the markers.
#   symbol: pch code used for the markers.
#
# Rows are sorted by `topN` so the connecting line follows increasing topN.
plot.pr.series <- function(data, color = NULL, symbol = NULL) {
  by.topn <- ordered(data, "topN")
  recall <- by.topn$recall
  precision <- by.topn$precision

  lines(recall, precision, col = color, lwd = 1)
  points(recall, precision, col = color, pch = symbol, lwd = 2)
}
# Draw the legend and close the current graphics device.
#
# Args:
#   names:  legend labels.
#   colors: legend colours.
#   pos:    legend position keyword (default "topright").
#
# Markers are taken from plot.symbols. Because the device is closed here,
# this must be the last drawing call for a plot.
plot.pr.legend <- function(names, colors, pos = "topright") {
  legend(
    pos, names,
    col = colors,
    pch = plot.symbols,
    cex = 0.8
  )
  dev.off()
}
# Return c(minimum, maximum) of `series`.
minmax <- function(series) {
  lowest <- min(series)
  highest <- max(series)
  c(lowest, highest)
}
# Keep only the rows whose split.type is ":given-1".
only.given.1 <- function(dataset) {
  filter(dataset, columnname = "split.type", value = ":given-1")
}
# Keep only the rows whose split.type is ":all-but-1".
only.all.but.1 <- function(dataset) {
  filter(dataset, columnname = "split.type", value = ":all-but-1")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment