Velrok · September 20, 2013 13:43
diff --git a/graphs.R b/graphs.R
 source("helpers.R")

 # Plot precision/recall curves comparing the DE data with and without extra
 # bot filtering, for both split types, into one PDF.
 #
 # filename: dataset name handed to load.dataset() and plot.pr.init().
 # title:    plot title.
 # colors:   color vector; entries 1-4 are used for the series in the order
 #           with bots/given-1, with bots/all-but-1,
 #           without bots/given-1, without bots/all-but-1.
 filter.effects.DE <- function(filename, title, colors){
   data <- load.dataset(filename)

   # "with bots" is the data set that had no bot filtering applied.
   unfiltered <- filter(data, "data.set", "DE 2013-05 without bot filtering")
   filtered   <- filter(data, "data.set", "DE extra bot filtering")

   # All subsets are built before the PDF device is opened.
   series <- list(only.given.1(unfiltered), only.all.but.1(unfiltered),
                  only.given.1(filtered),   only.all.but.1(filtered))

   # Legend labels in the same order as `series`
   # (the last label used to read "withoutout bots").
   plot.names <- c("with bots; given-1", "with bots, all-but-1",
                   "without bots; given-1", "without bots, all-but-1")

   plot.pr.init(title, filename, data)

   for (k in seq_along(series)) {
     plot.pr.series(series[[k]], color=colors[k], symbol=plot.symbols[k])
   }

   # plot.pr.legend() draws the legend and closes the PDF device.
   plot.pr.legend(plot.names, colors)
 }

 # Bot-filtering comparison plot for the Tanimoto results.
 filter.effects.DE.tanimoto <- function(){
   filter.effects.DE(filename = "2013-05_DE_bot_filtering_effect_-Tanimoto-",
                     title    = "2013-05 DE bot filtering effect (Tanimoto)",
                     colors   = color.tanimoto)
 }

 # Bot-filtering comparison plot for the log-likelihood results.
 # The title now spells "LogLikelihood" the way the dataset file name does.
 filter.effects.DE.loglikely <- function(){
   filter.effects.DE(filename = "2013-05_DE_bot_filtering_effect_-LogLikelihood-",
                     title    = "2013-05 DE bot filtering effect (LogLikelihood)",
                     colors   = color.loglikely)
 }

 # Bot-filtering comparison plot for the SLIM results.
 filter.effects.DE.slim <- function(){
   filter.effects.DE(filename = "2013-05_DE_bot_filtering_effect_-SLIM-",
                     title    = "2013-05 DE bot filtering effect (SLIM)",
                     colors   = color.slim)
 }

 # Bot-filtering comparison plot for the most-popular results.
 filter.effects.DE.popular <- function(){
   filter.effects.DE(filename = "2013-05_DE_bot_filtering_effect_-MostPopular-",
                     title    = "2013-05 DE bot filtering effect (MostPopular)",
                     colors   = color.popular)
 }

 # Plot precision/recall curves of all four similarity measures on the random
 # dataset, one given-1 and one all-but-1 series per measure, into one PDF.
 random.dataset <- function(){
   filename <- "Random_Dataset_2"
   data <- load.dataset(filename)

   # Column names once set by hand (kept for reference, not executed):
   #colnames(data) <- c("data.set", "similarity", "recall", "precision", "topN",
   #                    "split.type")

   # One selector per similarity measure, in legend order.
   selectors <- list(only.tanimoto, only.loglikely, only.slim, only.popular)

   # Build every series before the PDF device is opened:
   # given-1 followed by all-but-1 for each measure.
   series <- list()
   for (select in selectors) {
     per.measure <- select(data)
     series <- c(series, list(only.given.1(per.measure),
                              only.all.but.1(per.measure)))
   }

   plot.colors <- c(color.tanimoto[1:2], color.loglikely[1:2],
                    color.slim[1:2], color.popular[1:2])
   plot.names  <- c("tanimoto; given-1", "tanimoto; all-but-1",
                    "loglikely; given-1", "loglikely; all-but-1",
                    "slim; given-1", "slim; all-but-1",
                    "most-popular; given-1", "most-popular; all-but-1")

   plot.pr.init("Random Dataset", filename, data)

   for (k in seq_along(series)) {
     plot.pr.series(series[[k]], color=plot.colors[k], symbol=plot.symbols[k])
   }

   # Draws the legend and closes the PDF device.
   plot.pr.legend(plot.names, plot.colors, pos="bottomright")
 }

 # Plot precision/recall curves with and without self-similarity, for both
 # split types, into one PDF.
 #
 # filename: dataset name handed to load.dataset() and plot.pr.init().
 # title:    plot title.
 # colors:   color vector; entries 1-4 are used, one per series.
 self.recommendation <- function(filename, title, colors){
   data <- load.dataset(filename)

   # NOTE(review): "with-self-similartiy" is reproduced exactly as written;
   # presumably it matches the spelling in the data files - confirm.
   self.on  <- filter(data, "code.version", "with-self-similartiy")
   self.off <- filter(data, "code.version", "no-self-similarity")

   # All subsets are built before the PDF device is opened.
   series <- list(only.given.1(self.on),  only.all.but.1(self.on),
                  only.given.1(self.off), only.all.but.1(self.off))

   # Legend labels in the same order as `series`.
   plot.names <- c("self-similarity; given-1", "self-similarity; all-but-1",
                   "NO-self-similarity; given-1", "NO-self-similarity; all-but-1")

   plot.pr.init(title, filename, data)

   for (k in seq_along(series)) {
     plot.pr.series(series[[k]], color=colors[k], symbol=plot.symbols[k])
   }

   # Draws the legend and closes the PDF device.
   plot.pr.legend(plot.names, colors)
 }

 # Self-similarity comparison plot for the Tanimoto results.
 # The file name is left as written (it has to match the file on disk);
 # only the misspelled plot title ("similarty") is corrected.
 self.recommendation.tanimoto <- function(){
   self.recommendation(filename = "2013-05_ES_self_similarty_effects_-Tanimoto-",
                       title    = "2013-05 ES self similarity effects (Tanimoto)",
                       colors   = color.tanimoto)
 }

 # Self-similarity comparison plot for the log-likelihood results.
 # The file name is left as written (it has to match the file on disk);
 # only the misspellings in the plot title are corrected.
 self.recommendation.loglikely <- function(){
   self.recommendation(filename = "2013-05_ES_self_similartiy_effects_-LogLikelihood-",
                       title    = "2013-05 ES self similarity effects (LogLikelihood)",
                       colors   = color.loglikely)
 }


 # Plot precision/recall curves of SLIM for alpha = 0.0 ... 1.0 into one PDF.
 # Both split types of one alpha share a color and a symbol, so the legend has
 # one entry per alpha.
 slim.alpha.compare <- function(){
   filename <- "2013-05_ES_stric_bot_filtering_SLIM_alpha_compare"
   data <- load.dataset(filename)

   plot.names <- c("0.0", "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8",
                   "0.9", "1.0")

   plot.colors <- rainbow(length(plot.names))

   plot.pr.init("2013-05 ES extra bot filtering automatic lambda",
                filename, data)

   # R vectors are 1-based: the counter used to start at 0, which gave the
   # first alpha a zero-length color/symbol and shifted every later series by
   # one against the legend.
   for (i in seq_along(plot.names)){
     alpha <- plot.names[i]

     # NOTE(review): alpha is compared as a string. If the "alpha" column is
     # read as numeric, "0.0" and "1.0" will not equal 0 and 1 - confirm
     # against the data file.
     data.alpha <- filter(data, "alpha", alpha)

     # There are more alphas than symbols, so wrap around instead of indexing
     # past the end (which yields NA and draws no symbol). legend() is
     # expected to recycle pch the same way - confirm.
     symbol <- plot.symbols[(i - 1) %% length(plot.symbols) + 1]

     plot.pr.series(only.given.1(data.alpha), color=plot.colors[i],
                    symbol=symbol)
     plot.pr.series(only.all.but.1(data.alpha), color=plot.colors[i],
                    symbol=symbol)
   }

   # Draws the legend and closes the PDF device.
   plot.pr.legend(plot.names, plot.colors)
 }

 # Plot precision/recall curves of all four similarity measures on the ES
 # dataset, one given-1 and one all-but-1 series per measure, into one PDF.
 all.ES <- function(){
   filename <- "2013-05_ES_all"

   data <- load.dataset(filename)

   # The loop below filters on the short labels "loglikely" and "popular",
   # while the helpers in this file treat the raw labels as "loglikelyhood"
   # and "most-popular". Normalize them as DEvsES() does so those series are
   # not empty. This is a no-op if the file already uses the short labels.
   # as.character() avoids NA when the column is a factor; %in% keeps NA
   # entries out of the index.
   data$similarity <- as.character(data$similarity)
   data$similarity[data$similarity %in% "most-popular"] <- "popular"
   data$similarity[data$similarity %in% "loglikelyhood"] <- "loglikely"

   plot.names <- c("tanimoto; given 1", "tanimoto; all but 1",
                   "loglikely; given 1", "loglikely; all but 1",
                   "slim; given 1", "slim; all but 1",
                   "most-popular; given 1", "most-popular; all but 1")
   plot.colors <- c(color.tanimoto[1], color.tanimoto[2],
                    color.loglikely[1], color.loglikely[2],
                    color.slim[1], color.slim[2],
                    color.popular[1], color.popular[2])

   plot.pr.init("2013-05 ES advanced bot filtering",
                filename, data)

   # i addresses the color/symbol pair of the current measure:
   # i for given-1, i + 1 for all-but-1.
   i <- 1
   for (sim in c("tanimoto", "loglikely", "slim", "popular")){
     data.sim <- filter(data, "similarity", sim)

     plot.pr.series(only.given.1(data.sim),
                    color=plot.colors[i], symbol=plot.symbols[i])
     plot.pr.series(only.all.but.1(data.sim),
                    color=plot.colors[i + 1], symbol=plot.symbols[i + 1])

     i <- i + 2
   }

   # Draws the legend and closes the PDF device.
   plot.pr.legend(plot.names, plot.colors)
 }


 # Plot precision/recall curves of DE against ES (the "<country> extra bot
 # filtering" data sets) for the given similarity measures into one PDF.
 #
 # filename:      dataset name handed to load.dataset() and plot.pr.init().
 # title:         plot title.
 # similarities:  similarity labels to plot, in their normalized form
 #                ("popular" instead of "most-popular", "loglikely" instead
 #                of "loglikelyhood").
 # color.schemas: list with one color vector per similarity; entries 1-4 of
 #                each are used (DE/ES x given-1/all-but-1).
 DEvsES <- function(filename, title, similarities, color.schemas){
   data <- load.dataset(filename)

   # Normalize the labels on plain strings: writing a new label into a factor
   # column yields NA, and an NA label in a logical row index makes the
   # data-frame assignment fail. %in% never returns NA.
   data$similarity <- as.character(data$similarity)
   data$similarity[data$similarity %in% "most-popular"] <- "popular"
   data$similarity[data$similarity %in% "loglikelyhood"] <- "loglikely"

   countries    <- c("DE", "ES")
   split.labels <- c("given 1", "all but 1")

   # Legend labels ordered similarity -> country -> split type, the same order
   # in which the series are drawn below.
   plot.names <- unlist(lapply(similarities, function(sim) {
     lapply(countries, function(country) {
       paste(country, sim, split.labels, sep="; ")
     })
   }), use.names=FALSE)

   # First four colors of every schema, concatenated in schema order.
   plot.colors <- unlist(lapply(color.schemas, function(schema) schema[1:4]),
                         use.names=FALSE)

   plot.pr.init(title, filename, data)

   # i addresses the color/symbol pair of the current (similarity, country):
   # i for given-1, i + 1 for all-but-1.
   i <- 1
   for (sim in similarities){
     data.sim <- filter(data, "similarity", sim)
     for (country in countries){
       data.country <- filter(data.sim, "data.set",
                              paste0(country, " extra bot filtering"))

       plot.pr.series(only.given.1(data.country),
                      color=plot.colors[i], symbol=plot.symbols[i])
       plot.pr.series(only.all.but.1(data.country),
                      color=plot.colors[i + 1], symbol=plot.symbols[i + 1])

       i <- i + 2
     }
   }

   # Draws the legend and closes the PDF device.
   plot.pr.legend(plot.names, plot.colors)
 }

 # DE vs ES comparison plot for the Tanimoto and most-popular results.
 DEvsES.tanimoto.popular <- function(){
   DEvsES(filename      = "2013-05_DE_vs_ES_extra-bot-filtering",
          title         = "2013-05 DE vs ES advanced bot filtering",
          similarities  = c("tanimoto", "popular"),
          color.schemas = list(color.tanimoto, color.popular))
 }



 # DE vs ES comparison plot for the log-likelihood and SLIM results.
 # The title now spells "LogLikelihood" the way the dataset file name does.
 DEvsES.loglikely.slim <- function(){
   DEvsES(filename      = "2013-05_DE_vs_ES_-LogLikelihood-SLIM-",
          title         = "2013-05 DE vs ES advanced bot filtering LogLikelihood vs SLIM",
          similarities  = c("loglikely", "slim"),
          color.schemas = list(color.loglikely, color.slim))
 }


 # Write "boost_85.pdf": the effective priority 85 + 2*85*0.85^i plotted
 # against the position i = 0..25.
 boost <- function(){
   positions  <- 0:25
   # The formula is vectorized, so it is applied to all positions at once.
   priorities <- 85 + 2*85*0.85^positions

   pdf.init("boost_85")
   par(mar=c(4.3, 4, 2, 1))
   plot(positions, priorities,
        type="b",
        xlab="position i",
        ylab="effective priority p'")
   dev.off()
 }


 # Regenerate every plot of this file, one after the other.
 go <- function(){
   # All reports except the last, in generation order. They are looked up by
   # name only when their turn comes.
   reports <- c("filter.effects.DE.tanimoto",
                "filter.effects.DE.loglikely",
                "filter.effects.DE.slim",
                "filter.effects.DE.popular",
                "random.dataset",
                "self.recommendation.tanimoto",
                "self.recommendation.loglikely",
                "slim.alpha.compare",
                "all.ES",
                "DEvsES.tanimoto.popular",
                "DEvsES.loglikely.slim")
   for (report in reports) {
     do.call(report, list())
   }

   # Called last and directly so that go() still returns boost()'s value.
   boost()
 }
diff --git a/helpers.R b/helpers.R
 # Four-color palettes, one slice of the rainbow hue range per similarity.
 color.tanimoto  <- rainbow(n = 4, start = 3/6, end = 4/6)
 color.loglikely <- rainbow(n = 4, start = 2/6, end = 3/6)
 color.slim      <- rainbow(n = 4, start = 4/6, end = 5/6)
 color.popular   <- rainbow(n = 4, start = 5/6, end = 6/6)

 # Path prefix used by pdf.init() for the generated PDF files.
 plots.dir <- "../../thesis/plots/"
 # Point symbols (pch codes) assigned to the plotted series by position.
 plot.symbols <- c(1, 2, 3, 5, 6, 7, 8, 9)

 # Read "../data/<filename>.csv" (relative to the working directory) into a
 # data frame. The first line is the header; quote characters are not treated
 # specially.
 load.dataset <- function(filename){
   file <- paste0("../data/", filename, ".csv")
   # TRUE instead of T: T is an ordinary variable that can be reassigned.
   read.csv(file, header=TRUE, quote="")
 }

 # Return the rows of `dataset` whose column `columnname` equals `value`.
 # Rows where the column is NA never match. (A plain logical index would turn
 # each of them into an all-NA row of the result.)
 filter <- function(dataset, columnname, value){
   check <- dataset[, columnname] == value
   # which() keeps only the positions that are TRUE, dropping FALSE and NA.
   dataset[which(check), ]
 }

 # Return `dataset` with its rows sorted ascending by column `columnname`.
 ordered <- function(dataset, columnname){
   sort.keys <- dataset[, columnname]
   dataset[order(sort.keys), ]
 }

 # Open a 9 x 5 inch PDF device writing to <plots.dir><filename>.pdf.
 # The caller is responsible for closing it with dev.off().
 pdf.init <- function(filename){
   target <- paste0(plots.dir, filename, ".pdf")
   pdf(file=target, height=5.0, width=9.0)
 }

 # Open the PDF for `filename` and set up an empty precision/recall plot whose
 # axes span the recall and precision ranges of `data`, then add the title.
 plot.pr.init <- function(title, filename, data){
   pdf.init(filename)

   recall.range    <- minmax(data$recall)
   precision.range <- minmax(data$precision)
   plot.pr.start(recall.range, precision.range)

   # NOTE(review): the margins are changed after the plot has been created;
   # confirm this ordering is intended.
   par(mar=c(5, 1, 1.2, 1))
   title(title)
 }

 # Rows of `data` whose similarity is "tanimoto".
 only.tanimoto <- function(data){
   filter(dataset = data, columnname = "similarity", value = "tanimoto")
 }

 # Rows of `data` whose similarity is "slim".
 only.slim <- function(data){
   filter(dataset = data, columnname = "similarity", value = "slim")
 }

 # Rows of `data` whose similarity is "loglikelyhood" (label kept as written).
 only.loglikely <- function(data){
   filter(dataset = data, columnname = "similarity", value = "loglikelyhood")
 }

 # Rows of `data` whose similarity is "most-popular".
 only.popular <- function(data){
   filter(dataset = data, columnname = "similarity", value = "most-popular")
 }

 # Create an empty recall/precision plot with the given axis limits.
 #
 # xlim, ylim: c(min, max) limits for the recall and precision axes.
 plot.pr.start <- function(xlim, ylim){
   # type="n" sets up axes and labels without drawing the placeholder point,
   # which would otherwise show at (1, 1) whenever the limits include it.
   plot(1, 1,
        type="n",
        xlim=xlim,
        ylim=ylim,
        xlab="recall", ylab="precision")
 }

 # Add one precision/recall series to the current plot: a thin line through
 # the points, taken in ascending topN order, plus a symbol at every point.
 #
 # data:   rows with the columns topN, recall and precision.
 # color:  line and symbol color.
 # symbol: pch code of the point symbol.
 plot.pr.series <- function(data, color=NULL, symbol=NULL){
   by.topn   <- ordered(data, "topN")
   recall    <- by.topn$recall
   precision <- by.topn$precision

   lines(recall, precision, col=color, lwd=1)
   points(recall, precision, col=color, pch=symbol, lwd=2)
 }

 # Draw the legend and close the current graphics device, finishing the plot.
 #
 # names:  legend labels.
 # colors: symbol colors, in label order.
 # pos:    legend position keyword.
 plot.pr.legend <- function(names, colors, pos="topright"){
   # Symbols come from the global plot.symbols vector.
   legend(pos, names,
          pch=plot.symbols, col=colors,
          cex=0.8)

   dev.off()
 }

 # Smallest and largest value of `series`, as c(min, max).
 minmax <- function(series){
   range(series)
 }

 # Rows of `dataset` whose split type is ":given-1".
 only.given.1 <- function(dataset) {
   filter(dataset = dataset, columnname = "split.type", value = ":given-1")
 }

 # Rows of `dataset` whose split type is ":all-but-1".
 only.all.but.1 <- function(dataset) {
   filter(dataset = dataset, columnname = "split.type", value = ":all-but-1")
 }
	source("helpers.R")

	# Plot precision/recall curves comparing the DE data with and without extra
	# bot filtering, for both split types, into one PDF.
	#
	# filename: dataset name handed to load.dataset() and plot.pr.init().
	# title:    plot title.
	# colors:   color vector; entries 1-4 are used for the series in the order
	#           with bots/given-1, with bots/all-but-1,
	#           without bots/given-1, without bots/all-but-1.
	filter.effects.DE <- function(filename, title, colors){
	  data <- load.dataset(filename)

	  # "with bots" is the data set that had no bot filtering applied.
	  unfiltered <- filter(data, "data.set", "DE 2013-05 without bot filtering")
	  filtered   <- filter(data, "data.set", "DE extra bot filtering")

	  # All subsets are built before the PDF device is opened.
	  series <- list(only.given.1(unfiltered), only.all.but.1(unfiltered),
	                 only.given.1(filtered),   only.all.but.1(filtered))

	  # Legend labels in the same order as `series`
	  # (the last label used to read "withoutout bots").
	  plot.names <- c("with bots; given-1", "with bots, all-but-1",
	                  "without bots; given-1", "without bots, all-but-1")

	  plot.pr.init(title, filename, data)

	  for (k in seq_along(series)) {
	    plot.pr.series(series[[k]], color=colors[k], symbol=plot.symbols[k])
	  }

	  # plot.pr.legend() draws the legend and closes the PDF device.
	  plot.pr.legend(plot.names, colors)
	}

	# Bot-filtering comparison plot for the Tanimoto results.
	filter.effects.DE.tanimoto <- function(){
	  filter.effects.DE(filename = "2013-05_DE_bot_filtering_effect_-Tanimoto-",
	                    title    = "2013-05 DE bot filtering effect (Tanimoto)",
	                    colors   = color.tanimoto)
	}

	# Bot-filtering comparison plot for the log-likelihood results.
	# The title now spells "LogLikelihood" the way the dataset file name does.
	filter.effects.DE.loglikely <- function(){
	  filter.effects.DE(filename = "2013-05_DE_bot_filtering_effect_-LogLikelihood-",
	                    title    = "2013-05 DE bot filtering effect (LogLikelihood)",
	                    colors   = color.loglikely)
	}

	# Bot-filtering comparison plot for the SLIM results.
	filter.effects.DE.slim <- function(){
	  filter.effects.DE(filename = "2013-05_DE_bot_filtering_effect_-SLIM-",
	                    title    = "2013-05 DE bot filtering effect (SLIM)",
	                    colors   = color.slim)
	}

	# Bot-filtering comparison plot for the most-popular results.
	filter.effects.DE.popular <- function(){
	  filter.effects.DE(filename = "2013-05_DE_bot_filtering_effect_-MostPopular-",
	                    title    = "2013-05 DE bot filtering effect (MostPopular)",
	                    colors   = color.popular)
	}

	# Plot precision/recall curves of all four similarity measures on the random
	# dataset, one given-1 and one all-but-1 series per measure, into one PDF.
	random.dataset <- function(){
	  filename <- "Random_Dataset_2"
	  data <- load.dataset(filename)

	  # Column names once set by hand (kept for reference, not executed):
	  #colnames(data) <- c("data.set", "similarity", "recall", "precision", "topN",
	  # "split.type")

	  # One selector per similarity measure, in legend order.
	  selectors <- list(only.tanimoto, only.loglikely, only.slim, only.popular)

	  # Build every series before the PDF device is opened:
	  # given-1 followed by all-but-1 for each measure.
	  series <- list()
	  for (select in selectors) {
	    per.measure <- select(data)
	    series <- c(series, list(only.given.1(per.measure),
	                             only.all.but.1(per.measure)))
	  }

	  plot.colors <- c(color.tanimoto[1:2], color.loglikely[1:2],
	                   color.slim[1:2], color.popular[1:2])
	  plot.names  <- c("tanimoto; given-1", "tanimoto; all-but-1",
	                   "loglikely; given-1", "loglikely; all-but-1",
	                   "slim; given-1", "slim; all-but-1",
	                   "most-popular; given-1", "most-popular; all-but-1")

	  plot.pr.init("Random Dataset", filename, data)

	  for (k in seq_along(series)) {
	    plot.pr.series(series[[k]], color=plot.colors[k], symbol=plot.symbols[k])
	  }

	  # Draws the legend and closes the PDF device.
	  plot.pr.legend(plot.names, plot.colors, pos="bottomright")
	}

	# Plot precision/recall curves with and without self-similarity, for both
	# split types, into one PDF.
	#
	# filename: dataset name handed to load.dataset() and plot.pr.init().
	# title:    plot title.
	# colors:   color vector; entries 1-4 are used, one per series.
	self.recommendation <- function(filename, title, colors){
	  data <- load.dataset(filename)

	  # NOTE(review): "with-self-similartiy" is reproduced exactly as written;
	  # presumably it matches the spelling in the data files - confirm.
	  self.on  <- filter(data, "code.version", "with-self-similartiy")
	  self.off <- filter(data, "code.version", "no-self-similarity")

	  # All subsets are built before the PDF device is opened.
	  series <- list(only.given.1(self.on),  only.all.but.1(self.on),
	                 only.given.1(self.off), only.all.but.1(self.off))

	  # Legend labels in the same order as `series`.
	  plot.names <- c("self-similarity; given-1", "self-similarity; all-but-1",
	                  "NO-self-similarity; given-1", "NO-self-similarity; all-but-1")

	  plot.pr.init(title, filename, data)

	  for (k in seq_along(series)) {
	    plot.pr.series(series[[k]], color=colors[k], symbol=plot.symbols[k])
	  }

	  # Draws the legend and closes the PDF device.
	  plot.pr.legend(plot.names, colors)
	}

	# Self-similarity comparison plot for the Tanimoto results.
	# The file name is left as written (it has to match the file on disk);
	# only the misspelled plot title ("similarty") is corrected.
	self.recommendation.tanimoto <- function(){
	  self.recommendation(filename = "2013-05_ES_self_similarty_effects_-Tanimoto-",
	                      title    = "2013-05 ES self similarity effects (Tanimoto)",
	                      colors   = color.tanimoto)
	}

	# Self-similarity comparison plot for the log-likelihood results.
	# The file name is left as written (it has to match the file on disk);
	# only the misspellings in the plot title are corrected.
	self.recommendation.loglikely <- function(){
	  self.recommendation(filename = "2013-05_ES_self_similartiy_effects_-LogLikelihood-",
	                      title    = "2013-05 ES self similarity effects (LogLikelihood)",
	                      colors   = color.loglikely)
	}


	# Plot precision/recall curves of SLIM for alpha = 0.0 ... 1.0 into one PDF.
	# Both split types of one alpha share a color and a symbol, so the legend has
	# one entry per alpha.
	slim.alpha.compare <- function(){
	  filename <- "2013-05_ES_stric_bot_filtering_SLIM_alpha_compare"
	  data <- load.dataset(filename)

	  plot.names <- c("0.0", "0.1", "0.2", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8",
	                  "0.9", "1.0")

	  plot.colors <- rainbow(length(plot.names))

	  plot.pr.init("2013-05 ES extra bot filtering automatic lambda",
	               filename, data)

	  # R vectors are 1-based: the counter used to start at 0, which gave the
	  # first alpha a zero-length color/symbol and shifted every later series by
	  # one against the legend.
	  for (i in seq_along(plot.names)){
	    alpha <- plot.names[i]

	    # NOTE(review): alpha is compared as a string. If the "alpha" column is
	    # read as numeric, "0.0" and "1.0" will not equal 0 and 1 - confirm
	    # against the data file.
	    data.alpha <- filter(data, "alpha", alpha)

	    # There are more alphas than symbols, so wrap around instead of indexing
	    # past the end (which yields NA and draws no symbol). legend() is
	    # expected to recycle pch the same way - confirm.
	    symbol <- plot.symbols[(i - 1) %% length(plot.symbols) + 1]

	    plot.pr.series(only.given.1(data.alpha), color=plot.colors[i],
	                   symbol=symbol)
	    plot.pr.series(only.all.but.1(data.alpha), color=plot.colors[i],
	                   symbol=symbol)
	  }

	  # Draws the legend and closes the PDF device.
	  plot.pr.legend(plot.names, plot.colors)
	}

	# Plot precision/recall curves of all four similarity measures on the ES
	# dataset, one given-1 and one all-but-1 series per measure, into one PDF.
	all.ES <- function(){
	  filename <- "2013-05_ES_all"

	  data <- load.dataset(filename)

	  # The loop below filters on the short labels "loglikely" and "popular",
	  # while the helpers in this file treat the raw labels as "loglikelyhood"
	  # and "most-popular". Normalize them as DEvsES() does so those series are
	  # not empty. This is a no-op if the file already uses the short labels.
	  # as.character() avoids NA when the column is a factor; %in% keeps NA
	  # entries out of the index.
	  data$similarity <- as.character(data$similarity)
	  data$similarity[data$similarity %in% "most-popular"] <- "popular"
	  data$similarity[data$similarity %in% "loglikelyhood"] <- "loglikely"

	  plot.names <- c("tanimoto; given 1", "tanimoto; all but 1",
	                  "loglikely; given 1", "loglikely; all but 1",
	                  "slim; given 1", "slim; all but 1",
	                  "most-popular; given 1", "most-popular; all but 1")
	  plot.colors <- c(color.tanimoto[1], color.tanimoto[2],
	                   color.loglikely[1], color.loglikely[2],
	                   color.slim[1], color.slim[2],
	                   color.popular[1], color.popular[2])

	  plot.pr.init("2013-05 ES advanced bot filtering",
	               filename, data)

	  # i addresses the color/symbol pair of the current measure:
	  # i for given-1, i + 1 for all-but-1.
	  i <- 1
	  for (sim in c("tanimoto", "loglikely", "slim", "popular")){
	    data.sim <- filter(data, "similarity", sim)

	    plot.pr.series(only.given.1(data.sim),
	                   color=plot.colors[i], symbol=plot.symbols[i])
	    plot.pr.series(only.all.but.1(data.sim),
	                   color=plot.colors[i + 1], symbol=plot.symbols[i + 1])

	    i <- i + 2
	  }

	  # Draws the legend and closes the PDF device.
	  plot.pr.legend(plot.names, plot.colors)
	}


	# Plot precision/recall curves of DE against ES (the "<country> extra bot
	# filtering" data sets) for the given similarity measures into one PDF.
	#
	# filename:      dataset name handed to load.dataset() and plot.pr.init().
	# title:         plot title.
	# similarities:  similarity labels to plot, in their normalized form
	#                ("popular" instead of "most-popular", "loglikely" instead
	#                of "loglikelyhood").
	# color.schemas: list with one color vector per similarity; entries 1-4 of
	#                each are used (DE/ES x given-1/all-but-1).
	DEvsES <- function(filename, title, similarities, color.schemas){
	  data <- load.dataset(filename)

	  # Normalize the labels on plain strings: writing a new label into a factor
	  # column yields NA, and an NA label in a logical row index makes the
	  # data-frame assignment fail. %in% never returns NA.
	  data$similarity <- as.character(data$similarity)
	  data$similarity[data$similarity %in% "most-popular"] <- "popular"
	  data$similarity[data$similarity %in% "loglikelyhood"] <- "loglikely"

	  countries    <- c("DE", "ES")
	  split.labels <- c("given 1", "all but 1")

	  # Legend labels ordered similarity -> country -> split type, the same order
	  # in which the series are drawn below.
	  plot.names <- unlist(lapply(similarities, function(sim) {
	    lapply(countries, function(country) {
	      paste(country, sim, split.labels, sep="; ")
	    })
	  }), use.names=FALSE)

	  # First four colors of every schema, concatenated in schema order.
	  plot.colors <- unlist(lapply(color.schemas, function(schema) schema[1:4]),
	                        use.names=FALSE)

	  plot.pr.init(title, filename, data)

	  # i addresses the color/symbol pair of the current (similarity, country):
	  # i for given-1, i + 1 for all-but-1.
	  i <- 1
	  for (sim in similarities){
	    data.sim <- filter(data, "similarity", sim)
	    for (country in countries){
	      data.country <- filter(data.sim, "data.set",
	                             paste0(country, " extra bot filtering"))

	      plot.pr.series(only.given.1(data.country),
	                     color=plot.colors[i], symbol=plot.symbols[i])
	      plot.pr.series(only.all.but.1(data.country),
	                     color=plot.colors[i + 1], symbol=plot.symbols[i + 1])

	      i <- i + 2
	    }
	  }

	  # Draws the legend and closes the PDF device.
	  plot.pr.legend(plot.names, plot.colors)
	}

	# DE vs ES comparison plot for the Tanimoto and most-popular results.
	DEvsES.tanimoto.popular <- function(){
	  DEvsES(filename      = "2013-05_DE_vs_ES_extra-bot-filtering",
	         title         = "2013-05 DE vs ES advanced bot filtering",
	         similarities  = c("tanimoto", "popular"),
	         color.schemas = list(color.tanimoto, color.popular))
	}



	# DE vs ES comparison plot for the log-likelihood and SLIM results.
	# The title now spells "LogLikelihood" the way the dataset file name does.
	DEvsES.loglikely.slim <- function(){
	  DEvsES(filename      = "2013-05_DE_vs_ES_-LogLikelihood-SLIM-",
	         title         = "2013-05 DE vs ES advanced bot filtering LogLikelihood vs SLIM",
	         similarities  = c("loglikely", "slim"),
	         color.schemas = list(color.loglikely, color.slim))
	}


	# Write "boost_85.pdf": the effective priority 85 + 2*85*0.85^i plotted
	# against the position i = 0..25.
	boost <- function(){
	  # The multiplication signs had been lost here, leaving "2850.85^x";
	  # restored to the formula used by the other copy of this function.
	  boost.fn <- function(x) { return(85 + 2*85*0.85^x) }
	  x <- 0:25
	  y <- sapply(x, boost.fn)
	  pdf.init("boost_85")
	  par(mar=c(4.3, 4, 2, 1))
	  plot(x,y,
	       xlab="position i",
	       ylab="effective priority p'",
	       type="b")
	  dev.off()
	}


	# Regenerate every plot of this file, one after the other.
	go <- function(){
	  # All reports except the last, in generation order. They are looked up by
	  # name only when their turn comes.
	  reports <- c("filter.effects.DE.tanimoto",
	               "filter.effects.DE.loglikely",
	               "filter.effects.DE.slim",
	               "filter.effects.DE.popular",
	               "random.dataset",
	               "self.recommendation.tanimoto",
	               "self.recommendation.loglikely",
	               "slim.alpha.compare",
	               "all.ES",
	               "DEvsES.tanimoto.popular",
	               "DEvsES.loglikely.slim")
	  for (report in reports) {
	    do.call(report, list())
	  }

	  # Called last and directly so that go() still returns boost()'s value.
	  boost()
	}
	# Four-color palettes, one slice of the rainbow hue range per similarity.
	color.tanimoto  <- rainbow(n = 4, start = 3/6, end = 4/6)
	color.loglikely <- rainbow(n = 4, start = 2/6, end = 3/6)
	color.slim      <- rainbow(n = 4, start = 4/6, end = 5/6)
	color.popular   <- rainbow(n = 4, start = 5/6, end = 6/6)

	# Path prefix used by pdf.init() for the generated PDF files.
	plots.dir <- "../../thesis/plots/"
	# Point symbols (pch codes) assigned to the plotted series by position.
	plot.symbols <- c(1, 2, 3, 5, 6, 7, 8, 9)

	# Read "../data/<filename>.csv" (relative to the working directory) into a
	# data frame. The first line is the header; quote characters are not treated
	# specially.
	load.dataset <- function(filename){
	  file <- paste0("../data/", filename, ".csv")
	  # TRUE instead of T: T is an ordinary variable that can be reassigned.
	  read.csv(file, header=TRUE, quote="")
	}

	# Return the rows of `dataset` whose column `columnname` equals `value`.
	# Rows where the column is NA never match. (A plain logical index would turn
	# each of them into an all-NA row of the result.)
	filter <- function(dataset, columnname, value){
	  check <- dataset[, columnname] == value
	  # which() keeps only the positions that are TRUE, dropping FALSE and NA.
	  dataset[which(check), ]
	}

	# Return `dataset` with its rows sorted ascending by column `columnname`.
	ordered <- function(dataset, columnname){
	  sort.keys <- dataset[, columnname]
	  dataset[order(sort.keys), ]
	}

	# Open a 9 x 5 inch PDF device writing to <plots.dir><filename>.pdf.
	# The caller is responsible for closing it with dev.off().
	pdf.init <- function(filename){
	  target <- paste0(plots.dir, filename, ".pdf")
	  pdf(file=target, height=5.0, width=9.0)
	}

	# Open the PDF for `filename` and set up an empty precision/recall plot whose
	# axes span the recall and precision ranges of `data`, then add the title.
	plot.pr.init <- function(title, filename, data){
	  pdf.init(filename)

	  recall.range    <- minmax(data$recall)
	  precision.range <- minmax(data$precision)
	  plot.pr.start(recall.range, precision.range)

	  # NOTE(review): the margins are changed after the plot has been created;
	  # confirm this ordering is intended.
	  par(mar=c(5, 1, 1.2, 1))
	  title(title)
	}

	# Rows of `data` whose similarity is "tanimoto".
	only.tanimoto <- function(data){
	  filter(dataset = data, columnname = "similarity", value = "tanimoto")
	}

	# Rows of `data` whose similarity is "slim".
	only.slim <- function(data){
	  filter(dataset = data, columnname = "similarity", value = "slim")
	}

	# Rows of `data` whose similarity is "loglikelyhood" (label kept as written).
	only.loglikely <- function(data){
	  filter(dataset = data, columnname = "similarity", value = "loglikelyhood")
	}

	# Rows of `data` whose similarity is "most-popular".
	only.popular <- function(data){
	  filter(dataset = data, columnname = "similarity", value = "most-popular")
	}

	# Create an empty recall/precision plot with the given axis limits.
	#
	# xlim, ylim: c(min, max) limits for the recall and precision axes.
	plot.pr.start <- function(xlim, ylim){
	  # type="n" sets up axes and labels without drawing the placeholder point,
	  # which would otherwise show at (1, 1) whenever the limits include it.
	  plot(1, 1,
	       type="n",
	       xlim=xlim,
	       ylim=ylim,
	       xlab="recall", ylab="precision")
	}

	# Add one precision/recall series to the current plot: a thin line through
	# the points, taken in ascending topN order, plus a symbol at every point.
	#
	# data:   rows with the columns topN, recall and precision.
	# color:  line and symbol color.
	# symbol: pch code of the point symbol.
	plot.pr.series <- function(data, color=NULL, symbol=NULL){
	  by.topn   <- ordered(data, "topN")
	  recall    <- by.topn$recall
	  precision <- by.topn$precision

	  lines(recall, precision, col=color, lwd=1)
	  points(recall, precision, col=color, pch=symbol, lwd=2)
	}

	# Draw the legend and close the current graphics device, finishing the plot.
	#
	# names:  legend labels.
	# colors: symbol colors, in label order.
	# pos:    legend position keyword.
	plot.pr.legend <- function(names, colors, pos="topright"){
	  # Symbols come from the global plot.symbols vector.
	  legend(pos, names,
	         pch=plot.symbols, col=colors,
	         cex=0.8)

	  dev.off()
	}

	# Smallest and largest value of `series`, as c(min, max).
	minmax <- function(series){
	  range(series)
	}

	# Rows of `dataset` whose split type is ":given-1".
	only.given.1 <- function(dataset) {
	  filter(dataset = dataset, columnname = "split.type", value = ":given-1")
	}

	# Rows of `dataset` whose split type is ":all-but-1".
	only.all.but.1 <- function(dataset) {
	  filter(dataset = dataset, columnname = "split.type", value = ":all-but-1")
	}