jackwasey · May 25, 2018 20:28 · EngrStudent · Jan 12, 2018 · cormac85 · May 25, 2018
diff --git a/fastmatch-demo.r b/fastmatch-demo.r
 library(fastmatch)
 library(microbenchmark)

 # Download the word list and keep the first column of the parsed table.
 WORDS <- read.table("https://dotnetperls-controls.googlecode.com/files/enable1.txt",
                     stringsAsFactors = FALSE)[[1]]

 # Factor copy of the same words, so matching against a factor table can be
 # compared with matching against the plain vector.
 words_factor <- as.factor(WORDS)

 # generate 100 sentences of between 5 and 15 words:
 # The length is drawn inside the anonymous function so that every sentence
 # gets its own size. Passing `size = sample(...)` through lapply()'s `...`
 # evaluates it only once (all sentences end up the same length) and also
 # hands the loop index to sample() as its `replace` argument.
 SENTENCES <- lapply(seq_len(100), function(i) {
   sample(WORDS, size = sample(5:15, size = 1))
 })

 # Apply one conversion function to every sentence in the global SENTENCES
 # list and return the list of results.
 #   fun: function taking a single sentence (vector of words).
 bench_fun <- function(fun) {
   lapply(X = SENTENCES, FUN = fun)
 }

 # poster's slow solution: flag every entry of WORDS that occurs in the
 # sentence, then return the positions of the flagged entries.
 hg_convert <- function(sentence) {
   in_sentence <- WORDS %in% sentence
   which(in_sentence)
 }

 # Look up each word of the sentence in WORDS with base match(); returns one
 # position per word of the sentence.
 jw_convert_match <- function(sentence) {
   match(x = sentence, table = WORDS)
 }

 # Same lookup as jw_convert_match(), but against the factor version of the
 # word list (words_factor).
 jw_convert_match_factor <- function(sentence) {
   match(x = sentence, table = words_factor)
 }

 # Look up each word of the sentence in WORDS using fastmatch::fmatch()
 # instead of base match().
 jw_convert_fastmatch <- function(sentence) {
   fmatch(x = sentence, table = WORDS)
 }

 # fmatch() lookup against the factor version of the word list
 # (words_factor).
 jw_convert_fastmatch_factor <- function(sentence) {
   fmatch(x = sentence, table = words_factor)
 }

 # Benchmark one: time all five converters over the current SENTENCES,
 # 10 repetitions each, and print the timing table.
 message("starting benchmark one")
 print(
   microbenchmark(
     bench_fun(hg_convert),
     bench_fun(jw_convert_match),
     bench_fun(jw_convert_match_factor),
     bench_fun(jw_convert_fastmatch),
     bench_fun(jw_convert_fastmatch_factor),
     times = 10
   )
 )

 # now again with big samples
 # generating the SENTENCES is quite slow...
 # The length is drawn inside the anonymous function so that every sentence
 # gets its own size between 5 and 15. Passing `size = sample(...)` through
 # lapply()'s `...` evaluates it only once and also hands the loop index to
 # sample() as its `replace` argument.
 SENTENCES <- lapply(seq_len(1e6), function(i) {
   sample(WORDS, size = sample(5:15, size = 1))
 })
 message("starting benchmark two, compare with factor vs vector of words")
 # bench_fun() reads the global SENTENCES, so it now runs over the big sample.
 print(
   microbenchmark(
     bench_fun(jw_convert_fastmatch),
     bench_fun(jw_convert_fastmatch_factor),
     times = 10
   )
 )
	library(fastmatch)
	library(microbenchmark)

	# Download the word list and keep the first column of the parsed table.
	WORDS <- read.table("https://dotnetperls-controls.googlecode.com/files/enable1.txt",
	                    stringsAsFactors = FALSE)[[1]]

	# Factor copy of the same words, so matching against a factor table can be
	# compared with matching against the plain vector.
	words_factor <- as.factor(WORDS)

	# generate 100 sentences of between 5 and 15 words:
	# The length is drawn inside the anonymous function so that every sentence
	# gets its own size. Passing `size = sample(...)` through lapply()'s `...`
	# evaluates it only once (all sentences end up the same length) and also
	# hands the loop index to sample() as its `replace` argument.
	SENTENCES <- lapply(seq_len(100), function(i) {
	  sample(WORDS, size = sample(5:15, size = 1))
	})

	# Apply one conversion function to every sentence in the global SENTENCES
	# list and return the list of results.
	#   fun: function taking a single sentence (vector of words).
	bench_fun <- function(fun) {
	  lapply(X = SENTENCES, FUN = fun)
	}

	# poster's slow solution: flag every entry of WORDS that occurs in the
	# sentence, then return the positions of the flagged entries.
	hg_convert <- function(sentence) {
	  in_sentence <- WORDS %in% sentence
	  which(in_sentence)
	}

	# Look up each word of the sentence in WORDS with base match(); returns one
	# position per word of the sentence.
	jw_convert_match <- function(sentence) {
	  match(x = sentence, table = WORDS)
	}

	# Same lookup as jw_convert_match(), but against the factor version of the
	# word list (words_factor).
	jw_convert_match_factor <- function(sentence) {
	  match(x = sentence, table = words_factor)
	}

	# Look up each word of the sentence in WORDS using fastmatch::fmatch()
	# instead of base match().
	jw_convert_fastmatch <- function(sentence) {
	  fmatch(x = sentence, table = WORDS)
	}

	# fmatch() lookup against the factor version of the word list
	# (words_factor).
	jw_convert_fastmatch_factor <- function(sentence) {
	  fmatch(x = sentence, table = words_factor)
	}

	# Benchmark one: time all five converters over the current SENTENCES,
	# 10 repetitions each, and print the timing table.
	message("starting benchmark one")
	print(
	  microbenchmark(
	    bench_fun(hg_convert),
	    bench_fun(jw_convert_match),
	    bench_fun(jw_convert_match_factor),
	    bench_fun(jw_convert_fastmatch),
	    bench_fun(jw_convert_fastmatch_factor),
	    times = 10
	  )
	)

	# now again with big samples
	# generating the SENTENCES is quite slow...
	# The length is drawn inside the anonymous function so that every sentence
	# gets its own size between 5 and 15. Passing `size = sample(...)` through
	# lapply()'s `...` evaluates it only once and also hands the loop index to
	# sample() as its `replace` argument.
	SENTENCES <- lapply(seq_len(1e6), function(i) {
	  sample(WORDS, size = sample(5:15, size = 1))
	})
	message("starting benchmark two, compare with factor vs vector of words")
	# bench_fun() reads the global SENTENCES, so it now runs over the big sample.
	print(
	  microbenchmark(
	    bench_fun(jw_convert_fastmatch),
	    bench_fun(jw_convert_fastmatch_factor),
	    times = 10
	  )
	)