lawrencejones · August 29, 2015 13:59
diff --git a/boyer_moore.coffee b/boyer_moore.coffee
 expect = (a,b) ->
  [a,b] = [a.join(), b.join()]
  if a != b
    console.log """
    Expect failed!
    [#{a}] != [#{b}] """
  else console.log "PASS! [#{a}]"

 # Preprocesses P to generate list of occurances of suffixs within
 # the pattern.
 # Example.
 #
 #     P = "abaa"
 #   = [ 2
 calculate_gsr = (P) ->

 # Preprocesses the pattern P to calculate the rightmost occurance
 # of any letter in pattern P.
 # Example.
 #
 #     P = "abbacus"
 #   = { a: 3, b: 2, c: 4, u: 5, s: 6, [..]: 0 }
 #
 # Where [..] signifies all the rest of the alphabet.
 calculate_rmo = (P) ->
  occ = new Object()
  occ[c] = i for c,i in P
  occ

 # Evaluates the indexes of T where P occurs as a substring.
 boyer_moore = (T, P) ->
  i = 0
  occ = calculate_rmo P
  matches = []
  while i < (T.length - P.length) # scan T left to right
    j = P.length - 1 # scan P right to left
    --j while j >= 0 and (P[j] == T[i+j])
    # If scanned all the way to -1, past pattern P, with matches
    # every time, then add index to match and increment.
    if j < 0 then matches.push i++
    else
      # Find the rightmost occurance of the bad character (T[i+j]) in
      # pattern P. If found, and is not the index we are currently at,
      # then shift P to align against that rightmost occurance.
      if (rmo = occ[T[i+j]])? and rmo != j
        i += rmo + 1
      else
        i += P.length
  matches

 console.log calculate_rmo 'abracadabra'
 T = 'abcababccba'
 P = 'bc'
 console.log """
 Searching for pattern "#{P}" in text "#{T}"..."""
 matches = boyer_moore T, P
 console.log matches

diff --git a/knuth_morris.coffee b/knuth_morris.coffee
 expect = (a,b) ->
  [a,b] = [a.join(), b.join()]
  if a != b
    console.log """
    Expect failed!
    [#{a}] != [#{b}] """
  else console.log "PASS! [#{a}]"

 # Produces an array of indexes representing the occurances
 # of substrings of P, within itself.
 #
 # Eg. 'ababac' -> [0, 0, 1, 2, 3, 0]
 #
 # Used to calculate the number of characters to backtrack
 # in a search text T, when searching for pattern P. If you
 # fail on 'abaX', then you need not move all the way back
 # to the start of the pattern, as you already know you've
 # encountered 'aba'. So go back to index π[k-1] of P, 'a',
 # and continue your search.
 prefix = (p, k = 0) ->
  π = p.split('').map -> 0
  for q in [1..p.length-1]
    while k > 0 and p[q] != p[k]
      k = π[k-1]
    if p[q] == p[k] then ++k
    π[q] = k
  π

 # Knuth-Morris-Pratt Algorithm
 kmp_match = (t, p) ->
  output = []
  π = prefix p
  q = 0 # number of characters matched in p
  for c,i in t
    while q > 0 and c != p[q]
      q = π[q-1]
    if c is p[q] then ++q
    if q is p.length
      output.push (1+i) - q
      q = π[q-1]
  output

 expect (prefix 'ababac'), [0,0,1,2,3,0]
 expect (prefix 'aaaaaaaa'), [0,1,2,3,4,5,6,7]
 expect (kmp_match 'thisisprettyboring', 'is'), [2,4]
 expect (kmp_match 'tryagainplease', 'again'), [3]
 expect (kmp_match 'abacus are actually cool', 'ac'), [2,11]
	expect = (a,b) ->
	[a,b] = [a.join(), b.join()]
	if a != b
	console.log """
	Expect failed!
	[#{a}] != [#{b}] """
	else console.log "PASS! [#{a}]"

	# Preprocesses P to generate list of occurances of suffixs within
	# the pattern.
	# Example.
	#
	# P = "abaa"
	# = [ 2
	calculate_gsr = (P) ->

	# Preprocesses the pattern P to calculate the rightmost occurance
	# of any letter in pattern P.
	# Example.
	#
	# P = "abbacus"
	# = { a: 3, b: 2, c: 4, u: 5, s: 6, [..]: 0 }
	#
	# Where [..] signifies all the rest of the alphabet.
	calculate_rmo = (P) ->
	occ = new Object()
	occ[c] = i for c,i in P
	occ

	# Evaluates the indexes of T where P occurs as a substring.
	boyer_moore = (T, P) ->
	i = 0
	occ = calculate_rmo P
	matches = []
	while i < (T.length - P.length) # scan T left to right
	j = P.length - 1 # scan P right to left
	--j while j >= 0 and (P[j] == T[i+j])
	# If scanned all the way to -1, past pattern P, with matches
	# every time, then add index to match and increment.
	if j < 0 then matches.push i++
	else
	# Find the rightmost occurance of the bad character (T[i+j]) in
	# pattern P. If found, and is not the index we are currently at,
	# then shift P to align against that rightmost occurance.
	if (rmo = occ[T[i+j]])? and rmo != j
	i += rmo + 1
	else
	i += P.length
	matches

	console.log calculate_rmo 'abracadabra'
	T = 'abcababccba'
	P = 'bc'
	console.log """
	Searching for pattern "#{P}" in text "#{T}"..."""
	matches = boyer_moore T, P
	console.log matches