Skip to content

Instantly share code, notes, and snippets.

@lawrencejones
Last active August 29, 2015 13:59
Show Gist options
  • Save lawrencejones/10738507 to your computer and use it in GitHub Desktop.
Save lawrencejones/10738507 to your computer and use it in GitHub Desktop.
expect = (a,b) ->
[a,b] = [a.join(), b.join()]
if a != b
console.log """
Expect failed!
[#{a}] != [#{b}] """
else console.log "PASS! [#{a}]"
# Preprocesses P to generate list of occurances of suffixs within
# the pattern.
# Example.
#
# P = "abaa"
# = [ 2
calculate_gsr = (P) ->
# Preprocesses the pattern P to calculate the rightmost occurance
# of any letter in pattern P.
# Example.
#
# P = "abbacus"
# = { a: 3, b: 2, c: 4, u: 5, s: 6, [..]: 0 }
#
# Where [..] signifies all the rest of the alphabet.
calculate_rmo = (P) ->
occ = new Object()
occ[c] = i for c,i in P
occ
# Evaluates the indexes of T where P occurs as a substring.
boyer_moore = (T, P) ->
i = 0
occ = calculate_rmo P
matches = []
while i < (T.length - P.length) # scan T left to right
j = P.length - 1 # scan P right to left
--j while j >= 0 and (P[j] == T[i+j])
# If scanned all the way to -1, past pattern P, with matches
# every time, then add index to match and increment.
if j < 0 then matches.push i++
else
# Find the rightmost occurance of the bad character (T[i+j]) in
# pattern P. If found, and is not the index we are currently at,
# then shift P to align against that rightmost occurance.
if (rmo = occ[T[i+j]])? and rmo != j
i += rmo + 1
else
i += P.length
matches
console.log calculate_rmo 'abracadabra'
T = 'abcababccba'
P = 'bc'
console.log """
Searching for pattern "#{P}" in text "#{T}"..."""
matches = boyer_moore T, P
console.log matches
expect = (a,b) ->
[a,b] = [a.join(), b.join()]
if a != b
console.log """
Expect failed!
[#{a}] != [#{b}] """
else console.log "PASS! [#{a}]"
# Produces an array of indexes representing the occurances
# of substrings of P, within itself.
#
# Eg. 'ababac' -> [0, 0, 1, 2, 3, 0]
#
# Used to calculate the number of characters to backtrack
# in a search text T, when searching for pattern P. If you
# fail on 'abaX', then you need not move all the way back
# to the start of the pattern, as you already know you've
# encountered 'aba'. So go back to index π[k-1] of P, 'a',
# and continue your search.
prefix = (p, k = 0) ->
π = p.split('').map -> 0
for q in [1..p.length-1]
while k > 0 and p[q] != p[k]
k = π[k-1]
if p[q] == p[k] then ++k
π[q] = k
π
# Knuth-Morris-Pratt Algorithm
kmp_match = (t, p) ->
output = []
π = prefix p
q = 0 # number of characters matched in p
for c,i in t
while q > 0 and c != p[q]
q = π[q-1]
if c is p[q] then ++q
if q is p.length
output.push (1+i) - q
q = π[q-1]
output
expect (prefix 'ababac'), [0,0,1,2,3,0]
expect (prefix 'aaaaaaaa'), [0,1,2,3,4,5,6,7]
expect (kmp_match 'thisisprettyboring', 'is'), [2,4]
expect (kmp_match 'tryagainplease', 'again'), [3]
expect (kmp_match 'abacus are actually cool', 'ac'), [2,11]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment