Last active
August 29, 2015 13:59
-
-
Save lawrencejones/10738507 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| expect = (a,b) -> | |
| [a,b] = [a.join(), b.join()] | |
| if a != b | |
| console.log """ | |
| Expect failed! | |
| [#{a}] != [#{b}] """ | |
| else console.log "PASS! [#{a}]" | |
| # Preprocesses P to generate list of occurances of suffixs within | |
| # the pattern. | |
| # Example. | |
| # | |
| # P = "abaa" | |
| # = [ 2 | |
| calculate_gsr = (P) -> | |
| # Preprocesses the pattern P to calculate the rightmost occurance | |
| # of any letter in pattern P. | |
| # Example. | |
| # | |
| # P = "abbacus" | |
| # = { a: 3, b: 2, c: 4, u: 5, s: 6, [..]: 0 } | |
| # | |
| # Where [..] signifies all the rest of the alphabet. | |
| calculate_rmo = (P) -> | |
| occ = new Object() | |
| occ[c] = i for c,i in P | |
| occ | |
| # Evaluates the indexes of T where P occurs as a substring. | |
| boyer_moore = (T, P) -> | |
| i = 0 | |
| occ = calculate_rmo P | |
| matches = [] | |
| while i < (T.length - P.length) # scan T left to right | |
| j = P.length - 1 # scan P right to left | |
| --j while j >= 0 and (P[j] == T[i+j]) | |
| # If scanned all the way to -1, past pattern P, with matches | |
| # every time, then add index to match and increment. | |
| if j < 0 then matches.push i++ | |
| else | |
| # Find the rightmost occurance of the bad character (T[i+j]) in | |
| # pattern P. If found, and is not the index we are currently at, | |
| # then shift P to align against that rightmost occurance. | |
| if (rmo = occ[T[i+j]])? and rmo != j | |
| i += rmo + 1 | |
| else | |
| i += P.length | |
| matches | |
| console.log calculate_rmo 'abracadabra' | |
| T = 'abcababccba' | |
| P = 'bc' | |
| console.log """ | |
| Searching for pattern "#{P}" in text "#{T}"...""" | |
| matches = boyer_moore T, P | |
| console.log matches | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| expect = (a,b) -> | |
| [a,b] = [a.join(), b.join()] | |
| if a != b | |
| console.log """ | |
| Expect failed! | |
| [#{a}] != [#{b}] """ | |
| else console.log "PASS! [#{a}]" | |
| # Produces an array of indexes representing the occurances | |
| # of substrings of P, within itself. | |
| # | |
| # Eg. 'ababac' -> [0, 0, 1, 2, 3, 0] | |
| # | |
| # Used to calculate the number of characters to backtrack | |
| # in a search text T, when searching for pattern P. If you | |
| # fail on 'abaX', then you need not move all the way back | |
| # to the start of the pattern, as you already know you've | |
| # encountered 'aba'. So go back to index π[k-1] of P, 'a', | |
| # and continue your search. | |
| prefix = (p, k = 0) -> | |
| π = p.split('').map -> 0 | |
| for q in [1..p.length-1] | |
| while k > 0 and p[q] != p[k] | |
| k = π[k-1] | |
| if p[q] == p[k] then ++k | |
| π[q] = k | |
| π | |
| # Knuth-Morris-Pratt Algorithm | |
| kmp_match = (t, p) -> | |
| output = [] | |
| π = prefix p | |
| q = 0 # number of characters matched in p | |
| for c,i in t | |
| while q > 0 and c != p[q] | |
| q = π[q-1] | |
| if c is p[q] then ++q | |
| if q is p.length | |
| output.push (1+i) - q | |
| q = π[q-1] | |
| output | |
| expect (prefix 'ababac'), [0,0,1,2,3,0] | |
| expect (prefix 'aaaaaaaa'), [0,1,2,3,4,5,6,7] | |
| expect (kmp_match 'thisisprettyboring', 'is'), [2,4] | |
| expect (kmp_match 'tryagainplease', 'again'), [3] | |
| expect (kmp_match 'abacus are actually cool', 'ac'), [2,11] | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment