Created
September 4, 2011 22:32
-
-
Save devongovett/1193634 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Hyphenator - a fast hyphenation algorithm | |
# By Devon Govett | |
# MIT LICENSE | |
# | |
# Based on: | |
# 1. The algorithm described by Frank Liang and used in TeX - http://tug.org/docs/liang/liang-thesis.pdf | |
# 2. Hyphenator.js - http://code.google.com/p/hyphenator | |
# 3. Hypher.js - https://github.com/bramstein/Hypher/ | |
# | |
# Usage: | |
# This class uses the same language patterns as Hyphenator.js. | |
# You can find these patterns at http://code.google.com/p/hyphenator/source/browse/trunk/patterns/. | |
# For example, the file for US English is here: | |
# http://code.google.com/p/hyphenator/source/browse/trunk/patterns/en-us.js | |
# | |
# h = new Hyphenator(language) | |
# h.hyphenate('hyphenate') # [ 'hy', 'phen', 'ate' ] | |
# | |
class Hyphenator | |
constructor: (language) -> | |
@tree = createTree(language.patterns) | |
{@leftmin, @rightmin} = language | |
@exceptions = {} | |
return unless language.exceptions | |
for exception in language.exceptions.split(/,\s?/g) | |
@exceptions[exception.replace(/-/g, '')] = exception.split('-') | |
createTree = (patterns) -> | |
tree = points: [] | |
for size, pattern of patterns | |
size = +size | |
for i in [0...pattern.length] by size | |
node = tree | |
points = [] | |
last = 0 | |
for j in [i...i + size] | |
char = pattern[j] | |
if isNaN(char) | |
node = node[char] ?= {} | |
points.push last | |
last = 0 | |
else | |
last = char | |
points.push(last) | |
node.points = points | |
return tree | |
hyphenate: (word) -> | |
return [word] if ~word.indexOf('\u00AD') # soft hyphens | |
return exception if exception = @exceptions[word] | |
word = '_' + word + '_' | |
len = word.length | |
lc = word.toLowerCase() | |
points = [] | |
{tree, leftmin, rightmin} = this | |
for i in [0...len] | |
node = tree | |
for j in [i...len] | |
node = node[lc[j]] | |
break unless node | |
continue unless node.points | |
for point, k in node.points | |
e = (points[i + k] or 0) | |
points[i + k] = if point > e then point else e | |
result = [] | |
cur = '' | |
for i in [1...len - 1] | |
if i > leftmin and i < (len - rightmin) and points[i] % 2 | |
result.push cur | |
cur = word[i] | |
else | |
cur += word[i] | |
result.push(cur) | |
return result |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment