Skip to content

Instantly share code, notes, and snippets.

@devongovett
Created September 4, 2011 22:32
Show Gist options
  • Save devongovett/1193634 to your computer and use it in GitHub Desktop.
Save devongovett/1193634 to your computer and use it in GitHub Desktop.
#
# Hyphenator - a fast hyphenation algorithm
# By Devon Govett
# MIT LICENSE
#
# Based on:
# 1. The algorithm described by Frank Liang and used in TeX - http://tug.org/docs/liang/liang-thesis.pdf
# 2. Hyphenator.js - http://code.google.com/p/hyphenator
# 3. Hypher.js - https://github.com/bramstein/Hypher/
#
# Usage:
# This class uses the same language patterns as Hyphenator.js.
# You can find these patterns at http://code.google.com/p/hyphenator/source/browse/trunk/patterns/.
# For example, the file for US English is here:
# http://code.google.com/p/hyphenator/source/browse/trunk/patterns/en-us.js
#
# h = new Hyphenator(language)
# h.hyphenate('hyphenate') # [ 'hy', 'phen', 'ate' ]
#
class Hyphenator
constructor: (language) ->
@tree = createTree(language.patterns)
{@leftmin, @rightmin} = language
@exceptions = {}
return unless language.exceptions
for exception in language.exceptions.split(/,\s?/g)
@exceptions[exception.replace(/-/g, '')] = exception.split('-')
createTree = (patterns) ->
tree = points: []
for size, pattern of patterns
size = +size
for i in [0...pattern.length] by size
node = tree
points = []
last = 0
for j in [i...i + size]
char = pattern[j]
if isNaN(char)
node = node[char] ?= {}
points.push last
last = 0
else
last = char
points.push(last)
node.points = points
return tree
hyphenate: (word) ->
return [word] if ~word.indexOf('\u00AD') # soft hyphens
return exception if exception = @exceptions[word]
word = '_' + word + '_'
len = word.length
lc = word.toLowerCase()
points = []
{tree, leftmin, rightmin} = this
for i in [0...len]
node = tree
for j in [i...len]
node = node[lc[j]]
break unless node
continue unless node.points
for point, k in node.points
e = (points[i + k] or 0)
points[i + k] = if point > e then point else e
result = []
cur = ''
for i in [1...len - 1]
if i > leftmin and i < (len - rightmin) and points[i] % 2
result.push cur
cur = word[i]
else
cur += word[i]
result.push(cur)
return result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment