Created
September 22, 2011 13:11
-
-
Save malthe/1234726 to your computer and use it in GitHub Desktop.
Dictionary-based HTML hyphenation using soft hyphen
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
from htmlentitydefs import name2codepoint | |
# Requires the third-party `hyphenator` library, which must be installed from PyPI.
from hyphenator import Hyphenator | |
# Firefox comes with an English hyphenation dictionary; it is looked up via
# the `locate` database instead of hard-coding an installation path.
def _locate_dictionary():
    """Return the first path that `locate` reports for hyph_en_US.dic.

    Raises:
        IndexError: if `locate` prints nothing (dictionary not installed,
            `locate` unavailable, or its database is stale). The exception
            type matches what the previous bare ``readlines()[0]`` raised,
            but now carries an explanatory message.
    """
    # The context manager closes the pipe; it was previously left open.
    with os.popen('locate hyph_en_US.dic') as pipe:
        matches = pipe.readlines()
    if not matches:
        raise IndexError(
            "`locate hyph_en_US.dic` returned no results; cannot find a "
            "hyphenation dictionary"
        )
    # Each line ends with a newline; only the first hit is used.
    return matches[0].strip()


# Path of the dictionary file handed to the hyphenator.
path = _locate_dictionary()

# Shared Hyphenator instance used by hyphenate().
hyphenator = Hyphenator(path)

# Matches any run of whitespace; used to split text into words.
spaces = re.compile(r'\s+')

# The character for the HTML `shy` (soft hyphen) entity.
shy = unichr(name2codepoint['shy'])
def hyphenate(string, max_length=20):
    """Insert soft hyphens into the words of *string* longer than *max_length*.

    The input is stripped, split on runs of whitespace and re-joined with
    single spaces, so the original spacing is not preserved.

    Args:
        string: The text to process.
        max_length: Words with more characters than this are passed to
            ``hyphenator.wrap(word, max_length)``; shorter words are kept
            unchanged.

    Returns:
        The words joined by single spaces. Each long word that could be
        split has its parts joined by the soft-hyphen character ``shy``;
        words that ``wrap`` cannot split are returned untouched.
    """
    def soften(word):
        # Short words need no break opportunity.
        if len(word) <= max_length:
            return word

        parts = hyphenator.wrap(word, max_length)
        if not parts:
            # No usable break point: keep the word exactly as written,
            # including any trailing hyphen of its own.
            return word

        head = parts[0]
        # NOTE(review): per the hyphenator docs, wrap() attaches a visible
        # '-' to the first part only. Remove just that one character so
        # hyphens that belong to the word itself survive.
        if head.endswith('-'):
            head = head[:-1]
        return shy.join((head,) + tuple(parts[1:]))

    return " ".join(soften(word) for word in spaces.split(string.strip()))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment