Created
February 28, 2013 07:59
-
-
Save colinpollock/5055056 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Readability | |

[Wiki](http://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_test)

The Flesch Reading Ease Score (FRES) is

    206.835 - 1.015 * (#words / #sentences) - 84.6 * (#syllables / #words)

Example:

    "The Australian platypus is seemingly a hybrid of a mammal and reptilian
    creature."

This (one) sentence has thirteen words and twenty-four syllables. So its score
is

    206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
    = 206.835 - 1.015 * (13 / 1) - 84.6 * (24 / 13)

>>> text = "The Australian platypus is seemingly a hybrid of a mammal and " + \
    "reptilian creature."
>>> readability(text)
37.455384615384645
""" | |
from __future__ import division | |
import sys | |
from nltk.corpus import cmudict | |
from nltk.tokenize import word_tokenize, sent_tokenize | |
# Pronunciation table, built once when the module is imported. It is looked
# up by word, and each entry is iterated as a collection of phone sequences
# (presumably one per alternative pronunciation -- confirm against NLTK docs).
_syll_map = cmudict.dict()
def num_syllables(word):
    """Count the syllables of `word` using the pronunciation table.

    A phone is counted as one syllable when its last character is a digit
    (presumably the stress marker carried by vowel phones). If `word` has
    several pronunciations, the largest count among them is returned.

    Returns None when `word` is not in the table.
    """
    if word not in _syll_map:
        return None

    def count_digit_final(pronunciation):
        # One syllable per phone whose final character is a digit.
        return sum(1 for phone in pronunciation if phone[-1] in '0123456789')

    return max(count_digit_final(pron) for pron in _syll_map[word])
def readability(text):
    """Return the Flesch Reading Ease Score of `text`.

    The score is

        206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)

    Only tokens for which `num_syllables` returns a count (after
    lowercasing) are treated as words; every other token is ignored.
    Every sentence produced by `sent_tokenize` is counted, whether or not
    it contains a counted word.

    Returns None when `text` contains no countable words, because the score
    is undefined in that case.
    """
    sentence_count = word_count = syllable_count = 0

    for sent in sent_tokenize(text):
        sentence_count += 1
        for token in word_tokenize(sent):
            sylls = num_syllables(token.lower())
            if sylls is None:
                # Token has no known pronunciation (e.g. punctuation): skip.
                continue
            word_count += 1
            syllable_count += sylls

    # Guard on the actual divisor. A non-zero word count also implies at
    # least one sentence, so both divisions below are safe. (Testing the
    # syllable count instead would wrongly reject text whose known words all
    # have a syllable count of zero.)
    if word_count == 0:
        return None

    # Relies on true division (the file imports it from __future__).
    return (206.835 -
            1.015 * (word_count / sentence_count) -
            84.6 * (syllable_count / word_count))
def main(args):
    """Prompt for text repeatedly and print each text's readability score.

    `args` is accepted for the command-line entry point but is not used.
    The loop ends when input is exhausted (EOF), and 0 is returned as the
    exit status.
    """
    # Pick the line reader that returns the raw text on either major Python
    # version: `raw_input` on Python 2, `input` on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    while True:
        try:
            text = read_line('Text: ')
        except EOFError:
            return 0
        # Single-argument parenthesised print behaves the same on 2 and 3.
        print(readability(text))
# Script entry point: run the prompt loop and use its return value as the
# process exit status. `sys.exit` is used rather than the bare `exit`
# helper, which is only injected by the `site` module for interactive use.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment