Last active
December 10, 2015 16:58
-
-
Save ykarikos/4464161 to your computer and use it in GitHub Desktop.
Count character frequencies
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
# charFrequency.py (C) 2013 Yrjo Kari-Koskinen <[email protected]>
#
# This program is licensed under Version 2 of the GPL.
#
# Set the PYTHONIOENCODING=utf_8 environment variable to read and write UTF-8 to files.
import sys
import unicodedata
from collections import Counter
def letter(l):
    """Return True if the single character *l* is a Unicode letter.

    A character is treated as a letter when its Unicode general category
    begins with 'L' (Lu, Ll, Lt, Lm or Lo).

    Raises:
        TypeError: if *l* is not a single Unicode character (raised by
            ``unicodedata.category``).
    """
    return unicodedata.category(l).startswith('L')
# Tally how often each letter occurs on standard input, then print one line
# per letter -- "<letter> <count> <percent> %" -- most frequent first.
counts = Counter()

# Iterate stdin directly so the input is streamed line by line rather than
# loaded into memory in one go.
for line in sys.stdin:
    # Python 2 yields byte strings from stdin and they must be decoded;
    # Python 3 already yields text, so the line is used as-is.
    if isinstance(line, bytes):
        line = line.decode("utf-8")
    # Whitespace (including the trailing newline) is never a letter, so no
    # stripping is needed before filtering.
    counts.update(char for char in line if letter(char))

# Total number of letters seen; the denominator for the percentages.
total = sum(counts.values())

# most_common() orders by descending count. With no letters at all the loop
# body never runs, so the division below cannot hit a zero total.
for char, count in counts.most_common():
    # Floor division keeps the percentage a truncated integer on both
    # Python 2 and Python 3.
    print(u"%s %d %d %%" % (char, count, count * 100 // total))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment