Created
July 25, 2017 05:50
-
-
Save wtsnjp/c86071ffd855d16683a181fb4b073ff5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# usage: python ccs.py {file path}
#
import sys | |
import re | |
import collections | |
from matplotlib import pyplot as plt | |
def get_cs(fn):
    """Collect the control-sequence names that occur in a text file.

    A control sequence is a backslash followed either by a run of ASCII
    letters and ``@`` characters, or by exactly one character that is not
    an ASCII letter (for example ``\\%`` or ``\\\\``).

    Args:
        fn: Path of the file to scan. It is opened in text mode with the
            platform's default encoding.

    Returns:
        A list of the matched names, without the leading backslash, in
        order of appearance. Repeated names are kept, so the list can be
        fed directly to a frequency counter.
    """
    pattern = re.compile(r'\\([a-zA-Z@]+|[^a-zA-Z])')
    names = []
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(fn) as src:
        # Scan line by line; each line keeps its trailing newline, so a
        # backslash at the very end of a line still matches (name "\n").
        for line in src:
            names.extend(pattern.findall(line))
    return names
def frequency_ranking(ls):
    """Rank the distinct items of ``ls`` by how often they occur.

    Args:
        ls: An iterable of hashable items.

    Returns:
        A list of two-element lists ``[item, count]``, ordered from the
        most frequent item to the least frequent, as produced by
        ``collections.Counter.most_common()``.
    """
    tally = collections.Counter(ls)
    ranking = []
    for item, count in tally.most_common():
        # Callers index the pairs as v[0] / v[1]; keep them as lists.
        ranking.append([item, count])
    return ranking
if __name__ == '__main__':
    # Plot a log-log rank/frequency curve of the control sequences found in
    # the file named on the command line.
    if len(sys.argv) < 2:
        # Exit with the usage line instead of an unexplained IndexError.
        sys.exit('usage: python ccs.py {file path}')
    fn = sys.argv[1]
    cls = get_cs(fn)
    ls = frequency_ranking(cls)
    # Normalise by the total number of occurrences so the y value is a
    # relative frequency (the values sum to 1), not a count divided by the
    # number of distinct names.
    total = len(cls)
    # Ranks start at 1: a log-scaled x axis cannot display x = 0, so the
    # implicit 0-based positions would hide the most frequent entry.
    ranks = range(1, len(ls) + 1)
    plt.plot(ranks, [v[1] / total for v in ls])
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel('順位')
    plt.ylabel('出現頻度')
    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment