Created
June 4, 2012 00:05
-
-
Save seanjtaylor/2865517 to your computer and use it in GitHub Desktop.
Most Frequent Integer 1-Grams from Google N-Grams
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(py27)sean@leibniz:/media/datasets/n-grams/data/1gms$ zgrep "^[0-9]\+\s" vocab.gz | sort -n -r -k 2 | head -n 100 | |
1 2563166795 | |
2 1846241630 | |
0 1834474671 | |
3 1304070731 | |
2005 1091295264 | |
4 1045739277 | |
5 915581712 | |
10 698337921 | |
6 692314831 | |
2006 618908841 | |
7 604854136 | |
8 585439966 | |
2004 567848185 | |
9 472359747 | |
12 468617991 | |
20 430121236 | |
11 404631816 | |
2003 398470178 | |
15 388436510 | |
30 368322557 | |
14 324164746 | |
16 323660172 | |
13 322260634 | |
24 315022072 | |
25 314971073 | |
18 310054621 | |
2002 293564443 | |
17 280070584 | |
21 269318693 | |
19 262766257 | |
22 255679267 | |
2000 250919267 | |
23 245202221 | |
100 234830854 | |
2001 233212079 | |
26 221432495 | |
28 221192811 | |
27 217023168 | |
50 202696582 | |
29 202522436 | |
31 191488693 | |
1999 145566388 | |
40 145146194 | |
01 121981250 | |
05 121333544 | |
1998 116944486 | |
32 112020959 | |
60 107594014 | |
06 107433168 | |
90 105659063 | |
1997 98252292 | |
35 94914530 | |
03 94558406 | |
04 91842366 | |
02 87006614 | |
1996 84209276 | |
45 80785066 | |
33 80684821 | |
36 80472145 | |
80 79883630 | |
34 76535611 | |
1995 74052933 | |
70 70069368 | |
200 69497693 | |
37 69117732 | |
38 68075206 | |
42 66690664 | |
48 66210325 | |
39 63440940 | |
00 62388506 | |
41 61789143 | |
44 60601193 | |
500 59800435 | |
43 59634388 | |
1994 58849687 | |
08 57380876 | |
75 57299054 | |
07 56761982 | |
64 56335858 | |
55 55214386 | |
09 54928007 | |
46 52893184 | |
49 51871011 | |
47 50843091 | |
1993 49789137 | |
99 49013288 | |
65 49001457 | |
52 48823738 | |
51 47726262 | |
1000 47433744 | |
1990 46017969 | |
1992 45549282 | |
54 45122927 | |
300 45117872 | |
62 43749984 | |
56 43748310 | |
53 42742459 | |
95 42167300 | |
1991 40774273 | |
59 40362522 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment