Last active
September 25, 2023 18:44
-
-
Save imptype/4b372c40c8252e726819e51eef35e2ef to your computer and use it in GitHub Desktop.
Printable ascii characters sorted by frequency
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
etaonisrhldcumfpgyw,.bv01k52TS9AM-CIN"438B6RPEDHx7WLOFYGJzjUq:)($K;V*?Q/X&Z!%#@`'+<=>[\]^_{|}~ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
etaonisrhldcumfpgyw,.bv01k52TS9AM-CIN"438B6RPEDHx7WLOFYGJzjUq:)($K;V*?Q/X&Z!%# |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Case-sensitive letter and bigram frequency counts
# from large-scale English corpora
# MICHAEL N. JONES and D. J. K. MEWHORT
# Queen's University, Kingston, Ontario, Canada
# https://link.springer.com/content/pdf/10.3758/BF03195586.pdf

# Letter table, one row per letter.
# Letter Uppercase ƒ Lowercase ƒ Rank Rank
a = """A 280,937 5,263,779 3 3
B 169,474 866,156 8 20
C 229,363 1,960,412 5 12
D 129,632 2,369,820 12 11
E 138,443 7,741,842 11 1
F 100,751 1,296,925 17 15
G 93,212 1,206,747 19 17
H 123,632 2,955,858 13 9
I 223,312 4,527,332 6 6
J 78,706 65,856 20 25
K 46,580 460,788 22 22
L 106,984 2,553,152 15 10
M 259,474 1,467,376 4 14
N 205,409 4,535,545 7 5
O 105,700 4,729,266 16 4
P 144,239 1,255,579 10 16
Q 11,659 54,221 24 26
R 146,448 4,137,949 9 8
S 304,971 4,186,210 2 7
T 325,462 5,507,692 1 2
U 57,488 1,613,323 21 13
V 31,053 653,370 23 21
W 107,195 1,015,656 14 19
X 7,578 123,577 25 23
Y 94,297 1,062,040 18 18
Z 5,610 66,423 26 24"""

# Punctuation/digit table, one row per character, listed in ASCII order from
# '!' (33) to '?' (63).  Only the first numeric column is used below; the
# meaning of the remaining two columns is not needed by this script.
# NOTE: the rows for '"', "'", '+', '<', '=' and '>' previously carried
# typographic quotes or no symbol at all; they are restored here from their
# position in the ASCII sequence so that every row has a one-character key.
b = """! 2,178 58 1,866
" 284,671 142,168 26,827
# 10 0 0
$ 51,572 427 61
% 1,993 13 9
& 6,523 438 350
' 204,497 187,914 185,857
( 53,398 43,473 55
) 53,735 11 37,506
* 20,716 882 530
+ 309 8 112
, 984,969 111 810,376
- 252,302 160,049 138,556
. 946,136 41,636 847,611
/ 8,161 3,948 4,207
0 546,233 2,006 38
1 460,946 959 5,792
2 333,499 1,065 2,435
3 187,606 1,335 1,945
4 192,528 880 1,820
5 374,413 999 1,514
6 153,865 1,576 1,491
7 120,094 840 1,074
8 182,627 828 1,021
9 282,364 1,697 481
: 54,036 13 48,354
; 36,727 58 28,301
< 82 74 18
= 22 1 1
> 83 52 70
? 12,357 10 11,938"""


def tonumber(text):
    """Return the integer value of a comma-grouped count such as '5,263,779'."""
    return int(text.replace(',', ''))


def _rows(table):
    """Yield the whitespace-separated fields of each non-blank row of *table*.

    Raises:
        ValueError: if a row does not begin with a single character followed
            by at least one count.  This catches rows whose symbol column was
            lost, which would otherwise be keyed by a number or by ''.
    """
    for row in table.splitlines():
        fields = row.split()
        if not fields:
            continue
        if len(fields) < 2 or len(fields[0]) != 1:
            raise ValueError(
                'row does not start with a single character: {!r}'.format(row)
            )
        yield fields


# Map every character to its occurrence count.
counts = {}
for fields in _rows(a):
    letter, upper, lower = fields[:3]
    counts[letter] = tonumber(upper)
    counts[letter.lower()] = tonumber(lower)
for fields in _rows(b):
    counts[fields[0]] = tonumber(fields[1])

# All characters, most frequent first, as one string.
data = ''.join(sorted(counts, key=counts.get, reverse=True))
print(data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment