Last active
November 28, 2018 23:19
-
-
Save pepasflo/556f93c75583f8a5a62436897a3d222c to your computer and use it in GitHub Desktop.
Find the frequency of characters in source files (examples below: C, JavaScript, Python, and Swift).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./charfreq.py c linux-4.19.5 | |
' ' 9.38% | |
e 5.59% | |
t 4.79% | |
_ 4.57% | |
\t 4.30% | |
r 3.82% | |
\n 3.68% | |
i 3.64% | |
s 3.41% | |
a 3.17% | |
n 2.97% | |
c 2.70% | |
o 2.63% | |
d 2.51% | |
l 1.98% | |
u 1.90% | |
p 1.83% | |
f 1.59% | |
, 1.57% | |
0 1.49% | |
m 1.44% | |
) 1.30% | |
( 1.30% | |
; 1.29% | |
* 1.17% | |
h 1.06% | |
v 0.99% | |
g 0.96% | |
b 0.96% | |
- 0.94% | |
E 0.93% | |
x 0.88% | |
= 0.85% | |
I 0.75% | |
T 0.74% | |
> 0.74% | |
R 0.73% | |
S 0.72% | |
A 0.71% | |
C 0.66% | |
N 0.58% | |
k 0.58% | |
P 0.55% | |
L 0.55% | |
1 0.54% | |
D 0.53% | |
O 0.51% | |
w 0.50% | |
. 0.50% | |
2 0.49% | |
M 0.48% | |
/ 0.48% | |
y 0.42% | |
" 0.38% | |
F 0.36% | |
{ 0.35% | |
} 0.35% | |
U 0.32% | |
3 0.29% | |
G 0.28% | |
& 0.28% | |
B 0.27% | |
4 0.25% | |
q 0.24% | |
8 0.24% | |
6 0.21% | |
H 0.18% | |
V 0.18% | |
5 0.18% | |
X 0.17% | |
: 0.16% | |
< 0.14% | |
[ 0.14% | |
] 0.14% | |
7 0.13% | |
K 0.13% | |
z 0.13% | |
+ 0.13% | |
# 0.12% | |
W 0.12% | |
9 0.12% | |
Y 0.10% | |
| 0.10% | |
% 0.09% | |
! 0.09% | |
\ 0.08% | |
Q 0.05% | |
j 0.05% | |
Z 0.04% | |
' 0.03% | |
@ 0.03% | |
~ 0.01% | |
? 0.01% | |
J 0.01% | |
^ 0.00% | |
$ 0.00% | |
` 0.00% |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# charfreq.py: print the frequency of each character found in source files.
# usage: ./charfreq.py <extension> <dir> [<dir> ...]
# example: ./charfreq.py swift /foo/bar /bin/baz
import os | |
import sys | |
def source_files(ext, d='.'):
    """Return the paths of all files under directory `d` that end in `ext`.

    The tree rooted at `d` is walked recursively with os.walk.

    Args:
        ext: File extension to match, with or without the leading dot
            (both 'py' and '.py' match 'foo.py').
        d: Root directory to search. Defaults to the current directory.

    Returns:
        A list of paths, each joined onto the directory it was found in.
        The list is empty if nothing matches or `d` cannot be walked
        (os.walk ignores listing errors by default).
    """
    # Normalize so callers may pass either 'py' or '.py'.
    suffix = ext if ext.startswith('.') else '.' + ext
    sources = []
    for root, _dirs, files in os.walk(d):
        sources.extend(
            os.path.join(root, f) for f in files if f.endswith(suffix)
        )
    return sources
def get_chfreq(ext, dirs):
    """Count how often each character occurs in the matching source files.

    Every file with extension `ext` found under each directory in `dirs`
    (via source_files) is read in full and its characters are tallied.

    Args:
        ext: File extension to match, passed through to source_files.
        dirs: Iterable of directory paths to search.

    Returns:
        A list of (count, character) tuples, one per distinct character,
        in no particular order. Empty if no files were found.
    """
    chmap = {}
    for dname in dirs:
        for fname in source_files(ext, dname):
            # Read raw bytes and decode explicitly so the result does not
            # depend on the locale, and so files that are not valid UTF-8
            # (undecodable bytes become U+FFFD) cannot abort the scan.
            with open(fname, 'rb') as fd:
                text = fd.read().decode('utf-8', 'replace')
            for ch in text:
                chmap[ch] = chmap.get(ch, 0) + 1
    # items() works on both Python 2 and 3; iteritems() is Python 2-only.
    return [(count, ch) for ch, count in chmap.items()]
def report_lines(pairs):
    """Format (count, character) pairs as 'char percent%' report lines.

    Args:
        pairs: List of (count, character) tuples, as returned by get_chfreq.

    Returns:
        A list of strings ordered from most to least frequent. Characters
        with a code point above 127 are omitted from the listing, but their
        counts still contribute to the total used for the percentages.
    """
    # Whitespace characters that would be invisible if printed raw.
    printable = {'\n': "\\n", ' ': "' '", '\t': "\\t", '\r': "\\r"}
    total = sum(n for n, _ch in pairs)
    lines = []
    for n, ch in sorted(pairs, reverse=True):
        if ord(ch) > 127:
            # skip non-ASCII characters (emoji, accented letters, ...).
            continue
        label = printable.get(ch, ch)
        lines.append("%s %0.2f%%" % (label, 100 * float(n) / total))
    return lines


if __name__ == "__main__":
    if len(sys.argv) < 2:
        sys.stderr.write(
            "usage: %s <extension> [<dir> ...]\n" % sys.argv[0])
        sys.exit(2)
    ext = sys.argv[1]
    # With no directories given, search the current directory.
    dirs = sys.argv[2:] or ['.']
    for line in report_lines(get_chfreq(ext, dirs)):
        # Parenthesized single-argument form runs on Python 2 and 3.
        print(line)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./charfreq.py js react-master | |
' ' 23.23% | |
e 7.78% | |
t 5.75% | |
n 4.43% | |
o 4.03% | |
r 4.03% | |
a 3.51% | |
\n 3.39% | |
i 3.11% | |
s 2.97% | |
c 2.56% | |
l 2.42% | |
d 2.12% | |
u 1.87% | |
p 1.80% | |
) 1.37% | |
( 1.37% | |
' 1.36% | |
. 1.25% | |
h 1.25% | |
m 1.18% | |
, 1.00% | |
; 0.99% | |
f 0.98% | |
= 0.87% | |
g 0.75% | |
{ 0.73% | |
} 0.73% | |
/ 0.70% | |
v 0.63% | |
b 0.62% | |
x 0.59% | |
y 0.59% | |
C 0.54% | |
R 0.53% | |
: 0.51% | |
E 0.46% | |
T 0.45% | |
> 0.42% | |
w 0.42% | |
k 0.37% | |
S 0.33% | |
N 0.31% | |
I 0.29% | |
< 0.29% | |
_ 0.28% | |
D 0.27% | |
P 0.26% | |
M 0.23% | |
O 0.22% | |
1 0.22% | |
F 0.21% | |
* 0.21% | |
B 0.20% | |
A 0.20% | |
- 0.19% | |
0 0.17% | |
W 0.15% | |
" 0.15% | |
U 0.14% | |
[ 0.14% | |
] 0.14% | |
2 0.13% | |
L 0.12% | |
4 0.12% | |
H 0.11% | |
q 0.10% | |
` 0.09% | |
+ 0.09% | |
V 0.08% | |
j 0.08% | |
3 0.08% | |
5 0.07% | |
| 0.06% | |
9 0.06% | |
! 0.05% | |
8 0.05% | |
6 0.04% | |
& 0.04% | |
7 0.04% | |
z 0.04% | |
G 0.04% | |
Y 0.03% | |
\ 0.03% | |
$ 0.03% | |
? 0.02% | |
@ 0.02% | |
K 0.02% | |
X 0.02% | |
J 0.01% | |
% 0.01% | |
# 0.01% | |
Q 0.01% | |
Z 0.00% | |
^ 0.00% | |
~ 0.00% |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./charfreq.py py werkzeug-master | |
' ' 25.38% | |
e 7.80% | |
t 5.17% | |
s 4.39% | |
r 4.29% | |
a 3.96% | |
o 3.50% | |
i 3.35% | |
n 3.25% | |
\n 2.87% | |
l 2.54% | |
d 2.01% | |
p 1.92% | |
c 1.80% | |
u 1.77% | |
' 1.72% | |
f 1.70% | |
_ 1.65% | |
h 1.42% | |
m 1.41% | |
. 1.38% | |
( 1.22% | |
) 1.22% | |
, 1.01% | |
= 0.97% | |
: 0.94% | |
g 0.90% | |
b 0.74% | |
y 0.69% | |
" 0.58% | |
v 0.56% | |
w 0.51% | |
k 0.44% | |
x 0.41% | |
T 0.32% | |
` 0.31% | |
/ 0.30% | |
- 0.27% | |
q 0.23% | |
0 0.23% | |
R 0.22% | |
E 0.20% | |
N 0.20% | |
] 0.18% | |
[ 0.18% | |
1 0.18% | |
S 0.17% | |
I 0.17% | |
2 0.15% | |
~ 0.14% | |
C 0.14% | |
z 0.14% | |
A 0.13% | |
P 0.12% | |
> 0.12% | |
j 0.12% | |
O 0.11% | |
M 0.10% | |
F 0.10% | |
H 0.10% | |
# 0.10% | |
D 0.10% | |
L 0.10% | |
\ 0.10% | |
% 0.09% | |
W 0.09% | |
* 0.09% | |
3 0.09% | |
4 0.09% | |
< 0.08% | |
U 0.07% | |
B 0.07% | |
8 0.07% | |
{ 0.06% | |
} 0.06% | |
G 0.06% | |
5 0.06% | |
+ 0.05% | |
6 0.05% | |
9 0.05% | |
; 0.04% | |
7 0.03% | |
@ 0.03% | |
V 0.03% | |
X 0.03% | |
K 0.03% | |
Y 0.02% | |
? 0.02% | |
! 0.02% | |
Q 0.01% | |
J 0.01% | |
& 0.01% | |
Z 0.01% | |
| 0.01% | |
$ 0.00% | |
^ 0.00% |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./charfreq.py swift projects/FloSports/FloSports projects/FloSportsCore/FloSportsCore | |
' ' 23.54% | |
e 8.39% | |
t 5.62% | |
r 4.59% | |
i 4.54% | |
o 4.21% | |
a 4.03% | |
n 3.90% | |
l 3.66% | |
s 3.42% | |
\n 2.81% | |
c 2.12% | |
d 1.89% | |
u 1.81% | |
p 1.51% | |
. 1.41% | |
f 1.23% | |
g 1.18% | |
: 1.11% | |
b 1.05% | |
v 1.03% | |
h 1.00% | |
m 0.98% | |
/ 0.85% | |
w 0.83% | |
( 0.78% | |
) 0.78% | |
S 0.75% | |
C 0.64% | |
y 0.63% | |
I 0.51% | |
, 0.50% | |
V 0.44% | |
= 0.44% | |
} 0.43% | |
{ 0.43% | |
P 0.39% | |
T 0.39% | |
D 0.35% | |
A 0.35% | |
k 0.34% | |
" 0.33% | |
x 0.33% | |
E 0.30% | |
M 0.27% | |
L 0.27% | |
F 0.26% | |
R 0.25% | |
U 0.24% | |
B 0.23% | |
N 0.20% | |
_ 0.17% | |
O 0.17% | |
? 0.16% | |
0 0.15% | |
- 0.14% | |
1 0.11% | |
] 0.10% | |
[ 0.10% | |
K 0.09% | |
H 0.09% | |
> 0.09% | |
z 0.08% | |
j 0.08% | |
W 0.08% | |
2 0.07% | |
q 0.06% | |
G 0.06% | |
! 0.05% | |
3 0.05% | |
7 0.05% | |
' 0.04% | |
8 0.04% | |
< 0.03% | |
4 0.03% | |
5 0.03% | |
@ 0.03% | |
\ 0.03% | |
6 0.02% | |
9 0.02% | |
J 0.02% | |
* 0.02% | |
Y 0.01% | |
+ 0.01% | |
& 0.01% | |
$ 0.01% | |
Q 0.01% | |
# 0.01% | |
% 0.01% | |
` 0.01% | |
X 0.01% | |
| 0.00% | |
Z 0.00% | |
; 0.00% | |
~ 0.00% | |
^ 0.00% |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment