Skip to content

Instantly share code, notes, and snippets.

@pepasflo
Last active November 28, 2018 23:19
Show Gist options
  • Save pepasflo/556f93c75583f8a5a62436897a3d222c to your computer and use it in GitHub Desktop.
Save pepasflo/556f93c75583f8a5a62436897a3d222c to your computer and use it in GitHub Desktop.
Find the frequency of characters in source files (e.g. C, JavaScript, Python, Swift).
$ ./charfreq.py c linux-4.19.5
' ' 9.38%
e 5.59%
t 4.79%
_ 4.57%
\t 4.30%
r 3.82%
\n 3.68%
i 3.64%
s 3.41%
a 3.17%
n 2.97%
c 2.70%
o 2.63%
d 2.51%
l 1.98%
u 1.90%
p 1.83%
f 1.59%
, 1.57%
0 1.49%
m 1.44%
) 1.30%
( 1.30%
; 1.29%
* 1.17%
h 1.06%
v 0.99%
g 0.96%
b 0.96%
- 0.94%
E 0.93%
x 0.88%
= 0.85%
I 0.75%
T 0.74%
> 0.74%
R 0.73%
S 0.72%
A 0.71%
C 0.66%
N 0.58%
k 0.58%
P 0.55%
L 0.55%
1 0.54%
D 0.53%
O 0.51%
w 0.50%
. 0.50%
2 0.49%
M 0.48%
/ 0.48%
y 0.42%
" 0.38%
F 0.36%
{ 0.35%
} 0.35%
U 0.32%
3 0.29%
G 0.28%
& 0.28%
B 0.27%
4 0.25%
q 0.24%
8 0.24%
6 0.21%
H 0.18%
V 0.18%
5 0.18%
X 0.17%
: 0.16%
< 0.14%
[ 0.14%
] 0.14%
7 0.13%
K 0.13%
z 0.13%
+ 0.13%
# 0.12%
W 0.12%
9 0.12%
Y 0.10%
| 0.10%
% 0.09%
! 0.09%
\ 0.08%
Q 0.05%
j 0.05%
Z 0.04%
' 0.03%
@ 0.03%
~ 0.01%
? 0.01%
J 0.01%
^ 0.00%
$ 0.00%
` 0.00%
#!/usr/bin/env python
# charfreq.py: print the frequency of characters in source files.
# usage: ./charfreq.py swift /foo/bar /bin/baz ...
import collections
import os
import sys
def source_files(ext, d='.'):
    """Return the paths of all files under *d* that have extension *ext*.

    Args:
        ext: File extension to match, with or without a leading dot
            (``"py"`` and ``".py"`` are equivalent).
        d: Directory to search recursively; defaults to the current
            directory.

    Returns:
        A list of paths (each joined onto the directory it was found in),
        in the order ``os.walk`` yields them.
    """
    # Normalize so that a caller passing ".py" does not end up matching "..py".
    suffix = '.' + ext.lstrip('.')
    sources = []
    for root, _dirs, files in os.walk(d):
        sources.extend(
            os.path.join(root, f) for f in files if f.endswith(suffix)
        )
    return sources
def get_chfreq(ext, dirs):
    """Count how often each character occurs in matching source files.

    Args:
        ext: File extension, passed through to ``source_files``.
        dirs: Iterable of directory paths; each is searched recursively.

    Returns:
        A list of ``(count, character)`` pairs, one per distinct character
        read from the files. The list is not sorted.
    """
    counts = collections.Counter()
    for dname in dirs:
        for fname in source_files(ext, dname):
            with open(fname) as fd:
                # Counter.update tallies every character of the string.
                counts.update(fd.read())
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    return [(n, ch) for ch, n in counts.items()]
if __name__ == "__main__":
    # Expect: charfreq.py EXT [DIR ...]
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s EXT [DIR ...]\n" % sys.argv[0])
        sys.exit(2)
    ext = sys.argv[1]
    dirs = sys.argv[2:]
    pairs = get_chfreq(ext, dirs)
    # The total includes skipped characters, so percentages are relative to
    # everything that was read.
    total = sum(n for n, ch in pairs)
    # Display names for whitespace that would otherwise be invisible.
    printable = {'\n': "\\n", ' ': "' '", '\t': "\\t"}
    # Most frequent first; ties are broken by character, descending.
    for n, ch in sorted(pairs, reverse=True):
        if ord(ch) > 127:
            # skip non-ASCII characters (e.g. emoji).
            continue
        ch = printable.get(ch, ch)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("%s %0.2f%%" % (ch, 100 * float(n) / total))
$ ./charfreq.py js react-master
' ' 23.23%
e 7.78%
t 5.75%
n 4.43%
o 4.03%
r 4.03%
a 3.51%
\n 3.39%
i 3.11%
s 2.97%
c 2.56%
l 2.42%
d 2.12%
u 1.87%
p 1.80%
) 1.37%
( 1.37%
' 1.36%
. 1.25%
h 1.25%
m 1.18%
, 1.00%
; 0.99%
f 0.98%
= 0.87%
g 0.75%
{ 0.73%
} 0.73%
/ 0.70%
v 0.63%
b 0.62%
x 0.59%
y 0.59%
C 0.54%
R 0.53%
: 0.51%
E 0.46%
T 0.45%
> 0.42%
w 0.42%
k 0.37%
S 0.33%
N 0.31%
I 0.29%
< 0.29%
_ 0.28%
D 0.27%
P 0.26%
M 0.23%
O 0.22%
1 0.22%
F 0.21%
* 0.21%
B 0.20%
A 0.20%
- 0.19%
0 0.17%
W 0.15%
" 0.15%
U 0.14%
[ 0.14%
] 0.14%
2 0.13%
L 0.12%
4 0.12%
H 0.11%
q 0.10%
` 0.09%
+ 0.09%
V 0.08%
j 0.08%
3 0.08%
5 0.07%
| 0.06%
9 0.06%
! 0.05%
8 0.05%
6 0.04%
& 0.04%
7 0.04%
z 0.04%
G 0.04%
Y 0.03%
\ 0.03%
$ 0.03%
? 0.02%
@ 0.02%
K 0.02%
X 0.02%
J 0.01%
% 0.01%
# 0.01%
Q 0.01%
Z 0.00%
^ 0.00%
~ 0.00%
$ ./charfreq.py py werkzeug-master
' ' 25.38%
e 7.80%
t 5.17%
s 4.39%
r 4.29%
a 3.96%
o 3.50%
i 3.35%
n 3.25%
\n 2.87%
l 2.54%
d 2.01%
p 1.92%
c 1.80%
u 1.77%
' 1.72%
f 1.70%
_ 1.65%
h 1.42%
m 1.41%
. 1.38%
( 1.22%
) 1.22%
, 1.01%
= 0.97%
: 0.94%
g 0.90%
b 0.74%
y 0.69%
" 0.58%
v 0.56%
w 0.51%
k 0.44%
x 0.41%
T 0.32%
` 0.31%
/ 0.30%
- 0.27%
q 0.23%
0 0.23%
R 0.22%
E 0.20%
N 0.20%
] 0.18%
[ 0.18%
1 0.18%
S 0.17%
I 0.17%
2 0.15%
~ 0.14%
C 0.14%
z 0.14%
A 0.13%
P 0.12%
> 0.12%
j 0.12%
O 0.11%
M 0.10%
F 0.10%
H 0.10%
# 0.10%
D 0.10%
L 0.10%
\ 0.10%
% 0.09%
W 0.09%
* 0.09%
3 0.09%
4 0.09%
< 0.08%
U 0.07%
B 0.07%
8 0.07%
{ 0.06%
} 0.06%
G 0.06%
5 0.06%
+ 0.05%
6 0.05%
9 0.05%
; 0.04%
7 0.03%
@ 0.03%
V 0.03%
X 0.03%
K 0.03%
Y 0.02%
? 0.02%
! 0.02%
Q 0.01%
J 0.01%
& 0.01%
Z 0.01%
| 0.01%
$ 0.00%
^ 0.00%
$ ./charfreq.py swift projects/FloSports/FloSports projects/FloSportsCore/FloSportsCore
' ' 23.54%
e 8.39%
t 5.62%
r 4.59%
i 4.54%
o 4.21%
a 4.03%
n 3.90%
l 3.66%
s 3.42%
\n 2.81%
c 2.12%
d 1.89%
u 1.81%
p 1.51%
. 1.41%
f 1.23%
g 1.18%
: 1.11%
b 1.05%
v 1.03%
h 1.00%
m 0.98%
/ 0.85%
w 0.83%
( 0.78%
) 0.78%
S 0.75%
C 0.64%
y 0.63%
I 0.51%
, 0.50%
V 0.44%
= 0.44%
} 0.43%
{ 0.43%
P 0.39%
T 0.39%
D 0.35%
A 0.35%
k 0.34%
" 0.33%
x 0.33%
E 0.30%
M 0.27%
L 0.27%
F 0.26%
R 0.25%
U 0.24%
B 0.23%
N 0.20%
_ 0.17%
O 0.17%
? 0.16%
0 0.15%
- 0.14%
1 0.11%
] 0.10%
[ 0.10%
K 0.09%
H 0.09%
> 0.09%
z 0.08%
j 0.08%
W 0.08%
2 0.07%
q 0.06%
G 0.06%
! 0.05%
3 0.05%
7 0.05%
' 0.04%
8 0.04%
< 0.03%
4 0.03%
5 0.03%
@ 0.03%
\ 0.03%
6 0.02%
9 0.02%
J 0.02%
* 0.02%
Y 0.01%
+ 0.01%
& 0.01%
$ 0.01%
Q 0.01%
# 0.01%
% 0.01%
` 0.01%
X 0.01%
| 0.00%
Z 0.00%
; 0.00%
~ 0.00%
^ 0.00%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment