Skip to content

Instantly share code, notes, and snippets.

@almostSouji
Last active June 10, 2024 05:58
Show Gist options
  • Save almostSouji/6f4d7c4e18b441b87f235e4e5aef01ef to your computer and use it in GitHub Desktop.
Save almostSouji/6f4d7c4e18b441b87f235e4e5aef01ef to your computer and use it in GitHub Desktop.
A simple Python script that finds the longest lines and the highest-entropy lines in a file or directory.
#!/usr/bin/env python3
import os
import sys
from collections import Counter
from math import log
def entropy(text):
    """Return the Shannon entropy of *text* in bits per symbol.

    *text* is any sized iterable of hashable symbols, such as a ``str`` or
    a ``bytes`` line. An empty input has no symbols and yields ``0.0``.
    """
    total = len(text)
    if not total:
        return 0.0
    counts = Counter(text)
    # Summing p * log(1/p) keeps every term non-negative, so a line made of a
    # single repeated symbol yields 0.0 rather than the "-0.0" produced by
    # negating a sum of zeros.
    nats = sum((n / total) * log(total / n) for n in counts.values())
    # Convert from natural-log units (nats) to bits.
    return nats / log(2.0)
# Report the ten longest and the ten highest-entropy lines found under the
# path given as the first command-line argument (a directory or a file).
if len(sys.argv) < 2:
    print(f"usage: {sys.argv[0]} <file-or-directory>", file=sys.stderr)
    sys.exit(2)

scan_target = sys.argv[1]
if os.path.isfile(scan_target):
    # os.walk() yields nothing for a regular file, so scan it directly.
    paths = [scan_target]
else:
    paths = [
        os.path.join(root, filename)
        for root, _subdirs, files in os.walk(scan_target)
        for filename in files
    ]

line_lengths = {}    # "path:line_number" -> length of the raw line in bytes
line_entropies = {}  # "path:line_number" -> entropy of the raw line
for path in paths:
    try:
        # Binary mode: lines are measured as raw bytes (terminator included),
        # so files that are not valid text cannot raise decoding errors.
        with open(path, "rb") as f:
            for line_num, line in enumerate(f, start=1):
                key = f"{path}:{line_num}"
                line_lengths[key] = len(line)
                line_entropies[key] = entropy(line)
    except OSError as err:
        # A broken symlink or unreadable file should not abort the scan.
        print(f"Skipping {path}: {err}", file=sys.stderr)

longest_lines = sorted(line_lengths, key=line_lengths.get, reverse=True)[:10]
highest_entropy = sorted(
    line_entropies, key=line_entropies.get, reverse=True
)[:10]
reported_keys = [*longest_lines, *highest_entropy]
if not reported_keys:
    print("No files found, check the path!")
    sys.exit(1)

# Pad each key up to the widest reported key plus four spaces so that the
# value column lines up across both tables.
column = max(len(key) for key in reported_keys) + 4

print("Line Length:")
print()
for key in longest_lines:
    print(f"{key}:{' ' * (column - len(key))}{line_lengths[key]}")
print()
print("Shannon's Entropy")
print()
for key in highest_entropy:
    print(f"{key}:{' ' * (column - len(key))}{line_entropies[key]}")
#!/usr/bin/env python3
from collections import Counter
from math import log
def entropy(text):
    """Return the Shannon entropy of *text* in bits per symbol.

    *text* is any sized iterable of hashable symbols, such as a ``str`` or
    a ``bytes`` line. An empty input has no symbols and yields ``0.0``.
    """
    total = len(text)
    if not total:
        return 0.0
    counts = Counter(text)
    # Summing p * log(1/p) keeps every term non-negative, so a line made of a
    # single repeated symbol yields 0.0 rather than the "-0.0" produced by
    # negating a sum of zeros.
    nats = sum((n / total) * log(total / n) for n in counts.values())
    # Convert from natural-log units (nats) to bits.
    return nats / log(2.0)
# Report the ten longest and the ten highest-entropy lines read from
# standard input.
line_lengths = {}    # 1-based line number -> length of the line
line_entropies = {}  # 1-based line number -> entropy of the line

# File descriptor 0 is standard input. closefd=False releases the wrapper
# object when the block ends without closing the underlying descriptor.
with open(0, closefd=False) as stdin_lines:
    for line_num, line in enumerate(stdin_lines, start=1):
        line_lengths[line_num] = len(line)
        line_entropies[line_num] = entropy(line)

by_length = sorted(line_lengths, key=line_lengths.get, reverse=True)
# Named by_entropy rather than "os" so the result does not rebind the name
# of the standard-library os module.
by_entropy = sorted(line_entropies, key=line_entropies.get, reverse=True)

print("Line Length:")
for line_num in by_length[:10]:
    print(f"l.{line_num}:\t{line_lengths[line_num]}")
print()
print("Shannon's entropy:")
for line_num in by_entropy[:10]:
    print(f"l.{line_num}:\t{line_entropies[line_num]}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment