Created July 31, 2023 20:41
-
-
Save raek/cdc190591bc4293fed38783103ba99c4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections
import re
import sys

# Tallies filled in by the stdin loop further down in this script.
# ends:  how many messages were counted under each "end" label (the last
#        hop of the cleaned path, or the source when no hop remains).
# paths: how many messages were seen for each complete cleaned path string.
ends = collections.Counter()
paths = collections.Counter()
def clean(path):
    """Return *path* with uninformative hops removed.

    *path* is a comma-separated string of hop names; it may be empty or
    start with a comma.  The following fields are dropped:

    * empty fields (such as the one produced by a leading comma),
    * fields that start with ``"WIDE"``,
    * the literal field ``"NONE"``.

    For every field that is kept, a single trailing ``"*"`` is removed.
    The surviving fields are joined with commas again; the result is the
    empty string when nothing survives.
    """
    hops = []
    for hop in path.split(","):
        if not hop or hop.startswith("WIDE") or hop == "NONE":
            continue
        # Only one trailing "*" is stripped, and only after the checks
        # above, so e.g. "NONE*" is kept (as "NONE") rather than dropped.
        hops.append(hop[:-1] if hop.endswith("*") else hop)
    return ",".join(hops)
# Header of a message: "SRC>DEST[,HOP[,HOP...]]:INFO".  Compiled once here
# instead of being looked up again for every input line.
_MESSAGE_RE = re.compile(
    r"^(?P<src>[^>]+)>(?P<desc>[^,:]+)(?P<path>(,[^,:]+)*):(?P<info>.*)$"
)

# Each stdin line is expected to hold three tab-separated fields; only the
# third (the message) is used.  maxsplit=2 keeps any tab characters inside
# the message from breaking the split.
for lineno, line in enumerate(sys.stdin, start=1):
    fields = line.rstrip("\n").split("\t", 2)
    if len(fields) != 3:
        raise ValueError(
            f"line {lineno}: expected 3 tab-separated fields, got {len(fields)}"
        )
    message = fields[2]

    # Explicit check rather than `assert`, which is stripped under `python -O`
    # and would then surface as an AttributeError on None.
    m = _MESSAGE_RE.match(message)
    if m is None:
        raise ValueError(f"line {lineno}: unrecognised message {message!r}")

    src, path = m.group("src", "path")
    cleaned = clean(path)
    # The "end" is the last hop that survives cleaning; when no hop is left
    # the message is attributed to its source instead.
    end = cleaned.rsplit(",", 1)[-1] if cleaned else src
    ends[end] += 1
    paths[cleaned] += 1

# Report both tallies, most frequent first.
print("# Ends")
for end, count in ends.most_common():
    print(f"{count:4}: {end}")
print()
print("# Paths")
for path, count in paths.most_common():
    print(f"{count:4}: {path}")
Sign up for free to join this conversation on GitHub.
Already have an account? Sign in to comment