Created
April 4, 2026 12:03
-
-
Save taikedz/163afffc42df99432576d8d5d9959ee5 to your computer and use it in GitHub Desktop.
Show Wikipedia ZIMs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import os | |
| import re | |
| import argparse | |
def download():
    """Fetch the Wikipedia ZIM directory listing and save it as ``zim.html``.

    Shells out to ``wget``; the page is written to ``zim.html`` in the
    current working directory. The command's exit status is not inspected.
    """
    command = "wget https://dumps.wikimedia.org/kiwix/zim/wikipedia/ -O zim.html"
    os.system(command)
def extract(lines: list[str]) -> list[tuple[str, str, int]]:
    """Pull ``(name, date, bytesize)`` entries out of directory-listing lines.

    Args:
        lines: Lines of the saved listing page.

    Returns:
        One tuple per line that begins with an anchor tag whose link text is
        followed by a ``DD-Mon-YYYY HH:MM`` style timestamp and a byte count.
        The tuple holds the link text, the timestamp text, and the byte count
        as an ``int``. Lines that do not match are skipped.
    """
    pat = re.compile(r"<a [^>]+>([a-zA-Z0-9_.-]+?)</a>\s+([0-9]{2}-[A-Za-z]{3}-[0-9]{4} [0-9:]{5})\s+([0-9]+)")
    # ``match`` anchors at the start of each line, so only lines that open
    # with the anchor tag are considered.
    return [
        (m.group(1), m.group(2), int(m.group(3)))
        for line in lines
        if (m := pat.match(line))
    ]
def parseargs():
    """Parse the command line.

    Returns:
        The parsed namespace with ``update`` (bool, set by ``--update``/``-u``)
        and ``filters`` (list of zero or more positional strings).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--update", "-u", action="store_true")
    cli.add_argument("filters", nargs="*")
    parsed = cli.parse_args()
    return parsed
def ithas(name, targets):
    """Tell whether *name* contains every target as an underscore-delimited token.

    Each target ``t`` is looked up as the substring ``_t_``. An empty
    *targets* list matches every name.
    """
    # all() over an empty iterable is True, which covers the no-filter case.
    return all(f"_{token}_" in name for token in targets)
def main():
    """Print the ZIM files in the cached listing that match the CLI filters.

    With ``--update`` the listing is downloaded first. The cached
    ``zim.html`` is then parsed, and each entry whose name passes the
    filters is printed as ``<size>\\t<date>\\t<name>``.

    Raises:
        SystemExit: If ``zim.html`` does not exist yet.
    """
    args = parseargs()
    if args.update:
        download()

    try:
        with open("zim.html") as fh:
            refs = extract(fh.readlines())
    except FileNotFoundError:
        # Without a cached listing there is nothing to show; say how to get one
        # instead of dumping a traceback.
        raise SystemExit(
            "zim.html not found; run with --update to download the listing first"
        ) from None

    # A plain loop: printing is a side effect, so no list needs to be built.
    for name, date, bytesize in refs:
        if ithas(name, args.filters):
            size = as_size(bytesize)
            print(f"{size.ljust(10)}\t{date}\t{name}")
def as_size(bytesize):
    """Format a byte count as a human-readable string with two decimals.

    The value is scaled by powers of 1024 through B, KB, MB, GB and TB.
    Anything too large for TB is still reported in TB rather than running
    off the end of the unit table.

    Args:
        bytesize: Size in bytes.

    Returns:
        A string such as ``"1.50 KB"``.
    """
    units = ["B", "KB", "MB", "GB", "TB"]
    value = float(bytesize)
    index = 0
    # Stop at the last unit so the index can never leave the table, and use
    # ">=" so exactly 1024 of a unit rolls over to the next one.
    while value >= 1024 and index < len(units) - 1:
        value /= 1024
        index += 1
    return f"{value:.2f} {units[index]}"
# Run only when executed as a script, so importing the module has no side effects.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment