Skip to content

Instantly share code, notes, and snippets.

@taikedz
Created April 4, 2026 12:03
Show Gist options
  • Select an option

  • Save taikedz/163afffc42df99432576d8d5d9959ee5 to your computer and use it in GitHub Desktop.

Select an option

Save taikedz/163afffc42df99432576d8d5d9959ee5 to your computer and use it in GitHub Desktop.
Show Wikipedia ZIMs
#!/usr/bin/env python3
import os
import re
import argparse
def download():
    """Fetch the Wikipedia ZIM directory listing and save it as ``zim.html``.

    Runs ``wget`` against the Wikimedia Kiwix dump index and writes the
    result to ``zim.html`` in the current working directory.

    Raises:
        FileNotFoundError: if the ``wget`` executable cannot be found.
        subprocess.CalledProcessError: if ``wget`` exits with a non-zero
            status, so a failed refresh is reported instead of being
            silently ignored.
    """
    # Local import keeps this block self-contained; the module header only
    # imports os, re and argparse.
    import subprocess

    url = "https://dumps.wikimedia.org/kiwix/zim/wikipedia/"
    # An argument list (no shell) avoids shell parsing of the command line.
    subprocess.run(["wget", url, "-O", "zim.html"], check=True)
def extract(lines: list[str]) -> list[tuple[str, str, int]]:
    """Pull ``(name, date, size)`` records out of directory-listing lines.

    A line is kept only when it *starts* with an ``<a ...>NAME</a>`` anchor
    followed by a ``DD-Mon-YYYY`` date with a five-character time field and
    an integer size; every other line is ignored.

    Args:
        lines: Lines of the listing HTML.

    Returns:
        One ``(name, date, size)`` tuple per matching line, in input order.
        ``name`` is the anchor text, ``date`` the date/time text as written
        in the listing, and ``size`` the trailing number converted to ``int``.
    """
    pat = re.compile(r"<a [^>]+>([a-zA-Z0-9_.-]+?)</a>\s+([0-9]{2}-[A-Za-z]{3}-[0-9]{4} [0-9:]{5})\s+([0-9]+)")
    # match() anchors at the start of each line, exactly like the listing rows.
    matches = (pat.match(line) for line in lines)
    return [(m.group(1), m.group(2), int(m.group(3))) for m in matches if m]
def parseargs():
    """Parse the command line.

    Returns:
        An ``argparse.Namespace`` with:
        ``update`` -- True when ``--update`` / ``-u`` was given;
        ``filters`` -- list of zero or more positional filter words.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--update", "-u", action="store_true")
    cli.add_argument("filters", nargs="*")
    namespace = cli.parse_args()
    return namespace
def ithas(name, targets):
    """Tell whether ``name`` contains every target as an underscore-delimited token.

    Each target ``t`` is looked up as the substring ``_t_``, so it must be
    surrounded by underscores inside ``name``. With no targets, every name
    is accepted.

    Args:
        name: File name to test.
        targets: Filter words that must all be present.

    Returns:
        True when all targets are found (or there are none), else False.
    """
    for wanted in targets:
        if f"_{wanted}_" not in name:
            return False
    return True
def main():
    """Print the ZIM files from the cached listing that match the CLI filters.

    Steps:
      1. Parse the command line.
      2. If ``--update`` was given, refresh ``zim.html`` via ``download()``.
      3. Read ``zim.html`` and extract ``(name, date, size)`` records.
      4. For every record whose name passes ``ithas`` with the given
         filters, print ``<size>\\t<date>\\t<name>``, the size being the
         human-readable form left-justified to 10 characters.

    Raises:
        SystemExit: if ``zim.html`` does not exist, with a hint to run the
            script with ``--update`` first.
    """
    args = parseargs()

    if args.update:
        download()

    try:
        with open("zim.html") as fh:
            refs = extract(fh.readlines())
    except FileNotFoundError:
        # A first run without --update has no cached listing; say so plainly
        # instead of dumping a traceback.
        raise SystemExit("zim.html not found; run with --update to download the listing") from None

    # Plain loop: printing is a side effect, not a list to be built.
    for name, date, bytesize in refs:
        if ithas(name, args.filters):
            size = as_size(bytesize)
            print(f"{size.ljust(10)}\t{date}\t{name}")
def as_size(bytesize):
    """Format a byte count as a human-readable string with two decimals.

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit (TB) is reached, so very large sizes are reported in TB
    rather than running off the end of the unit table.

    Args:
        bytesize: Size in bytes.

    Returns:
        A string such as ``"512.00 B"``, ``"1.50 KB"`` or ``"2048.00 TB"``.
    """
    units = ["B", "KB", "MB", "GB", "TB"]
    value = bytesize
    index = 0
    last = len(units) - 1
    # Stop at the last unit: there is nothing larger to scale into.
    while value >= 1024 and index < last:
        value /= 1024
        index += 1
    return f"{value:.2f} {units[index]}"
# Run the CLI only when executed as a script, so importing this module
# (e.g. to reuse its helpers) does not trigger argument parsing or file I/O.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment