Last active
July 8, 2022 02:08
-
-
Save KokoseiJ/04f575758eb6dec569018f6080ebea35 to your computer and use it in GitHub Desktop.
Check the MD5 hashes of batch-downloaded archive.org files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import sys | |
import requests | |
from hashlib import md5 | |
from xml.etree.ElementTree import fromstring | |
# Verify files in the current directory against the MD5 checksums listed in
# an archive.org "<identifier>_files.xml" manifest.
#
# Usage: python <script> xml_url
#   xml_url may be either the manifest URL itself or the item's
#   "https://archive.org/download/<identifier>" page, in which case the
#   manifest URL is derived from the identifier.

# Item download page, optionally with a trailing slash. Identifiers commonly
# contain digits, underscores and periods, so those are accepted as well as
# letters and dashes.
DOWNLOAD_PAGE_RE = re.compile(
    r"https?://(?:www\.)?archive\.org/download/([A-Za-z0-9._-]+)/?"
)


def resolve_files_xml_url(url):
    """Return the manifest URL for *url*.

    If *url* is an item download page, append "<identifier>_files.xml" to
    it; any other URL is returned unchanged and assumed to already point at
    the XML manifest.
    """
    match = DOWNLOAD_PAGE_RE.fullmatch(url)
    if not match:
        return url
    identifier = match.group(1)
    # Strip a trailing slash so the joined URL never contains "//".
    return f"{url.rstrip('/')}/{identifier}_files.xml"


def expected_hashes(xml_data, local_names):
    """Map each locally present file name to its MD5 from the manifest.

    *xml_data* is the manifest as ``str`` or ``bytes``; *local_names* is an
    iterable of file names available for checking. ``<file>`` entries that
    have no ``name`` attribute or no non-empty ``<md5>`` child are skipped
    instead of raising.
    """
    present = set(local_names)  # O(1) membership tests
    hashes = {}
    for entry in fromstring(xml_data).findall("file"):
        name = entry.get("name")
        md5_node = entry.find("md5")
        if name not in present or md5_node is None or not md5_node.text:
            continue
        # hexdigest() is lowercase, so normalise the manifest value to match.
        hashes[name] = md5_node.text.strip().lower()
    return hashes


def md5_of_file(path, chunk_size=1 << 20):
    """Return the hex MD5 digest of the file at *path*.

    The file is read in *chunk_size*-byte pieces so that large downloads do
    not have to fit in memory, and the handle is always closed.
    """
    digest = md5()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def main(argv):
    """Run the check for ``argv`` (same layout as ``sys.argv``).

    Returns the process exit status: 1 when the URL argument is missing,
    0 once the check has run (regardless of mismatches, which are reported
    on stdout).
    """
    if len(argv) < 2:
        print(f"Usage: {sys.executable} {argv[0]} xml_url")
        return 1

    url = resolve_files_xml_url(argv[1])
    response = requests.get(url, timeout=30)
    # Fail loudly on 404 and similar rather than trying to parse an error page.
    response.raise_for_status()

    # Only regular files can be hashed; a directory whose name happens to
    # match a manifest entry would make open() fail.
    local_files = {name for name in os.listdir() if os.path.isfile(name)}
    # Pass the raw bytes so the XML parser applies the document's own
    # encoding declaration instead of a charset guessed from HTTP headers.
    hashmap = expected_hashes(response.content, local_files)

    failed = []
    for name, hashval in hashmap.items():
        print(name, end="... ")
        if hashval == md5_of_file(name):
            print("OK!")
        else:
            print("FAILED")
            failed.append(name)

    print("\n -================================- \n")
    if failed:
        print("Files with mismatching hashes:", *failed, sep="\n")
    else:
        print("No hash mismatches!")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment