Created
June 27, 2017 23:01
-
-
Save inchoate/2f8649c517be41117a98cb8141f8b003 to your computer and use it in GitHub Desktop.
Checks remote resources
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Checks to see if media URIs listed in the given file are valid. | |
Warning: this code is untested and rough around the edges.

"Valid" means:
  - the file is no larger than 32 MB (32,000,000 bytes)
  - the file exists at the URI

Usage:
    # check all URLs in the `your-uri-file.txt` and output failures only.
    python check_media.py your-uri-file.txt

    # check all URLs in the `your-uri-file.txt` and output all results.
    python check_media.py your-uri-file.txt --all
""" | |
import argparse | |
import logging | |
import requests | |
import sys | |
import time | |
def check_remote_resource(uri, max_bytes=32000000, timeout=30):
    """Validate that a remote resource exists and is not too large.

    Sends an HTTP HEAD request for ``uri``. Connection errors and timeouts
    are retried with exponential backoff: the function sleeps 1, 2, 4 and
    8 seconds after successive failures and then gives up.

    Args:
        uri: Address of the resource to check.
        max_bytes: Largest acceptable ``Content-Length`` value, in bytes.
        timeout: Seconds to wait for the server on each attempt, passed
            straight to ``requests.head``. Without it a stalled server
            would block the check indefinitely.

    Returns:
        A ``(status, msg)`` tuple. ``status`` is True only when the
        response code is 2xx and a ``Content-Length`` header is present,
        parseable and not greater than ``max_bytes``. ``msg`` is a list of
        strings describing each problem found (``["OK"]`` when there are
        none), or a single ``"[FAIL] ..."`` string when no response could
        be obtained at all.
    """
    response = None
    backoff = 1
    while response is None and backoff <= 8:
        try:
            response = requests.head(uri, timeout=timeout)
        except (requests.ConnectionError, requests.Timeout):
            # logging.warn() is a deprecated alias that newer Python
            # versions no longer provide; warning() is the supported name.
            logging.warning(
                "[WARN] Exception fetching '%s'. Backing off and retrying",
                uri)
            time.sleep(backoff)
            backoff *= 2

    if response is None:
        # Every attempt failed, so there is nothing to inspect.
        return False, "[FAIL] Cannot check file: {}".format(uri)

    msg, status = [], True

    if not 200 <= response.status_code <= 299:
        msg.append("Cannot fetch file")
        msg.append("Status code = {}".format(response.status_code))
        msg.append("Headers = {}".format(response.headers))
        status = False

    content_length = response.headers.get("Content-Length")
    if content_length is None:
        msg.append("No Content Length. Verify by hand.")
        status = False
    else:
        try:
            size = int(content_length)
        except ValueError:
            # A malformed header must be reported, not crash the whole run.
            msg.append("Invalid Content Length: {}".format(content_length))
            status = False
        else:
            if size > max_bytes:
                msg.append("File too large")
                status = False

    return status, msg or ["OK"]
def main():
    """Check every URI listed in the file named on the command line.

    Command-line arguments:
        urls: Path to a text file containing one URI per line.
        --all: Flag; when given, successful checks are printed as well as
            failures.

    Each non-blank line is stripped of surrounding whitespace and passed
    to ``check_remote_resource``; one result line is printed per URI that
    failed (or per URI, with ``--all``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("urls", help="File with list of URIs to check.")
    # store_true makes --all a real flag, matching the documented usage
    # ``check_media.py your-uri-file.txt --all``; without it argparse
    # demands a value after --all.
    parser.add_argument(
        "--all",
        action="store_true",
        help="Prints all, not just failures.")
    args = parser.parse_args()

    # Text mode yields str URLs (binary mode would give bytes on Python 3),
    # and the context manager guarantees the file is closed.
    with open(args.urls) as infile:
        for line in infile:
            url = line.strip()
            if not url:
                # Blank lines are not URIs; skip them rather than request "".
                continue
            status, msg = check_remote_resource(url)
            if (not status) or args.all:
                print("[{}]: {} => {}".format(
                    "SUCCESS" if status else "FAILURE",
                    url,
                    msg))
# Run the checker only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment