Last active
January 11, 2022 23:06
-
-
Save scovetta/51e15537309d25f3617f908fcd2a1327 to your computer and use it in GitHub Desktop.
Basic script to scan a directory for .cmake and .json files and safely replace `http:` URLs with `https:`, where "safely" is defined as "making requests returns identical bytes".
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
The purpose of this script is to scan a directory (default: ports) for | |
port configuration files that contain http-based URLs, and convert | |
them to https if possible. | |
It only looks for files with an extension of .cmake or .json. | |
The "if possible" check is whether or not the bytes returned from a | |
GET request to the http URL are identical to those returned from a | |
GET request to the https URL. | |
The only external module needed is requests (pip install requests). | |
Author: Michael Scovetta <[email protected]> | |
License: MIT | |
Copyright: Microsoft Corporation | |
Last Updated: 1/3/2022 | |
""" | |
import os | |
import re | |
import sys | |
import requests | |
import hashlib | |
import logging | |
# Version string of this script.
VERSION = "1.0.0"

# Configure the root logger once at import time: emit DEBUG and above, and
# render every record as "<LEVELNAME> <message>".
logging.basicConfig(
    level=logging.DEBUG,
    format='%(levelname)s %(message)s',
)
class HttpsChecker:
    """Rewrite ``http://`` URLs found in files to ``https://`` when safe.

    A URL is considered safe to upgrade when a GET request to the http URL
    and a GET request to the corresponding https URL both succeed and return
    identical bytes.

    Attributes:
        updated_files: Paths of the files this instance has rewritten, each
            listed at most once, in the order they were first modified.
    """

    # Request timeout passed to every requests.get() call.
    REQUEST_TIMEOUT = 30

    # Extracts candidate URLs from raw file bytes.
    # This regex was modified from one found at:
    # https://stackoverflow.com/questions/55663644/how-to-search-for-a-href-from-a-text-file-through-python-regex
    _URL_PATTERN = re.compile(rb'http://(?:[-\w.\${}/]|(?:%[\da-fA-F]{2}))+')

    def __init__(self):
        # Kept per instance: a class-level list would be shared (and keep
        # growing) across every HttpsChecker that is ever created.
        self.updated_files = []

    @staticmethod
    def _to_https(http_link: str) -> str:
        """Return *http_link* with a leading ``http://`` swapped for ``https://``."""
        # count/flags are passed by keyword; passing them positionally is
        # deprecated in recent Python versions and easy to mix up.
        return re.sub('^http://', 'https://', http_link, count=1, flags=re.IGNORECASE)

    def _fetch(self, url: str):
        """Return the body bytes of a successful GET of *url*, or None on any error."""
        try:
            response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except Exception as msg:
            # Deliberately broad: any failure simply means "not upgradeable".
            logging.debug('Error loading %s: %s', url, msg)
            return None
        return response.content

    def is_upgradeable(self, http_link: str) -> bool:
        """
        Check to see if the http URL is upgradeable.

        Args:
            http_link: The ``http://`` URL to test.

        Returns:
            True iff both the http and the https request succeed and return
            identical bytes.
            False if they do not, if the URL contains a ``${`` variable
            reference, or on any error.
        """
        logging.debug('is_upgradeable(%s)', http_link)
        if '${' in http_link:
            logging.debug('URL is not upgradeable because it contains a variable.')
            return False

        http_body = self._fetch(http_link)
        if http_body is None:
            return False

        https_body = self._fetch(self._to_https(http_link))
        if https_body is None:
            return False

        if http_body != https_body:
            return False

        logging.info("Upgradeable URL: %s, Size=%d, Hash=%s", http_link,
                     len(http_body), hashlib.sha256(http_body).hexdigest())
        return True

    def process_file(self, filename):
        """
        Processes a given file. This means extracting URLs, checking each one, and
        modifying the file with the new URL if safe.

        The file is read and written as bytes, and is only rewritten when at
        least one URL was upgraded. A rewritten file is recorded once in
        ``self.updated_files`` regardless of how many URLs changed.

        Args:
            filename: Path of the file to process. Paths that are not regular
                files are skipped.

        Returns: None
        """
        logging.info('Processing file: %s', filename)
        if not os.path.isfile(filename):
            return

        with open(filename, 'rb') as f:
            content = f.read()

        # Ports that build URLs from variables (e.g. antlr4) need more work
        # and are ignored for now: is_upgradeable() rejects any URL that
        # contains '${'.
        urls = self._URL_PATTERN.findall(content)
        logging.debug("Found %d URLs: [%s]", len(urls), urls)

        # Map each verified http URL (bytes) to its https replacement (bytes).
        # Sorting makes the order of network checks and log lines deterministic.
        replacements = {}
        for url in sorted(set(urls)):
            url_str = url.decode('utf-8')
            if self.is_upgradeable(url_str):
                logging.debug('URL [%s] was upgradeable.', url_str)
                replacements[url] = self._to_https(url_str).encode('utf-8')
            else:
                logging.debug('URL [%s] was NOT upgradeable.', url_str)

        if not replacements:
            return

        # Substitute whole regex matches only. A plain bytes.replace() would
        # also rewrite a verified URL wherever it occurs as the prefix of a
        # longer URL that was never verified (or that failed verification).
        def _swap(match):
            found = match.group(0)
            return replacements.get(found, found)

        updated_content = self._URL_PATTERN.sub(_swap, content)
        with open(filename, 'wb') as f:
            f.write(updated_content)

        # Record the file once, not once per upgraded URL.
        if filename not in self.updated_files:
            self.updated_files.append(filename)
if __name__ == '__main__':
    # Directory to scan: the first command-line argument when one is given,
    # otherwise the default 'ports'.
    cli_args = sys.argv[1:]
    directory = cli_args[0] if cli_args else 'ports'
    if not os.path.isdir(directory):
        print("Usage: python update_https.py DIRECTORY")
        sys.exit(1)

    checker = HttpsChecker()

    # Only files with these extensions are examined.
    wanted_suffixes = ('.cmake', '.json')
    for root, _, files in os.walk(directory):
        for name in files:
            if name.endswith(wanted_suffixes):
                checker.process_file(os.path.join(root, name))

    print("Updated %d file(s)." % len(checker.updated_files))
    sys.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment