Skip to content

Instantly share code, notes, and snippets.

@LarsKumbier
Created August 13, 2018 10:46
Show Gist options
  • Save LarsKumbier/2ace3009e70ec963271b766151318c92 to your computer and use it in GitHub Desktop.
Save LarsKumbier/2ace3009e70ec963271b766151318c92 to your computer and use it in GitHub Desktop.
When HSTS is activated, every link to the site is rewritten from http to https when it is clicked. With badly written rewrite rules this can lead to redirect loops, where an https page redirects to http and is upgraded back to https again. This script takes a list of URLs and checks whether any of them run into such a redirect loop once HSTS is activated.
from urllib.parse import urlparse
import requests
import sys
# Upper bound on the number of requests made for one start URL; once the
# redirect counter reaches this value the check gives up and reports failure.
MAX_REDIRECTS = 8

# Path of the text file that lists the URLs to check, one per line.
URL_LIST = 'urls.txt'
def requestUrl(url, counter=0, timeout=10):
    """Follow the redirect chain of *url* with every hop upgraded to https.

    Each URL in the chain has a leading ``http:`` scheme replaced by
    ``https:`` before it is requested, and redirects are followed manually
    (``allow_redirects=False``) so that every ``Location`` target can be
    upgraded as well.

    Args:
        url: URL to start from.
        counter: Number of redirects already followed; also used as the
            indentation width of diagnostic output.
        timeout: Seconds to wait for each request before it is treated as
            a failure, so that an unresponsive host cannot hang the check.

    Returns:
        True if a non-redirect response is reached before ``counter``
        hits ``MAX_REDIRECTS``; False if the limit is reached or a request
        fails (a diagnostic line is printed in the failure case).
    """
    # Local import keeps this function self-contained; the file-level import
    # list only provides urlparse.
    from urllib.parse import urljoin

    def upgradeUrl(url):
        """Return *url* with a leading ``http:`` scheme turned into ``https:``."""
        # Only the scheme is rewritten: a plain str.replace would also change
        # 'http:' inside query strings such as '?next=http://example.org'.
        if url[:5].lower() == 'http:':
            return 'https:' + url[5:]
        return url

    def ensureAbsoluteUrl(previousUrl, nextUrl):
        """Resolve a possibly relative ``Location`` value against *previousUrl*."""
        # urljoin covers absolute URLs, '/path', 'relative/path' and
        # scheme-relative '//host/path' targets alike.
        return urljoin(previousUrl, nextUrl)

    while counter < MAX_REDIRECTS:
        # Computed outside the try block so the handlers can always print it.
        upgradedUrl = upgradeUrl(url)
        try:
            response = requests.get(
                upgradedUrl, allow_redirects=False, timeout=timeout)
        except requests.exceptions.MissingSchema:
            print(' ' * counter, '! URL did not have a valid schema: ', upgradedUrl)
            return False
        except Exception as error:
            # Best effort: any request failure marks the URL as bad, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            print(' ' * counter, '! Generic Error on url ', upgradedUrl)
            print(' ' * counter, '! Error was: ', type(error))
            return False

        if not response.is_redirect:
            return True

        # Resolve against the URL that was actually requested; the result is
        # upgraded at the top of the next iteration.
        url = ensureAbsoluteUrl(upgradedUrl, response.headers['Location'])
        counter += 1

    # Redirect limit reached without a final response.
    return False
# Read the whole URL list first so the file is closed before any network
# requests are made.
with open(URL_LIST) as filehandle:
    # strip() removes the newline as well as stray surrounding whitespace.
    lines = [line.strip() for line in filehandle]

for url in lines:
    # Blank lines are not URLs; skip them instead of reporting a failure.
    if not url:
        continue
    status = '[ok] ' if requestUrl(url) else '[!!] '
    print(status, url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment