Skip to content

Instantly share code, notes, and snippets.

@alias454
Last active June 24, 2018 07:16
Show Gist options
  • Save alias454/bd9870c35d25080c32587ca349556819 to your computer and use it in GitHub Desktop.
Save alias454/bd9870c35d25080c32587ca349556819 to your computer and use it in GitHub Desktop.
Check redirects and return status of active webpages
#!/usr/bin/env python3
'''
Script to check redirects and return the status of each contacted page.
The input file should contain one domain per line.
'''
from urllib.parse import urlparse
import requests
# Path of the text file that is read for the list of domains/URLs to check.
input_file = 'domains.txt'
# Path of the report file; it is opened in 'w' mode, so any existing content is overwritten.
output_file = 'output.txt'
def get_domain(url):
    '''
    Return the base of ``url`` in the form ``scheme://netloc/``.

    The path, query string and fragment are dropped; the network location
    (host plus any port or credentials) is kept exactly as written.
    '''
    parsed_uri = urlparse(url)
    return f'{parsed_uri.scheme}://{parsed_uri.netloc}/'
def get_domain_status(url, timeout=0.5):
    '''
    Request ``url`` and summarise the outcome as a single string.

    Parameters:
        url: absolute URL to fetch (scheme included).
        timeout: value passed to ``requests`` as the request timeout.
            Defaults to 0.5, the value this script has always used.

    Returns:
        * No redirects: the final status code as a string.
        * One or more redirects: a comma-separated string
          ``"<first status>, <redirect count>, <base URL>, <chain>, <final URL>"``
          where ``<chain>`` lists every redirecting URL, each followed by
          ``' => '``.
        * On a request failure: a bracketed label such as ``'[Timeout]'``,
          ``'[TooManyRedirects]'`` or ``'[ConnectionError]'``; any other
          ``requests`` error is labelled with its exception class name.
    '''
    try:
        # The context manager closes the session when the block exits,
        # including when the request raises.
        with requests.Session() as session:
            res = session.get(url, timeout=timeout)
            print("Processing " + url)
            if not res.history:
                return str(res.status_code)
            # Report the status of the first hop, i.e. the redirect itself.
            code = res.history[0].status_code
            domain = get_domain(res.url)
            chain = ''.join(resp.url + ' => ' for resp in res.history)
            return ", ".join(
                [str(code), str(len(res.history)), domain, chain, res.url]
            )
    except requests.exceptions.Timeout:
        return '[Timeout]'
    except requests.exceptions.TooManyRedirects:
        return '[TooManyRedirects]'
    except requests.ConnectionError:
        return '[ConnectionError]'
    except requests.exceptions.RequestException as exc:
        # Any other requests failure (e.g. an invalid URL) is reported in
        # the same bracketed style instead of aborting the whole run.
        return '[' + type(exc).__name__ + ']'
# Read the input first: if it is missing or unreadable the script stops
# before the output file is opened in 'w' mode and truncated.
with open(input_file, "r") as domain_list_file:
    urls = domain_list_file.read().splitlines()

# Write one report line per input entry. Rows for entries that were not
# redirected carry only the starting URL and the returned status string.
with open(output_file, 'w') as results_file:
    results_file.write('starting_URL, status, redirects, base_URL, chain, final_URL\n')
    for url in urls:
        url = url.strip()
        if not url:
            # Blank lines would otherwise be requested as "http://".
            continue
        # Test for a real scheme prefix; a bare 'http' prefix also matches
        # host names such as "httpbin.org", which still need a scheme added.
        has_scheme = url.lower().startswith(('http://', 'https://'))
        check_url = url if has_scheme else "http://%s" % url
        result = get_domain_status(check_url)
        results_file.write(url + ", " + str(result) + "\n")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment