Skip to content

Instantly share code, notes, and snippets.

@averagesecurityguy
Created August 25, 2014 19:17
Show Gist options
  • Save averagesecurityguy/4c791fb48eedb143bb19 to your computer and use it in GitHub Desktop.
Save averagesecurityguy/4c791fb48eedb143bb19 to your computer and use it in GitHub Desktop.
URL Scanner in Python
#!/usr/bin/env python
#
# The requests module is probably already on your system. If it is not,
# then run: easy_install requests
#
import sys
import requests
def checkUrl(url):
    """Send a HEAD request to *url* and print a one-line result.

    The request uses a 2 second timeout, skips TLS certificate
    verification and does not follow redirects, so a redirect is reported
    as such rather than as the status of its target.

    Output format (one line per call, written to stdout):
        2XX/4XX/5XX: <url>            status code in that class
        3XX: <url> -> <location>      redirect and its Location header
        <code>: <url>                 any status outside 200-599
        CXN: <url>                    connection error
        TMO: <url>                    request timed out
        UNK: <url> - <error>          any other exception

    Nothing is returned and no exception propagates to the caller.
    """
    try:
        # If you get too many TMO errors try increasing the timeout value.
        resp = requests.head(url, timeout=2, verify=False,
                             allow_redirects=False)
        code = resp.status_code

        if 200 <= code <= 299:
            print('2XX: {0}'.format(url))
        elif 300 <= code <= 399:
            # Not every 3XX carries a Location header (e.g. 304 Not
            # Modified); indexing the header directly raised KeyError and
            # mis-reported such responses as UNK.
            location = resp.headers.get('location', '(no Location header)')
            print('3XX: {0} -> {1}'.format(url, location))
        elif 400 <= code <= 499:
            print('4XX: {0}'.format(url))
        elif 500 <= code <= 599:
            print('5XX: {0}'.format(url))
        else:
            print('{0}: {1}'.format(code, url))

    except requests.exceptions.ConnectionError:
        print('CXN: {0}'.format(url))
    except requests.exceptions.Timeout:
        print('TMO: {0}'.format(url))
    except Exception as e:
        # Deliberate catch-all: one bad URL must not abort a whole scan.
        print('UNK: {0} - {1}'.format(url, str(e)))
if __name__ == '__main__':
    # Expect exactly one argument: a text file with one URL per line.
    if len(sys.argv) != 2:
        print('Usage: url_scan.py url_file')
        # Stop here; previously execution fell through and crashed on
        # sys.argv[1] when no argument was supplied.
        sys.exit(1)

    # The with-block guarantees the file is closed when scanning finishes.
    with open(sys.argv[1]) as url_file:
        for line in url_file:
            url = line.rstrip('\r\n')

            # Skip empty lines, comments, and lines that do not begin with
            # http. The startswith test covers all three cases: an empty
            # string or a line starting with '#' never starts with 'http'.
            if not url.startswith('http'):
                continue

            checkUrl(url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment