Last active
December 10, 2021 04:31
-
-
Save turnipsoup/b235a30ae4aa69c1fa634d6d7fc4b202 to your computer and use it in GitHub Desktop.
Checks a robots.txt file and gets the status code for each endpoint present.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests, sys | |
def check_status_code(url, endpoint, timeout=10):
    """Return the HTTP status code of a GET request to ``url + endpoint``.

    Args:
        url: Base URL; it is concatenated directly with ``endpoint``.
        endpoint: Path appended verbatim to ``url``.
        timeout: Seconds passed to ``requests.get`` so that an unresponsive
            host raises instead of blocking the whole run indefinitely.

    Returns:
        The ``status_code`` attribute of the response.

    Raises:
        Whatever ``requests.get`` raises (connection errors, timeouts,
        malformed URLs); this function does not catch anything.
    """
    return requests.get(f'{url}{endpoint}', timeout=timeout).status_code
def check_endpoint(endpoint):
    """Print ``<endpoint> -> <status code>`` for a single endpoint.

    The base URL is read from the module-level ``target_url`` that the
    ``__main__`` section assigns before any endpoint is checked.

    Args:
        endpoint: Path to request relative to ``target_url``.

    If the request fails, a failure line is printed instead of a status
    code so that the remaining endpoints are still checked.
    """
    try:
        status = check_status_code(target_url, endpoint)
    except Exception:
        # Deliberately broad (best-effort reporting), but not a bare
        # ``except:`` -- KeyboardInterrupt / SystemExit must still be able
        # to stop the run instead of being reported as a failed connection.
        print(endpoint, '->', 'Failed to connect at all')
    else:
        print(endpoint, '->', status)
def check_endpoint_list(endpoint_list):
    """Check every endpoint in ``endpoint_list``, in order.

    Each item is handed to ``check_endpoint``, which prints one result line
    per endpoint. Nothing is returned.
    """
    for path in endpoint_list:
        check_endpoint(path)
def extract_directive_values(lines, directive):
    """Return the non-empty values of every ``directive`` line in ``lines``.

    A line counts only when the text before its first ``:`` equals
    ``directive`` (case-insensitive, surrounding whitespace ignored), so
    ``Disallow`` lines are never mistaken for ``Allow`` lines and comment
    lines that merely mention the word are skipped.

    Args:
        lines: Iterable of robots.txt lines.
        directive: Directive name to collect, e.g. ``'allow'``.

    Returns:
        List of values in file order. Trailing ``# ...`` comments are
        removed, and directives with an empty value (``Disallow:``) are
        omitted because they name no endpoint to request.
    """
    wanted = directive.strip().lower()
    values = []
    for line in lines:
        # Everything from '#' onward is a comment in robots.txt.
        content = line.split('#', 1)[0]
        name, separator, value = content.partition(':')
        if not separator or name.strip().lower() != wanted:
            continue
        value = value.strip()
        if value:
            values.append(value)
    return values


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit(f'Usage: {sys.argv[0]} <url-of-robots.txt>')

    target_url_robots = sys.argv[1]
    # Base URL the endpoints are appended to; kept as a module-level name
    # because check_endpoint() reads it as a global.
    target_url = target_url_robots.split('/robots.txt')[0]

    # Fetch the robots.txt page; the timeout keeps a dead host from hanging
    # the script forever.
    r = requests.get(target_url_robots, timeout=10)
    # Undecodable bytes are replaced rather than aborting the whole run.
    robots_txt = r.content.decode(errors='replace')

    # Separate the file into stripped lines
    endpoints = [x.strip() for x in robots_txt.splitlines()]

    # Collect the paths named by Allow / Disallow directives
    clean_allows = extract_directive_values(endpoints, 'allow')
    clean_disallows = extract_directive_values(endpoints, 'disallow')

    # Check the endpoints and print to screen
    print("Endpoint -> Status Code")
    print("--------------------")
    print()
    print("Explicitly Allowed Somewhere")
    print("-----")
    check_endpoint_list(clean_allows)
    print()
    print("Explicitly Disallowed Somewhere")
    print("-----")
    check_endpoint_list(clean_disallows)
Author
turnipsoup
commented
Dec 7, 2021
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment