Created
April 10, 2012 20:29
-
-
Save rgov/2354241 to your computer and use it in GitHub Desktop.
Creates a fingerprint of a PHP server by analyzing the output of the credits Easter egg
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import hashlib
import pprint
import sys

import requests
from bs4 import BeautifulSoup
def normalize(s):
    """Return the hex MD5 digest of *s* with surrounding whitespace removed.

    The text is stripped and UTF-8 encoded before hashing, so values that
    differ only in leading/trailing whitespace produce the same digest.
    MD5 is used purely as a compact fingerprint here, not for security.
    """
    return hashlib.md5(s.strip().encode('utf-8')).hexdigest()
def parse_credits_page(base_url, timeout=30):
    """Fingerprint a PHP server from its "PHP Credits" Easter-egg page.

    Any query string on *base_url* is replaced by the credits Easter-egg
    query, the page is fetched, and every HTML table that has a heading is
    reduced to MD5 digests (via ``normalize``).

    Args:
        base_url: URL of a page served by the PHP installation to probe.
        timeout: Seconds to wait for the server before ``requests`` gives up.

    Returns:
        A dict keyed by the text of each table's first ``<th>``.  For
        tables with ``td.e`` / ``td.v`` cell pairs the value is a dict
        mapping the stripped ``td.e`` text to the digest of the ``td.v``
        text; for tables whose rows carry only a ``td.e`` cell the value is
        the digest of that cell's text.

    Raises:
        Exception: If the response does not contain the credits heading.
    """
    # Drop any existing query string, then append the Easter-egg query.
    url = base_url.partition('?')[0]
    url += '?=PHPB8B5F2A0-3C92-11d3-A3A9-4C7B08C10000'

    # Request the contents of the page; a timeout keeps an unresponsive
    # server from hanging the caller forever.
    r = requests.get(url, timeout=timeout)
    if '<h1>PHP Credits</h1>' not in r.text:
        raise Exception('Server does not respond to Easter egg')

    # Parse it into a structure.  The parser is named explicitly so the
    # result does not depend on which optional parsers are installed.
    struct = {}
    soup = BeautifulSoup(r.text, 'html.parser')
    for table in soup.find_all('table'):
        heading = table.find('th')
        if heading is None:
            # No heading means there is nothing to key this table by.
            continue
        header = heading.text
        struct[header] = {}
        for row in table.find_all('tr'):
            # bs4 exposes ``class`` as a list of class names (or nothing at
            # all), so test membership rather than string equality.
            if 'h' in (row.get('class') or []):
                continue
            if row.find('th'):
                continue
            e, v = row.find('td', 'e'), row.find('td', 'v')
            if e is None:
                # Row has no name cell to fingerprint.
                continue
            if v is None:
                # Single-column table: the section is one digest.
                struct[header] = normalize(e.text)
            else:
                struct[header][e.text.strip()] = normalize(v.text)
    return struct
if __name__ == '__main__':
    # Probe the URL given as the first command-line argument, falling back
    # to the original hard-coded default when none is supplied.
    target = sys.argv[1] if len(sys.argv) > 1 else 'http://en.wikipedia.org/'
    pprint.pprint(parse_credits_page(target))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment