Created
May 21, 2019 09:09
-
-
Save djinn/a46d43e9c5d6223234cf2ee896329e57 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Supreet Sethi <[email protected]> | |
# Date: 21-05-2019 | |
from subprocess import check_output | |
from collections import defaultdict, Counter | |
from requests import get | |
from bs4 import BeautifulSoup as Soup | |
from urllib.parse import urljoin, urlparse | |
#do whois | |
import logging | |
import contextlib | |
try: | |
from http.client import HTTPConnection # py3 | |
except ImportError: | |
from httplib import HTTPConnection # py2 | |
def debug_requests_on():
    """Enable verbose wire-level logging for the requests library.

    Turns on http.client's connection debug output and raises the root
    and urllib3 loggers to DEBUG so every request/response is printed.
    """
    HTTPConnection.debuglevel = 1
    logging.basicConfig()
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.DEBUG)
    urllib3_logger = logging.getLogger("requests.packages.urllib3")
    urllib3_logger.setLevel(logging.DEBUG)
    urllib3_logger.propagate = True
def call(cmd, domain):
    """Run a lookup command (``whois`` or ``nslookup``) against *domain*
    and parse its stdout into a field mapping.

    Returns a ``defaultdict(list)`` keyed by lower-cased field name;
    missing keys yield an empty list.  Raises
    ``subprocess.CalledProcessError`` if the command exits non-zero.
    """
    output = check_output([cmd, domain]).decode("utf-8")
    return _parse_lookup_output(output)


def _parse_lookup_output(output):
    """Parse whois/nslookup style text into a defaultdict of field lists."""
    infodict = defaultdict(list)
    # splitlines() handles CRLF cleanly; the old strip('\r') only removed
    # carriage returns at the very ends of the whole output.
    for line in output.splitlines():
        # nslookup reports PTR records as "... name = host" and CNAMEs as
        # "... canonical name = host"; collect both under 'name'.
        # (The original second branch tested `find('name =') != 1`, a typo
        # for `!= -1` that was almost always true anyway.)
        if 'name =' in line:
            infodict['name'].append(line.split('=', 1)[1].strip())
            continue
        # Generic "key: value" fields (whois output, nslookup "Address:", ...).
        # split with maxsplit=1 so values containing ':' stay intact.
        if ':' in line:
            k, v = line.split(':', 1)
            infodict[k.strip().lower()].append(v.strip())
    return infodict
def whois(domain):
    """Return the list of name servers reported by ``whois`` for *domain*."""
    return call("whois", domain)['name server']
def reverse_nameip_lookup(domain):
    """Resolve *domain* to an IP, then reverse-resolve that IP to names.

    Returns the list of PTR names for the last resolved address,
    ``['unknown']`` when the reverse lookup yields no name, or
    ``[domain]`` when resolution fails entirely.
    """
    infodict = call("nslookup", domain)
    addresses = infodict['address']
    # Robustness fix: the original indexed [-1] unconditionally and raised
    # IndexError when nslookup returned no address records.
    if not addresses:
        return [domain]
    ip = addresses[-1]
    try:
        infodict = call("nslookup", ip)
    except Exception:
        # Narrowed from a bare `except:`; nslookup exits non-zero when the
        # IP has no PTR record, so fall back to the domain itself.
        return [domain]
    if 'name' in infodict:
        return infodict['name']
    return ['unknown']
def normalize_url(domain, ul):
    """Return an absolute URL for a tag's asset reference, or None.

    Checks the tag's attributes in priority order: ``data-src`` (lazy
    loading), then ``src``, then ``href``.  Absolute URLs are returned
    as-is; relative ones are resolved against *domain*.

    :param domain: base URL the page was fetched from
    :param ul: a BeautifulSoup tag exposing ``.attrs`` and item access
    """
    url = None
    for attr in ('data-src', 'src', 'href'):
        if attr in ul.attrs:
            url = ul[attr]
            break
    if url is None:
        return None
    # Bug fix: the original used `url.find('http') != -1`, which wrongly
    # treated any relative path merely *containing* "http" as absolute.
    if url.startswith('http'):
        return url
    return urljoin(domain, url)
def extract_urls(domain):
    """Fetch *domain* and return the absolute URLs of its page assets.

    Collects image sources, external script sources, and stylesheet
    links, in that order.  Returns an empty list unless the page
    responds with HTTP 200.
    """
    headers = {'User-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0'}
    response = get(domain, headers=headers)
    if response.status_code != 200:
        return []
    soup = Soup(response.content, 'lxml')
    assets = []
    for tag in soup.findAll('img'):
        assets.append(normalize_url(domain, tag))
    for tag in soup.findAll('script', src=True):
        assets.append(normalize_url(domain, tag))
    for tag in soup.findAll('link', {'rel': 'stylesheet'}):
        assets.append(normalize_url(domain, tag))
    return assets
def reverse_ip_url(urls):
    """Map each asset URL to the reverse-DNS name of its host.

    Filters out None entries, then returns a list parallel to the
    remaining URLs where each element is the first PTR name found for
    that URL's hostname, or 'unknown' when the hostname is absent.
    """
    urls = [u for u in urls if u is not None]
    hostnames = [urlparse(u.strip()).hostname for u in urls]
    resolved = {}
    for host in set(hostnames):
        if host is None:
            resolved[host] = 'unknown'
            # Bug fix: the original fell through here, clobbering the
            # 'unknown' entry and running nslookup on None.
            continue
        resolved[host] = reverse_nameip_lookup(host)[0]
    return [resolved[h] for h in hostnames]
def wallet_share(domains):
    """Compute each hosting provider's percentage share of *domains*.

    Domains are attributed to a provider by keyword match; domains that
    match no known provider are grouped under the key ``None``.
    Returns a dict mapping provider label -> percentage of the input.
    """
    providers = (
        (('amazonaws', 'cloudfront'), 'AWS'),
        (('1e100', 'google'), 'google'),
        (('facebook',), 'facebook'),
        (('azure',), 'MSFT'),
        (('akamai',), 'akamai'),
    )

    def classify(name):
        # First matching provider wins; unmatched names map to None.
        for keywords, label in providers:
            if any(k in name for k in keywords):
                return label
        return None

    total = len(domains)
    counts = Counter(classify(d) for d in domains)
    return {label: (count / total) * 100 for label, count in counts.items()}
if __name__ == '__main__':
    # Usage: python3 script.py <domain>
    # debug_requests_on()  # uncomment to trace HTTP traffic
    from sys import argv

    target = "https://" + argv[1] + "/"
    asset_urls = extract_urls(target)
    hosting_domains = reverse_ip_url(asset_urls)
    print(hosting_domains)
    print(wallet_share(hosting_domains))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment