#!/usr/bin/env python3
# Supreet Sethi <[email protected]>
# Date: 21-05-2019
from subprocess import check_output
from collections import defaultdict, Counter
from requests import get
from bs4 import BeautifulSoup as Soup
from urllib.parse import urljoin, urlparse
# Shells out to whois/nslookup; see call() below.
import logging
try:
    from http.client import HTTPConnection  # py3
except ImportError:
    from httplib import HTTPConnection  # py2

def debug_requests_on():
    '''Switches on logging of the requests module.'''
    HTTPConnection.debuglevel = 1
    logging.basicConfig()
    logging.getLogger().setLevel(logging.DEBUG)
    requests_log = logging.getLogger("requests.packages.urllib3")
    requests_log.setLevel(logging.DEBUG)
    requests_log.propagate = True

def call(cmd, domain):
    output = check_output([cmd, domain]).decode("utf-8")
    infodict = defaultdict(list)
    for line in output.splitlines():
        # Both "name = host" and "canonical name = host" lines from nslookup
        # carry a hostname after the '='; collect them under 'name'.
        # (The original second branch compared find() against 1, a typo for -1.)
        if 'name =' in line:
            infodict['name'].append(line.split('=')[1].strip())
            continue
        # Everything else is parsed as "Key: value" pairs.
        if ':' not in line:
            continue
        k, v = line.split(':', 1)
        infodict[k.strip().lower()].append(v.strip())
    return infodict
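# Illustrative shape of the parsed output; the exact keys depend on the local
# nslookup/whois formatting, so treat these values as hypothetical:
#   call("nslookup", "example.com")
#   -> defaultdict(list, {'server': ['8.8.8.8'],
#                         'address': ['8.8.8.8#53', '93.184.216.34']})
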
def whois(domain):
    infodict = call("whois", domain)
    return infodict['name server']
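# Illustrative, hypothetical values (whois output varies by registry):
#   whois("example.com") -> ['a.iana-servers.net', 'b.iana-servers.net']
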
def reverse_nameip_lookup(domain):
    infodict = call("nslookup", domain)
    ip = infodict['address'][-1]
    try:
        infodict = call("nslookup", ip)
    except Exception:
        # nslookup exits non-zero when the address has no PTR record
        return [domain]
    if 'name' in infodict:
        return infodict['name']
    return ['unknown']
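# Sketch of the flow: forward-resolve the domain, take the last listed
# address, then reverse-resolve that address. For a CDN-hosted asset the PTR
# name often exposes the operator, e.g. a hostname ending in
# '.r.cloudfront.net' (hypothetical example).
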
def normalize_url(domain, ul):
    url = None
    if 'data-src' in ul.attrs:
        url = ul['data-src']
    elif 'src' in ul.attrs:
        url = ul['src']
    elif 'href' in ul.attrs:
        url = ul['href']
    if url is None:
        return None
    # Only treat the URL as absolute when it actually starts with a scheme;
    # the original substring check also matched 'http' anywhere in a path.
    if url.startswith('http'):
        return url
    return urljoin(domain, url)
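# Example of the urljoin fallback for relative references:
#   urljoin('https://example.com/', '/static/app.css')
#   -> 'https://example.com/static/app.css'
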
def extract_urls(domain):
    user_agent = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0'}
    ct = get(domain, headers=user_agent)
    if ct.status_code != 200:
        return []
    soup = Soup(ct.content, 'lxml')
    img = [normalize_url(domain, i) for i in soup.find_all('img')]
    js = [normalize_url(domain, j) for j in soup.find_all('script', src=True)]
    css = [normalize_url(domain, c) for c in soup.find_all('link', {'rel': 'stylesheet'})]
    return img + js + css
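# Illustrative, hypothetical result (depends entirely on the live page):
#   extract_urls('https://example.com/')
#   -> ['https://cdn.example.com/logo.png',
#       'https://example.com/static/app.js',
#       'https://example.com/static/app.css']
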
def reverse_ip_url(urls):
    urls = [u for u in urls if u is not None]
    domains = [urlparse(u.strip()).hostname for u in urls]
    lp = {}
    for host in set(domains):
        print(host)  # progress output
        if host is None:
            lp[host] = 'unknown'
            continue  # the original fell through and looked up None
        lp[host] = reverse_nameip_lookup(host)[0]
    return [lp[d] for d in domains]
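# Illustrative mapping (hypothetical hostnames): an asset served from
# 'cdn.example.com' whose IP reverse-resolves to an AWS edge would contribute
# something like 'server-1-2-3-4.sin2.r.cloudfront.net' to the result list.
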
def wallet_share(domains):
    def inner_func(d):
        if 'amazonaws' in d or 'cloudfront' in d:
            return 'AWS'
        elif '1e100' in d or 'google' in d:
            return 'google'
        elif 'facebook' in d:
            return 'facebook'
        elif 'azure' in d:
            return 'MSFT'
        elif 'akamai' in d:
            return 'akamai'
        # Unmatched hosts originally fell through to None; bucket them instead.
        return 'other'
    val = list(map(inner_func, domains))
    total = len(val)
    if total == 0:  # avoid dividing by zero when no assets were found
        return {}
    share = {}
    for k, v in Counter(val).items():
        share[k] = (v / total) * 100
    return share
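# Worked example of the percentage math (hypothetical inputs): for
#   ['a.cloudfront.net', 'x.1e100.net', 'b.cloudfront.net', 'cdn.example.net']
# inner_func yields ['AWS', 'google', 'AWS', 'other'], so the result is
#   {'AWS': 50.0, 'google': 25.0, 'other': 25.0}
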
if __name__ == '__main__':
    # debug_requests_on()
    from sys import argv
    urls = extract_urls("https://" + argv[1] + "/")
    domains = reverse_ip_url(urls)
    print(domains)
    print(wallet_share(domains))
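# Usage (assuming the file is saved as, e.g., wallet_share.py):
#   python3 wallet_share.py example.com
# The script prepends "https://" and a trailing "/" to the bare domain.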