Skip to content

Instantly share code, notes, and snippets.

@waqaraqeel
Last active February 23, 2023 22:43
Show Gist options
  • Save waqaraqeel/9368bb0711a67ce17aec367448ac65e6 to your computer and use it in GitHub Desktop.
Save waqaraqeel/9368bb0711a67ce17aec367448ac65e6 to your computer and use it in GitHub Desktop.
Figures out which CDNs were involved in a webpage fetch given HAR file.
#!/usr/bin/env python3
"""
Figures out which CDNs were involved in a webpage fetch given HAR file.
Requires dnspython
Borrows heavily from https://github.com/turbobytes/cdnfinder
Thank you to cdnplanet.com
Usage: ./get_cdn.py -f har-file
Or you could just import the get_cdn function
"""
__author__ = "Waqar Aqeel"
__version__ = "1.0"
__license__ = "MIT"
import socket
from urllib.parse import urlsplit
import dns.resolver
import dns.reversename
TRR = "1.1.1.1"
_cdn_mappings = {
".clients.turbobytes.net": "TurboBytes",
".turbobytes-cdn.com": "TurboBytes",
".afxcdn.net": "afxcdn.net",
".akamai.net": "Akamai",
".akamaiedge.net": "Akamai",
".akadns.net": "Akamai",
".akamaitechnologies.com": "Akamai",
".gslb.tbcache.com": "Alimama",
".cloudfront.net": "Amazon Cloudfront",
".anankecdn.com.br": "Ananke",
".att-dsa.net": "AT&T",
".azioncdn.net": "Azion",
".belugacdn.com": "BelugaCDN",
".bluehatnetwork.com": "Blue Hat Network",
".systemcdn.net": "EdgeCast",
".cachefly.net": "Cachefly",
".cdn77.net": "CDN77",
".cdn77.org": "CDN77",
".panthercdn.com": "CDNetworks",
".cdngc.net": "CDNetworks",
".gccdn.net": "CDNetworks",
".gccdn.cn": "CDNetworks",
".cdnify.io": "CDNify",
".ccgslb.com": "ChinaCache",
".ccgslb.net": "ChinaCache",
".c3cache.net": "ChinaCache",
".chinacache.net": "ChinaCache",
".c3cdn.net": "ChinaCache",
".lxdns.com": "ChinaNetCenter",
".speedcdns.com": "QUANTIL/ChinaNetCenter",
".mwcloudcdn.com": "QUANTIL/ChinaNetCenter",
".cloudflare.com": "Cloudflare",
".cloudflare.net": "Cloudflare",
".edgecastcdn.net": "EdgeCast",
".adn.": "EdgeCast",
".wac.": "EdgeCast",
".wpc.": "EdgeCast",
".fastly.net": "Fastly",
".fastlylb.net": "Fastly",
".google.": "Google",
"googlesyndication.": "Google",
"youtube.": "Google",
".googleusercontent.com": "Google",
".l.doubleclick.net": "Google",
"d.gcdn.co": "G-core",
".hiberniacdn.com": "Hibernia",
".hwcdn.net": "Highwinds",
".incapdns.net": "Incapsula",
".inscname.net": "Instartlogic",
".insnw.net": "Instartlogic",
".internapcdn.net": "Internap",
".kxcdn.com": "KeyCDN",
".lswcdn.net": "LeaseWeb CDN",
".footprint.net": "Level3",
".llnwd.net": "Limelight",
".lldns.net": "Limelight",
".netdna-cdn.com": "MaxCDN",
".netdna-ssl.com": "MaxCDN",
".netdna.com": "MaxCDN",
".stackpathdns.com": "StackPath",
".mncdn.com": "Medianova",
".instacontent.net": "Mirror Image",
".mirror-image.net": "Mirror Image",
".cap-mii.net": "Mirror Image",
".rncdn1.com": "Reflected Networks",
".simplecdn.net": "Simple CDN",
".swiftcdn1.com": "SwiftCDN",
".swiftserve.com": "SwiftServe",
".gslb.taobao.com": "Taobao",
".cdn.bitgravity.com": "Tata communications",
".cdn.telefonica.com": "Telefonica",
".vo.msecnd.net": "Windows Azure",
".ay1.b.yahoo.com": "Yahoo",
".yimg.": "Yahoo",
".zenedge.net": "Zenedge",
".b-cdn.net": "BunnyCDN",
".ksyuncdn.com": "Kingsoft",
}
_resolver = dns.resolver.Resolver()
_resolver.nameservers = [TRR]
_resolver.timeout = 0.5
_found_domains = {}
_cname_failed = set()
_dns_failed = set()
def _cdnmapping_guess(domain):
if domain in _found_domains:
return _found_domains[domain]
for k, v in _cdn_mappings.items():
if k in domain:
_found_domains[domain] = v
return v
def _header_guess(headers):
for hdr in headers:
# Cloudflare advertises a custom Server header
if hdr["name"] == "Server" and hdr["value"].lower() == "cloudflare-nginx":
return "Cloudflare"
# China cache sends a Powered-By-Chinacache header
if hdr["name"] == "powered-by-chinacache":
return "ChinaCache"
# OnApp edge servers use X-Edge-Location to indicate the location
if hdr["name"] == "x-edge-location":
return "OnApp"
# CloudFront adds in some custom tracking id
if hdr["name"] == "x-amz-cf-id":
return "Amazon Cloudfront"
# Bitgravity adds edge hostname to Via header
if hdr["name"] == "via" and "bitgravity.com" in hdr["value"].lower():
return "Bitgravity"
# Skypark sends a X header with their brand name
if hdr["name"] == "X-CDN-Provider" and "skyparkcdn" in hdr["value"].lower():
return "Skypark"
# BaishanCloud uses BC prefix in X-Ser header
if hdr["name"] == "X-Ser" and hdr["value"].startswith("BC"):
return "BaishanCloud"
return None
def get_cdn(en):
""" Return name of CDN used or None from given HAR entry """
# get domain
domain = urlsplit(en["request"]["url"]).netloc
if not domain.endswith("."):
domain = domain + "."
# check cdn mappings
cdn = _cdnmapping_guess(domain)
if cdn:
return cdn
# check http headers
cdn = _header_guess(en["response"]["headers"])
if cdn:
return cdn
try:
# check cnames
if domain not in _cname_failed:
cnames = _resolver.query(domain, "CNAME")
for cname in cnames:
cdn = _cdnmapping_guess(str(cname.target))
if cdn:
_found_domains[domain] = cdn
return cdn
except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers, dns.exception.Timeout):
_cname_failed.add(domain)
try:
# do reverse DNS lookup
if domain not in _dns_failed:
ips = _resolver.query(domain, "A")
for ip in ips:
qname = dns.reversename.from_address(str(ip))
cnames = _resolver.query(qname, "PTR")
for cname in cnames:
cdn = _cdnmapping_guess(str(cname.target))
if cdn:
_found_domains[domain] = cdn
return cdn
except (dns.resolver.NoAnswer, dns.resolver.NXDOMAIN, dns.resolver.NoNameservers, dns.exception.Timeout):
_dns_failed.add(domain)
return None
if __name__ == "__main__":
import json
from argparse import ArgumentParser
parser = ArgumentParser()
parser.add_argument(
"-f", "--file", dest="file", help="HAR file to parse", metavar="FILE"
)
args = parser.parse_args()
try:
harfile = open(args.file)
har = json.load(harfile)
for en in har["log"]["entries"]:
cdn = get_cdn(en)
if cdn:
print(cdn)
except FileNotFoundError:
exit("Could not open " + args.file)
except json.JSONDecodeError:
exit("Could not parse " + args.file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment