Skip to content

Instantly share code, notes, and snippets.

@sovietw0rm
Last active December 23, 2016 16:58
Show Gist options
  • Save sovietw0rm/60052704fd174892c93fe3f6c7cec92f to your computer and use it in GitHub Desktop.
Save sovietw0rm/60052704fd174892c93fe3f6c7cec92f to your computer and use it in GitHub Desktop.
# coding: utf-8
'''
http://www.analyticsedge.com/2016/11/heres-a-secret-%C9%A2oogle-com-is-not-google-com/
'''
import idna
'''
import unicodedata
def unicode_to_ascii(data):
return unicodedata.normalize('NFKD', data).encode('ascii', 'ignore')
def compare2(domain1, domain2):
if len(domain1) != len(domain2):
return False
if unicode_to_ascii(domain1) == unicode_to_ascii(domain2):
return True
return False
'''
# Default tolerance: strictly fewer than this fraction of character positions
# may differ for two names to be reported as look-alikes.
THRESHOLD = 0.3


def compare(domain1, domain2, threshold=None):
    """Tell whether two equal-length names differ in only a few positions.

    The names are compared character by character at matching indexes.

    Args:
        domain1: First name (any sequence of characters).
        domain2: Second name, compared position by position with domain1.
        threshold: Optional fraction of the length; the number of differing
            positions must be strictly below ``threshold * len(domain1)``.
            When omitted (None), the module-level THRESHOLD is used.

    Returns:
        True when the lengths match and the mismatch count is below the
        limit, otherwise False. Two empty names give False, because zero
        mismatches is not strictly below a limit of zero.
    """
    if threshold is None:
        # Resolved at call time so a later change to THRESHOLD is honoured.
        threshold = THRESHOLD
    if len(domain1) != len(domain2):
        return False
    mismatches = sum(1 for left, right in zip(domain1, domain2) if left != right)
    return mismatches < threshold * len(domain1)
# Read the candidate names, one per line; the context manager guarantees the
# file handle is closed once the lines are loaded.
with open("domains.txt", "r") as domain_file:
    domains = domain_file.readlines()

# Names that each decoded candidate is compared against.
checks = [
    u"google.com",
    u"apple.com",
    u"ebay.com",
    u"amazon.com",
    u"icloud.com",
    u"paypal.com",
    u"microsoft.com",
    u"gmail.com",
    u"yahoo.com",
    u"facebook.com",
]

for domain in domains:
    # only check unicode domains
    if not domain.startswith('xn--'):
        continue
    name = domain.strip()
    try:
        # Decode once per line instead of once per comparison and per print.
        decoded = idna.decode(name)
        for check in checks:
            if compare(decoded, check):
                # Single pre-formatted argument: prints "<decoded> <encoded>"
                # whether print is a statement (Python 2) or a function.
                print(u"%s %s" % (decoded, name))
    except UnicodeError:
        # Best-effort scan: skip names that cannot be decoded (idna.IDNAError
        # derives from UnicodeError) or cannot be encoded for output, while
        # still letting KeyboardInterrupt/SystemExit stop the run.
        continue
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment