Skip to content

Instantly share code, notes, and snippets.

@tommynyquist
Last active December 13, 2015 21:59
Show Gist options
  • Save tommynyquist/4981623 to your computer and use it in GitHub Desktop.
Save tommynyquist/4981623 to your computer and use it in GitHub Desktop.
Example fuzzy Public Suffix List matching
def parse(domain, psl):
    """Return the sorted list of hostnames that fuzzily match *domain*.

    A hostname is treated as a registrable domain when everything after its
    first label is an entry of *psl*.  For a registrable domain, optionally
    prefixed with one of the fuzzy host labels (``www``, ``mobile``, ``m``),
    the result holds the bare registrable domain plus one variant per fuzzy
    host label.  Any other hostname is returned unchanged as a one-element
    list.

    Args:
        domain: Dot-separated hostname, e.g. ``'www.chromium.org'``.
        psl: Container of public suffixes (e.g. ``['org', 'co.uk']``) that
            supports the ``in`` operator.  Suffixes are matched exactly;
            no wildcard or exception rules are applied.

    Returns:
        A sorted list of hostname strings.
    """
    fuzzy_hosts = ('www', 'mobile', 'm')

    def is_registrable(labels):
        # Exactly one label in front of a known public suffix.  The length
        # guard keeps a bare label from matching via an empty-string entry
        # that may have slipped into psl.
        return len(labels) > 1 and '.'.join(labels[1:]) in psl

    labels = domain.split('.')

    # Drop a leading fuzzy label only when what remains is still a
    # registrable domain.  Otherwise the label *is* the registrable name
    # (e.g. 'm.org') and stripping it would leave just the public suffix,
    # making parse('m.org') disagree with parse('www.m.org').
    if labels[0] in fuzzy_hosts and is_registrable(labels[1:]):
        labels = labels[1:]

    if not is_registrable(labels):
        return sorted([domain])

    base = '.'.join(labels)
    return sorted([base] + ['.'.join((host, base)) for host in fuzzy_hosts])
# Tiny stand-in for the Public Suffix List used by the self-checks below.
psl = ['org', 'co.uk']

# Expected results shared by several inputs that collapse to the same site.
_chromium_variants = ['chromium.org', 'm.chromium.org', 'mobile.chromium.org', 'www.chromium.org']
_google_variants = ['google.co.uk', 'm.google.co.uk', 'mobile.google.co.uk', 'www.google.co.uk']

# (input hostname, exact sorted list parse() must return), checked in order.
for _host, _expected in (
    ('chromium.org', _chromium_variants),
    ('m.chromium.org', _chromium_variants),
    ('mobile.chromium.org', _chromium_variants),
    ('www.chromium.org', _chromium_variants),
    ('dev.chromium.org', ['dev.chromium.org']),
    ('www.google.co.uk', _google_variants),
    ('nothing.google.co.uk', ['nothing.google.co.uk']),
):
    assert parse(_host, psl) == _expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment