Skip to content

Instantly share code, notes, and snippets.

@zweizeichen
Last active June 8, 2017 05:17
Show Gist options
  • Save zweizeichen/10bca3803b54070090ac48f5173910b2 to your computer and use it in GitHub Desktop.
Save zweizeichen/10bca3803b54070090ac48f5173910b2 to your computer and use it in GitHub Desktop.
Check your browsing history for sites using Cloudflare
import sqlite3
import tldextract
# Report which registered domains from the Chrome browsing history also appear
# in the list of sites known to use Cloudflare.
history_domains = set()

print("Loading domains from Chrome browsing history...")

# Copy history from ~/Library/Application Support/Google/Chrome/Default/History
conn = sqlite3.connect('History')
try:
    for (url,) in conn.execute("SELECT url FROM urls"):
        domain = tldextract.extract(url).registered_domain
        # registered_domain can be an empty string when the URL has no
        # recognised public suffix (IP addresses, localhost, ...). Skip those
        # so '' is never counted or matched as a domain.
        if domain:
            history_domains.add(domain)
finally:
    # Always release the database handle, even if a query or parse fails.
    conn.close()

print("Added %d domains." % len(history_domains))
print("Loading Cloudflare domains...")

# Get domains here: https://github.com/pirate/sites-using-cloudflare
with open('sorted_unique_cf.txt') as cf_file:
    stripped_lines = (line.strip() for line in cf_file)
    # Drop blank lines so an empty entry cannot intersect with the history.
    cf_domains = {domain for domain in stripped_lines if domain}

print("Added %d domains." % len(cf_domains))
print("Processing intersection...")

intersection = history_domains.intersection(cf_domains)

print("------------------------------")

for domain in sorted(intersection):
    print(domain)

print("------------------------------\nOK: %d domains found." % len(intersection))
import sqlite3
import tldextract
# Report which registered domains from the Firefox browsing history also
# appear in the list of sites known to use Cloudflare.
history_domains = set()

print("Loading domains from Firefox browsing history...")

# Copy history from ~/Library/Application Support/Firefox/Profiles/*YOUR PROFILE*/places.sqlite
conn = sqlite3.connect('places.sqlite')
try:
    for (url,) in conn.execute("SELECT url FROM moz_places"):
        domain = tldextract.extract(url).registered_domain
        # registered_domain can be an empty string when the URL has no
        # recognised public suffix (IP addresses, localhost, ...). Skip those
        # so '' is never counted or matched as a domain.
        if domain:
            history_domains.add(domain)
finally:
    # Always release the database handle, even if a query or parse fails.
    conn.close()

print("Added %d domains." % len(history_domains))
print("Loading Cloudflare domains...")

# Get domains here: https://github.com/pirate/sites-using-cloudflare
with open('sorted_unique_cf.txt') as cf_file:
    stripped_lines = (line.strip() for line in cf_file)
    # Drop blank lines so an empty entry cannot intersect with the history.
    cf_domains = {domain for domain in stripped_lines if domain}

print("Added %d domains." % len(cf_domains))
print("Processing intersection...")

intersection = history_domains.intersection(cf_domains)

print("------------------------------")

for domain in sorted(intersection):
    print(domain)

print("------------------------------\nOK: %d domains found." % len(intersection))
import sqlite3
import tldextract
# Report which registered domains from the Safari browsing history also appear
# in the list of sites known to use Cloudflare.
history_domains = set()

print("Loading domains from Safari browsing history...")

# Copy history from ~/Library/Safari/History.db
conn = sqlite3.connect('History.db')
try:
    for (url,) in conn.execute("SELECT url FROM history_items"):
        domain = tldextract.extract(url).registered_domain
        # registered_domain can be an empty string when the URL has no
        # recognised public suffix (IP addresses, localhost, ...). Skip those
        # so '' is never counted or matched as a domain.
        if domain:
            history_domains.add(domain)
finally:
    # Always release the database handle, even if a query or parse fails.
    conn.close()

print("Added %d domains." % len(history_domains))
print("Loading Cloudflare domains...")

# Get domains here: https://github.com/pirate/sites-using-cloudflare
with open('sorted_unique_cf.txt') as cf_file:
    stripped_lines = (line.strip() for line in cf_file)
    # Drop blank lines so an empty entry cannot intersect with the history.
    cf_domains = {domain for domain in stripped_lines if domain}

print("Added %d domains." % len(cf_domains))
print("Processing intersection...")

intersection = history_domains.intersection(cf_domains)

print("------------------------------")

for domain in sorted(intersection):
    print(domain)

print("------------------------------\nOK: %d domains found." % len(intersection))
@tinyapps
Copy link

tinyapps commented Feb 26, 2017

Here is what I had to do to get safari.py working on a relatively new OS X 10.11.6 install:

  1. sudo easy_install pip

  2. sudo pip install tldextract

  3. curl https://raw.githubusercontent.com/pirate/sites-using-cloudflare/master/sorted_unique_cf.txt --output /Users/YOUR_USERNAME/Desktop/sorted_unique_cf.txt

  4. In safari.py, change
    conn = sqlite3.connect('History.db')
    to
    conn = sqlite3.connect('/Users/YOUR_USERNAME/Library/Safari/History.db')
    and
    sorted_unique_cf.txt
    to
    /Users/YOUR_USERNAME/Desktop/sorted_unique_cf.txt

@sinnfeinn
Copy link

@tinyapps, @Payuing, @skypather

Thanks! Changing it to the following worked:
conn = sqlite3.connect('/Users/YOUR_USER/Library/Application Support/Google/Chrome/Default/History')
and
cf_domains = set(domain.strip() for domain in open('/Users/YOUR_USER/Desktop/sorted_unique_cf.txt'))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment