-
-
Save zweizeichen/10bca3803b54070090ac48f5173910b2 to your computer and use it in GitHub Desktop.
# Report which domains from a copy of the Chrome browsing history also appear
# in the list of sites known to use Cloudflare.
#
# Expects two files in the current working directory:
#   History              - copy of Chrome's history database
#   sorted_unique_cf.txt - one Cloudflare-backed domain per line
import sqlite3

import tldextract

history_domains = set()

print("Loading domains from Chrome browsing history...")

# Copy history from ~/Library/Application Support/Google/Chrome/Default/History
conn = sqlite3.connect('History')
try:
    c = conn.cursor()
    for (url,) in c.execute("SELECT url FROM urls"):
        domain = tldextract.extract(url).registered_domain
        # registered_domain is the empty string when the URL has no
        # domain + public suffix (file://, chrome://, IP addresses,
        # localhost, ...). Skip those so they neither inflate the count nor
        # match a blank line in the Cloudflare list.
        if domain:
            history_domains.add(domain)
finally:
    # Always release the database handle, even if the query fails
    # (e.g. "no such table" when the wrong file was copied).
    conn.close()

print("Added %d domains." % len(history_domains))

print("Loading Cloudflare domains...")

# Get domains here: https://github.com/pirate/sites-using-cloudflare
with open('sorted_unique_cf.txt') as cf_file:
    cf_domains = set(line.strip() for line in cf_file)
# Blank lines in the list are not domains.
cf_domains.discard('')

print("Added %d domains." % len(cf_domains))

print("Processing intersection...")

intersection = history_domains.intersection(cf_domains)

print("------------------------------")

for domain in sorted(intersection):
    print(domain)

print("------------------------------\nOK: %d domains found." % len(intersection))
# Report which domains from a copy of the Firefox browsing history also appear
# in the list of sites known to use Cloudflare.
#
# Expects two files in the current working directory:
#   places.sqlite        - copy of the Firefox profile's places database
#   sorted_unique_cf.txt - one Cloudflare-backed domain per line
import sqlite3

import tldextract

history_domains = set()

print("Loading domains from Firefox browsing history...")

# Copy history from ~/Library/Application Support/Firefox/Profiles/*YOUR PROFILE*/places.sqlite
conn = sqlite3.connect('places.sqlite')
try:
    c = conn.cursor()
    for (url,) in c.execute("SELECT url FROM moz_places"):
        domain = tldextract.extract(url).registered_domain
        # registered_domain is the empty string when the URL has no
        # domain + public suffix (file://, about:, IP addresses,
        # localhost, ...). Skip those so they neither inflate the count nor
        # match a blank line in the Cloudflare list.
        if domain:
            history_domains.add(domain)
finally:
    # Always release the database handle, even if the query fails
    # (e.g. "no such table" when the wrong file was copied).
    conn.close()

print("Added %d domains." % len(history_domains))

print("Loading Cloudflare domains...")

# Get domains here: https://github.com/pirate/sites-using-cloudflare
with open('sorted_unique_cf.txt') as cf_file:
    cf_domains = set(line.strip() for line in cf_file)
# Blank lines in the list are not domains.
cf_domains.discard('')

print("Added %d domains." % len(cf_domains))

print("Processing intersection...")

intersection = history_domains.intersection(cf_domains)

print("------------------------------")

for domain in sorted(intersection):
    print(domain)

print("------------------------------\nOK: %d domains found." % len(intersection))
tldextract |
# Report which domains from a copy of the Safari browsing history also appear
# in the list of sites known to use Cloudflare.
#
# Expects two files in the current working directory:
#   History.db           - copy of Safari's history database
#   sorted_unique_cf.txt - one Cloudflare-backed domain per line
import sqlite3

import tldextract

history_domains = set()

print("Loading domains from Safari browsing history...")

# Copy history from ~/Library/Safari/History.db
conn = sqlite3.connect('History.db')
try:
    c = conn.cursor()
    for (url,) in c.execute("SELECT url FROM history_items"):
        domain = tldextract.extract(url).registered_domain
        # registered_domain is the empty string when the URL has no
        # domain + public suffix (file://, IP addresses, localhost, ...).
        # Skip those so they neither inflate the count nor match a blank
        # line in the Cloudflare list.
        if domain:
            history_domains.add(domain)
finally:
    # Always release the database handle, even if the query fails
    # (e.g. "no such table" when the wrong file was copied).
    conn.close()

print("Added %d domains." % len(history_domains))

print("Loading Cloudflare domains...")

# Get domains here: https://github.com/pirate/sites-using-cloudflare
with open('sorted_unique_cf.txt') as cf_file:
    cf_domains = set(line.strip() for line in cf_file)
# Blank lines in the list are not domains.
cf_domains.discard('')

print("Added %d domains." % len(cf_domains))

print("Processing intersection...")

intersection = history_domains.intersection(cf_domains)

print("------------------------------")

for domain in sorted(intersection):
    print(domain)

print("------------------------------\nOK: %d domains found." % len(intersection))
Here is what I had to do to get safari.py working on a relatively new OS X 10.11.6 install:
-
sudo easy_install pip
-
sudo pip install tldextract
-
curl https://raw.githubusercontent.com/pirate/sites-using-cloudflare/master/sorted_unique_cf.txt --output /Users/YOUR_USERNAME/Desktop/sorted_unique_cf.txt
-
In safari.py, change
conn = sqlite3.connect('History.db')
to
conn = sqlite3.connect('/Users/YOUR_USERNAME/Library/Safari/History.db')
and
sorted_unique_cf.txt
to
/Users/YOUR_USERNAME/Desktop/sorted_unique_cf.txt
@tinyapps, @Payuing, @skypather
Thanks! Changing it to the following worked:
conn = sqlite3.connect('/Users/YOUR_USER/Library/Application Support/Google/Chrome/Default/History')
and
cf_domains = set(domain.strip() for domain in open('/Users/YOUR_USER/Desktop/sorted_unique_cf.txt'))
@sinnfeinn
check out this:
http://stackoverflow.com/questions/28126140/python-sqlite3-operationalerror-no-such-table