Skip to content

Instantly share code, notes, and snippets.

@sillygwailo
Last active September 27, 2015 15:08
Show Gist options
  • Save sillygwailo/1289179 to your computer and use it in GitHub Desktop.
Save sillygwailo/1289179 to your computer and use it in GitHub Desktop.
Top Domains in Readability
#!/usr/bin/env python
import re, operator, argparse, oauth
from readability import ReaderClient
from beaker.cache import cache_regions, cache_region
# Command-line interface for the script.
parser = argparse.ArgumentParser(description="Look at Readability.com and determines the top domains you've read articles with")
# NOTE(review): 'file' is parsed (as a one-element list, because of nargs=1)
# but nothing visible in this script writes to it -- confirm intended use.
parser.add_argument('file', nargs=1, help='Filename to export to.')
# The __main__ block reads ``arguments.number``; without this option that
# attribute does not exist and the script dies with AttributeError.
# -1 is the sentinel sorted_domains() treats as "no limit".
parser.add_argument('-n', '--number', type=int, default=-1,
                    help='Number of top domains to show (default: -1, show all).')
# Register the 'short_term' beaker cache region used by sorted_domains():
# a dbm-backed cache stored under /tmp with an expiry value of 3600
# (seconds, per beaker's documentation -- i.e. one hour).
cache_regions.update(
    short_term=dict(
        expire=3600,
        type='dbm',
        data_dir='/tmp',
    ),
)
@cache_region('short_term', 'sorted_domains')
def sorted_domains(number=-1):
    """Return (domain, count) pairs for favorite bookmarks, most frequent first.

    Fetches the favorite bookmarks through ``ReaderClient``, tallies each
    bookmark's ``article.domain`` (with a leading ``www.`` removed) and
    sorts the tallies by count in descending order.  The function is
    decorated with the 'short_term' beaker cache region.

    Args:
        number: Maximum number of pairs to return.  The default, -1,
            returns every domain.

    Returns:
        A list of ``(domain, count)`` tuples sorted by count, descending.
    """
    # NOTE(review): these look like placeholder credentials -- replace with
    # real values (ideally loaded from configuration) before running.
    rdd = ReaderClient('consumer-key', 'secret-key', 'username', 'password')
    bookmarks = rdd.get_bookmarks(favorite=True)

    prefix = 'www.'
    counts = {}
    for bookmark in bookmarks:
        domain = bookmark.article.domain
        # Strip only a *leading* "www.".  Searching for it anywhere in the
        # host would truncate names such as "seewww.net" (to "net") or
        # "blog.www.example.com" (to "example.com").
        if domain.startswith(prefix):
            domain = domain[len(prefix):]
        counts[domain] = counts.get(domain, 0) + 1

    # .items() works on both Python 2 and Python 3 (iteritems() is 2-only).
    ranked = sorted(counts.items(), key=operator.itemgetter(1), reverse=True)
    if number != -1:
        ranked = ranked[:number]
    return ranked
if __name__ == '__main__':
    arguments = parser.parse_args()
    # Fall back to -1 (sorted_domains' "no limit" sentinel) when the parser
    # defines no ``number`` option, instead of crashing with AttributeError.
    limit = getattr(arguments, 'number', -1)
    for domain, count in sorted_domains(limit):
        # Single parenthesised argument: valid on both Python 2 and 3.
        print("%s: %d" % (domain, count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment