Skip to content

Instantly share code, notes, and snippets.

@neilkod
Created January 21, 2011 14:15
Show Gist options
  • Save neilkod/789718 to your computer and use it in GitHub Desktop.
Save neilkod/789718 to your computer and use it in GitHub Desktop.
what topics are associated with cool?
results are on the last line.
I'm trying to determine which subjects on delicious are 'cool'. I downloaded a sample of 1700 urls that were tagged at least once with 'cool'.
I ran the code against a small sample of 50 urls because I have to look up the delicious url metadata (bookmarks, tags) for each url and I'm rate-limited. Across those 50 urls (hey, I'm just testing), 94 bookmarks were tagged with 'cool'. Remember, there is a 1:M relationship between urls and bookmarks.
Looking at those 94 bookmarks, I'm counting the other tags that were used in conjunction with 'cool'.
Results follow. I'm now running it against 500 urls just for some testing. If this works, I'll run it against as many URLs as I can as I progress. I still have a bit to learn about the deliciousapi module; I haven't looked through its source yet.
import pickle
from collections import defaultdict
from operator import itemgetter

import deliciousapi
# Number of bookmarks seen so far that carry `the_tag`; filled in when the
# script is run (see the ``__main__`` guard at the bottom).
users = 0
# The tag whose co-occurring tags we want to count.
the_tag = "cool"
# Example url, kept for manual experiments; not used by the script itself.
sample_url = "http://www.cloudera.com/blog/2010/12/hadoop-world-2010-tweet-analysis/"


def count_cotags(bookmarks, tag, tags=None):
    """Count the tags used on every bookmark that carries *tag*.

    bookmarks -- iterable of bookmark records; element ``[1]`` of each record
                 is read as that bookmark's collection of tags (presumably
                 the deliciousapi bookmark tuple -- confirm against the
                 module's documentation).
    tag       -- only bookmarks whose tags contain this value are counted.
    tags      -- optional mapping of tag -> count to accumulate into; a new
                 ``defaultdict(int)`` is created when omitted.

    Returns ``(matched, tags)`` where ``matched`` is the number of bookmarks
    that contained *tag* and ``tags`` is the (possibly updated) mapping.
    Note that *tag* itself is counted too, once per matching bookmark.
    """
    if tags is None:
        tags = defaultdict(int)
    matched = 0
    for bookmark in bookmarks:
        bookmark_tags = bookmark[1]
        if tag in bookmark_tags:
            matched += 1
            for tg in bookmark_tags:
                tags[tg] += 1
    return matched, tags


def load_urls(path='urls.pkl'):
    """Return the collection of urls stored in the pickle file at *path*.

    The urls are read from disk instead of being fetched from delicious again;
    the initial list was generated with
    ``dapi.get_urls(tag=the_tag, popular=False, max_urls=2000)``.
    """
    # NOTE: pickle can execute arbitrary code while loading. Only load files
    # generated by this project, never a pickle from an untrusted source.
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


def main(max_urls=500):
    """Count the tags that co-occur with `the_tag` on a sample of urls.

    Looks up the delicious metadata of at most *max_urls* pickled urls (each
    lookup is a separate request and the service is rate-limited, hence the
    cap) and returns ``(users, tags, srtd)``: the number of bookmarks that
    carried `the_tag`, the tag -> count mapping, and the ``(tag, count)``
    pairs ordered by count, least used first.
    """
    dapi = deliciousapi.DeliciousAPI()
    tags = defaultdict(int)
    matched_total = 0
    for url in list(load_urls())[0:max_urls]:
        # get delicious metadata for the current url
        metadata = dapi.get_url(url)
        matched, _ = count_cotags(metadata.bookmarks, the_tag, tags)
        matched_total += matched
    # order the tags by # of times used
    srtd = sorted(tags.items(), key=itemgetter(1))
    return matched_total, tags, srtd


if __name__ == "__main__":
    users, tags, srtd = main()
    print(srtd)
[(u'code', 1), (u'toread', 1), (u'to_do', 1), (u'graphics', 1), (u'paper', 1), (u'cartoon', 1), (u'frankie', 1), (u'animation', 1), (u'location', 1), (u'pretty', 1), (u'libro', 1), (u'Spain', 1), (u'dolls', 1), (u'grafica', 1), (u'geology', 1), (u'read', 1), (u'db', 1), (u'util', 1), (u'space3', 1), (u'facebook', 1), (u'designers', 1), (u'xss', 1), (u'criss', 1), (u'cell', 1), (u'mikecriss', 1), (u'moda', 1), (u'packshot', 1), (u'car', 1), (u'clothes', 1), (u'turtle', 1), (u'people', 1), (u'geek', 1), (u'humans', 1), (u'hacks', 1), (u'animato', 1), (u'logo', 1), (u'humor', 1), (u'harware', 1), (u'technology', 1), (u'space', 1), (u'psychology', 1), (u'review', 1), (u'chaos', 1), (u'content', 1), (u'motiongraphics', 1), (u'capitan', 1), (u'cnn', 1), (u'3d', 1), (u'philosophy', 1), (u'business', 1), (u'domo', 1), (u'javascript', 1), (u'objects', 1), (u'jqyery', 1), (u'web', 1), (u'futuro', 1), (u'news', 1), (u'stage', 1), (u'search', 1), (u'kids', 1), (u'lifestyle', 1), (u'brasil', 1), (u'language', 1), (u'sierpinski', 1), (u'universe', 1), (u'vectors', 1), (u'programming', 1), (u'mac', 1), (u'trips', 1), (u'advertising', 1), (u'Games', 1), (u'italian', 1), (u'marte', 1), (u'shorthand', 1), (u'image', 1), (u'learning', 1), (u'article', 1), (u'canal', 1), (u'languages', 1), (u'usb', 1), (u'girls', 1), (u'consultancy', 1), (u'gallery', 1), (u'music', 1), (u'iphone', 1), (u'articles', 1), (u'astrophysics', 1), (u'capitain', 1), (u'systems', 1), (u'vector', 1), (u'life', 1), (u'brains', 1), (u'photos', 1), (u'automated', 1), (u'Fun', 1), (u'Agullana', 1), (u'hydrogeology', 1), (u'scenography', 1), (u'slider', 1), (u'jquery', 1), (u'typography', 1), (u'future', 1), (u'resources', 1), (u'coca', 1), (u'hacking', 1), (u'age', 1), (u'videos', 1), (u'awesome', 1), (u'neuron', 1), (u'share', 1), (u'accessibility', 1), (u'captcha', 1), (u'engineering', 1), (u'cartone', 1), (u'simulations', 1), (u'portrait', 1), (u'education', 1), (u'stickers', 1), (u'mashup', 1), (u'mike', 1), 
(u'struts2', 1), (u'gadget', 1), (u'profile', 1), (u'picture', 1), (u'multiple', 1), (u'recensioni', 1), (u'tonchidot', 1), (u'advice', 1), (u'recensione', 1), (u'furnature', 1), (u'fb', 1), (u'lighting', 1), (u'mag', 1), (u'neuroscience', 1), (u'blossomslut', 1), (u'astronomy', 1), (u'why', 1), (u'tend\xeancias', 1), (u'webservice', 1), (u'database', 1), (u'blogs', 1), (u'youtube', 1), (u'til:bib', 1), (u'patterns', 1), (u'gsc', 1), (u'neatorama', 1), (u'tech', 1), (u'Photojojo', 1), (u'fotografia', 1), (u'physics', 1), (u'manga', 1), (u'photoshop', 2), (u'photo', 2), (u'webdesign', 2), (u'portfolio', 2), (u'visualization', 2), (u'generator', 2), (u'writing', 2), (u'galaxy', 2), (u'culture', 2), (u'trends', 2), (u'nature', 2), (u'marketing', 2), (u'illustration', 2), (u'tutorials', 2), (u'howto', 2), (u'magazine', 2), (u'social', 2), (u'tools', 2), (u'toy', 2), (u'shopping', 2), (u'illustrator', 2), (u'comics', 2), (u'fashion', 2), (u'productivity', 2), (u'media', 2), (u'agency', 2), (u'nasa', 2), (u'list', 3), (u'video', 3), (u'funny', 3), (u'adobe', 4), (u'brain', 4), (u'amazing', 4), (u'tricks', 4), (u'ideas', 5), (u'tutorial', 5), (u'diy', 6), (u'art', 6), (u'images', 6), (u'lifehacks', 6), (u'body', 6), (u'games', 6), (u'script', 6), (u'reference', 7), (u'tips', 7), (u'blog', 8), (u'interactive', 8), (u'health', 9), (u'flash', 10), (u'photography', 11), (u'design', 11), (u'interesting', 13), (u'science', 13), (u'inspiration', 13), (u'fun', 16), (u'cool', 94)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment