Skip to content

Instantly share code, notes, and snippets.

@jmoiron
jmoiron / crawler.py
Created May 27, 2011 20:37
Simple gevent/httplib2 web crawler.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Simple async crawler/callback queue based on gevent."""
import traceback
import logging
import httplib2
import gevent
@jmoiron
jmoiron / omlette_stats.py
Created May 5, 2011 00:04
silly stats on amit's blog
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""post stats on amit just because"""
import re
import urllib2
from lxml import html
url = 'http://omlettesoft.com/newjournal.php3?topic=On+the+Waterfront&who=Lord+Omlette'
# twitter oauth example using urllib
#
# Many uses of the twitter api don't require authenticating as other users,
# but the documentation centers around it. In this example, we're using the
# twitter-provided access key & secret (keys['token']) rather than going
# through the handshake.
import json
import urllib2
# parallelize a function n ways, automatically splitting a big list
# of arguments into n roughly equal sized groups
import math
from multiprocessing import Pool
def split(iterable, n):
"""Splits an iterable up into n roughly equally sized groups."""
groupsize = int(math.floor(len(iterable) / float(n)))
remainder = len(iterable) % n