Created
September 5, 2009 04:32
-
-
Save bycoffe/181287 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A small Django app I built last year that looked for people on Twitter saying they were sick. | |
An example of using Twitter search. | |
""" | |
# models.py | |
from django.contrib.gis.db import models | |
class Phrase(models.Model):
    """A search phrase to look for on Twitter (e.g. "I am sick")."""
    phrase = models.CharField(max_length=100)

    def __unicode__(self):
        return self.phrase
class TwitterUser(models.Model):
    """A Twitter account seen in search results.

    `pt` holds the geocoded point for `location`; it is nullable, though
    get_pt() in get_tweets.py falls back to Point(0, 0) rather than None.
    """
    name = models.CharField(max_length=100)
    username = models.CharField(max_length=100, unique=True)
    location = models.CharField(max_length=100, blank=True)
    pt = models.PointField(null=True)

    def __unicode__(self):
        return self.username
class Tweet(models.Model):
    """One matched tweet, linked to the phrase that found it and its author."""
    # Twitter status ids passed the signed 32-bit limit in 2009 (the
    # "Twitpocalypse"), so a plain IntegerField overflows; a 64-bit
    # BigIntegerField is required. Backward compatible: every value a
    # 32-bit field held fits in 64 bits.
    twitter_id = models.BigIntegerField(unique=True)
    tweet = models.CharField(max_length=255)
    phrase = models.ForeignKey(Phrase)
    user = models.ForeignKey(TwitterUser)
    published = models.DateTimeField()
    # Soft-delete / moderation flag; nothing in this file clears it.
    is_active = models.BooleanField(default=True)

    def __unicode__(self):
        return self.tweet
# get_tweets.py | |
import datetime | |
import os | |
import sys | |
import urllib | |
import urllib2 | |
from django.core.management import setup_environ | |
from django.contrib.gis.geos import * | |
from django.http import HttpRequest | |
from BeautifulSoup import BeautifulSoup | |
from dateutil.parser import parse as dateparse | |
from geopy import geocoders | |
URL = "http://search.twitter.com/search.atom?%s" | |
g = geocoders.Google('') | |
def phrase_urls():
    """Yield (Phrase, Twitter-search-URL) pairs for every stored phrase.

    Each phrase is wrapped in double quotes so Twitter matches it as an
    exact phrase rather than as separate words.
    """
    from sickontwitter.models import Phrase
    for phrase in Phrase.objects.all():
        quoted = '"%s"' % phrase.phrase
        yield phrase, URL % urllib.urlencode({'q': quoted})
def url_to_soup(url):
    """Fetch `url` and return its body parsed with BeautifulSoup.

    Fix: the original closed the response only on the success path; if
    read() or the BeautifulSoup constructor raised, the handle leaked.
    The try/finally guarantees the close.
    """
    page = urllib2.urlopen(url)
    try:
        return BeautifulSoup(page.read())
    finally:
        page.close()
def parse_xml(soup, phrase):
    """Store every <entry> in a Twitter Atom search feed as a Tweet for `phrase`."""
    for entry in soup.findAll('entry'):
        parse_entry(entry, phrase)
def fix_tags(string):
    """Strip the <b> highlight tags Twitter search wraps around matched
    terms, then decode the HTML entities the Atom feed escapes content
    with.

    Fix: the entity replacements had been corrupted (by an HTML-rendering
    round trip) into no-ops like replace('<', '<'); restored to decode
    &lt;, &gt; and &amp;. &amp; is decoded last so that the '&' it
    produces is never re-read as the start of another entity.
    """
    string = string.replace('<b>', '').replace('</b>', '')
    string = string.replace('&lt;', '<').replace('&gt;', '>')
    string = string.replace('&amp;', '&')
    return string
def parse_entry(entry, phrase):
    """Persist one Atom <entry> as a Tweet; does nothing if the tweet's
    id is already in the database. Prints newly created tweets.
    """
    from sickontwitter.models import Tweet

    published = dateparse(entry.find('published').contents[0])
    link = entry.find('link')['href']
    # The status id is the last path segment of the tweet's permalink.
    twitter_id = link.split('/')[-1]
    message = fix_tags(entry.find('content').contents[0])
    author = get_author(entry.find('author').find('uri').contents[0])

    tweet, created = Tweet.objects.get_or_create(
        twitter_id=twitter_id,
        defaults={
            'tweet': message,
            'phrase': phrase,
            'user': author,
            'published': published,
        })
    if created:
        print(tweet)
def get_author(url):
    """Return the TwitterUser for a profile URL, creating it on first sight.

    New users are built by scraping the profile page for name/location
    and geocoding the location into a point.
    """
    from sickontwitter.models import TwitterUser

    # The username is the last path segment of the profile URL.
    username = unicode(url.split('/')[-1])
    try:
        return TwitterUser.objects.get(username=username)
    except TwitterUser.DoesNotExist:
        pass  # unseen user: scrape and create below

    name, location = parse_author_page(url_to_soup(url))
    return TwitterUser.objects.create(
        name=name,
        username=username,
        location=location,
        pt=get_pt(location))
def get_pt(location):
    """Geocode a free-text location to a Point(lng, lat).

    Returns Point(0, 0) when the location is empty, or when the geocoder
    result does not unpack as a single (place, (lat, lng)) pair — the
    tuple unpack raises ValueError on zero or multiple matches.
    """
    fallback = Point(0, 0)
    if not location:
        return fallback
    try:
        _place, (lat, lng) = g.geocode(location)
    except ValueError:
        return fallback
    # Note the argument order: GEOS points are (x, y) == (lng, lat).
    return Point(float(lng), float(lat))
def parse_author_page(soup):
    """Extract (name, location) from a scraped Twitter profile page.

    Either value comes back as u'' when its <span> is absent —
    soup.find() returns None then, and .contents raises AttributeError.
    """
    def span_text(css_class):
        try:
            return soup.find('span', {'class': css_class}).contents[0]
        except AttributeError:
            return ''

    return unicode(span_text('fn')), unicode(span_text('adr'))
def _main():
    """Wire up Django from the settings module in the current working
    directory, then fetch and store tweets for every stored phrase.
    """
    sys.path.append(os.getcwd())
    import settings
    setup_environ(settings)

    for phrase, url in phrase_urls():
        parse_xml(url_to_soup(url), phrase)


if __name__ == '__main__':
    _main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment