Skip to content

Instantly share code, notes, and snippets.

@csessig86
csessig86 / Timeline.py part 1
Created February 20, 2012 20:19
Timeline.py part 1
import urllib2
from BeautifulSoup import BeautifulSoup
import datetime
import re
# Timestamp captured once, when the script starts running.
now = datetime.datetime.now()
# Create a CSV where we'll save our data. See further docs:
# http://propublica.github.com/timeline-setter/#csv
# Opening with 'w' truncates any existing timeline.csv.
# NOTE(review): no f.close() is visible in this fragment -- confirm the
# handle is closed (or wrapped in a `with`) after the last write.
f = open('timeline.csv', 'w')
// NOTE(review): these lines are JavaScript, not Python -- presumably pasted
// from an unrelated snippet; confirm whether they belong in this file.
// The first nine names are declared without an initial value.
var FetchLayer;
var increment;
var wind_options;
var wind;
var map;
var center;
var fetch_tile_1997;
var fetch_tile_1998;
var fetch_tile_1999;
// Starts out as an empty array.
var overlayArray = [];
@csessig86
csessig86 / gist:1959531
Created March 2, 2012 16:38
Timeline.py part 2
# Collect every <div class="story-block"> element from the parsed page.
# `soup` is not assigned in this fragment; it must already exist here.
events = soup.findAll('div', attrs={'class': 'story-block'})
# Visit each matched block in turn as `x`. The loop body is not part of
# this fragment.
for x in events:
@csessig86
csessig86 / gist:1959618
Created March 2, 2012 16:52
Timeline.py part 3
# Pull the raw pieces of one story out of its block `x`.
story_more = x.find('p', {'class': 'story-more'})
date = story_more('em')  # every <em> inside the "story-more" paragraph
link = x.find('fb:like')['href']
headline = x.find('h3').text
story_text = x.find('div', {'id': 'blox-story-text'})
description = story_text('p', limit=1)  # at most one <p> from the story text
image = x.find('img')
@csessig86
csessig86 / gist:1959692
Created March 2, 2012 17:06
Timeline.py part 4
# Pieces of each story that we scrape from its block `x`.
story_more = x.find('p', {'class': 'story-more'})
date = story_more('em')  # every <em> inside the "story-more" paragraph
link = x.find('fb:like')['href']
headline = x.find('h3').text
story_text = x.find('div', {'id': 'blox-story-text'})
description = story_text('p', limit=1)  # at most one <p> from the story text
image = x.find('img')
@csessig86
csessig86 / gist:1959704
Created March 2, 2012 17:10
Timeline.py part 5
# Turn the scraped values into byte strings.
# date, image and description are tags (or lists of tags), which str()
# renders as markup. headline and link are plain unicode text, and under
# Python 2 str() on unicode raises UnicodeEncodeError as soon as it meets a
# non-ASCII character (e.g. a curly quote in a headline), so those two are
# encoded explicitly as UTF-8 instead.
date2 = str(date)
link2 = link.encode('utf-8')
headline2 = headline.encode('utf-8')
image2 = str(image)
description2 = str(description)
@csessig86
csessig86 / gist:2046673
Created March 15, 2012 20:25
Timeline.py part 6
# Extra formatting needed for dates to get rid of em tags and unnecessary formatting.
# NOTE(review): date3 is not assigned in any visible fragment -- presumably it
# derives from the str() of the list of <em> tags, hence the '[' / ']' in the
# markers below; confirm.
# Each step removes one literal marker from the previous step's result.
date4 = date3.replace('[<em>', "")
date5 = date4.replace('</em>]', "")
date6 = date5.replace('- ', "")
date7 = date6.replace("at ", "")
# Extra formatting is also needed for the description to get rid of p tags and new line returns.
# NOTE(review): description3 is likewise not assigned in any visible fragment -- confirm its source.
description4 = description3.replace('[<p>', "")
description5 = description4.replace('</p>]', "")
# Newlines become single spaces rather than being dropped.
description6 = description5.replace('\n', " ")
@csessig86
csessig86 / gist:2046704
Created March 15, 2012 20:30
Timeline.py part 7
# Adjust the width of all images to 300 pixels. The pattern accepts a width
# made of any number of digits, so 2- and 4-digit widths are rewritten too
# rather than only 3-digit ones.
image4 = re.sub(r'width="\d+"', 'width="300"', image3)
# Python spits out the word 'None' if it doesn't find an image. Delete that.
image5 = image4.replace('None', "")
@csessig86
csessig86 / gist:2046741
Created March 15, 2012 20:38
Timeline.py part 8
# A recently updated story carries an <em class="item-updated badge"> marker
# that shows only a time, not a date. Swap that marker for the moment this
# script started running; the value can be corrected by hand in the CSV.
run_stamp = now.strftime("%Y-%m-%d %H:%M")
date8 = date7.replace('[<em class="item-updated badge">Updated:', run_stamp)
@csessig86
csessig86 / gist:2046782
Created March 15, 2012 20:45
Timeline.py part 9
import urllib2
from BeautifulSoup import BeautifulSoup
import datetime
import re
now = datetime.datetime.now()