Skip to content

Instantly share code, notes, and snippets.

@lehrblogger
Created February 27, 2011 03:28
Show Gist options
  • Save lehrblogger/845879 to your computer and use it in GitHub Desktop.
Save lehrblogger/845879 to your computer and use it in GitHub Desktop.
Comparison shopping on the Internet! Ordering samples of all of them was $$$, so I needed a way to annotate the alternatives and narrow it down...
import csv
import sys
import time
import urllib2

from BeautifulSoup import BeautifulSoup
# Scrape every FLOR sample product page and write one CSV row per product:
# name, price, traffic rating, product URL.

# Fetch the index page first so a failed request does not leave behind a
# freshly truncated, empty CSV.
soup = BeautifulSoup(urllib2.urlopen('http://www.flor.com/get-samples.html?limit=all'))

# The file must be opened for writing (the default mode is read-only, which
# made every write fail). 'wb' is the mode the Python 2 csv module expects.
with open('/Users/lehrblogger/Desktop/flor.csv', 'wb') as f:
    # csv.writer terminates each row and quotes fields containing commas, so
    # the output imports cleanly into a spreadsheet.
    writer = csv.writer(f)
    for a in soup.findAll('a', 'samples-link'):
        href = None
        try:
            href = 'http://www.flor.com' + a['href']
            doc = urllib2.urlopen(href).read()
            subsoup = BeautifulSoup(doc)
            # Product title is the text before the first '-' in the heading.
            name = str(subsoup.find('div', {'id': 'product_details'}).h1.span.contents[0]).split('-')[0].strip()
            price = str(subsoup.find('span', 'price').contents[0]).strip('$')
            # The traffic rating is read from the second sibling after the
            # price heading, keeping only the text before the word 'Traffic'.
            traffic = str(subsoup.find('h3', {'id': 'productPrice'}).nextSibling.nextSibling.contents[0]).split('Traffic')[0].strip().lower()
            writer.writerow([name, price, traffic, str(href)])
        except Exception as exc:
            # Best effort: a page that fails to download or parse is skipped,
            # but reported instead of being dropped silently. Unlike a bare
            # except, this lets Ctrl-C (KeyboardInterrupt) stop the run.
            sys.stderr.write('skipping %s: %r\n' % (href, exc))
        # Pause between requests to avoid hammering the site.
        time.sleep(0.5)
# then import CSV into a spreadsheet and annotate as desired
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment