Skip to content

Instantly share code, notes, and snippets.

@d6e
Created June 1, 2013 07:04
Show Gist options
  • Save d6e/5689533 to your computer and use it in GitHub Desktop.
Save d6e/5689533 to your computer and use it in GitHub Desktop.
A scraper for looking up the prices of various Magic: The Gathering cards, given a list of card names.
#! /usr/bin/env python
import sys
import os
import re
import csv
try:
import requests
except:
print "Install requests (sudo easy_install requests)"
sys.exit()
from urllib import urlencode
try:
from BeautifulSoup import BeautifulSoup
except:
print "Install BeautifulSoup (sudo easy_install BeautifulSoup)"
sys.exit()
# Read the whole card list (one card name per line) up front.  The handle is
# closed as soon as the text is in memory instead of being left open for the
# lifetime of the script.
# NOTE: `input` shadows the builtin of the same name; the name is kept because
# the module-level variable is part of what the rest of the script can see.
with open('cards.txt', 'r') as cards_file:
    input = cards_file.read()

# Opening in 'w' mode creates/truncates prices.txt; nothing in the visible
# script writes to this handle afterwards.
output = open('prices.txt', 'w')

# One entry per line of cards.txt.  A trailing newline in the file produces a
# final empty-string entry here.
urllist = input.split('\n')

# CSV results file.  Binary mode is what the Python 2 csv module expects for
# its output file; cells are separated by a single space.
resultFile = open(r"output.csv", 'wb')
wr = csv.writer(resultFile, delimiter=' ')
def file_len(fname):
    """Return the number of lines in the file at path *fname*.

    A final line without a trailing newline still counts as a line, and an
    empty file yields 0.

    Raises IOError/OSError if the file cannot be opened.
    """
    with open(fname) as f:
        # Counting the lines directly (rather than returning the last
        # enumerate() index + 1) keeps an empty file from failing on an
        # unbound loop variable.
        return sum(1 for _ in f)
# Number of lines in the card list (not read again in the visible script).
filelength = file_len('cards.txt')

# Card Kingdom catalog search URL; the card name is appended to it.
url = 'http://www.cardkingdom.com/catalog/view?search=basic&filter%5Bname%5D='

# Turn every card name into a search URL, with spaces written as '+'.
# Blank entries are filtered out instead of blindly popping the last element:
# that way the trailing-newline leftover is still discarded, a file that does
# NOT end with a newline keeps its last card, and stray blank lines do not
# become empty searches.  Surrounding whitespace (e.g. a stray '\r') is
# stripped so it does not end up inside the query.
urllist = [
    url + card_name.strip().replace(' ', '+')
    for card_name in urllist
    if card_name.strip()
]

# Running index of the card being looked up; incremented once per URL.
count = 0
def _card_name_from_url(search_url):
    """Recover the card name from a search URL built as `url` + name-with-plusses."""
    return search_url.replace(url, '').replace('+', ' ')


def _parse_listing(col):
    """Return (name, edition, price, quantity) strings from a 10-cell result row.

    `quantity` is the stock cell's text when it contains 'Out of stock',
    otherwise an empty string.
    """
    name = str(col[0].contents[0].contents[0])
    edition = str(col[1].contents[0])
    price = str(col[8].contents[0])
    stock = str(col[9].contents[1].contents[0])
    quantity = stock if 'Out of stock' in stock else ""
    return name, edition, price, quantity


# Look up every card URL, writing one CSV row (and one console line) per
# distinct listing found.  All print calls use the single-argument
# parenthesised form, which produces the same output under Python 2.
for item in urllist:
    count += 1
    try:
        # A timeout keeps one unresponsive request from hanging the whole run.
        html = requests.get(item, timeout=30)
    except requests.exceptions.RequestException as exc:
        # Report the failed lookup and carry on with the remaining cards
        # rather than aborting with a half-written CSV.
        error = ('Sorry, the lookup request failed for: '
                 + _card_name_from_url(item) + ' (' + str(exc) + ')')
        print(error)
        wr.writerow([error])
        continue

    soup = BeautifulSoup(html.content)
    pricetable = soup.findAll("table", {"class": "grid"})

    last_written = None         # (name, price) of the last row written for this card
    not_found_reported = False  # the "no results" message is emitted once per card

    for table in pricetable:
        for row_index, tr in enumerate(table.findAll('tr')):
            # The "0 results" text may appear in more than one <tr> of the
            # same page, so report on the first match and ignore the rest.
            if '0 results' in str(tr) and not not_found_reported:
                error = 'Sorry, I couldnt find this thing: ' + _card_name_from_url(item)
                print(error)
                wr.writerow([error])
                not_found_reported = True

            # The first row of every table is skipped (presumably the header
            # row -- confirm against the page markup).
            if row_index == 0:
                continue

            col = tr.findAll('td')
            if len(col) != 10:  # Only the larger tables
                continue

            name, edition, price, quantity = _parse_listing(col)

            # Skip a listing whose name and price repeat the previous written
            # row; edition is deliberately not part of the comparison.
            if (name, price) == last_written:
                continue
            last_written = (name, price)

            wr.writerow([name, edition, price, quantity])
            print(str(count) + " | " + name + "\t|\t" + edition + "\t|\t" + price + "\t|\t" + quantity)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment