Created
June 1, 2013 07:04
-
-
Save d6e/5689533 to your computer and use it in GitHub Desktop.
A scraper for looking up the prices of various Magic: The Gathering cards, given a list of card names.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
import sys
import os
import re
import csv

# Third-party dependencies: print an install hint and exit with a non-zero
# status when one is missing. Only ImportError is caught, so any other
# failure raised while importing still surfaces with its own traceback.
try:
    import requests
except ImportError:
    print("Install requests (sudo easy_install requests)")
    sys.exit(1)

from urllib import urlencode

try:
    from BeautifulSoup import BeautifulSoup
except ImportError:
    print("Install BeautifulSoup (sudo easy_install BeautifulSoup)")
    sys.exit(1)
# Read the whole card list up front; the context manager closes the handle
# as soon as the text has been read.
with open('cards.txt', 'r') as cards_file:
    card_text = cards_file.read()

# NOTE(review): 'prices.txt' is created/truncated here but nothing in the
# visible script writes to it -- confirm whether it is still needed.
output = open('prices.txt', 'w')

# One entry per line of cards.txt; a trailing newline yields a final '' entry.
urllist = card_text.split('\n')

# Results sheet. Opened in binary mode, which is what the Python 2 csv
# module expects; fields are separated by a single space.
resultFile = open(r"output.csv", 'wb')
wr = csv.writer(resultFile, delimiter=' ')
def file_len(fname):
    """Return the number of lines in the file at *fname*.

    An empty file yields 0. A final line without a trailing newline still
    counts as a line.
    """
    with open(fname) as f:
        # Count by iterating so the file is streamed rather than loaded whole.
        return sum(1 for _ in f)
filelength = file_len('cards.txt')

# Search endpoint; the card name is appended as the value of filter[name].
url = 'http://www.cardkingdom.com/catalog/view?search=basic&filter%5Bname%5D='

# Turn each card name into a search URL (spaces become '+').
# Entries that are empty after stripping whitespace are dropped. That covers
# the '' left by a trailing newline as well as blank lines, and it never
# discards a real last card when the file lacks a final newline.
# Stripping also removes a stray '\r' from files with Windows line endings.
urllist = [
    url + card.strip().replace(' ', '+')
    for card in urllist
    if card.strip()
]

count = 0
def _report_card(count, item):
    """Fetch one search-results page and emit its price rows.

    Each accepted row is written through the module-level CSV writer ``wr``
    and echoed to stdout. If any table row contains the text '0 results',
    a single "not found" message is printed and written for the card.

    count -- 1-based position of the card in ``urllist``; printed as a prefix.
    item  -- the full search URL for the card (module-level ``url`` + name).
    """
    html = requests.get(item)
    soup = BeautifulSoup(html.content)
    pricetable = soup.findAll("table", {"class": "grid"})

    not_found = False
    previous = None  # (name, price) of the last row written for this card

    for table in pricetable:
        for position, tr in enumerate(table.findAll('tr')):
            # The '0 results' marker is checked on every row, first row included.
            if '0 results' in str(tr):
                not_found = True
            if position == 0:
                continue  # first row of each table is skipped (presumably the header)

            col = tr.findAll('td')
            if len(col) != 10:  # Only the larger tables
                continue

            name = str(col[0].contents[0].contents[0])
            edition = str(col[1].contents[0])
            price = str(col[8].contents[0])
            stock = str(col[9].contents[1].contents[0])
            # The last column is only reported when it says the card is out of stock.
            quantity = stock if 'Out of stock' in stock else ""

            # A row repeating the name and price of the row written just
            # before it carries no new information, so it is skipped.
            if (name, price) == previous:
                continue
            previous = (name, price)

            wr.writerow([name, edition, price, quantity])
            print(str(count) + " | " + name + "\t|\t" + edition + "\t|\t" + price + "\t|\t" + quantity)

    if not_found:
        # Recover the human-readable card name from the URL for the message.
        errorname = item.replace(url, '').replace('+', ' ')
        error = 'Sorry, I couldnt find this thing: ' + errorname
        print(error)
        wr.writerow([error])


# Look up every card in order. The output files are closed even if a lookup
# raises, so rows already written are flushed to disk.
try:
    for count, item in enumerate(urllist, 1):
        _report_card(count, item)
finally:
    resultFile.close()
    output.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment