d6e · June 1, 2013 07:04
diff --git a/MTG_price_scraper.py b/MTG_price_scraper.py
 #! /usr/bin/env python
 import sys
 import os
 import re
 import csv

 # Third-party dependencies: print an install hint instead of a traceback when
 # one is missing. Only ImportError is caught so unrelated failures (for
 # example Ctrl-C during import) are not reported as a missing package, and the
 # exit status is non-zero so callers can detect the failure.
 try:
     import requests
 except ImportError:
     print("Install requests (sudo easy_install requests)")
     sys.exit(1)

 from urllib import urlencode
 try:
     from BeautifulSoup import BeautifulSoup
 except ImportError:
     print("Install BeautifulSoup (sudo easy_install BeautifulSoup)")
     sys.exit(1)
    
 # Read the whole card list up front; the context manager closes the handle as
 # soon as the text is in memory instead of leaving it to the garbage collector.
 with open('cards.txt', 'r') as cards_file:
     input = cards_file.read()  # NOTE: shadows the builtin; name kept for compatibility
 # Opening in 'w' mode creates/truncates prices.txt; nothing in the visible
 # code writes to this handle afterwards.
 output = open('prices.txt','w')
 urllist = input.split('\n')  # one entry per line; a trailing newline yields a final ''
 # The Python 2 csv module expects its output file to be opened in binary mode.
 resultFile = open(r"output.csv",'wb')
 wr = csv.writer(resultFile, delimiter=' ')  # space-delimited rows

 def file_len(fname):
     """Return the number of lines in the file at path *fname*.

     An empty file yields 0. The previous implementation returned the last
     ``enumerate`` index plus one, which raised ``UnboundLocalError`` when
     the file had no lines because the loop variable was never bound.
     """
     with open(fname) as f:
         # Iterating the handle streams the file line by line.
         return sum(1 for _ in f)
 filelength = file_len('cards.txt')  # line count of the card list; not used in the visible code

 url = 'http://www.cardkingdom.com/catalog/view?search=basic&filter%5Bname%5D='

 # Turn every card name into a search URL, updating the list in place.
 # Blank entries are dropped by content rather than by popping the last
 # element: the unconditional pop() discarded a real card whenever cards.txt
 # had no trailing newline, and a blank line in the middle of the file used to
 # become a bare search URL. strip() also removes a stray '\r' left by
 # Windows line endings. Spaces inside a name become '+' for the query string.
 urllist[:] = [
     url + card_name.strip().replace(' ', '+')
     for card_name in urllist
     if card_name.strip()
 ]

 count = 0  # 1-based position of the current card, shown in the console output

 # For every search URL: fetch the result page, walk each table with class
 # "grid", and emit one CSV row (plus a console line) per distinct
 # (name, price) pair found in the 10-column rows.
 for item in urllist:
     html = requests.get(item)
     soup = BeautifulSoup(html.content)
     pricetable = soup.findAll("table", {"class":"grid"})

     count += 1
     # (name, price) of the most recent row written for this card. Comparing
     # against it replaces the old firstrun/oldname state machine, which wrote
     # a duplicate row right after any row that differed from the remembered one.
     last_written = None
     error = ""
     errorname = item

     for table in pricetable:
         rows = table.findAll('tr')
         for row_index, tr in enumerate(rows):
             if '0 results' in str(tr):
                 if error == "":
                     # The first matching row only builds the message: recover
                     # the card name by undoing the URL formatting.
                     errorname = errorname.replace('http://www.cardkingdom.com/catalog/view?search=basic&filter%5Bname%5D=', '')
                     errorname = errorname.replace('+', ' ')
                     error = 'Sorry, I couldnt find this thing: '+errorname
                 else:
                     # NOTE(review): the message is emitted only when a later
                     # row matches again; presumably the page yields more than
                     # one matching <tr> per failed search -- confirm.
                     print(error)
                     wr.writerow([error])

             if row_index == 0:
                 # The first row of every table is skipped -- presumably the
                 # header row; confirm against the page markup.
                 continue

             col = tr.findAll('td')
             if len(col) != 10:  # Only the larger tables carry card data
                 continue

             name = str(col[0].contents[0].contents[0])
             edition = str(col[1].contents[0])
             price = str(col[8].contents[0])
             # The last cell is kept only when it says the card is out of stock.
             stock_note = str(col[9].contents[1].contents[0])
             quantity = stock_note if 'Out of stock' in stock_note else ""

             if (name, price) == last_written:
                 continue  # same name and price as the previous row: redundant
             last_written = (name, price)

             excel_col = [name, edition, price, quantity]
             wr.writerow(excel_col)
             print(str(count) + " | " + name + "\t|\t" + edition + "\t|\t" + price + "\t|\t" + quantity)
	#! /usr/bin/env python
	import sys
	import os
	import re
	import csv

	# Third-party dependencies: print an install hint instead of a traceback when
	# one is missing. Only ImportError is caught so unrelated failures are not
	# reported as a missing package, and the exit status is non-zero so callers
	# can detect the failure. Block indentation, lost in this copy, is restored.
	try:
	    import requests
	except ImportError:
	    print("Install requests (sudo easy_install requests)")
	    sys.exit(1)

	from urllib import urlencode
	try:
	    from BeautifulSoup import BeautifulSoup
	except ImportError:
	    print("Install BeautifulSoup (sudo easy_install BeautifulSoup)")
	    sys.exit(1)

	# Read the whole card list up front; the context manager closes the handle as
	# soon as the text is in memory instead of leaving it to the garbage collector.
	with open('cards.txt', 'r') as cards_file:
	    input = cards_file.read()  # NOTE: shadows the builtin; name kept for compatibility
	# Opening in 'w' mode creates/truncates prices.txt; nothing in the visible
	# code writes to this handle afterwards.
	output = open('prices.txt','w')
	urllist = input.split('\n')  # one entry per line; a trailing newline yields a final ''
	# The Python 2 csv module expects its output file to be opened in binary mode.
	resultFile = open(r"output.csv",'wb')
	wr = csv.writer(resultFile, delimiter=' ')  # space-delimited rows

	def file_len(fname):
	    """Return the number of lines in the file at path *fname*.

	    An empty file yields 0. The enumerate-based version returned the last
	    index plus one, which raised ``UnboundLocalError`` when the file had no
	    lines because the loop variable was never bound.
	    """
	    with open(fname) as f:
	        # Iterating the handle streams the file line by line.
	        return sum(1 for _ in f)
	filelength = file_len('cards.txt')  # line count of the card list; not used in the visible code

	url = 'http://www.cardkingdom.com/catalog/view?search=basic&filter%5Bname%5D='

	# Turn every card name into a search URL, updating the list in place.
	# Blank entries are dropped by content rather than by popping the last
	# element: the unconditional pop() discarded a real card whenever cards.txt
	# had no trailing newline, and a blank line in the middle of the file used to
	# become a bare search URL. strip() also removes a stray '\r' left by
	# Windows line endings. Spaces inside a name become '+' for the query string.
	urllist[:] = [
	    url + card_name.strip().replace(' ', '+')
	    for card_name in urllist
	    if card_name.strip()
	]

	count = 0  # 1-based position of the current card, shown in the console output

	# For every search URL: fetch the result page, walk each table with class
	# "grid", and emit one CSV row (plus a console line) per distinct
	# (name, price) pair found in the 10-column rows.
	for item in urllist:
	    html = requests.get(item)
	    soup = BeautifulSoup(html.content)
	    pricetable = soup.findAll("table", {"class":"grid"})

	    count += 1
	    # (name, price) of the most recent row written for this card. Comparing
	    # against it replaces the firstrun/oldname state machine, which wrote a
	    # duplicate row right after any row that differed from the remembered one.
	    last_written = None
	    error = ""
	    errorname = item

	    for table in pricetable:
	        rows = table.findAll('tr')
	        for row_index, tr in enumerate(rows):
	            if '0 results' in str(tr):
	                if error == "":
	                    # The first matching row only builds the message: recover
	                    # the card name by undoing the URL formatting.
	                    errorname = errorname.replace('http://www.cardkingdom.com/catalog/view?search=basic&filter%5Bname%5D=', '')
	                    errorname = errorname.replace('+', ' ')
	                    error = 'Sorry, I couldnt find this thing: '+errorname
	                else:
	                    # NOTE(review): the message is emitted only when a later
	                    # row matches again; presumably the page yields more than
	                    # one matching <tr> per failed search -- confirm.
	                    print(error)
	                    wr.writerow([error])

	            if row_index == 0:
	                # The first row of every table is skipped -- presumably the
	                # header row; confirm against the page markup.
	                continue

	            col = tr.findAll('td')
	            if len(col) != 10:  # Only the larger tables carry card data
	                continue

	            name = str(col[0].contents[0].contents[0])
	            edition = str(col[1].contents[0])
	            price = str(col[8].contents[0])
	            # The last cell is kept only when it says the card is out of stock.
	            stock_note = str(col[9].contents[1].contents[0])
	            quantity = stock_note if 'Out of stock' in stock_note else ""

	            if (name, price) == last_written:
	                continue  # same name and price as the previous row: redundant
	            last_written = (name, price)

	            excel_col = [name, edition, price, quantity]
	            wr.writerow(excel_col)
	            # Plain '|' separators: the backslash-escaped pipes in this copy
	            # were an extraction artifact and would have printed literal
	            # backslashes.
	            print(str(count) + " | " + name + "\t|\t" + edition + "\t|\t" + price + "\t|\t" + quantity)