Last active
December 29, 2015 00:59
-
-
Save janinge/7590279 to your computer and use it in GitHub Desktop.
INFO207 spaghetti.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# eBay completed-auction "sniping" analysis script (INFO207).
# NOTE(review): API credentials are intentionally blank -- fill in before running.
EBAY_APP_ID = ''
EBAY_DEV_ID = ''
EBAY_CER_ID = ''
EBAY_USER_TOKEN = ''
# eBay category ids to sample completed auctions from.
EBAY_CATEGORIES = [
    31388,  # Digital Cameras
    11724,  # Camcorders
    171485, # iPads, Tablets and eBook readers
    73839,  # iPods and MP3 players
    11071,  # Televisions
    112529, # Headphones
    139973, # Video Games
    156955, # GPS units
    888     # Sporting goods
]
from ebaysdk import finding, shopping, trading | |
from datetime import datetime | |
from dateutil.parser import parse as dateparse | |
from collections import * | |
from Queue import Queue | |
from threading import Thread | |
import logging as log | |
log.basicConfig(format='%(levelname)s:%(funcName)s:%(lineno)d - %(message)s', level=log.DEBUG) | |
from urllib2 import urlopen | |
# ebaysdk use Soup for XML parsing | |
# I like soup too, :%s/re/bs4/g | |
try: | |
from bs4 import BeautifulSoup | |
except ImportError: | |
from BeautifulSoup import BeautifulSoup | |
# Shared eBay SDK clients, built once from the credentials above.
finder = finding(appid=EBAY_APP_ID)
shopper = shopping(appid=EBAY_APP_ID, devid=EBAY_DEV_ID, certid=EBAY_CER_ID)
trader = trading(appid=EBAY_APP_ID, devid=EBAY_DEV_ID, certid=EBAY_CER_ID, token=EBAY_USER_TOKEN)

# Auction: one completed listing (endtime is a datetime from dateparse;
# price/shipping are floats; sold is a bool).
Auction = namedtuple('Auction', ('id', 'title', 'bids', 'price', 'shipping', 'sold', 'endtime'))
# Bids: per-auction bidding summary returned by get_bid_summary_from_auction.
Bids = namedtuple('Bids', ('sniped', 'time_left', 'bidders', 'winner_bids', 'winner_new', 'new_ratio'))
def depaginator(api, call, input, output='searchResult', page=1, max_page=100, retries=3):
    """Yield successive pages of items from a paginated eBay API call.

    api      -- an ebaysdk client exposing execute(), _to_xml() and response_dict()
    call     -- API verb, e.g. 'findCompletedItems'
    input    -- request payload; a dict (or pre-rendered XML string) passed
                through api._to_xml() and appended to the pagination XML
    output   -- response key holding the result container
    Stops on the first empty page, on max_page, or when *retries*
    unsuccessful acks have been consumed (failed pages are retried,
    not skipped).
    """
    while page <= max_page and retries:
        log.info("Retrieving page %i", page)
        try:
            api.execute(call, api._to_xml({'paginationInput': {'pageNumber': page}}) + api._to_xml(input))
        except Exception:  # was a bare except; keep the best-effort break
            log.exception("Paginator failed in API call")
            break
        result = api.response_dict()
        if result.get('ack', {}).get('value') != 'Success':
            retries -= 1
            # BUGFIX: this logged `r.get('errors')` with `r` undefined,
            # raising NameError on the first unsuccessful ack.
            log.warning("Unsuccessful %s when requesting page %i of %i. Retries left: %i. Error: %s",
                        call, page, max_page, retries, result.get('errors'))
            continue
        items = result.get(output, {}).get('item')
        if items:
            yield items
            page += 1
        else:
            break
def find_candidate_products_in_category(category, target=40, **pages):
    """Count completed, new-condition auctions per catalogue product.

    Returns a Counter mapping eBay ReferenceID product ids to the number of
    completed auctions observed in *category*.  Results arrive sorted by bid
    count descending, so scanning stops early once some product reaches
    *target* auctions or the page's last item has fewer than 2 bids.
    Extra keyword args (e.g. max_page) are forwarded to depaginator.
    """
    products = Counter()
    for page in depaginator(finder, 'findCompletedItems', {
            'categoryId': category,
            'sortOrder': 'BidCountMost',
            'itemFilter': [{'name': 'ListingType', 'value': 'Auction'},
                           {'name': 'Condition', 'value': 'New'}]
            }, **pages):
        for item in page:
            pid = item.get('productId')
            if pid and pid.get('type', {}).get('value') == 'ReferenceID':
                products[int(pid.get('value', 0))] += 1
        # BUGFIX: bidCount 'value' comes back as a string; the original
        # compared str < int, which never triggered the early exit under
        # Python 2.  Also default to 0 so a missing value cannot crash int().
        if max(products.values() or (0,)) >= target or \
           int(page[-1].get('sellingStatus', {}).get('bidCount', {}).get('value', 0)) < 2:
            break
    return products
def find_auctions_from_product_id(category, product_id, id_type='ReferenceID', **pages):
    """Collect completed auctions for one catalogue product as Auction tuples.

    Pages arrive sorted by bid count descending, so collection stops (and the
    set gathered so far is returned) as soon as an item with fewer than
    2 bids is seen.  Extra keyword args are forwarded to depaginator.
    """
    request = finder._to_xml({
        'categoryId': category,
        'sortOrder': 'BidCountMost',
        'itemFilter': [{'name': 'ListingType', 'value': 'Auction'},
                       {'name': 'Condition', 'value': 'New'}]
    }) + '<productId type="%s">%s</productId>' % (id_type, product_id)
    collected = set()
    for batch in depaginator(finder, 'findCompletedItems', request, **pages):
        for entry in batch:
            status = entry.get('sellingStatus', {})
            listing = Auction(
                id=int(entry.get('itemId', {}).get('value', 0)) or None,
                title=entry.get('title', {}).get('value'),
                bids=int(status.get('bidCount', {}).get('value', 0)),
                price=float(status.get('convertedCurrentPrice', {}).get('value', 0)) or None,
                shipping=float(entry.get('shippingInfo', {}).get('shippingServiceCost', {}).get('value', 0)),
                sold=status.get('sellingState', {}).get('value') == 'EndedWithSales',
                endtime=dateparse(entry.get('listingInfo', {}).get('endTime', {}).get('value')))
            # Sorted by bids descending: the first <2-bid item ends the scan.
            if listing.bids < 2:
                return collected
            collected.add(listing)
    return collected
def find_samples_from_category_list(categories, min_auctions=10, max_samples=3, **pages):
    """Pick up to *max_samples* popular product ids for each category.

    Returns {category: [product_id, ...]} keeping only products with more
    than *min_auctions* completed auctions.  Extra keyword args are
    forwarded to the candidate search (and on to depaginator).
    """
    targets = {}
    for category in categories:
        log.info("Collecting products from category %i", category)
        ranked = find_candidate_products_in_category(category, **pages).most_common(max_samples)
        targets[category] = [pid for pid, count in ranked if count > min_auctions]
    return targets
def get_bid_summary_from_auction(item, endtime, bidlimit=1, timelimit=30.0, newbidcounts=None, oldbidcounts=None):
    """Build a Bids summary for one auction via the Trading GetAllBidders call.

    item      -- eBay item id
    endtime   -- auction end time (datetime) used to compute time_left
    A winner is considered to have "sniped" when the winning bid landed less
    than *timelimit* seconds before the end and the winner placed at most
    *bidlimit* bids.  Per-bidder historic bid counts are appended to
    *newbidcounts* or *oldbidcounts* depending on whether the WINNER is a
    new user.  Raises IndexError when the auction has no bidders (callers
    catch this).
    """
    # BUGFIX: these were mutable default arguments ([]), silently shared
    # and grown across every call.
    if newbidcounts is None:
        newbidcounts = []
    if oldbidcounts is None:
        oldbidcounts = []
    trader.execute('GetAllBidders', {'CallMode': 'ViewAll', 'IncludeBiddingSummary': 'true', 'ItemID': item})
    bidders = trader.response_dict().get('BidArray', {}).get('Offer', [])
    winner = bidders[0]  # offers appear highest-bid first; IndexError if none
    timebid = dateparse(winner.get('TimeBid', {}).get('value', ''))
    bidcount = int(winner.get('BidCount', {}).get('value', 1))
    timeleft = (endtime - timebid).total_seconds()
    sniped = timeleft < timelimit and bidcount <= bidlimit
    isnew = lambda bidder: bidder.get('User', {}).get('NewUser', {}).get('value') == 'true'
    newuser = isnew(winner)
    newratio = sum(isnew(bidder) for bidder in bidders) / float(len(bidders))
    # Depends only on the winner's status, so hoisted out of the loop
    # (the original recomputed it once per bidder).
    bidcounter = newbidcounts if newuser else oldbidcounts
    for bidder in bidders:
        for auction in bidder.get('User', {}).get('BiddingSummary', {}).get('ItemBidDetails', []):
            try:
                bidcounter.append(int(auction.get('BidCount').get('value')))
            except AttributeError:  # detail record missing BidCount/value
                continue
    return Bids(sniped, timeleft, len(bidders), bidcount, newuser, newratio)
def scrape_snipers(item, endtime=None, bidlimit=1, timelimit=30.0):
    """Classify one auction as sniped by scraping the eBay bid-history page.

    Returns True when the winning bidder bid at most *bidlimit* times and the
    winning bid landed within *timelimit* seconds of the end, False when not,
    and None when the page lists no direct bids.  *endtime* is scraped from
    the page unless supplied by the caller.
    NOTE(review): if the page yields no parseable end time and none is
    passed in, the subtraction below raises TypeError -- unchanged from the
    original behaviour.
    """
    soup = BeautifulSoup(urlopen("http://offer.ebay.com/ws/eBayISAPI.dll?ViewBids&item=%i" % item, timeout=20).read())
    # Brute force date parser!
    for sib in soup.find("span", text="Time Ended:").next_siblings:
        if sib.text.strip():
            try:
                endtime = dateparse(sib.text)
            # BUGFIX: dateutil raises ValueError on unparseable text, not
            # SyntaxError -- the original crashed instead of skipping.
            except (ValueError, OverflowError):
                continue
            else:
                break
    bids = []
    # How to build a house of cards in 10 easy steps
    for row in soup.find(text="Starting Price").find_parent("table").find_all("tr", bgcolor=True):
        a = row.find("a")
        # BUGFIX: the proxy-bid guard must run BEFORE dereferencing a.parent,
        # otherwise rows without a link raised AttributeError.
        if not a:
            continue  # Drop proxy bids
        c = a.parent.next_sibling
        aid = a.find(text=True, recursive=False)
        bid = c.text.strip()
        time = ' '.join(c.next_sibling.stripped_strings)
        bids.append((aid, bid, dateparse(time)))
    if not bids:
        return
    winner = bids[0][0]
    if (endtime - bids[0][2]).total_seconds() < timelimit:
        # sum() instead of len(filter(...)): counts the winner's bids and
        # works on both Python 2 and 3 (filter is lazy on 3).
        if sum(1 for b in bids if b[0] == winner) <= bidlimit:
            return True
    return False
def classifier_thread(queue, results):
    """Worker loop: pull (callable, key, *extra) tuples and store outcomes.

    For each work tuple, results[key] = callable(key, *extra); on any
    exception the error is logged and results[key] is set to None so one
    bad item cannot kill the worker.  A None item is the poison pill that
    shuts the worker down (see join_worker_pool).
    """
    while True:
        work = queue.get()
        if work is None:  # poison pill -- no task_done(), pool is shutting down
            return
        try:
            results[work[1]] = work[0](*work[1:])
        except Exception:  # was a bare except: let SystemExit/KeyboardInterrupt through
            log.exception("Bad things happened while classifying %s", work[1])
            results[work[1]] = None
        queue.task_done()
def launch_worker_pool(*args, **kwargs):
    """Start a pool of worker threads sharing one work queue.

    kwargs:
        task    -- thread target (default: classifier_thread)
        workers -- number of threads (default: 4)
    Each thread is started with (work_queue,) + args.  Returns
    (work_queue, threads).
    """
    work_queue = Queue()
    # Resolve the default lazily so an explicit task never touches the
    # classifier_thread global.
    worker = kwargs['task'] if 'task' in kwargs else classifier_thread
    # Plain loops instead of side-effect list comprehensions.
    threads = []
    for _ in range(kwargs.get('workers', 4)):
        threads.append(Thread(target=worker, args=(work_queue,) + args))
    for t in threads:
        t.start()
    return (work_queue, threads)
def join_worker_pool(queue, threads):
    """Drain the queue, then shut down and reap every worker thread.

    Blocks until all queued work has been task_done()'d, sends one None
    poison pill per worker, and joins the threads.
    """
    queue.join()
    # Plain loops instead of side-effect list comprehensions.
    for _ in threads:
        queue.put(None)  # one poison pill per worker
    for t in threads:
        t.join()
def store_auctions_dictionary():
    """Sample every configured category and pickle the harvested auctions.

    Builds {category: {product_id: set(Auction, ...)}} and stores it under
    the 'auctions' pickle via store_obj.
    """
    targets = find_samples_from_category_list(EBAY_CATEGORIES, max_page=40)
    auctions = {}
    for category in EBAY_CATEGORIES:
        per_product = {}
        for product in targets[category]:
            per_product[product] = find_auctions_from_product_id(category, product)
        auctions[category] = per_product
    store_obj('auctions', auctions)
def store_obj(obj, data):
    """Pickle *data* to '<obj>.python' in the working directory."""
    import pickle
    # BUGFIX: pickle files must be opened in binary mode ('wb'); text mode
    # breaks on Python 3 and corrupts binary protocols on Windows.
    with open(obj + '.python', 'wb') as fp:
        pickle.dump(data, fp)
def load_obj(obj):
    """Unpickle and return the object stored as '<obj>.python'."""
    import pickle
    # BUGFIX: binary mode ('rb') to match store_obj and work on Python 3.
    with open(obj + '.python', 'rb') as fp:
        return pickle.load(fp)
def update_bids_from_selection(auctions, bids):
    """Fill *bids* (auction id -> Bids) for any auctions not yet summarised.

    Auctions whose id is already in *bids* are skipped, so an interrupted
    run can resume.  Per-auction failures are logged and skipped rather
    than aborting the whole batch.
    """
    for auction in auctions:
        if auction.id in bids:
            continue
        try:
            summary = get_bid_summary_from_auction(auction.id, auction.endtime)
        # Was a bare except: keep the best-effort skip but let
        # KeyboardInterrupt/SystemExit stop a long scraping run.
        except Exception:
            log.exception("Calculating summary failed")
            continue
        bids[auction.id] = summary
def plot_ticks(auctions, bids, title=None, currency='USD'):
    """Scatter-plot total end price (price + shipping) over end time.

    Sniped auctions are red, the rest green; the figure is written to
    '<title>.png'.  Auctions without a bid summary in *bids* are ignored.
    """
    import matplotlib.pyplot as plt
    # BUGFIX: start a fresh figure -- pyplot is stateful, so successive
    # calls used to pile every product's points onto one plot.
    plt.figure()
    sniped = [(a.endtime, a.price + a.shipping) for a in auctions if a.id in bids and bids[a.id].sniped]
    unsniped = [(a.endtime, a.price + a.shipping) for a in auctions if a.id in bids and not bids[a.id].sniped]
    # Guard empty series: zip(*[]) would call plot_date with no arguments.
    if sniped:
        plt.plot_date(*zip(*sniped), color='red')
    if unsniped:
        plt.plot_date(*zip(*unsniped), color='green')
    plt.title(title)
    plt.ylabel(currency)
    plt.grid(True)
    # BUGFIX: pyplot has no save(); savefig() writes the image file.
    plt.savefig(title + ".png")
    plt.close()  # release the figure so repeated calls don't leak memory
def plot_box(auctions, bids, title=None, currency='USD'):
    """Box-plot total end price for normal vs sniped auctions.

    Auctions without a bid summary in *bids* are ignored; the figure is
    written to '<title>.png'.
    """
    import matplotlib.pyplot as plt
    fig = plt.figure()
    ax = fig.add_subplot(111)
    plt.setp(ax, xticklabels=["Normal", "Sniped"])
    sniped = []
    unsniped = []
    for auction in auctions:
        if auction.id in bids:
            (sniped if bids[auction.id].sniped else unsniped).append(auction.price + auction.shipping)
    ax.boxplot([unsniped, sniped])
    plt.ylabel(currency)
    plt.grid(True)
    # BUGFIX: pyplot has no save(); savefig() writes the image file.
    plt.savefig(title + ".png")
    plt.close(fig)  # release the figure so repeated calls don't leak memory
def main():
    """Resolve bid summaries for every stored auction sample and plot them.

    Expects 'auctions.python' and 'bids.python' pickles to exist (see
    store_auctions_dictionary); checkpoints bids after each product.
    """
    # Product ids from an earlier sampling run, kept for the record.
    sample_targets = {888: [141695192, 143330669, 99318098],
                      11724: [129664853, 129678208],
                      31388: [100127676, 100113265, 170243795],
                      73839: [154044229, 92295415, 118461770],
                      112529: [114567862, 114624173, 114561288],
                      139973: [153180066, 153175722, 77244068],
                      156955: [109402844, 115377395, 109369394],
                      171485: [117365730, 166792164, 117332436]}
    auctions = load_obj('auctions')
    bids = load_obj('bids')
    for group in auctions:
        # BUGFIX: iterate items() so the plot title is the product id --
        # the original iterated values and used str(product), i.e. the
        # repr of an entire set of Auction tuples, as the PNG filename.
        for product_id, listings in auctions[group].items():
            auction_selection = list(listings)
            print("Resolving %i auctions" % len(auction_selection))
            update_bids_from_selection(auction_selection, bids)
            # BUGFIX: store_bids_dictionary() was never defined; persist
            # the checkpoint through store_obj instead.
            store_obj('bids', bids)
            plot_ticks(auction_selection, bids, title=str(product_id))
            plot_box(auction_selection, bids, title=str(product_id))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi, I'm new to github. Does this code find all bidders and bids for a set of past auctions (defined by keywords) on eBay using the eBay developers API? Thank you very much!