Newegg System Memory analysis script: queries Newegg's OWS search API for memory listings, pulls each item's specifications, and prints a tab-separated table of brand, price, speed, capacity, and timings for analysis.
import threading
import urllib, urllib2
import json, re
from Queue import Queue

class GetURL(threading.Thread):
    def __init__(self, urlQueue, jsonQueue):
        threading.Thread.__init__(self)
        self.urlQueue = urlQueue
        self.jsonQueue = jsonQueue

    def run(self):
        # Run until thread is terminated
        while True:
            # Get an item number and url for the item
            itemNumber, url = self.urlQueue.get()
            # Get item specifications
            raw = urllib2.urlopen(url).read()
            self.jsonQueue.put((itemNumber, json.loads(raw)))
            # Indicate that the item pulled last from the queue is finished
            self.urlQueue.task_done()
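
# The worker above is not fault tolerant: an uncaught exception kills its
# thread before task_done() is called (see the note where the workers are
# started below). A hedged sketch of a more forgiving variant, not used by
# this script; it drops items whose spec call fails so urlQueue.join() can
# still return.
class GetURLSafe(GetURL):
    def run(self):
        while True:
            itemNumber, url = self.urlQueue.get()
            try:
                raw = urllib2.urlopen(url).read()
                self.jsonQueue.put((itemNumber, json.loads(raw)))
            except (urllib2.URLError, ValueError):
                # A real fix might retry or log here instead of skipping
                pass
            finally:
                # Always mark the work item done so join() does not hang
                self.urlQueue.task_done()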
def getItems(pageNumber = 1):
    # Parameters retrieved from the query builder
    params = {
        "SubCategoryId": 147,
        "NValue": "100007611 600006050",
        "StoreDepaId": 1,
        "NodeId": 7611,
        "BrandId": -1,
        # Be sure to fill in the pageNumber variable for pagination
        "PageNumber": pageNumber,
        "CategoryId": 17
    }

    # Pass search parameters to newegg's api and get the response
    request = urllib2.Request(searchURL, json.dumps(params))
    response = urllib2.urlopen(request)
    raw = response.read()
    data = json.loads(raw)

    # Check how many pages of results there are
    pagination = data['PaginationInfo']
    pages = pagination['TotalCount'] / pagination['PageSize']

    # Extend the items list with the current result set
    items = []
    items.extend(data['ProductListItems'])

    # If there are still more pages of data, get the next result set
    if pages >= pageNumber:
        # Recursively call getItems for the next page and extend items with its result
        items.extend(getItems(pageNumber + 1))

    return items
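
# Note: getItems() returns the flattened ProductListItems from every result
# page; each entry is a dict from the search API, of which only "ItemNumber"
# and "FinalPrice" are used by the rest of this script.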

# API urls we need to pull data for this analysis
itemSpecURL = "http://www.ows.newegg.com/Products.egg/{}/Specification"
searchURL = "http://www.ows.newegg.com/Search.egg/Advanced"

# Get the item list (one api call per page of results)
itemList = getItems()

# Define queues for item urls and the specs result for each item
urlQueue = Queue()
jsonQueue = Queue()

items = {}

# Push each item number and its spec url onto the urlQueue,
# and map each item number to its item structure
for item in itemList:
    specURL = itemSpecURL.format(item["ItemNumber"])
    urlQueue.put((item["ItemNumber"], specURL))
    items[item["ItemNumber"]] = item

# Start two threads for grabbing the specifications of each item.
# This is not fault tolerant, so any more than 4 threads tends to choke
# on an exception when a call fails. Obviously this needs fixing
# (one possible approach is sketched in GetURLSafe above).
for worker in xrange(2):
    t = GetURL(urlQueue, jsonQueue)
    t.setDaemon(True)
    t.start()

# Block until all requests have been processed
urlQueue.join()

# Regexes used for parsing specification data for analysis
speed_re = re.compile(r'DDR\d\s(\d+).*')
capacity_re = re.compile(r"(\d+)GB\s\((\d+)\sx\s(\d+)GB\)")
timing_re = re.compile(r'(\d+-\d+-\d+-\d+)')
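
# For reference, the spec strings these regexes expect look roughly like
# (wording taken from typical Newegg listings, so treat as illustrative):
#   Speed:    "DDR3 1600 (PC3 12800)" -> speed_re captures "1600"
#   Capacity: "8GB (2 x 4GB)"         -> capacity_re captures total, module count, size per module
#   Timing:   "9-9-9-24"              -> timing_re captures the whole timing string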

# List of features for each item we're concerned with
features = ['Brand', 'Model', 'ItemNumber', 'Price', 'Speed', 'Capacity', 'Dimms', 'Timing', 'Voltage']
required = ['Price', 'Speed', 'Capacity', 'Dimms', 'Timing']

# For every item in the specification queue
while not jsonQueue.empty():
    itemNumber, specs = jsonQueue.get()

    # Make a new item dictionary and populate its feature pairs
    item = {}
    for group in specs['SpecificationGroupList']:
        for pair in group['SpecificationPairList']:
            if pair['Key'] in features:
                item[pair['Key']] = pair['Value'].encode('ascii', 'ignore')

    # If capacity exists and is parsable, parse it, and likewise for speed and timing
    if 'Capacity' in item:
        capacity = capacity_re.match(item['Capacity'])
        if capacity:
            item['Capacity'] = capacity.group(1)
            item['Dimms'] = capacity.group(2)

    if 'Speed' in item:
        speed = speed_re.match(item['Speed'])
        if speed:
            item['Speed'] = speed.group(1)

    if 'Timing' in item:
        timing = timing_re.match(item['Timing'])
        if timing:
            item['Timing'] = timing.group(1).replace('-', '\t')

    # Get price and item number from the items dictionary we made earlier
    item['Price'] = items[itemNumber]['FinalPrice']
    item['ItemNumber'] = specs['NeweggItemNumber']

    # Skip the item if it's missing a required feature
    if not all(feature in item for feature in required):
        jsonQueue.task_done()
        continue

    # Fill in blank values for any remaining features
    for feature in features:
        if feature not in item:
            item[feature] = ""

    # Print all item features as one tab-separated row
    print '\t'.join(map(lambda x: str(item[x]), features))

    jsonQueue.task_done()
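
Each printed row is tab-separated with the columns Brand, Model, ItemNumber, Price, Speed, Capacity, Dimms, Timing, Voltage; because Timing is expanded into four tab-separated figures, a fully parsed row carries twelve fields. Below is a minimal sketch of how that output could be loaded back for analysis, assuming it was redirected to a file named memory.tsv; the filename, the timing column labels, and the price-per-GB example are illustrative, not part of the gist.

import csv

# Column labels for a fully parsed row; the four timing figures are given
# the usual CL-tRCD-tRP-tRAS names here purely for convenience.
columns = ['Brand', 'Model', 'ItemNumber', 'Price', 'Speed', 'Capacity',
           'Dimms', 'tCL', 'tRCD', 'tRP', 'tRAS', 'Voltage']

modules = []
with open('memory.tsv', 'rb') as tsv:
    for row in csv.reader(tsv, delimiter='\t'):
        # Rows whose Timing string didn't match timing_re have fewer fields
        if len(row) != len(columns):
            continue
        modules.append(dict(zip(columns, row)))

# Example analysis: dollars per GB for each kit
for module in modules:
    try:
        print module['ItemNumber'], float(module['Price']) / float(module['Capacity'])
    except ValueError:
        # Skip rows whose price or capacity didn't parse as a number
        pass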