Skip to content

Instantly share code, notes, and snippets.

@bemasher
Created May 30, 2011 00:48
Show Gist options
  • Save bemasher/998298 to your computer and use it in GitHub Desktop.
Save bemasher/998298 to your computer and use it in GitHub Desktop.
Newegg System Memory analysis script.
import threading
import urllib, urllib2
import json, re
from Queue import Queue
class GetURL(threading.Thread):
    """Worker thread that downloads and parses JSON documents.

    Every entry taken from ``urlQueue`` is an ``(itemNumber, url)`` tuple.
    The body fetched from ``url`` is parsed as JSON and the pair
    ``(itemNumber, parsed)`` is put on ``jsonQueue``.

    A failed download or parse is reported on stderr and the entry is
    dropped; nothing is put on ``jsonQueue`` for it.  The entry is still
    marked as done so that ``urlQueue.join()`` cannot block forever.
    """

    def __init__(self, urlQueue, jsonQueue):
        """Store the input queue (``urlQueue``) and output queue (``jsonQueue``)."""
        threading.Thread.__init__(self)
        self.urlQueue = urlQueue
        self.jsonQueue = jsonQueue

    def run(self):
        """Process queue entries in an endless loop.

        The loop never exits on its own, so the thread has to be started as
        a daemon (or the process terminated) to stop it.
        """
        # Imported locally because the module header does not import sys.
        import sys

        while True:
            # Blocks until an (itemNumber, url) pair is available
            itemNumber, url = self.urlQueue.get()
            try:
                response = urllib2.urlopen(url)
                try:
                    raw = response.read()
                finally:
                    response.close()
                # Use the queue handed to this worker, not a module global
                self.jsonQueue.put((itemNumber, json.loads(raw)))
            except Exception as exc:
                # Top-level boundary of the worker: one failing request must
                # not kill the thread, otherwise the remaining entries are
                # never marked done.
                sys.stderr.write(
                    "Failed to fetch %s (%s): %s\n" % (itemNumber, url, exc)
                )
            finally:
                # Always signal completion, whether the fetch succeeded or not
                self.urlQueue.task_done()
def getItems(pageNumber=1):
    """Return all search results starting at page ``pageNumber``.

    The search parameters are sent as a JSON request body to ``searchURL``
    once per result page.  Pages are requested one after another until the
    page count derived from the response's ``PaginationInfo`` is reached.

    Args:
        pageNumber: First page to request (defaults to 1).

    Returns:
        A list with the concatenated ``ProductListItems`` of every page
        that was fetched.
    """
    items = []
    while True:
        # Parameters retrieved from the query builder
        params = {
            "SubCategoryId": 147,
            "NValue": "100007611 600006050",
            "StoreDepaId": 1,
            "NodeId": 7611,
            "BrandId": -1,
            # PageNumber selects which page of the result set is returned
            "PageNumber": pageNumber,
            "CategoryId": 17,
        }

        # Pass search parameters to newegg's api and get response
        request = urllib2.Request(searchURL, json.dumps(params))
        response = urllib2.urlopen(request)
        try:
            data = json.loads(response.read())
        finally:
            response.close()

        # Extend the items list with the current result set
        items.extend(data['ProductListItems'])

        pagination = data['PaginationInfo']
        total = pagination['TotalCount']
        page_size = pagination['PageSize']
        # A page size of zero gives no way to compute a page count; stop
        # instead of dividing by zero.
        if page_size <= 0:
            break

        # Ceiling division: a partially filled last page still counts, and an
        # exact multiple does not trigger a request for an empty extra page.
        # (assumes TotalCount and PageSize are integers - TODO confirm)
        total_pages = (total + page_size - 1) // page_size
        if pageNumber >= total_pages:
            break
        pageNumber += 1

    return items
# Endpoints of the Newegg API used by this analysis
itemSpecURL = "http://www.ows.newegg.com/Products.egg/{}/Specification"
searchURL = "http://www.ows.newegg.com/Search.egg/Advanced"

# Collect the complete search result list through getItems()
itemList = getItems()

# urlQueue feeds the worker threads, jsonQueue receives what they produce
urlQueue = Queue()
jsonQueue = Queue()
items = {}

# Remember every item under its item number and queue the url of its
# specification page together with that number
for item in itemList:
    number = item["ItemNumber"]
    items[number] = item
    urlQueue.put((number, itemSpecURL.format(number)))

# Only two worker threads are started: the original author observed that
# more than 4 threads tends to choke on an exception when a call fails.
for _ in xrange(2):
    fetcher = GetURL(urlQueue, jsonQueue)
    fetcher.daemon = True
    fetcher.start()

# Wait here until every queued url has been marked as done
urlQueue.join()
# Regexes used for parsing specification data for analysis
speed_re = re.compile(r'DDR\d\s(\d+).*')
capacity_re = re.compile(r"(\d+)GB\s\((\d+)\sx\s(\d+)GB\)")
timing_re = re.compile(r'(\d+-\d+-\d+-\d+)')

# Features printed for each item, in output column order
features = ['Brand', 'Model', 'ItemNumber', 'Price', 'Speed', 'Capacity', 'Dimms', 'Timing', 'Voltage']
# Features an item must have, otherwise it is not printed at all
required = ['Price', 'Speed', 'Capacity', 'Dimms', 'Timing']

# For every item in the specification queue
while not jsonQueue.empty():
    itemNumber, specs = jsonQueue.get()

    # Make a new item dictionary and populate its feature pairs
    item = {}
    for group in specs['SpecificationGroupList']:
        for pair in group['SpecificationPairList']:
            if pair['Key'] in features:
                item[pair['Key']] = pair['Value'].encode('ascii', errors='ignore')

    # Capacity also yields the Dimms feature (second regex group)
    if 'Capacity' in item:
        capacity = capacity_re.match(item['Capacity'])
        if capacity:
            item['Capacity'] = capacity.group(1)
            item['Dimms'] = capacity.group(2)

    if 'Speed' in item:
        speed = speed_re.match(item['Speed'])
        if speed:
            item['Speed'] = speed.group(1)

    if 'Timing' in item:
        timing = timing_re.match(item['Timing'])
        if timing:
            # Each timing number becomes its own tab-separated column
            item['Timing'] = timing.group(1).replace('-', '\t')

    # Get price and item number from the items dictionary built earlier
    item['Price'] = items[itemNumber]['FinalPrice']
    item['ItemNumber'] = specs['NeweggItemNumber']

    # Print the item only when every required feature is present.  (A
    # `continue` inside a loop over `required` would only advance that inner
    # loop and never skip the item.)
    if all(feature in item for feature in required):
        # Fill in blank features so every column is present
        for feature in features:
            item.setdefault(feature, "")
        # Print all item features as one tab-separated line
        print('\t'.join(item[feature] for feature in features))

    # Mark the queue entry as processed, printed or not
    jsonQueue.task_done()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment