VeraCart Product Catalog Exporter
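A Python 2 script that pulls a full product catalog out of the VeraCart REST API: it fetches the category list, writes categories.csv, then downloads each category's items into products.csv (caching one <category_id>.xml response per category) and mirrors the category and product images from static.veracart.com into matching local paths.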
# VeraCart Product Catalog Exporter
# Luke Macken <[email protected]>
# License: GPLv3+

import os
import csv
from urllib import urlretrieve
from urllib2 import urlopen
from xml.dom.minidom import parseString
from BeautifulSoup import BeautifulSoup
APP_NAME = ""
API_TOKEN = ""
URL = "https://corp1.veracart.com/api/rest/catalog/?api_token=%s&call_name=" % API_TOKEN
STATIC_BASE = "https://static.veracart.com/" + APP_NAME

category_cols = ['id', 'unique_id', 'parent_id', 'name', 'header', 'footer',
                 'hide', 'sort', 'image_sm', 'image_md', 'image_lg', 'updated_at']
item_cols = ['id', 'unique_id', 'sku', 'name', 'subtitle', 'summary', 'detail',
             'price', 'sale_price', 'no_shipping', 'no_tax', 'weight',
             'status', 'image_sm', 'image_md', 'image_lg', 'seo_title_tag',
             'seo_meta_keywords', 'seo_meta_description',
             'updated_at', 'created_at']

done_items = []
def get(element, name):
    """Return the text of the first child element called `name`, or '' if it is empty."""
    node = element.getElementsByTagName(name)[0]
    try:
        if node.childNodes:
            return node.childNodes[0].data
        else:
            return ''
    except Exception, e:
        print "get(%s, %s)" % (element, name)
        print str(e)
# Fetch the category list and prepare the CSV output files
category_dom = parseString(urlopen(URL + "get_category_list").read())
category_csv = csv.writer(open('categories.csv', 'wb'))
category_csv.writerow(category_cols)
categories = category_dom.lastChild.getElementsByTagName('categories')[0]

item_csv = csv.writer(open('products.csv', 'wb'))
item_csv.writerow(item_cols + ['category_id'])
for category in categories.getElementsByTagName('category'):
    name = get(category, 'name')
    row = []
    for col in category_cols:
        row.append(get(category, col))
    category_csv.writerow(row)

    # download category images
    for size in ('sm', 'md', 'lg'):
        img = get(category, 'image_%s' % size)
        if img:
            output = img.replace(STATIC_BASE, '')
            dirname = os.path.dirname(output)
            if os.path.exists(output):
                continue
            if not os.path.isdir(dirname):
                os.makedirs(dirname)
            urlretrieve(img, filename=output)
    # Fetch this category's items, caching the XML response locally
    catid = get(category, 'id')
    if os.path.exists(catid + '.xml'):
        item_xml = open(catid + '.xml').read()
    else:
        print "Downloading %s products" % name
        item_xml = urlopen(URL + "get_category_items&category_id=%s" %
                           catid).read()
        out_xml = open(catid + '.xml', 'w')
        out_xml.write(item_xml)
        out_xml.close()

    if 'No Items found in this category' in item_xml:
        continue
    item_soup = BeautifulSoup(item_xml)
    for item in item_soup.findAll('item'):
        item_id = item.find('unique_id').text
        if item_id in done_items:
            continue
        done_items.append(item_id)
        row = []
        print item.find('name').text
        for col in item_cols:
            row.append(item.find(col).text)
        item_csv.writerow(row + [catid])
        # download item images
        for size in ('sm', 'md', 'lg'):
            img = item.find('image_%s' % size).text
            if img:
                output = img.replace(STATIC_BASE, '')
                dirname = os.path.dirname(output)
                if os.path.exists(output):
                    continue
                if not os.path.isdir(dirname):
                    os.makedirs(dirname)
                print "Downloading", img
                urlretrieve(img, filename=output)

print "Done! %d items" % len(done_items)