Skip to content

Instantly share code, notes, and snippets.

@lmacken
Created April 3, 2012 20:27
Show Gist options
  • Save lmacken/2295278 to your computer and use it in GitHub Desktop.
VeraCart Product Catalog Exporter
# VeraCart Product Catalog Exporter
# Luke Macken <[email protected]>
# License: GPLv3+
import os
import csv
import uuid
from urllib import urlretrieve
from urllib2 import urlopen
from xml.dom.minidom import parse, parseString
from BeautifulSoup import BeautifulSoup
# VeraCart REST API configuration -- fill these in before running.
APP_NAME = ""
API_TOKEN = ""
URL = "https://corp1.veracart.com/api/rest/catalog/?api_token=%s&call_name=" % API_TOKEN
STATIC_BASE = "https://static.veracart.com/" + APP_NAME

# Column order for the exported CSV files (one list per output file).
category_cols = [
    'id', 'unique_id', 'parent_id', 'name', 'header', 'footer',
    'hide', 'sort', 'image_sm', 'image_md', 'image_lg', 'updated_at',
]
item_cols = [
    'id', 'unique_id', 'sku', 'name', 'subtitle', 'summary', 'detail',
    'price', 'sale_price', 'no_shipping', 'no_tax', 'weight',
    'status', 'image_sm', 'image_md', 'image_lg', 'seo_title_tag',
    'seo_meta_keywords', 'seo_meta_description',
    'updated_at', 'created_at',
]

# unique_ids of items already exported (an item may appear in several
# categories; we only want to write each product row once).
done_items = []
def get(element, name):
    """Return the text content of the first child tag *name* of *element*.

    Returns '' when the tag is present but empty.  The original indexed
    ``getElementsByTagName(name)[0]`` outside the try block, so a missing
    tag raised an uncaught IndexError; the lookup is now inside the
    handler, which logs the failure and returns '' (a falsy value, like
    the implicit None the old except path produced, so callers'
    ``if value:`` checks behave the same).
    """
    try:
        node = element.getElementsByTagName(name)[0]
        if node.childNodes:
            return node.childNodes[0].data
        return ''
    except Exception as e:
        print("get(%s, %s)" % (element, name))
        print(str(e))
        return ''
# Fetch the full category list from the VeraCart API and parse it into a DOM.
category_dom = parseString(urlopen(URL + "get_category_list").read())
# Open the CSV outputs and write their header rows.
# NOTE(review): 'wb' is the Python 2 csv convention; Python 3's csv module
# wants text mode with newline='' -- confirm the target interpreter.
category_csv = csv.writer(open('categories.csv', 'wb'))
category_csv.writerow(category_cols)
# The document's last child is the API response root; its <categories>
# element holds the list iterated below.
categories = category_dom.lastChild.getElementsByTagName('categories')[0]
item_csv = csv.writer(open('products.csv', 'wb'))
# Product rows carry one extra trailing column: the owning category's id.
item_csv.writerow(item_cols + ['category_id'])
# Main export: for every category, write its CSV row, mirror its images
# locally, then fetch its item list and append each (previously unseen)
# product to products.csv, mirroring product images as well.
for category in categories.getElementsByTagName('category'):
    name = get(category, 'name')

    # One CSV row per category, in category_cols order.
    row = []
    for col in category_cols:
        row.append(get(category, col))
    category_csv.writerow(row)

    # download category images (skip ones already mirrored locally)
    for size in ('sm', 'md', 'lg'):
        img = get(category, 'image_%s' % size)
        if img:
            output = img.replace(STATIC_BASE, '')
            dirname = os.path.dirname(output)
            if os.path.exists(output):
                continue
            # Guard the empty-dirname case: os.makedirs('') raises OSError
            # when the image path has no directory component.
            if dirname and not os.path.isdir(dirname):
                os.makedirs(dirname)
            urlretrieve(img, filename=output)

    # Fetch this category's item XML, caching it on disk as <id>.xml so
    # re-runs don't hit the API again.  catid is reused below instead of
    # re-calling get(category, 'id') three times.
    catid = get(category, 'id')
    if os.path.exists(catid + '.xml'):
        # open() instead of the deprecated file() builtin; close promptly
        # rather than leaking the handle until GC.
        cache = open(catid + '.xml')
        item_xml = cache.read()
        cache.close()
    else:
        print("Downloading %s products" % name)
        item_xml = urlopen(URL + "get_category_items&category_id=%s" % catid).read()
        out_xml = open(catid + '.xml', 'w')
        out_xml.write(item_xml)
        out_xml.close()

    if 'No Items found in this category' in item_xml:
        continue

    # BeautifulSoup tolerates the API's loose XML better than minidom here.
    item_soup = BeautifulSoup(item_xml)
    for item in item_soup.findAll('item'):
        item_id = item.find('unique_id').text
        # Items can appear in several categories; export each only once.
        if item_id in done_items:
            continue
        done_items.append(item_id)

        row = []
        print(item.find('name').text)
        for col in item_cols:
            row.append(item.find(col).text)
        item_csv.writerow(row + [catid])

        # download item images (skip ones already mirrored locally)
        for size in ('sm', 'md', 'lg'):
            img = item.find('image_%s' % size).text
            if img:
                output = img.replace(STATIC_BASE, '')
                dirname = os.path.dirname(output)
                if os.path.exists(output):
                    continue
                if dirname and not os.path.isdir(dirname):
                    os.makedirs(dirname)
                print("Downloading %s" % img)
                urlretrieve(img, filename=output)

print("Done! %d items" % len(done_items))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment