Skip to content

Instantly share code, notes, and snippets.

@vadimii
Last active December 25, 2015 00:59
Show Gist options
  • Save vadimii/6891493 to your computer and use it in GitHub Desktop.
Save vadimii/6891493 to your computer and use it in GitHub Desktop.
Excel file transformation with python-excel lib
# coding: utf-8
import re
from xlrd import open_workbook
from xlwt import Workbook
def read_workbook():
workbook = open_workbook('PriceObMatLidersb.xls')
for sheet in workbook.sheets():
for row in range(sheet.nrows):
cells = [sheet.name]
for col in range(sheet.ncols):
val = unicode(sheet.cell(row, col).value).strip()
cells.append(val)
yield cells
def parse_products(workbook):
price_excl = ['', u'Звоните!', u'Бесплатно']
cats = {}
last_cat = None
for row in workbook:
cat, name = row[1:4:2]
prod = row[2]
if re.match(r'\d[\.\d]*', cat):
levels = tuple(int(lev) for lev in cat.split('.'))
cats[levels] = name
last_cat = levels
elif re.match(r'\d{6}', prod):
sheet = row[0]
name, _, desc, _, measure, price = row[3:9]
prod = dict(sheet=sheet, prod=prod, name=name,
desc=desc, measure=measure,
price=float(price) if price not in price_excl else 0.)
prodcats = []
for i in range(1, len(last_cat)+1):
cat = tuple(last_cat[0:i])
prodcats.append(cats[cat])
prod['cats'] = prodcats
yield prod
def save_new_format(products):
book = Workbook()
sheet1 = book.add_sheet('Sheet 1')
for i, prod in enumerate(products):
sheet1.write(i, 0, prod['prod'])
sheet1.write(i, 1, prod['name'])
sheet1.write(i, 2, prod['desc'])
sheet1.write(i, 3, prod['measure'])
sheet1.write(i, 4, prod['price'])
sheet1.write(i, 5, prod['sheet'])
for j, cat in enumerate(prod['cats'], 6):
sheet1.write(i, j, cat)
book.save('PriceObMatLidersb_out.xls')
if __name__ == '__main__':
workbook = read_workbook()
products = parse_products(workbook)
save_new_format(products)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment