Last active
December 25, 2015 00:59
-
-
Save vadimii/6891493 to your computer and use it in GitHub Desktop.
Excel file transformation with python-excel lib
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import re | |
from xlrd import open_workbook | |
from xlwt import Workbook | |
def read_workbook(): | |
workbook = open_workbook('PriceObMatLidersb.xls') | |
for sheet in workbook.sheets(): | |
for row in range(sheet.nrows): | |
cells = [sheet.name] | |
for col in range(sheet.ncols): | |
val = unicode(sheet.cell(row, col).value).strip() | |
cells.append(val) | |
yield cells | |
def parse_products(workbook): | |
price_excl = ['', u'Звоните!', u'Бесплатно'] | |
cats = {} | |
last_cat = None | |
for row in workbook: | |
cat, name = row[1:4:2] | |
prod = row[2] | |
if re.match(r'\d[\.\d]*', cat): | |
levels = tuple(int(lev) for lev in cat.split('.')) | |
cats[levels] = name | |
last_cat = levels | |
elif re.match(r'\d{6}', prod): | |
sheet = row[0] | |
name, _, desc, _, measure, price = row[3:9] | |
prod = dict(sheet=sheet, prod=prod, name=name, | |
desc=desc, measure=measure, | |
price=float(price) if price not in price_excl else 0.) | |
prodcats = [] | |
for i in range(1, len(last_cat)+1): | |
cat = tuple(last_cat[0:i]) | |
prodcats.append(cats[cat]) | |
prod['cats'] = prodcats | |
yield prod | |
def save_new_format(products): | |
book = Workbook() | |
sheet1 = book.add_sheet('Sheet 1') | |
for i, prod in enumerate(products): | |
sheet1.write(i, 0, prod['prod']) | |
sheet1.write(i, 1, prod['name']) | |
sheet1.write(i, 2, prod['desc']) | |
sheet1.write(i, 3, prod['measure']) | |
sheet1.write(i, 4, prod['price']) | |
sheet1.write(i, 5, prod['sheet']) | |
for j, cat in enumerate(prod['cats'], 6): | |
sheet1.write(i, j, cat) | |
book.save('PriceObMatLidersb_out.xls') | |
if __name__ == '__main__': | |
workbook = read_workbook() | |
products = parse_products(workbook) | |
save_new_format(products) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment