Last active
October 12, 2016 14:37
-
-
Save qiyuangong/a10eb266f9283688a03f92db59f1db7a to your computer and use it in GitHub Desktop.
merge excel with xlrd and xlwt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import pdb | |
import os | |
from xlrd import open_workbook | |
from xlwt import Workbook | |
# from xlwt import | |
if __name__ == '__main__': | |
file_list = os.listdir('.') | |
#remove other files | |
file_list = [t for t in file_list if 'xlsx' in t or 'xls' in t] | |
if 'result.xls' in file_list: | |
file_list.remove('result.xls') | |
# print file_list | |
tech_name = [] | |
# 原始数据 项目 论文 专利 | |
sheet_name = [u'原始数据',u'项目',u'论文',u'专利'] | |
#change str to utf-8 unicode | |
for t in file_list: | |
if os.name == 'nt': | |
t = t.decode('gbk') | |
else: | |
t = t.decode('utf-8') | |
tech_name.append(t[:-5]) | |
# print tech_name | |
#create workbook | |
wbook = Workbook() | |
#add sheets: 原始数据 项目 论文 专利 | |
for t in sheet_name: | |
wbook.add_sheet(t) | |
#begin static | |
row_index = [0, 0, 0, 0] | |
for index, t in enumerate(file_list): | |
book = open_workbook(t) | |
# print book.nsheets | |
for sheet_index in range(book.nsheets): | |
rsheet = book.sheet_by_index(sheet_index) | |
wsheet = wbook.get_sheet(sheet_index) | |
# pdb.set_trace() | |
# print rsheet.name | |
if sheet_index == 0: | |
for r in range(rsheet.nrows): | |
for c in range(rsheet.ncols): | |
# print rsheet.cell(r, c).value | |
wsheet.write(row_index[sheet_index], c, rsheet.cell(r, c).value) | |
row_index[sheet_index] += 1 | |
row_index[sheet_index] += 4 | |
elif sheet_index == 1: | |
count = 0 | |
if row_index[sheet_index] == 0: | |
wsheet.write(0, 0, u'项目负责人') | |
wsheet.write(0, 1, u'数量') | |
wsheet.write(0, 2, u'立项时间') | |
wsheet.write(0, 3, u'结束时间') | |
wsheet.write(0, 4, u'项目代号') | |
wsheet.write(0, 5, u'项目名称') | |
wsheet.write(0, 6, u'级别') | |
wsheet.write(0, 7, u'项目类型') | |
wsheet.write(0, 8, u'学科类别') | |
wsheet.write(0, 9, u'合同总额(万元)') | |
wsheet.write(0, 10, u'参加人员及比例') | |
wsheet.write(0, 11, u'备注(负责人签字)') | |
wsheet.write(0, 12, u'项目分') | |
row_index[sheet_index] += 1 | |
wsheet.write(row_index[sheet_index], 0, tech_name[index]) | |
# print tech_name[index] | |
top_index = row_index[sheet_index] | |
if rsheet.nrows >= 1: | |
for r in range(rsheet.nrows): | |
if r == 0 and (u'科研情况' in rsheet.cell(r, 0).value or u'无'in rsheet.cell(r, 0).value): | |
continue | |
if tech_name[index] in rsheet.cell(r, 10).value: | |
count += 1 | |
for c in range(rsheet.ncols): | |
wsheet.write(row_index[sheet_index], c + 2, rsheet.cell(r, c).value) | |
row_index[sheet_index] += 1 | |
wsheet.write(top_index, 1, count) | |
row_index[sheet_index] += 4 | |
elif sheet_index == 2: | |
count = 0 | |
# pdb.set_trace() | |
if row_index[sheet_index] == 0: | |
wsheet.write(0, 0, u'作者') | |
wsheet.write(0, 1, u'数量') | |
wsheet.write(0, 2, u'发表时间') | |
wsheet.write(0, 3, u'论文题目') | |
wsheet.write(0, 4, u'期号') | |
wsheet.write(0, 5, u'刊名') | |
wsheet.write(0, 6, u'作者说明') | |
wsheet.write(0, 7, u'刊物级别') | |
wsheet.write(0, 8, u'被收录情况') | |
wsheet.write(0, 9, u'被收录时间') | |
wsheet.write(0, 10, u'备注') | |
wsheet.write(0, 11, u'分') | |
row_index[sheet_index] += 1 | |
wsheet.write(row_index[sheet_index], 0, tech_name[index]) | |
# print tech_name[index] | |
top_index = row_index[sheet_index] | |
if rsheet.nrows >= 1: | |
for r in range(rsheet.nrows): | |
if r == 0 and (u'发表论文' in rsheet.cell(r, 0).value or u'无'in rsheet.cell(r, 0).value): | |
continue | |
if not u'作者' in rsheet.cell(r, 4).value or u'论文题目' in rsheet.cell(r, 1).value: | |
continue | |
count += 1 | |
for c in range(rsheet.ncols): | |
wsheet.write(row_index[sheet_index], c + 2, rsheet.cell(r, c).value) | |
row_index[sheet_index] += 1 | |
else: | |
row_index[sheet_index] += 1 | |
wsheet.write(top_index, 1, count) | |
row_index[sheet_index] += 4 | |
elif sheet_index == 3: | |
count = 0 | |
acc_count = 0 | |
if row_index[sheet_index] == 0: | |
wsheet.write(0, 2, u'获得时间') | |
wsheet.write(0, 3, u'专利名称') | |
wsheet.write(0, 4, u'专利类型') | |
wsheet.write(0, 5, u'申请号或授权号') | |
wsheet.write(0, 6, u'实施收益(万元)') | |
wsheet.write(0, 7, u'技术转让(万元)') | |
wsheet.write(0, 8, u'一次性转让或无形资产评估(万元)') | |
wsheet.write(0, 9, u'企业上交利润(万元)') | |
wsheet.write(0, 10, u'其他参加人员及比例') | |
wsheet.write(0, 11, u'备注(负责人签字)') | |
wsheet.write(0, 12, u'分') | |
row_index[sheet_index] += 1 | |
wsheet.write(row_index[sheet_index], 0, tech_name[index]) | |
wsheet.write(row_index[sheet_index] + 1, 0, u'受理') | |
wsheet.write(row_index[sheet_index] + 2, 0, u'授权') | |
top_index = row_index[sheet_index] | |
# print tech_name[index] | |
if rsheet.nrows >= 1: | |
for r in range(rsheet.nrows): | |
if r == 0 and (u'专利成果转化' in rsheet.cell(r, 0).value or u'无'in rsheet.cell(r, 0).value): | |
continue | |
if u'专利名称' in rsheet.cell(r, 1).value or u'小计' in rsheet.cell(r, 9).value: | |
continue | |
if u'受理' in rsheet.cell(r, 2).value: | |
acc_count += 1 | |
for c in range(rsheet.ncols): | |
wsheet.write(row_index[sheet_index], c + 2, rsheet.cell(r, c).value) | |
row_index[sheet_index] += 1 | |
count += 1 | |
if count < 3: | |
row_index[sheet_index] += 3 - count | |
wsheet.write(top_index, 1, count) | |
wsheet.write(top_index + 1, 1, acc_count) | |
wsheet.write(top_index + 2, 1, count - acc_count) | |
row_index[sheet_index] += 4 | |
wbook.save('result.xls') | |
# wbook.save(TemporaryFile()) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment