Last active
March 20, 2022 20:44
-
-
Save aschleg/09cca5317a7de3e6a1e8 to your computer and use it in GitHub Desktop.
Merge first worksheet in all Excel workbooks in a folder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from xlwings import Workbook, Range | |
import pandas as pd | |
import os | |
import re | |
# Script to merge a folder containing Excel workbooks into a single workbook. | |
# The folder should only contain Excel workbooks and must all either be in csv, xls or xlsx format | |
# To run, open the command prompt and enter the command python Merge_Excel_Workbooks.py | |
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" | |
function to load data into Excel without overloading memory | |
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" | |
def chunk_df(df, sheet, startcell, chunk_size=1000): | |
if len(df) <= (chunk_size + 1): | |
Range(sheet, startcell, index=False, header=True).value = df | |
else: | |
Range(sheet, startcell, index=False).value = list(df.columns) | |
c = re.match(r"([a-z]+)([0-9]+)", startcell[0] + str(int(startcell[1]) + 1), re.I) | |
row = c.group(1) | |
col = int(c.group(2)) | |
for chunk in (df[rw:rw + chunk_size] for rw in | |
range(0, len(df), chunk_size)): | |
Range(sheet, row + str(col), index=False, header=False).value = chunk | |
col += chunk_size | |
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" | |
Script | |
""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" | |
# Prompt to enter the folder path. | |
# Only Excel Workbooks should be included in the folder path and all should either be in csv, xls or xlsx format | |
workbook_folder = raw_input('Enter folder path of workbooks to merge: ') | |
workbook_folder = os.path.normpath(workbook_folder) | |
passback_templates = os.listdir(workbook_folder) | |
temp_to_merge = [] | |
for i in passback_templates: | |
temp_to_merge.append(workbook_folder + '/' + str(i)) | |
merged_workbooks = pd.DataFrame() | |
# 0 in the read_excel method refers to the index of the sheet. The first | |
# sheet in the workbook has a 0 index. | |
for sheet in temp_to_merge: | |
tab = raw_input('Enter worksheet name to merge: ') | |
if sheet[:-4] == '.csv': | |
workbook = pd.read_csv(sheet, index_col=None, na_values=[0]) | |
merged_workbooks = merged_workbooks.append(workbook) | |
else: | |
workbook = pd.read_excel(sheet, tab, index_col=None, na_values=[0]) | |
merged_workbooks = merged_workbooks.append(workbook) | |
merged_workbooks.fillna(0, inplace = True) | |
# A new workbook will open and copy the merged data. | |
wb = Workbook() | |
chunk_df(merged_workbooks, 'Sheet1', 'A1') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment