Last active
June 15, 2022 16:11
-
-
Save jymchng/ce3a89fced820470cf87cf55e4b77a4c to your computer and use it in GitHub Desktop.
Given a Excel (.xlsx) file with merged cells, returns a pandas DataFrame with its unmerged cells' values appropriately filled.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from openpyxl.workbook import Workbook | |
from openpyxl import load_workbook | |
from openpyxl.utils.cell import range_boundaries | |
import pandas as pd | |
import numpy as np | |
def _parse_xlxs_merged_cells(filepath, how='top-left'): | |
""" | |
Takes in a Path-like object specifying the .xlsx file and returns a pandas DataFrame with unmerged cells' values | |
appropriately filled. | |
-params- | |
filepath: Path-like | |
how: a string, specifying which cell's value to be copied to all unmerged cells, default is 'top-left' | |
-returns- | |
pandas DataFrame with with unmerged cells' filled with specified values | |
""" | |
df = pd.read_excel(filepath) | |
how_mapping = {'top-left': (1, 0)} | |
wb = load_workbook(filename=filepath) | |
for st_name in wb.sheetnames: | |
st = wb[st_name] | |
mcr_coord_list = [mcr.coord for mcr in st.merged_cells.ranges] | |
range_bound_arr = np.array([range_boundaries(mcr) for mcr in mcr_coord_list]) | |
range_bound_arr_pyindexes = range_bound_arr - [1, 2, 1, 2] | |
for range_bound_arr_pyindex in range_bound_arr_pyindexes: | |
# min_col, min_row, max_col, max_row | |
# 0, , 1 , 2 , 3 | |
df.iloc[range_bound_arr_pyindex[1]:range_bound_arr_pyindex[-1]+1, range_bound_arr_pyindex[0]:range_bound_arr_pyindex[-2]+1] = \ | |
df.iloc[range_bound_arr_pyindex[how_mapping[how][0]],range_bound_arr_pyindex[how_mapping[how][1]]] | |
return df |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment