Last active
May 4, 2025 15:20
-
-
Save GaryLee/5fa469b462a693d8badb008e973ab70f to your computer and use it in GitHub Desktop.
Access data frame in Excel's way.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!python | |
# coding: utf-8 | |
import os | |
import string | |
import pandas as pd | |
pd.set_option('future.no_silent_downcasting', True) | |
class Excel: | |
ENGINE = 'xlrd' # Specify the engine for reading .xls files | |
def __init__(self, filename, sheet=0): | |
self.filename = os.path.splitext(os.path.basename(filename))[0] | |
self._df = pd.read_excel(filename, sheet_name=sheet, header=None, engine=self.ENGINE) | |
self.iloc = self._df.iloc | |
for a in string.ascii_uppercase: | |
setattr(self, a, self.column_index(a)) | |
for b in string.ascii_uppercase: | |
setattr(self, a+b, self.column_index(a+b)) | |
@staticmethod | |
def column_index(col_str): | |
""" | |
Convert Excel column letter to index. | |
Example: 'A' -> 0, 'B' -> 1, ..., 'Z' -> 25 | |
""" | |
if isinstance(col_str, int): | |
return col_str - 1 | |
col_idx = sum((ord(char) - ord('A') + 1) * (26 ** i) for i, char in enumerate(reversed(col_str))) - 1 | |
return col_idx | |
def cell_by_name(self, cell_name): | |
""" | |
Convert Excel cell name to row and column indices. | |
Example: 'A30' -> (29, 0) | |
""" | |
col_str = ''.join(filter(str.isalpha, cell_name)) | |
row_str = ''.join(filter(str.isdigit, cell_name)) | |
col_idx = sum((ord(char) - ord('A') + 1) * (26 ** i) for i, char in enumerate(reversed(col_str))) - 1 | |
row_idx = int(row_str) - 1 | |
return self._df.iloc[row_idx, col_idx] | |
def __getitem__(self, item): | |
"""Access cell or range of cells using Excel-style indexing. | |
Ex: | |
xls["A30"] -> cell A30 | |
xls["A", 5:30] -> column A from row 5 to 30 (ending inclusive) | |
xls["A", 5:30:2] -> column A from row 5 to 30 with step 2 (ending inclusive) | |
xls["A:B", 5:30] -> column A and B from row 5 to 30 (ending inclusive) | |
xls[1:2, 5:30] -> column 1 and 2 from row 5 to 30 (ending inclusive) | |
xls[0, 3] -> The cell item of column 0 and row 3. | |
""" | |
if isinstance(item, str): | |
return self.cell_by_name(item) | |
elif isinstance(item, (tuple, list)): | |
if isinstance(item[0], str): | |
s = list(map(str.strip, item[0].split(':'))) | |
assert len(s) >= 1 and len(s) <= 3, f"Invalid column range. column={item[0]}" | |
if len(s) == 1: | |
col = self.column_index(s[0]) if isinstance(s[0], str) else int(s[0]) | |
else: | |
col_begin = self.column_index(s[0]) if isinstance(s[0], str) else int(s[0]) | |
col_end = self.column_index(s[1]) if isinstance(s[1], str) else int(s[1]) | |
if len(s) == 2: | |
col = slice(col_begin, col_end + 1) | |
else: | |
col = slice(col_begin, col_end + 1, int(s[2])) | |
elif isinstance(item[0], slice): | |
row = slice(item[0].start - 1, item[0].stop, item[0].step) | |
else: | |
row = int(item[0]) - 1 | |
if isinstance(item[1], slice): | |
row = slice(item[1].start - 1, item[1].stop, item[1].step) | |
else: | |
row = int(item[1]) - 1 | |
return self._df.iloc[row, col] | |
else: | |
raise TypeError(f"Invalid index type. Use string or integer. {repr(item)}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment