Skip to content

Instantly share code, notes, and snippets.

@GaryLee
Last active May 4, 2025 15:20
Show Gist options
  • Save GaryLee/5fa469b462a693d8badb008e973ab70f to your computer and use it in GitHub Desktop.
Save GaryLee/5fa469b462a693d8badb008e973ab70f to your computer and use it in GitHub Desktop.
Access data frame in Excel's way.
#!python
# coding: utf-8
import os
import string
import pandas as pd
pd.set_option('future.no_silent_downcasting', True)
class Excel:
ENGINE = 'xlrd' # Specify the engine for reading .xls files
def __init__(self, filename, sheet=0):
self.filename = os.path.splitext(os.path.basename(filename))[0]
self._df = pd.read_excel(filename, sheet_name=sheet, header=None, engine=self.ENGINE)
self.iloc = self._df.iloc
for a in string.ascii_uppercase:
setattr(self, a, self.column_index(a))
for b in string.ascii_uppercase:
setattr(self, a+b, self.column_index(a+b))
@staticmethod
def column_index(col_str):
"""
Convert Excel column letter to index.
Example: 'A' -> 0, 'B' -> 1, ..., 'Z' -> 25
"""
if isinstance(col_str, int):
return col_str - 1
col_idx = sum((ord(char) - ord('A') + 1) * (26 ** i) for i, char in enumerate(reversed(col_str))) - 1
return col_idx
def cell_by_name(self, cell_name):
"""
Convert Excel cell name to row and column indices.
Example: 'A30' -> (29, 0)
"""
col_str = ''.join(filter(str.isalpha, cell_name))
row_str = ''.join(filter(str.isdigit, cell_name))
col_idx = sum((ord(char) - ord('A') + 1) * (26 ** i) for i, char in enumerate(reversed(col_str))) - 1
row_idx = int(row_str) - 1
return self._df.iloc[row_idx, col_idx]
def __getitem__(self, item):
"""Access cell or range of cells using Excel-style indexing.
Ex:
xls["A30"] -> cell A30
xls["A", 5:30] -> column A from row 5 to 30 (ending inclusive)
xls["A", 5:30:2] -> column A from row 5 to 30 with step 2 (ending inclusive)
xls["A:B", 5:30] -> column A and B from row 5 to 30 (ending inclusive)
xls[1:2, 5:30] -> column 1 and 2 from row 5 to 30 (ending inclusive)
xls[0, 3] -> The cell item of column 0 and row 3.
"""
if isinstance(item, str):
return self.cell_by_name(item)
elif isinstance(item, (tuple, list)):
if isinstance(item[0], str):
s = list(map(str.strip, item[0].split(':')))
assert len(s) >= 1 and len(s) <= 3, f"Invalid column range. column={item[0]}"
if len(s) == 1:
col = self.column_index(s[0]) if isinstance(s[0], str) else int(s[0])
else:
col_begin = self.column_index(s[0]) if isinstance(s[0], str) else int(s[0])
col_end = self.column_index(s[1]) if isinstance(s[1], str) else int(s[1])
if len(s) == 2:
col = slice(col_begin, col_end + 1)
else:
col = slice(col_begin, col_end + 1, int(s[2]))
elif isinstance(item[0], slice):
row = slice(item[0].start - 1, item[0].stop, item[0].step)
else:
row = int(item[0]) - 1
if isinstance(item[1], slice):
row = slice(item[1].start - 1, item[1].stop, item[1].step)
else:
row = int(item[1]) - 1
return self._df.iloc[row, col]
else:
raise TypeError(f"Invalid index type. Use string or integer. {repr(item)}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment