Skip to content

Instantly share code, notes, and snippets.

@josircg
Created January 7, 2025 14:41
Show Gist options
  • Save josircg/99e07c99264e1f5b701067d12a5963f5 to your computer and use it in GitHub Desktop.
Save josircg/99e07c99264e1f5b701067d12a5963f5 to your computer and use it in GitHub Desktop.
Reading XLS e XLSX using rows and xlrd
# Função import_from_xls customizada para poder ler arquivos xls e xlsx.
# Funciona com a versão 1.2.0 do xlrd. A versão 2.x dropou o suporte para leitura de xlsx.
from __future__ import unicode_literals
import xlrd
from rows.plugins.utils import get_filename_and_fobj, create_table
from rows.plugins.xls import get_table_start, cell_value
def import_from_xls(
filename_or_fobj,
sheet_name=None,
sheet_index=0,
start_row=None,
start_column=None,
end_row=None,
end_column=None,
*args,
**kwargs
):
"""Return a rows.Table created from imported XLS file."""
filename, _ = get_filename_and_fobj(filename_or_fobj, mode="rb")
book = xlrd.open_workbook(filename)
if sheet_name is not None:
sheet = book.sheet_by_name(sheet_name)
else:
sheet = book.sheet_by_index(sheet_index)
# TODO: may re-use Excel data types
# Get header and rows
# xlrd library reads rows and columns starting from 0 and ending on
# sheet.nrows/ncols - 1. rows accepts the same pattern
# The xlrd library reads rows and columns starting from 0 and ending on
# sheet.nrows/ncols - 1. rows also uses 0-based indexes, so no
# transformation is needed
min_row, min_column = get_table_start(sheet)
max_row, max_column = sheet.nrows - 1, sheet.ncols - 1
# TODO: consider adding a parameter `ignore_padding=True` and when it's
# True, consider `start_row` starting from `min_row` and `start_column`
# starting from `min_col`.
start_row = start_row if start_row is not None else min_row
end_row = end_row if end_row is not None else max_row
start_column = start_column if start_column is not None else min_column
end_column = end_column if end_column is not None else max_column
table_rows = [
[
cell_value(sheet, row_index, column_index)
for column_index in range(start_column, end_column + 1)
]
for row_index in range(start_row, end_row + 1)
]
meta = {"imported_from": "xls", "filename": filename, "sheet_name": sheet.name}
return create_table(table_rows, meta=meta, *args, **kwargs)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment