Created
January 7, 2025 14:41
-
-
Save josircg/99e07c99264e1f5b701067d12a5963f5 to your computer and use it in GitHub Desktop.
Reading XLS e XLSX using rows and xlrd
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Função import_from_xls customizada para poder ler arquivos xls e xlsx. | |
# Funciona com a versão 1.2.0 do xlrd. A versão 2.x dropou o suporte para leitura de xlsx. | |
from __future__ import unicode_literals | |
import xlrd | |
from rows.plugins.utils import get_filename_and_fobj, create_table | |
from rows.plugins.xls import get_table_start, cell_value | |
def import_from_xls( | |
filename_or_fobj, | |
sheet_name=None, | |
sheet_index=0, | |
start_row=None, | |
start_column=None, | |
end_row=None, | |
end_column=None, | |
*args, | |
**kwargs | |
): | |
"""Return a rows.Table created from imported XLS file.""" | |
filename, _ = get_filename_and_fobj(filename_or_fobj, mode="rb") | |
book = xlrd.open_workbook(filename) | |
if sheet_name is not None: | |
sheet = book.sheet_by_name(sheet_name) | |
else: | |
sheet = book.sheet_by_index(sheet_index) | |
# TODO: may re-use Excel data types | |
# Get header and rows | |
# xlrd library reads rows and columns starting from 0 and ending on | |
# sheet.nrows/ncols - 1. rows accepts the same pattern | |
# The xlrd library reads rows and columns starting from 0 and ending on | |
# sheet.nrows/ncols - 1. rows also uses 0-based indexes, so no | |
# transformation is needed | |
min_row, min_column = get_table_start(sheet) | |
max_row, max_column = sheet.nrows - 1, sheet.ncols - 1 | |
# TODO: consider adding a parameter `ignore_padding=True` and when it's | |
# True, consider `start_row` starting from `min_row` and `start_column` | |
# starting from `min_col`. | |
start_row = start_row if start_row is not None else min_row | |
end_row = end_row if end_row is not None else max_row | |
start_column = start_column if start_column is not None else min_column | |
end_column = end_column if end_column is not None else max_column | |
table_rows = [ | |
[ | |
cell_value(sheet, row_index, column_index) | |
for column_index in range(start_column, end_column + 1) | |
] | |
for row_index in range(start_row, end_row + 1) | |
] | |
meta = {"imported_from": "xls", "filename": filename, "sheet_name": sheet.name} | |
return create_table(table_rows, meta=meta, *args, **kwargs) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment