Created
November 3, 2018 19:55
-
-
Save DrJackilD/c26d9371b69af5bc5c3621faf1c65e24 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load Excel files in a folder and load content to simple class | |
# Using first row as a header | |
from typing import Tuple, Generator | |
from types import SimpleNamespace | |
import os | |
from openpyxl import load_workbook | |
INPUT_BASE_FOLDER = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'TheKnotResults_250cities') | |
class Row(SimpleNamespace): | |
pass | |
def read_input_rows() -> Tuple[str, Generator[Row, None, None]]: | |
def rows_generator(fp: str) -> Generator[Row, None, None]: | |
wb = load_workbook(fp) | |
ws = iter(wb.active) | |
# get headers | |
headers = ['_'.join(h.value.lower().split()) for h in next(ws)] | |
for line in ws: | |
values = {h: v for h, v in zip(headers, [c.value for c in line])} | |
yield Row(**values, details=None) | |
for filename in filter(lambda f: f.endswith('xlsx'), os.listdir(INPUT_BASE_FOLDER)): | |
filepath = os.path.join(INPUT_BASE_FOLDER, filename) | |
yield filename.split('.')[0], rows_generator(filepath) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment