Created
May 22, 2025 08:59
-
-
Save yeiichi/b7e2c669350fff3a11cfec91a1c0cf31 to your computer and use it in GitHub Desktop.
Combine multiple CSV files into a single Excel workbook.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # Combine multiple CSV files into a single Excel workbook. | |
| import re | |
| from dataclasses import dataclass | |
| from pathlib import Path | |
| import pandas as pd | |
| from openpyxl import Workbook | |
| from openpyxl.workbook.views import BookView | |
| PROMPT_COLOR = '\033[93m' | |
| RESET_COLOR = '\033[0m' | |
| @dataclass(frozen=True) | |
| class SheetConfig: | |
| """ | |
| Represents the configuration for a sheet and its properties. | |
| This class provides a frozen dataclass for configuring sheet parameters such as | |
| default separators, patterns for invalid characters, maximum length constraints, | |
| and window dimensions. It is immutable once instantiated and intended to define | |
| constants for sheet customization. | |
| Attributes: | |
| DEFAULT_SEPARATOR (str): Default string used as a separator in sheet names. | |
| INVALID_CHARS_PATTERN (str): Regular expression pattern for invalid characters | |
| in sheet names. | |
| MAX_LENGTH (int): Maximum permissible length for a sheet name. | |
| WINDOW_WIDTH (int): Width of the sheet window in pixels. | |
| WINDOW_HEIGHT (int): Height of the sheet window in pixels. | |
| """ | |
| DEFAULT_SEPARATOR: str = '_' | |
| INVALID_CHARS_PATTERN: str = r'[*?:\/\[\]]' | |
| MAX_LENGTH: int = 28 # Max 31 - 3 (suffix: '_00') | |
| WINDOW_WIDTH: int = 18140 | |
| WINDOW_HEIGHT: int = 15540 | |
| class SheetNameGenerator: | |
| """Encapsulates sanitization and unique‐naming for sheet titles.""" | |
| def __init__(self, sep: str = SheetConfig.DEFAULT_SEPARATOR): | |
| self.separator = sep | |
| self.existing: set[str] = set() | |
| def generate_sheet_name(self, raw_name: str) -> str: | |
| # collapse whitespace and strip invalid chars | |
| collapsed = re.sub(r'\s+', self.separator, raw_name.strip()) | |
| cleaned = re.sub(SheetConfig.INVALID_CHARS_PATTERN, '', collapsed) | |
| base = cleaned[:SheetConfig.MAX_LENGTH] | |
| # ensure uniqueness | |
| name = base | |
| counter = 1 | |
| while name in self.existing: | |
| name = f"{base}_{counter:02d}" | |
| counter += 1 | |
| self.existing.add(name) | |
| return name | |
| class CsvWorkbookConverter: | |
| CSV_PATTERN = '*.csv' | |
| SOURCE_PROMPT = f"{PROMPT_COLOR}Source CSV directory (default is current): {RESET_COLOR}" | |
| def __init__(self): | |
| self.source_dir = self._prompt_source_directory() | |
| self.workbook = self._create_workbook() | |
| @staticmethod | |
| def _prompt_source_directory() -> Path: | |
| stripped_input = input(CsvWorkbookConverter.SOURCE_PROMPT).strip() | |
| if not stripped_input or stripped_input == '.': | |
| return Path.cwd() | |
| else: | |
| return Path(stripped_input) | |
| @staticmethod | |
| def _create_workbook() -> Workbook: | |
| wb = Workbook() | |
| wb.views = [ | |
| BookView( | |
| xWindow=0, | |
| yWindow=0, | |
| windowWidth=SheetConfig.WINDOW_WIDTH, | |
| windowHeight=SheetConfig.WINDOW_HEIGHT | |
| ) | |
| ] | |
| return wb | |
| def _create_sheet_from_dataframe(self, title: str, data_frame: pd.DataFrame): | |
| ws = self.workbook.create_sheet(title=title) | |
| for row in data_frame.itertuples(index=False): | |
| ws.append(row) | |
| def _import_all_csv_sheets(self): | |
| name_gen = SheetNameGenerator() | |
| for csv_file in sorted(self.source_dir.glob(self.CSV_PATTERN)): | |
| sheet_name = name_gen.generate_sheet_name(csv_file.stem) | |
| data_frame = pd.read_csv(csv_file) | |
| self._create_sheet_from_dataframe(sheet_name, data_frame) | |
| self.remove_default_empty_sheet() | |
| def remove_default_empty_sheet(self): | |
| self.workbook.remove(self.workbook['Sheet']) | |
| def _generate_output_path(self) -> Path: | |
| cwd = Path.cwd() | |
| base = self.source_dir.name if self.source_dir != cwd else cwd.name | |
| return cwd / f"{base}.xlsx" | |
| def save_workbook(self) -> Path: | |
| self._import_all_csv_sheets() | |
| output_path = self._generate_output_path() | |
| self.workbook.save(output_path) | |
| print(f"{PROMPT_COLOR}Saved: {output_path.name}{RESET_COLOR}") | |
| return output_path | |
| def main(): | |
| converter = CsvWorkbookConverter() | |
| converter.save_workbook() | |
| print(f"{PROMPT_COLOR}Done.{RESET_COLOR}") | |
| if __name__ == '__main__': | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment