Last active
December 29, 2024 01:03
-
-
Save yeiichi/414e3774c613dc70bf712b262863b440 to your computer and use it in GitHub Desktop.
Rename and re-encode Nulab Backlog CSV files in a directory.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
import re
import sys
from pathlib import Path

import pandas as pd
def glob_backlog_csvs(csv_dir):
    """Return the Backlog issue CSV exports located directly in *csv_dir*.

    Args:
        csv_dir: Directory to search (``str`` or ``Path``). Sub-directories
            are not searched.

    Returns:
        list[Path]: Files matching ``Backlog-Issues*.csv``, sorted by path so
        the processing order is reproducible across platforms.

    Raises:
        SystemExit: If no matching file exists.
    """
    csv_files = sorted(Path(csv_dir).glob('Backlog-Issues*.csv'))
    if not csv_files:
        # sys.exit rather than the site-provided exit(): the latter is meant
        # for the interactive shell and is not guaranteed to be defined.
        sys.exit('\033[93mNo CSV files found.\033[0m')
    return csv_files
def try_open_with_encoding(file_path, encoding):
    """Report whether the whole of *file_path* decodes with *encoding*.

    Args:
        file_path: File to probe (``str`` or ``Path``).
        encoding: Codec name to try, e.g. ``'UTF8'`` or ``'CP932'``.

    Returns:
        bool: ``True`` if every byte of the file decodes, ``False`` if a
        ``UnicodeDecodeError`` is raised at any point.
    """
    try:
        with Path(file_path).open(encoding=encoding) as f:
            # Decode the entire file, not just a leading sample: a file whose
            # first non-ASCII byte appears late would otherwise be accepted
            # here and then fail when it is actually parsed.
            while f.read(65536):
                pass
    except UnicodeDecodeError:
        return False
    return True
def detect_file_encoding(file_path):
    """Return the first candidate encoding that can decode *file_path*.

    The candidates are probed in a fixed order, ``'UTF8'`` then ``'CP932'``,
    using ``try_open_with_encoding``.

    Returns:
        str: ``'UTF8'``, ``'CP932'``, or ``'unknown'`` when neither candidate
        decodes the file.
    """
    candidates = ('UTF8', 'CP932')
    # The generator is lazy, so probing stops at the first success.
    return next(
        (name for name in candidates if try_open_with_encoding(file_path, name)),
        'unknown',
    )
def rename_csv(csv_file_path):
    """Load a Backlog CSV export and derive its normalized file name.

    The new name is built from the project key (the part of the first row's
    issue key before the last ``-``) and the timestamp embedded in the
    original file name. Nothing is written or renamed on disk here.

    Args:
        csv_file_path: Path of the export (``str`` or ``Path``); its stem must
            contain a ``-YYYYMMDD-HHMM`` timestamp.

    Returns:
        fname_ (str): New file name.
        df_ (DataFrame): Contents of the CSV, decoded with the detected
            encoding.

    Raises:
        ValueError: If the file name has no timestamp, the CSV has no issue
            key column, or the CSV has no data rows.
        SystemExit: If the file is neither UTF-8 nor CP932.

    Example:
        Backlog-Issues-19991231-2359.csv -> backlog_LONG_NOW_19991231-2359.csv
    """
    csv_file_path = Path(csv_file_path)

    stamp = re.search(r'-(\d{8}-\d{4})', csv_file_path.stem)
    if stamp is None:
        raise ValueError(
            f'No YYYYMMDD-HHMM timestamp in file name: {csv_file_path.name}'
        )
    dttm = stamp.group(1)

    # Issue-key column names per detected encoding, preferred name first.
    # The preferred name is the one the original per-encoding branches used;
    # the alternate is a fallback for exports whose header language does not
    # follow the encoding.
    key_columns_by_encoding = {
        'UTF8': ('Key', 'キー'),
        'CP932': ('キー', 'Key'),
    }
    encoding = detect_file_encoding(csv_file_path)
    if encoding not in key_columns_by_encoding:
        sys.exit(f'\033[93mUnknown encoding for: {csv_file_path}\033[0m')

    df_ = pd.read_csv(csv_file_path, encoding=encoding)

    key_column = next(
        (col for col in key_columns_by_encoding[encoding] if col in df_.columns),
        None,
    )
    if key_column is None:
        raise ValueError(f'No issue key column found in: {csv_file_path}')
    if df_.empty:
        raise ValueError(f'No issue rows found in: {csv_file_path}')

    # "PROJ-123" -> "PROJ"; rsplit keeps any hyphens inside the project key.
    project_key = df_.at[0, key_column].rsplit('-', maxsplit=1)[0]
    fname_ = f'backlog_{project_key}_{dttm}.csv'
    return fname_, df_
def main():
    """Interactively rename and re-encode the Backlog CSVs in a directory.

    Prompts for a directory, then for each ``Backlog-Issues*.csv`` in it:
    writes a copy under the normalized name returned by ``rename_csv`` (using
    the default encoding of ``DataFrame.to_csv``), and moves the original
    file into an ``old`` sub-directory.

    Raises:
        SystemExit: If the entered path is not a directory.
    """
    # Strip stray whitespace (common after pasting or dragging a path into a
    # terminal) and expand "~" so home-relative paths are accepted.
    source_dir = Path(input('Backlog CSVs DIR? >> ').strip()).expanduser()
    if not source_dir.is_dir():
        sys.exit('\033[93mNot a directory.\033[0m')

    # Prep a directory for original files.
    old_dir = source_dir / 'old'
    old_dir.mkdir(exist_ok=True)

    # Rename and re-encode.
    for orig_csv in glob_backlog_csvs(source_dir):
        fname, df = rename_csv(orig_csv)
        fpath = source_dir / fname
        df.to_csv(fpath, index=False)
        print(f'\033[93mSaved: {fpath}\033[0m')
        # Move the original only after the new copy has been written.
        orig_csv.rename(old_dir / orig_csv.name)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment