Skip to content

Instantly share code, notes, and snippets.

@yeiichi
Last active December 29, 2024 01:03
Show Gist options
  • Save yeiichi/414e3774c613dc70bf712b262863b440 to your computer and use it in GitHub Desktop.
Save yeiichi/414e3774c613dc70bf712b262863b440 to your computer and use it in GitHub Desktop.
Rename and re-encode Nulab Backlog CSV files in a directory.
#!/usr/bin/env python3
import re
from pathlib import Path
import pandas as pd
def glob_backlog_csvs(csv_dir):
    """Collect the Backlog issue CSV exports located directly in a directory.

    Args:
        csv_dir (str | Path): Directory to search (not recursive).

    Returns:
        list[Path]: Paths matching ``Backlog-Issues*.csv``, sorted so the
            processing order is reproducible across platforms.

    Raises:
        SystemExit: If no matching file exists. The exit message is wrapped
            in ANSI yellow escape codes.
    """
    csv_files = sorted(Path(csv_dir).glob('Backlog-Issues*.csv'))
    if not csv_files:
        # Raise SystemExit directly: the ``exit`` helper is injected by the
        # ``site`` module and is unavailable under ``python -S`` or in
        # frozen applications.
        raise SystemExit('\033[93mNo CSV files found.\033[0m')
    return csv_files
def try_open_with_encoding(file_path, encoding):
    """Check whether a file decodes cleanly with the given encoding.

    The whole file is decoded, in bounded chunks, rather than only a short
    leading sample. A CSV that starts with plain ASCII and contains its
    first multi-byte character further down would otherwise be accepted
    under the wrong encoding.

    Args:
        file_path (Path): File to probe.
        encoding (str): Codec name, e.g. ``'UTF8'`` or ``'CP932'``.

    Returns:
        bool: True if the file decodes without error, False if decoding
            raises ``UnicodeDecodeError``.
    """
    try:
        with file_path.open(encoding=encoding) as f:
            # Drain the file chunk by chunk; the data itself is discarded,
            # only a decoding failure matters.
            while f.read(65536):
                pass
    except UnicodeDecodeError:
        return False
    return True
def detect_file_encoding(file_path):
    """Return the first candidate encoding that can decode the file.

    Candidates are probed in order: ``'UTF8'`` first, then ``'CP932'``.

    Args:
        file_path (Path): File to probe.

    Returns:
        str: The name of the first encoding accepted by
            ``try_open_with_encoding``, or ``'unknown'`` if none is.
    """
    candidates = ('UTF8', 'CP932')
    matches = (
        name for name in candidates
        if try_open_with_encoding(file_path, name)
    )
    # The generator is lazy, so probing stops at the first success.
    return next(matches, 'unknown')
def rename_csv(csv_file_path):
    """Work out the new name of a Backlog CSV and load its contents.

    Nothing is written or renamed here; the caller is expected to save the
    returned DataFrame under the returned name.

    The new name has the form ``backlog_<project key>_<timestamp>.csv``.
    The timestamp (``YYYYMMDD-HHMM``) is taken from the original file name.
    The project key is the first row's issue key with its trailing
    ``-<number>`` part removed.

    Args:
        csv_file_path (Path): Path of the exported CSV file.

    Returns:
        fname_ (str): New file name.
        df_ (DataFrame): File contents decoded into a DataFrame.

    Raises:
        SystemExit: If the file name carries no timestamp, the encoding is
            neither UTF-8 nor CP932, or no issue key can be read from the
            file.

    Example:
        Backlog-Issues-19991231-2359.csv -> backlog_LONG_NOW_19991231-2359.csv
    """
    stamp = re.search(r'-(\d{8}-\d{4})', csv_file_path.stem)
    if stamp is None:
        # e.g. "Backlog-Issues.csv" matches the glob but has no timestamp.
        raise SystemExit(
            f'\033[93mNo timestamp in file name: {csv_file_path}\033[0m')
    dttm = stamp.group(1)

    encoding = detect_file_encoding(csv_file_path)
    if encoding not in ('UTF8', 'CP932'):
        raise SystemExit(
            f'\033[93mUnknown encoding for: {csv_file_path}\033[0m')
    df_ = pd.read_csv(csv_file_path, encoding=encoding)

    # Look the key column up by name instead of assuming that UTF-8 files
    # have the English header and CP932 files the Japanese one.
    key_column = next(
        (col for col in ('Key', 'キー') if col in df_.columns), None)
    if key_column is None or df_.empty:
        raise SystemExit(
            f'\033[93mNo issue key found in: {csv_file_path}\033[0m')

    # "PROJ-123" -> "PROJ"; rsplit keeps hyphens inside the project key.
    first_key = str(df_[key_column].iloc[0])
    project_key = first_key.rsplit('-', maxsplit=1)[0]

    fname_ = f'backlog_{project_key}_{dttm}.csv'
    return fname_, df_
if __name__ == '__main__':
    # Ask for the directory holding the exported CSVs. Surrounding
    # whitespace is dropped and a leading "~" is expanded so that paths
    # typed or pasted at the prompt resolve as the user expects.
    source_dir = Path(input('Backlog CSVs DIR? >> ').strip()).expanduser()
    if not source_dir.is_dir():
        raise SystemExit('\033[93mNot a directory.\033[0m')

    # Prep a directory for original files.
    old_dir = source_dir / 'old'
    old_dir.mkdir(exist_ok=True)

    # Rename and re-encode.
    for orig_csv in glob_backlog_csvs(source_dir):
        fname, df = rename_csv(orig_csv)
        fpath = source_dir / fname
        # to_csv writes UTF-8 by default, which performs the re-encoding.
        df.to_csv(fpath, index=False)
        print(f'\033[93mSaved: {fpath}\033[0m')
        # Move the original out of the way only after the new file is saved.
        orig_csv.rename(old_dir / orig_csv.name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment