yeiichi · December 29, 2024 01:03
diff --git a/ren_n_utf8_bklog_csv.py b/ren_n_utf8_bklog_csv.py
 #!/usr/bin/env python3
 import re
 from pathlib import Path

 import pandas as pd


 def glob_backlog_csvs(csv_dir):
    """Return the Backlog issue CSV files found directly in ``csv_dir``.

    Args:
        csv_dir: Directory (str or Path) searched, non-recursively, for files
            matching ``Backlog-Issues*.csv``.

    Returns:
        list[Path]: Matching paths, sorted so the processing order is
        deterministic.

    Raises:
        SystemExit: If no file matches the pattern.
    """
    csv_files = sorted(Path(csv_dir).glob('Backlog-Issues*.csv'))
    if not csv_files:
        # Raise SystemExit directly: the ``exit`` helper is injected by the
        # ``site`` module for interactive use and is not always available.
        raise SystemExit('\033[93mNo CSV files found.\033[0m')
    return csv_files


 def try_open_with_encoding(file_path, encoding):
    """Check whether a file decodes cleanly with the given encoding.

    The whole file is decoded, chunk by chunk.  Sampling only the first few
    characters would accept a file whose leading part is plain ASCII but
    which contains undecodable bytes further down.

    Args:
        file_path: Path (str or Path) of the file to test.
        encoding (str): Codec name used to open the file.

    Returns:
        bool: True if the entire file decodes, False on a decoding error.
    """
    try:
        with Path(file_path).open(encoding=encoding) as f:
            # Only decodability matters; the decoded text is discarded.
            while f.read(65536):
                pass
    except UnicodeDecodeError:
        return False
    return True


 def detect_file_encoding(file_path):
    """Guess which supported encoding a file is written in.

    Candidates are tried in order, UTF8 first and CP932 second; the first one
    accepted by ``try_open_with_encoding`` wins.

    Returns:
        str: 'UTF8', 'CP932', or 'unknown' when neither candidate is accepted.
    """
    candidates = ('UTF8', 'CP932')
    accepted = (name for name in candidates
                if try_open_with_encoding(file_path, name))
    return next(accepted, 'unknown')


 def rename_csv(csv_file_path):
    """Build the new file name for a Backlog CSV and load its contents.

    The new name combines the project key, taken from the first issue key in
    the file, with the timestamp embedded in the original file name.  Nothing
    is written or renamed on disk here; the caller does that.

    Args:
        csv_file_path: Path (str or Path) of an exported Backlog issue CSV.

    Returns:
        fname_ (str): New file name.
        df_ (DataFrame): File contents read with the detected encoding.

    Raises:
        SystemExit: If the file name carries no ``-YYYYMMDD-HHMM`` timestamp,
            the encoding is neither UTF8 nor CP932, or no project key can be
            read from the issue-key column.

    Example:
        Backlog-Issues-19991231-2359.csv -> backlog_LONG_NOW_19991231-2359.csv
    """
    csv_file_path = Path(csv_file_path)

    # ``Backlog-Issues*.csv`` also matches names without a timestamp, so a
    # failed search is reported instead of crashing on ``None.group``.
    timestamp = re.search(r'-(\d{8}-\d{4})', csv_file_path.stem)
    if timestamp is None:
        raise SystemExit(
            f'\033[93mNo timestamp in file name: {csv_file_path.name}\033[0m')
    dttm = timestamp.group(1)

    encoding = detect_file_encoding(csv_file_path)
    if encoding not in ('UTF8', 'CP932'):
        raise SystemExit(f'\033[93mUnknown encoding for: {csv_file_path}\033[0m')
    df_ = pd.read_csv(csv_file_path, encoding=encoding)

    # UTF8 was previously paired only with the 'Key' header and CP932 only
    # with 'キー'.  Keep that pairing as the preference, but accept the other
    # header too so a different combination does not raise KeyError.
    key_columns = ('Key', 'キー') if encoding == 'UTF8' else ('キー', 'Key')
    key_column = next((col for col in key_columns if col in df_.columns), None)

    first_key = None
    if key_column is not None and not df_.empty:
        first_key = df_[key_column].iloc[0]
    if not isinstance(first_key, str):
        # Covers a missing column, a file with no rows and an empty first cell.
        raise SystemExit(f'\033[93mNo issue key found in: {csv_file_path}\033[0m')

    # Drop the trailing issue number: 'LONG_NOW-123' -> 'LONG_NOW'.
    project_key = first_key.rsplit('-', maxsplit=1)[0]

    fname_ = f'backlog_{project_key}_{dttm}.csv'
    return fname_, df_


 def main():
    """Interactively convert every Backlog CSV in a directory.

    Prompts for a directory, writes each ``Backlog-Issues*.csv`` found there
    as a renamed UTF-8 CSV in the same directory, and moves the original
    file into an ``old`` subdirectory.

    Raises:
        SystemExit: If the entered path is not a directory.
    """
    # Tolerate stray whitespace around the typed path and expand a leading "~".
    source_dir = Path(input('Backlog CSVs DIR? >> ').strip()).expanduser()
    if not source_dir.is_dir():
        raise SystemExit('\033[93mNot a directory.\033[0m')

    # Look for inputs first so no directory is created when there are none.
    csv_files = glob_backlog_csvs(source_dir)

    # Prep a directory for original files.
    old_dir = source_dir / 'old'
    old_dir.mkdir(exist_ok=True)

    # Rename and re-encode.
    for orig_csv in csv_files:
        fname, df = rename_csv(orig_csv)
        fpath = source_dir / fname
        # UTF-8 is already the to_csv default; it is spelled out because
        # re-encoding is the purpose of this step.
        df.to_csv(fpath, index=False, encoding='utf-8')
        print(f'\033[93mSaved: {fpath}\033[0m')
        orig_csv.rename(old_dir / orig_csv.name)


 if __name__ == '__main__':
    main()
	#!/usr/bin/env python3
	import re
	from pathlib import Path

	import pandas as pd


	def glob_backlog_csvs(csv_dir):
	    """Return the Backlog issue CSV files found directly in ``csv_dir``.

	    Args:
	        csv_dir: Directory (str or Path) searched, non-recursively, for
	            files matching ``Backlog-Issues*.csv``.

	    Returns:
	        list[Path]: Matching paths, sorted so the processing order is
	        deterministic.

	    Raises:
	        SystemExit: If no file matches the pattern.
	    """
	    csv_files = sorted(Path(csv_dir).glob('Backlog-Issues*.csv'))
	    if not csv_files:
	        # Raise SystemExit directly: the ``exit`` helper is injected by the
	        # ``site`` module for interactive use and is not always available.
	        raise SystemExit('\033[93mNo CSV files found.\033[0m')
	    return csv_files


	def try_open_with_encoding(file_path, encoding):
	    """Check whether a file decodes cleanly with the given encoding.

	    The whole file is decoded, chunk by chunk.  Sampling only the first few
	    characters would accept a file whose leading part is plain ASCII but
	    which contains undecodable bytes further down.

	    Args:
	        file_path: Path (str or Path) of the file to test.
	        encoding (str): Codec name used to open the file.

	    Returns:
	        bool: True if the entire file decodes, False on a decoding error.
	    """
	    try:
	        with Path(file_path).open(encoding=encoding) as f:
	            # Only decodability matters; the decoded text is discarded.
	            while f.read(65536):
	                pass
	    except UnicodeDecodeError:
	        return False
	    return True


	def detect_file_encoding(file_path):
	    """Detect the encoding of a given file.

	    Candidates are tried in order, UTF8 first and CP932 second; the first
	    one accepted by ``try_open_with_encoding`` is returned.

	    Returns:
	        str: 'UTF8', 'CP932', or 'unknown' when neither is accepted.
	    """
	    for encoding in ['UTF8', 'CP932']:
	        if try_open_with_encoding(file_path, encoding):
	            return encoding
	    return 'unknown'


	def rename_csv(csv_file_path):
	    """Build the new file name for a Backlog CSV and load its contents.

	    The new name combines the project key, taken from the first issue key
	    in the file, with the timestamp embedded in the original file name.
	    Nothing is written or renamed on disk here; the caller does that.

	    Args:
	        csv_file_path: Path (str or Path) of an exported Backlog issue CSV.

	    Returns:
	        fname_ (str): New file name.
	        df_ (DataFrame): File contents read with the detected encoding.

	    Raises:
	        SystemExit: If the file name carries no ``-YYYYMMDD-HHMM``
	            timestamp, the encoding is neither UTF8 nor CP932, or no
	            project key can be read from the issue-key column.

	    Example:
	        Backlog-Issues-19991231-2359.csv -> backlog_LONG_NOW_19991231-2359.csv
	    """
	    csv_file_path = Path(csv_file_path)

	    # ``Backlog-Issues*.csv`` also matches names without a timestamp, so a
	    # failed search is reported instead of crashing on ``None.group``.
	    timestamp = re.search(r'-(\d{8}-\d{4})', csv_file_path.stem)
	    if timestamp is None:
	        raise SystemExit(
	            f'\033[93mNo timestamp in file name: {csv_file_path.name}\033[0m')
	    dttm = timestamp.group(1)

	    encoding = detect_file_encoding(csv_file_path)
	    if encoding not in ('UTF8', 'CP932'):
	        raise SystemExit(f'\033[93mUnknown encoding for: {csv_file_path}\033[0m')
	    df_ = pd.read_csv(csv_file_path, encoding=encoding)

	    # UTF8 was previously paired only with the 'Key' header and CP932 only
	    # with 'キー'.  Keep that pairing as the preference, but accept the
	    # other header too so a different combination does not raise KeyError.
	    key_columns = ('Key', 'キー') if encoding == 'UTF8' else ('キー', 'Key')
	    key_column = next((col for col in key_columns if col in df_.columns), None)

	    first_key = None
	    if key_column is not None and not df_.empty:
	        first_key = df_[key_column].iloc[0]
	    if not isinstance(first_key, str):
	        # Covers a missing column, a file with no rows and an empty cell.
	        raise SystemExit(f'\033[93mNo issue key found in: {csv_file_path}\033[0m')

	    # Drop the trailing issue number: 'LONG_NOW-123' -> 'LONG_NOW'.
	    project_key = first_key.rsplit('-', maxsplit=1)[0]

	    fname_ = f'backlog_{project_key}_{dttm}.csv'
	    return fname_, df_


	def main():
	    """Interactively convert every Backlog CSV in a directory.

	    Prompts for a directory, writes each ``Backlog-Issues*.csv`` found
	    there as a renamed UTF-8 CSV in the same directory, and moves the
	    original file into an ``old`` subdirectory.

	    Raises:
	        SystemExit: If the entered path is not a directory.
	    """
	    # Tolerate stray whitespace around the typed path and expand a leading "~".
	    source_dir = Path(input('Backlog CSVs DIR? >> ').strip()).expanduser()
	    if not source_dir.is_dir():
	        raise SystemExit('\033[93mNot a directory.\033[0m')

	    # Look for inputs first so no directory is created when there are none.
	    csv_files = glob_backlog_csvs(source_dir)

	    # Prep a directory for original files.
	    old_dir = source_dir / 'old'
	    old_dir.mkdir(exist_ok=True)

	    # Rename and re-encode.
	    for orig_csv in csv_files:
	        fname, df = rename_csv(orig_csv)
	        fpath = source_dir / fname
	        # UTF-8 is already the to_csv default; it is spelled out because
	        # re-encoding is the purpose of this step.
	        df.to_csv(fpath, index=False, encoding='utf-8')
	        print(f'\033[93mSaved: {fpath}\033[0m')
	        orig_csv.rename(old_dir / orig_csv.name)


	if __name__ == '__main__':
	    main()