Last active
January 18, 2024 09:59
-
-
Save trueroad/dcbbfd4fe0dbdde547a8733922b4e248 to your computer and use it in GitHub Desktop.
Add date and week columns.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
""" | |
Add date and week columns. | |
https://gist.github.com/trueroad/dcbbfd4fe0dbdde547a8733922b4e248 | |
Copyright (C) 2024 Masamichi Hosoda. | |
All rights reserved. | |
Redistribution and use in source and binary forms, with or without | |
modification, are permitted provided that the following conditions | |
are met: | |
* Redistributions of source code must retain the above copyright notice, | |
this list of conditions and the following disclaimer. | |
* Redistributions in binary form must reproduce the above copyright notice, | |
this list of conditions and the following disclaimer in the documentation | |
and/or other materials provided with the distribution. | |
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
ARE DISCLAIMED. | |
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
SUCH DAMAGE. | |
""" | |
import os | |
import sys | |
from typing import Any, Final, Optional, Union | |
import pandas as pd | |
def week_str(d: pd.Timestamp) -> str:
    """Return the ISO week containing the date `d` in YYYY-Www notation.

    Args:
        d (pd.Timestamp):
            Date whose ISO week is wanted

    Returns:
        str: ISO year and ISO week number, e.g. '2024-W03'

    Raises:
        ValueError: If the ISO year does not fit in four digits
    """
    # isocalendar() yields (ISO year, ISO week, ISO weekday);
    # the weekday is not needed here.
    year, week = d.isocalendar()[:2]
    if not 0 <= year <= 9999:
        raise ValueError('Year is out of range.')
    return f'{year:04}-W{week:02}'
class add_date_and_week:
    """Add date and week columns class.

    Holds the CSV loading options and the column names, and derives a
    date-only column and an ISO week column from a datetime column.
    """

    def __init__(self,
                 dtype: Optional[dict[str, type]] = None,
                 index_col: Optional[str] = None,
                 column_datetime: str = 'Datetime',
                 column_date: str = 'Date',
                 column_week: str = 'Week') -> None:
        """
        Store the loading options and the column names.

        Args:
            dtype (Optional[dict[str, type]]):
                Type specification per column name
            index_col (Optional[str]):
                Name of the column to use as the index
            column_datetime (str):
                Name of the column that contains the datetime
            column_date (str):
                Name of the date column to be added
            column_week (str):
                Name of the ISO week column to be added
        """
        self.dtype: Final[Optional[dict[str, type]]] = dtype
        self.index_col: Final[Optional[str]] = index_col
        self.column_datetime: Final[str] = column_datetime
        self.column_date: Final[str] = column_date
        self.column_week: Final[str] = column_week

    @staticmethod
    def _to_date(value: Any) -> Any:
        """Return `value` truncated to its date, or NaT when it is missing."""
        if pd.isna(value):
            return pd.NaT
        # Rebuild the timestamp from year/month/day instead of casting to a
        # day resolution, whose exact behavior was unclear to the author.
        return pd.Timestamp(year=value.year,
                            month=value.month,
                            day=value.day)

    @staticmethod
    def _to_week(value: Any) -> Optional[str]:
        """Return the ISO week string of `value`, or None when it is missing."""
        if pd.isna(value):
            return None
        return week_str(value)

    def process_column(self, df: pd.DataFrame) -> None:
        """
        Add the date column and the ISO week column to `df` in place.

        Rows whose datetime is missing (NaT) get a missing date and a
        missing week instead of aborting the whole run.

        Args:
            df (pd.DataFrame):
                Input/output data frame

        Raises:
            RuntimeError: If a column to be added already exists
        """
        if self.column_date in df.columns or self.column_week in df.columns:
            # A column with the name we want to add is already present.
            raise RuntimeError('Column name to be added alreay exists.')

        # Add the date-only column.
        df[self.column_date] = df[self.column_datetime].map(self._to_date)

        # Add the ISO week column, derived from the date column just added.
        df[self.column_week] = df[self.column_date].map(self._to_week)

    def process_csv(self,
                    filename_in: Union[str, os.PathLike[str]],
                    filename_out: Union[str, os.PathLike[str]]) -> bool:
        """
        Process a CSV file.

        Args:
            filename_in (Union[str, os.PathLike[str]]):
                Input CSV file
            filename_out (Union[str, os.PathLike[str]]):
                Output CSV file

        Returns:
            bool: Always True; this method has no code path returning False
        """
        # Load the CSV into a data frame.
        print(f'Loading: {filename_in} ...')
        df: pd.DataFrame = \
            pd.read_csv(filename_in,
                        index_col=self.index_col,
                        parse_dates=[self.column_datetime],
                        dtype=self.dtype)  # type: ignore[arg-type]

        # Add the columns.
        self.process_column(df)

        # Write the result.
        print(f'Writing: {filename_out} ...')
        # Output as CSV with a UTF-8 BOM so that Excel can open it.
        # The index is written only when an index column was requested.
        df.to_csv(filename_out, encoding='utf_8_sig',
                  index=(self.index_col is not None))

        return True
class commandline():
    """Commandline option class."""

    def __init__(self) -> None:
        """Set the default value of each optional command-line argument."""
        # Kept as a string because main() evaluates it as an expression.
        self.default_dtype: str = 'None'
        self.default_index_col: Optional[str] = None
        self.default_col_datetime: str = 'Datetime'
        self.default_col_date: str = 'Date'
        self.default_col_week: str = 'Week'

    def parse(self, argv: Optional[list[str]] = None
              ) -> tuple[str, str, str, Optional[str], str, str, str]:
        """
        Parse the command line and print the resulting settings.

        Args:
            argv (Optional[list[str]]):
                Arguments to parse; None lets argparse read the
                process command line

        Returns:
            tuple:
                str: Input CSV filename
                str: Output CSV filename
                str: Dtype dictionary as an unevaluated string
                Optional[str]: Name of the column to use as the index
                str: Name of the column that contains the datetime
                str: Name of the date column to be added
                str: Name of the ISO week column to be added
        """
        import argparse

        parser: argparse.ArgumentParser = argparse.ArgumentParser()
        parser.add_argument('INPUT.csv',
                            help='Input CSV filename',
                            type=str)
        parser.add_argument('OUTPUT.csv',
                            help='Output CSV filename',
                            type=str)
        parser.add_argument('--dtype',
                            help='Dtype dictionary',
                            type=str, default=self.default_dtype,
                            required=False)
        parser.add_argument('--index-col',
                            help='Index column name',
                            type=str, default=self.default_index_col,
                            required=False)
        parser.add_argument('--column-datetime',
                            help='Datetime column name',
                            type=str, default=self.default_col_datetime,
                            required=False)
        parser.add_argument('--column-date',
                            help='Date column name to be added',
                            type=str, default=self.default_col_date,
                            required=False)
        parser.add_argument('--column-week',
                            help='Week column name to be added',
                            type=str, default=self.default_col_week,
                            required=False)

        args: argparse.Namespace = parser.parse_args(argv)
        # The positional names contain a dot, so they are not reachable as
        # attributes; read everything through the namespace dictionary.
        vargs: dict[str, Any] = vars(args)

        input_filename: str = vargs['INPUT.csv']
        output_filename: str = vargs['OUTPUT.csv']
        index_col: Optional[str] = vargs['index_col']
        dtype: str = vargs['dtype']
        col_datetime: str = vargs['column_datetime']
        col_date: str = vargs['column_date']
        col_week: str = vargs['column_week']

        print('Filenames\n'
              f'  Input filename  : {input_filename}\n'
              f'  Output filename : {output_filename}\n'
              'Column types\n'
              f'  Dtype dictionary: {dtype}\n'
              'Column names\n'
              f'  Index           : {index_col}\n'
              f'  Datetime        : {col_datetime}\n'
              f'  Date to be added: {col_date}\n'
              f'  Week to be added: {col_week}\n')

        return (input_filename,
                output_filename,
                dtype,
                index_col,
                col_datetime,
                col_date,
                col_week)
def main() -> None:
    """Print the banner, parse the command line and process the CSV file."""
    print('Add date and week columns.\n\n'
          'https://gist.github.com/trueroad/'
          'dcbbfd4fe0dbdde547a8733922b4e248\n\n'
          'Copyright (C) 2024 Masamichi Hosoda.\n'
          'All rights reserved.\n')

    cl: commandline = commandline()
    input_filename: str
    output_filename: str
    dtype: str
    index_col: Optional[str]
    col_datetime: str
    col_date: str
    col_week: str
    (input_filename, output_filename,
     dtype, index_col,
     col_datetime, col_date, col_week) = cl.parse()

    # NOTE(review): the --dtype string is evaluated as a Python expression so
    # that type objects such as `str` can be given on the command line.
    # eval() executes arbitrary code, so this tool must only be run with a
    # --dtype value from a trusted source.
    adw: add_date_and_week = add_date_and_week(
        dtype=eval(dtype),
        index_col=index_col,
        column_datetime=col_datetime,
        column_date=col_date,
        column_week=col_week)

    adw.process_csv(input_filename, output_filename)

    print('Done.')
# Script entry point: run main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment