Skip to content

Instantly share code, notes, and snippets.

@trueroad
Last active January 18, 2024 09:59
Show Gist options
  • Save trueroad/dcbbfd4fe0dbdde547a8733922b4e248 to your computer and use it in GitHub Desktop.
Save trueroad/dcbbfd4fe0dbdde547a8733922b4e248 to your computer and use it in GitHub Desktop.
Add date and week columns.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Add date and week columns.
https://gist.github.com/trueroad/dcbbfd4fe0dbdde547a8733922b4e248
Copyright (C) 2024 Masamichi Hosoda.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
"""
import os
import sys
from typing import Any, Final, Optional, Union
import pandas as pd
def week_str(d: pd.Timestamp) -> str:
    """
    Return the ISO week that contains the given date as a string.

    Args:
        d (pd.Timestamp):
            Date whose ISO week is wanted

    Returns:
        str: ISO year and ISO week number in ``YYYY-Www`` notation

    Raises:
        ValueError: If the ISO year is below 0 or above 9999
    """
    iso_year: int
    iso_week: int
    # The third element (ISO weekday) is not needed for the week label.
    iso_year, iso_week, _weekday = d.isocalendar()

    # The year field is formatted with a fixed width of four digits.
    if iso_year > 9999 or iso_year < 0:
        raise ValueError('Year is out of range.')

    return f'{iso_year:04}-W{iso_week:02}'
class add_date_and_week:
    """Add a date column and an ISO week column next to a datetime column."""

    def __init__(self,
                 dtype: Optional[dict[str, type]] = None,
                 index_col: Optional[str] = None,
                 column_datetime: str = 'Datetime',
                 column_date: str = 'Date',
                 column_week: str = 'Week') -> None:
        """
        Store the column configuration.

        Args:
            dtype (Optional[dict[str, type]]):
                Per-column type specification used when reading the CSV
            index_col (Optional[str]):
                Name of the column to use as the index
            column_datetime (str):
                Name of the column that holds the datetime
            column_date (str):
                Name of the date column to be added
            column_week (str):
                Name of the ISO week column to be added
        """
        self.dtype: Final[Optional[dict[str, type]]] = dtype
        self.index_col: Final[Optional[str]] = index_col
        self.column_datetime: Final[str] = column_datetime
        self.column_date: Final[str] = column_date
        self.column_week: Final[str] = column_week

    def process_column(self, df: pd.DataFrame) -> None:
        """
        Add the date and ISO week columns to the data frame in place.

        Rows whose datetime is missing (NaT) get a missing value in both
        added columns instead of aborting the whole run.

        Args:
            df (pd.DataFrame):
                Input/output data frame; it must contain the datetime column

        Raises:
            RuntimeError: If a column to be added already exists
        """
        if self.column_date in df.columns or self.column_week in df.columns:
            # A column name that should be added is already present.
            raise RuntimeError('Column name to be added already exists.')

        # Add the date-only column.  The timestamp is rebuilt from its
        # year/month/day because the original author considered the
        # behaviour of casting to day resolution unclear.
        df[self.column_date] = df[self.column_datetime].map(
            lambda x: x if pd.isna(x) else pd.Timestamp(year=x.year,
                                                        month=x.month,
                                                        day=x.day))

        # Add the ISO week column; a missing date has no week.
        df[self.column_week] = df[self.column_date].map(
            lambda x: None if pd.isna(x) else week_str(x))

    def process_csv(self,
                    filename_in: Union[str, os.PathLike[str]],
                    filename_out: Union[str, os.PathLike[str]]) -> bool:
        """
        Read a CSV file, add the columns and write the result as CSV.

        Args:
            filename_in (Union[str, os.PathLike[str]]):
                Input CSV file
            filename_out (Union[str, os.PathLike[str]]):
                Output CSV file

        Returns:
            bool: Always True; failures are reported by the exceptions
                raised while reading, processing or writing
        """
        # Load into a data frame, parsing the datetime column.
        print(f'Loading: {filename_in} ...')
        df: pd.DataFrame = \
            pd.read_csv(filename_in,
                        index_col=self.index_col,
                        parse_dates=[self.column_datetime],
                        dtype=self.dtype)  # type: ignore[arg-type]

        # Add the columns.
        self.process_column(df)

        # Write the result.
        print(f'Writing: {filename_out} ...')
        # CSV is written as UTF-8 with BOM so that Excel can open it.
        # The index is written only when an index column was requested.
        df.to_csv(filename_out, encoding='utf_8_sig',
                  index=(self.index_col is not None))

        return True
class commandline:
    """Commandline option class."""

    def __init__(self) -> None:
        """Set the default values of the optional arguments."""
        # Text of a Python expression; 'None' means no dtype is specified.
        self.default_dtype: str = 'None'
        self.default_index_col: Optional[str] = None
        self.default_col_datetime: str = 'Datetime'
        self.default_col_date: str = 'Date'
        self.default_col_week: str = 'Week'

    def parse(self, argv: Optional[list[str]] = None
              ) -> tuple[str, str, str, Optional[str], str, str, str]:
        """
        Parse the command line and print a summary of the settings.

        Args:
            argv (Optional[list[str]]):
                Arguments to parse, without the program name;
                None (the default) parses the process command line

        Returns:
            tuple:
                str: Input CSV filename
                str: Output CSV filename
                str: Dtype dictionary, as the text given on the command line
                Optional[str]: Name of the column to use as the index
                str: Name of the column that holds the datetime
                str: Name of the date column to be added
                str: Name of the ISO week column to be added
        """
        import argparse

        parser: argparse.ArgumentParser = argparse.ArgumentParser()
        parser.add_argument('INPUT.csv',
                            help='Input CSV filename',
                            type=str)
        parser.add_argument('OUTPUT.csv',
                            help='Output CSV filename',
                            type=str)
        parser.add_argument('--dtype',
                            help='Dtype dictionary',
                            type=str, default=self.default_dtype,
                            required=False)
        parser.add_argument('--index-col',
                            help='Index column name',
                            type=str, default=self.default_index_col,
                            required=False)
        parser.add_argument('--column-datetime',
                            help='Datetime column name',
                            type=str, default=self.default_col_datetime,
                            required=False)
        parser.add_argument('--column-date',
                            help='Date column name to be added',
                            type=str, default=self.default_col_date,
                            required=False)
        parser.add_argument('--column-week',
                            help='Week column name to be added',
                            type=str, default=self.default_col_week,
                            required=False)

        # argparse falls back to the process command line when argv is None.
        args: argparse.Namespace = parser.parse_args(argv)
        # The positional names contain a dot, so they are not reachable as
        # attributes; read everything through the namespace dictionary.
        vargs: dict[str, Any] = vars(args)

        input_filename: str = vargs['INPUT.csv']
        output_filename: str = vargs['OUTPUT.csv']
        index_col: Optional[str] = vargs['index_col']
        dtype: str = vargs['dtype']
        col_datetime: str = vargs['column_datetime']
        col_date: str = vargs['column_date']
        col_week: str = vargs['column_week']

        print('Filenames\n'
              f' Input filename : {input_filename}\n'
              f' Output filename : {output_filename}\n'
              'Column types\n'
              f' Dtype dictionary: {dtype}\n'
              'Column names\n'
              f' Index : {index_col}\n'
              f' Datetime : {col_datetime}\n'
              f' Date to be added: {col_date}\n'
              f' Week to be added: {col_week}\n')

        return (input_filename,
                output_filename,
                dtype,
                index_col,
                col_datetime,
                col_date,
                col_week)
def main() -> None:
    """Print the banner, parse the command line and convert the CSV file."""
    banner: str = (f'Add date and week columns.\n\n'
                   'https://gist.github.com/trueroad/'
                   'dcbbfd4fe0dbdde547a8733922b4e248\n\n'
                   'Copyright (C) 2024 Masamichi Hosoda.\n'
                   'All rights reserved.\n')
    print(banner)

    options: commandline = commandline()
    (input_filename, output_filename,
     dtype, index_col,
     col_datetime, col_date, col_week) = options.parse()

    # NOTE(security): the --dtype text is evaluated as a Python expression,
    # so whoever controls the command line can run arbitrary code here.
    # Only use this tool with trusted arguments.
    dtype_spec = eval(dtype)

    converter: add_date_and_week = add_date_and_week(
        dtype=dtype_spec,
        index_col=index_col,
        column_datetime=col_datetime,
        column_date=col_date,
        column_week=col_week)
    converter.process_csv(input_filename, output_filename)

    print('Done.')
# Run the command-line entry point only when executed as a script,
# not when this file is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment