Created
February 21, 2024 04:32
-
-
Save uchidama/c25b2373749dc0eac2be0ab1be9ee6f2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# 参考URL:
# https://note.com/oriki111/n/n49ae98873a98?sub_rt=share_h
# 実行コマンド。実行時間のログをテキストに書いておく
# python3 mc4_load.py | tee mc4_load.txt
# 仮想環境の作成
# python3.12 -m venv myenv
# 仮想環境をアクティベート
# source myenv/bin/activate
# datasets と transformers パッケージをインストール
# pip install datasets transformers
# 使用後は仮想環境をデアクティベート
# deactivate
#
# datetime モジュールをインポート
from datetime import datetime

from datasets import load_dataset
def get_current_datetime_formatted() -> str:
    """Return the current date and time as a formatted string.

    Returns:
        The value of ``datetime.now()`` rendered as
        ``YYYY-MM-DD HH:MM:SS`` (for example ``2024-02-21 04:32:00``).
        ``datetime.now()`` is called without a timezone, so the result is
        naive local time.
    """
    # Used only as a human-readable log stamp, so second resolution suffices.
    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')
# Print a timestamp before and after the load so the elapsed time can be
# read off the captured output (e.g. when the script is piped through `tee`).
print("start:" + get_current_datetime_formatted())
# Load the 'train' split of the 'ja' configuration of the 'mc4' dataset,
# storing downloaded/prepared files under the given cache directory.
dataset = load_dataset('mc4', 'ja', cache_dir="/Volumes/DataSets/DataSets", split='train')
print("end:" + get_current_datetime_formatted())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment