Last active
June 23, 2022 18:15
-
-
Save zilunpeng/cce361a16c4ce3ab8239bd8f3056e172 to your computer and use it in GitHub Desktop.
example notebook for training GPT-Neo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Training GPT-Neo models with custom dataset\n", | |
"You can either run this notebook on JupyterLab, or run commands below in a terminal\n", | |
"\n", | |
"Required packages:\n", | |
"```\n", | |
"conda create --name myenv python=3.6\n", | |
"conda activate myenv\n", | |
"pip install torch==1.9.0\n", | |
"pip install datasets==1.10.2\n", | |
    "git clone https://github.com/huggingface/transformers.git\n", | |
    "cd transformers\n", | |
    "git checkout tags/v4.9.1\n", | |
"pip install -e .\n", | |
"cd ..\n", | |
"```\n", | |
"\n", | |
"Additional commands if running on JupyterLab:\n", | |
"```\n", | |
"conda install jupyter\n", | |
"conda install nb_conda\n", | |
"conda install ipykernel\n", | |
"python -m ipykernel install --user --name myenv\n", | |
"```\n", | |
"\n", | |
"Recommended CPU instance: c5.12xlarge\n", | |
"\n", | |
"**Note:** This does not work on a GPU instance!" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Create or upload your custom dataset\n", | |
"**Note:** when creating the training data file, put every data sample (a sequence of words) as a separate row and have only **one** column with the column name \"text\"" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"source": [ | |
"%%writefile train_data.csv\n", | |
"text\n", | |
"a\n", | |
"b\n", | |
"c" | |
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Writing train_data.csv\n" | |
] | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Continue to pre-train!" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"source": [ | |
"model = \"EleutherAI/gpt-neo-1.3B\" #@param [\"EleutherAI/gpt-neo-1.3B\", \"gpt2\"]\n", | |
"num_train_epochs = 3 #@param {type:\"integer\"}" | |
], | |
"outputs": [], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"source": [ | |
"!python transformers/examples/pytorch/language-modeling/run_clm.py \\\n", | |
" --model_name_or_path {model} \\\n", | |
" --train_file train_data.csv \\\n", | |
" --do_train --per_device_train_batch_size 1 \\\n", | |
" --output_dir tmp/test-clm \\\n", | |
" --overwrite_output_dir \\\n", | |
" --num_train_epochs {num_train_epochs}" | |
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"07/30/2021 20:47:37 - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False\n", | |
"07/30/2021 20:47:37 - INFO - __main__ - Training/evaluation parameters TrainingArguments(\n", | |
"_n_gpu=0,\n", | |
"adafactor=False,\n", | |
"adam_beta1=0.9,\n", | |
"adam_beta2=0.999,\n", | |
"adam_epsilon=1e-08,\n", | |
"dataloader_drop_last=False,\n", | |
"dataloader_num_workers=0,\n", | |
"dataloader_pin_memory=True,\n", | |
"ddp_find_unused_parameters=None,\n", | |
"debug=[],\n", | |
"deepspeed=None,\n", | |
"disable_tqdm=False,\n", | |
"do_eval=False,\n", | |
"do_predict=False,\n", | |
"do_train=True,\n", | |
"eval_accumulation_steps=None,\n", | |
"eval_steps=None,\n", | |
"evaluation_strategy=IntervalStrategy.NO,\n", | |
"fp16=False,\n", | |
"fp16_backend=auto,\n", | |
"fp16_full_eval=False,\n", | |
"fp16_opt_level=O1,\n", | |
"gradient_accumulation_steps=1,\n", | |
"greater_is_better=None,\n", | |
"group_by_length=False,\n", | |
"ignore_data_skip=False,\n", | |
"label_names=None,\n", | |
"label_smoothing_factor=0.0,\n", | |
"learning_rate=5e-05,\n", | |
"length_column_name=length,\n", | |
"load_best_model_at_end=False,\n", | |
"local_rank=-1,\n", | |
"log_level=-1,\n", | |
"log_level_replica=-1,\n", | |
"log_on_each_node=True,\n", | |
"logging_dir=tmp/test-clm/runs/Jul30_20-47-37_ip-172-31-54-251,\n", | |
"logging_first_step=False,\n", | |
"logging_steps=500,\n", | |
"logging_strategy=IntervalStrategy.STEPS,\n", | |
"lr_scheduler_type=SchedulerType.LINEAR,\n", | |
"max_grad_norm=1.0,\n", | |
"max_steps=-1,\n", | |
"metric_for_best_model=None,\n", | |
"mp_parameters=,\n", | |
"no_cuda=False,\n", | |
"num_train_epochs=3.0,\n", | |
"output_dir=tmp/test-clm,\n", | |
"overwrite_output_dir=True,\n", | |
"past_index=-1,\n", | |
"per_device_eval_batch_size=8,\n", | |
"per_device_train_batch_size=1,\n", | |
"prediction_loss_only=False,\n", | |
"push_to_hub=False,\n", | |
"push_to_hub_model_id=test-clm,\n", | |
"push_to_hub_organization=None,\n", | |
"push_to_hub_token=None,\n", | |
"remove_unused_columns=True,\n", | |
"report_to=[],\n", | |
"resume_from_checkpoint=None,\n", | |
"run_name=tmp/test-clm,\n", | |
"save_on_each_node=False,\n", | |
"save_steps=500,\n", | |
"save_strategy=IntervalStrategy.STEPS,\n", | |
"save_total_limit=None,\n", | |
"seed=42,\n", | |
"sharded_ddp=[],\n", | |
"skip_memory_metrics=True,\n", | |
"tpu_metrics_debug=False,\n", | |
"tpu_num_cores=None,\n", | |
"use_legacy_prediction_loop=False,\n", | |
"warmup_ratio=0.0,\n", | |
"warmup_steps=0,\n", | |
"weight_decay=0.0,\n", | |
")\n", | |
"07/30/2021 20:47:37 - WARNING - datasets.builder - Using custom data configuration default-822c539f89fcc9d9\n", | |
"07/30/2021 20:47:37 - INFO - datasets.builder - Generating dataset csv (/home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff)\n", | |
"Downloading and preparing dataset csv/default (download: Unknown size, generated: Unknown size, post-processed: Unknown size, total: Unknown size) to /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff...\n", | |
"100%|██████████████████████████████████████████| 1/1 [00:00<00:00, 11096.04it/s]\n", | |
"07/30/2021 20:47:37 - INFO - datasets.utils.download_manager - Downloading took 0.0 min\n", | |
"07/30/2021 20:47:37 - INFO - datasets.utils.download_manager - Checksum Computation took 0.0 min\n", | |
"100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 2157.56it/s]\n", | |
"07/30/2021 20:47:37 - INFO - datasets.utils.info_utils - Unable to verify checksums.\n", | |
"07/30/2021 20:47:37 - INFO - datasets.builder - Generating split train\n", | |
"07/30/2021 20:47:37 - INFO - datasets.utils.info_utils - Unable to verify splits sizes.\n", | |
"Dataset csv downloaded and prepared to /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff. Subsequent calls will reuse this data.\n", | |
"100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 1100.29it/s]\n", | |
"07/30/2021 20:47:37 - WARNING - datasets.builder - Using custom data configuration default-822c539f89fcc9d9\n", | |
"07/30/2021 20:47:37 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", | |
"07/30/2021 20:47:37 - INFO - datasets.info - Loading Dataset info from /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\n", | |
"07/30/2021 20:47:37 - WARNING - datasets.builder - Reusing dataset csv (/home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff)\n", | |
"07/30/2021 20:47:37 - INFO - datasets.info - Loading Dataset info from /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\n", | |
"07/30/2021 20:47:37 - WARNING - datasets.builder - Using custom data configuration default-822c539f89fcc9d9\n", | |
"07/30/2021 20:47:37 - INFO - datasets.builder - Overwrite dataset info from restored data version.\n", | |
"07/30/2021 20:47:37 - INFO - datasets.info - Loading Dataset info from /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\n", | |
"07/30/2021 20:47:37 - WARNING - datasets.builder - Reusing dataset csv (/home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff)\n", | |
"07/30/2021 20:47:37 - INFO - datasets.info - Loading Dataset info from /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff\n", | |
"[INFO|configuration_utils.py:545] 2021-07-30 20:47:37,750 >> loading configuration file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/42252c2220ae3f9f1ea86a994b63e1dcab20953ba8982117c2384587f7c01c5d.102e6e06599c480a8e55be9ba8dc6226140c958f3cd489f61627520db6817595\n", | |
"[INFO|configuration_utils.py:581] 2021-07-30 20:47:37,751 >> Model config GPTNeoConfig {\n", | |
" \"activation_function\": \"gelu_new\",\n", | |
" \"architectures\": [\n", | |
" \"GPTNeoForCausalLM\"\n", | |
" ],\n", | |
" \"attention_dropout\": 0,\n", | |
" \"attention_layers\": [\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\"\n", | |
" ],\n", | |
" \"attention_types\": [\n", | |
" [\n", | |
" [\n", | |
" \"global\",\n", | |
" \"local\"\n", | |
" ],\n", | |
" 12\n", | |
" ]\n", | |
" ],\n", | |
" \"bos_token_id\": 50256,\n", | |
" \"embed_dropout\": 0,\n", | |
" \"eos_token_id\": 50256,\n", | |
" \"gradient_checkpointing\": false,\n", | |
" \"hidden_size\": 2048,\n", | |
" \"initializer_range\": 0.02,\n", | |
" \"intermediate_size\": null,\n", | |
" \"layer_norm_epsilon\": 1e-05,\n", | |
" \"max_position_embeddings\": 2048,\n", | |
" \"model_type\": \"gpt_neo\",\n", | |
" \"num_heads\": 16,\n", | |
" \"num_layers\": 24,\n", | |
" \"resid_dropout\": 0,\n", | |
" \"summary_activation\": null,\n", | |
" \"summary_first_dropout\": 0.1,\n", | |
" \"summary_proj_to_labels\": true,\n", | |
" \"summary_type\": \"cls_index\",\n", | |
" \"summary_use_proj\": true,\n", | |
" \"task_specific_params\": {\n", | |
" \"text-generation\": {\n", | |
" \"do_sample\": true,\n", | |
" \"max_length\": 50,\n", | |
" \"temperature\": 0.9\n", | |
" }\n", | |
" },\n", | |
" \"tokenizer_class\": \"GPT2Tokenizer\",\n", | |
" \"transformers_version\": \"4.9.1\",\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 50257,\n", | |
" \"window_size\": 256\n", | |
"}\n", | |
"\n", | |
"[INFO|configuration_utils.py:545] 2021-07-30 20:47:37,789 >> loading configuration file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/42252c2220ae3f9f1ea86a994b63e1dcab20953ba8982117c2384587f7c01c5d.102e6e06599c480a8e55be9ba8dc6226140c958f3cd489f61627520db6817595\n", | |
"[INFO|configuration_utils.py:581] 2021-07-30 20:47:37,789 >> Model config GPTNeoConfig {\n", | |
" \"activation_function\": \"gelu_new\",\n", | |
" \"architectures\": [\n", | |
" \"GPTNeoForCausalLM\"\n", | |
" ],\n", | |
" \"attention_dropout\": 0,\n", | |
" \"attention_layers\": [\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\"\n", | |
" ],\n", | |
" \"attention_types\": [\n", | |
" [\n", | |
" [\n", | |
" \"global\",\n", | |
" \"local\"\n", | |
" ],\n", | |
" 12\n", | |
" ]\n", | |
" ],\n", | |
" \"bos_token_id\": 50256,\n", | |
" \"embed_dropout\": 0,\n", | |
" \"eos_token_id\": 50256,\n", | |
" \"gradient_checkpointing\": false,\n", | |
" \"hidden_size\": 2048,\n", | |
" \"initializer_range\": 0.02,\n", | |
" \"intermediate_size\": null,\n", | |
" \"layer_norm_epsilon\": 1e-05,\n", | |
" \"max_position_embeddings\": 2048,\n", | |
" \"model_type\": \"gpt_neo\",\n", | |
" \"num_heads\": 16,\n", | |
" \"num_layers\": 24,\n", | |
" \"resid_dropout\": 0,\n", | |
" \"summary_activation\": null,\n", | |
" \"summary_first_dropout\": 0.1,\n", | |
" \"summary_proj_to_labels\": true,\n", | |
" \"summary_type\": \"cls_index\",\n", | |
" \"summary_use_proj\": true,\n", | |
" \"task_specific_params\": {\n", | |
" \"text-generation\": {\n", | |
" \"do_sample\": true,\n", | |
" \"max_length\": 50,\n", | |
" \"temperature\": 0.9\n", | |
" }\n", | |
" },\n", | |
" \"tokenizer_class\": \"GPT2Tokenizer\",\n", | |
" \"transformers_version\": \"4.9.1\",\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 50257,\n", | |
" \"window_size\": 256\n", | |
"}\n", | |
"\n", | |
"[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/vocab.json from cache at /home/ubuntu/.cache/huggingface/transformers/6111bc9bbed617156dc5c0b9fa9d6793147619aad08053f03b3697f1a5027973.a1b97b074a5ac71fad0544c8abc1b3581803d73832476184bde6cff06a67b6bb\n", | |
"[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/merges.txt from cache at /home/ubuntu/.cache/huggingface/transformers/ec80888cdc98108f625f7ec7a29ec449eb361ae1325aa1e7e63006ce962c071c.f5b91da9e34259b8f4d88dbc97c740667a0e8430b96314460cdb04e86d4fc435\n", | |
"[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/tokenizer.json from cache at None\n", | |
"[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/added_tokens.json from cache at None\n", | |
"[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/special_tokens_map.json from cache at /home/ubuntu/.cache/huggingface/transformers/1ae5a53fe395100a9213705940d92cc94554a2269777c062d951d1b710c39bb8.3ae9ae72462581d20e36bc528e9c47bb30cd671bb21add40ca0b24a0be9fac22\n", | |
"[INFO|tokenization_utils_base.py:1730] 2021-07-30 20:47:37,923 >> loading file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/tokenizer_config.json from cache at /home/ubuntu/.cache/huggingface/transformers/5fe35a59019a6fb05bfa29a31b59d407cd81ae59da93e6953772a783b740b4c0.c31b6b7d3225be0c43bc0f8e5d84d03a8b49fdb6b9f6009bbfff1f9cc5ec18bc\n", | |
"[INFO|configuration_utils.py:545] 2021-07-30 20:47:37,941 >> loading configuration file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/42252c2220ae3f9f1ea86a994b63e1dcab20953ba8982117c2384587f7c01c5d.102e6e06599c480a8e55be9ba8dc6226140c958f3cd489f61627520db6817595\n", | |
"[INFO|configuration_utils.py:581] 2021-07-30 20:47:37,941 >> Model config GPTNeoConfig {\n", | |
" \"activation_function\": \"gelu_new\",\n", | |
" \"architectures\": [\n", | |
" \"GPTNeoForCausalLM\"\n", | |
" ],\n", | |
" \"attention_dropout\": 0,\n", | |
" \"attention_layers\": [\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\"\n", | |
" ],\n", | |
" \"attention_types\": [\n", | |
" [\n", | |
" [\n", | |
" \"global\",\n", | |
" \"local\"\n", | |
" ],\n", | |
" 12\n", | |
" ]\n", | |
" ],\n", | |
" \"bos_token_id\": 50256,\n", | |
" \"embed_dropout\": 0,\n", | |
" \"eos_token_id\": 50256,\n", | |
" \"gradient_checkpointing\": false,\n", | |
" \"hidden_size\": 2048,\n", | |
" \"initializer_range\": 0.02,\n", | |
" \"intermediate_size\": null,\n", | |
" \"layer_norm_epsilon\": 1e-05,\n", | |
" \"max_position_embeddings\": 2048,\n", | |
" \"model_type\": \"gpt_neo\",\n", | |
" \"num_heads\": 16,\n", | |
" \"num_layers\": 24,\n", | |
" \"resid_dropout\": 0,\n", | |
" \"summary_activation\": null,\n", | |
" \"summary_first_dropout\": 0.1,\n", | |
" \"summary_proj_to_labels\": true,\n", | |
" \"summary_type\": \"cls_index\",\n", | |
" \"summary_use_proj\": true,\n", | |
" \"task_specific_params\": {\n", | |
" \"text-generation\": {\n", | |
" \"do_sample\": true,\n", | |
" \"max_length\": 50,\n", | |
" \"temperature\": 0.9\n", | |
" }\n", | |
" },\n", | |
" \"tokenizer_class\": \"GPT2Tokenizer\",\n", | |
" \"transformers_version\": \"4.9.1\",\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 50257,\n", | |
" \"window_size\": 256\n", | |
"}\n", | |
"\n", | |
"[INFO|configuration_utils.py:545] 2021-07-30 20:47:38,074 >> loading configuration file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/config.json from cache at /home/ubuntu/.cache/huggingface/transformers/42252c2220ae3f9f1ea86a994b63e1dcab20953ba8982117c2384587f7c01c5d.102e6e06599c480a8e55be9ba8dc6226140c958f3cd489f61627520db6817595\n", | |
"[INFO|configuration_utils.py:581] 2021-07-30 20:47:38,075 >> Model config GPTNeoConfig {\n", | |
" \"activation_function\": \"gelu_new\",\n", | |
" \"architectures\": [\n", | |
" \"GPTNeoForCausalLM\"\n", | |
" ],\n", | |
" \"attention_dropout\": 0,\n", | |
" \"attention_layers\": [\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\",\n", | |
" \"global\",\n", | |
" \"local\"\n", | |
" ],\n", | |
" \"attention_types\": [\n", | |
" [\n", | |
" [\n", | |
" \"global\",\n", | |
" \"local\"\n", | |
" ],\n", | |
" 12\n", | |
" ]\n", | |
" ],\n", | |
" \"bos_token_id\": 50256,\n", | |
" \"embed_dropout\": 0,\n", | |
" \"eos_token_id\": 50256,\n", | |
" \"gradient_checkpointing\": false,\n", | |
" \"hidden_size\": 2048,\n", | |
" \"initializer_range\": 0.02,\n", | |
" \"intermediate_size\": null,\n", | |
" \"layer_norm_epsilon\": 1e-05,\n", | |
" \"max_position_embeddings\": 2048,\n", | |
" \"model_type\": \"gpt_neo\",\n", | |
" \"num_heads\": 16,\n", | |
" \"num_layers\": 24,\n", | |
" \"resid_dropout\": 0,\n", | |
" \"summary_activation\": null,\n", | |
" \"summary_first_dropout\": 0.1,\n", | |
" \"summary_proj_to_labels\": true,\n", | |
" \"summary_type\": \"cls_index\",\n", | |
" \"summary_use_proj\": true,\n", | |
" \"task_specific_params\": {\n", | |
" \"text-generation\": {\n", | |
" \"do_sample\": true,\n", | |
" \"max_length\": 50,\n", | |
" \"temperature\": 0.9\n", | |
" }\n", | |
" },\n", | |
" \"tokenizer_class\": \"GPT2Tokenizer\",\n", | |
" \"transformers_version\": \"4.9.1\",\n", | |
" \"use_cache\": true,\n", | |
" \"vocab_size\": 50257,\n", | |
" \"window_size\": 256\n", | |
"}\n", | |
"\n", | |
"[INFO|modeling_utils.py:1271] 2021-07-30 20:47:38,143 >> loading weights file https://huggingface.co/EleutherAI/gpt-neo-1.3B/resolve/main/pytorch_model.bin from cache at /home/ubuntu/.cache/huggingface/transformers/7c5fac9d60b015cbc7c007ab8fe6d0512787fbaef81968922959898c49468d73.4c6a483fbfb5a25ac384bfcd71a1ff15245f06583a00c4ab4c44ed0f761f0b08\n", | |
"[INFO|modeling_utils.py:1510] 2021-07-30 20:48:13,532 >> All model checkpoint weights were used when initializing GPTNeoForCausalLM.\n", | |
"\n", | |
"[INFO|modeling_utils.py:1519] 2021-07-30 20:48:13,532 >> All the weights of GPTNeoForCausalLM were initialized from the model checkpoint at EleutherAI/gpt-neo-1.3B.\n", | |
"If your task is similar to the task the model of the checkpoint was trained on, you can already use GPTNeoForCausalLM for predictions without further training.\n", | |
"Running tokenizer on dataset: 0%| | 0/1 [00:00<?, ?ba/s]07/30/2021 20:48:13 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff/cache-2a805ec44d0b9072.arrow\n", | |
"Running tokenizer on dataset: 100%|██████████████| 1/1 [00:00<00:00, 383.43ba/s]\n", | |
"07/30/2021 20:48:13 - WARNING - __main__ - The tokenizer picked seems to have a very large `model_max_length` (2048). Picking 1024 instead. You can change that default value by passing --block_size xxx.\n", | |
"Grouping texts in chunks of 1024: 0%| | 0/1 [00:00<?, ?ba/s]07/30/2021 20:48:13 - INFO - datasets.arrow_dataset - Caching processed dataset at /home/ubuntu/.cache/huggingface/datasets/csv/default-822c539f89fcc9d9/0.0.0/9144e0a4e8435090117cea53e6c7537173ef2304525df4a077c435d8ee7828ff/cache-e1c89928ca5bb061.arrow\n", | |
"Grouping texts in chunks of 1024: 100%|█████████| 1/1 [00:00<00:00, 1101.73ba/s]\n", | |
"[INFO|trainer.py:1164] 2021-07-30 20:48:13,598 >> ***** Running training *****\n", | |
"[INFO|trainer.py:1165] 2021-07-30 20:48:13,598 >> Num examples = 1\n", | |
"[INFO|trainer.py:1166] 2021-07-30 20:48:13,598 >> Num Epochs = 3\n", | |
"[INFO|trainer.py:1167] 2021-07-30 20:48:13,598 >> Instantaneous batch size per device = 1\n", | |
"[INFO|trainer.py:1168] 2021-07-30 20:48:13,598 >> Total train batch size (w. parallel, distributed & accumulation) = 1\n", | |
"[INFO|trainer.py:1169] 2021-07-30 20:48:13,598 >> Gradient Accumulation steps = 1\n", | |
"[INFO|trainer.py:1170] 2021-07-30 20:48:13,598 >> Total optimization steps = 3\n", | |
"100%|█████████████████████████████████████████████| 3/3 [00:09<00:00, 3.09s/it][INFO|trainer.py:1360] 2021-07-30 20:48:23,151 >> \n", | |
"\n", | |
"Training completed. Do not forget to share your model on huggingface.co/models =)\n", | |
"\n", | |
"\n", | |
"{'train_runtime': 9.5525, 'train_samples_per_second': 0.314, 'train_steps_per_second': 0.314, 'train_loss': 4.490866978963216, 'epoch': 3.0}\n", | |
"100%|█████████████████████████████████████████████| 3/3 [00:09<00:00, 3.18s/it]\n", | |
"[INFO|trainer.py:1919] 2021-07-30 20:48:23,151 >> Saving model checkpoint to tmp/test-clm\n", | |
"[INFO|configuration_utils.py:379] 2021-07-30 20:48:23,152 >> Configuration saved in tmp/test-clm/config.json\n", | |
"[INFO|modeling_utils.py:997] 2021-07-30 20:48:36,143 >> Model weights saved in tmp/test-clm/pytorch_model.bin\n", | |
"[INFO|tokenization_utils_base.py:2006] 2021-07-30 20:48:36,143 >> tokenizer config file saved in tmp/test-clm/tokenizer_config.json\n", | |
"[INFO|tokenization_utils_base.py:2012] 2021-07-30 20:48:36,143 >> Special tokens file saved in tmp/test-clm/special_tokens_map.json\n", | |
"***** train metrics *****\n", | |
" epoch = 3.0\n", | |
" train_loss = 4.4909\n", | |
" train_runtime = 0:00:09.55\n", | |
" train_samples = 1\n", | |
" train_samples_per_second = 0.314\n", | |
" train_steps_per_second = 0.314\n" | |
] | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Use the trained model for generation" | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"source": [ | |
    "!git clone https://gist.github.com/zilunpeng/63358af14fefd035285ce7d09e6b5638" | |
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Cloning into '63358af14fefd035285ce7d09e6b5638'...\n", | |
"remote: Enumerating objects: 3, done.\u001b[K\n", | |
"remote: Total 3 (delta 0), reused 0 (delta 0), pack-reused 3\u001b[K\n", | |
"Unpacking objects: 100% (3/3), done.\n" | |
] | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"source": [ | |
"%%writefile testing_prompt.txt\n", | |
"hi" | |
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Writing testing_prompt.txt\n" | |
] | |
} | |
], | |
"metadata": {} | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"source": [ | |
"!python 63358af14fefd035285ce7d09e6b5638/run_generation.py \\\n", | |
" --model_type gpt-neo \\\n", | |
" --model_name_or_path tmp/test-clm \\\n", | |
" --seed 32 --prompt_path testing_prompt.txt \\\n", | |
" --length 30" | |
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"07/30/2021 21:02:03 - WARNING - __main__ - device: cpu, n_gpu: 0, 16-bits training: False\n", | |
"07/30/2021 21:02:15 - INFO - __main__ - Namespace(device=device(type='cpu'), fp16=False, k=0, length=30, model_name_or_path='tmp/test-clm', model_type='gpt-neo', n_gpu=0, no_cuda=False, num_return_sequences=1, p=0.9, padding_text='', prefix='', prompt='', prompt_path='testing_prompt.txt', repetition_penalty=1.0, seed=32, stop_token=None, temperature=1.0, xlm_language='')\n", | |
"Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n", | |
"=== GENERATED SEQUENCE 1 ===\n", | |
"hibcol\n", | |
"This happened at an event when some of you folks were also there. It is a\n", | |
"great way to learn about it. If\n" | |
] | |
} | |
], | |
"metadata": {} | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3.6.13 64-bit ('train_gpt_neo_fresh': conda)" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.13" | |
}, | |
"interpreter": { | |
"hash": "3941a2565f46efd9744bf9b8a7fccfd29a4b2a0ba5ab610e281a799a536bf67d" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment