Last active
August 17, 2024 19:51
-
-
Save relyt0925/0c4ec82147cd37b82177df7235e12a99 to your computer and use it in GitHub Desktop.
aaaa
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[root@tyler-a100 instructlab]# /root/bin/ilab-sdg.sh data generate --pipeline /var/mnt/inststg1/instructlab/sdg-config/pipelines/agentic/ --taxonomy-path /var/mnt/inststg1/instructlab/taxonomy/ --taxonomy-base empty --endpoint-url http://127.0.0.1:8080/v1 --model-family mixtral --sdg-scale-factor 30 --model /var/mnt/inststg1/instructlab/models/mistralai/Mixtral-8x7B-Instruct-v0.1 --output-dir /var/mnt/inststg1/instructlab/generated/ --tls-insecure | |
INFO 2024-08-17 15:41:56,393 numexpr.utils:145: Note: detected 80 virtual cores but NumExpr set to maximum of 64, check "NUMEXPR_MAX_THREADS" environment variable. | |
INFO 2024-08-17 15:41:56,393 numexpr.utils:148: Note: NumExpr detected 80 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16. | |
INFO 2024-08-17 15:41:56,393 numexpr.utils:161: NumExpr defaulting to 16 threads. | |
INFO 2024-08-17 15:41:57,112 datasets:58: PyTorch version 2.3.1 available. | |
Generating synthetic data using '/var/mnt/inststg1/instructlab/sdg-config/pipelines/agentic/' pipeline, '/var/mnt/inststg1/instructlab/models/mistralai/Mixtral-8x7B-Instruct-v0.1' model, '/var/mnt/inststg1/instructlab/taxonomy/' taxonomy, against http://127.0.0.1:8080/v1 server | |
INFO 2024-08-17 15:42:00,154 instructlab.sdg:375: Synthesizing new instructions. If you aren't satisfied with the generated instructions, interrupt training (Ctrl-C) and try adjusting your YAML files. Adding more examples may help. | |
Generating train split: 553 examples [00:00, 10769.91 examples/s] | |
INFO 2024-08-17 15:42:05,622 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/knowledge_compliance_personally-identifiable-information, with 553 rows | |
INFO 2024-08-17 15:42:05,642 instructlab.sdg.checkpointing:68: Found 13 missing rows in the dataset | |
INFO 2024-08-17 15:42:05,642 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:42:05,728 instructlab.sdg.llmblock:51: LLM server supports batched inputs: True | |
INFO 2024-08-17 15:42:05,728 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:42:05,729 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:42:05,730 instructlab.sdg.llmblock:51: LLM server supports batched inputs: True | |
INFO 2024-08-17 15:42:05,734 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:42:05,734 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 15:42:06,735 instructlab.sdg.pipeline:197: Running block: SetClassifierValue | |
INFO 2024-08-17 15:42:06,736 instructlab.sdg.pipeline:197: Running block: SetClassifierValue | |
INFO 2024-08-17 15:42:06,736 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 15:42:06,736 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:42:06,749 instructlab.sdg.pipeline:197: Running block: duplicate_document_col | |
INFO 2024-08-17 15:42:06,754 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:42:06,756 instructlab.sdg.pipeline:197: Running block: duplicate_document_col | |
INFO 2024-08-17 15:42:06,756 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 15:42:06,762 instructlab.sdg.pipeline:197: Running block: gen_detailed_summary | |
INFO 2024-08-17 15:42:06,762 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'base_document'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:42:06,767 instructlab.sdg.pipeline:197: Running block: gen_detailed_summary | |
INFO 2024-08-17 15:42:06,767 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'base_document'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 15:42:17,085 instructlab.sdg.pipeline:197: Running block: gen_atomic_facts | |
INFO 2024-08-17 15:42:17,085 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'base_document', 'summary_detailed'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 15:42:18,228 instructlab.sdg.pipeline:197: Running block: gen_atomic_facts | |
INFO 2024-08-17 15:42:18,228 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'base_document', 'summary_detailed'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:42:28,213 instructlab.sdg.pipeline:197: Running block: gen_extractive_summary | |
INFO 2024-08-17 15:42:28,213 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'base_document', 'summary_detailed', 'summary_atomic_facts'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 15:42:30,266 instructlab.sdg.pipeline:197: Running block: gen_extractive_summary | |
INFO 2024-08-17 15:42:30,266 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'base_document', 'summary_detailed', 'summary_atomic_facts'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:42:37,034 instructlab.sdg.pipeline:197: Running block: flatten_summary_columns | |
INFO 2024-08-17 15:42:37,034 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'base_document', 'summary_detailed', 'summary_atomic_facts', 'summary_extractive'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 15:42:37,051 instructlab.sdg.pipeline:197: Running block: rename_to_document_column | |
INFO 2024-08-17 15:42:37,051 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'summary'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 15:42:37,059 instructlab.sdg.pipeline:197: Running block: knowledge generation | |
INFO 2024-08-17 15:42:37,059 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 15:42:39,966 instructlab.sdg.pipeline:197: Running block: flatten_summary_columns | |
INFO 2024-08-17 15:42:39,966 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'base_document', 'summary_detailed', 'summary_atomic_facts', 'summary_extractive'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:42:39,981 instructlab.sdg.pipeline:197: Running block: rename_to_document_column | |
INFO 2024-08-17 15:42:39,981 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'summary'], | |
num_rows: 32 | |
}) | |
INFO 2024-08-17 15:42:39,988 instructlab.sdg.pipeline:197: Running block: knowledge generation | |
INFO 2024-08-17 15:42:39,988 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document'], | |
num_rows: 32 | |
}) | |
INFO 2024-08-17 15:42:55,797 instructlab.sdg.pipeline:197: Running block: eval_faithfulness_qa_pair | |
INFO 2024-08-17 15:42:55,797 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response'], | |
num_rows: 39 | |
}) | |
INFO 2024-08-17 15:43:00,436 instructlab.sdg.pipeline:197: Running block: eval_faithfulness_qa_pair | |
INFO 2024-08-17 15:43:00,436 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response'], | |
num_rows: 98 | |
}) | |
INFO 2024-08-17 15:43:11,748 instructlab.sdg.pipeline:197: Running block: filter_faithfulness | |
INFO 2024-08-17 15:43:11,748 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response', 'explanation', 'judgment'], | |
num_rows: 39 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:00<00:00, 170.35 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 39/39 [00:00<00:00, 223.60 examples/s] | |
INFO 2024-08-17 15:43:12,346 instructlab.sdg.pipeline:197: Running block: eval_relevancy_qa_pair | |
INFO 2024-08-17 15:43:12,347 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response'], | |
num_rows: 33 | |
}) | |
INFO 2024-08-17 15:43:16,050 instructlab.sdg.pipeline:197: Running block: filter_faithfulness | |
INFO 2024-08-17 15:43:16,050 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response', 'explanation', 'judgment'], | |
num_rows: 96 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 96/96 [00:00<00:00, 361.66 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 96/96 [00:00<00:00, 501.16 examples/s] | |
INFO 2024-08-17 15:43:16,722 instructlab.sdg.pipeline:197: Running block: eval_relevancy_qa_pair | |
INFO 2024-08-17 15:43:16,722 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response'], | |
num_rows: 81 | |
}) | |
INFO 2024-08-17 15:43:22,077 instructlab.sdg.pipeline:197: Running block: filter_relevancy | |
INFO 2024-08-17 15:43:22,077 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response', 'feedback', 'score'], | |
num_rows: 33 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 140.83 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 184.99 examples/s] | |
INFO 2024-08-17 15:43:22,689 instructlab.sdg.pipeline:197: Running block: eval_verify_question | |
INFO 2024-08-17 15:43:22,690 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response'], | |
num_rows: 32 | |
}) | |
INFO 2024-08-17 15:43:26,917 instructlab.sdg.pipeline:197: Running block: filter_relevancy | |
INFO 2024-08-17 15:43:26,917 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response', 'feedback', 'score'], | |
num_rows: 81 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 81/81 [00:00<00:00, 353.23 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 81/81 [00:00<00:00, 458.71 examples/s] | |
INFO 2024-08-17 15:43:27,543 instructlab.sdg.pipeline:197: Running block: eval_verify_question | |
INFO 2024-08-17 15:43:27,543 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response'], | |
num_rows: 75 | |
}) | |
INFO 2024-08-17 15:43:30,496 instructlab.sdg.pipeline:197: Running block: filter_verify_question | |
INFO 2024-08-17 15:43:30,496 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response', 'explanation', 'rating'], | |
num_rows: 32 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 151.66 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 179.35 examples/s] | |
INFO 2024-08-17 15:43:34,978 instructlab.sdg.pipeline:197: Running block: filter_verify_question | |
INFO 2024-08-17 15:43:34,978 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'raw_document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3', 'route', 'dataset_type', 'document', 'question', 'response', 'explanation', 'rating'], | |
num_rows: 75 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [00:00<00:00, 314.18 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 75/75 [00:00<00:00, 411.74 examples/s] | |
INFO 2024-08-17 15:43:35,599 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/knowledge_compliance_personally-identifiable-information/data_checkpoint_6c7f3d1396764f48b388d0206f5d97de.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 109.51ba/s] | |
INFO 2024-08-17 15:43:35,614 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/knowledge_compliance_personally-identifiable-information/data_checkpoint_5a26dea2721d4f13ac1f65bb7c252801.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 306.98ba/s] | |
INFO 2024-08-17 15:43:35,630 instructlab.sdg:410: Generated 1 samples | |
INFO 2024-08-17 15:43:35,630 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:43:35,639 instructlab.sdg.pipeline:197: Running block: gen_mmlu_knowledge | |
INFO 2024-08-17 15:43:35,639 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:43:35,642 instructlab.sdg.pipeline:197: Running block: gen_mmlu_knowledge | |
INFO 2024-08-17 15:43:35,642 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:43:35,644 instructlab.sdg.pipeline:197: Running block: gen_mmlu_knowledge | |
INFO 2024-08-17 15:43:35,649 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:43:35,656 instructlab.sdg.pipeline:197: Running block: gen_mmlu_knowledge | |
INFO 2024-08-17 15:43:35,657 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['icl_document', 'document', 'document_outline', 'domain', 'icl_query_1', 'icl_query_2', 'icl_query_3', 'icl_response_1', 'icl_response_2', 'icl_response_3'], | |
num_rows: 6 | |
}) | |
Filter: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 19210.82 examples/s] | |
Filter: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 14669.63 examples/s] | |
Flattening the indices: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 18952.79 examples/s] | |
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 6670.35 examples/s] | |
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 6366.42 examples/s] | |
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 6288.31 examples/s] | |
Filter: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 22221.21 examples/s] | |
Filter: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 109/109 [00:00<00:00, 13471.01 examples/s] | |
Filter: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 108/108 [00:00<00:00, 13575.83 examples/s] | |
Flattening the indices: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 108/108 [00:00<00:00, 16544.97 examples/s] | |
Casting to class labels: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 108/108 [00:00<00:00, 6842.98 examples/s] | |
INFO 2024-08-17 15:43:57,736 instructlab.sdg.eval_data:126: Saving MMLU Dataset /var/mnt/inststg1/instructlab/generated//node_datasets_2024-08-17T15_42_00/mmlubench_knowledge_compliance_personally-identifiable-information.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 191.15ba/s] | |
INFO 2024-08-17 15:43:57,743 instructlab.sdg.eval_data:130: Saving MMLU Task yaml /var/mnt/inststg1/instructlab/generated//node_datasets_2024-08-17T15_42_00/knowledge_compliance_personally-identifiable-information_task.yaml | |
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 659/659 [00:00<00:00, 3944.60 examples/s] | |
Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 659/659 [00:00<00:00, 13652.24 examples/s] | |
Filter: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 659/659 [00:00<00:00, 18367.46 examples/s] | |
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 108/108 [00:00<00:00, 7352.10 examples/s] | |
Map: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 108/108 [00:00<00:00, 10429.51 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 19.45ba/s] | |
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 659/659 [00:00<00:00, 4159.84 examples/s] | |
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 659/659 [00:00<00:00, 5128.00 examples/s] | |
Map: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 108/108 [00:00<00:00, 7562.22 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.60ba/s] | |
INFO 2024-08-17 15:43:58,633 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_general_tables_editing_add_remove, with 12 rows | |
INFO 2024-08-17 15:43:58,639 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:43:58,640 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:43:58,643 instructlab.sdg:410: Generated 2 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 559.69ba/s] | |
INFO 2024-08-17 15:43:58,742 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_general_tables_editing_combining_altering, with 13 rows | |
INFO 2024-08-17 15:43:58,749 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:43:58,749 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:43:58,751 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 15:43:58,751 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:43:59,596 instructlab.sdg:410: Generated 3 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 440.07ba/s] | |
INFO 2024-08-17 15:43:59,702 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_general_tables_empty, with 31 rows | |
INFO 2024-08-17 15:43:59,709 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:43:59,709 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:43:59,712 instructlab.sdg:410: Generated 4 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 297.62ba/s] | |
INFO 2024-08-17 15:43:59,817 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_abstractive_title, with 21 rows | |
INFO 2024-08-17 15:43:59,823 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:43:59,824 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:43:59,827 instructlab.sdg:410: Generated 5 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 268.35ba/s] | |
INFO 2024-08-17 15:43:59,935 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_abstractive_abstract, with 29 rows | |
INFO 2024-08-17 15:43:59,941 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:43:59,941 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:43:59,945 instructlab.sdg:410: Generated 6 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 150.90ba/s] | |
INFO 2024-08-17 15:44:00,052 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_abstractive_outline, with 19 rows | |
INFO 2024-08-17 15:44:00,059 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:00,059 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:00,063 instructlab.sdg:410: Generated 7 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 237.14ba/s] | |
INFO 2024-08-17 15:44:00,174 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_abstractive_main_takeaway, with 20 rows | |
INFO 2024-08-17 15:44:00,180 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:00,180 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:00,184 instructlab.sdg:410: Generated 8 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 253.36ba/s] | |
INFO 2024-08-17 15:44:00,291 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_abstractive_key_points, with 26 rows | |
INFO 2024-08-17 15:44:00,298 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:00,298 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:00,301 instructlab.sdg:410: Generated 9 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 188.28ba/s] | |
INFO 2024-08-17 15:44:00,403 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_services_agreement_bullet_points, with 75 rows | |
INFO 2024-08-17 15:44:00,410 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:00,410 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:00,413 instructlab.sdg:410: Generated 10 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 106.12ba/s] | |
INFO 2024-08-17 15:44:00,523 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_services_agreement_plain_text, with 153 rows | |
INFO 2024-08-17 15:44:00,530 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:00,530 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:00,534 instructlab.sdg:410: Generated 11 samples | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 44.75ba/s] | |
INFO 2024-08-17 15:44:00,659 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_services_agreement_reasoning, with 140 rows | |
INFO 2024-08-17 15:44:00,667 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:00,667 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:00,670 instructlab.sdg:410: Generated 12 samples | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 54.98ba/s] | |
INFO 2024-08-17 15:44:00,795 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_inference_qualitative_sentiment, with 7 rows | |
INFO 2024-08-17 15:44:00,801 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:00,802 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:00,805 instructlab.sdg:410: Generated 13 samples | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 15:44:00,805 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 667.35ba/s] | |
INFO 2024-08-17 15:44:00,909 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_inference_quantitative_asciidoc_tables, with 50 rows | |
INFO 2024-08-17 15:44:00,915 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:00,915 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:00,918 instructlab.sdg:410: Generated 14 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 262.70ba/s] | |
INFO 2024-08-17 15:44:01,019 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_inference_quantitative_table_analaysis, with 1 rows | |
INFO 2024-08-17 15:44:01,025 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:01,025 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:01,029 instructlab.sdg:410: Generated 15 samples | |
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
WARNING 2024-08-17 15:44:01,029 datasets.arrow_dataset:3092: num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
Creating json from Arrow format: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 1038.97ba/s] | |
Generating train split: 51 examples [00:00, 7971.29 examples/s] | |
INFO 2024-08-17 15:44:01,106 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_invoice_bullet_points, with 51 rows | |
INFO 2024-08-17 15:44:01,113 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:44:01,113 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:01,117 instructlab.sdg:410: Generated 16 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 51/51 [00:00<00:00, 379.35 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 116.20ba/s] | |
Generating train split: 40 examples [00:00, 8265.45 examples/s] | |
INFO 2024-08-17 15:44:01,447 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_invoice_markdown, with 40 rows | |
INFO 2024-08-17 15:44:01,456 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:44:01,456 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:44:01,459 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:44:01,459 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:44:10,112 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:44:10,112 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 15:44:12,739 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:44:12,739 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:44:16,876 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:44:16,876 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 23 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 137.52 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 156.56 examples/s] | |
INFO 2024-08-17 15:44:17,384 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:44:17,384 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 15:44:21,760 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:44:21,760 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 15:44:25,054 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:44:25,054 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 21 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 119.21 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 140.55 examples/s] | |
INFO 2024-08-17 15:44:25,568 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:44:25,568 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 21 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 68.41 examples/s] | |
INFO 2024-08-17 15:44:25,973 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:44:25,973 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 15:44:26,832 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:44:26,832 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 21 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 102.95 examples/s] | |
INFO 2024-08-17 15:44:27,139 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:44:27,139 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 15:44:34,644 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:44:34,644 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 15:44:43,414 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:44:43,414 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 15:44:53,623 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:44:53,623 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 15:45:01,185 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:45:01,185 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 15:45:05,104 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:45:05,104 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 89.37 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 112.49 examples/s] | |
INFO 2024-08-17 15:45:05,728 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:45:05,728 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 43.27 examples/s] | |
INFO 2024-08-17 15:45:06,286 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_invoice_markdown/data_checkpoint_5b99214eb01c4c709f1a560af0a40456.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 291.27ba/s] | |
INFO 2024-08-17 15:45:06,298 instructlab.sdg:410: Generated 17 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 320.99 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 108.98ba/s] | |
Generating train split: 21 examples [00:00, 4692.12 examples/s] | |
INFO 2024-08-17 15:45:06,679 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_invoice_csv, with 21 rows | |
INFO 2024-08-17 15:45:06,687 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:45:06,688 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:45:06,690 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:45:06,691 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:45:28,039 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:45:28,040 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 15:45:32,440 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:45:32,440 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 15:45:38,380 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:45:38,381 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 161.33 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 198.43 examples/s] | |
INFO 2024-08-17 15:45:38,895 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:45:38,895 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 15 | |
}) | |
INFO 2024-08-17 15:45:46,144 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:45:46,144 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 15:45:48,788 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:45:48,788 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 9 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 52.88 examples/s] | |
Filter (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 62.88 examples/s] | |
INFO 2024-08-17 15:45:49,298 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:45:49,298 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 26.34 examples/s] | |
INFO 2024-08-17 15:45:49,701 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:45:49,701 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:45:50,234 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:45:50,234 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 39.02 examples/s] | |
INFO 2024-08-17 15:45:50,546 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:45:50,546 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:45:56,149 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:45:56,149 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:46:01,929 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:46:01,930 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:46:08,740 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:46:08,740 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:46:15,682 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:46:15,682 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:46:18,755 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:46:18,755 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 36.74 examples/s] | |
Filter (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 44.16 examples/s] | |
INFO 2024-08-17 15:46:19,365 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:46:19,365 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 15:46:19,365 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 16.80 examples/s] | |
INFO 2024-08-17 15:46:19,868 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_invoice_csv/data_checkpoint_97b872838c904fb0a92fd2365499314c.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 363.62ba/s] | |
INFO 2024-08-17 15:46:19,879 instructlab.sdg:410: Generated 18 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 171.68 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 159.89ba/s] | |
Generating train split: 46 examples [00:00, 8787.48 examples/s] | |
INFO 2024-08-17 15:46:20,253 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_invoice_plain_text, with 46 rows | |
INFO 2024-08-17 15:46:20,261 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:46:20,261 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:46:20,264 instructlab.sdg:410: Generated 19 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 46/46 [00:00<00:00, 350.05 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 158.60ba/s] | |
Generating train split: 37 examples [00:00, 7213.07 examples/s] | |
INFO 2024-08-17 15:46:20,605 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_invoice_reasoning, with 37 rows | |
INFO 2024-08-17 15:46:20,612 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:46:20,612 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:46:20,616 instructlab.sdg:410: Generated 20 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:00<00:00, 279.58 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 142.07ba/s] | |
INFO 2024-08-17 15:46:20,921 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_information_named_entities_person_names, with 17 rows | |
INFO 2024-08-17 15:46:20,929 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:46:20,929 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:46:20,933 instructlab.sdg:410: Generated 21 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 298.78ba/s] | |
INFO 2024-08-17 15:46:21,042 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_information_named_entities_places, with 19 rows | |
INFO 2024-08-17 15:46:21,049 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:46:21,049 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:46:21,053 instructlab.sdg:410: Generated 22 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 305.46ba/s] | |
INFO 2024-08-17 15:46:21,157 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_information_named_entities_dates_and_events, with 14 rows | |
INFO 2024-08-17 15:46:21,164 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:46:21,164 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:46:21,167 instructlab.sdg:410: Generated 23 samples | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 266.69ba/s] | |
Generating train split: 38 examples [00:00, 9283.22 examples/s] | |
INFO 2024-08-17 15:46:21,289 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_receipt_bullet_points, with 38 rows | |
INFO 2024-08-17 15:46:21,297 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:46:21,297 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:46:21,300 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:46:21,300 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:46:29,531 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:46:29,531 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 15:46:32,154 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:46:32,154 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 15:46:36,364 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:46:36,364 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 182.09 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 203.66 examples/s] | |
INFO 2024-08-17 15:46:36,861 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:46:36,861 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 15:46:39,578 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:46:39,578 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:46:43,275 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:46:43,275 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 140.04 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 159.89 examples/s] | |
INFO 2024-08-17 15:46:43,784 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:46:43,785 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 23 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 75.00 examples/s] | |
INFO 2024-08-17 15:46:44,196 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:46:44,196 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 15:46:44,824 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:46:44,824 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 23 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 109.58 examples/s] | |
INFO 2024-08-17 15:46:45,139 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:46:45,139 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 15:46:52,345 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:46:52,345 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 15:47:00,031 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:47:00,031 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 15:47:08,889 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:47:08,889 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 15:47:13,791 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:47:13,791 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 15:47:18,316 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:47:18,316 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 80.41 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 96.34 examples/s] | |
INFO 2024-08-17 15:47:18,910 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:47:18,910 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 16 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 34.04 examples/s] | |
INFO 2024-08-17 15:47:19,486 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_receipt_bullet_points/data_checkpoint_4fcd6e27ccd84c5f9dd4043f86472ce7.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 279.64ba/s] | |
INFO 2024-08-17 15:47:19,500 instructlab.sdg:410: Generated 24 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 54/54 [00:00<00:00, 296.24 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 142.81ba/s] | |
Generating train split: 43 examples [00:00, 8922.29 examples/s] | |
INFO 2024-08-17 15:47:19,877 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_receipt_markdown, with 43 rows | |
INFO 2024-08-17 15:47:19,886 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:47:19,886 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:47:19,889 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:47:19,889 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:47:25,574 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:47:25,574 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 15:47:30,971 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:47:30,971 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 31 | |
}) | |
INFO 2024-08-17 15:47:35,700 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:47:35,700 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 31 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 180.90 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 209.14 examples/s] | |
INFO 2024-08-17 15:47:36,209 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:47:36,209 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 15:47:39,721 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:47:39,722 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 15:47:43,491 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:47:43,491 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 26 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 152.96 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 173.02 examples/s] | |
INFO 2024-08-17 15:47:44,004 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:47:44,004 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 79.59 examples/s] | |
INFO 2024-08-17 15:47:44,408 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:47:44,409 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:47:44,971 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:47:44,971 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 114.47 examples/s] | |
INFO 2024-08-17 15:47:45,284 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:47:45,284 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:47:52,496 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:47:52,496 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 15:48:01,584 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:48:01,584 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 15:48:10,759 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:48:10,759 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 15:48:19,880 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:48:19,880 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 19 | |
}) | |
INFO 2024-08-17 15:48:23,494 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:48:23,494 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 19 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 89.75 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 105.14 examples/s] | |
INFO 2024-08-17 15:48:24,093 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:48:24,093 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 19 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 40.62 examples/s] | |
INFO 2024-08-17 15:48:24,663 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_receipt_markdown/data_checkpoint_27fe3f29b5724715a340246719e8b9a2.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 288.37ba/s] | |
INFO 2024-08-17 15:48:24,677 instructlab.sdg:410: Generated 25 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 326.64 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 126.51ba/s] | |
Generating train split: 18 examples [00:00, 4569.24 examples/s] | |
INFO 2024-08-17 15:48:25,064 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_receipt_csv, with 18 rows | |
INFO 2024-08-17 15:48:25,073 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:48:25,073 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:48:25,076 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:48:25,076 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:48:30,845 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:48:30,845 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 15:48:33,763 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:48:33,763 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 15:48:38,176 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:48:38,177 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 173.11 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 191.29 examples/s] | |
INFO 2024-08-17 15:48:38,719 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:48:38,719 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 15:48:43,152 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:48:43,153 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 15:48:46,428 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:48:46,428 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 112.61 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 132.26 examples/s] | |
INFO 2024-08-17 15:48:46,953 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:48:46,953 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 19 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 60.74 examples/s] | |
INFO 2024-08-17 15:48:47,367 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:48:47,367 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 19 | |
}) | |
INFO 2024-08-17 15:48:47,851 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:48:47,851 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 19 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 93.25 examples/s] | |
INFO 2024-08-17 15:48:48,163 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:48:48,163 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 19 | |
}) | |
INFO 2024-08-17 15:48:55,223 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:48:55,223 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 15:49:04,562 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:49:04,562 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 16 | |
}) | |
INFO 2024-08-17 15:49:13,183 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:49:13,183 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 16 | |
}) | |
INFO 2024-08-17 15:49:19,532 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:49:19,532 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 15 | |
}) | |
INFO 2024-08-17 15:49:24,078 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:49:24,078 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 15 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 70.02 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 83.58 examples/s] | |
INFO 2024-08-17 15:49:24,681 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:49:24,681 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 14 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 29.42 examples/s] | |
INFO 2024-08-17 15:49:25,265 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_receipt_csv/data_checkpoint_9a1bde15b9f94673bfdf23bb48e41325.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 318.60ba/s] | |
INFO 2024-08-17 15:49:25,277 instructlab.sdg:410: Generated 26 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 188.44 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 185.03ba/s] | |
Generating train split: 46 examples [00:00, 9051.75 examples/s] | |
INFO 2024-08-17 15:49:25,657 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_receipt_plain_text, with 46 rows | |
INFO 2024-08-17 15:49:25,666 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:49:25,666 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:49:25,669 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:49:25,669 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:49:30,819 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:49:30,819 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 15:49:33,934 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:49:33,934 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 32 | |
}) | |
INFO 2024-08-17 15:49:38,102 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:49:38,102 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 32 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 188.66 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 212.88 examples/s] | |
INFO 2024-08-17 15:49:38,619 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:49:38,619 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 15:49:42,261 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:49:42,261 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 15:49:46,389 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:49:46,389 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 169.62 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 188.39 examples/s] | |
INFO 2024-08-17 15:49:46,909 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:49:46,909 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 94.55 examples/s] | |
INFO 2024-08-17 15:49:47,322 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:49:47,322 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 15:49:47,997 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:49:47,997 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 131.60 examples/s] | |
INFO 2024-08-17 15:49:48,327 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:49:48,327 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 15:49:56,464 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:49:56,464 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 15:50:06,424 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:50:06,424 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 15:50:16,829 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:50:16,830 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 15:50:27,659 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:50:27,659 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 15:50:32,035 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:50:32,035 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 26 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 116.20 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 144.10 examples/s] | |
INFO 2024-08-17 15:50:32,653 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:50:32,654 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 26 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 41.64 examples/s] | |
INFO 2024-08-17 15:50:33,391 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_receipt_plain_text/data_checkpoint_af457ad1be524fdcab875400b8211853.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 261.83ba/s] | |
INFO 2024-08-17 15:50:33,403 instructlab.sdg:410: Generated 27 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 392.99 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 113.27ba/s] | |
Generating train split: 57 examples [00:00, 11118.75 examples/s] | |
INFO 2024-08-17 15:50:33,843 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_receipt_reasoning, with 57 rows | |
INFO 2024-08-17 15:50:33,850 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:50:33,850 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:50:33,854 instructlab.sdg:410: Generated 28 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 428.59 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 138.79ba/s] | |
Generating train split: 40 examples [00:00, 7304.29 examples/s] | |
INFO 2024-08-17 15:50:34,174 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_commercial_lease_agreement_bullet_points, with 40 rows | |
INFO 2024-08-17 15:50:34,183 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:50:34,183 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:50:34,186 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:50:34,186 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:50:44,288 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:50:44,288 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 15:50:47,203 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:50:47,203 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 15:50:52,411 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:50:52,412 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 164.53 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 195.12 examples/s] | |
INFO 2024-08-17 15:50:52,946 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:50:52,946 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 15:51:00,067 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:51:00,067 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:51:04,833 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:51:04,833 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 152.54 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 184.17 examples/s] | |
INFO 2024-08-17 15:51:05,373 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:51:05,373 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 89.56 examples/s] | |
INFO 2024-08-17 15:51:05,786 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:51:05,786 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:51:06,990 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:51:06,990 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 133.06 examples/s] | |
INFO 2024-08-17 15:51:07,313 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:51:07,313 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:51:16,807 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:51:16,807 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:51:28,621 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:51:28,621 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 15:51:39,068 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:51:39,068 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 15:51:50,762 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:51:50,762 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 15:51:55,852 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:51:55,852 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 106.13 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 132.98 examples/s] | |
INFO 2024-08-17 15:51:56,461 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:51:56,461 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 50.36 examples/s] | |
INFO 2024-08-17 15:51:57,044 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_commercial_lease_agreement_bullet_points/data_checkpoint_9bd6ec2b62c2491984bfc3d9b614e980.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 205.76ba/s] | |
INFO 2024-08-17 15:51:57,058 instructlab.sdg:410: Generated 29 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 350.60 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 88.54ba/s] | |
Generating train split: 40 examples [00:00, 7404.22 examples/s] | |
INFO 2024-08-17 15:51:57,461 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_commercial_lease_agreement_markdown, with 40 rows | |
INFO 2024-08-17 15:51:57,470 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:51:57,470 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:51:57,473 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:51:57,473 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:52:08,583 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:52:08,583 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 15:52:12,524 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:52:12,524 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 15:52:17,291 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:52:17,291 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 169.79 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 193.99 examples/s] | |
INFO 2024-08-17 15:52:17,808 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:52:17,808 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:52:23,321 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:52:23,322 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:52:27,988 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:52:27,988 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 152.86 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 179.97 examples/s] | |
INFO 2024-08-17 15:52:28,530 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:52:28,530 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 87.89 examples/s] | |
INFO 2024-08-17 15:52:28,945 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:52:28,945 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:52:30,255 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:52:30,255 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 127.12 examples/s] | |
INFO 2024-08-17 15:52:30,577 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:52:30,577 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:52:39,921 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:52:39,921 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:52:51,805 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:52:51,805 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 15:53:01,709 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:53:01,709 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 15:53:11,715 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:53:11,715 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:53:16,807 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:53:16,808 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 107.10 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 133.32 examples/s] | |
INFO 2024-08-17 15:53:17,421 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:53:17,421 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 23 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 47.79 examples/s] | |
INFO 2024-08-17 15:53:18,008 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_commercial_lease_agreement_markdown/data_checkpoint_72ed16d977a7466695de3f597c1dd5b6.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 216.94ba/s] | |
INFO 2024-08-17 15:53:18,022 instructlab.sdg:410: Generated 30 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 346.15 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 92.05ba/s] | |
Generating train split: 42 examples [00:00, 7431.38 examples/s] | |
INFO 2024-08-17 15:53:18,401 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_commercial_lease_agreement_csv, with 42 rows | |
INFO 2024-08-17 15:53:18,410 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:53:18,410 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:53:18,413 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:53:18,413 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:53:27,494 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:53:27,494 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 15:53:30,747 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:53:30,747 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 15:53:35,645 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:53:35,645 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 155.60 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 177.82 examples/s] | |
INFO 2024-08-17 15:53:36,168 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:53:36,168 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:53:40,467 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:53:40,467 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:53:44,288 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:53:44,288 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 136.30 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 157.93 examples/s] | |
INFO 2024-08-17 15:53:44,814 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:53:44,815 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 77.19 examples/s] | |
INFO 2024-08-17 15:53:45,217 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:53:45,217 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:53:46,199 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:53:46,199 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 115.93 examples/s] | |
INFO 2024-08-17 15:53:46,510 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:53:46,510 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:53:55,096 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:53:55,096 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:54:05,184 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:54:05,184 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 22 | |
}) | |
INFO 2024-08-17 15:54:14,217 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:54:14,217 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 22 | |
}) | |
INFO 2024-08-17 15:54:26,182 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:54:26,182 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 22 | |
}) | |
INFO 2024-08-17 15:54:30,315 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:54:30,315 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 22 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 98.55 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 122.21 examples/s] | |
INFO 2024-08-17 15:54:30,927 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:54:30,927 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 22 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 34.45 examples/s] | |
INFO 2024-08-17 15:54:31,673 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_commercial_lease_agreement_csv/data_checkpoint_7a42695574154f7093136a25a406044a.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 262.49ba/s] | |
INFO 2024-08-17 15:54:31,685 instructlab.sdg:410: Generated 31 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 351.98 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 98.14ba/s] | |
INFO 2024-08-17 15:54:32,054 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_commercial_lease_agreement_plain_text, with 139 rows | |
INFO 2024-08-17 15:54:32,062 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:54:32,062 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:54:32,066 instructlab.sdg:410: Generated 32 samples | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 46.00ba/s] | |
Generating train split: 74 examples [00:00, 9682.38 examples/s] | |
INFO 2024-08-17 15:54:32,223 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_commercial_lease_agreement_reasoning, with 74 rows | |
INFO 2024-08-17 15:54:32,230 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:54:32,231 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:54:32,234 instructlab.sdg:410: Generated 33 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 74/74 [00:00<00:00, 556.08 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 78.12ba/s] | |
Generating train split: 86 examples [00:00, 9593.10 examples/s] | |
INFO 2024-08-17 15:54:32,569 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_equations_bullet_points, with 86 rows | |
INFO 2024-08-17 15:54:32,578 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:54:32,578 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:54:32,581 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:54:32,581 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:54:32,858 instructlab.sdg:410: Generated 34 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 86/86 [00:00<00:00, 638.46 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 54.56ba/s] | |
Generating train split: 155 examples [00:00, 13580.32 examples/s] | |
INFO 2024-08-17 15:54:33,195 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_equations_markdown, with 155 rows | |
INFO 2024-08-17 15:54:33,203 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:54:33,203 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:54:33,207 instructlab.sdg:410: Generated 35 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 155/155 [00:00<00:00, 1124.36 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 27.84ba/s] | |
Generating train split: 81 examples [00:00, 9591.18 examples/s] | |
INFO 2024-08-17 15:54:33,584 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_equations_csv, with 81 rows | |
INFO 2024-08-17 15:54:33,593 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:54:33,593 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:54:33,596 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:54:33,596 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:54:41,785 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:54:41,785 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 15:54:43,683 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:54:43,684 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 15:54:46,624 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:54:46,624 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 9 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 51.44 examples/s] | |
Filter (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 59.71 examples/s] | |
INFO 2024-08-17 15:54:47,146 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:54:47,146 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 15:54:49,850 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:54:49,850 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 15:54:52,523 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:54:52,523 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 9 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 50.71 examples/s] | |
Filter (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 57.51 examples/s] | |
INFO 2024-08-17 15:54:53,063 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:54:53,064 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 15:54:53,064 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 24.55 examples/s] | |
INFO 2024-08-17 15:54:53,447 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:54:53,447 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 15:54:53,806 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:54:53,806 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 15:54:53,806 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 34.69 examples/s] | |
INFO 2024-08-17 15:54:54,101 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:54:54,102 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 15:55:00,678 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:55:00,679 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 15:55:08,798 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:55:08,799 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 15:55:15,653 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:55:15,654 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 15:55:26,117 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:55:26,117 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 15:55:29,165 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:55:29,165 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 15:55:29,166 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 35.67 examples/s] | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 15:55:29,460 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Filter (num_proc=7): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 40.73 examples/s] | |
INFO 2024-08-17 15:55:29,725 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:55:29,725 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 15:55:29,725 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 17.60 examples/s] | |
INFO 2024-08-17 15:55:30,222 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_equations_csv/data_checkpoint_eaf4595a91ad46298364e1b74df88683.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 342.53ba/s] | |
INFO 2024-08-17 15:55:30,234 instructlab.sdg:410: Generated 36 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88/88 [00:00<00:00, 524.61 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 54.56ba/s] | |
Generating train split: 113 examples [00:00, 11598.95 examples/s] | |
INFO 2024-08-17 15:55:30,684 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_equations_plain_text, with 113 rows | |
INFO 2024-08-17 15:55:30,693 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:55:30,693 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:55:30,696 instructlab.sdg:410: Generated 37 samples | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 113/113 [00:00<00:00, 837.42 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 43.48ba/s] | |
Generating train split: 59 examples [00:00, 7519.41 examples/s] | |
INFO 2024-08-17 15:55:31,041 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_equations_reasoning, with 59 rows | |
INFO 2024-08-17 15:55:31,050 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:55:31,050 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:55:31,053 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:55:31,053 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:55:43,070 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:55:43,071 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 15:55:46,134 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:55:46,134 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:55:51,629 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:55:51,629 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 141.10 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 159.88 examples/s] | |
INFO 2024-08-17 15:55:52,149 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:55:52,149 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:55:58,963 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:55:58,963 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:56:03,794 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:56:03,794 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 129.01 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 152.29 examples/s] | |
INFO 2024-08-17 15:56:04,342 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:56:04,342 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 76.02 examples/s] | |
INFO 2024-08-17 15:56:04,754 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:56:04,754 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:56:05,821 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:56:05,821 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 113.53 examples/s] | |
INFO 2024-08-17 15:56:06,137 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:56:06,137 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:56:16,008 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:56:16,008 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:56:28,797 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:56:28,797 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:56:40,184 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:56:40,184 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 15:56:55,561 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:56:55,561 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 15:57:01,233 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:57:01,233 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 23 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 93.73 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 129.84 examples/s] | |
INFO 2024-08-17 15:57:01,872 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:57:01,872 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 23 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 46.87 examples/s] | |
INFO 2024-08-17 15:57:02,474 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_equations_reasoning/data_checkpoint_d42ad105181c447184c93fd2a70804ba.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 174.20ba/s] | |
INFO 2024-08-17 15:57:02,489 instructlab.sdg:410: Generated 38 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 82/82 [00:00<00:00, 442.76 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 53.10ba/s] | |
Generating train split: 38 examples [00:00, 5595.55 examples/s] | |
INFO 2024-08-17 15:57:02,886 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_tables_bullet_points, with 38 rows | |
INFO 2024-08-17 15:57:02,895 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:57:02,895 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:57:02,898 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:57:02,898 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:57:30,263 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:57:30,263 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 15:57:35,643 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:57:35,643 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 15:57:41,676 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:57:41,676 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 169.03 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 196.44 examples/s] | |
INFO 2024-08-17 15:57:42,208 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:57:42,208 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:57:54,561 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:57:54,561 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 15:57:59,458 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:57:59,458 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 26 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 145.45 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 169.14 examples/s] | |
INFO 2024-08-17 15:57:59,992 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:57:59,992 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 26 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 81.41 examples/s] | |
INFO 2024-08-17 15:58:00,420 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:58:00,420 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 15:58:02,164 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:58:02,164 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 26 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 122.82 examples/s] | |
INFO 2024-08-17 15:58:02,484 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:58:02,484 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 15:58:13,602 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:58:13,602 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 15:58:26,274 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 15:58:26,274 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 15:58:37,707 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 15:58:37,707 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 15:58:50,973 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 15:58:50,974 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 15:58:56,991 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 15:58:56,991 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 26 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 117.50 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 145.72 examples/s] | |
INFO 2024-08-17 15:58:57,597 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 15:58:57,597 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 25 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 40.43 examples/s] | |
INFO 2024-08-17 15:58:58,315 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_tables_bullet_points/data_checkpoint_f6d85354f4f549daa4181a63f76eb9c4.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 133.93ba/s] | |
INFO 2024-08-17 15:58:58,331 instructlab.sdg:410: Generated 39 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 337.47 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 57.54ba/s] | |
Generating train split: 54 examples [00:00, 6677.06 examples/s] | |
INFO 2024-08-17 15:58:58,734 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_tables_csv, with 54 rows | |
INFO 2024-08-17 15:58:58,742 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:58:58,742 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:58:58,746 instructlab.sdg:410: Generated 40 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 54/54 [00:00<00:00, 413.45 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 65.18ba/s] | |
Generating train split: 66 examples [00:00, 8027.84 examples/s] | |
INFO 2024-08-17 15:58:59,098 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_tables_plain_text, with 66 rows | |
INFO 2024-08-17 15:58:59,106 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:58:59,106 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:58:59,109 instructlab.sdg:410: Generated 41 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 66/66 [00:00<00:00, 488.51 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 54.47ba/s] | |
Generating train split: 65 examples [00:00, 7592.45 examples/s] | |
INFO 2024-08-17 15:58:59,450 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_tables_reasoning, with 65 rows | |
INFO 2024-08-17 15:58:59,457 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:58:59,458 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:58:59,461 instructlab.sdg:410: Generated 42 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 65/65 [00:00<00:00, 476.76 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 54.01ba/s] | |
Generating train split: 71 examples [00:00, 8829.07 examples/s] | |
INFO 2024-08-17 15:58:59,802 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_abstract_bullet_points, with 71 rows | |
INFO 2024-08-17 15:58:59,810 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:58:59,810 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:58:59,814 instructlab.sdg:410: Generated 43 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 71/71 [00:00<00:00, 532.17 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 60.45ba/s] | |
Generating train split: 61 examples [00:00, 7920.15 examples/s] | |
INFO 2024-08-17 15:59:00,149 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_abstract_markdown, with 61 rows | |
INFO 2024-08-17 15:59:00,156 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:59:00,156 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:59:00,160 instructlab.sdg:410: Generated 44 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 457.29 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 72.15ba/s] | |
Generating train split: 41 examples [00:00, 6099.83 examples/s] | |
INFO 2024-08-17 15:59:00,483 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_abstract_csv, with 41 rows | |
INFO 2024-08-17 15:59:00,490 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:59:00,490 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:59:00,494 instructlab.sdg:410: Generated 45 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 41/41 [00:00<00:00, 311.77 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 97.68ba/s] | |
Generating train split: 80 examples [00:00, 9273.02 examples/s] | |
INFO 2024-08-17 15:59:00,822 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_abstract_plain_text, with 80 rows | |
INFO 2024-08-17 15:59:00,829 instructlab.sdg.checkpointing:68: Found 0 missing rows in the dataset | |
INFO 2024-08-17 15:59:00,830 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:59:00,833 instructlab.sdg:410: Generated 46 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 590.16 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 52.16ba/s] | |
Generating train split: 47 examples [00:00, 6704.72 examples/s] | |
INFO 2024-08-17 15:59:01,168 instructlab.sdg.checkpointing:64: Loading existing checkpoints from /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_abstract_reasoning, with 47 rows | |
INFO 2024-08-17 15:59:01,177 instructlab.sdg.checkpointing:68: Found 1 missing rows in the dataset | |
INFO 2024-08-17 15:59:01,177 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 15:59:01,180 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 15:59:01,180 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 15:59:12,711 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 15:59:12,711 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 15:59:16,473 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 15:59:16,473 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 15:59:21,906 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 15:59:21,907 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 161.12 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 191.77 examples/s] | |
INFO 2024-08-17 15:59:22,469 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 15:59:22,469 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 15:59:33,090 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 15:59:33,090 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:59:38,680 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 15:59:38,680 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 155.83 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 175.03 examples/s] | |
INFO 2024-08-17 15:59:39,231 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 15:59:39,231 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 86.04 examples/s] | |
INFO 2024-08-17 15:59:39,664 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 15:59:39,664 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:59:40,824 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 15:59:40,824 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 127.97 examples/s] | |
INFO 2024-08-17 15:59:41,162 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 15:59:41,162 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 15:59:52,608 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 15:59:52,608 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:00:08,172 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:00:08,172 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 16:00:22,071 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:00:22,071 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 16:00:42,275 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:00:42,276 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 16:00:48,470 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:00:48,470 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 25 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 105.57 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 136.87 examples/s] | |
INFO 2024-08-17 16:00:49,123 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:00:49,123 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 25 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 50.59 examples/s] | |
INFO 2024-08-17 16:00:49,727 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_technical_paper_abstract_reasoning/data_checkpoint_bb25104d192c431fbe2281bd678b10cd.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 162.04ba/s] | |
INFO 2024-08-17 16:00:49,743 instructlab.sdg:410: Generated 47 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 72/72 [00:00<00:00, 377.69 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 61.30ba/s] | |
INFO 2024-08-17 16:00:50,126 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_annual_report_bullet_points, generating from scratch | |
INFO 2024-08-17 16:00:50,126 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:00:50,130 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:00:50,130 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:01:05,164 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:01:05,164 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 2 | |
}) | |
INFO 2024-08-17 16:01:07,209 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:01:07,209 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 16:01:09,629 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:01:09,629 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 5 | |
}) | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:01:09,629 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Map (num_proc=5): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 33.49 examples/s] | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:01:09,854 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Filter (num_proc=5): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 38.25 examples/s] | |
INFO 2024-08-17 16:01:10,060 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:01:10,061 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 16:01:12,116 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:01:12,117 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:01:13,623 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:01:13,623 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:01:13,623 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 22.72 examples/s] | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:01:13,819 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Filter (num_proc=3): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 25.63 examples/s] | |
INFO 2024-08-17 16:01:13,993 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:01:13,994 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:01:13,994 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 18.77 examples/s] | |
INFO 2024-08-17 16:01:14,213 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:01:14,213 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:01:14,465 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:01:14,465 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:01:14,465 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 19.84 examples/s] | |
INFO 2024-08-17 16:01:14,675 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:01:14,675 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:01:18,585 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:01:18,585 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 2 | |
}) | |
INFO 2024-08-17 16:01:21,692 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:01:21,692 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 16:01:23,375 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:01:23,375 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 1 | |
}) | |
WARNING 2024-08-17 16:01:23,947 instructlab.sdg:403: Empty dataset for qna node: compositional_skills->extraction->annual_report->bullet_points | |
INFO 2024-08-17 16:01:24,021 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_annual_report_markdown, generating from scratch | |
INFO 2024-08-17 16:01:24,021 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:01:24,024 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:01:24,024 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:01:50,236 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:01:50,236 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 16:01:55,204 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:01:55,204 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 16:02:00,094 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:02:00,094 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 99.11 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 112.59 examples/s] | |
INFO 2024-08-17 16:02:00,618 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:02:00,618 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 16:02:07,333 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:02:07,333 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 14 | |
}) | |
INFO 2024-08-17 16:02:10,906 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:02:10,906 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 12 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 66.45 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 77.97 examples/s] | |
INFO 2024-08-17 16:02:11,449 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:02:11,449 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 12 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 37.48 examples/s] | |
INFO 2024-08-17 16:02:11,875 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:02:11,875 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 16:02:12,696 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:02:12,697 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 12 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 58.57 examples/s] | |
INFO 2024-08-17 16:02:13,010 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:02:13,010 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 16:02:19,482 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:02:19,482 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 16:02:26,879 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:02:26,879 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:02:32,983 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:02:32,983 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:02:39,956 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:02:39,956 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 16:02:43,539 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:02:43,539 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 5 | |
}) | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:02:43,539 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Map (num_proc=5): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 26.49 examples/s] | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:02:43,808 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Filter (num_proc=5): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 33.80 examples/s] | |
INFO 2024-08-17 16:02:44,031 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:02:44,031 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 5 | |
}) | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:02:44,031 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Map (num_proc=5): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 17.70 examples/s] | |
INFO 2024-08-17 16:02:44,398 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_annual_report_markdown/data_checkpoint_1dc865ecca2b4ae080f70df5f6c18720.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 313.36ba/s] | |
INFO 2024-08-17 16:02:44,411 instructlab.sdg:410: Generated 48 samples | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:02:44,411 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Map (num_proc=5): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 25.21 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 286.46ba/s] | |
INFO 2024-08-17 16:02:44,742 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_annual_report_csv, generating from scratch | |
INFO 2024-08-17 16:02:44,742 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:02:44,745 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:02:44,745 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:03:09,051 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:03:09,051 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 16:03:13,030 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:03:13,030 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 16 | |
}) | |
INFO 2024-08-17 16:03:17,049 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:03:17,049 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 15 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 83.45 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 96.76 examples/s] | |
INFO 2024-08-17 16:03:17,590 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:03:17,590 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 15 | |
}) | |
INFO 2024-08-17 16:03:24,270 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:03:24,270 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 14 | |
}) | |
INFO 2024-08-17 16:03:27,677 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:03:27,677 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 13 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 71.47 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 82.23 examples/s] | |
INFO 2024-08-17 16:03:28,233 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:03:28,234 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 13 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 41.14 examples/s] | |
INFO 2024-08-17 16:03:28,652 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:03:28,652 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:03:29,745 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:03:29,745 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 13 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 62.01 examples/s] | |
INFO 2024-08-17 16:03:30,065 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:03:30,065 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:03:36,707 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:03:36,707 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:03:43,571 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:03:43,571 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:03:49,148 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:03:49,148 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:03:52,646 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:03:52,646 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:03:54,608 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:03:54,608 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:03:54,608 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 19.02 examples/s] | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:03:54,827 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Filter (num_proc=3): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 22.93 examples/s] | |
INFO 2024-08-17 16:03:55,013 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:03:55,013 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:03:55,013 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 15.91 examples/s] | |
INFO 2024-08-17 16:03:55,263 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_annual_report_csv/data_checkpoint_53e908c8d5834a28b03710506bf11c8e.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 392.58ba/s] | |
INFO 2024-08-17 16:03:55,274 instructlab.sdg:410: Generated 49 samples | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:03:55,274 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 18.18 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 393.68ba/s] | |
INFO 2024-08-17 16:03:55,578 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_annual_report_plain_text, generating from scratch | |
INFO 2024-08-17 16:03:55,579 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:03:55,582 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:03:55,582 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:04:59,011 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:04:59,011 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 16:05:03,564 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:05:03,564 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 16:05:08,716 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:05:08,716 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 21 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 114.93 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 139.94 examples/s] | |
INFO 2024-08-17 16:05:09,244 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:05:09,245 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 16:05:16,323 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:05:16,324 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 19 | |
}) | |
INFO 2024-08-17 16:05:20,371 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:05:20,371 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 18 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 102.91 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 117.15 examples/s] | |
INFO 2024-08-17 16:05:20,897 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:05:20,897 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 52.03 examples/s] | |
INFO 2024-08-17 16:05:21,326 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:05:21,326 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 16:05:22,361 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:05:22,361 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 81.62 examples/s] | |
INFO 2024-08-17 16:05:22,682 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:05:22,682 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 16:05:31,791 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:05:31,791 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 14 | |
}) | |
INFO 2024-08-17 16:05:41,887 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:05:41,887 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 11 | |
}) | |
INFO 2024-08-17 16:05:49,163 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:05:49,163 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 16:05:55,881 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:05:55,882 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:05:58,544 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:05:58,544 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 6 | |
}) | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 16:05:58,544 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Map (num_proc=6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 31.03 examples/s] | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 16:05:58,823 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Filter (num_proc=6): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 10.10 examples/s] | |
INFO 2024-08-17 16:05:59,502 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:05:59,502 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 6 | |
}) | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 16:05:59,502 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Map (num_proc=6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 18.30 examples/s] | |
INFO 2024-08-17 16:05:59,920 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_annual_report_plain_text/data_checkpoint_6450a4fce6ed47bcac7a42ec567d634b.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 304.02ba/s] | |
INFO 2024-08-17 16:05:59,932 instructlab.sdg:410: Generated 50 samples | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 16:05:59,932 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Map (num_proc=6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 29.60 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 278.43ba/s] | |
INFO 2024-08-17 16:06:00,342 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_annual_report_reasoning, generating from scratch | |
INFO 2024-08-17 16:06:00,342 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:06:00,345 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:06:00,345 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:06:22,984 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:06:22,984 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:06:26,947 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:06:26,948 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 16:06:31,560 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:06:31,560 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 111.44 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 132.30 examples/s] | |
INFO 2024-08-17 16:06:32,100 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:06:32,100 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 16:06:44,190 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:06:44,190 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 19 | |
}) | |
INFO 2024-08-17 16:06:48,616 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:06:48,616 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 96.41 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 112.43 examples/s] | |
INFO 2024-08-17 16:06:49,151 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:06:49,151 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 53.60 examples/s] | |
INFO 2024-08-17 16:06:49,572 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:06:49,573 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 16:06:50,434 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:06:50,435 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 82.61 examples/s] | |
INFO 2024-08-17 16:06:50,747 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:06:50,747 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 16:06:59,414 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:06:59,414 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 16:07:08,472 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:07:08,472 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 16:07:15,694 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:07:15,694 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 16:07:28,222 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:07:28,222 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 16:07:31,231 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:07:31,231 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 5 | |
}) | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:07:31,231 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Map (num_proc=5): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 27.39 examples/s] | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:07:31,494 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Filter (num_proc=5): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 33.18 examples/s] | |
INFO 2024-08-17 16:07:31,713 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:07:31,713 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 5 | |
}) | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:07:31,713 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Map (num_proc=5): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 18.36 examples/s] | |
INFO 2024-08-17 16:07:32,071 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_annual_report_reasoning/data_checkpoint_2b05351022d044cea69a788cb92780b5.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 313.10ba/s] | |
INFO 2024-08-17 16:07:32,083 instructlab.sdg:410: Generated 51 samples | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:07:32,083 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Map (num_proc=5): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 24.93 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 302.60ba/s] | |
INFO 2024-08-17 16:07:32,432 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_bullet_points, generating from scratch | |
INFO 2024-08-17 16:07:32,432 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:07:32,436 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:07:32,436 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:07:55,585 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:07:55,586 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 16:08:00,267 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:08:00,267 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 16:08:05,825 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:08:05,825 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 165.54 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 191.55 examples/s] | |
INFO 2024-08-17 16:08:06,355 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:08:06,355 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:08:19,408 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:08:19,408 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:08:24,399 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:08:24,399 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 158.99 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 181.35 examples/s] | |
INFO 2024-08-17 16:08:24,925 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:08:24,926 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 88.14 examples/s] | |
INFO 2024-08-17 16:08:25,350 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:08:25,350 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:08:27,066 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:08:27,066 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 132.89 examples/s] | |
INFO 2024-08-17 16:08:27,381 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:08:27,381 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:08:37,950 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:08:37,950 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:08:50,961 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:08:50,961 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:09:03,254 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:09:03,254 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:09:19,965 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:09:19,965 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:09:25,843 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:09:25,843 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 120.61 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 146.26 examples/s] | |
INFO 2024-08-17 16:09:26,460 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:09:26,460 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 55.25 examples/s] | |
INFO 2024-08-17 16:09:27,056 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_bullet_points/data_checkpoint_b9abd729f3c94d63a5f3c3b0d501d1ff.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 141.07ba/s] | |
INFO 2024-08-17 16:09:27,072 instructlab.sdg:410: Generated 52 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 110.60 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 131.75ba/s] | |
INFO 2024-08-17 16:09:27,507 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_markdown, generating from scratch | |
INFO 2024-08-17 16:09:27,507 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:09:27,510 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:09:27,510 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:09:42,861 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:09:42,861 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 16:09:49,261 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:09:49,261 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 36 | |
}) | |
INFO 2024-08-17 16:09:55,202 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:09:55,202 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 35 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [00:00<00:00, 193.56 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [00:00<00:00, 225.18 examples/s] | |
INFO 2024-08-17 16:09:55,739 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:09:55,739 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 35 | |
}) | |
INFO 2024-08-17 16:10:08,678 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:10:08,678 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 32 | |
}) | |
INFO 2024-08-17 16:10:13,630 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:10:13,630 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 32 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 160.76 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 206.51 examples/s] | |
INFO 2024-08-17 16:10:14,183 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:10:14,183 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 31 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 93.89 examples/s] | |
INFO 2024-08-17 16:10:14,616 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:10:14,616 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 31 | |
}) | |
INFO 2024-08-17 16:10:15,889 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:10:15,889 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 31 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 31/31 [00:00<00:00, 145.99 examples/s] | |
INFO 2024-08-17 16:10:16,208 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:10:16,209 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 31 | |
}) | |
INFO 2024-08-17 16:10:25,379 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:10:25,379 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 16:10:36,436 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:10:36,436 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 16:10:47,731 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:10:47,731 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 16:10:59,810 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:10:59,810 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 16:11:06,075 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:11:06,075 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 26 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 112.34 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 141.72 examples/s] | |
INFO 2024-08-17 16:11:06,708 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:11:06,709 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 48.17 examples/s] | |
INFO 2024-08-17 16:11:07,311 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_markdown/data_checkpoint_184c7407182243c0be08afe464964f80.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 164.64ba/s] | |
INFO 2024-08-17 16:11:07,326 instructlab.sdg:410: Generated 53 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 102.09 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 152.78ba/s] | |
INFO 2024-08-17 16:11:07,748 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_csv, generating from scratch | |
INFO 2024-08-17 16:11:07,749 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:11:07,752 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:11:07,752 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:11:28,549 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:11:28,549 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 16:11:31,946 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:11:31,946 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 16:11:36,281 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:11:36,281 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 18 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 102.02 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 116.62 examples/s] | |
INFO 2024-08-17 16:11:36,808 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:11:36,808 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 16 | |
}) | |
INFO 2024-08-17 16:11:43,349 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:11:43,349 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:11:46,831 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:11:46,831 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 13 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 70.51 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 82.51 examples/s] | |
INFO 2024-08-17 16:11:47,395 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:11:47,395 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 13 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 40.86 examples/s] | |
INFO 2024-08-17 16:11:47,815 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:11:47,815 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:11:48,554 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:11:48,554 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 13 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 61.82 examples/s] | |
INFO 2024-08-17 16:11:48,877 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:11:48,877 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:11:56,450 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:11:56,450 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:12:05,174 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:12:05,174 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:12:13,364 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:12:13,364 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:12:20,621 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:12:20,621 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:12:26,006 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:12:26,006 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 12 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 56.66 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 65.88 examples/s] | |
INFO 2024-08-17 16:12:26,611 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:12:26,611 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 12 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 19.41 examples/s] | |
INFO 2024-08-17 16:12:27,337 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_csv/data_checkpoint_9cf3254bb8754268b7edee3b2f26467e.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 264.96ba/s] | |
INFO 2024-08-17 16:12:27,349 instructlab.sdg:410: Generated 54 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 51.34 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 236.29ba/s] | |
INFO 2024-08-17 16:12:27,768 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_plain_text, generating from scratch | |
INFO 2024-08-17 16:12:27,769 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:12:27,772 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:12:27,772 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:12:45,856 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:12:45,856 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 16:12:50,039 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:12:50,039 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 16:12:55,024 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:12:55,024 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 151.05 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 178.35 examples/s] | |
INFO 2024-08-17 16:12:55,552 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:12:55,553 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 16:13:02,846 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:13:02,846 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 16:13:07,289 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:13:07,289 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 150.54 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 169.36 examples/s] | |
INFO 2024-08-17 16:13:07,837 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:13:07,837 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 83.38 examples/s] | |
INFO 2024-08-17 16:13:08,263 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:13:08,264 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 16:13:09,297 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:13:09,297 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 126.51 examples/s] | |
INFO 2024-08-17 16:13:09,617 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:13:09,617 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 16:13:19,139 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:13:19,139 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 16:13:31,003 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:13:31,003 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 16:13:42,240 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:13:42,240 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 16:13:55,158 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:13:55,158 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 16:13:59,998 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:13:59,998 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 103.90 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 131.52 examples/s] | |
INFO 2024-08-17 16:14:00,647 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:14:00,647 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 48.10 examples/s] | |
INFO 2024-08-17 16:14:01,263 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_plain_text/data_checkpoint_ae3335c8e8f040f8b601ecf951946bc0.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 164.75ba/s] | |
INFO 2024-08-17 16:14:01,278 instructlab.sdg:410: Generated 55 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 102.30 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 154.02ba/s] | |
INFO 2024-08-17 16:14:01,724 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_reasoning, generating from scratch | |
INFO 2024-08-17 16:14:01,725 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:14:01,728 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:14:01,728 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:14:35,604 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:14:35,604 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 16:14:40,073 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:14:40,073 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 16:14:46,046 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:14:46,046 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 158.80 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 192.35 examples/s] | |
INFO 2024-08-17 16:14:46,592 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:14:46,592 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 16:15:01,910 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:15:01,910 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 16:15:07,005 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:15:07,006 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 129.63 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 153.37 examples/s] | |
INFO 2024-08-17 16:15:07,544 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:15:07,544 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 74.44 examples/s] | |
INFO 2024-08-17 16:15:07,973 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:15:07,973 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 16:15:09,456 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:15:09,456 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 103.20 examples/s] | |
INFO 2024-08-17 16:15:09,793 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:15:09,793 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 16:15:20,098 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:15:20,098 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 16:15:35,109 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:15:35,109 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 16:15:46,914 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:15:46,914 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 16:16:04,139 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:16:04,139 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 16:16:10,143 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:16:10,143 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 75.59 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 90.99 examples/s] | |
INFO 2024-08-17 16:16:10,766 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:16:10,766 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 34.02 examples/s] | |
INFO 2024-08-17 16:16:11,380 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_fda_filing_reasoning/data_checkpoint_bd3fe23b7f124511981479b3bc63363b.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 176.27ba/s] | |
INFO 2024-08-17 16:16:11,394 instructlab.sdg:410: Generated 56 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 72.70 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 173.93ba/s] | |
INFO 2024-08-17 16:16:11,821 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_email_bullet_points, generating from scratch | |
INFO 2024-08-17 16:16:11,821 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:16:11,824 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:16:11,825 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:16:20,002 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:16:20,002 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 4 | |
}) | |
INFO 2024-08-17 16:16:22,179 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:16:22,179 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 16:16:25,012 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:16:25,012 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 12 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 68.37 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 79.15 examples/s] | |
INFO 2024-08-17 16:16:25,545 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:16:25,545 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 16:16:28,082 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:16:28,082 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 16:16:30,258 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:16:30,259 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 47.13 examples/s] | |
Filter (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 52.90 examples/s] | |
INFO 2024-08-17 16:16:30,777 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:16:30,777 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 26.22 examples/s] | |
INFO 2024-08-17 16:16:31,181 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:16:31,181 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 16:16:31,495 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:16:31,495 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 38.13 examples/s] | |
INFO 2024-08-17 16:16:31,817 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:16:31,817 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 16:16:37,443 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:16:37,443 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 16:16:43,724 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:16:43,725 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:16:49,574 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:16:49,574 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:16:54,477 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:16:54,478 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:16:57,052 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:16:57,052 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 16:16:57,052 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 34.45 examples/s] | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 16:16:57,363 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Filter (num_proc=7): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 41.21 examples/s] | |
INFO 2024-08-17 16:16:57,623 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:16:57,624 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 16:16:57,624 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 13.15 examples/s] | |
INFO 2024-08-17 16:16:58,258 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_email_bullet_points/data_checkpoint_a53a84eb19b44c1991064d5cdb8f81f5.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 395.65ba/s] | |
INFO 2024-08-17 16:16:58,268 instructlab.sdg:410: Generated 57 samples | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 16:16:58,268 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 30.69 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 366.00ba/s] | |
INFO 2024-08-17 16:16:58,665 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_email_markdown, generating from scratch | |
INFO 2024-08-17 16:16:58,665 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:16:58,669 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:16:58,669 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
WARNING 2024-08-17 16:16:59,963 instructlab.sdg:403: Empty dataset for qna node: compositional_skills->extraction->email->markdown | |
INFO 2024-08-17 16:17:00,026 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_email_plain_text, generating from scratch | |
INFO 2024-08-17 16:17:00,026 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:17:00,029 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:17:00,029 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:17:11,574 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:17:11,574 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 16:17:13,782 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:17:13,782 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 16:17:18,092 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:17:18,093 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 18 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 97.63 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 113.77 examples/s] | |
INFO 2024-08-17 16:17:18,652 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:17:18,652 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 16:17:21,214 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:17:21,214 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 16:17:23,983 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:17:23,983 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 18 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 98.29 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 115.49 examples/s] | |
INFO 2024-08-17 16:17:24,536 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:17:24,536 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 18 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 55.63 examples/s] | |
INFO 2024-08-17 16:17:24,971 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:17:24,971 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 16:17:25,713 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:17:25,713 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 18 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 83.72 examples/s] | |
INFO 2024-08-17 16:17:26,048 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:17:26,048 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 16:17:33,659 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:17:33,659 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 16:17:42,797 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:17:42,797 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 16:17:51,187 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:17:51,187 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 16:18:00,070 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:18:00,070 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 17 | |
}) | |
INFO 2024-08-17 16:18:03,836 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:18:03,836 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 77.50 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 93.58 examples/s] | |
INFO 2024-08-17 16:18:04,454 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:18:04,454 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 17 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 34.95 examples/s] | |
INFO 2024-08-17 16:18:05,049 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_email_plain_text/data_checkpoint_a3a3573f5e684eee9087f5f7c4719d6c.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 249.94ba/s] | |
INFO 2024-08-17 16:18:05,062 instructlab.sdg:410: Generated 58 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 71.84 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 232.69ba/s] | |
INFO 2024-08-17 16:18:05,507 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_email_reasoning, generating from scratch | |
INFO 2024-08-17 16:18:05,507 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:18:05,510 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 16:18:05,510 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:18:14,671 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 16:18:14,671 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 16:18:16,927 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 16:18:16,927 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 15 | |
}) | |
INFO 2024-08-17 16:18:21,576 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 16:18:21,576 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 15 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 85.49 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:00<00:00, 96.03 examples/s] | |
INFO 2024-08-17 16:18:22,121 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 16:18:22,121 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 16:18:25,319 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 16:18:25,319 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:18:27,626 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 16:18:27,626 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 16:18:27,626 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 40.40 examples/s] | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 16:18:27,890 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Filter (num_proc=7): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 47.37 examples/s] | |
INFO 2024-08-17 16:18:28,121 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 16:18:28,121 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 16:18:28,121 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 26.44 examples/s] | |
INFO 2024-08-17 16:18:28,477 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:18:28,477 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:18:28,798 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:18:28,798 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 7 | |
}) | |
num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
WARNING 2024-08-17 16:18:28,798 datasets.arrow_dataset:3092: num_proc must be <= 7. Reducing num_proc to 7 for dataset of size 7. | |
Map (num_proc=7): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 7/7 [00:00<00:00, 34.14 examples/s] | |
INFO 2024-08-17 16:18:29,101 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:18:29,101 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:18:34,207 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:18:34,207 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:18:46,268 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:18:46,268 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 16:18:51,550 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:18:51,550 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 16:18:57,946 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:18:57,946 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 16:19:01,442 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:19:01,442 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 6 | |
}) | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 16:19:01,442 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Map (num_proc=6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 30.04 examples/s] | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 16:19:01,739 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Filter (num_proc=6): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 36.90 examples/s] | |
INFO 2024-08-17 16:19:01,985 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:19:01,985 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 5 | |
}) | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:19:01,985 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Map (num_proc=5): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 16.63 examples/s] | |
INFO 2024-08-17 16:19:02,368 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_extraction_email_reasoning/data_checkpoint_cf06bdc6a062410eb2927f8a5091f6bf.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 454.67ba/s] | |
INFO 2024-08-17 16:19:02,379 instructlab.sdg:410: Generated 59 samples | |
num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
WARNING 2024-08-17 16:19:02,379 datasets.arrow_dataset:3092: num_proc must be <= 5. Reducing num_proc to 5 for dataset of size 5. | |
Map (num_proc=5): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:00<00:00, 24.88 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 402.02ba/s] | |
INFO 2024-08-17 16:19:02,741 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_abstract, generating from scratch | |
INFO 2024-08-17 16:19:02,741 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:19:02,744 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:19:02,744 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 5 | |
}) | |
INFO 2024-08-17 16:19:27,312 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:19:27,312 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 152 | |
}) | |
INFO 2024-08-17 16:19:40,259 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:19:40,259 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 161 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 161/161 [00:00<00:00, 895.69 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 161/161 [00:00<00:00, 1071.46 examples/s] | |
INFO 2024-08-17 16:19:40,787 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:19:40,787 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 110 | |
}) | |
INFO 2024-08-17 16:20:09,670 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:20:09,670 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 16:20:20,601 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:20:20,602 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 90 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 506.94 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 592.65 examples/s] | |
INFO 2024-08-17 16:20:21,134 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:20:21,135 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 16:20:21,492 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:20:21,492 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 90 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 431.87 examples/s] | |
INFO 2024-08-17 16:20:21,812 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:20:21,812 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 16:20:37,360 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:20:37,360 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 16:21:07,435 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:21:07,435 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 89 | |
}) | |
INFO 2024-08-17 16:21:34,928 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:21:34,928 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 16:22:30,697 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:22:30,697 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 16:22:44,782 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:22:44,782 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 84 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 314.98 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 475.02 examples/s] | |
INFO 2024-08-17 16:22:45,438 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:22:45,438 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 84 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 181.67 examples/s] | |
INFO 2024-08-17 16:22:46,016 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_abstract/data_checkpoint_d8a57443d15b4e73814eb7f9f33d0f24.jsonl | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 73.91ba/s] | |
INFO 2024-08-17 16:22:46,037 instructlab.sdg:410: Generated 60 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 357.65 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 68.26ba/s] | |
INFO 2024-08-17 16:22:46,532 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_non_fictional_historical_figures, generating from scratch | |
INFO 2024-08-17 16:22:46,532 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:22:46,535 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:22:46,535 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 2 | |
}) | |
INFO 2024-08-17 16:23:01,922 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:23:01,922 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 59 | |
}) | |
INFO 2024-08-17 16:23:08,059 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:23:08,059 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 59 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 59/59 [00:00<00:00, 333.84 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 59/59 [00:00<00:00, 390.10 examples/s] | |
INFO 2024-08-17 16:23:08,602 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:23:08,602 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 45 | |
}) | |
INFO 2024-08-17 16:23:28,176 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:23:28,176 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 34 | |
}) | |
INFO 2024-08-17 16:23:34,575 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:23:34,575 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 34 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 189.30 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 226.32 examples/s] | |
INFO 2024-08-17 16:23:35,113 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:23:35,113 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 34 | |
}) | |
INFO 2024-08-17 16:23:35,295 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:23:35,295 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 34 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 173.77 examples/s] | |
INFO 2024-08-17 16:23:35,602 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:23:35,602 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 34 | |
}) | |
INFO 2024-08-17 16:23:45,820 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:23:45,820 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 34 | |
}) | |
INFO 2024-08-17 16:24:02,635 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:24:02,635 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 33 | |
}) | |
INFO 2024-08-17 16:24:17,472 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:24:17,472 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 33 | |
}) | |
INFO 2024-08-17 16:24:46,911 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:24:46,911 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 33 | |
}) | |
INFO 2024-08-17 16:24:54,318 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:24:54,318 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 33 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 158.34 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 183.83 examples/s] | |
INFO 2024-08-17 16:24:54,920 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:24:54,920 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 33 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 78.17 examples/s] | |
INFO 2024-08-17 16:24:55,450 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_non_fictional_historical_figures/data_checkpoint_b410710ccd5d497183319c9c85e58e0d.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 152.39ba/s] | |
INFO 2024-08-17 16:24:55,464 instructlab.sdg:410: Generated 61 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 143.36 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 142.91ba/s] | |
INFO 2024-08-17 16:24:55,870 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_non_fictional_popular_personalities, generating from scratch | |
INFO 2024-08-17 16:24:55,871 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:24:55,874 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:24:55,874 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 16:25:18,927 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:25:18,927 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 228 | |
}) | |
INFO 2024-08-17 16:25:37,364 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:25:37,364 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 234 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 234/234 [00:00<00:00, 1195.68 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 234/234 [00:00<00:00, 1537.73 examples/s] | |
INFO 2024-08-17 16:25:37,909 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:25:37,910 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 143 | |
}) | |
INFO 2024-08-17 16:26:09,529 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:26:09,529 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 123 | |
}) | |
INFO 2024-08-17 16:26:24,367 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:26:24,367 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 123 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 123/123 [00:00<00:00, 634.21 examples/s] | |
Filter (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 123/123 [00:00<00:00, 812.12 examples/s] | |
INFO 2024-08-17 16:26:24,924 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:26:24,924 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 123 | |
}) | |
INFO 2024-08-17 16:26:25,410 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:26:25,410 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 123 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 123/123 [00:00<00:00, 559.42 examples/s] | |
INFO 2024-08-17 16:26:25,742 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:26:25,742 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 123 | |
}) | |
INFO 2024-08-17 16:26:45,591 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:26:45,591 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 123 | |
}) | |
INFO 2024-08-17 16:27:24,738 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:27:24,738 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 121 | |
}) | |
INFO 2024-08-17 16:28:02,044 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:28:02,045 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 121 | |
}) | |
INFO 2024-08-17 16:29:12,398 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:29:12,398 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 118 | |
}) | |
INFO 2024-08-17 16:29:31,477 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:29:31,477 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 118 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 118/118 [00:00<00:00, 419.60 examples/s] | |
Filter (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 118/118 [00:00<00:00, 638.64 examples/s] | |
INFO 2024-08-17 16:29:32,168 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:29:32,168 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 118 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 118/118 [00:00<00:00, 187.27 examples/s] | |
INFO 2024-08-17 16:29:32,915 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_non_fictional_popular_personalities/data_checkpoint_147ca4b65bb3443088005598b8bced85.jsonl | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 55.74ba/s] | |
INFO 2024-08-17 16:29:32,941 instructlab.sdg:410: Generated 62 samples | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 118/118 [00:00<00:00, 475.40 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 51.33ba/s] | |
INFO 2024-08-17 16:29:33,409 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_fictional_tv_shows, generating from scratch | |
INFO 2024-08-17 16:29:33,409 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:29:33,412 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:29:33,412 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 4 | |
}) | |
INFO 2024-08-17 16:29:58,085 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:29:58,085 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 118 | |
}) | |
INFO 2024-08-17 16:30:07,901 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:30:07,901 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 121 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 121/121 [00:00<00:00, 676.83 examples/s] | |
Filter (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 121/121 [00:00<00:00, 813.35 examples/s] | |
INFO 2024-08-17 16:30:08,436 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:30:08,436 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 36 | |
}) | |
INFO 2024-08-17 16:30:24,590 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:30:24,590 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 16:30:29,548 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:30:29,549 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 135.16 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 156.45 examples/s] | |
INFO 2024-08-17 16:30:30,101 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:30:30,101 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 16:30:30,253 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:30:30,253 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 121.18 examples/s] | |
INFO 2024-08-17 16:30:30,557 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:30:30,557 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 16:30:39,216 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:30:39,216 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 16:30:53,430 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:30:53,430 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 22 | |
}) | |
INFO 2024-08-17 16:31:04,847 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:31:04,847 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 22 | |
}) | |
INFO 2024-08-17 16:31:28,843 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:31:28,844 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 22 | |
}) | |
INFO 2024-08-17 16:31:35,300 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:31:35,300 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 22 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 106.00 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 126.18 examples/s] | |
INFO 2024-08-17 16:31:35,893 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:31:35,893 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 22 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 52.82 examples/s] | |
INFO 2024-08-17 16:31:36,416 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_fictional_tv_shows/data_checkpoint_4aa82c20fac148ab8f93fe7600bd3580.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 197.70ba/s] | |
INFO 2024-08-17 16:31:36,429 instructlab.sdg:410: Generated 63 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 97.26 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 174.81ba/s] | |
INFO 2024-08-17 16:31:36,826 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_fictional_movies, generating from scratch | |
INFO 2024-08-17 16:31:36,826 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:31:36,829 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:31:36,829 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 16:32:01,569 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:32:01,569 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 230 | |
}) | |
INFO 2024-08-17 16:32:19,342 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:32:19,342 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 239 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 239/239 [00:00<00:00, 1284.98 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 239/239 [00:00<00:00, 1593.14 examples/s] | |
INFO 2024-08-17 16:32:19,891 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:32:19,891 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 95 | |
}) | |
INFO 2024-08-17 16:32:41,440 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:32:41,441 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 16:32:52,478 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:32:52,478 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 85 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 474.44 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 557.87 examples/s] | |
INFO 2024-08-17 16:32:53,020 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:32:53,020 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 16:32:53,391 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:32:53,391 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 85 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 416.00 examples/s] | |
INFO 2024-08-17 16:32:53,711 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:32:53,711 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 16:33:09,504 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:33:09,504 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 16:33:38,162 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:33:38,162 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 16:34:05,328 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:34:05,328 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 16:34:55,870 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:34:55,870 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 83 | |
}) | |
INFO 2024-08-17 16:35:09,637 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:35:09,637 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 83 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 83/83 [00:00<00:00, 334.25 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 83/83 [00:00<00:00, 469.54 examples/s] | |
INFO 2024-08-17 16:35:10,306 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:35:10,306 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 83 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 83/83 [00:00<00:00, 179.70 examples/s] | |
INFO 2024-08-17 16:35:10,876 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_fictional_movies/data_checkpoint_b06bfcee2b25418aa6327b0365d9bc84.jsonl | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 85.13ba/s] | |
INFO 2024-08-17 16:35:10,895 instructlab.sdg:410: Generated 64 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 83/83 [00:00<00:00, 342.27 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 77.70ba/s] | |
INFO 2024-08-17 16:35:11,357 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_fictional_video_games, generating from scratch | |
INFO 2024-08-17 16:35:11,357 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:35:11,361 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:35:11,361 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 2 | |
}) | |
INFO 2024-08-17 16:35:25,231 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:35:25,231 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 57 | |
}) | |
INFO 2024-08-17 16:35:30,969 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:35:30,969 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 57 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 339.74 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 381.00 examples/s] | |
INFO 2024-08-17 16:35:31,509 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:35:31,509 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 16:35:46,910 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:35:46,910 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 16 | |
}) | |
INFO 2024-08-17 16:35:51,259 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:35:51,259 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 16 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 92.04 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 108.05 examples/s] | |
INFO 2024-08-17 16:35:51,810 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:35:51,810 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 16 | |
}) | |
INFO 2024-08-17 16:35:51,927 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:35:51,927 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 16 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 83.92 examples/s] | |
INFO 2024-08-17 16:35:52,239 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:35:52,239 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 16 | |
}) | |
INFO 2024-08-17 16:36:00,760 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:36:00,760 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 16 | |
}) | |
INFO 2024-08-17 16:36:12,400 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:36:12,400 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 15 | |
}) | |
INFO 2024-08-17 16:36:22,269 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:36:22,269 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 15 | |
}) | |
INFO 2024-08-17 16:36:41,101 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:36:41,101 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 14 | |
}) | |
INFO 2024-08-17 16:36:45,495 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:36:45,495 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 14 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 69.82 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 81.76 examples/s] | |
INFO 2024-08-17 16:36:46,089 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:36:46,089 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 14 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 34.76 examples/s] | |
INFO 2024-08-17 16:36:46,604 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_you_are_fictional_video_games/data_checkpoint_d0731a0255fa4ccd974f9c457d80aaa2.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 244.31ba/s] | |
INFO 2024-08-17 16:36:46,616 instructlab.sdg:410: Generated 65 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 63.86 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 234.02ba/s] | |
INFO 2024-08-17 16:36:47,083 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_i_am_primary_schooler, generating from scratch | |
INFO 2024-08-17 16:36:47,083 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:36:47,087 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:36:47,087 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:37:08,359 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:37:08,359 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 87 | |
}) | |
INFO 2024-08-17 16:37:15,363 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:37:15,364 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 89 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 89/89 [00:00<00:00, 546.92 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 89/89 [00:00<00:00, 581.65 examples/s] | |
INFO 2024-08-17 16:37:15,896 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:37:15,897 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 89 | |
}) | |
INFO 2024-08-17 16:37:29,886 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:37:29,886 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 16:37:38,939 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:37:38,939 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 88 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88/88 [00:00<00:00, 490.25 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88/88 [00:00<00:00, 579.27 examples/s] | |
INFO 2024-08-17 16:37:39,482 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:37:39,483 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 16:37:39,869 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:37:39,869 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 88 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88/88 [00:00<00:00, 422.39 examples/s] | |
INFO 2024-08-17 16:37:40,184 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:37:40,184 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 16:37:54,542 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:37:54,542 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 16:38:21,199 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:38:21,199 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 86 | |
}) | |
INFO 2024-08-17 16:38:45,626 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:38:45,626 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 86 | |
}) | |
INFO 2024-08-17 16:39:21,086 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:39:21,087 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 16:39:32,077 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:39:32,077 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 85 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 367.67 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 480.29 examples/s] | |
INFO 2024-08-17 16:39:32,725 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:39:32,725 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 85 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 192.58 examples/s] | |
INFO 2024-08-17 16:39:33,272 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_i_am_primary_schooler/data_checkpoint_c18e2d091dad4c99bd26b7ecea88fdaa.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 118.51ba/s] | |
INFO 2024-08-17 16:39:33,288 instructlab.sdg:410: Generated 66 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 370.81 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 102.26ba/s] | |
INFO 2024-08-17 16:39:33,713 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_i_am_graduate, generating from scratch | |
INFO 2024-08-17 16:39:33,713 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:39:33,716 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:39:33,716 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:39:59,713 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:39:59,713 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 89 | |
}) | |
INFO 2024-08-17 16:40:10,141 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:40:10,141 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 92 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92/92 [00:00<00:00, 488.03 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 92/92 [00:00<00:00, 612.21 examples/s] | |
INFO 2024-08-17 16:40:10,694 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:40:10,694 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 16:40:38,024 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:40:38,024 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 67 | |
}) | |
INFO 2024-08-17 16:40:48,042 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:40:48,042 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 67 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 364.01 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 411.26 examples/s] | |
INFO 2024-08-17 16:40:48,613 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:40:48,613 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 67 | |
}) | |
INFO 2024-08-17 16:40:48,970 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:40:48,970 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 67 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 308.00 examples/s] | |
INFO 2024-08-17 16:40:49,304 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:40:49,304 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 67 | |
}) | |
INFO 2024-08-17 16:41:03,605 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:41:03,605 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 67 | |
}) | |
INFO 2024-08-17 16:41:28,935 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:41:28,935 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 64 | |
}) | |
INFO 2024-08-17 16:41:52,616 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:41:52,616 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 64 | |
}) | |
INFO 2024-08-17 16:42:49,436 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:42:49,436 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 63 | |
}) | |
INFO 2024-08-17 16:43:02,213 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:43:02,213 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 63 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 248.32 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 356.53 examples/s] | |
INFO 2024-08-17 16:43:02,893 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:43:02,893 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 63 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 133.76 examples/s] | |
INFO 2024-08-17 16:43:03,478 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_roleplay_explain_like_i_am_graduate/data_checkpoint_836b1f2ae7fd48ffa4780e0a145d18fc.jsonl | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 94.03ba/s] | |
INFO 2024-08-17 16:43:03,497 instructlab.sdg:410: Generated 67 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 63/63 [00:00<00:00, 267.59 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 80.28ba/s] | |
INFO 2024-08-17 16:43:03,927 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_jumbled_sentences, generating from scratch | |
INFO 2024-08-17 16:43:03,928 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:43:03,931 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:43:03,931 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:43:52,721 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:43:52,721 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 22 | |
}) | |
INFO 2024-08-17 16:43:58,577 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:43:58,577 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 25 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 137.88 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 170.80 examples/s] | |
INFO 2024-08-17 16:43:59,121 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:43:59,121 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:44:01,083 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:44:01,083 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:44:03,802 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:44:03,802 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 13 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 77.68 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 84.13 examples/s] | |
INFO 2024-08-17 16:44:04,349 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:44:04,349 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:44:04,550 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:44:04,550 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 13 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 68.97 examples/s] | |
INFO 2024-08-17 16:44:04,853 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:44:04,854 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:44:10,788 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:44:10,788 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 13 | |
}) | |
INFO 2024-08-17 16:44:17,483 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:44:17,483 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 16:44:24,554 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:44:24,555 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 16:44:28,951 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:44:28,951 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 16:44:32,319 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:44:32,319 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 12 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 60.66 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 70.01 examples/s] | |
INFO 2024-08-17 16:44:32,910 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:44:32,910 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 9 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 17.22 examples/s] | |
INFO 2024-08-17 16:44:33,547 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_jumbled_sentences/data_checkpoint_8df92fd85c184f47a3f28ff3a2388348.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 544.64ba/s] | |
INFO 2024-08-17 16:44:33,556 instructlab.sdg:410: Generated 68 samples | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:00<00:00, 40.06 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 430.54ba/s] | |
INFO 2024-08-17 16:44:34,012 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_pattern_recognition, generating from scratch | |
INFO 2024-08-17 16:44:34,012 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:44:34,015 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:44:34,015 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 4 | |
}) | |
INFO 2024-08-17 16:45:03,091 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:45:03,091 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 16:45:10,273 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:45:10,273 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 91 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 91/91 [00:00<00:00, 549.64 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 91/91 [00:00<00:00, 610.37 examples/s] | |
INFO 2024-08-17 16:45:10,796 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:45:10,796 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 71 | |
}) | |
INFO 2024-08-17 16:45:14,236 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:45:14,236 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 64 | |
}) | |
INFO 2024-08-17 16:45:21,213 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:45:21,213 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 64 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 382.60 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 397.16 examples/s] | |
INFO 2024-08-17 16:45:21,786 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:45:21,786 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 16:45:22,070 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:45:22,070 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 60 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 310.37 examples/s] | |
INFO 2024-08-17 16:45:22,384 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:45:22,384 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 16:45:32,396 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:45:32,396 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 16:45:46,397 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:45:46,397 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 56 | |
}) | |
INFO 2024-08-17 16:46:00,359 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:46:00,359 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 56 | |
}) | |
INFO 2024-08-17 16:46:15,362 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:46:15,362 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 56 | |
}) | |
INFO 2024-08-17 16:46:21,406 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:46:21,406 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 56 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 274.39 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 316.60 examples/s] | |
INFO 2024-08-17 16:46:22,011 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:46:22,011 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 49 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 49/49 [00:00<00:00, 121.20 examples/s] | |
INFO 2024-08-17 16:46:22,539 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_pattern_recognition/data_checkpoint_15f4a4f8340549c38baacb86ab256727.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 254.08ba/s] | |
INFO 2024-08-17 16:46:22,551 instructlab.sdg:410: Generated 69 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 49/49 [00:00<00:00, 223.04 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 250.36ba/s] | |
INFO 2024-08-17 16:46:22,966 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_complete_common_expressions, generating from scratch | |
INFO 2024-08-17 16:46:22,967 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:46:22,970 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:46:22,970 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:47:01,170 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:47:01,170 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 16:47:10,429 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:47:10,429 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 98 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [00:00<00:00, 575.48 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [00:00<00:00, 654.54 examples/s] | |
INFO 2024-08-17 16:47:10,958 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:47:10,958 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 16:47:14,365 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:47:14,365 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 82 | |
}) | |
INFO 2024-08-17 16:47:24,261 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:47:24,261 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 84 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 495.08 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 562.05 examples/s] | |
INFO 2024-08-17 16:47:24,797 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:47:24,797 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 82 | |
}) | |
INFO 2024-08-17 16:47:25,370 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:47:25,371 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 82 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 82/82 [00:00<00:00, 400.55 examples/s] | |
INFO 2024-08-17 16:47:25,688 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:47:25,689 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 82 | |
}) | |
INFO 2024-08-17 16:47:38,460 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:47:38,460 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 82 | |
}) | |
INFO 2024-08-17 16:47:56,922 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:47:56,922 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 77 | |
}) | |
INFO 2024-08-17 16:48:15,673 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:48:15,674 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 77 | |
}) | |
INFO 2024-08-17 16:48:33,210 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:48:33,211 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 16:48:40,567 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:48:40,567 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 76 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 347.97 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 437.63 examples/s] | |
INFO 2024-08-17 16:48:41,181 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:48:41,182 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 54 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 54/54 [00:00<00:00, 127.07 examples/s] | |
INFO 2024-08-17 16:48:41,720 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_complete_common_expressions/data_checkpoint_a5e5f9cee02d41318d32517954fd5116.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 211.60ba/s] | |
INFO 2024-08-17 16:48:41,734 instructlab.sdg:410: Generated 70 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 54/54 [00:00<00:00, 243.15 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 203.58ba/s] | |
INFO 2024-08-17 16:48:42,135 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_summarization_ignore_pii, generating from scratch | |
INFO 2024-08-17 16:48:42,135 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:48:42,138 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:48:42,138 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:49:47,351 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:49:47,351 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 16:49:56,719 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:49:56,719 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 167.59 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 190.41 examples/s] | |
INFO 2024-08-17 16:49:57,280 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:49:57,280 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 16:49:58,964 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:49:58,964 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 4 | |
}) | |
INFO 2024-08-17 16:50:01,079 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:50:01,079 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 4 | |
}) | |
num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
WARNING 2024-08-17 16:50:01,080 datasets.arrow_dataset:3092: num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
Map (num_proc=4): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 27.64 examples/s] | |
num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
WARNING 2024-08-17 16:50:01,299 datasets.arrow_dataset:3092: num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
Filter (num_proc=4): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 31.94 examples/s] | |
INFO 2024-08-17 16:50:01,495 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:50:01,495 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 4 | |
}) | |
INFO 2024-08-17 16:50:01,661 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:50:01,661 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 4 | |
}) | |
num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
WARNING 2024-08-17 16:50:01,662 datasets.arrow_dataset:3092: num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
Map (num_proc=4): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 26.40 examples/s] | |
INFO 2024-08-17 16:50:01,896 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:50:01,896 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 4 | |
}) | |
INFO 2024-08-17 16:50:06,219 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:50:06,219 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 4 | |
}) | |
INFO 2024-08-17 16:50:11,836 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:50:11,836 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:50:15,328 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:50:15,328 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:50:17,179 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:50:17,179 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:50:18,984 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:50:18,984 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:50:18,984 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 19.21 examples/s] | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:50:19,206 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Filter (num_proc=3): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 21.77 examples/s] | |
INFO 2024-08-17 16:50:19,401 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:50:19,401 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:50:19,401 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 16.65 examples/s] | |
INFO 2024-08-17 16:50:19,642 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_summarization_ignore_pii/data_checkpoint_a2ca1028a9254fdc9a1b2da96c6a3e15.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 516.03ba/s] | |
INFO 2024-08-17 16:50:19,652 instructlab.sdg:410: Generated 71 samples | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 16:50:19,652 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 18.75 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 523.31ba/s] | |
INFO 2024-08-17 16:50:19,951 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_summarization_list_of_sentences, generating from scratch | |
INFO 2024-08-17 16:50:19,951 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:50:19,954 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:50:19,954 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:51:17,214 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:51:17,214 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 111 | |
}) | |
INFO 2024-08-17 16:51:27,222 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:51:27,222 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 112 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 112/112 [00:00<00:00, 627.66 examples/s] | |
Filter (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 112/112 [00:00<00:00, 728.53 examples/s] | |
INFO 2024-08-17 16:51:27,778 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:51:27,778 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 99 | |
}) | |
INFO 2024-08-17 16:51:32,216 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:51:32,216 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 99 | |
}) | |
INFO 2024-08-17 16:51:41,364 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:51:41,364 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 99 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 99/99 [00:00<00:00, 544.86 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 99/99 [00:00<00:00, 661.28 examples/s] | |
INFO 2024-08-17 16:51:41,927 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:51:41,927 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 99 | |
}) | |
INFO 2024-08-17 16:51:42,675 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:51:42,675 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 99 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 99/99 [00:00<00:00, 492.45 examples/s] | |
INFO 2024-08-17 16:51:42,989 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:51:42,989 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 99 | |
}) | |
INFO 2024-08-17 16:51:56,701 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:51:56,701 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 99 | |
}) | |
INFO 2024-08-17 16:52:14,493 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:52:14,493 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 16:52:35,174 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:52:35,174 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 16:52:51,824 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:52:51,824 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 16:52:59,434 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:52:59,434 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 88 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88/88 [00:00<00:00, 405.89 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88/88 [00:00<00:00, 511.15 examples/s] | |
INFO 2024-08-17 16:53:00,066 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:53:00,066 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 64 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 152.00 examples/s] | |
INFO 2024-08-17 16:53:00,609 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_summarization_list_of_sentences/data_checkpoint_c05235bc33a044b4aac0ed1f4a34907a.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 200.48ba/s] | |
INFO 2024-08-17 16:53:00,621 instructlab.sdg:410: Generated 72 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 291.18 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 167.02ba/s] | |
INFO 2024-08-17 16:53:01,023 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_organize_lists, generating from scratch | |
INFO 2024-08-17 16:53:01,023 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:53:01,026 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:53:01,026 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:53:50,296 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:53:50,296 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 81 | |
}) | |
INFO 2024-08-17 16:53:59,577 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:53:59,577 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 84 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 469.97 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 546.34 examples/s] | |
INFO 2024-08-17 16:54:00,134 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:54:00,134 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 65 | |
}) | |
INFO 2024-08-17 16:54:12,396 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:54:12,396 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 62 | |
}) | |
INFO 2024-08-17 16:54:19,954 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:54:19,955 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 62 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 341.53 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 391.64 examples/s] | |
INFO 2024-08-17 16:54:20,519 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:54:20,519 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 62 | |
}) | |
INFO 2024-08-17 16:54:20,988 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:54:20,988 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 62 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 62/62 [00:00<00:00, 300.94 examples/s] | |
INFO 2024-08-17 16:54:21,322 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:54:21,323 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 62 | |
}) | |
INFO 2024-08-17 16:54:32,885 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:54:32,885 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 61 | |
}) | |
INFO 2024-08-17 16:54:49,602 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:54:49,602 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 16:55:07,620 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:55:07,620 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 16:55:32,559 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:55:32,559 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 58 | |
}) | |
INFO 2024-08-17 16:55:40,939 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:55:40,939 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 58 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 58/58 [00:00<00:00, 257.89 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 58/58 [00:00<00:00, 334.88 examples/s] | |
INFO 2024-08-17 16:55:41,552 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:55:41,552 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 51 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 51/51 [00:00<00:00, 118.49 examples/s] | |
INFO 2024-08-17 16:55:42,095 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_organize_lists/data_checkpoint_17b4eac40e864512bc07960a3fa73056.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 163.63ba/s] | |
INFO 2024-08-17 16:55:42,109 instructlab.sdg:410: Generated 73 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 51/51 [00:00<00:00, 228.54 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 154.03ba/s] | |
INFO 2024-08-17 16:55:42,529 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_word_gen, generating from scratch | |
INFO 2024-08-17 16:55:42,529 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:55:42,533 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:55:42,533 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:55:56,408 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:55:56,408 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 89 | |
}) | |
INFO 2024-08-17 16:56:03,522 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:56:03,522 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 91 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 91/91 [00:00<00:00, 520.56 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 91/91 [00:00<00:00, 606.64 examples/s] | |
INFO 2024-08-17 16:56:04,056 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:56:04,056 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 39 | |
}) | |
INFO 2024-08-17 16:56:08,922 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:56:08,922 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 37 | |
}) | |
INFO 2024-08-17 16:56:13,918 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:56:13,918 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 37 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:00<00:00, 213.63 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:00<00:00, 241.85 examples/s] | |
INFO 2024-08-17 16:56:14,465 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:56:14,465 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 37 | |
}) | |
INFO 2024-08-17 16:56:14,622 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:56:14,622 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 37 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 37/37 [00:00<00:00, 190.62 examples/s] | |
INFO 2024-08-17 16:56:14,931 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:56:14,931 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 37 | |
}) | |
INFO 2024-08-17 16:56:24,124 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:56:24,124 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 37 | |
}) | |
INFO 2024-08-17 16:56:40,298 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:56:40,298 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 36 | |
}) | |
INFO 2024-08-17 16:56:53,173 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:56:53,173 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 36 | |
}) | |
INFO 2024-08-17 16:57:12,494 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:57:12,494 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 33 | |
}) | |
INFO 2024-08-17 16:57:17,397 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:57:17,397 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 33 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 154.49 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 176.94 examples/s] | |
INFO 2024-08-17 16:57:18,030 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:57:18,030 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 33 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 81.95 examples/s] | |
INFO 2024-08-17 16:57:18,538 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_word_gen/data_checkpoint_6bbc6c86bb79424d885110e91a9bf987.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 266.07ba/s] | |
INFO 2024-08-17 16:57:18,549 instructlab.sdg:410: Generated 74 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 149.73 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 228.29ba/s] | |
INFO 2024-08-17 16:57:19,008 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_rhyming_words, generating from scratch | |
INFO 2024-08-17 16:57:19,009 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:57:19,012 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:57:19,012 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:57:30,159 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:57:30,159 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 86 | |
}) | |
INFO 2024-08-17 16:57:35,395 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:57:35,395 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 86 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 86/86 [00:00<00:00, 500.74 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 86/86 [00:00<00:00, 562.89 examples/s] | |
INFO 2024-08-17 16:57:35,950 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:57:35,951 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 16:57:39,994 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:57:39,994 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 16:57:47,278 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:57:47,279 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 84 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 506.28 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 543.97 examples/s] | |
INFO 2024-08-17 16:57:47,825 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:57:47,825 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 83 | |
}) | |
INFO 2024-08-17 16:57:48,073 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:57:48,073 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 83 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 83/83 [00:00<00:00, 422.42 examples/s] | |
INFO 2024-08-17 16:57:48,384 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:57:48,384 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 83 | |
}) | |
INFO 2024-08-17 16:58:01,329 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 16:58:01,329 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 81 | |
}) | |
INFO 2024-08-17 16:58:24,756 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 16:58:24,756 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 72 | |
}) | |
INFO 2024-08-17 16:58:44,092 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 16:58:44,092 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 72 | |
}) | |
INFO 2024-08-17 16:59:09,936 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 16:59:09,936 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 70 | |
}) | |
INFO 2024-08-17 16:59:16,924 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 16:59:16,924 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 70 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 313.86 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 397.91 examples/s] | |
INFO 2024-08-17 16:59:17,560 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 16:59:17,560 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 67 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 117.51 examples/s] | |
INFO 2024-08-17 16:59:18,251 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_rhyming_words/data_checkpoint_2352b5962a5147b6b582ba51a9377926.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 191.46ba/s] | |
INFO 2024-08-17 16:59:18,265 instructlab.sdg:410: Generated 75 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 292.33 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 165.49ba/s] | |
INFO 2024-08-17 16:59:18,691 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_reversing_string, generating from scratch | |
INFO 2024-08-17 16:59:18,691 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 16:59:18,694 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 16:59:18,694 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 16:59:34,246 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 16:59:34,246 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 16:59:39,761 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 16:59:39,761 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 85 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 503.87 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 553.40 examples/s] | |
INFO 2024-08-17 16:59:40,314 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 16:59:40,314 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 16:59:43,743 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 16:59:43,743 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 16:59:52,419 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 16:59:52,420 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 85 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 481.26 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 566.09 examples/s] | |
INFO 2024-08-17 16:59:52,961 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 16:59:52,961 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 81 | |
}) | |
INFO 2024-08-17 16:59:53,261 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 16:59:53,262 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 81 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 81/81 [00:00<00:00, 409.78 examples/s] | |
INFO 2024-08-17 16:59:53,577 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 16:59:53,577 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 81 | |
}) | |
INFO 2024-08-17 17:00:05,631 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:00:05,631 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 79 | |
}) | |
INFO 2024-08-17 17:00:22,777 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:00:22,777 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 74 | |
}) | |
INFO 2024-08-17 17:00:41,452 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:00:41,453 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 73 | |
}) | |
INFO 2024-08-17 17:01:00,514 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:01:00,514 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 73 | |
}) | |
INFO 2024-08-17 17:01:07,386 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:01:07,386 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 73 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 73/73 [00:00<00:00, 346.16 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 73/73 [00:00<00:00, 415.19 examples/s] | |
INFO 2024-08-17 17:01:08,010 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:01:08,010 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 64 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 154.88 examples/s] | |
INFO 2024-08-17 17:01:08,544 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_reversing_string/data_checkpoint_c19eb291a0d34a11a1fd3baed2e15fec.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 205.38ba/s] | |
INFO 2024-08-17 17:01:08,557 instructlab.sdg:410: Generated 76 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 64/64 [00:00<00:00, 280.94 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 186.13ba/s] | |
INFO 2024-08-17 17:01:09,030 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_classification_agent_classification, generating from scratch | |
INFO 2024-08-17 17:01:09,030 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:01:09,033 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:01:09,033 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:01:40,898 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:01:40,898 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 31 | |
}) | |
INFO 2024-08-17 17:01:46,919 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:01:46,919 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 33 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 187.51 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 33/33 [00:00<00:00, 216.39 examples/s] | |
INFO 2024-08-17 17:01:47,474 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:01:47,474 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:01:49,686 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:01:49,686 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:01:53,495 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:01:53,495 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 122.04 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 133.19 examples/s] | |
INFO 2024-08-17 17:01:54,034 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:01:54,034 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:01:54,259 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:01:54,259 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 103.92 examples/s] | |
INFO 2024-08-17 17:01:54,569 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:01:54,569 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:02:01,040 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:02:01,040 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:02:12,653 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:02:12,653 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:02:21,520 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:02:21,521 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:02:31,336 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:02:31,336 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:02:34,588 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:02:34,588 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 97.24 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 112.16 examples/s] | |
INFO 2024-08-17 17:02:35,211 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:02:35,211 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 48.35 examples/s] | |
INFO 2024-08-17 17:02:35,735 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_linguistics_classification_agent_classification/data_checkpoint_fe0031f34c5247fca34c0306ff676567.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 340.12ba/s] | |
INFO 2024-08-17 17:02:35,746 instructlab.sdg:410: Generated 77 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 86.77 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 316.00ba/s] | |
INFO 2024-08-17 17:02:36,157 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_science_geography, generating from scratch | |
INFO 2024-08-17 17:02:36,158 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:02:36,161 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:02:36,161 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:02:47,531 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:02:47,531 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 69 | |
}) | |
INFO 2024-08-17 17:02:53,145 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:02:53,145 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 70 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 406.29 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 458.04 examples/s] | |
INFO 2024-08-17 17:02:53,685 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:02:53,685 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 67 | |
}) | |
INFO 2024-08-17 17:02:58,962 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:02:58,962 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 67 | |
}) | |
INFO 2024-08-17 17:03:06,841 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:03:06,841 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 67 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 393.84 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 434.87 examples/s] | |
INFO 2024-08-17 17:03:07,387 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:03:07,387 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 67 | |
}) | |
INFO 2024-08-17 17:03:07,604 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:03:07,604 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 67 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 67/67 [00:00<00:00, 351.43 examples/s] | |
INFO 2024-08-17 17:03:07,907 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:03:07,907 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 67 | |
}) | |
INFO 2024-08-17 17:03:18,565 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:03:18,565 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 65 | |
}) | |
INFO 2024-08-17 17:03:36,629 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:03:36,629 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 59 | |
}) | |
INFO 2024-08-17 17:03:52,072 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:03:52,072 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 59 | |
}) | |
INFO 2024-08-17 17:04:08,108 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:04:08,108 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:04:14,108 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:04:14,109 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 59 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 59/59 [00:00<00:00, 281.69 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 59/59 [00:00<00:00, 333.18 examples/s] | |
INFO 2024-08-17 17:04:14,724 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:04:14,724 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 58 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 58/58 [00:00<00:00, 138.82 examples/s] | |
INFO 2024-08-17 17:04:15,263 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_science_geography/data_checkpoint_2da311e834fa44008dfe1618c9f46585.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 207.87ba/s] | |
INFO 2024-08-17 17:04:15,276 instructlab.sdg:410: Generated 78 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 58/58 [00:00<00:00, 256.90 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 192.48ba/s] | |
INFO 2024-08-17 17:04:15,704 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_science_units_conversion_temperature_conversion, generating from scratch | |
INFO 2024-08-17 17:04:15,704 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:04:15,707 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:04:15,708 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:04:31,635 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:04:31,635 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 17:04:37,812 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:04:37,812 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 84 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 499.90 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 559.87 examples/s] | |
INFO 2024-08-17 17:04:38,355 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:04:38,355 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 80 | |
}) | |
INFO 2024-08-17 17:04:44,663 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:04:44,663 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 80 | |
}) | |
INFO 2024-08-17 17:04:53,262 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:04:53,262 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 80 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 409.30 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 525.77 examples/s] | |
INFO 2024-08-17 17:04:53,845 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:04:53,845 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 80 | |
}) | |
INFO 2024-08-17 17:04:54,144 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:04:54,144 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 80 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 396.28 examples/s] | |
INFO 2024-08-17 17:04:54,461 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:04:54,461 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 80 | |
}) | |
INFO 2024-08-17 17:05:06,752 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:05:06,753 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 78 | |
}) | |
INFO 2024-08-17 17:05:24,685 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:05:24,685 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 71 | |
}) | |
INFO 2024-08-17 17:05:44,732 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:05:44,732 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 70 | |
}) | |
INFO 2024-08-17 17:06:06,401 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:06:06,401 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 61 | |
}) | |
INFO 2024-08-17 17:06:13,198 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:06:13,198 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 61 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 266.16 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 61/61 [00:00<00:00, 346.23 examples/s] | |
INFO 2024-08-17 17:06:13,840 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:06:13,840 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 58 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 58/58 [00:00<00:00, 137.85 examples/s] | |
INFO 2024-08-17 17:06:14,380 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_science_units_conversion_temperature_conversion/data_checkpoint_8fa9b5aa104145319c56fd5e6d55e514.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 202.59ba/s] | |
INFO 2024-08-17 17:06:14,392 instructlab.sdg:410: Generated 79 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 58/58 [00:00<00:00, 261.49 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 182.46ba/s] | |
INFO 2024-08-17 17:06:14,877 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_science_units_conversion_distance_conversion, generating from scratch | |
INFO 2024-08-17 17:06:14,877 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:06:14,880 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:06:14,880 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:06:27,181 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:06:27,181 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 81 | |
}) | |
INFO 2024-08-17 17:06:33,983 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:06:33,983 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 82 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 82/82 [00:00<00:00, 468.69 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 82/82 [00:00<00:00, 541.83 examples/s] | |
INFO 2024-08-17 17:06:34,545 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:06:34,546 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 17:06:39,949 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:06:39,949 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 17:06:47,841 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:06:47,841 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 76 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 432.49 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 495.27 examples/s] | |
INFO 2024-08-17 17:06:48,399 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:06:48,400 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 17:06:48,646 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:06:48,646 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 76 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 373.48 examples/s] | |
INFO 2024-08-17 17:06:48,962 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:06:48,963 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 17:07:00,937 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:07:00,937 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 17:07:19,656 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:07:19,657 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 72 | |
}) | |
INFO 2024-08-17 17:07:39,711 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:07:39,711 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 72 | |
}) | |
INFO 2024-08-17 17:07:59,025 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:07:59,025 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 71 | |
}) | |
INFO 2024-08-17 17:08:05,872 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:08:05,872 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 71 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 71/71 [00:00<00:00, 321.11 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 71/71 [00:00<00:00, 404.66 examples/s] | |
INFO 2024-08-17 17:08:06,518 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:08:06,518 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 71 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 71/71 [00:00<00:00, 166.35 examples/s] | |
INFO 2024-08-17 17:08:07,065 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_science_units_conversion_distance_conversion/data_checkpoint_7dd4b1a946f740148ed46a8bd4d023e8.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 161.17ba/s] | |
INFO 2024-08-17 17:08:07,079 instructlab.sdg:410: Generated 80 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 71/71 [00:00<00:00, 311.29 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 146.48ba/s] | |
INFO 2024-08-17 17:08:07,505 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_pattern_recognition, generating from scratch | |
INFO 2024-08-17 17:08:07,505 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:08:07,508 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:08:07,508 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:08:30,676 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:08:30,676 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 89 | |
}) | |
INFO 2024-08-17 17:08:36,502 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:08:36,502 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 89 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 89/89 [00:00<00:00, 494.61 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 89/89 [00:00<00:00, 574.84 examples/s] | |
INFO 2024-08-17 17:08:37,060 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:08:37,061 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 89 | |
}) | |
INFO 2024-08-17 17:09:26,565 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:09:26,565 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 81 | |
}) | |
INFO 2024-08-17 17:09:35,732 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:09:35,732 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 80 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 429.62 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 80/80 [00:00<00:00, 525.55 examples/s] | |
INFO 2024-08-17 17:09:36,318 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:09:36,318 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 79 | |
}) | |
INFO 2024-08-17 17:09:36,698 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:09:36,698 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 79 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 79/79 [00:00<00:00, 402.35 examples/s] | |
INFO 2024-08-17 17:09:37,007 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:09:37,007 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 79 | |
}) | |
INFO 2024-08-17 17:09:48,888 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:09:48,888 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 79 | |
}) | |
INFO 2024-08-17 17:10:05,426 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:10:05,426 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 64 | |
}) | |
INFO 2024-08-17 17:10:22,315 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:10:22,315 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:10:38,719 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:10:38,719 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 58 | |
}) | |
INFO 2024-08-17 17:10:44,887 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:10:44,887 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 56 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 268.45 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 313.30 examples/s] | |
INFO 2024-08-17 17:10:45,517 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:10:45,518 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 56 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 137.04 examples/s] | |
INFO 2024-08-17 17:10:46,045 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_pattern_recognition/data_checkpoint_ee3e51365a8a4f518f511c1fb4fab57e.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 201.56ba/s] | |
INFO 2024-08-17 17:10:46,059 instructlab.sdg:410: Generated 81 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 56/56 [00:00<00:00, 241.29 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 186.02ba/s] | |
INFO 2024-08-17 17:10:46,486 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_area, generating from scratch | |
INFO 2024-08-17 17:10:46,486 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:10:46,489 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:10:46,489 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:11:02,942 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:11:02,942 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 17:11:11,941 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:11:11,941 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 95 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 95/95 [00:00<00:00, 538.01 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 95/95 [00:00<00:00, 628.36 examples/s] | |
INFO 2024-08-17 17:11:12,495 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:11:12,495 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:11:19,729 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:11:19,729 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:11:26,352 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:11:26,353 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 60 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 327.09 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 395.29 examples/s] | |
INFO 2024-08-17 17:11:26,909 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:11:26,909 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:11:27,149 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:11:27,149 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 60 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:00<00:00, 282.20 examples/s] | |
INFO 2024-08-17 17:11:27,477 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:11:27,477 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:11:38,321 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:11:38,321 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:11:57,533 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:11:57,533 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:12:17,758 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:12:17,759 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:12:39,745 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:12:39,745 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 60 | |
}) | |
INFO 2024-08-17 17:12:47,100 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:12:47,100 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 57 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 250.28 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 325.06 examples/s] | |
INFO 2024-08-17 17:12:47,726 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:12:47,726 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 57 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 101.38 examples/s] | |
INFO 2024-08-17 17:12:48,415 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_area/data_checkpoint_1936e95ebd4b4d5ebc073acd6b4519b7.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 163.99ba/s] | |
INFO 2024-08-17 17:12:48,429 instructlab.sdg:410: Generated 82 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 57/57 [00:00<00:00, 253.95 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 139.69ba/s] | |
INFO 2024-08-17 17:12:48,841 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_mensurational, generating from scratch | |
INFO 2024-08-17 17:12:48,841 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:12:48,844 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:12:48,844 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:13:10,137 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:13:10,137 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 88 | |
}) | |
INFO 2024-08-17 17:13:17,298 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:13:17,298 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 88 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88/88 [00:00<00:00, 520.81 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 88/88 [00:00<00:00, 606.86 examples/s] | |
INFO 2024-08-17 17:13:17,823 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:13:17,823 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 80 | |
}) | |
INFO 2024-08-17 17:13:30,355 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:13:30,355 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 17:13:39,065 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:13:39,065 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 76 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 445.64 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 509.97 examples/s] | |
INFO 2024-08-17 17:13:39,602 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:13:39,603 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 17:13:39,964 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:13:39,964 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 76 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 76/76 [00:00<00:00, 393.50 examples/s] | |
INFO 2024-08-17 17:13:40,268 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:13:40,268 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 17:13:53,703 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:13:53,703 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 76 | |
}) | |
INFO 2024-08-17 17:14:15,061 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:14:15,061 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 75 | |
}) | |
INFO 2024-08-17 17:14:40,390 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:14:40,390 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 75 | |
}) | |
INFO 2024-08-17 17:15:10,692 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:15:10,693 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 70 | |
}) | |
INFO 2024-08-17 17:15:18,823 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:15:18,823 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 70 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 321.96 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 399.93 examples/s] | |
INFO 2024-08-17 17:15:19,443 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:15:19,443 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 70 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 166.18 examples/s] | |
INFO 2024-08-17 17:15:19,974 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_mensurational/data_checkpoint_ef922b25cf0247cd9a1982e2a639b7f1.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 150.37ba/s] | |
INFO 2024-08-17 17:15:19,988 instructlab.sdg:410: Generated 83 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 70/70 [00:00<00:00, 312.72 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 131.95ba/s] | |
INFO 2024-08-17 17:15:20,403 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_arithmetic_reasoning, generating from scratch | |
INFO 2024-08-17 17:15:20,403 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:15:20,406 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:15:20,406 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:15:46,724 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:15:46,724 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 17:15:52,150 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:15:52,150 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 90 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 500.95 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 574.57 examples/s] | |
INFO 2024-08-17 17:15:52,715 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:15:52,715 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 17:16:04,821 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:16:04,821 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 86 | |
}) | |
INFO 2024-08-17 17:16:13,435 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:16:13,436 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 85 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 477.55 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 549.87 examples/s] | |
INFO 2024-08-17 17:16:13,990 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:16:13,990 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 17:16:14,442 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:16:14,442 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 85 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 414.66 examples/s] | |
INFO 2024-08-17 17:16:14,764 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:16:14,764 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 85 | |
}) | |
INFO 2024-08-17 17:16:28,067 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:16:28,067 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 17:16:51,177 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:16:51,177 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 83 | |
}) | |
INFO 2024-08-17 17:17:20,443 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:17:20,443 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 83 | |
}) | |
INFO 2024-08-17 17:17:49,452 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:17:49,452 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 82 | |
}) | |
INFO 2024-08-17 17:17:58,845 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:17:58,845 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 79 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 79/79 [00:00<00:00, 351.44 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 79/79 [00:00<00:00, 433.27 examples/s] | |
INFO 2024-08-17 17:17:59,509 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:17:59,509 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 77 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:00<00:00, 171.03 examples/s] | |
INFO 2024-08-17 17:18:00,094 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_arithmetic_reasoning/data_checkpoint_d37ab76f789242d5a6d12293e46fcf7c.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 130.00ba/s] | |
INFO 2024-08-17 17:18:00,110 instructlab.sdg:410: Generated 84 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:00<00:00, 318.82 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 117.83ba/s] | |
INFO 2024-08-17 17:18:00,540 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_time_series, generating from scratch | |
INFO 2024-08-17 17:18:00,541 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:18:00,544 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:18:00,544 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:18:51,590 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:18:51,590 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:18:59,005 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:18:59,005 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 167.94 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 191.47 examples/s] | |
INFO 2024-08-17 17:18:59,567 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:18:59,567 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:19:13,129 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:19:13,129 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:19:14,697 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:19:14,697 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 1 | |
}) | |
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
WARNING 2024-08-17 17:19:14,698 datasets.arrow_dataset:3092: num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 276.69 examples/s] | |
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
WARNING 2024-08-17 17:19:14,705 datasets.arrow_dataset:3092: num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
Filter: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 499.08 examples/s] | |
INFO 2024-08-17 17:19:14,709 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:19:14,709 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:19:14,806 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:19:14,806 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 1 | |
}) | |
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
WARNING 2024-08-17 17:19:14,806 datasets.arrow_dataset:3092: num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 243.27 examples/s] | |
INFO 2024-08-17 17:19:14,815 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:19:14,815 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:19:17,410 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:19:17,410 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:19:20,678 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:19:20,679 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:19:23,480 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:19:23,480 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:19:35,585 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:19:35,585 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:19:37,049 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:19:37,050 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 1 | |
}) | |
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
WARNING 2024-08-17 17:19:37,050 datasets.arrow_dataset:3092: num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 216.08 examples/s] | |
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
WARNING 2024-08-17 17:19:37,058 datasets.arrow_dataset:3092: num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
Filter: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 449.21 examples/s] | |
INFO 2024-08-17 17:19:37,061 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:19:37,061 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 1 | |
}) | |
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
WARNING 2024-08-17 17:19:37,061 datasets.arrow_dataset:3092: num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 221.77 examples/s] | |
INFO 2024-08-17 17:19:37,068 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_time_series/data_checkpoint_a56b680912ff44bbaed85267641dec30.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 524.03ba/s] | |
INFO 2024-08-17 17:19:37,078 instructlab.sdg:410: Generated 85 samples | |
num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
WARNING 2024-08-17 17:19:37,078 datasets.arrow_dataset:3092: num_proc must be <= 1. Reducing num_proc to 1 for dataset of size 1. | |
Map: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 180.18 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 814.74ba/s] | |
INFO 2024-08-17 17:19:37,158 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_distance_conversion, generating from scratch | |
INFO 2024-08-17 17:19:37,158 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:19:37,161 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:19:37,161 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 17:19:50,849 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:19:50,849 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 155 | |
}) | |
INFO 2024-08-17 17:19:58,879 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:19:58,879 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 155 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 155/155 [00:00<00:00, 844.66 examples/s] | |
Filter (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 155/155 [00:00<00:00, 987.97 examples/s] | |
INFO 2024-08-17 17:19:59,445 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:19:59,445 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 140 | |
}) | |
INFO 2024-08-17 17:20:09,757 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:20:09,757 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 133 | |
}) | |
INFO 2024-08-17 17:20:21,726 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:20:21,726 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 133 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 133/133 [00:00<00:00, 729.99 examples/s] | |
Filter (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 133/133 [00:00<00:00, 784.74 examples/s] | |
INFO 2024-08-17 17:20:22,303 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:20:22,303 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 133 | |
}) | |
INFO 2024-08-17 17:20:22,666 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:20:22,666 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 133 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 133/133 [00:00<00:00, 651.57 examples/s] | |
INFO 2024-08-17 17:20:22,986 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:20:22,986 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 133 | |
}) | |
INFO 2024-08-17 17:20:40,376 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:20:40,376 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 131 | |
}) | |
INFO 2024-08-17 17:21:09,035 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:21:09,035 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 127 | |
}) | |
INFO 2024-08-17 17:21:38,958 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:21:38,958 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 124 | |
}) | |
INFO 2024-08-17 17:22:11,402 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:22:11,402 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 123 | |
}) | |
INFO 2024-08-17 17:22:23,236 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:22:23,236 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 123 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 123/123 [00:00<00:00, 535.41 examples/s] | |
Filter (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 123/123 [00:00<00:00, 693.11 examples/s] | |
INFO 2024-08-17 17:22:23,873 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:22:23,873 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 121 | |
}) | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 121/121 [00:00<00:00, 273.65 examples/s] | |
INFO 2024-08-17 17:22:24,427 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_distance_conversion/data_checkpoint_2834cf2ed8af4958a8335d4246941890.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 104.09ba/s] | |
INFO 2024-08-17 17:22:24,444 instructlab.sdg:410: Generated 86 samples | |
Map (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 121/121 [00:00<00:00, 525.71 examples/s] | |
Creating json from Arrow format: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 94.34ba/s] | |
INFO 2024-08-17 17:22:24,875 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_arithmetic_w_grammar, generating from scratch | |
INFO 2024-08-17 17:22:24,875 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:22:24,878 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:22:24,878 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:22:44,726 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:22:44,726 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 17:22:50,532 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:22:50,532 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 90 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 514.98 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 90/90 [00:00<00:00, 594.48 examples/s] | |
INFO 2024-08-17 17:22:51,079 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:22:51,079 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 90 | |
}) | |
INFO 2024-08-17 17:22:58,088 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:22:58,088 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 87 | |
}) | |
INFO 2024-08-17 17:23:07,111 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:23:07,111 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 85 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 502.39 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 85/85 [00:00<00:00, 577.87 examples/s] | |
INFO 2024-08-17 17:23:07,641 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:23:07,641 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 17:23:07,998 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:23:07,999 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 84 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 84/84 [00:00<00:00, 411.21 examples/s] | |
INFO 2024-08-17 17:23:08,317 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:23:08,317 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 17:23:21,115 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:23:21,115 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 84 | |
}) | |
INFO 2024-08-17 17:23:43,053 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:23:43,053 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 81 | |
}) | |
INFO 2024-08-17 17:24:06,847 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:24:06,847 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 81 | |
}) | |
INFO 2024-08-17 17:24:29,793 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:24:29,793 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 74 | |
}) | |
INFO 2024-08-17 17:24:37,403 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:24:37,404 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 69 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 69/69 [00:00<00:00, 296.49 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 69/69 [00:00<00:00, 393.46 examples/s] | |
INFO 2024-08-17 17:24:38,042 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:24:38,043 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 66 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 66/66 [00:00<00:00, 152.99 examples/s] | |
INFO 2024-08-17 17:24:38,583 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_arithmetic_w_grammar/data_checkpoint_7b0d14e86ef84942bda0eaea56d338f4.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 189.39ba/s] | |
INFO 2024-08-17 17:24:38,596 instructlab.sdg:410: Generated 87 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 66/66 [00:00<00:00, 297.10 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 167.83ba/s] | |
INFO 2024-08-17 17:24:39,009 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_reasoning, generating from scratch | |
INFO 2024-08-17 17:24:39,009 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:24:39,012 instructlab.sdg.pipeline:197: Running block: gen_questions | |
INFO 2024-08-17 17:24:39,012 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:25:17,930 instructlab.sdg.pipeline:197: Running block: eval_questions | |
INFO 2024-08-17 17:25:17,931 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question'], | |
num_rows: 98 | |
}) | |
INFO 2024-08-17 17:25:23,627 instructlab.sdg.pipeline:197: Running block: filter_questions | |
INFO 2024-08-17 17:25:23,627 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 98 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [00:00<00:00, 570.43 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 98/98 [00:00<00:00, 664.87 examples/s] | |
INFO 2024-08-17 17:25:24,159 instructlab.sdg.pipeline:197: Running block: gen_responses | |
INFO 2024-08-17 17:25:24,159 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question'], | |
num_rows: 97 | |
}) | |
INFO 2024-08-17 17:25:33,383 instructlab.sdg.pipeline:197: Running block: evaluate_qa_pair | |
INFO 2024-08-17 17:25:33,383 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 94 | |
}) | |
INFO 2024-08-17 17:25:42,756 instructlab.sdg.pipeline:197: Running block: filter_qa_pair | |
INFO 2024-08-17 17:25:42,756 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 94 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 94/94 [00:00<00:00, 540.54 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 94/94 [00:00<00:00, 630.31 examples/s] | |
INFO 2024-08-17 17:25:43,298 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:25:43,298 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response'], | |
num_rows: 94 | |
}) | |
INFO 2024-08-17 17:25:43,805 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:25:43,806 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route'], | |
num_rows: 94 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 94/94 [00:00<00:00, 459.57 examples/s] | |
INFO 2024-08-17 17:25:44,129 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:25:44,130 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 94 | |
}) | |
INFO 2024-08-17 17:25:58,334 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:25:58,334 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 94 | |
}) | |
INFO 2024-08-17 17:26:22,062 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:26:22,062 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 87 | |
}) | |
INFO 2024-08-17 17:26:49,933 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:26:49,933 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 87 | |
}) | |
INFO 2024-08-17 17:27:18,884 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:27:18,885 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 80 | |
}) | |
INFO 2024-08-17 17:27:27,643 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:27:27,643 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 78 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 78/78 [00:00<00:00, 359.96 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 78/78 [00:00<00:00, 453.71 examples/s] | |
INFO 2024-08-17 17:27:28,283 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:27:28,284 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_question', 'seed_response', 'question', 'response', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 77 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:00<00:00, 182.86 examples/s] | |
INFO 2024-08-17 17:27:28,823 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_STEM_math_reasoning/data_checkpoint_cb38d9a26b3047c5b6a79c2de5bd326d.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 150.44ba/s] | |
INFO 2024-08-17 17:27:28,838 instructlab.sdg:410: Generated 88 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 77/77 [00:00<00:00, 339.82 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 129.85ba/s] | |
INFO 2024-08-17 17:27:29,280 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_concise, generating from scratch | |
INFO 2024-08-17 17:27:29,280 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:27:29,283 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:27:29,283 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:27:37,942 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:27:37,942 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 4 | |
}) | |
INFO 2024-08-17 17:27:40,326 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:27:40,326 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 6 | |
}) | |
INFO 2024-08-17 17:27:42,758 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:27:42,758 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 4 | |
}) | |
num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
WARNING 2024-08-17 17:27:42,758 datasets.arrow_dataset:3092: num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
Map (num_proc=4): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 27.63 examples/s] | |
num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
WARNING 2024-08-17 17:27:42,978 datasets.arrow_dataset:3092: num_proc must be <= 4. Reducing num_proc to 4 for dataset of size 4. | |
Filter (num_proc=4): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 31.73 examples/s] | |
INFO 2024-08-17 17:27:43,179 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:27:43,179 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:27:45,575 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:27:45,575 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:27:46,947 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:27:46,947 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 17:27:46,947 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 20.93 examples/s] | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 17:27:47,161 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Filter (num_proc=3): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 22.55 examples/s] | |
INFO 2024-08-17 17:27:47,361 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:27:47,362 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 17:27:47,362 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 18.67 examples/s] | |
INFO 2024-08-17 17:27:47,577 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:27:47,577 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:27:47,727 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:27:47,727 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 3 | |
}) | |
num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
WARNING 2024-08-17 17:27:47,727 datasets.arrow_dataset:3092: num_proc must be <= 3. Reducing num_proc to 3 for dataset of size 3. | |
Map (num_proc=3): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 18.60 examples/s] | |
INFO 2024-08-17 17:27:47,960 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:27:47,960 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 3 | |
}) | |
INFO 2024-08-17 17:27:51,080 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:27:51,080 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 2 | |
}) | |
INFO 2024-08-17 17:27:54,041 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:27:54,042 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 2 | |
}) | |
INFO 2024-08-17 17:27:57,502 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:27:57,502 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 2 | |
}) | |
INFO 2024-08-17 17:27:59,506 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:27:59,506 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 2 | |
}) | |
INFO 2024-08-17 17:28:00,775 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:28:00,775 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 2 | |
}) | |
num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2. | |
WARNING 2024-08-17 17:28:00,775 datasets.arrow_dataset:3092: num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2. | |
Map (num_proc=2): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 13.35 examples/s] | |
num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2. | |
WARNING 2024-08-17 17:28:00,977 datasets.arrow_dataset:3092: num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2. | |
Filter (num_proc=2): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 16.43 examples/s] | |
INFO 2024-08-17 17:28:01,135 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:28:01,135 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 2 | |
}) | |
num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2. | |
WARNING 2024-08-17 17:28:01,135 datasets.arrow_dataset:3092: num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2. | |
Map (num_proc=2): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 12.77 examples/s] | |
INFO 2024-08-17 17:28:01,335 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_concise/data_checkpoint_b2124dccf1ec412fa0cf5140fccc1265.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 494.90ba/s] | |
INFO 2024-08-17 17:28:01,346 instructlab.sdg:410: Generated 89 samples | |
num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2. | |
WARNING 2024-08-17 17:28:01,346 datasets.arrow_dataset:3092: num_proc must be <= 2. Reducing num_proc to 2 for dataset of size 2. | |
Map (num_proc=2): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 12.66 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 497.60ba/s] | |
INFO 2024-08-17 17:28:01,617 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_five_point, generating from scratch | |
INFO 2024-08-17 17:28:01,617 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:28:01,620 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:28:01,620 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:28:12,496 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:28:12,496 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:28:16,147 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:28:16,148 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:28:22,202 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:28:22,202 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 156.92 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 190.85 examples/s] | |
INFO 2024-08-17 17:28:22,778 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:28:22,778 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:28:33,371 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:28:33,371 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:28:38,660 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:28:38,660 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 141.95 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 167.91 examples/s] | |
INFO 2024-08-17 17:28:39,253 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:28:39,253 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 81.05 examples/s] | |
INFO 2024-08-17 17:28:39,691 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:28:39,691 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:28:40,749 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:28:40,750 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 118.69 examples/s] | |
INFO 2024-08-17 17:28:41,112 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:28:41,112 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:28:51,758 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:28:51,758 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:29:06,468 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:29:06,469 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:29:18,800 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:29:18,800 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:29:38,383 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:29:38,383 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 17:29:44,249 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:29:44,249 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 25 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 102.87 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 132.83 examples/s] | |
INFO 2024-08-17 17:29:44,915 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:29:44,915 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 25 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 38.73 examples/s] | |
INFO 2024-08-17 17:29:45,671 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_five_point/data_checkpoint_db1fa0cf5a1747c18bff1fdd9313d95f.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 149.78ba/s] | |
INFO 2024-08-17 17:29:45,686 instructlab.sdg:410: Generated 90 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 103.07 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 141.14ba/s] | |
INFO 2024-08-17 17:29:46,133 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_detailed, generating from scratch | |
INFO 2024-08-17 17:29:46,134 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:29:46,137 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:29:46,137 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:29:57,782 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:29:57,782 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 17:30:01,974 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:30:01,974 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:30:07,616 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:30:07,616 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 156.33 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 190.66 examples/s] | |
INFO 2024-08-17 17:30:08,182 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:30:08,182 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:30:22,536 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:30:22,536 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 17:30:27,579 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:30:27,579 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 125.16 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 150.14 examples/s] | |
INFO 2024-08-17 17:30:28,141 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:30:28,141 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 71.47 examples/s] | |
INFO 2024-08-17 17:30:28,590 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:30:28,591 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 17:30:29,682 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:30:29,682 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 105.70 examples/s] | |
INFO 2024-08-17 17:30:30,026 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:30:30,026 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 17:30:40,096 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:30:40,096 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 24 | |
}) | |
INFO 2024-08-17 17:30:53,006 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:30:53,006 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 17:31:03,509 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:31:03,509 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 17:31:22,871 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:31:22,871 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 17:31:28,402 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:31:28,403 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 21 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 87.75 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 111.20 examples/s] | |
INFO 2024-08-17 17:31:29,078 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:31:29,078 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 21 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 41.20 examples/s] | |
INFO 2024-08-17 17:31:29,701 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_detailed/data_checkpoint_5487e458ceeb4614b6513f289b06460a.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 193.89ba/s] | |
INFO 2024-08-17 17:31:29,714 instructlab.sdg:410: Generated 91 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 87.57 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 163.85ba/s] | |
INFO 2024-08-17 17:31:30,143 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_high_level_outline, generating from scratch | |
INFO 2024-08-17 17:31:30,144 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:31:30,147 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:31:30,147 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:31:37,301 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:31:37,301 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 17:31:41,707 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:31:41,707 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 17:31:46,754 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:31:46,754 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 25 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 136.72 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 159.13 examples/s] | |
INFO 2024-08-17 17:31:47,309 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:31:47,309 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 17:31:56,951 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:31:56,951 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:32:01,236 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:32:01,236 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 103.67 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 122.10 examples/s] | |
INFO 2024-08-17 17:32:01,827 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:32:01,827 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 58.60 examples/s] | |
INFO 2024-08-17 17:32:02,294 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:32:02,294 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:32:02,980 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:32:02,980 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 93.77 examples/s] | |
INFO 2024-08-17 17:32:03,317 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:32:03,317 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:32:12,823 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:32:12,823 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:32:26,033 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:32:26,033 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:32:36,394 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:32:36,394 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:32:54,924 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:32:54,924 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 18 | |
}) | |
INFO 2024-08-17 17:33:00,404 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:33:00,405 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 18 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 78.59 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 94.72 examples/s] | |
INFO 2024-08-17 17:33:01,062 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:33:01,063 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 18 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 36.07 examples/s] | |
INFO 2024-08-17 17:33:01,680 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_high_level_outline/data_checkpoint_14d9135d768a4c0a8bf5b98c23848017.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 204.95ba/s] | |
INFO 2024-08-17 17:33:01,693 instructlab.sdg:410: Generated 92 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 71.48 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 168.45ba/s] | |
INFO 2024-08-17 17:33:02,155 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_one_line, generating from scratch | |
INFO 2024-08-17 17:33:02,155 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:33:02,159 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:33:02,159 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:33:18,941 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:33:18,941 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:33:23,952 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:33:23,952 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:33:29,914 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:33:29,914 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 155.61 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 189.08 examples/s] | |
INFO 2024-08-17 17:33:30,482 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:33:30,482 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:33:37,833 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:33:37,833 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 17:33:42,416 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:33:42,416 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 23 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 122.66 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 144.38 examples/s] | |
INFO 2024-08-17 17:33:42,973 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:33:42,973 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 23 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 69.63 examples/s] | |
INFO 2024-08-17 17:33:43,423 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:33:43,423 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 17:33:44,785 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:33:44,785 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 23 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 100.13 examples/s] | |
INFO 2024-08-17 17:33:45,132 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:33:45,132 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 17:33:54,860 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:33:54,860 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 23 | |
}) | |
INFO 2024-08-17 17:34:06,839 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:34:06,839 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 17:34:17,910 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:34:17,910 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 21 | |
}) | |
INFO 2024-08-17 17:34:30,693 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:34:30,693 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 20 | |
}) | |
INFO 2024-08-17 17:34:35,783 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:34:35,783 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 20 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 83.91 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:00<00:00, 104.28 examples/s] | |
INFO 2024-08-17 17:34:36,461 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:34:36,461 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 19 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 30.66 examples/s] | |
INFO 2024-08-17 17:34:37,193 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_summarization_wiki_insights_one_line/data_checkpoint_53c604d7baa4476a875d770b447bbdc7.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 188.70ba/s] | |
INFO 2024-08-17 17:34:37,207 instructlab.sdg:410: Generated 93 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 19/19 [00:00<00:00, 77.08 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 166.69ba/s] | |
INFO 2024-08-17 17:34:37,647 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_meeting_insights_executive_summaries, generating from scratch | |
INFO 2024-08-17 17:34:37,647 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:34:37,650 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:34:37,650 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:34:48,191 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:34:48,191 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:34:51,185 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:34:51,185 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:34:56,495 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:34:56,496 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 151.29 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 187.83 examples/s] | |
INFO 2024-08-17 17:34:57,086 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:34:57,086 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:35:03,476 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:35:03,477 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 17:35:08,581 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:35:08,581 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 153.22 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 178.49 examples/s] | |
INFO 2024-08-17 17:35:09,161 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:35:09,162 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 84.92 examples/s] | |
INFO 2024-08-17 17:35:09,627 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:35:09,627 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 17:35:10,858 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:35:10,858 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 122.24 examples/s] | |
INFO 2024-08-17 17:35:11,221 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:35:11,221 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 17:35:20,856 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:35:20,856 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:35:35,415 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:35:35,415 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 17:35:48,282 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:35:48,282 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 17:36:03,964 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:36:03,964 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 25 | |
}) | |
INFO 2024-08-17 17:36:09,205 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:36:09,205 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 25 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 99.12 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 128.12 examples/s] | |
INFO 2024-08-17 17:36:09,903 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:36:09,903 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 25 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 46.95 examples/s] | |
INFO 2024-08-17 17:36:10,560 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_meeting_insights_executive_summaries/data_checkpoint_e82e6118f1644b66a84fe33367dad051.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 134.47ba/s] | |
INFO 2024-08-17 17:36:10,577 instructlab.sdg:410: Generated 94 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 100.38 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 130.87ba/s] | |
INFO 2024-08-17 17:36:11,060 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_meeting_insights_corporate_email, generating from scratch | |
INFO 2024-08-17 17:36:11,060 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:36:11,064 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:36:11,064 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:36:20,471 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:36:20,471 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:36:23,584 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:36:23,584 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:36:32,104 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:36:32,104 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 156.29 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 182.32 examples/s] | |
INFO 2024-08-17 17:36:32,693 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:36:32,693 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 14 | |
}) | |
INFO 2024-08-17 17:36:45,506 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:36:45,506 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 11 | |
}) | |
INFO 2024-08-17 17:36:49,216 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:36:49,216 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 11 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 58.25 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [00:00<00:00, 68.17 examples/s] | |
INFO 2024-08-17 17:36:49,801 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:36:49,801 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 10 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 30.54 examples/s] | |
INFO 2024-08-17 17:36:50,245 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:36:50,245 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:36:50,693 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:36:50,693 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 10 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 45.85 examples/s] | |
INFO 2024-08-17 17:36:51,040 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:36:51,040 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:36:58,319 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:36:58,319 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:37:07,653 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:37:07,653 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 17:37:16,790 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:37:16,790 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 17:37:31,215 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:37:31,216 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 17:37:34,459 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:37:34,460 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 35.04 examples/s] | |
Filter (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 41.18 examples/s] | |
INFO 2024-08-17 17:37:35,130 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:37:35,130 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 15.77 examples/s] | |
INFO 2024-08-17 17:37:35,769 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_meeting_insights_corporate_email/data_checkpoint_9f9f021e3ea741c0a5608d119a960fc8.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 283.30ba/s] | |
INFO 2024-08-17 17:37:35,782 instructlab.sdg:410: Generated 95 samples | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 31.45 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 228.71ba/s] | |
INFO 2024-08-17 17:37:36,233 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_meeting_insights_minutes_of_meeting, generating from scratch | |
INFO 2024-08-17 17:37:36,233 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:37:36,236 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:37:36,236 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:37:46,413 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:37:46,413 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:37:49,456 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:37:49,456 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:37:54,077 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:37:54,077 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 140.12 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 172.54 examples/s] | |
INFO 2024-08-17 17:37:54,670 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:37:54,670 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:38:08,412 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:38:08,413 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:38:13,776 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:38:13,776 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 136.00 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 164.82 examples/s] | |
INFO 2024-08-17 17:38:14,373 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:38:14,373 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 79.80 examples/s] | |
INFO 2024-08-17 17:38:14,825 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:38:14,826 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:38:15,980 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:38:15,980 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 118.27 examples/s] | |
INFO 2024-08-17 17:38:16,348 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:38:16,349 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:38:26,149 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:38:26,149 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:38:40,763 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:38:40,763 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:38:55,533 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:38:55,533 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 17:39:17,108 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:39:17,108 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 26 | |
}) | |
INFO 2024-08-17 17:39:23,117 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:39:23,117 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 98.68 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 123.28 examples/s] | |
INFO 2024-08-17 17:39:23,824 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:39:23,824 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 24 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 34.95 examples/s] | |
INFO 2024-08-17 17:39:24,638 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_meeting_insights_minutes_of_meeting/data_checkpoint_64aa2b23600e4fdc9e9adba72932b50f.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 140.56ba/s] | |
INFO 2024-08-17 17:39:24,654 instructlab.sdg:410: Generated 96 samples | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:00<00:00, 96.67 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 114.43ba/s] | |
INFO 2024-08-17 17:39:25,110 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_meeting_insights_action_items, generating from scratch | |
INFO 2024-08-17 17:39:25,110 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:39:25,114 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:39:25,114 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:39:38,458 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:39:38,458 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:39:41,538 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:39:41,538 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:39:46,641 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:39:46,642 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 30 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 154.67 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 184.10 examples/s] | |
INFO 2024-08-17 17:39:47,231 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:39:47,231 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:39:54,235 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:39:54,235 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:39:59,715 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:39:59,716 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 150.57 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 173.00 examples/s] | |
INFO 2024-08-17 17:40:00,286 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:40:00,286 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 81.56 examples/s] | |
INFO 2024-08-17 17:40:00,751 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:40:00,751 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:40:02,223 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:40:02,223 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 126.83 examples/s] | |
INFO 2024-08-17 17:40:02,572 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:40:02,572 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:40:12,557 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:40:12,557 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:40:26,897 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:40:26,898 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:40:42,021 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:40:42,021 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:40:58,909 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:40:58,909 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 27 | |
}) | |
INFO 2024-08-17 17:41:04,763 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:41:04,763 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 111.46 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 136.88 examples/s] | |
INFO 2024-08-17 17:41:05,474 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:41:05,474 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 27 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 51.07 examples/s] | |
INFO 2024-08-17 17:41:06,133 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_meeting_insights_action_items/data_checkpoint_4dda0a415d9f474cb67a3040d32b4570.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 152.25ba/s] | |
INFO 2024-08-17 17:41:06,148 instructlab.sdg:410: Generated 97 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 27/27 [00:00<00:00, 108.95 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 140.36ba/s] | |
INFO 2024-08-17 17:41:06,587 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_editing_grammar, generating from scratch | |
INFO 2024-08-17 17:41:06,587 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:41:06,591 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:41:06,591 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:41:18,624 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:41:18,624 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 17:41:23,390 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:41:23,390 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 32 | |
}) | |
INFO 2024-08-17 17:41:29,431 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:41:29,431 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 32 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 167.35 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:00<00:00, 196.71 examples/s] | |
INFO 2024-08-17 17:41:30,013 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:41:30,013 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:41:42,964 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:41:42,964 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 17:41:48,455 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:41:48,455 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 151.41 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 170.32 examples/s] | |
INFO 2024-08-17 17:41:49,032 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:41:49,032 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 82.25 examples/s] | |
INFO 2024-08-17 17:41:49,502 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:41:49,502 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:41:50,888 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:41:50,888 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 123.09 examples/s] | |
INFO 2024-08-17 17:41:51,237 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:41:51,237 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:42:01,773 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:42:01,773 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:42:17,193 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:42:17,193 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:42:29,917 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:42:29,917 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:42:51,958 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:42:51,958 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:43:00,476 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:43:00,476 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 109.42 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 142.37 examples/s] | |
INFO 2024-08-17 17:43:01,169 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:43:01,169 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 53.73 examples/s] | |
INFO 2024-08-17 17:43:01,822 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_editing_grammar/data_checkpoint_4fdbbe0414524894b5443c3651b421d7.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 119.27ba/s] | |
INFO 2024-08-17 17:43:01,840 instructlab.sdg:410: Generated 98 samples | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 109.75 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 109.16ba/s] | |
INFO 2024-08-17 17:43:02,305 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_editing_spelling, generating from scratch | |
INFO 2024-08-17 17:43:02,305 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:43:02,308 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:43:02,309 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:43:13,012 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:43:13,012 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 4 | |
}) | |
INFO 2024-08-17 17:43:15,079 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:43:15,079 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 12 | |
}) | |
INFO 2024-08-17 17:43:18,395 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:43:18,396 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 10 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 53.71 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 61.38 examples/s] | |
INFO 2024-08-17 17:43:18,980 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:43:18,980 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 17:43:21,961 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:43:21,961 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 9 | |
}) | |
INFO 2024-08-17 17:43:24,358 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:43:24,358 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 44.41 examples/s] | |
Filter (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 49.40 examples/s] | |
INFO 2024-08-17 17:43:24,926 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:43:24,926 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 24.72 examples/s] | |
INFO 2024-08-17 17:43:25,361 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:43:25,361 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 17:43:25,763 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:43:25,763 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 8 | |
}) | |
Map (num_proc=8): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 37.68 examples/s] | |
INFO 2024-08-17 17:43:26,097 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:43:26,097 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 17:43:31,736 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:43:31,736 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 17:43:38,837 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:43:38,837 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 8 | |
}) | |
INFO 2024-08-17 17:43:44,252 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:43:44,252 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 17:43:53,868 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:43:53,868 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 7 | |
}) | |
INFO 2024-08-17 17:43:56,715 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:43:56,715 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 6 | |
}) | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 17:43:56,715 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Map (num_proc=6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 28.62 examples/s] | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 17:43:57,041 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Filter (num_proc=6): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 9.67 examples/s] | |
INFO 2024-08-17 17:43:57,756 instructlab.sdg.pipeline:197: Running block: response_selector | |
INFO 2024-08-17 17:43:57,756 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 6 | |
}) | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 17:43:57,756 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Map (num_proc=6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 17.02 examples/s] | |
INFO 2024-08-17 17:43:58,207 instructlab.sdg.checkpointing:44: Saving checkpoint to /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_editing_spelling/data_checkpoint_82e5ab95874c4a46889f00a1ca98261d.jsonl | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 340.72ba/s] | |
INFO 2024-08-17 17:43:58,218 instructlab.sdg:410: Generated 99 samples | |
num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
WARNING 2024-08-17 17:43:58,219 datasets.arrow_dataset:3092: num_proc must be <= 6. Reducing num_proc to 6 for dataset of size 6. | |
Map (num_proc=6): 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 28.29 examples/s] | |
Creating json from Arrow format: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 311.36ba/s] | |
INFO 2024-08-17 17:43:58,606 instructlab.sdg.checkpointing:59: No existing checkpoints found in /var/mnt/inststg1/instructlab/generated/checkpoints/compositional_skills_writing_grounded_editing_punctuation, generating from scratch | |
INFO 2024-08-17 17:43:58,606 instructlab.sdg.pipeline:158: Running pipeline with multi-threaded batching. Using 10 workers for batches of size 8 | |
INFO 2024-08-17 17:43:58,610 instructlab.sdg.pipeline:197: Running block: gen_contexts | |
INFO 2024-08-17 17:43:58,610 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response'], | |
num_rows: 1 | |
}) | |
INFO 2024-08-17 17:44:04,050 instructlab.sdg.pipeline:197: Running block: gen_grounded_questions | |
INFO 2024-08-17 17:44:04,050 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context'], | |
num_rows: 10 | |
}) | |
INFO 2024-08-17 17:44:08,228 instructlab.sdg.pipeline:197: Running block: eval_grounded_questions | |
INFO 2024-08-17 17:44:08,228 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question'], | |
num_rows: 30 | |
}) | |
INFO 2024-08-17 17:44:13,330 instructlab.sdg.pipeline:197: Running block: filter_grounded_questions | |
INFO 2024-08-17 17:44:13,331 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'num_samples', 'question', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 157.94 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 181.84 examples/s] | |
INFO 2024-08-17 17:44:13,919 instructlab.sdg.pipeline:197: Running block: gen_grounded_responses | |
INFO 2024-08-17 17:44:13,919 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 17:44:21,688 instructlab.sdg.pipeline:197: Running block: evaluate_grounded_qa_pair | |
INFO 2024-08-17 17:44:21,688 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 17:44:26,350 instructlab.sdg.pipeline:197: Running block: filter_grounded_qa_pair | |
INFO 2024-08-17 17:44:26,350 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 160.12 examples/s] | |
Filter (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 178.88 examples/s] | |
INFO 2024-08-17 17:44:26,917 instructlab.sdg.pipeline:197: Running block: combine_question_and_context | |
INFO 2024-08-17 17:44:26,917 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 88.23 examples/s] | |
INFO 2024-08-17 17:44:27,362 instructlab.sdg.pipeline:197: Running block: router | |
INFO 2024-08-17 17:44:27,363 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 17:44:28,089 instructlab.sdg.pipeline:197: Running block: icl_populator | |
INFO 2024-08-17 17:44:28,090 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route'], | |
num_rows: 29 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 135.13 examples/s] | |
INFO 2024-08-17 17:44:28,423 instructlab.sdg.pipeline:197: Running block: analyzer | |
INFO 2024-08-17 17:44:28,423 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response'], | |
num_rows: 29 | |
}) | |
INFO 2024-08-17 17:44:38,531 instructlab.sdg.pipeline:197: Running block: critic | |
INFO 2024-08-17 17:44:38,531 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:44:52,500 instructlab.sdg.pipeline:197: Running block: planner | |
INFO 2024-08-17 17:44:52,500 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:45:04,231 instructlab.sdg.pipeline:197: Running block: revised_responder | |
INFO 2024-08-17 17:45:04,231 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'icl_query', 'icl_response', 'icl_analysis', 'icl_rubric', 'icl_critique', 'icl_plan', 'icl_revised_response', 'analysis', 'rubric', 'critique', 'plan'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:45:22,283 instructlab.sdg.pipeline:197: Running block: judge | |
INFO 2024-08-17 17:45:22,283 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response'], | |
num_rows: 28 | |
}) | |
INFO 2024-08-17 17:45:27,919 instructlab.sdg.pipeline:197: Running block: filter_judgement | |
INFO 2024-08-17 17:45:27,919 instructlab.sdg.pipeline:198: Dataset({ | |
features: ['task_description', 'seed_context', 'seed_question', 'seed_response', 'context', 'question', 'response', 'evaluation', 'score', 'route', 'analysis', 'rubric', 'critique', 'plan', 'revised_response', 'judgement', 'verdict'], | |
num_rows: 28 | |
}) | |
Map (num_proc=8): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 28/28 [00:00<00:00, 117.82 examples/s] | |
Filter (num_proc=8): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████� |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment