ShawonAshraf · April 12, 2024 15:18
diff --git a/main.py b/main.py
 import os
 import shutil

 # Remove the local `.cache/distilabel` directory, if present, before the
 # distilabel imports below are executed.
 _CACHE_DIR = ".cache/distilabel"
 if os.path.exists(_CACHE_DIR):
    shutil.rmtree(_CACHE_DIR)


 from distilabel.steps.tasks import TextGeneration
 from distilabel.steps import LoadHubDataset
 from distilabel.pipeline.local import Pipeline
 from distilabel.llms import OllamaLLM
 from distilabel.steps.conversation import ConversationTemplate
 from distilabel.steps.tasks.text_generation import TextGeneration
 # remember to not use huggingface_hub version 0.22. Breaks things here.

 # Repository id handed to the `load_dataset` step as its `repo_id`.
 dataset_name = "isaacrehg/poetry-instructions"

 if __name__ == "__main__":
    # Steps are declared inside the pipeline context and wired together there.
    with Pipeline("demo-run", description="derp") as pipeline:
        # global step
        hub_loader = LoadHubDataset(
            name="load_dataset",
            output_mappings={"conversation": "instruction"},
        )

        # The LLM is built separately so the task declaration stays short.
        mistral_llm = OllamaLLM(model="mistral")
        mistral_task = TextGeneration(
            name="generate_with_mistral",
            llm=mistral_llm,
            num_generations=2,
        )

        # Feed the loader's output into the generation task.
        hub_loader.connect(mistral_task)

    # Runtime parameters, keyed by the `name=` given to each step above.
    llm_settings = {
        "host": "http://localhost:11434",
        # Ollama's own timeout is lower than this and made some steps fail.
        "timeout": 1200,
    }
    step_parameters = {
        "load_dataset": {
            "repo_id": dataset_name,
            "split": "test",
        },
        "generate_with_mistral": {
            "llm": llm_settings,
        },
    }

    distiset = pipeline.run(use_cache=False, parameters=step_parameters)

    print(distiset)
	import os
	import shutil

	# Remove the local `.cache/distilabel` directory, if present, before the
	# distilabel imports below are executed. The `rmtree` call must be nested
	# under the `if`; without the indent the script does not parse.
	if os.path.exists(".cache/distilabel"):
	    shutil.rmtree(".cache/distilabel")


	from distilabel.steps.tasks import TextGeneration
	from distilabel.steps import LoadHubDataset
	from distilabel.pipeline.local import Pipeline
	from distilabel.llms import OllamaLLM
	from distilabel.steps.conversation import ConversationTemplate
	from distilabel.steps.tasks.text_generation import TextGeneration
	# remember to not use huggingface_hub version 0.22. Breaks things here.

	# Repository id handed to the `load_dataset` step as its `repo_id`.
	dataset_name = "isaacrehg/poetry-instructions"

	if __name__ == "__main__":
	    # Declare the steps inside the pipeline context and connect them.
	    with Pipeline("demo-run", description="derp") as pipeline:
	        # global step
	        load_dataset = LoadHubDataset(
	            name="load_dataset",
	            output_mappings={"conversation": "instruction"},
	        )

	        generate_with_mistral = TextGeneration(
	            name="generate_with_mistral",
	            llm=OllamaLLM(model="mistral"),
	            num_generations=2,
	        )

	        # Feed the loader's output into the generation task.
	        load_dataset.connect(generate_with_mistral)

	    # Run once the `with` block has closed. Runtime parameters are keyed
	    # by the `name=` given to each step above.
	    distiset = pipeline.run(
	        use_cache=False,
	        parameters={
	            "load_dataset": {
	                "repo_id": dataset_name,
	                "split": "test",
	            },
	            "generate_with_mistral": {
	                "llm": {
	                    "host": "http://localhost:11434",
	                    # ollama has a lower timeout, fails some steps
	                    "timeout": 1200,
	                },
	            },
	        },
	    )

	    print(distiset)