Created
April 12, 2024 15:18
-
-
Save ShawonAshraf/39814efbb352935b4398ce6151ae0e94 to your computer and use it in GitHub Desktop.
Ollama + Distilabel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Demo script: load a dataset from the Hugging Face Hub and generate text for
# each row with a model served by a local Ollama instance, using a two-step
# distilabel pipeline.

import os
import shutil

# Start from a clean slate: drop any cache directory left by a previous run
# before the pipeline is built.
if os.path.exists(".cache/distilabel"):
    shutil.rmtree(".cache/distilabel")

from distilabel.steps.tasks import TextGeneration
from distilabel.steps import LoadHubDataset
from distilabel.pipeline.local import Pipeline
from distilabel.llms import OllamaLLM
# NOTE(review): ConversationTemplate is imported but not used below - kept in
# case it is needed elsewhere; confirm before removing.
from distilabel.steps.conversation import ConversationTemplate
# NOTE(review): this rebinds the TextGeneration name already imported above
# from the parent package - presumably the same class; confirm and dedupe.
from distilabel.steps.tasks.text_generation import TextGeneration

# Remember NOT to use huggingface_hub version 0.22 - it breaks things here.

dataset_name = "isaacrehg/poetry-instructions"

if __name__ == "__main__":
    with Pipeline("demo-run", description="derp") as pipeline:
        # global step
        load_dataset = LoadHubDataset(
            name="load_dataset",
            # Expose the dataset's "conversation" column under the name
            # "instruction" for the downstream step.
            output_mappings={"conversation": "instruction"},
        )

        generate_with_mistral = TextGeneration(
            name="generate_with_mistral",
            llm=OllamaLLM(model="mistral"),
            num_generations=2,
        )

        # Feed the loaded rows into the generation task.
        load_dataset.connect(generate_with_mistral)

    # Runtime parameters are keyed by the step names declared above.
    distiset = pipeline.run(
        use_cache=False,
        parameters={
            "load_dataset": {
                "repo_id": dataset_name,
                "split": "test",
            },
            "generate_with_mistral": {
                "llm": {
                    "host": "http://localhost:11434",
                    # ollama has a lower timeout, fails some steps
                    "timeout": 1200,
                },
            },
        },
    )

    print(distiset)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment