Skip to content

Instantly share code, notes, and snippets.

@ShawonAshraf
Created April 12, 2024 15:18
Show Gist options
  • Save ShawonAshraf/39814efbb352935b4398ce6151ae0e94 to your computer and use it in GitHub Desktop.
Save ShawonAshraf/39814efbb352935b4398ce6151ae0e94 to your computer and use it in GitHub Desktop.
Ollama + Distilabel
import os
import shutil
# Remove any previous on-disk state under ".cache/distilabel" (relative to the
# current working directory) before the pipeline is defined, so this run does
# not pick up results left over from an earlier one.
# NOTE(review): presumably this is distilabel's local cache directory — confirm
# against the installed distilabel version.
if os.path.exists(".cache/distilabel"):
    shutil.rmtree(".cache/distilabel")
from distilabel.steps.tasks import TextGeneration
from distilabel.steps import LoadHubDataset
from distilabel.pipeline.local import Pipeline
from distilabel.llms import OllamaLLM
from distilabel.steps.conversation import ConversationTemplate
from distilabel.steps.tasks.text_generation import TextGeneration
# NOTE: do not use huggingface_hub version 0.22; it breaks this script.
# Hugging Face Hub repository id of the dataset fed into the pipeline.
dataset_name = "isaacrehg/poetry-instructions"

# Everything below runs only when the file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    # Declare the pipeline: one dataset-loading step feeding one
    # text-generation step backed by a local Ollama model.
    with Pipeline("demo-run", description="derp") as pipeline:
        # global step
        load_dataset = LoadHubDataset(
            name="load_dataset",
            # Expose the dataset's "conversation" column as "instruction".
            # NOTE(review): presumably "instruction" is the input column the
            # TextGeneration task reads — confirm against distilabel docs.
            output_mappings={"conversation": "instruction"},
        )

        generate_with_mistral = TextGeneration(
            name="generate_with_mistral",
            llm=OllamaLLM(model="mistral"),
            num_generations=2,
        )

        # Route the loader's output into the generation step.
        load_dataset.connect(generate_with_mistral)

    # Runtime parameters are keyed by step name; the keys must match the
    # `name=` values given to the steps above.
    distiset = pipeline.run(
        use_cache=False,
        parameters={
            "load_dataset": {
                "repo_id": dataset_name,
                "split": "test",
            },
            "generate_with_mistral": {
                "llm": {
                    "host": "http://localhost:11434",
                    # ollama has a lower timeout, fails some steps
                    "timeout": 1200,
                },
            },
        },
    )

    print(distiset)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment