Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jkaunert/7143cbd41e2a7e33a2104687a7314682 to your computer and use it in GitHub Desktop.
Save jkaunert/7143cbd41e2a7e33a2104687a7314682 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "distilabel[hf-transformers, hf-inference-endpoints]",
# ]
# ///
from distilabel.models import InferenceEndpointsLLM
from distilabel.pipeline import InstructionResponsePipeline
repo_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
llm = InferenceEndpointsLLM(
model_id=repo_id,
tokenizer_id=repo_id,
magpie_pre_query_template="<|begin▁of▁sentence|>User: ",
use_magpie_template=True,
)
pipeline = InstructionResponsePipeline(llm=llm, batch_size=5, num_rows=10)
if __name__ == "__main__":
dataset = pipeline.run()
dataset.push_to_hub("davidberenstein1957/sft-dataset")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment