Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save sugatoray/85c5e3518ec5310d575e21092d685806 to your computer and use it in GitHub Desktop.
Save sugatoray/85c5e3518ec5310d575e21092d685806 to your computer and use it in GitHub Desktop.
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "distilabel[hf-transformers, hf-inference-endpoints]",
# ]
# ///
from distilabel.models import InferenceEndpointsLLM
from distilabel.pipeline import InstructionResponsePipeline
# Hugging Face Hub repo id, passed below as both the model and the tokenizer.
repo_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# LLM client with the Magpie template enabled. The pre-query template is the
# text prepended before generation; here it is the model's begin-of-sentence
# marker followed by an open "User: " turn.
# NOTE(review): the special characters in the template (fullwidth bars and
# U+2581 separators) must be kept byte-for-byte; presumably they mirror the
# DeepSeek tokenizer's special token. Confirm against the model's chat template.
llm = InferenceEndpointsLLM(
    model_id=repo_id,
    tokenizer_id=repo_id,
    magpie_pre_query_template="<|begin▁of▁sentence|>User: ",
    use_magpie_template=True,
)

# Pipeline configured for 10 rows, processed in batches of 5.
pipeline = InstructionResponsePipeline(llm=llm, batch_size=5, num_rows=10)

# Only run (and upload) when executed as a script, never on import.
if __name__ == "__main__":
    dataset = pipeline.run()
    # NOTE(review): the destination repo is hard-coded; pushing presumably
    # requires write access to that namespace, so change it to your own.
    dataset.push_to_hub("davidberenstein1957/sft-dataset")
@sugatoray
Copy link
Author

sugatoray commented Jan 27, 2025

Source: LinkedIn Post

@sugatoray
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment