Skip to content

Instantly share code, notes, and snippets.

@alvarobartt
Last active July 6, 2024 14:22
Show Gist options
  • Save alvarobartt/355c7ec331c9b083d46362fdf5d8b4dc to your computer and use it in GitHub Desktop.
Save alvarobartt/355c7ec331c9b083d46362fdf5d8b4dc to your computer and use it in GitHub Desktop.
# pip install "distilabel[vllm]>=1.1.1"
# pip install flash-attn --no-build-isolation
# huggingface-cli login
import time
from distilabel.llms import vLLM
from distilabel.pipeline import Pipeline
from distilabel.steps import KeepColumns, LoadHubDataset
from distilabel.steps.tasks import PrometheusEval
if __name__ == "__main__":
    # Script entry point: build a three-step distilabel pipeline that loads an
    # instruction dataset, asks Prometheus 2 to judge each completion, keeps
    # only the columns of interest, and pushes the result to the Hub.

    # perf_counter() is monotonic, so the elapsed time reported below cannot
    # be skewed by system clock adjustments during a long run.
    start_time = time.perf_counter()

    with Pipeline(name="prometheus") as pipeline:
        # Load the "test" split and expose its "prompt"/"completion" columns
        # under the names "instruction"/"generation".
        load_dataset = LoadHubDataset(
            name="load_dataset",
            repo_id="HuggingFaceH4/instruction-dataset",
            split="test",
            output_mappings={"prompt": "instruction", "completion": "generation"},
        )

        task = PrometheusEval(
            name="task",
            llm=vLLM(
                model="prometheus-eval/prometheus-7b-v2.0",
                # Custom template: folds the first two messages into a single
                # [INST] ... [/INST] turn separated by a newline.
                # NOTE(review): presumably needed because the model's default
                # template does not handle a separate system message - confirm.
                chat_template="[INST] {{ messages[0]['content'] }}\n{{ messages[1]['content'] }}[/INST]",
            ),
            mode="absolute",
            rubric="factual-validity",
            reference=False,
            num_generations=1,
            group_generations=False,
        )

        keep_columns = KeepColumns(
            name="keep_columns",
            columns=["instruction", "generation", "feedback", "result", "model_name"],
        )

        # Wire the steps: dataset -> evaluation -> column selection.
        load_dataset >> task >> keep_columns  # type: ignore

    # Generation settings are supplied at run time, keyed by the step name.
    # NOTE(review): run() is invoked after the `with` block has closed -
    # confirm this matches the intended layout of the original script.
    distiset = pipeline.run(
        parameters={
            task.name: {  # type: ignore
                "llm": {
                    "generation_kwargs": {
                        "max_new_tokens": 1024,
                        "temperature": 0.7,
                    },
                },
            },
        },
    )

    elapsed = time.perf_counter() - start_time
    print(f"--- {elapsed} seconds ---")

    # Only publish when the run actually returned a dataset.
    if distiset is not None:
        distiset.push_to_hub("instruction-dataset-prometheus")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment