# Gist: swayson/894865309c5839ff109662c10b9e714d (last active June 26, 2025 04:56).
# Note from GitHub: this file contains hidden or bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open the file
# in an editor that reveals hidden Unicode characters.
import contextlib
import io
import os
import re

import dspy
# --- 1. Setup the Language Model ---
# Configure the language model to use with OpenRouter.
# This example uses Google's Gemma 3 27B instruction-tuned model (free tier).
# The API key is read from the OPENROUTER_API_KEY environment variable so that
# no secret lives in the source file; it falls back to "" when the variable is
# unset, which matches the previous hard-coded value.
llm = dspy.LM(
    # Alternative model: "openrouter/qwen/qwq-32b:free"
    model="openrouter/google/gemma-3-27b-it:free",
    api_base="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY", ""),
    model_type="chat",  # Specify model_type as 'chat' for chat-based models
)

# You can also configure a potentially more capable model as the teacher/judge.
# For this example, we'll use the same model for simplicity.
optimizer_llm = llm

# Set the default LM and a separate one for the optimizer/teacher.
# NOTE(review): `teacher` is passed through to dspy.configure as-is; confirm
# the installed DSPy version actually consumes this setting.
dspy.configure(lm=llm, teacher=optimizer_llm)
# --- 2. Define the Two-Step Prompt Chain --- | |
# Step 1: Generate a game concept from a theme. | |
class GenConcept(dspy.Signature):
    """Generate a simple, creative game concept based on a theme."""

    # NOTE: the docstring above and the `desc` strings below are prompt text
    # handed to DSPy, not just documentation; edit them deliberately.

    # Input: the theme the concept is generated from.
    theme: str = dspy.InputField(desc="The theme for the game.")

    # Output: the generated concept, consumed by the tweet-writing step.
    game_concept: str = dspy.OutputField(
        desc="A short, creative concept for a new game."
    )
# Step 2: Generate a tweet from the game concept. | |
class GenTweet(dspy.Signature):
    """Generate a catchy, short tweet (under 280 characters) to announce a new game."""

    # NOTE: the docstring above and the `desc` strings below are prompt text
    # handed to DSPy, not just documentation; edit them deliberately.

    # Input: the game concept the announcement is written for.
    game_concept: str = dspy.InputField(desc="The concept of the game.")

    # Output: the announcement text.
    tweet: str = dspy.OutputField(
        desc="A tweet-style announcement for the game."
    )
# Combine the two steps into a single module. | |
class GameAnnouncementPipeline(dspy.Module):
    """Two-step chain: theme -> game concept -> announcement tweet."""

    def __init__(self):
        super().__init__()
        # One chain-of-thought predictor per step of the chain.
        self.gen_concept = dspy.ChainOfThought(GenConcept)
        self.gen_tweet = dspy.ChainOfThought(GenTweet)

    def forward(self, theme):
        """Run both steps for `theme` and return concept and tweet together.

        Returns a ``dspy.Prediction`` with fields ``game_concept`` and ``tweet``.
        """
        # Step 1: theme -> concept.
        concept = self.gen_concept(theme=theme).game_concept
        # Step 2: concept -> tweet (the tweet step never sees the raw theme).
        tweet = self.gen_tweet(game_concept=concept).tweet
        return dspy.Prediction(game_concept=concept, tweet=tweet)
# --- 3. Define the LLM-as-Judge Evaluator --- | |
def _parse_rating(raw):
    """Return the first integer found in `raw`, or None if there is none."""
    if isinstance(raw, int):
        return raw
    found = re.search(r"\d+", str(raw))
    return int(found.group()) if found else None


def llm_as_judge_metric(gold, pred, trace=None):
    """
    An evaluator that uses an LLM to judge the quality of a generated tweet.
    It checks for creativity, relevance to the theme, and conciseness.

    Args:
        gold: The example being evaluated; only ``gold.theme`` is read.
        pred: The pipeline output; only ``pred.tweet`` is read.
        trace: Accepted for compatibility with the optimizer's metric
            signature; not used here.

    Returns:
        True if the judge's rating is 4 or higher, False otherwise. A rating
        that contains no integer at all counts as False instead of raising,
        so one malformed judge reply cannot abort an optimization run.
    """
    # Define the signature for the LLM judge.
    class Judge(dspy.Signature):
        """Assess the quality of a tweet announcing a new game based on a theme.
        Evaluation Criteria:
        1. Creativity: Is the tweet original and engaging?
        2. Relevance: Does the tweet accurately reflect the game's theme?
        3. Conciseness: Is the tweet under 280 characters and punchy?
        """

        theme = dspy.InputField(desc="The original theme for the game.")
        tweet = dspy.InputField(desc="The generated tweet for the game.")
        assessment = dspy.OutputField(desc="A brief explanation of why the tweet is good or bad.")
        rating = dspy.OutputField(desc="An integer rating from 1 to 5.")

    # Instantiate the judge and get its assessment.
    judge = dspy.Predict(Judge)
    verdict = judge(theme=gold.theme, tweet=pred.tweet)

    # The rating comes back as free text (e.g. "4", "4/5", "Rating: 4"), so
    # extract the first integer rather than calling int() on the raw string.
    rating = _parse_rating(verdict.rating)
    if rating is None:
        return False

    # The metric returns True if the rating is 4 or 5, False otherwise.
    return rating >= 4
# --- 4. Prepare Data and Optimize ---
# Create a few training examples (inputs only; the LLM judge supplies the
# quality signal, so no gold outputs are needed).
# In a real scenario, you'd have more.
train_data = [
    dspy.Example(theme="Cyberpunk Dystopia").with_inputs("theme"),
    dspy.Example(theme="Enchanted Forest Mystery").with_inputs("theme"),
    dspy.Example(theme="Space Opera with Pirates").with_inputs("theme"),
]

# Alternative optimizer: dspy.teleprompt.BootstrapFewShot(metric=llm_as_judge_metric)
optimizer = dspy.MIPROv2(metric=llm_as_judge_metric, auto=None, num_candidates=3)

# Compile the pipeline. This is where DSPy optimizes the prompts.
# It will run the pipeline, evaluate with the LLM judge, and build effective prompts.
optimized_pipeline = optimizer.compile(
    GameAnnouncementPipeline(),
    trainset=train_data,
    requires_permission_to_run=False,
    minibatch_size=2,
    num_trials=5,
)


# --- 5. Run and Evaluate ---
def _capture_history(lm, n):
    """Return the text that ``lm.inspect_history(n=n)`` prints to stdout."""
    buffer = io.StringIO()
    with contextlib.redirect_stdout(buffer):
        lm.inspect_history(n=n)
    return buffer.getvalue()


# Create an example to test with.
test_theme = "A haunted Victorian-era carnival"

# Each pipeline run makes two LM calls (concept step, then tweet step).
calls_per_run = 2

# Run the original, un-optimized pipeline. Its prompts are captured right away:
# inspect_history can only show the most recent calls, so after the optimized
# run below they would no longer be the latest entries.
unoptimized_pipeline = GameAnnouncementPipeline()
unoptimized_prediction = unoptimized_pipeline(theme=test_theme)
unoptimized_history = _capture_history(llm, n=calls_per_run)

# Run the new, optimized pipeline and capture its prompts the same way.
optimized_prediction = optimized_pipeline(theme=test_theme)
optimized_history = _capture_history(llm, n=calls_per_run)

# Print the results
print("--- Unoptimized Pipeline ---")
print(f"Theme: {test_theme}")
print(f"Generated Concept: {unoptimized_prediction.game_concept}")
print(f"Generated Tweet: {unoptimized_prediction.tweet}\n")
print("--- Optimized Pipeline ---")
print(f"Theme: {test_theme}")
print(f"Generated Concept: {optimized_prediction.game_concept}")
print(f"Generated Tweet: {optimized_prediction.tweet}\n")

# Show the captured prompts side by side to see the difference.
# The optimized one is expected to include few-shot examples.
print("--- LLM History ---")
print("\n**Unoptimized Prompt:**")
print(unoptimized_history, end="")
print("\n**Optimized Prompt (with automatically generated few-shot examples):**")
print(optimized_history, end="")

# Persist the optimized program so it can be reloaded without re-optimizing.
optimized_pipeline.save("optimized.json")
# (GitHub page footer — sign-up / sign-in prompt to comment on the gist; not part of the script.)