swayson · June 26, 2025 04:56
diff --git a/dspy-simple-optimize.py b/dspy-simple-optimize.py
 import dspy
 import os

 # --- 1. Setup the Language Model ---
 # Configure the language model to use through OpenRouter.
 # This example uses Google's Gemma 3 27B model (`gemma-3-27b-it`, free tier).
 # The API key is read from the OPENROUTER_API_KEY environment variable so the
 # secret does not have to be written into the script.
 llm = dspy.LM(
    # model="openrouter/qwen/qwq-32b:free",
    model="openrouter/google/gemma-3-27b-it:free",
    api_base="https://openrouter.ai/api/v1",
    # Falls back to "" (the previously hard-coded value) when the variable is unset.
    api_key=os.environ.get("OPENROUTER_API_KEY", ""),
    model_type='chat' # Specify model_type as 'chat' for chat-based models
 )

 # A potentially more capable model could be configured as the teacher/judge.
 # For this example, the same model is reused for simplicity.
 optimizer_llm = llm

 # Set the default LM and store the teacher LM in the DSPy settings.
 # NOTE(review): confirm that something actually reads the `teacher` setting;
 # the optimizer below is constructed without an explicit model argument.
 dspy.configure(lm=llm, teacher=optimizer_llm)

 # --- 2. Define the Two-Step Prompt Chain ---

 # Step 1: Generate a game concept from a theme.
 # NOTE(review): per the DSPy signature docs, the class docstring and each
 # field's `desc` are presumably sent to the model as prompt text -- confirm
 # before rewording them, since that would change the program's behavior.
 class GenConcept(dspy.Signature):
    """Generate a simple, creative game concept based on a theme."""
    # Input: the theme supplied by the caller of the pipeline.
    theme = dspy.InputField(desc="The theme for the game.")
    # Output: the concept text; the pipeline forwards it to GenTweet.
    game_concept = dspy.OutputField(desc="A short, creative concept for a new game.")

 # Step 2: Generate a tweet from the game concept.
 # NOTE(review): per the DSPy signature docs, the class docstring and each
 # field's `desc` are presumably sent to the model as prompt text -- confirm
 # before rewording them. The 280-character limit is only stated in that
 # text; nothing in this class enforces it.
 class GenTweet(dspy.Signature):
    """Generate a catchy, short tweet (under 280 characters) to announce a new game."""
    # Input: the concept produced by the GenConcept step.
    game_concept = dspy.InputField(desc="The concept of the game.")
    # Output: the announcement text returned to the caller and scored by the judge metric.
    tweet = dspy.OutputField(desc="A tweet-style announcement for the game.")

 # Combine the two steps into a single module.
 class GameAnnouncementPipeline(dspy.Module):
    """Two-stage DSPy program: theme -> game concept -> announcement tweet."""

    def __init__(self):
        super().__init__()
        # One chain-of-thought predictor per stage of the pipeline.
        self.gen_concept = dspy.ChainOfThought(GenConcept)
        self.gen_tweet = dspy.ChainOfThought(GenTweet)

    def forward(self, theme):
        """Run both stages for `theme` and return the concept and the tweet."""
        # Stage 1: turn the theme into a game concept.
        concept_text = self.gen_concept(theme=theme).game_concept

        # Stage 2: turn that concept into the announcement tweet.
        tweet_text = self.gen_tweet(game_concept=concept_text).tweet

        return dspy.Prediction(game_concept=concept_text, tweet=tweet_text)

 # --- 3. Define the LLM-as-Judge Evaluator ---

 def _parse_rating(raw):
    """Return the first integer found in the judge's rating text, or None.

    The judge is asked for an integer, but a model may answer with text such
    as "4/5" or "Rating: 4"; a bare `int()` would raise on those.
    """
    import re  # local import: this file does not import `re` at module level

    found = re.search(r"-?\d+", str(raw))
    return int(found.group()) if found else None


 def llm_as_judge_metric(gold, pred, trace=None):
    """
    An evaluator that uses an LLM to judge the quality of a generated tweet.
    It checks for creativity, relevance to the theme, and conciseness.

    Args:
        gold: The example being evaluated; only `gold.theme` is read.
        pred: The pipeline output; only `pred.tweet` is read.
        trace: Accepted for the metric interface; not used here.

    Returns:
        True if the judge's rating is 4 or higher, False otherwise. A rating
        that contains no integer counts as False instead of raising.
    """
    # Define the signature for the LLM judge.
    class Judge(dspy.Signature):
        """Assess the quality of a tweet announcing a new game based on a theme.

        Evaluation Criteria:
        1. Creativity: Is the tweet original and engaging?
        2. Relevance: Does the tweet accurately reflect the game's theme?
        3. Conciseness: Is the tweet under 280 characters and punchy?
        """
        theme = dspy.InputField(desc="The original theme for the game.")
        tweet = dspy.InputField(desc="The generated tweet for the game.")
        assessment = dspy.OutputField(desc="A brief explanation of why the tweet is good or bad.")
        rating = dspy.OutputField(desc="An integer rating from 1 to 5.")

    # Instantiate the judge and get its assessment.
    judge = dspy.Predict(Judge)
    assessment = judge(theme=gold.theme, tweet=pred.tweet)

    # Pass only when a rating could be extracted and it is 4 or higher.
    rating = _parse_rating(assessment.rating)
    return rating is not None and rating >= 4

 # --- 4. Prepare Data and Optimize ---

 # Create a few training examples.
 # In a real scenario, you'd have more.
 # Each example carries only a `theme`, which is marked as its input field.
 train_data = [
    dspy.Example(theme=theme_text).with_inputs('theme')
    for theme_text in (
        "Cyberpunk Dystopia",
        "Enchanted Forest Mystery",
        "Space Opera with Pirates",
    )
 ]

 # optimizer = dspy.teleprompt.BootstrapFewShot(metric=llm_as_judge_metric)
 # NOTE(review): auto=None presumably turns off the preset budgets so that
 # num_candidates / num_trials below apply -- confirm against the MIPROv2 docs.
 optimizer = dspy.MIPROv2(
    metric=llm_as_judge_metric,
    auto=None,
    num_candidates=3,
 )

 # Compile the pipeline. This is where DSPy optimizes the prompts.
 # It will run the pipeline, evaluate with the LLM judge, and build effective prompts.
 optimized_pipeline = optimizer.compile(
    GameAnnouncementPipeline(),
    trainset=train_data,
    requires_permission_to_run=False,
    minibatch_size=2,
    num_trials=5,
 )


 # --- 5. Run and Evaluate ---

 # Create an example to test with.
 test_theme = "A haunted Victorian-era carnival"

 # Run the original, un-optimized pipeline
 unoptimized_pipeline = GameAnnouncementPipeline()
 unoptimized_prediction = unoptimized_pipeline(theme=test_theme)

 # Print the results
 print("--- Unoptimized Pipeline ---")
 print(f"Theme: {test_theme}")
 print(f"Generated Concept: {unoptimized_prediction.game_concept}")
 print(f"Generated Tweet: {unoptimized_prediction.tweet}\n")

 # Show the prompts of the run that just finished. `inspect_history` on
 # `dspy.LM` takes only `n` (there is no `skip` argument), so the history is
 # inspected right after each run instead of skipping back over later calls.
 # n=2 because the pipeline calls the LM twice per run (concept, then tweet).
 print("--- LLM History ---")
 print("\n**Unoptimized Prompt:**")
 llm.inspect_history(n=2)

 # Run the new, optimized pipeline
 optimized_prediction = optimized_pipeline(theme=test_theme)

 print("--- Optimized Pipeline ---")
 print(f"Theme: {test_theme}")
 print(f"Generated Concept: {optimized_prediction.game_concept}")
 print(f"Generated Tweet: {optimized_prediction.tweet}\n")

 # Same inspection for the optimized run, so the two prompts can be compared.
 print("--- LLM History ---")
 print("\n**Optimized Prompt (with automatically generated few-shot examples):**")
 llm.inspect_history(n=2)

 # Persist the optimized program next to the script.
 optimized_pipeline.save("optimized.json")
	import dspy
	import os

	# --- 1. Setup the Language Model ---
	# Configure the language model to use through OpenRouter.
	# This example uses Google's Gemma 3 27B model (`gemma-3-27b-it`, free tier).
	# The API key is read from the OPENROUTER_API_KEY environment variable so the
	# secret does not have to be written into the script.
	llm = dspy.LM(
	    # model="openrouter/qwen/qwq-32b:free",
	    model="openrouter/google/gemma-3-27b-it:free",
	    api_base="https://openrouter.ai/api/v1",
	    # Falls back to "" (the previously hard-coded value) when the variable is unset.
	    api_key=os.environ.get("OPENROUTER_API_KEY", ""),
	    model_type='chat' # Specify model_type as 'chat' for chat-based models
	)

	# A potentially more capable model could be configured as the teacher/judge.
	# For this example, the same model is reused for simplicity.
	optimizer_llm = llm

	# Set the default LM and store the teacher LM in the DSPy settings.
	# NOTE(review): confirm that something actually reads the `teacher` setting;
	# the optimizer below is constructed without an explicit model argument.
	dspy.configure(lm=llm, teacher=optimizer_llm)

	# --- 2. Define the Two-Step Prompt Chain ---

	# Step 1: Generate a game concept from a theme.
	# The class body is indented again; without indentation the class
	# statement is a syntax error.
	class GenConcept(dspy.Signature):
	    """Generate a simple, creative game concept based on a theme."""
	    # Input: the theme supplied by the caller of the pipeline.
	    theme = dspy.InputField(desc="The theme for the game.")
	    # Output: the concept text; the pipeline forwards it to GenTweet.
	    game_concept = dspy.OutputField(desc="A short, creative concept for a new game.")

	# Step 2: Generate a tweet from the game concept.
	# The class body is indented again; without indentation the class
	# statement is a syntax error.
	class GenTweet(dspy.Signature):
	    """Generate a catchy, short tweet (under 280 characters) to announce a new game."""
	    # Input: the concept produced by the GenConcept step.
	    game_concept = dspy.InputField(desc="The concept of the game.")
	    # Output: the announcement text returned to the caller and scored by the judge metric.
	    tweet = dspy.OutputField(desc="A tweet-style announcement for the game.")

	# Combine the two steps into a single module.
	class GameAnnouncementPipeline(dspy.Module):
	    """Two-stage DSPy program: theme -> game concept -> announcement tweet."""

	    def __init__(self):
	        super().__init__()
	        # Instantiate the two steps of our pipeline
	        self.gen_concept = dspy.ChainOfThought(GenConcept)
	        self.gen_tweet = dspy.ChainOfThought(GenTweet)

	    def forward(self, theme):
	        """Run both stages for `theme` and return the concept and the tweet."""
	        # First, generate the game concept from the theme.
	        concept_prediction = self.gen_concept(theme=theme)

	        # Then, use the generated concept to write a tweet.
	        tweet_prediction = self.gen_tweet(game_concept=concept_prediction.game_concept)

	        return dspy.Prediction(
	            game_concept=concept_prediction.game_concept,
	            tweet=tweet_prediction.tweet
	        )

	# --- 3. Define the LLM-as-Judge Evaluator ---

	def _parse_rating(raw):
	    """Return the first integer found in the judge's rating text, or None.

	    The judge is asked for an integer, but a model may answer with text such
	    as "4/5" or "Rating: 4"; a bare `int()` would raise on those.
	    """
	    import re  # local import: this file does not import `re` at module level

	    found = re.search(r"-?\d+", str(raw))
	    return int(found.group()) if found else None


	def llm_as_judge_metric(gold, pred, trace=None):
	    """
	    An evaluator that uses an LLM to judge the quality of a generated tweet.
	    It checks for creativity, relevance to the theme, and conciseness.

	    Args:
	        gold: The example being evaluated; only `gold.theme` is read.
	        pred: The pipeline output; only `pred.tweet` is read.
	        trace: Accepted for the metric interface; not used here.

	    Returns:
	        True if the judge's rating is 4 or higher, False otherwise. A rating
	        that contains no integer counts as False instead of raising.
	    """
	    # Define the signature for the LLM judge.
	    class Judge(dspy.Signature):
	        """Assess the quality of a tweet announcing a new game based on a theme.

	        Evaluation Criteria:
	        1. Creativity: Is the tweet original and engaging?
	        2. Relevance: Does the tweet accurately reflect the game's theme?
	        3. Conciseness: Is the tweet under 280 characters and punchy?
	        """
	        theme = dspy.InputField(desc="The original theme for the game.")
	        tweet = dspy.InputField(desc="The generated tweet for the game.")
	        assessment = dspy.OutputField(desc="A brief explanation of why the tweet is good or bad.")
	        rating = dspy.OutputField(desc="An integer rating from 1 to 5.")

	    # Instantiate the judge and get its assessment.
	    judge = dspy.Predict(Judge)
	    assessment = judge(theme=gold.theme, tweet=pred.tweet)

	    # Pass only when a rating could be extracted and it is 4 or higher.
	    rating = _parse_rating(assessment.rating)
	    return rating is not None and rating >= 4

	# --- 4. Prepare Data and Optimize ---

	# Create a few training examples.
	# In a real scenario, you'd have more.
	# Each example carries only a `theme`, which is marked as its input field.
	train_data = [
	    dspy.Example(theme=theme_text).with_inputs('theme')
	    for theme_text in (
	        "Cyberpunk Dystopia",
	        "Enchanted Forest Mystery",
	        "Space Opera with Pirates",
	    )
	]

	# optimizer = dspy.teleprompt.BootstrapFewShot(metric=llm_as_judge_metric)
	# NOTE(review): auto=None presumably turns off the preset budgets so that
	# num_candidates / num_trials below apply -- confirm against the MIPROv2 docs.
	optimizer = dspy.MIPROv2(
	    metric=llm_as_judge_metric,
	    auto=None,
	    num_candidates=3,
	)

	# Compile the pipeline. This is where DSPy optimizes the prompts.
	# It will run the pipeline, evaluate with the LLM judge, and build effective prompts.
	optimized_pipeline = optimizer.compile(
	    GameAnnouncementPipeline(),
	    trainset=train_data,
	    requires_permission_to_run=False,
	    minibatch_size=2,
	    num_trials=5,
	)


	# --- 5. Run and Evaluate ---

	# Create an example to test with.
	test_theme = "A haunted Victorian-era carnival"

	# Run the original, un-optimized pipeline
	unoptimized_pipeline = GameAnnouncementPipeline()
	unoptimized_prediction = unoptimized_pipeline(theme=test_theme)

	# Print the results
	print("--- Unoptimized Pipeline ---")
	print(f"Theme: {test_theme}")
	print(f"Generated Concept: {unoptimized_prediction.game_concept}")
	print(f"Generated Tweet: {unoptimized_prediction.tweet}\n")

	# Show the prompts of the run that just finished. `inspect_history` on
	# `dspy.LM` takes only `n` (there is no `skip` argument), so the history is
	# inspected right after each run instead of skipping back over later calls.
	# n=2 because the pipeline calls the LM twice per run (concept, then tweet).
	print("--- LLM History ---")
	print("\nUnoptimized Prompt:")
	llm.inspect_history(n=2)

	# Run the new, optimized pipeline
	optimized_prediction = optimized_pipeline(theme=test_theme)

	print("--- Optimized Pipeline ---")
	print(f"Theme: {test_theme}")
	print(f"Generated Concept: {optimized_prediction.game_concept}")
	print(f"Generated Tweet: {optimized_prediction.tweet}\n")

	# Same inspection for the optimized run, so the two prompts can be compared.
	print("--- LLM History ---")
	print("\nOptimized Prompt (with automatically generated few-shot examples):")
	llm.inspect_history(n=2)

	# Persist the optimized program next to the script.
	optimized_pipeline.save("optimized.json")