Chapter 04: DSPy Optimizers and Compilation
Learning Objectives
- Understand how the DSPy compilation process works
- Learn the Bootstrap Few-Shot optimizer
- Master the use of the LabeledFewShot optimizer
- Explore the functionality of the COPRO optimizer
- Understand optimizer evaluation and tuning strategies
Key Concepts
1. Overview of DSPy Compilation Mechanism
DSPy’s compilation process is the core mechanism for transforming high-level DSPy programs into optimized prompts and reasoning chains. The compiler automatically optimizes prompt templates and reasoning strategies by analyzing program structure and training data.
Core Components of the Compilation Process
# Basic flow of the compilation process
import dspy
# 1. Define language model
lm = dspy.OpenAI(model='gpt-3.5-turbo')
dspy.settings.configure(lm=lm)
# 2. Define program
class BasicQA(dspy.Module):
def __init__(self):
super().__init__()
self.generate_answer = dspy.ChainOfThought("question -> answer")
def forward(self, question):
return self.generate_answer(question=question)
# 3. Prepare training data
trainset = [
dspy.Example(question="What is Python?", answer="Python is a programming language").with_inputs('question'),
dspy.Example(question="What is AI?", answer="AI is artificial intelligence").with_inputs('question')
]
# 4. Configure optimizer
optimizer = dspy.BootstrapFewShot(metric=lambda example, pred, trace=None:
example.answer.lower() in pred.answer.lower())
# 5. Compile program
compiled_qa = optimizer.compile(BasicQA(), trainset=trainset)
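Once compiled, the program is used like any other DSPy module. The snippet below is a minimal sketch of what typically follows compilation; it assumes the save/load helpers that DSPy modules expose for persisting optimizer state, and a hypothetical file name compiled_qa.json.
# Call the compiled program exactly like the uncompiled one
result = compiled_qa(question="What is DSPy?")
print(result.answer)
# Persist the optimized state (selected demos, instructions) ...
compiled_qa.save("compiled_qa.json")
# ... and restore it later without re-running the optimizer
restored_qa = BasicQA()
restored_qa.load("compiled_qa.json")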
How the Compiler Works
# Example of compiler internal workflow
class CompilationProcess:
"""Demonstrates the internal mechanism of DSPy compilation process"""
def __init__(self, program, optimizer, trainset):
self.program = program
self.optimizer = optimizer
self.trainset = trainset
self.compiled_program = None
def analyze_program_structure(self):
"""Analyze program structure, identify optimizable components"""
# Identify all predictor modules
predictors = []
for module in self.program.modules():
if isinstance(module, dspy.Predict):
predictors.append(module)
return predictors
def generate_demonstrations(self, predictors):
"""Generate examples for each predictor"""
demonstrations = {}
for predictor in predictors:
# Generate high-quality examples using training data
demos = []
for example in self.trainset[:5]: # Select first 5 as examples
try:
                    # Run the original program (with the currently configured LM) to get intermediate results
                    result = self.program(example.question)
if self.optimizer.metric(example, result):
demos.append({
'input': example.question,
'output': result.answer,
'reasoning': getattr(result, 'rationale', '')
})
except Exception as e:
continue
demonstrations[predictor] = demos
return demonstrations
def optimize_prompts(self, demonstrations):
"""Optimize prompt templates based on examples"""
optimized_predictors = {}
for predictor, demos in demonstrations.items():
if demos:
# Build optimized few-shot prompts
few_shot_examples = []
for demo in demos:
few_shot_examples.append({
'input': demo['input'],
'output': demo['output']
})
# Create optimized predictor
optimized_predictor = dspy.Predict(predictor.signature)
optimized_predictor.demos = few_shot_examples
optimized_predictors[predictor] = optimized_predictor
return optimized_predictors
def compile(self):
"""Execute complete compilation process"""
print("Starting program compilation...")
# 1. Analyze program structure
predictors = self.analyze_program_structure()
print(f"Found {len(predictors)} predictors")
# 2. Generate examples
demonstrations = self.generate_demonstrations(predictors)
print(f"Generated {sum(len(demos) for demos in demonstrations.values())} examples")
# 3. Optimize prompts
optimized_predictors = self.optimize_prompts(demonstrations)
print(f"Optimized {len(optimized_predictors)} predictors")
        # 4. Rebuild program: copy the original and attach the optimized demos to its predictors
        self.compiled_program = self.program.deepcopy()
        for module in self.compiled_program.modules():
            if isinstance(module, dspy.Predict):
                for original, optimized in optimized_predictors.items():
                    if str(original.signature) == str(module.signature):
                        module.demos = optimized.demos
                        break
print("Compilation complete!")
return self.compiled_program
# Usage example
program = BasicQA()
optimizer = dspy.BootstrapFewShot(metric=lambda example, pred, trace=None:
                                  example.answer.lower() in pred.answer.lower())
compiler = CompilationProcess(program, optimizer, trainset)
compiled_program = compiler.compile()
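To see what compilation actually changed, it helps to inspect the demonstrations now attached to each predictor. A rough sketch, assuming the compiled program is a regular dspy.Module and that your DSPy version exposes named_predictors() on modules and inspect_history() on the language model client:
# Inspect the few-shot demos the compiler attached to each predictor
for name, predictor in compiled_program.named_predictors():
    print(f"{name}: {len(predictor.demos)} demonstrations")
# Inspect the last prompt the language model actually received
lm.inspect_history(n=1)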
2. Bootstrap Few-Shot Optimizer
Bootstrap Few-Shot is the most commonly used optimizer in DSPy. It generates few-shot demonstrations by running the program (optionally with a stronger teacher model) on training inputs and keeping only the traces that pass the metric.
Basic Principles and Usage
import dspy
import random
from typing import List, Callable
class AdvancedBootstrapFewShot:
"""Enhanced Bootstrap Few-Shot optimizer"""
def __init__(self,
metric: Callable,
teacher: dspy.LM = None,
max_bootstrapped_demos: int = 4,
max_labeled_demos: int = 16,
max_rounds: int = 1,
num_candidate_programs: int = 16,
num_threads: int = 6):
self.metric = metric
self.teacher = teacher
self.max_bootstrapped_demos = max_bootstrapped_demos
self.max_labeled_demos = max_labeled_demos
self.max_rounds = max_rounds
self.num_candidate_programs = num_candidate_programs
self.num_threads = num_threads
def bootstrap_one_example(self, program, example):
"""Generate bootstrap demonstration for a single example"""
try:
# Use teacher model (if available) or current model to generate predictions
with dspy.context(lm=self.teacher if self.teacher else dspy.settings.lm):
prediction = program(**example.inputs())
# Validate prediction quality
if self.metric(example, prediction):
                # Build a demonstration by merging the example's inputs with the predicted outputs
                demo = example.copy(**prediction.toDict())
                return demo
except Exception as e:
print(f"Bootstrap failed: {e}")
return None
def bootstrap_examples(self, program, trainset):
"""Batch generate bootstrap examples"""
bootstrapped_examples = []
for example in trainset:
if len(bootstrapped_examples) >= self.max_bootstrapped_demos:
break
demo = self.bootstrap_one_example(program, example)
if demo:
bootstrapped_examples.append(demo)
print(f"Generated example {len(bootstrapped_examples)}/{self.max_bootstrapped_demos}")
return bootstrapped_examples
def evaluate_program(self, program, devset):
"""Evaluate program performance"""
correct = 0
total = len(devset)
for example in devset:
try:
prediction = program(**example.inputs())
if self.metric(example, prediction):
correct += 1
except Exception:
continue
return correct / total if total > 0 else 0.0
def compile(self, student_program, trainset, valset=None):
"""Compile student program"""
if valset is None:
# If no validation set, randomly split from training set
random.shuffle(trainset)
split_point = int(len(trainset) * 0.8)
trainset, valset = trainset[:split_point], trainset[split_point:]
print(f"Starting Bootstrap optimization, training set: {len(trainset)}, validation set: {len(valset)}")
best_program = None
best_score = 0.0
for round_idx in range(self.max_rounds):
print(f"\nOptimization round {round_idx + 1}")
# Generate bootstrap examples
bootstrapped_demos = self.bootstrap_examples(student_program, trainset)
print(f"Generated {len(bootstrapped_demos)} bootstrap examples")
# Generate multiple candidate programs
candidate_programs = []
for candidate_idx in range(self.num_candidate_programs):
# Randomly select example subset
selected_demos = random.sample(
bootstrapped_demos,
min(len(bootstrapped_demos), self.max_bootstrapped_demos)
)
# Create candidate program
candidate = student_program.deepcopy()
# Add examples to each predictor
for module in candidate.modules():
if isinstance(module, dspy.Predict):
module.demos = selected_demos
candidate_programs.append(candidate)
# Evaluate all candidate programs
print("Evaluating candidate programs...")
for i, candidate in enumerate(candidate_programs):
score = self.evaluate_program(candidate, valset)
print(f"Candidate program {i+1}: {score:.3f}")
if score > best_score:
best_score = score
best_program = candidate
print(f"\nBest program score: {best_score:.3f}")
return best_program
# Practical application example
class MathWordProblem(dspy.Module):
"""Math word problem solver"""
def __init__(self):
super().__init__()
self.solve = dspy.ChainOfThought("problem -> reasoning, answer")
def forward(self, problem):
result = self.solve(problem=problem)
return dspy.Prediction(
reasoning=result.reasoning,
answer=result.answer
)
# Prepare data
math_trainset = [
dspy.Example(
problem="Xiao Ming has 5 apples, ate 2, how many are left?",
answer="3"
).with_inputs('problem'),
dspy.Example(
problem="A class has 30 students, 12 are boys, how many are girls?",
answer="18"
).with_inputs('problem'),
# ... more examples
]
# Define evaluation metric
def math_metric(example, prediction, trace=None):
"""Evaluation metric for math problems"""
try:
# Extract numeric answer
import re
pred_numbers = re.findall(r'\d+', prediction.answer)
true_numbers = re.findall(r'\d+', example.answer)
if pred_numbers and true_numbers:
return pred_numbers[-1] == true_numbers[-1]
    except Exception:
pass
return False
# Use Bootstrap optimizer
math_program = MathWordProblem()
optimizer = AdvancedBootstrapFewShot(
metric=math_metric,
max_bootstrapped_demos=6,
num_candidate_programs=10
)
compiled_math_program = optimizer.compile(math_program, math_trainset)
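The AdvancedBootstrapFewShot class above re-implements the idea for illustration; in practice you would normally reach for the library's built-in teleprompter. A sketch of the equivalent call with dspy.BootstrapFewShot (parameter names follow the library's documented interface; exact defaults may vary by version):
# Equivalent optimization with the built-in teleprompter
builtin_optimizer = dspy.BootstrapFewShot(
    metric=math_metric,
    max_bootstrapped_demos=6,  # demos generated by running the program itself
    max_labeled_demos=4,       # demos taken directly from the labeled trainset
    max_rounds=1
)
compiled_with_builtin = builtin_optimizer.compile(MathWordProblem(), trainset=math_trainset)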
3. LabeledFewShot Optimizer
Unlike Bootstrap Few-Shot, the LabeledFewShot optimizer does not generate new demonstrations; it simply attaches a fixed number of pre-labeled training examples to each predictor as few-shot demonstrations.
class LabeledFewShotOptimizer:
"""Detailed implementation of Labeled Few-Shot optimizer"""
def __init__(self, k: int = 16):
self.k = k # Number of examples to use
    def select_examples(self, trainset, program_signature):
        """Intelligently select the best examples (diversity-based selection is used here)"""
        # Method 1: Random selection (alternative)
        # random_examples = random.sample(trainset, min(self.k, len(trainset)))
        # Method 2: Diversity selection (used below)
        diverse_examples = self.select_diverse_examples(trainset)
        # Method 3: Balanced difficulty selection (alternative, see select_balanced_examples)
        # balanced_examples = self.select_balanced_examples(trainset)
        return diverse_examples
def select_diverse_examples(self, trainset):
"""Select diverse examples"""
if len(trainset) <= self.k:
return trainset
selected = []
remaining = trainset.copy()
# First randomly select one
first = random.choice(remaining)
selected.append(first)
remaining.remove(first)
# Iteratively select most dissimilar examples
while len(selected) < self.k and remaining:
best_candidate = None
best_diversity_score = -1
for candidate in remaining:
# Calculate diversity score with already selected examples
diversity_score = self.calculate_diversity(candidate, selected)
if diversity_score > best_diversity_score:
best_diversity_score = diversity_score
best_candidate = candidate
if best_candidate:
selected.append(best_candidate)
remaining.remove(best_candidate)
return selected
def calculate_diversity(self, candidate, selected_examples):
"""Calculate diversity score of example"""
if not selected_examples:
return 1.0
# Simple diversity measure based on text length and vocabulary
candidate_words = set(candidate.question.lower().split())
diversity_scores = []
for selected in selected_examples:
selected_words = set(selected.question.lower().split())
# Jaccard distance as diversity measure
intersection = len(candidate_words & selected_words)
union = len(candidate_words | selected_words)
if union == 0:
diversity = 1.0
else:
diversity = 1.0 - (intersection / union)
diversity_scores.append(diversity)
# Return average diversity
return sum(diversity_scores) / len(diversity_scores)
def select_balanced_examples(self, trainset):
"""Select difficulty-balanced examples"""
# Classify by problem complexity
simple_examples = []
medium_examples = []
complex_examples = []
for example in trainset:
complexity = self.estimate_complexity(example)
if complexity < 0.3:
simple_examples.append(example)
elif complexity < 0.7:
medium_examples.append(example)
else:
complex_examples.append(example)
# Balanced selection
selected = []
target_simple = self.k // 3
target_medium = self.k // 3
target_complex = self.k - target_simple - target_medium
selected.extend(random.sample(simple_examples, min(target_simple, len(simple_examples))))
selected.extend(random.sample(medium_examples, min(target_medium, len(medium_examples))))
selected.extend(random.sample(complex_examples, min(target_complex, len(complex_examples))))
# If insufficient, supplement from remaining
while len(selected) < self.k and len(selected) < len(trainset):
remaining = [ex for ex in trainset if ex not in selected]
if remaining:
selected.append(random.choice(remaining))
return selected[:self.k]
def estimate_complexity(self, example):
"""Estimate example complexity"""
question_length = len(example.question.split())
answer_length = len(example.answer.split())
# Simple complexity estimation based on length and special characters
complexity = (question_length + answer_length) / 50.0
# Add weight for special patterns
if any(word in example.question.lower() for word in ['why', 'how', 'explain']):
complexity += 0.3
if any(char in example.question for char in ['?', '!', ';']):
complexity += 0.1
return min(complexity, 1.0)
def compile(self, student_program, trainset):
"""Compile program using labeled examples"""
print(f"Using LabeledFewShot optimizer, training set size: {len(trainset)}")
# Select best examples
selected_examples = self.select_examples(trainset, None)
print(f"Selected {len(selected_examples)} examples")
# Create optimized program
optimized_program = student_program.deepcopy()
# Add examples to each predictor
for module in optimized_program.modules():
if isinstance(module, dspy.Predict):
module.demos = selected_examples
print(f"Added {len(selected_examples)} examples to predictor")
return optimized_program
# Usage example
class QuestionClassifier(dspy.Module):
"""Question classifier"""
def __init__(self):
super().__init__()
self.classify = dspy.Predict("question -> category")
def forward(self, question):
result = self.classify(question=question)
return result
# Prepare classification training data
classification_trainset = [
dspy.Example(question="What is the weather today?", category="weather").with_inputs('question'),
dspy.Example(question="How do I cook pasta?", category="cooking").with_inputs('question'),
dspy.Example(question="What is machine learning?", category="technology").with_inputs('question'),
dspy.Example(question="Where is Paris?", category="geography").with_inputs('question'),
# ... more examples
]
# Use LabeledFewShot optimization
classifier = QuestionClassifier()
labeled_optimizer = LabeledFewShotOptimizer(k=8)
optimized_classifier = labeled_optimizer.compile(classifier, classification_trainset)
# Test optimized classifier
test_questions = [
"How is the weather in Tokyo?",
"Recipe for chocolate cake",
"Explain neural networks"
]
for question in test_questions:
result = optimized_classifier(question=question)
print(f"Question: {question}")
print(f"Category: {result.category}\n")
4. COPRO Optimizer
COPRO is an advanced optimizer focused on the prompts themselves: it generates candidate rewrites of each predictor's instruction text, scores them against the metric, and keeps the best-performing variants over several rounds of search.
class COPROOptimizer:
"""Implementation of COPRO optimizer"""
def __init__(self,
metric,
breadth: int = 10,
depth: int = 3,
init_temperature: float = 1.4,
verbose: bool = False):
self.metric = metric
self.breadth = breadth # Number of candidates generated each time
self.depth = depth # Number of optimization rounds
self.init_temperature = init_temperature
self.verbose = verbose
def generate_instruction_variants(self, original_instruction, num_variants=10):
"""Generate instruction variants"""
# Use language model to generate instruction variants
variation_prompt = f"""
Given the following instruction, please generate {num_variants} functionally equivalent but differently worded variant instructions.
Requirements:
1. Keep the core purpose of the original instruction unchanged
2. Use different wording and expressions
3. Each variant on one line
Original instruction: {original_instruction}
Variant instructions:
"""
# This needs to call the language model
# For demonstration, we use predefined variants
variants = [
f"Please carefully analyze and {original_instruction.lower()}",
f"Based on the given information, {original_instruction.lower()}",
f"Using the following content, please {original_instruction.lower()}",
f"Please provide detailed {original_instruction.lower()}",
f"After careful consideration, {original_instruction.lower()}",
]
return variants[:num_variants]
def optimize_signature_instructions(self, program, trainset):
"""Optimize instructions in signatures"""
optimized_predictors = {}
for module in program.modules():
if isinstance(module, dspy.Predict):
signature = module.signature
original_instructions = getattr(signature, 'instructions', '')
if self.verbose:
print(f"Optimizing predictor instruction: {original_instructions}")
best_instruction = original_instructions
best_score = self.evaluate_instruction(
module, best_instruction, trainset
)
# Generate instruction variants
instruction_variants = self.generate_instruction_variants(
original_instructions, self.breadth
)
# Test each variant
for variant in instruction_variants:
score = self.evaluate_instruction(module, variant, trainset)
if self.verbose:
print(f" Variant: {variant[:50]}... Score: {score:.3f}")
if score > best_score:
best_score = score
best_instruction = variant
# Save best instruction
optimized_predictors[module] = {
'instruction': best_instruction,
'score': best_score
}
return optimized_predictors
def evaluate_instruction(self, predictor, instruction, examples):
"""Evaluate performance of specific instruction"""
# Create temporary predictor
temp_predictor = predictor.deepcopy()
# Update instruction
if hasattr(temp_predictor.signature, 'instructions'):
temp_predictor.signature.instructions = instruction
# Evaluate on example subset
correct = 0
total = min(len(examples), 20) # Limit evaluation count for speed
for example in examples[:total]:
try:
prediction = temp_predictor(**example.inputs())
if self.metric(example, prediction):
correct += 1
except Exception as e:
if self.verbose:
print(f"Evaluation error: {e}")
continue
return correct / total if total > 0 else 0.0
def progressive_optimization(self, program, trainset):
"""Progressive optimization"""
current_program = program.deepcopy()
for depth_level in range(self.depth):
print(f"\nCOPRO optimization round {depth_level + 1}/{self.depth}")
# Optimize current program instructions
optimizations = self.optimize_signature_instructions(
current_program, trainset
)
# Apply best optimizations
improvements = 0
for module, optimization in optimizations.items():
if optimization['score'] > 0:
# Update module instruction
if hasattr(module.signature, 'instructions'):
module.signature.instructions = optimization['instruction']
improvements += 1
print(f"Round {depth_level + 1} optimized {improvements} modules")
# If no improvement, end early
if improvements == 0:
print("No further improvement, optimization ended")
break
return current_program
def compile(self, student_program, trainset, valset=None):
"""Compile student program"""
print(f"Starting COPRO optimization")
print(f"Training set size: {len(trainset)}")
if valset is None:
# Split training set
random.shuffle(trainset)
split_point = int(len(trainset) * 0.8)
train_subset, val_subset = trainset[:split_point], trainset[split_point:]
else:
train_subset = trainset
val_subset = valset
# Execute progressive optimization
optimized_program = self.progressive_optimization(student_program, train_subset)
# Final evaluation
if val_subset:
final_score = self.evaluate_program(optimized_program, val_subset)
print(f"Final validation score: {final_score:.3f}")
return optimized_program
def evaluate_program(self, program, examples):
"""Evaluate entire program"""
correct = 0
total = len(examples)
for example in examples:
try:
prediction = program(**example.inputs())
if self.metric(example, prediction):
correct += 1
except Exception:
continue
return correct / total if total > 0 else 0.0
# Practical application example
class SentimentSignature(dspy.Signature):
    """Analyze the sentiment of the given text. Consider context, tone, and emotional indicators."""
    text = dspy.InputField()
    sentiment = dspy.OutputField(desc="positive, negative, or neutral")

class SentimentAnalyzer(dspy.Module):
    """Sentiment analyzer"""
    def __init__(self):
        super().__init__()
        # The signature's docstring serves as the instruction text that COPRO will optimize;
        # ChainOfThought adds the intermediate reasoning step automatically
        self.analyze = dspy.ChainOfThought(SentimentSignature)
    def forward(self, text):
        result = self.analyze(text=text)
        return dspy.Prediction(
            # The name of the generated reasoning field differs across DSPy versions
            reasoning=getattr(result, 'reasoning', getattr(result, 'rationale', '')),
            sentiment=result.sentiment
        )
# Prepare sentiment analysis data
sentiment_trainset = [
dspy.Example(text="I love this movie! It's amazing!", sentiment="positive").with_inputs('text'),
dspy.Example(text="This is the worst experience ever.", sentiment="negative").with_inputs('text'),
dspy.Example(text="The weather is okay today.", sentiment="neutral").with_inputs('text'),
# ... more examples
]
def sentiment_metric(example, prediction, trace=None):
"""Sentiment analysis evaluation metric"""
return example.sentiment.lower() in prediction.sentiment.lower()
# Use COPRO optimizer
sentiment_analyzer = SentimentAnalyzer()
copro_optimizer = COPROOptimizer(
metric=sentiment_metric,
breadth=8,
depth=3,
verbose=True
)
optimized_analyzer = copro_optimizer.compile(sentiment_analyzer, sentiment_trainset)
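The library's own COPRO teleprompter follows the same breadth/depth search over instructions. Below is a sketch of the equivalent call, assuming dspy.teleprompt.COPRO with the documented compile(student, trainset=..., eval_kwargs=...) interface; eval_kwargs is forwarded to the internal evaluation loop and its accepted keys may differ across versions.
from dspy.teleprompt import COPRO

builtin_copro = COPRO(metric=sentiment_metric, breadth=8, depth=3, init_temperature=1.4)
copro_compiled = builtin_copro.compile(
    SentimentAnalyzer(),
    trainset=sentiment_trainset,
    eval_kwargs=dict(num_threads=4, display_progress=True)
)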
5. Optimizer Evaluation and Tuning Strategies
class OptimizerEvaluator:
"""Optimizer evaluation and comparison tool"""
def __init__(self, base_program, trainset, testset):
self.base_program = base_program
self.trainset = trainset
self.testset = testset
self.results = {}
def evaluate_optimizer(self, optimizer_name, optimizer, metric):
"""Evaluate single optimizer"""
print(f"\nEvaluating optimizer: {optimizer_name}")
# Record start time
import time
start_time = time.time()
try:
# Compile program
compiled_program = optimizer.compile(self.base_program, self.trainset)
compilation_time = time.time() - start_time
# Evaluate performance
test_score = self.evaluate_program(compiled_program, self.testset, metric)
train_score = self.evaluate_program(compiled_program, self.trainset, metric)
# Record results
self.results[optimizer_name] = {
'test_score': test_score,
'train_score': train_score,
'compilation_time': compilation_time,
'overfitting': abs(train_score - test_score)
}
print(f"{optimizer_name}:")
print(f" Train score: {train_score:.3f}")
print(f" Test score: {test_score:.3f}")
print(f" Compilation time: {compilation_time:.2f}s")
print(f" Overfitting degree: {abs(train_score - test_score):.3f}")
except Exception as e:
print(f"{optimizer_name} evaluation failed: {e}")
self.results[optimizer_name] = {
'error': str(e)
}
def evaluate_program(self, program, examples, metric):
"""Evaluate program performance on dataset"""
correct = 0
total = len(examples)
for example in examples:
try:
prediction = program(**example.inputs())
if metric(example, prediction):
correct += 1
            except Exception:
continue
return correct / total if total > 0 else 0.0
def compare_optimizers(self, optimizers_config, metric):
"""Compare multiple optimizers"""
print("Starting optimizer performance comparison")
for name, optimizer in optimizers_config.items():
self.evaluate_optimizer(name, optimizer, metric)
# Generate comparison report
self.generate_comparison_report()
def generate_comparison_report(self):
"""Generate comparison report"""
print("\nOptimizer Comparison Report")
print("=" * 60)
# Sort by test score
valid_results = {k: v for k, v in self.results.items() if 'error' not in v}
if not valid_results:
print("No successful optimizer results")
return
sorted_results = sorted(
valid_results.items(),
key=lambda x: x[1]['test_score'],
reverse=True
)
print(f"{'Optimizer':<20} {'Test Score':<10} {'Train Score':<10} {'Compile Time':<10} {'Overfitting':<10}")
print("-" * 60)
for name, result in sorted_results:
print(f"{name:<20} {result['test_score']:<10.3f} {result['train_score']:<10.3f} "
f"{result['compilation_time']:<10.2f} {result['overfitting']:<10.3f}")
# Recommend best optimizer
best_optimizer = sorted_results[0]
print(f"\nRecommended optimizer: {best_optimizer[0]}")
# Analyze results
print("\nAnalysis:")
if best_optimizer[1]['overfitting'] > 0.1:
print("Warning: Best optimizer may have overfitting issues. Suggestions:")
print(" - Increase training data")
print(" - Use regularization techniques")
print(" - Reduce model complexity")
if best_optimizer[1]['compilation_time'] > 300: # 5 minutes
print("Warning: Compilation time is long. Suggestions:")
print(" - Reduce number of candidate programs")
print(" - Use smaller training set for quick iteration")
print(" - Consider parallelizing optimization")
# Practical evaluation example
def comprehensive_optimizer_evaluation():
"""Comprehensive optimizer evaluation example"""
# Prepare test program
class TestProgram(dspy.Module):
def __init__(self):
super().__init__()
self.generate = dspy.ChainOfThought("question -> answer")
def forward(self, question):
return self.generate(question=question)
# Prepare data
import random
full_dataset = [
dspy.Example(question="What is 2+2?", answer="4").with_inputs('question'),
dspy.Example(question="What is the capital of France?", answer="Paris").with_inputs('question'),
# ... more examples
]
# Split data
random.shuffle(full_dataset)
train_size = int(len(full_dataset) * 0.6)
test_size = int(len(full_dataset) * 0.2)
trainset = full_dataset[:train_size]
testset = full_dataset[train_size:train_size + test_size]
# Define evaluation metric
def simple_metric(example, prediction, trace=None):
return example.answer.lower() in prediction.answer.lower()
# Configure optimizers
optimizers = {
'Bootstrap': AdvancedBootstrapFewShot(metric=simple_metric, max_bootstrapped_demos=4),
'LabeledFewShot': LabeledFewShotOptimizer(k=8),
'COPRO': COPROOptimizer(metric=simple_metric, breadth=6, depth=2)
}
# Execute evaluation
evaluator = OptimizerEvaluator(TestProgram(), trainset, testset)
evaluator.compare_optimizers(optimizers, simple_metric)
return evaluator.results
# Run evaluation
# evaluation_results = comprehensive_optimizer_evaluation()
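The hand-rolled evaluate_program loop above is fine for illustration, but DSPy also provides an evaluation harness that parallelizes metric runs and can print a results table. A sketch using dspy.evaluate.Evaluate (argument names as documented; testset, simple_metric, and compiled_program stand in for the objects created inside the example function above):
from dspy.evaluate import Evaluate

# evaluate = Evaluate(devset=testset, metric=simple_metric, num_threads=4, display_progress=True)
# score = evaluate(compiled_program)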
Practice Exercises
Exercise 1: Custom Optimizer
class CustomOptimizer:
"""Custom optimizer exercise"""
def __init__(self, metric, strategy='random'):
self.metric = metric
self.strategy = strategy # 'random', 'similarity', 'difficulty'
def compile(self, program, trainset):
"""Implement your optimization strategy"""
# TODO: Implement custom optimization logic
pass
# Exercise tasks:
# 1. Implement similarity-based example selection strategy
# 2. Implement difficulty-based incremental example selection strategy
# 3. Compare performance differences of different strategies
Exercise 2: Multi-Objective Optimization
class MultiObjectiveOptimizer:
"""Multi-objective optimizer exercise"""
def __init__(self, metrics_config):
"""
metrics_config: {
'accuracy': {'metric': accuracy_func, 'weight': 0.6},
'speed': {'metric': speed_func, 'weight': 0.2},
'robustness': {'metric': robustness_func, 'weight': 0.2}
}
"""
self.metrics_config = metrics_config
def compile(self, program, trainset):
"""Implement multi-objective optimization"""
# TODO: Implement optimization considering multiple metrics
pass
# Exercise tasks:
# 1. Design multiple evaluation metrics
# 2. Implement Pareto optimal solution selection
# 3. Analyze the impact of different weight configurations
Best Practices
1. Optimizer Selection Guide
def select_optimizer_guide():
"""Optimizer selection guide"""
guidelines = {
'Bootstrap Few-Shot': {
'Applicable Scenarios': [
'Medium data size (100-1000 examples)',
'Need to automatically generate high-quality examples',
'Medium task complexity'
],
'Advantages': [
'High automation',
'Usually achieves good performance',
'No need for manual annotation of examples'
],
'Disadvantages': [
'Higher computational cost',
'May overfit',
'Needs good initial program'
]
},
'Labeled Few-Shot': {
'Applicable Scenarios': [
'Have high-quality annotated data',
'Smaller data size (<100 examples)',
'Need quick prototype validation'
],
'Advantages': [
'Simple and direct',
'Low computational cost',
'Strong controllability'
],
'Disadvantages': [
'Requires manual example selection',
'Performance ceiling limited by example quality',
'Cannot adaptively optimize'
]
},
'COPRO': {
'Applicable Scenarios': [
'Prompt-sensitive tasks',
'Need fine-tuning',
'Have sufficient computational resources'
],
'Advantages': [
'Can optimize prompts themselves',
'Theoretically higher performance ceiling',
'Suitable for complex reasoning tasks'
],
'Disadvantages': [
'Highest computational overhead',
'Complex tuning',
'May be unstable'
]
}
}
return guidelines
# Decision tree
def choose_optimizer(data_size, quality, compute_budget, task_complexity):
"""Optimizer selection decision tree"""
if compute_budget == 'low':
return 'LabeledFewShot'
elif data_size < 50:
return 'LabeledFewShot'
elif data_size > 500 and compute_budget == 'high':
if task_complexity == 'high':
return 'COPRO'
else:
return 'Bootstrap'
else:
return 'Bootstrap'
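For example, a mid-sized dataset with a medium compute budget falls through to the default branch:
# Example: 300 training examples, medium budget, prompt-sensitive task
print(choose_optimizer(data_size=300, quality='high',
                       compute_budget='medium', task_complexity='high'))
# -> 'Bootstrap'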
2. Performance Monitoring and Debugging
class OptimizationMonitor:
"""Optimization process monitor"""
def __init__(self):
self.metrics_history = []
self.timing_info = {}
def monitor_compilation(self, optimizer, program, trainset):
"""Monitor compilation process"""
import time
        import memory_profiler  # third-party package: pip install memory-profiler
start_time = time.time()
start_memory = memory_profiler.memory_usage()[0]
# Execute compilation
compiled_program = optimizer.compile(program, trainset)
end_time = time.time()
end_memory = memory_profiler.memory_usage()[0]
# Record performance metrics
self.timing_info = {
'compilation_time': end_time - start_time,
'memory_usage': end_memory - start_memory,
'trainset_size': len(trainset)
}
print(f"Compilation performance:")
print(f" Time: {self.timing_info['compilation_time']:.2f}s")
print(f" Memory: {self.timing_info['memory_usage']:.2f}MB")
return compiled_program
def track_convergence(self, scores_by_iteration):
"""Track convergence"""
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 6))
plt.plot(scores_by_iteration)
plt.title('Optimizer Convergence Curve')
plt.xlabel('Iteration')
plt.ylabel('Performance Score')
plt.grid(True)
plt.show()
# Detect convergence
if len(scores_by_iteration) > 5:
recent_improvement = scores_by_iteration[-1] - scores_by_iteration[-5]
if recent_improvement < 0.01:
print("Warning: Optimizer may have converged, suggest stopping iterations")
# Use monitor
monitor = OptimizationMonitor()
# compiled_program = monitor.monitor_compilation(optimizer, program, trainset)
Through this chapter, you should have mastered the principles and usage of the main optimizers in DSPy. Optimizers are a core feature of the framework: they automatically improve program performance without manual prompt engineering. In practice, choose an optimization strategy based on the characteristics of the task, the size of your data, and the computational resources available.