Skip to content

Commit 56ab508

Browse files
committed
add engine and eval dataset
1 parent e39ad72 commit 56ab508

File tree

2 files changed

+191
-0
lines changed

2 files changed

+191
-0
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
import dspy
2+
from typing import List, Dict, Optional
3+
from dataclasses import dataclass
4+
5+
@dataclass
class PromptTemplate:
    """A prompt template plus the metadata needed to migrate and render it."""
    # Raw prompt text; may contain placeholders for the variables below.
    template: str
    # Names of the variables the template expects to be substituted in.
    input_variables: List[str]
    model_type: str  # 'openai' or 'llama'
10+
11+
class PromptMigrationEngine:
    """Migrates prompt templates from a source LM's format to a target LM's format.

    Uses a dspy ChainOfThought transformer to rewrite the prompt text and can
    optionally optimize that transformer against an evaluation dataset via
    bootstrapped few-shot search.
    """

    def __init__(self, source_lm: dspy.OpenAI, target_lm: dspy.LM):
        """Store both language models and configure dspy.

        Args:
            source_lm: LM whose prompt format we are migrating from.
            target_lm: LM whose prompt format we are migrating to.
        """
        self.source_lm = source_lm
        self.target_lm = target_lm
        # NOTE(review): dspy is configured globally with the *source* LM, so the
        # transformation itself runs on the source model — confirm intended.
        dspy.configure(lm=source_lm)

    def _optimize_transformation(self, transformer, eval_dataset):
        """Optimize the transformation using the evaluation dataset.

        Args:
            transformer: dspy.Module producing a `.target` field from `source`.
            eval_dataset: iterable of dicts with "text" and "expected_summary".

        Returns:
            The compiled (optimized) transformer.
        """
        # A plain function is sufficient for a dspy metric; no need for a
        # single-method class with __call__.
        def accuracy_metric(example, prediction, trace=None):
            # Exact-match accuracy between the migrated output and expectation.
            return float(prediction.target == example.expected_output)

        optimizer = dspy.BootstrapFewShotWithRandomSearch(
            metric=accuracy_metric,
            max_bootstrapped_demos=4,
            max_labeled_demos=4,
            num_threads=4,
        )

        # Wrap each raw dict as a dspy.Example with "source" as the input field.
        train_data = [
            dspy.Example(
                source=item["text"],
                expected_output=item["expected_summary"],
            ).with_inputs("source")
            for item in eval_dataset
        ]

        return optimizer.compile(transformer, trainset=train_data)

    def migrate_prompt(self,
                       source_prompt: PromptTemplate,
                       eval_dataset: Optional[List[Dict]] = None,
                       target_model_type: str = 'llama') -> PromptTemplate:
        """Migrates a prompt from source LM to target LM format.

        Args:
            source_prompt: Template to migrate; its `template` text is rewritten.
            eval_dataset: Optional list of dicts with "text" and
                "expected_summary" keys; when non-empty, the transformer is
                optimized against it before migrating.
            target_model_type: `model_type` stamped on the returned template
                (defaults to 'llama', preserving previous behavior).

        Returns:
            A new PromptTemplate with the transformed text and the same
            input variables as the source.
        """
        class PromptTransformation(dspy.Signature):
            """Convert a prompt from one format to another."""
            source = dspy.InputField(desc="Source prompt template")
            target = dspy.OutputField(desc="Transformed prompt template")

        class Transformer(dspy.Module):
            def __init__(self):
                super().__init__()
                self.chain = dspy.ChainOfThought(PromptTransformation)

            def forward(self, source):
                return self.chain(source=source)

        transformer = Transformer()

        # Empty or None datasets skip optimization (falsy check is deliberate).
        if eval_dataset:
            transformer = self._optimize_transformation(transformer, eval_dataset)

        result = transformer(source=source_prompt.template)

        return PromptTemplate(
            template=result.target,
            input_variables=source_prompt.input_variables,
            model_type=target_model_type,
        )
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
from typing import Dict, List, Optional
2+
3+
def get_evaluation_dataset() -> List[Dict]:
    """
    Returns a comprehensive evaluation dataset for testing prompt migrations.
    Each test case includes:
    - text: Input text
    - expected_summary: Expected output
    - prompt_type: Type of prompt (summarization, classification, qa, etc.)
    - complexity: Difficulty level (simple, medium, complex)
    """
    # 12 hand-written cases, two per category, ordered simple -> complex
    # within each category.
    return [
        # Summarization examples
        {
            "text": "The quick brown fox jumps over the lazy dog.",
            "expected_summary": "A fox jumps over a dog.",
            "prompt_type": "summarization",
            "complexity": "simple"
        },
        {
            "text": """Machine learning is a subset of artificial intelligence that focuses on developing
                    systems that can learn from and make decisions based on data. It has numerous
                    applications in various fields including healthcare, finance, and autonomous vehicles.""",
            "expected_summary": "Machine learning is an AI technology that enables systems to learn and make decisions from data, used in healthcare, finance, and autonomous vehicles.",
            "prompt_type": "summarization",
            "complexity": "medium"
        },

        # Classification examples
        {
            "text": "I absolutely loved this product! Best purchase ever!",
            "expected_summary": "Positive",
            "prompt_type": "sentiment_classification",
            "complexity": "simple"
        },
        {
            "text": "The product works fine but the customer service could be better.",
            "expected_summary": "Neutral",
            "prompt_type": "sentiment_classification",
            "complexity": "medium"
        },

        # Question-Answering examples
        {
            "text": "What is the capital of France? Context: Paris is the capital and largest city of France.",
            "expected_summary": "Paris",
            "prompt_type": "qa",
            "complexity": "simple"
        },
        {
            "text": """What causes rain? Context: Rain is precipitation of liquid water in the form of droplets.
                    Water vapor in warm air rises and cools, forming clouds. When the droplets become too
                    heavy, they fall as rain.""",
            "expected_summary": "Rain occurs when water vapor in warm air rises, cools to form clouds, and droplets become heavy enough to fall.",
            "prompt_type": "qa",
            "complexity": "medium"
        },

        # Code-related examples
        {
            "text": "Write a function to add two numbers in Python.",
            "expected_summary": "def add(a, b):\n    return a + b",
            "prompt_type": "code_generation",
            "complexity": "simple"
        },
        {
            "text": "Explain what this code does: for i in range(len(arr)): arr[i] *= 2",
            "expected_summary": "This code multiplies each element in the array 'arr' by 2.",
            "prompt_type": "code_explanation",
            "complexity": "simple"
        },

        # Text transformation examples
        {
            "text": "convert this to passive voice: The cat chased the mouse.",
            "expected_summary": "The mouse was chased by the cat.",
            "prompt_type": "text_transformation",
            "complexity": "simple"
        },
        {
            "text": "translate to French: Hello, how are you?",
            "expected_summary": "Bonjour, comment allez-vous?",
            "prompt_type": "translation",
            "complexity": "simple"
        },

        # Complex reasoning examples
        {
            "text": """A train leaves Station A at 2:00 PM traveling at 60 mph. Another train leaves
                    Station B at 3:00 PM traveling at 75 mph in the opposite direction. If the stations
                    are 375 miles apart, at what time will the trains meet?""",
            "expected_summary": "The trains will meet at 5:00 PM.",
            "prompt_type": "problem_solving",
            "complexity": "complex"
        },
        {
            "text": """Analyze the environmental impact of electric vehicles versus traditional
                    gasoline vehicles, considering manufacturing, operation, and disposal.""",
            "expected_summary": """Electric vehicles typically have higher manufacturing emissions but lower
                                operational emissions compared to gasoline vehicles. Overall lifecycle
                                environmental impact depends on electricity source and battery recycling.""",
            "prompt_type": "analysis",
            "complexity": "complex"
        }
    ]
106+
107+
def get_eval_subset(prompt_type: Optional[str] = None, complexity: Optional[str] = None) -> List[Dict]:
    """
    Returns a filtered subset of the evaluation dataset based on prompt type and/or complexity.

    Args:
        prompt_type: Type of prompts to filter (e.g., 'summarization', 'qa', etc.)
        complexity: Complexity level to filter (e.g., 'simple', 'medium', 'complex')

    Returns:
        Test cases matching every provided filter; the full dataset when no
        filters are given.
    """
    # PEP 484: defaults of None require Optional[...] annotations (fixed above).
    dataset = get_evaluation_dataset()

    if prompt_type:
        dataset = [d for d in dataset if d["prompt_type"] == prompt_type]

    if complexity:
        dataset = [d for d in dataset if d["complexity"] == complexity]

    return dataset

0 commit comments

Comments
 (0)