
Commit 7136224

change eval dataset, include more robust judging, improved main

1 parent 5520b65

File tree: 5 files changed, +291 −98 lines changed


recipes/use_cases/prompt-migration/examples/usage.py

Lines changed: 2 additions & 3 deletions
@@ -16,10 +16,9 @@
     model_type="openai"
 )
 
-# Example evaluation dataset
 eval_dataset = [
-    {"text": "Example text 1", "expected_summary": "Summary 1"},
-    {"text": "Example text 2", "expected_summary": "Summary 2"},
+    {"text": "Example text 1", "expected_answer": "Summary 1"},
+    {"text": "Example text 2", "expected_answer": "Summary 2"},
 ]
 
 # Migrate prompt
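
Note: this commit renames expected_summary to expected_answer, generalizing the eval records beyond summarization. A minimal sketch of how a judging step might consume the renamed field follows; judge_answer is a hypothetical helper, not part of this commit:

    # Hypothetical sketch: consuming the renamed eval schema.
    # judge_answer is illustrative and not part of this commit.
    from difflib import SequenceMatcher

    eval_dataset = [
        {"text": "Example text 1", "expected_answer": "Summary 1"},
        {"text": "Example text 2", "expected_answer": "Summary 2"},
    ]

    def judge_answer(model_output: str, expected: str) -> float:
        """Score a model output against the expected answer (0.0 to 1.0)."""
        return SequenceMatcher(None, model_output, expected).ratio()

    for item in eval_dataset:
        print(item["text"], judge_answer("Summary 1", item["expected_answer"]))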

recipes/use_cases/prompt-migration/main.py

Lines changed: 52 additions & 10 deletions
@@ -14,17 +14,48 @@ def main():
         api_key=os.getenv("OPENAI_API_KEY")
     )
 
-    # target_lm = dspy.LM(
-    #     model="together_ai/togethercomputer/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
-    #     api_key=os.getenv("TOGETHER_API_KEY")
-    # )
+    target_lm = dspy.LM(
+        model="together_ai/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
+        api_key=os.getenv("TOGETHER_API_KEY")
+    )
+    # To run it with ollama
     # target_lm = dspy.LM('ollama_chat/llama3.2:3b-instruct-fp16', api_base='http://localhost:11434', api_key='')
-    target_lm = dspy.HFModel(model="gpt2")
+
+    # To run it with huggingface
+    # target_lm = dspy.HFModel(model="gpt2")
 
     engine = PromptMigrationEngine(openai_lm, target_lm)
 
     source_prompt = PromptTemplate(
-        template="Write a Python function that takes as input a file path to an image, loads the image into memory as a numpy array, then crops the rows and columns around the perimeter if they are darker than a threshold value. Use the mean value of rows and columns to decide if they should be marked for deletion.",
+        template="""You are an advanced Large Language Model tasked with generating Python code snippets in response to user prompts. Your primary objective is to provide accurate, concise, and well-structured Python functions. Follow these guidelines:
+
+Understand the Context: Analyze the input prompt and identify its category (e.g., API Usage, File Handling, Error Handling).
+
+Generate Code:
+Write Python code that directly addresses the user's request.
+Ensure the code is syntactically correct, functional, and adheres to Python best practices.
+Include necessary imports and handle potential edge cases.
+
+Error Handling:
+Include appropriate error handling where applicable (e.g., try-except blocks).
+If exceptions occur, provide meaningful error messages.
+
+Readability:
+Use clear variable names and include comments where necessary for clarity.
+Prioritize readability and maintainability in all generated code.
+
+Complexity Alignment:
+Tailor the code's complexity based on the indicated difficulty (e.g., simple, medium, complex).
+Ensure that the solution is neither overly simplistic nor unnecessarily complicated.
+
+Prompt Type:
+Focus on the code_generation type for creating Python functions.
+Avoid deviating from the task unless additional clarification is requested.
+
+Testing and Validity:
+Assume the function might be run immediately. Provide code that is ready for use or minimal adaptation.
+Highlight any dependencies or external libraries required.
+""",
         input_variables=["text"],
         model_type="openai"
     )
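
Note: the Together AI endpoint is now the active target, with the Ollama and Hugging Face options kept as comments. A sketch of how the three backends shown above could be selected at runtime; make_target_lm is illustrative and not in this commit (model ids and api_base are copied from the diff):

    # Illustrative backend switcher; make_target_lm is not part of this commit.
    import os
    import dspy

    def make_target_lm(backend: str):
        if backend == "together":
            return dspy.LM(
                model="together_ai/meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo",
                api_key=os.getenv("TOGETHER_API_KEY"),
            )
        if backend == "ollama":
            return dspy.LM('ollama_chat/llama3.2:3b-instruct-fp16',
                           api_base='http://localhost:11434', api_key='')
        if backend == "huggingface":
            return dspy.HFModel(model="gpt2")
        raise ValueError(f"Unknown backend: {backend}")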
@@ -33,20 +64,31 @@ def main():
 
 
     # To evaluate on a specific subset, use the following:
-    #summarization_dataset = get_eval_subset(prompt_type="summarization")
+    code_generation_dataset = get_eval_subset(prompt_type="code_generation")
     #simple_tasks = get_eval_subset(complexity="simple")
+    evaluator = PromptEvaluator(openai_lm, target_lm)
+
+    metrics = evaluator.evaluate(
+        source_prompt.template,  # Same prompt for both
+        source_prompt.template,  # Same prompt for both
+        code_generation_dataset
+    )
+
+    print(f"Evaluation metrics:")
+    print(f"  Accuracy: {metrics.accuracy:.2f}")
+    print(f"  Similarity: {metrics.similarity:.2f}")
+    print(f"  Consistency: {metrics.consistency:.2f}")
 
     # Migrate prompt
     print("Migrating prompt...")
-    migrated_prompt = engine.migrate_prompt(source_prompt, eval_dataset)
+    migrated_prompt = engine.migrate_prompt(source_prompt, code_generation_dataset)
 
     # Evaluate migration
     print("Evaluating migration...")
-    evaluator = PromptEvaluator(openai_lm, target_lm)
     metrics = evaluator.evaluate(
         source_prompt.template,
         migrated_prompt.template,
-        eval_dataset
+        code_generation_dataset
     )
 
     print(f"\nResults:")
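
Note: passing source_prompt.template twice scores the unmodified prompt on both models, giving a baseline that the post-migration metrics can be compared against. A condensed sketch of that baseline-then-migrate pattern, assuming evaluator and engine are configured as above; report_delta is an illustrative helper, not part of this commit:

    # Illustrative baseline-vs-migrated comparison; report_delta is not in this commit.
    def report_delta(baseline, migrated):
        for name in ("accuracy", "similarity", "consistency"):
            before, after = getattr(baseline, name), getattr(migrated, name)
            print(f"{name}: {before:.2f} -> {after:.2f} ({after - before:+.2f})")

    baseline = evaluator.evaluate(source_prompt.template,
                                  source_prompt.template,  # same prompt on both models
                                  code_generation_dataset)
    migrated_prompt = engine.migrate_prompt(source_prompt, code_generation_dataset)
    migrated = evaluator.evaluate(source_prompt.template,
                                  migrated_prompt.template,
                                  code_generation_dataset)
    report_delta(baseline, migrated)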

recipes/use_cases/prompt-migration/prompt_migration/engine.py

Lines changed: 52 additions & 14 deletions
@@ -9,30 +9,53 @@ class PromptTemplate:
     model_type: str  # 'openai' or 'llama'
 
 class PromptMigrationEngine:
-    def __init__(self, source_lm: dspy.OpenAI, target_lm: dspy.LM):
+    def __init__(self, source_lm: dspy.LM, target_lm: dspy.LM):
         self.source_lm = source_lm
         self.target_lm = target_lm
         dspy.configure(lm=source_lm)
 
     def _optimize_transformation(self, transformer, eval_dataset):
         """Optimize the transformation using the evaluation dataset."""
-        class AccuracyMetric:
+        class PromptQualityMetric:
+            def __init__(self, source_lm, target_lm):
+                self.source_lm = source_lm
+                self.target_lm = target_lm
+
             def __call__(self, example, prediction, trace=None):
-                return float(prediction.target == example.expected_output)
+                if not hasattr(prediction, 'target'):
+                    return 0.0
+
+                try:
+                    # Get outputs from both models using the prompts
+                    source_output = self.source_lm(example.source)
+                    target_output = self.target_lm(prediction.target)
+
+                    # Compare outputs (basic similarity)
+                    from difflib import SequenceMatcher
+                    similarity = SequenceMatcher(None,
+                                                 str(source_output),
+                                                 str(target_output)).ratio()
+                    return similarity
+                except Exception as e:
+                    print(f"Error in metric: {e}")
+                    return 0.0
 
         optimizer = dspy.BootstrapFewShotWithRandomSearch(
-            metric=AccuracyMetric(),
-            max_bootstrapped_demos=4,
-            max_labeled_demos=4,
-            num_threads=4
+            metric=PromptQualityMetric(self.source_lm, self.target_lm),
+            max_bootstrapped_demos=2,
+            max_labeled_demos=2,
+            num_threads=1
         )
 
-        train_data = [
-            dspy.Example(
+        # Prepare training data
+        train_data = []
+        for item in eval_dataset:
+            # Create example with both prompt and expected output
+            example = dspy.Example(
                 source=item["text"],
-                expected_output=item["expected_summary"]
-            ).with_inputs("source") for item in eval_dataset
-        ]
+                expected_output=item["expected_answer"]
+            ).with_inputs("source")
+            train_data.append(example)
 
         return optimizer.compile(transformer, trainset=train_data)
 
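Note: PromptQualityMetric is the "more robust judging" named in the commit message. Instead of exact string equality, it runs the source and candidate prompts through their respective models and grades the outputs by similarity, so near-misses earn partial credit. A standalone illustration of the scoring idea; the sample outputs are invented:

    # Standalone demo of the SequenceMatcher-based judging used above.
    # The sample outputs are invented for illustration.
    from difflib import SequenceMatcher

    source_output = "def add(a, b):\n    return a + b"
    target_output = "def add(x, y):\n    return x + y"

    score = SequenceMatcher(None, source_output, target_output).ratio()
    print(f"similarity: {score:.2f}")  # graded score instead of a 0/1 exact match
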
@@ -44,15 +67,26 @@ def migrate_prompt(self,
         class PromptTransformation(dspy.Signature):
             """Convert a prompt from one format to another."""
             source = dspy.InputField(desc="Source prompt template")
-            target = dspy.OutputField(desc="Transformed prompt template")
+            target = dspy.OutputField(desc="Transformed prompt template that maintains functionality while adapting to target model format")
 
         class Transformer(dspy.Module):
             def __init__(self):
                 super().__init__()
                 self.chain = dspy.ChainOfThought(PromptTransformation)
 
             def forward(self, source):
-                return self.chain(source=source)
+                # Add context about the transformation task
+                prompt = f"""
+                Transform this prompt while:
+                1. Maintaining core functionality
+                2. Adapting to target model format
+                3. Preserving input variables
+                4. Keeping essential instructions
+
+                Source prompt:
+                {source}
+                """
+                return self.chain(source=prompt)
 
         transformer = Transformer()
 
@@ -61,6 +95,10 @@ def forward(self, source):
 
         result = transformer(source=source_prompt.template)
 
+        # Format for target model
+        if source_prompt.model_type == "openai" and "llama" in str(self.target_lm):
+            result.target = f"### Instruction:\n{result.target}\n\n### Response:"
+
         return PromptTemplate(
             template=result.target,
             input_variables=source_prompt.input_variables,
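
Note: the new post-processing wraps the transformed template in an Alpaca-style frame whenever an OpenAI-format prompt is migrated to a Llama target. For a toy template, the wrapping above produces the following; the input string is invented for illustration:

    # What the Alpaca-style wrapping yields for a toy template.
    template = "Summarize: {text}"  # invented example input
    wrapped = f"### Instruction:\n{template}\n\n### Response:"
    print(wrapped)
    # ### Instruction:
    # Summarize: {text}
    #
    # ### Response: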
