@@ -71,7 +71,6 @@ def clean_score(score):
71
71
return result
72
72
except Exception as e :
73
73
print (f"Error in judge: { str (e )} " )
74
- # Return default scores
75
74
return type ('Result' , (), {
76
75
'accuracy' : '0' ,
77
76
'consistency' : '0' ,
@@ -119,12 +118,10 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
119
118
expected_output = expected
120
119
)
121
120
122
- # Calculate scores
123
121
accuracy_score = float (judgment .accuracy ) / 100
124
122
consistency_score = float (judgment .consistency ) / 100
125
123
is_equivalent = judgment .equivalence .lower () == "yes"
126
124
127
- # Store individual scores
128
125
case_scores = {
129
126
"input" : input_text ,
130
127
"expected" : expected ,
@@ -137,7 +134,6 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
137
134
}
138
135
individual_scores .append (case_scores )
139
136
140
- # Update totals
141
137
total_accuracy += accuracy_score
142
138
total_consistency += consistency_score
143
139
total_similarity += float (is_equivalent )
@@ -149,15 +145,13 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
149
145
print (f"Judge's reasoning: { judgment .reasoning } " )
150
146
print (f"Scores - Accuracy: { accuracy_score :.2f} , Consistency: { consistency_score :.2f} , Equivalent: { is_equivalent } " )
151
147
152
- # Calculate final metrics
153
148
metrics = EvaluationMetrics (
154
149
accuracy = total_accuracy / num_cases ,
155
150
similarity = total_similarity / num_cases ,
156
151
consistency = total_consistency / num_cases ,
157
152
individual_scores = individual_scores
158
153
)
159
154
160
- # Save results to JSON
161
155
results = {
162
156
"source_prompt" : source_prompt ,
163
157
"target_prompt" : target_prompt ,
@@ -183,14 +177,12 @@ def evaluate(self,
183
177
184
178
def _save_results(self, results: dict, filename: str = 'results.json') -> None:
    """Save results to a JSON file without overwriting an existing file.

    If ``filename`` is free it is used as-is. Otherwise a timestamp
    (``_YYYYMMDD_HHMMSS``) is inserted before the extension. Because the
    timestamp only has one-second resolution, a numeric suffix is appended
    when the timestamped name is taken as well, so repeated saves within
    the same second never clobber each other.

    Args:
        results: JSON-serializable mapping to write.
        filename: Preferred output path.

    Raises:
        OSError: If the target file cannot be opened for writing.
        TypeError: If ``results`` contains values ``json`` cannot serialize.
    """
    target = filename

    if os.path.exists(target):
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        base, ext = os.path.splitext(filename)
        target = f"{base}_{timestamp}{ext}"

        # The timestamped name can collide too (several saves per second);
        # keep probing with a counter until an unused name is found.
        attempt = 1
        while os.path.exists(target):
            target = f"{base}_{timestamp}_{attempt}{ext}"
            attempt += 1

    # Explicit encoding keeps the output independent of the platform locale.
    with open(target, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)
    print(f"Results saved to {target}")
0 commit comments