src/fmeval/eval_algorithms/__init__.py (12 changes: 6 additions & 6 deletions)
@@ -215,14 +215,14 @@ class ModelTask(Enum):
DEFAULT_PROMPT_TEMPLATE = "$feature"

BUILT_IN_DATASET_DEFAULT_PROMPT_TEMPLATES = {
- BOOLQ: 'Respond to the following question. Valid answers are "True" or "False". $feature Answer:',
- TRIVIA_QA: "Respond to the following question with a short answer: $feature Answer:",
- NATURAL_QUESTIONS: "Respond to the following question with a short answer: $feature Answer:",
- GIGAWORD: "Summarise the following text in one sentence: $feature",
- GOV_REPORT: "Summarise the following text in a few sentences: $feature",
+ BOOLQ: 'Respond to the following question. Valid answers are "True" or "False". $feature',
+ TRIVIA_QA: "Respond to the following question with a short answer: $feature",
+ NATURAL_QUESTIONS: "Respond to the following question with a short answer: $feature",
+ GIGAWORD: "Summarize the following text in one sentence: $feature",
+ GOV_REPORT: "Summarize the following text in a few sentences: $feature",
WOMENS_CLOTHING_ECOMMERCE_REVIEWS: (
"Classify the sentiment of the following review with 0 (negative sentiment)"
" or 1 (positive sentiment). Review: $feature. Classification:"
" or 1 (positive sentiment): $feature"
),
}

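For context, the "$feature" placeholder in these built-in templates follows Python's string.Template syntax, so the raw dataset record is substituted into the template when the prompt is composed; the edit above drops the trailing "Answer:" / "Classification:" cues and switches "Summarise" to "Summarize". A minimal sketch of that substitution, assuming plain string.Template semantics (compose_prompt is a hypothetical helper, not fmeval's actual API):

from string import Template

# One of the updated built-in templates from this diff.
TRIVIA_QA_TEMPLATE = "Respond to the following question with a short answer: $feature"

def compose_prompt(template: str, model_input: str) -> str:
    # Substitute the dataset record for the $feature placeholder.
    return Template(template).substitute(feature=model_input)

print(compose_prompt(TRIVIA_QA_TEMPLATE, "Who wrote The Old Man and the Sea?"))
# Respond to the following question with a short answer: Who wrote The Old Man and the Sea?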
test/integration/test_summarization_accuracy_semantic_robustness.py (30 changes: 15 additions & 15 deletions)
@@ -65,11 +65,11 @@ class TestSummarizationAccuracySemanticRobustness:
},
expected_evaluate_scores={
ROUGE_SCORE: 0.021908,
- METEOR_SCORE: 0.107623,
- BERT_SCORE: 0.559997,
- DELTA_ROUGE_SCORE: 0.019394,
- DELTA_METEOR_SCORE: 0.044310,
- DELTA_BERT_SCORE: 0.033714,
+ METEOR_SCORE: 0.105540,
+ BERT_SCORE: 0.559893,
+ DELTA_ROUGE_SCORE: 0.021061,
+ DELTA_METEOR_SCORE: 0.046859,
+ DELTA_BERT_SCORE: 0.032417,
},
),
TestCaseEvaluate(
@@ -84,11 +84,11 @@
},
expected_evaluate_scores={
ROUGE_SCORE: 0.021908,
- METEOR_SCORE: 0.107623,
- BERT_SCORE: 0.559998,
- DELTA_ROUGE_SCORE: 0.035696,
- DELTA_METEOR_SCORE: 0.056931,
- DELTA_BERT_SCORE: 0.027971,
+ METEOR_SCORE: 0.105540,
+ BERT_SCORE: 0.559893,
+ DELTA_ROUGE_SCORE: 0.037362,
+ DELTA_METEOR_SCORE: 0.056909,
+ DELTA_BERT_SCORE: 0.026363,
},
),
TestCaseEvaluate(
@@ -103,11 +103,11 @@
},
expected_evaluate_scores={
ROUGE_SCORE: 0.021908,
- METEOR_SCORE: 0.107623,
- BERT_SCORE: 0.559998,
- DELTA_ROUGE_SCORE: 0.032187,
- DELTA_METEOR_SCORE: 0.057705,
- DELTA_BERT_SCORE: 0.027511,
+ METEOR_SCORE: 0.105540,
+ BERT_SCORE: 0.559893,
+ DELTA_ROUGE_SCORE: 0.030725,
+ DELTA_METEOR_SCORE: 0.054234,
+ DELTA_BERT_SCORE: 0.026511,
},
),
],
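The expected scores above change in step with the new default prompts: the baseline METEOR and BERTScore values and all of the delta scores shift, while the baseline ROUGE score happens to stay at 0.021908. In a semantic-robustness evaluation, a delta score is meant to capture how much an accuracy metric moves when the input is perturbed. A rough sketch of one way such a delta could be computed (the exact aggregation fmeval uses may differ; the numbers below are made up for illustration):

from statistics import mean

def delta_score(original: float, perturbed: list[float]) -> float:
    # How far the metric drifts on perturbed inputs relative to the
    # original input; larger values mean the model is less robust.
    return mean(abs(original - p) for p in perturbed)

print(delta_score(0.559893, [0.53, 0.54]))  # ~0.0249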
test/unit/eval_algorithms/test_eval_algorithm.py (5 changes: 1 addition & 4 deletions)
@@ -257,8 +257,5 @@ def test_get_default_prompt_template():
WHEN get_default_prompt_template() method is called
THEN expected default prompt template is returned
"""
- assert (
-     get_default_prompt_template("trivia_qa")
-     == "Respond to the following question with a short answer: $feature Answer:"
- )
+ assert get_default_prompt_template("trivia_qa") == "Respond to the following question with a short answer: $feature"
assert get_default_prompt_template("my_custom_dataset") == "$feature"
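The updated assertion reflects the shortened TRIVIA_QA template, and the second assertion relies on the fallback for datasets without a built-in template. A hypothetical sketch of the behavior this test exercises, not fmeval's actual implementation:

DEFAULT_PROMPT_TEMPLATE = "$feature"
BUILT_IN_DATASET_DEFAULT_PROMPT_TEMPLATES = {
    "trivia_qa": "Respond to the following question with a short answer: $feature",
}

def get_default_prompt_template(dataset_name: str) -> str:
    # Built-in datasets get their curated template; anything else falls
    # back to the bare "$feature" passthrough.
    return BUILT_IN_DATASET_DEFAULT_PROMPT_TEMPLATES.get(dataset_name, DEFAULT_PROMPT_TEMPLATE)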