src/fmeval/eval_algorithms/__init__.py (12 changes: 6 additions & 6 deletions)
@@ -215,14 +215,14 @@ class ModelTask(Enum):
DEFAULT_PROMPT_TEMPLATE = "$feature"

BUILT_IN_DATASET_DEFAULT_PROMPT_TEMPLATES = {
- BOOLQ: 'Respond to the following question. Valid answers are "True" or "False". $feature Answer:',
- TRIVIA_QA: "Respond to the following question with a short answer: $feature Answer:",
- NATURAL_QUESTIONS: "Respond to the following question with a short answer: $feature Answer:",
- GIGAWORD: "Summarise the following text in one sentence: $feature",
- GOV_REPORT: "Summarise the following text in a few sentences: $feature",
+ BOOLQ: 'Respond to the following question. Valid answers are "True" or "False". $feature',
+ TRIVIA_QA: "Respond to the following question with a short answer: $feature",
+ NATURAL_QUESTIONS: "Respond to the following question with a short answer: $feature",
+ GIGAWORD: "Summarize the following text in one sentence: $feature",
+ GOV_REPORT: "Summarize the following text in a few sentences: $feature",
WOMENS_CLOTHING_ECOMMERCE_REVIEWS: (
"Classify the sentiment of the following review with 0 (negative sentiment)"
" or 1 (positive sentiment). Review: $feature. Classification:"
" or 1 (positive sentiment): $feature"
),
}

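For context, the "$feature" placeholder in these built-in templates follows Python's string.Template syntax, so the raw dataset record is substituted into the template when the prompt is composed; the edit above drops the trailing "Answer:" / "Classification:" cues and switches "Summarise" to "Summarize". A minimal sketch of that substitution, assuming plain string.Template semantics (compose_prompt is a hypothetical helper, not fmeval's actual API):

from string import Template

# One of the updated built-in templates from this diff.
TRIVIA_QA_TEMPLATE = "Respond to the following question with a short answer: $feature"

def compose_prompt(template: str, model_input: str) -> str:
    # Substitute the dataset record for the $feature placeholder.
    return Template(template).substitute(feature=model_input)

print(compose_prompt(TRIVIA_QA_TEMPLATE, "Who wrote The Old Man and the Sea?"))
# Respond to the following question with a short answer: Who wrote The Old Man and the Sea?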
test/integration/test_summarization_accuracy_semantic_robustness.py (30 changes: 15 additions & 15 deletions)
@@ -65,11 +65,11 @@ class TestSummarizationAccuracySemanticRobustness:
},
expected_evaluate_scores={
ROUGE_SCORE: 0.021908,
- METEOR_SCORE: 0.107623,
- BERT_SCORE: 0.559997,
- DELTA_ROUGE_SCORE: 0.019394,
- DELTA_METEOR_SCORE: 0.044310,
- DELTA_BERT_SCORE: 0.033714,
+ METEOR_SCORE: 0.105540,
+ BERT_SCORE: 0.559893,
+ DELTA_ROUGE_SCORE: 0.021061,
+ DELTA_METEOR_SCORE: 0.046859,
+ DELTA_BERT_SCORE: 0.032417,
},
),
TestCaseEvaluate(
@@ -84,11 +84,11 @@
},
expected_evaluate_scores={
ROUGE_SCORE: 0.021908,
- METEOR_SCORE: 0.107623,
- BERT_SCORE: 0.559998,
- DELTA_ROUGE_SCORE: 0.035696,
- DELTA_METEOR_SCORE: 0.056931,
- DELTA_BERT_SCORE: 0.027971,
+ METEOR_SCORE: 0.105540,
+ BERT_SCORE: 0.559893,
+ DELTA_ROUGE_SCORE: 0.037362,
+ DELTA_METEOR_SCORE: 0.056909,
+ DELTA_BERT_SCORE: 0.026363,
},
),
TestCaseEvaluate(
@@ -103,11 +103,11 @@
},
expected_evaluate_scores={
ROUGE_SCORE: 0.021908,
- METEOR_SCORE: 0.107623,
- BERT_SCORE: 0.559998,
- DELTA_ROUGE_SCORE: 0.032187,
- DELTA_METEOR_SCORE: 0.057705,
- DELTA_BERT_SCORE: 0.027511,
+ METEOR_SCORE: 0.105540,
+ BERT_SCORE: 0.559893,
+ DELTA_ROUGE_SCORE: 0.030725,
+ DELTA_METEOR_SCORE: 0.054234,
+ DELTA_BERT_SCORE: 0.026511,
},
),
],
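The expected scores above change in step with the new default prompts: the baseline METEOR and BERTScore values and all of the delta scores shift, while the baseline ROUGE score happens to stay at 0.021908. In a semantic-robustness evaluation, a delta score is meant to capture how much an accuracy metric moves when the input is perturbed. A rough sketch of one way such a delta could be computed (the exact aggregation fmeval uses may differ; the numbers below are made up for illustration):

from statistics import mean

def delta_score(original: float, perturbed: list[float]) -> float:
    # How far the metric drifts on perturbed inputs relative to the
    # original input; larger values mean the model is less robust.
    return mean(abs(original - p) for p in perturbed)

print(delta_score(0.559893, [0.53, 0.54]))  # ~0.0249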
test/unit/eval_algorithms/test_eval_algorithm.py (5 changes: 1 addition & 4 deletions)
@@ -257,8 +257,5 @@ def test_get_default_prompt_template():
WHEN get_default_prompt_template() method is called
THEN expected default prompt template is returned
"""
- assert (
-     get_default_prompt_template("trivia_qa")
-     == "Respond to the following question with a short answer: $feature Answer:"
- )
+ assert get_default_prompt_template("trivia_qa") == "Respond to the following question with a short answer: $feature"
assert get_default_prompt_template("my_custom_dataset") == "$feature"
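The updated assertion reflects the shortened TRIVIA_QA template, and the second assertion relies on the fallback for datasets without a built-in template. A hypothetical sketch of the behavior this test exercises, not fmeval's actual implementation:

DEFAULT_PROMPT_TEMPLATE = "$feature"
BUILT_IN_DATASET_DEFAULT_PROMPT_TEMPLATES = {
    "trivia_qa": "Respond to the following question with a short answer: $feature",
}

def get_default_prompt_template(dataset_name: str) -> str:
    # Built-in datasets get their curated template; anything else falls
    # back to the bare "$feature" passthrough.
    return BUILT_IN_DATASET_DEFAULT_PROMPT_TEMPLATES.get(dataset_name, DEFAULT_PROMPT_TEMPLATE)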