From f34b401016d0bdeb1ff7fb78bb049d3f4fba5b8f Mon Sep 17 00:00:00 2001
From: Ophelia Yang
Date: Mon, 19 Feb 2024 14:15:05 -0800
Subject: [PATCH] fix: add bert_score_dissimilarity description

---
 src/fmeval/reporting/constants.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/fmeval/reporting/constants.py b/src/fmeval/reporting/constants.py
index 38923bea..d09d8a49 100644
--- a/src/fmeval/reporting/constants.py
+++ b/src/fmeval/reporting/constants.py
@@ -32,7 +32,7 @@
     DELTA_BERT_SCORE,
     DELTA_METEOR_SCORE,
 )
-from fmeval.eval_algorithms.general_semantic_robustness import WER_SCORE
+from fmeval.eval_algorithms.general_semantic_robustness import WER_SCORE, BERT_SCORE_DISSIMILARITY
 from fmeval.eval_algorithms import (
     TREX,
     BOOLQ,
@@ -188,6 +188,7 @@ class ListType(Enum):
     BALANCED_ACCURACY_SCORE: "The balanced accuracy score is the same as accuracy in the binary case, otherwise averaged recall per class.",
     # General semantic robustness
     WER_SCORE: "Word error rate (WER) is a value between 0 and 1, and measures the difference between the model output on the unperturbed input and the output(s) on one or more perturbed versions of the same input. For more details on how word error rate is computed, see the [HuggingFace Article on Word Error Rate](https://huggingface.co/spaces/evaluate-metric/wer).",
+    BERT_SCORE_DISSIMILARITY: "BERTScore Dissimilarity is computed as 1 - BERTScore and measures semantic differences between the original and perturbed versions of the same input.",
     # Summarization semantic robustness
     DELTA_ROUGE_SCORE: "Delta ROUGE-N score measures the change in Rouge between the original and perturbed versions of the same input.",
     DELTA_METEOR_SCORE: "Delta Meteor score measures the change in Meteor between the original and perturbed versions of the same input.",