diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
index b8a1a8e5e22b4..f042acc7c87d9 100644
--- a/sklearn/metrics/_classification.py
+++ b/sklearn/metrics/_classification.py
@@ -2396,7 +2396,8 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
     pos_label : int or str, default=None
         Label of the positive class.
         Defaults to the greater label unless y_true is all 0 or all -1
-        in which case pos_label defaults to 1.
+        in which case pos_label defaults to 1. If `y_true` contains strings
+        and `pos_label` is not specified, an error will be raised.
 
     Returns
     -------
@@ -2439,16 +2440,22 @@ def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None):
     if y_prob.min() < 0:
         raise ValueError("y_prob contains values less than 0.")
 
-    # if pos_label=None, when y_true is in {-1, 1} or {0, 1},
-    # pos_label is set to 1 (consistent with precision_recall_curve/roc_curve),
-    # otherwise pos_label is set to the greater label
-    # (different from precision_recall_curve/roc_curve,
-    # the purpose is to keep backward compatibility).
+    # Default behavior when pos_label=None:
+    # When y_true contains strings, an error is raised
+    # (differs from other functions, to keep backward compatibility).
+    # When y_true is in {-1, 1} or {0, 1}, pos_label is set to 1
+    # (consistent with precision_recall_curve/roc_curve).
+    # Otherwise pos_label is set to the greater label
+    # (differs from other functions, to keep backward compatibility).
     if pos_label is None:
-        if (np.array_equal(labels, [0]) or
+        if any(isinstance(label, str) for label in labels):
+            raise ValueError("pos_label must be specified when y_true "
+                             "contains strings.")
+        elif (np.array_equal(labels, [0]) or
                 np.array_equal(labels, [-1])):
             pos_label = 1
         else:
-            pos_label = y_true.max()
+            pos_label = labels.max()
+
     y_true = np.array(y_true == pos_label, int)
     return np.average((y_true - y_prob) ** 2, weights=sample_weight)
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
index 1f959d95ce844..b831cad6a07da 100644
--- a/sklearn/metrics/tests/test_classification.py
+++ b/sklearn/metrics/tests/test_classification.py
@@ -2246,10 +2246,24 @@ def test_brier_score_loss():
     assert_almost_equal(brier_score_loss([-1], [0.4]), 0.16)
     assert_almost_equal(brier_score_loss([0], [0.4]), 0.16)
     assert_almost_equal(brier_score_loss([1], [0.4]), 0.36)
-    assert_almost_equal(
-        brier_score_loss(['foo'], [0.4], pos_label='bar'), 0.16)
-    assert_almost_equal(
-        brier_score_loss(['foo'], [0.4], pos_label='foo'), 0.36)
+
+    # make sure the positive class is correctly inferred
+    y_true = np.array([0, 1, 1, 0])
+    y_pred = np.array([0.8, 0.6, 0.4, 0.2])
+    score1 = brier_score_loss(y_true, y_pred, pos_label=1)
+    score2 = brier_score_loss(y_true, y_pred)
+    assert score1 == pytest.approx(score2)
+    y_true = np.array(["neg", "pos", "pos", "neg"])
+    # raise error when y_true contains strings and pos_label is not specified
+    with pytest.raises(ValueError, match="pos_label must be specified"):
+        brier_score_loss(y_true, y_pred)
+    score2 = brier_score_loss(y_true, y_pred, pos_label="pos")
+    assert score1 == pytest.approx(score2)
+
+    # the positive class is correctly inferred for an object array of ints
+    y_pred_num_obj = np.array([0, 1, 1, 0], dtype=object)
+    score3 = brier_score_loss(y_pred_num_obj, y_pred)
+    assert score1 == pytest.approx(score3)
 
 
 def test_balanced_accuracy_score_unseen():
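
For reference, below is a small usage sketch (not part of the patch) illustrating the behavior the diff introduces, assuming a scikit-learn build with this change applied; the labels "neg"/"pos" are only illustrative.

import numpy as np
from sklearn.metrics import brier_score_loss

y_prob = np.array([0.8, 0.6, 0.4, 0.2])

# Integer labels: pos_label is inferred as the greater label (1).
print(brier_score_loss(np.array([0, 1, 1, 0]), y_prob))  # 0.3

# String labels without an explicit pos_label now raise a ValueError.
try:
    brier_score_loss(np.array(["neg", "pos", "pos", "neg"]), y_prob)
except ValueError as exc:
    print(exc)  # pos_label must be specified when y_true contains strings.

# Passing pos_label explicitly gives the same score as the integer case.
print(brier_score_loss(np.array(["neg", "pos", "pos", "neg"]), y_prob,
                       pos_label="pos"))  # 0.3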