diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 3ae077f4331cc..58265db58a903 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -1239,7 +1239,7 @@ def predict_proba(self, X):
         ----------
         Zadrozny and Elkan, "Transforming classifier scores into multiclass
         probability estimates", SIGKDD'02,
-        http://www.research.ibm.com/people/z/zadrozny/kdd2002-Transf.pdf
+        https://dl.acm.org/doi/pdf/10.1145/775047.775151
 
         The justification for the formula in the loss="modified_huber"
         case is in the appendix B in: