pytorch · atalman · Jul 22, 2025 · Jul 17, 2025
diff --git a/aten/src/ATen/ScalarOps.cpp b/aten/src/ATen/ScalarOps.cpp
@@ -8,7 +8,28 @@ namespace at {
 namespace {
 template <typename scalar_t>
 inline void fill_inplace(Tensor& self, const Scalar& value_scalar) {
-  auto value = value_scalar.to<scalar_t>();
+  scalar_t value{};
+
+  if constexpr (std::is_same_v<scalar_t, at::Half> ||
+                std::is_same_v<scalar_t, at::BFloat16> ||
+                std::is_same_v<scalar_t, at::Float8_e5m2> ||
+                std::is_same_v<scalar_t, at::Float8_e5m2fnuz> ||
+                std::is_same_v<scalar_t, at::Float8_e4m3fn> ||
+                std::is_same_v<scalar_t, at::Float8_e4m3fnuz> ||
+                std::is_same_v<scalar_t, at::Float8_e8m0fnu>) {
+    // relaxed float cast: allow inf similar to the torch.tensor constructor
+    //
+    // without this, we had the following divergence:
+    //   torch.tensor(1123581321.0, dtype=torch.float16)
+    //     => tensor(inf, dtype=torch.float16)
+    //   torch.ops.aten.scalar_tensor.default(1123581321, dtype=torch.float16)
+    //     => RuntimeError: value cannot be converted to type at::Half without overflow
+
+    value = static_cast<scalar_t>(value_scalar.to<double>());
+  } else {
+    value = value_scalar.to<scalar_t>();
+  }
+
   scalar_t* dptr = static_cast<scalar_t*>(self.data_ptr());
   *dptr = value;
 }

diff --git a/test/dynamo/test_misc.py b/test/dynamo/test_misc.py
@@ -12955,6 +12955,38 @@ def f(actions, n_act, epsilon=0.1):
         y = torch.tensor(5)
         f(x, y)
 
+    def test_dynamic_float_scalar_tensor_coersion(self):
+        # Minified version of https://github.com/pytorch/pytorch/issues/158376#issuecomment-3079591367
+        class Foo:
+            def __init__(self):
+                self.config = type(
+                    "Config", (), {"pad_val": 1123581321.0, "tolerance": 1e-6}
+                )
+
+            @torch.compile(fullgraph=True)
+            def forward(self, input):
+                outputs = torch.where(
+                    torch.abs(input - self.config.pad_val) < self.config.tolerance,
+                    torch.tensor(
+                        self.config.pad_val, dtype=input.dtype, device=input.device
+                    ),
+                    torch.tensor(
+                        self.config.pad_val + 1, dtype=input.dtype, device=input.device
+                    ),
+                )
+                return outputs
+
+        foo = Foo()
+        inputs = torch.randn(3, 4)
+        result = foo.forward(inputs)
+
+        original_pad_val = foo.config.pad_val
+        foo.config.pad_val += 1.0
+        result2 = foo.forward(inputs)
+
+        # Previously would crash with:
+        #   RuntimeError: value cannot be converted to type at::Half without overflow
+
 
 devices = ("cuda", "hpu")
 instantiate_device_type_tests(MiscTestsDevice, globals(), only_for=devices)