Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f7dfc09

Browse files
authored
Merge pull request #25148 from czgdp1807/regrid-win-fix
BUG/ENH: interpolate: Reduce ``_regrid`` memory pressure in large smoothing runs on Windows
2 parents 8e24a4e + 625965e commit f7dfc09

1 file changed

Lines changed: 34 additions & 35 deletions

File tree

scipy/interpolate/_regrid.py

Lines changed: 34 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,8 @@ def _solve_2d_fitpack(Ax, Ay, Q, p,
407407
Residual computation:
408408
--------------------------------------------------------
409409
Zhat = Ax * C * Ay^T
410-
fp = sum((Z - Zhat)^2)
410+
R = Z - Zhat
411+
fp = sum(R^2)
411412
412413
Parameters
413414
----------
@@ -436,6 +437,8 @@ def _solve_2d_fitpack(Ax, Ay, Q, p,
436437
2-D B-spline coefficient grid.
437438
fp : float
438439
Residual sum of squares between fitted surface and `z`.
440+
R : ndarray, shape (mx, my)
441+
Residual matrix ``z - zhat``, where ``zhat = Ax @ C @ Ay.T``.
439442
440443
Notes
441444
-----
@@ -534,11 +537,15 @@ def _solve_2d_fitpack(Ax, Ay, Q, p,
534537
# Note: C currently aligns so that C.T matches x-first multiplication order.
535538
zhat = _Ax @ C.T @ _Ay.T
536539

537-
# Compute the residual sum of squares against the original data z.
538-
fp = np.sum(np.square(z - zhat))
540+
# Compute residual matrix R and fp in one pass; return R so callers that
541+
# need per-span energy for knot placement can reuse it directly instead of
542+
# recomputing zhat a second time.
543+
R = z - zhat
544+
fp = np.sum(np.square(R))
539545

540-
# Return coefficients in the conventional (nx_coef, ny_coef) orientation and fp.
541-
return C.T, fp
546+
# Return coefficients in the conventional (nx_coef, ny_coef) orientation,
547+
# fp, and the residual matrix R.
548+
return C.T, fp, R
542549

543550
class F:
544551
"""
@@ -592,19 +599,15 @@ def __init__(self, Ax, Dx, Ay, Dy, Q,
592599
self.z = z
593600

594601
def __call__(self, p):
595-
Ax_copy = PackedMatrix(
596-
self.Ax.a.copy(), self.Ax.offset.copy(), self.Ax.nc)
597-
Dx_copy = PackedMatrix(
598-
self.Dx.a.copy(), self.Dx.offset.copy(), self.Dx.nc)
599-
Ay_copy = PackedMatrix(
600-
self.Ay.a.copy(), self.Ay.offset.copy(), self.Ay.nc)
601-
Dy_copy = PackedMatrix(
602-
self.Dy.a.copy(), self.Dy.offset.copy(), self.Dy.nc)
603-
C, fp = _solve_2d_fitpack(
604-
Ax_copy, Ay_copy, self.Q.copy(),
602+
# _stack_augmented_fitpack always allocates fresh arrays (np.zeros for
603+
# p != -1, .copy() for p == -1), so qr_reduce never writes back into
604+
# the original PackedMatrix.a buffers. Defensive copies of Ax, Ay, Dx
605+
# and Dy are therefore unnecessary; pass them directly to save memory.
606+
C, fp, _ = _solve_2d_fitpack(
607+
self.Ax, self.Ay, self.Q.copy(),
605608
p, self.kx, self.tx, self.x_x,
606609
self.ky, self.ty, self.x_y, self.z,
607-
Dx=Dx_copy, Dy=Dy_copy)
610+
Dx=self.Dx, Dy=self.Dy)
608611
self.C = C
609612
self.fp = fp
610613
return fp
@@ -975,9 +978,9 @@ def _regrid_fitpack(
975978
ty = _not_a_knot(y_fit, ky)
976979
(Ax, Ay, Q) = _build_design_matrices(
977980
x_fit, y_fit, Z, tx, ty, kx, ky)
978-
C0, fp = _solve_2d_fitpack(Ax, Ay, Q, p,
979-
kx, tx, x_fit, ky, ty,
980-
y_fit, Z_fit)
981+
C0, fp, _ = _solve_2d_fitpack(Ax, Ay, Q, p,
982+
kx, tx, x_fit, ky, ty,
983+
y_fit, Z_fit)
981984
return return_NdBSpline(fp, (tx, ty, C0), (kx, ky))
982985

983986
tx, nestx, nminx, nmaxx = _initialise_knots(x_fit.size, xb, xe, kx, nest=nestx)
@@ -995,10 +998,13 @@ def _regrid_fitpack(
995998

996999
(Ax, Ay, Q) = _build_design_matrices(
9971000
x_fit, y_fit, Z, tx, ty, kx, ky)
998-
C0, fp = _solve_2d_fitpack(Ax, Ay, Q, p,
999-
kx, tx, x_fit,
1000-
ky, ty, y_fit,
1001-
Z_fit)
1001+
# _solve_2d_fitpack now returns R = Z_fit - zhat alongside C0 and fp,
1002+
# so we can reuse it directly for knot placement instead of
1003+
# recomputing zhat a second time via tocsr + dense matmul.
1004+
C0, fp, R = _solve_2d_fitpack(Ax, Ay, Q, p,
1005+
kx, tx, x_fit,
1006+
ky, ty, y_fit,
1007+
Z_fit)
10021008

10031009
# https://github.com/scipy/scipy/blob/v1.16.2/scipy/interpolate/fitpack/fpregr.f#L190
10041010
# https://github.com/scipy/scipy/blob/v1.16.2/scipy/interpolate/fitpack/fpregr.f#L224
@@ -1008,18 +1014,6 @@ def _regrid_fitpack(
10081014
if fp < s:
10091015
break
10101016

1011-
# Note: We call PackedMatrix.tocsr here because matrix multiplication
1012-
# with the packed banded format (returned by _dierckx.data_matrix)
1013-
# is not implemented. PackedMatrix.tocsr returns the design matrix,
1014-
# in CSR format, that supports standard @ operations for residual
1015-
# evaluation and diagnostics.
1016-
_Ax = Ax.tocsr(kx, x_fit.shape[0], len(tx))
1017-
_Ay = Ay.tocsr(ky, y_fit.shape[0], len(ty))
1018-
1019-
1020-
Z0 = _Ax @ C0 @ _Ay.T
1021-
R = Z_fit - Z0
1022-
10231017
# https://github.com/scipy/scipy/blob/v1.16.2/scipy/interpolate/fitpack/fpregr.f#L265-L295
10241018
if last_axis == "y":
10251019
tx, nplusx = _add_knots(
@@ -1036,6 +1030,11 @@ def _regrid_fitpack(
10361030
nplus=nplusy)
10371031
last_axis = "y"
10381032

1033+
# When both knot vectors have reached their maximum size no further
1034+
# growth is possible; break to avoid redundant loop iterations.
1035+
if len(tx) >= nmaxx and len(ty) >= nmaxy:
1036+
break
1037+
10391038
fpold = fp
10401039

10411040
if len(tx) == nminx and len(ty) == nminy:

0 commit comments

Comments
 (0)