From 2f07ebb302fbb3224e6a1d71563e95bbdc3f0c08 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 26 Sep 2023 19:21:12 +0000 Subject: [PATCH 1/4] feat: add ml.preprocessing.MinMaxScaler --- bigframes/ml/compose.py | 1 + bigframes/ml/pipeline.py | 12 ++- bigframes/ml/preprocessing.py | 82 ++++++++++++++++- bigframes/ml/sql.py | 4 + tests/system/large/ml/test_pipeline.py | 46 +++++++++- tests/system/small/ml/test_preprocessing.py | 91 +++++++++++++++++++ tests/unit/ml/test_compose.py | 25 +++++ tests/unit/ml/test_sql.py | 7 ++ .../sklearn/preprocessing/_data.py | 36 ++++++++ 9 files changed, 299 insertions(+), 5 deletions(-) diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index a1075c2398..9effbf1968 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -30,6 +30,7 @@ preprocessing.OneHotEncoder, preprocessing.StandardScaler, preprocessing.MaxAbsScaler, + preprocessing.MinMaxScaler, preprocessing.LabelEncoder, ] diff --git a/bigframes/ml/pipeline.py b/bigframes/ml/pipeline.py index 86b2099619..ac02c39112 100644 --- a/bigframes/ml/pipeline.py +++ b/bigframes/ml/pipeline.py @@ -51,6 +51,7 @@ def __init__(self, steps: List[Tuple[str, base.BaseEstimator]]): preprocessing.StandardScaler, preprocessing.OneHotEncoder, preprocessing.MaxAbsScaler, + preprocessing.MinMaxScaler, preprocessing.LabelEncoder, ), ): @@ -149,6 +150,7 @@ def _extract_as_column_transformer( preprocessing.OneHotEncoder, preprocessing.StandardScaler, preprocessing.MaxAbsScaler, + preprocessing.MinMaxScaler, preprocessing.LabelEncoder, ], Union[str, List[str]], @@ -177,10 +179,17 @@ def _extract_as_column_transformer( elif transform_sql.startswith("ML.MAX_ABS_SCALER"): transformers.append( ( - "max_abs_encoder", + "max_abs_scaler", *preprocessing.MaxAbsScaler._parse_from_sql(transform_sql), ) ) + elif transform_sql.startswith("ML.MIN_MAX_SCALER"): + transformers.append( + ( + "min_max_scaler", + *preprocessing.MinMaxScaler._parse_from_sql(transform_sql), + ) + ) elif 
transform_sql.startswith("ML.LABEL_ENCODER"): transformers.append( ( @@ -203,6 +212,7 @@ def _merge_column_transformer( preprocessing.StandardScaler, preprocessing.OneHotEncoder, preprocessing.MaxAbsScaler, + preprocessing.MinMaxScaler, preprocessing.LabelEncoder, ]: """Try to merge the column transformer to a simple transformer.""" diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index ed0b36deef..5114d428f6 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -150,7 +150,7 @@ def _parse_from_sql(cls, sql: str) -> tuple[MaxAbsScaler, str]: sql: SQL string of format "ML.MAX_ABS_SCALER({col_label}) OVER()" Returns: - tuple(StandardScaler, column_label)""" + tuple(MaxAbsScaler, column_label)""" col_label = sql[sql.find("(") + 1 : sql.find(")")] return cls(), col_label @@ -187,6 +187,86 @@ def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: ) +class MinMaxScaler( + base.Transformer, + third_party.bigframes_vendored.sklearn.preprocessing._data.MinMaxScaler, +): + __doc__ = ( + third_party.bigframes_vendored.sklearn.preprocessing._data.MinMaxScaler.__doc__ + ) + + def __init__(self): + self._bqml_model: Optional[core.BqmlModel] = None + self._bqml_model_factory = globals.bqml_model_factory() + self._base_sql_generator = globals.base_sql_generator() + + # TODO(garrettwu): implement __hash__ + def __eq__(self, other: Any) -> bool: + return type(other) is MinMaxScaler and self._bqml_model == other._bqml_model + + def _compile_to_sql(self, columns: List[str]) -> List[Tuple[str, str]]: + """Compile this transformer to a list of SQL expressions that can be included in + a BQML TRANSFORM clause + + Args: + columns: a list of column names to transform + + Returns: a list of tuples of (sql_expression, output_name)""" + return [ + ( + self._base_sql_generator.ml_min_max_scaler( + column, f"min_max_scaled_{column}" + ), + f"min_max_scaled_{column}", + ) + for column in columns + ] + + @classmethod + def 
_parse_from_sql(cls, sql: str) -> tuple[MinMaxScaler, str]: + """Parse SQL to tuple(MinMaxScaler, column_label). + + Args: + sql: SQL string of format "ML.MIN_MAX_SCALER({col_label}) OVER()" + + Returns: + tuple(MinMaxScaler, column_label)""" + col_label = sql[sql.find("(") + 1 : sql.find(")")] + return cls(), col_label + + def fit( + self, + X: Union[bpd.DataFrame, bpd.Series], + y=None, # ignored + ) -> MinMaxScaler: + (X,) = utils.convert_to_dataframe(X) + + compiled_transforms = self._compile_to_sql(X.columns.tolist()) + transform_sqls = [transform_sql for transform_sql, _ in compiled_transforms] + + self._bqml_model = self._bqml_model_factory.create_model( + X, + options={"model_type": "transform_only"}, + transforms=transform_sqls, + ) + + # The schema of TRANSFORM output is not available in the model API, so save it during fitting + self._output_names = [name for _, name in compiled_transforms] + return self + + def transform(self, X: Union[bpd.DataFrame, bpd.Series]) -> bpd.DataFrame: + if not self._bqml_model: + raise RuntimeError("Must be fitted before transform") + + (X,) = utils.convert_to_dataframe(X) + + df = self._bqml_model.transform(X) + return typing.cast( + bpd.DataFrame, + df[self._output_names], + ) + + class OneHotEncoder( base.Transformer, third_party.bigframes_vendored.sklearn.preprocessing._encoder.OneHotEncoder, diff --git a/bigframes/ml/sql.py b/bigframes/ml/sql.py index a54d39e6b2..e4d7297ae2 100644 --- a/bigframes/ml/sql.py +++ b/bigframes/ml/sql.py @@ -80,6 +80,10 @@ def ml_max_abs_scaler(self, numeric_expr_sql: str, name: str) -> str: """Encode ML.MAX_ABS_SCALER for BQML""" return f"""ML.MAX_ABS_SCALER({numeric_expr_sql}) OVER() AS {name}""" + def ml_min_max_scaler(self, numeric_expr_sql: str, name: str) -> str: + """Encode ML.MIN_MAX_SCALER for BQML""" + return f"""ML.MIN_MAX_SCALER({numeric_expr_sql}) OVER() AS {name}""" + def ml_one_hot_encoder( self, numeric_expr_sql: str, diff --git a/tests/system/large/ml/test_pipeline.py 
b/tests/system/large/ml/test_pipeline.py index c69a00b81c..34a2ca0101 100644 --- a/tests/system/large/ml/test_pipeline.py +++ b/tests/system/large/ml/test_pipeline.py @@ -575,6 +575,11 @@ def test_pipeline_columntransformer_fit_predict(session, penguins_df_default_ind preprocessing.MaxAbsScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "min_max_scale", + preprocessing.MinMaxScaler(), + ["culmen_length_mm", "flipper_length_mm"], + ), ( "label", preprocessing.LabelEncoder(), @@ -647,6 +652,11 @@ def test_pipeline_columntransformer_to_gbq(penguins_df_default_index, dataset_id preprocessing.MaxAbsScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "min_max_scale", + preprocessing.MinMaxScaler(), + ["culmen_length_mm", "flipper_length_mm"], + ), ( "label", preprocessing.LabelEncoder(), @@ -684,9 +694,11 @@ def test_pipeline_columntransformer_to_gbq(penguins_df_default_index, dataset_id "species", ), ("standard_scaler", preprocessing.StandardScaler(), "culmen_length_mm"), - ("max_abs_encoder", preprocessing.MaxAbsScaler(), "culmen_length_mm"), + ("max_abs_scaler", preprocessing.MaxAbsScaler(), "culmen_length_mm"), + ("min_max_scaler", preprocessing.MinMaxScaler(), "culmen_length_mm"), ("standard_scaler", preprocessing.StandardScaler(), "flipper_length_mm"), - ("max_abs_encoder", preprocessing.MaxAbsScaler(), "flipper_length_mm"), + ("max_abs_scaler", preprocessing.MaxAbsScaler(), "flipper_length_mm"), + ("min_max_scaler", preprocessing.MinMaxScaler(), "flipper_length_mm"), ] assert transformers == expected @@ -743,7 +755,7 @@ def test_pipeline_max_abs_scaler_to_gbq(penguins_df_default_index, dataset_id): pl.fit(X_train, y_train) pl_loaded = pl.to_gbq( - f"{dataset_id}.test_penguins_pipeline_standard_scaler", replace=True + f"{dataset_id}.test_penguins_pipeline_min_max_scaler", replace=True ) assert isinstance(pl_loaded._transform, preprocessing.MaxAbsScaler) @@ -751,6 +763,34 @@ def test_pipeline_max_abs_scaler_to_gbq(penguins_df_default_index, 
dataset_id): assert pl_loaded._estimator.fit_intercept is False +def test_pipeline_min_max_scaler_to_gbq(penguins_df_default_index, dataset_id): + pl = pipeline.Pipeline( + [ + ("transform", preprocessing.MinMaxScaler()), + ("estimator", linear_model.LinearRegression(fit_intercept=False)), + ] + ) + + df = penguins_df_default_index.dropna() + X_train = df[ + [ + "culmen_length_mm", + "culmen_depth_mm", + "flipper_length_mm", + ] + ] + y_train = df[["body_mass_g"]] + pl.fit(X_train, y_train) + + pl_loaded = pl.to_gbq( + f"{dataset_id}.test_penguins_pipeline_min_max_scaler", replace=True + ) + assert isinstance(pl_loaded._transform, preprocessing.MinMaxScaler) + + assert isinstance(pl_loaded._estimator, linear_model.LinearRegression) + assert pl_loaded._estimator.fit_intercept is False + + def test_pipeline_one_hot_encoder_to_gbq(penguins_df_default_index, dataset_id): pl = pipeline.Pipeline( [ diff --git a/tests/system/small/ml/test_preprocessing.py b/tests/system/small/ml/test_preprocessing.py index 61bddb144d..15aa3be606 100644 --- a/tests/system/small/ml/test_preprocessing.py +++ b/tests/system/small/ml/test_preprocessing.py @@ -211,6 +211,97 @@ def test_max_abs_scaler_series_normalizes(penguins_df_default_index, new_penguin pd.testing.assert_frame_equal(result, expected, rtol=1e-3) +def test_min_max_scaler_normalizeds_fit_transform(new_penguins_df): + scaler = bigframes.ml.preprocessing.MinMaxScaler() + result = scaler.fit_transform( + new_penguins_df[["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"]] + ).to_pandas() + + # TODO: bug? feature columns seem to be in nondeterministic random order + # workaround: sort columns by name. Can't repro it in pantheon, so could + # be a bigframes issue... 
+ result = result.reindex(sorted(result.columns), axis=1) + + expected = pd.DataFrame( + { + "min_max_scaled_culmen_depth_mm": [1.0, 0.0, 0.5625], + "min_max_scaled_culmen_length_mm": [1.0, 0.375, 0.0], + "min_max_scaled_flipper_length_mm": [1.0, 0.0, 0.466667], + }, + dtype="Float64", + index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + ) + + pd.testing.assert_frame_equal(result, expected, rtol=1e-3) + + +def test_min_max_scaler_series_normalizes(penguins_df_default_index, new_penguins_df): + scaler = bigframes.ml.preprocessing.MinMaxScaler() + scaler.fit(penguins_df_default_index["culmen_length_mm"]) + + result = scaler.transform(penguins_df_default_index["culmen_length_mm"]).to_pandas() + + # If maxabs-scaled correctly, max should be 1.0 + for column in result.columns: + assert math.isclose(result[column].max(), 1.0, abs_tol=1e-3) + + result = scaler.transform(new_penguins_df).to_pandas() + + # TODO: bug? feature columns seem to be in nondeterministic random order + # workaround: sort columns by name. Can't repro it in pantheon, so could + # be a bigframes issue... + result = result.reindex(sorted(result.columns), axis=1) + + expected = pd.DataFrame( + { + "min_max_scaled_culmen_length_mm": [0.269091, 0.232727, 0.210909], + }, + dtype="Float64", + index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + ) + + pd.testing.assert_frame_equal(result, expected, rtol=1e-3) + + +def test_min_max_scaler_normalizes(penguins_df_default_index, new_penguins_df): + # TODO(http://b/292431644): add a second test that compares output to sklearn.preprocessing.MinMaxScaler, when BQML's change is in prod. 
+ scaler = bigframes.ml.preprocessing.MinMaxScaler() + scaler.fit( + penguins_df_default_index[ + ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"] + ] + ) + + result = scaler.transform( + penguins_df_default_index[ + ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm"] + ] + ).to_pandas() + + # If maxabs-scaled correctly, max should be 1.0 + for column in result.columns: + assert math.isclose(result[column].max(), 1.0, abs_tol=1e-3) + + result = scaler.transform(new_penguins_df).to_pandas() + + # TODO: bug? feature columns seem to be in nondeterministic random order + # workaround: sort columns by name. Can't repro it in pantheon, so could + # be a bigframes issue... + result = result.reindex(sorted(result.columns), axis=1) + + expected = pd.DataFrame( + { + "min_max_scaled_culmen_depth_mm": [0.678571, 0.4880952, 0.595238], + "min_max_scaled_culmen_length_mm": [0.269091, 0.232727, 0.210909], + "min_max_scaled_flipper_length_mm": [0.40678, 0.152542, 0.271186], + }, + dtype="Float64", + index=pd.Index([1633, 1672, 1690], name="tag_number", dtype="Int64"), + ) + + pd.testing.assert_frame_equal(result, expected, rtol=1e-3) + + def test_one_hot_encoder_default_params(new_penguins_df): encoder = bigframes.ml.preprocessing.OneHotEncoder() encoder.fit(new_penguins_df[["species", "sex"]]) diff --git a/tests/unit/ml/test_compose.py b/tests/unit/ml/test_compose.py index 24cf0a333e..8c8fbd6ab5 100644 --- a/tests/unit/ml/test_compose.py +++ b/tests/unit/ml/test_compose.py @@ -22,6 +22,7 @@ def test_columntransformer_init_expectedtransforms(): onehot_transformer = preprocessing.OneHotEncoder() standard_scaler_transformer = preprocessing.StandardScaler() max_abs_scaler_transformer = preprocessing.MaxAbsScaler() + min_max_scaler_transformer = preprocessing.MinMaxScaler() label_transformer = preprocessing.LabelEncoder() column_transformer = compose.ColumnTransformer( [ @@ -36,6 +37,11 @@ def test_columntransformer_init_expectedtransforms(): 
max_abs_scaler_transformer, ["culmen_length_mm", "flipper_length_mm"], ), + ( + "min_max_scale", + min_max_scaler_transformer, + ["culmen_length_mm", "flipper_length_mm"], + ), ("label", label_transformer, "species"), ] ) @@ -46,6 +52,8 @@ def test_columntransformer_init_expectedtransforms(): ("standard_scale", standard_scaler_transformer, "flipper_length_mm"), ("max_abs_scale", max_abs_scaler_transformer, "culmen_length_mm"), ("max_abs_scale", max_abs_scaler_transformer, "flipper_length_mm"), + ("min_max_scale", min_max_scaler_transformer, "culmen_length_mm"), + ("min_max_scale", min_max_scaler_transformer, "flipper_length_mm"), ("label", label_transformer, "species"), ] @@ -68,6 +76,11 @@ def test_columntransformer_repr(): preprocessing.MaxAbsScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "min_max_scale", + preprocessing.MinMaxScaler(), + ["culmen_length_mm", "flipper_length_mm"], + ), ] ) @@ -77,6 +90,8 @@ def test_columntransformer_repr(): ('standard_scale', StandardScaler(), ['culmen_length_mm', 'flipper_length_mm']), ('max_abs_scale', MaxAbsScaler(), + ['culmen_length_mm', 'flipper_length_mm']), + ('min_max_scale', MinMaxScaler(), ['culmen_length_mm', 'flipper_length_mm'])])""" ) @@ -99,6 +114,11 @@ def test_columntransformer_repr_matches_sklearn(): preprocessing.MaxAbsScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "min_max_scale", + preprocessing.MinMaxScaler(), + ["culmen_length_mm", "flipper_length_mm"], + ), ] ) sk_column_transformer = sklearn_compose.ColumnTransformer( @@ -118,6 +138,11 @@ def test_columntransformer_repr_matches_sklearn(): sklearn_preprocessing.MaxAbsScaler(), ["culmen_length_mm", "flipper_length_mm"], ), + ( + "min_max_scale", + sklearn_preprocessing.MinMaxScaler(), + ["culmen_length_mm", "flipper_length_mm"], + ), ] ) diff --git a/tests/unit/ml/test_sql.py b/tests/unit/ml/test_sql.py index c1b29c5e52..1490ffb5cb 100644 --- a/tests/unit/ml/test_sql.py +++ b/tests/unit/ml/test_sql.py @@ -76,6 +76,13 @@ def 
test_max_abs_scaler_produces_correct_sql( assert sql == "ML.MAX_ABS_SCALER(col_a) OVER() AS scaled_col_a" +def test_min_max_scaler_produces_correct_sql( + base_sql_generator: ml_sql.BaseSqlGenerator, +): + sql = base_sql_generator.ml_min_max_scaler("col_a", "scaled_col_a") + assert sql == "ML.MIN_MAX_SCALER(col_a) OVER() AS scaled_col_a" + + def test_one_hot_encoder_produces_correct_sql( base_sql_generator: ml_sql.BaseSqlGenerator, ): diff --git a/third_party/bigframes_vendored/sklearn/preprocessing/_data.py b/third_party/bigframes_vendored/sklearn/preprocessing/_data.py index 40b4f76ab7..58e16e135b 100644 --- a/third_party/bigframes_vendored/sklearn/preprocessing/_data.py +++ b/third_party/bigframes_vendored/sklearn/preprocessing/_data.py @@ -106,3 +106,39 @@ def transform(self, X): bigframes.dataframe.DataFrame: Transformed result. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + +class MinMaxScaler(BaseEstimator, TransformerMixin): + """Transform features by scaling each feature to a given range. + + This estimator scales and translates each feature individually such + that it is in the given range on the training set, e.g. between + zero and one. + """ + + def fit(self, X, y=None): + """Compute the minimum and maximum to be used for later scaling. + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series): + The DataFrame or Series with training data. + + y (default None): + Ignored. + + Returns: + MinMaxScaler: Fitted scaler. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def transform(self, X): + """Scale the data. + + Args: + X (bigframes.dataframe.DataFrame or bigframes.series.Series): + The DataFrame or Series to be transformed. + + Returns: + bigframes.dataframe.DataFrame: Transformed result. 
+ """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From 2efe05cd065056212089c3cc1a457968df0d5210 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 26 Sep 2023 21:26:15 +0000 Subject: [PATCH 2/4] fix comments and typo --- bigframes/ml/preprocessing.py | 2 +- tests/system/small/ml/test_preprocessing.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bigframes/ml/preprocessing.py b/bigframes/ml/preprocessing.py index 5114d428f6..caf4657a63 100644 --- a/bigframes/ml/preprocessing.py +++ b/bigframes/ml/preprocessing.py @@ -144,7 +144,7 @@ def _compile_to_sql(self, columns: List[str]) -> List[Tuple[str, str]]: @classmethod def _parse_from_sql(cls, sql: str) -> tuple[MaxAbsScaler, str]: - """Parse SQL to tuple(StandardScaler, column_label). + """Parse SQL to tuple(MaxAbsScaler, column_label). Args: sql: SQL string of format "ML.MAX_ABS_SCALER({col_label}) OVER()" diff --git a/tests/system/small/ml/test_preprocessing.py b/tests/system/small/ml/test_preprocessing.py index 15aa3be606..154e36974b 100644 --- a/tests/system/small/ml/test_preprocessing.py +++ b/tests/system/small/ml/test_preprocessing.py @@ -241,7 +241,7 @@ def test_min_max_scaler_series_normalizes(penguins_df_default_index, new_penguin result = scaler.transform(penguins_df_default_index["culmen_length_mm"]).to_pandas() - # If maxabs-scaled correctly, max should be 1.0 + # If minmax-scaled correctly, min should be 0 and max should be 1. for column in result.columns: assert math.isclose(result[column].max(), 1.0, abs_tol=1e-3) @@ -278,7 +278,7 @@ def test_min_max_scaler_normalizes(penguins_df_default_index, new_penguins_df): ] ).to_pandas() - # If maxabs-scaled correctly, max should be 1.0 + # If minmax-scaled correctly, min should be 0 and max should be 1. 
for column in result.columns: assert math.isclose(result[column].max(), 1.0, abs_tol=1e-3) From dcb7fde58389d907841ea2fc9d8645ab39f24acf Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 26 Sep 2023 21:36:09 +0000 Subject: [PATCH 3/4] add test check for min value --- tests/system/small/ml/test_preprocessing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/system/small/ml/test_preprocessing.py b/tests/system/small/ml/test_preprocessing.py index 154e36974b..c3bd7dcd8e 100644 --- a/tests/system/small/ml/test_preprocessing.py +++ b/tests/system/small/ml/test_preprocessing.py @@ -244,6 +244,8 @@ def test_min_max_scaler_series_normalizes(penguins_df_default_index, new_penguin # If minmax-scaled correctly, min should be 0 and max should be 1. for column in result.columns: assert math.isclose(result[column].max(), 1.0, abs_tol=1e-3) + for column in result.columns: + assert math.isclose(result[column].min(), 0.0, abs_tol=1e-3) result = scaler.transform(new_penguins_df).to_pandas() @@ -281,6 +283,8 @@ def test_min_max_scaler_normalizes(penguins_df_default_index, new_penguins_df): # If minmax-scaled correctly, min should be 0 and max should be 1. 
for column in result.columns: assert math.isclose(result[column].max(), 1.0, abs_tol=1e-3) + for column in result.columns: + assert math.isclose(result[column].min(), 0.0, abs_tol=1e-3) result = scaler.transform(new_penguins_df).to_pandas() From 2e9512872a6df4530041d575f25a6987aa2e2b31 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Tue, 26 Sep 2023 22:31:19 +0000 Subject: [PATCH 4/4] nit fix --- tests/system/small/ml/test_preprocessing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/system/small/ml/test_preprocessing.py b/tests/system/small/ml/test_preprocessing.py index c3bd7dcd8e..fc8f3251bd 100644 --- a/tests/system/small/ml/test_preprocessing.py +++ b/tests/system/small/ml/test_preprocessing.py @@ -244,7 +244,6 @@ def test_min_max_scaler_series_normalizes(penguins_df_default_index, new_penguin # If minmax-scaled correctly, min should be 0 and max should be 1. for column in result.columns: assert math.isclose(result[column].max(), 1.0, abs_tol=1e-3) - for column in result.columns: assert math.isclose(result[column].min(), 0.0, abs_tol=1e-3) result = scaler.transform(new_penguins_df).to_pandas() @@ -283,7 +282,6 @@ def test_min_max_scaler_normalizes(penguins_df_default_index, new_penguins_df): # If minmax-scaled correctly, min should be 0 and max should be 1. for column in result.columns: assert math.isclose(result[column].max(), 1.0, abs_tol=1e-3) - for column in result.columns: assert math.isclose(result[column].min(), 0.0, abs_tol=1e-3) result = scaler.transform(new_penguins_df).to_pandas()