Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

MNT use check_scalar in SpectralBiclustering and SpectralCoclustering #20817

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 42 commits into from
Jan 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
e6732f3
Used check_scalar to check parameters.
creatornadiran Aug 23, 2021
4940669
Update _bicluster.py
creatornadiran Sep 2, 2021
4a52561
Update _bicluster.py
creatornadiran Sep 15, 2021
653baf9
Update _bicluster.py
creatornadiran Sep 15, 2021
712ceff
Update _bicluster.py
creatornadiran Sep 15, 2021
949c2bb
Create _bicluster.py
creatornadiran Sep 15, 2021
682e80a
Update _bicluster.py
creatornadiran Sep 15, 2021
2aa25fa
Update _bicluster.py
creatornadiran Sep 15, 2021
2ed7cc7
Update _bicluster.py
creatornadiran Sep 15, 2021
1dc369a
Update _bicluster.py
creatornadiran Sep 15, 2021
0674cff
Update _bicluster.py
creatornadiran Sep 15, 2021
f083fa5
Update _bicluster.py
creatornadiran Sep 15, 2021
7b2c26f
Update _bicluster.py
creatornadiran Sep 16, 2021
6a7da74
Update _bicluster.py
creatornadiran Sep 16, 2021
93fd2f3
Update _bicluster.py
creatornadiran Sep 16, 2021
4944b67
Update _bicluster.py
creatornadiran Sep 16, 2021
15dbaa1
Update _bicluster.py
creatornadiran Sep 16, 2021
fba86a8
Update _bicluster.py
creatornadiran Sep 16, 2021
e7e75c7
Update _bicluster.py
creatornadiran Sep 16, 2021
dbb7f61
Update _bicluster.py
creatornadiran Sep 16, 2021
1a59b6b
Update _bicluster.py
creatornadiran Sep 16, 2021
d2c6695
Update _bicluster.py
creatornadiran Sep 16, 2021
e878db6
some change in spelling
creatornadiran Oct 2, 2021
54b0cf6
Update sklearn/cluster/_bicluster.py
creatornadiran Oct 2, 2021
59ecd9a
Update _bicluster.py
creatornadiran Oct 2, 2021
95a458d
Update sklearn/cluster/_bicluster.py
creatornadiran Oct 4, 2021
c5937fe
runned black
creatornadiran Oct 29, 2021
788675a
necessary changes
creatornadiran Oct 29, 2021
6ea505b
test fixed
creatornadiran Oct 29, 2021
bf2d6d3
bicluster_test updated
creatornadiran Oct 30, 2021
d77d8ed
test_bicluster fixed
creatornadiran Oct 30, 2021
87fe753
test_bicluster.py fixed
creatornadiran Nov 11, 2021
42de356
test_bicluster refixed
creatornadiran Nov 11, 2021
8615430
test_bicluster
creatornadiran Nov 11, 2021
2cf26cc
tests/test_bicluster fixed
creatornadiran Nov 11, 2021
feb581a
Update test_bicluster.py
creatornadiran Jan 29, 2022
f70c9ef
rebase and changes
creatornadiran Jan 29, 2022
5b20cec
black
creatornadiran Jan 29, 2022
9569bfa
error fix
creatornadiran Jan 29, 2022
0194654
error fix 2
creatornadiran Jan 29, 2022
e5488f4
Merge remote-tracking branch 'origin/main' into pr/creatornadiran/20817
glemaitre Jan 31, 2022
cda955d
fix regex to match
glemaitre Jan 31, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 53 additions & 27 deletions sklearn/cluster/_bicluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from abc import ABCMeta, abstractmethod

import numpy as np
import numbers

from scipy.linalg import norm
from scipy.sparse import dia_matrix, issparse
Expand All @@ -13,6 +14,7 @@
from . import KMeans, MiniBatchKMeans
from ..base import BaseEstimator, BiclusterMixin
from ..utils import check_random_state
from ..utils import check_scalar

from ..utils.extmath import make_nonnegative, randomized_svd, safe_sparse_dot

Expand Down Expand Up @@ -102,14 +104,15 @@ def __init__(
self.n_init = n_init
self.random_state = random_state

def _check_parameters(self):
def _check_parameters(self, n_samples):
legal_svd_methods = ("randomized", "arpack")
if self.svd_method not in legal_svd_methods:
raise ValueError(
"Unknown SVD method: '{0}'. svd_method must be one of {1}.".format(
self.svd_method, legal_svd_methods
)
)
check_scalar(self.n_init, "n_init", target_type=numbers.Integral, min_val=1)

def fit(self, X, y=None):
"""Create a biclustering for X.
Expand All @@ -128,7 +131,7 @@ def fit(self, X, y=None):
SpectralBiclustering instance.
"""
X = self._validate_data(X, accept_sparse="csr", dtype=np.float64)
self._check_parameters()
self._check_parameters(X.shape[0])
self._fit(X)
return self

Expand Down Expand Up @@ -328,6 +331,16 @@ def __init__(
n_clusters, svd_method, n_svd_vecs, mini_batch, init, n_init, random_state
)

def _check_parameters(self, n_samples):
super()._check_parameters(n_samples)
check_scalar(
self.n_clusters,
"n_clusters",
target_type=numbers.Integral,
min_val=1,
max_val=n_samples,
)

def _fit(self, X):
normalized_data, row_diag, col_diag = _scale_normalize(X)
n_sv = 1 + int(np.ceil(np.log2(self.n_clusters)))
Expand Down Expand Up @@ -492,8 +505,8 @@ def __init__(
self.n_components = n_components
self.n_best = n_best

def _check_parameters(self):
super()._check_parameters()
def _check_parameters(self, n_samples):
super()._check_parameters(n_samples)
legal_methods = ("bistochastic", "scale", "log")
if self.method not in legal_methods:
raise ValueError(
Expand All @@ -502,36 +515,49 @@ def _check_parameters(self):
)
)
try:
int(self.n_clusters)
except TypeError:
check_scalar(
self.n_clusters,
"n_clusters",
target_type=numbers.Integral,
min_val=1,
max_val=n_samples,
)
except (ValueError, TypeError):
try:
r, c = self.n_clusters
int(r)
int(c)
n_row_clusters, n_column_clusters = self.n_clusters
check_scalar(
n_row_clusters,
"n_row_clusters",
target_type=numbers.Integral,
min_val=1,
max_val=n_samples,
)
check_scalar(
n_column_clusters,
"n_column_clusters",
target_type=numbers.Integral,
min_val=1,
max_val=n_samples,
)
except (ValueError, TypeError) as e:
raise ValueError(
"Incorrect parameter n_clusters has value:"
" {}. It should either be a single integer"
f" {self.n_clusters}. It should either be a single integer"
" or an iterable with two integers:"
" (n_row_clusters, n_column_clusters)"
" And the values are should be in the"
" range: (1, n_samples)"
) from e
if self.n_components < 1:
raise ValueError(
"Parameter n_components must be greater than 0,"
" but its value is {}".format(self.n_components)
)
if self.n_best < 1:
raise ValueError(
"Parameter n_best must be greater than 0, but its value is {}".format(
self.n_best
)
)
if self.n_best > self.n_components:
raise ValueError(
"n_best cannot be larger than n_components, but {} > {}".format(
self.n_best, self.n_components
)
)
check_scalar(
self.n_components, "n_components", target_type=numbers.Integral, min_val=1
)
check_scalar(
self.n_best,
"n_best",
target_type=numbers.Integral,
min_val=1,
max_val=self.n_components,
)

def _fit(self, X):
n_sv = self.n_components
Expand Down
69 changes: 52 additions & 17 deletions sklearn/cluster/tests/test_bicluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,30 +208,65 @@ def test_perfect_checkerboard():


@pytest.mark.parametrize(
"args",
"params, type_err, err_msg",
[
{"n_clusters": (3, 3, 3)},
{"n_clusters": "abc"},
{"n_clusters": (3, "abc")},
{"method": "unknown"},
{"n_components": 0},
{"n_best": 0},
{"svd_method": "unknown"},
{"n_components": 3, "n_best": 4},
({"n_init": 0}, ValueError, "n_init == 0, must be >= 1."),
({"n_init": 1.5}, TypeError, "n_init must be an instance of"),
(
{"n_clusters": "abc"},
TypeError,
"n_clusters must be an instance of",
),
({"svd_method": "unknown"}, ValueError, "Unknown SVD method: 'unknown'"),
],
)
def test_errors(args):
def test_spectralcoclustering_parameter_validation(params, type_err, err_msg):
"""Check parameters validation in `SpectralBiClustering`"""
data = np.arange(25).reshape((5, 5))

model = SpectralBiclustering(**args)
with pytest.raises(ValueError):
model = SpectralCoclustering(**params)
with pytest.raises(type_err, match=err_msg):
model.fit(data)


def test_wrong_shape():
model = SpectralBiclustering()
data = np.arange(27).reshape((3, 3, 3))
with pytest.raises(ValueError):
@pytest.mark.parametrize(
"params, type_err, err_msg",
[
({"n_init": 0}, ValueError, "n_init == 0, must be >= 1."),
({"n_init": 1.5}, TypeError, "n_init must be an instance of"),
(
{"n_clusters": (3, 3, 3)},
ValueError,
r"Incorrect parameter n_clusters has value: \(3, 3, 3\)",
),
(
{"n_clusters": "abc"},
ValueError,
"Incorrect parameter n_clusters has value: abc",
),
(
{"n_clusters": (3, "abc")},
ValueError,
r"Incorrect parameter n_clusters has value: \(3, 'abc'\)",
),
(
{"n_clusters": ("abc", 3)},
ValueError,
r"Incorrect parameter n_clusters has value: \('abc', 3\)",
),
({"method": "unknown"}, ValueError, "Unknown method: 'unknown'"),
({"n_components": 0}, ValueError, "n_components == 0, must be >= 1."),
({"n_components": 1.5}, TypeError, "n_components must be an instance of"),
({"n_components": 3, "n_best": 4}, ValueError, "n_best == 4, must be <= 3."),
({"n_best": 0}, ValueError, "n_best == 0, must be >= 1."),
({"n_best": 1.5}, TypeError, "n_best must be an instance of"),
({"svd_method": "unknown"}, ValueError, "Unknown SVD method: 'unknown'"),
],
)
def test_spectralbiclustering_parameter_validation(params, type_err, err_msg):
"""Check parameters validation in `SpectralBiClustering`"""
data = np.arange(25).reshape((5, 5))
model = SpectralBiclustering(**params)
with pytest.raises(type_err, match=err_msg):
model.fit(data)


Expand Down