From d9deec8f762673fcbd539b4aa67bae6bd8bbb844 Mon Sep 17 00:00:00 2001 From: FarahSaeed Date: Mon, 13 Nov 2017 20:07:09 +0500 Subject: [PATCH 1/4] Replacing "the scikit" with "the scikit-learn" In Doc, replacing "the scikit" with "scikit-learn" --- doc/datasets/index.rst | 2 +- doc/developers/performance.rst | 2 +- doc/modules/dp-derivation.rst | 2 +- doc/modules/model_persistence.rst | 4 ++-- doc/presentations.rst | 2 +- doc/tutorial/basic/tutorial.rst | 4 ++-- doc/tutorial/statistical_inference/settings.rst | 4 ++-- doc/tutorial/statistical_inference/unsupervised_learning.rst | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst index f9b400ba83e40..2d43bb4787d98 100644 --- a/doc/datasets/index.rst +++ b/doc/datasets/index.rst @@ -64,7 +64,7 @@ require to download any file from some external website. load_breast_cancer These datasets are useful to quickly illustrate the behavior of the -various algorithms implemented in the scikit. They are however often too +various algorithms implemented in the scikit-learn. They are however often too small to be representative of real world machine learning tasks. .. _sample_images: diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst index 692e7ca1f99a7..6e1179a3f2e4d 100644 --- a/doc/developers/performance.rst +++ b/doc/developers/performance.rst @@ -94,7 +94,7 @@ loads and prepare you data and then use the IPython integrated profiler for interactively exploring the relevant part for the code. Suppose we want to profile the Non Negative Matrix Factorization module -of the scikit. Let us setup a new IPython session and load the digits +of the scikit-learn. 
Let us setup a new IPython session and load the digits dataset and as in the :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` example:: In [1]: from sklearn.decomposition import NMF diff --git a/doc/modules/dp-derivation.rst b/doc/modules/dp-derivation.rst index 4509e0fa323bc..b4117688fbe90 100644 --- a/doc/modules/dp-derivation.rst +++ b/doc/modules/dp-derivation.rst @@ -23,7 +23,7 @@ complex, or even more. For this reason we present here a full derivation of the inference algorithm and all the update and lower-bound equations. If you're not interested in learning how to derive similar algorithms yourself and you're not interested in -changing/debugging the implementation in the scikit this document is +changing/debugging the implementation in the scikit-learn this document is not for you. The complexity of this implementation is linear in the number of diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst index 1efe4a8bcd520..2a622eeb2cf8e 100644 --- a/doc/modules/model_persistence.rst +++ b/doc/modules/model_persistence.rst @@ -13,7 +13,7 @@ security and maintainability issues when working with pickle serialization. 
Persistence example ------------------- -It is possible to save a model in the scikit by using Python's built-in +It is possible to save a model in the scikit-learn by using Python's built-in persistence model, namely `pickle `_:: >>> from sklearn import svm @@ -35,7 +35,7 @@ persistence model, namely `pickle >> y[0] 0 -In the specific case of the scikit, it may be more interesting to use +In the specific case of the scikit-learn, it may be more interesting to use joblib's replacement of pickle (``joblib.dump`` & ``joblib.load``), which is more efficient on objects that carry large numpy arrays internally as is often the case for fitted scikit-learn estimators, but can only pickle to the diff --git a/doc/presentations.rst b/doc/presentations.rst index 8b5d3bdc897ca..9da72531aacac 100644 --- a/doc/presentations.rst +++ b/doc/presentations.rst @@ -37,7 +37,7 @@ Videos `_ by `Gael Varoquaux`_ at ICML 2010 - A three minute video from a very early stage of the scikit, explaining the + A three minute video from a very early stage of the scikit-learn, explaining the basic idea and approach we are following. 
- `Introduction to statistical learning with scikit-learn `_ diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 89600953a870f..58ce637873411 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -209,7 +209,7 @@ example that you can run and study: Model persistence ----------------- -It is possible to save a model in the scikit by using Python's built-in +It is possible to save a model in the scikit-learn by using Python's built-in persistence model, namely `pickle `_:: >>> from sklearn import svm @@ -231,7 +231,7 @@ persistence model, namely `pickle >> y[0] 0 -In the specific case of the scikit, it may be more interesting to use +In the specific case of the scikit-learn, it may be more interesting to use joblib's replacement of pickle (``joblib.dump`` & ``joblib.load``), which is more efficient on big data, but can only pickle to the disk and not to a string:: diff --git a/doc/tutorial/statistical_inference/settings.rst b/doc/tutorial/statistical_inference/settings.rst index 1b1e477c5cfdf..5ea372d6e0ac0 100644 --- a/doc/tutorial/statistical_inference/settings.rst +++ b/doc/tutorial/statistical_inference/settings.rst @@ -12,7 +12,7 @@ list of multi-dimensional observations. We say that the first axis of these arrays is the **samples** axis, while the second is the **features** axis. -.. topic:: A simple example shipped with the scikit: iris dataset +.. topic:: A simple example shipped with the scikit-learn: iris dataset :: @@ -46,7 +46,7 @@ needs to be preprocessed in order to be used by scikit-learn. 
>>> plt.imshow(digits.images[-1], cmap=plt.cm.gray_r) #doctest: +SKIP - To use this dataset with the scikit, we transform each 8x8 image into a + To use this dataset with the scikit-learn, we transform each 8x8 image into a feature vector of length 64 :: >>> data = digits.images.reshape((digits.images.shape[0], -1)) diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index 0ad16c180385c..9a88696787835 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -171,7 +171,7 @@ Connectivity-constrained clustering ..................................... With agglomerative clustering, it is possible to specify which samples can be -clustered together by giving a connectivity graph. Graphs in the scikit +clustered together by giving a connectivity graph. Graphs in the scikit-learn are represented by their adjacency matrix. Often, a sparse matrix is used. This can be useful, for instance, to retrieve connected regions (sometimes also referred to as connected components) when From dc3e8cc539fc80063f7988e70c806906c0d40221 Mon Sep 17 00:00:00 2001 From: FarahSaeed Date: Tue, 14 Nov 2017 05:50:35 +0500 Subject: [PATCH 2/4] DOC Removing "the" from "the scikit-learn" term --- doc/datasets/index.rst | 2 +- doc/developers/performance.rst | 2 +- doc/modules/dp-derivation.rst | 2 +- doc/modules/model_persistence.rst | 4 ++-- doc/presentations.rst | 2 +- doc/tutorial/basic/tutorial.rst | 4 ++-- doc/tutorial/statistical_inference/settings.rst | 4 ++-- doc/tutorial/statistical_inference/unsupervised_learning.rst | 2 +- 8 files changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst index 2d43bb4787d98..62f5ffd91b375 100644 --- a/doc/datasets/index.rst +++ b/doc/datasets/index.rst @@ -64,7 +64,7 @@ require to download any file from some external website. 
load_breast_cancer These datasets are useful to quickly illustrate the behavior of the -various algorithms implemented in the scikit-learn. They are however often too +various algorithms implemented in scikit-learn. They are however often too small to be representative of real world machine learning tasks. .. _sample_images: diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst index 6e1179a3f2e4d..d3d6204ec328f 100644 --- a/doc/developers/performance.rst +++ b/doc/developers/performance.rst @@ -94,7 +94,7 @@ loads and prepare you data and then use the IPython integrated profiler for interactively exploring the relevant part for the code. Suppose we want to profile the Non Negative Matrix Factorization module -of the scikit-learn. Let us setup a new IPython session and load the digits +of scikit-learn. Let us setup a new IPython session and load the digits dataset and as in the :ref:`sphx_glr_auto_examples_classification_plot_digits_classification.py` example:: In [1]: from sklearn.decomposition import NMF diff --git a/doc/modules/dp-derivation.rst b/doc/modules/dp-derivation.rst index b4117688fbe90..0625884c279f7 100644 --- a/doc/modules/dp-derivation.rst +++ b/doc/modules/dp-derivation.rst @@ -23,7 +23,7 @@ complex, or even more. For this reason we present here a full derivation of the inference algorithm and all the update and lower-bound equations. If you're not interested in learning how to derive similar algorithms yourself and you're not interested in -changing/debugging the implementation in the scikit-learn this document is +changing/debugging the implementation in scikit-learn this document is not for you. 
The complexity of this implementation is linear in the number of diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst index 2a622eeb2cf8e..d64657717ba79 100644 --- a/doc/modules/model_persistence.rst +++ b/doc/modules/model_persistence.rst @@ -13,7 +13,7 @@ security and maintainability issues when working with pickle serialization. Persistence example ------------------- -It is possible to save a model in the scikit-learn by using Python's built-in +It is possible to save a model in scikit-learn by using Python's built-in persistence model, namely `pickle `_:: >>> from sklearn import svm @@ -35,7 +35,7 @@ persistence model, namely `pickle >> y[0] 0 -In the specific case of the scikit-learn, it may be more interesting to use +In the specific case of scikit-learn, it may be more interesting to use joblib's replacement of pickle (``joblib.dump`` & ``joblib.load``), which is more efficient on objects that carry large numpy arrays internally as is often the case for fitted scikit-learn estimators, but can only pickle to the diff --git a/doc/presentations.rst b/doc/presentations.rst index 9da72531aacac..6fe17a69f462d 100644 --- a/doc/presentations.rst +++ b/doc/presentations.rst @@ -37,7 +37,7 @@ Videos `_ by `Gael Varoquaux`_ at ICML 2010 - A three minute video from a very early stage of the scikit-learn, explaining the + A three minute video from a very early stage of scikit-learn, explaining the basic idea and approach we are following. 
- `Introduction to statistical learning with scikit-learn `_ diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 58ce637873411..7c6058591b3e3 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -209,7 +209,7 @@ example that you can run and study: Model persistence ----------------- -It is possible to save a model in the scikit-learn by using Python's built-in +It is possible to save a model in scikit-learn by using Python's built-in persistence model, namely `pickle `_:: >>> from sklearn import svm @@ -231,7 +231,7 @@ persistence model, namely `pickle >> y[0] 0 -In the specific case of the scikit-learn, it may be more interesting to use +In the specific case of scikit-learn, it may be more interesting to use joblib's replacement of pickle (``joblib.dump`` & ``joblib.load``), which is more efficient on big data, but can only pickle to the disk and not to a string:: diff --git a/doc/tutorial/statistical_inference/settings.rst b/doc/tutorial/statistical_inference/settings.rst index 5ea372d6e0ac0..e3c4ca8fea21f 100644 --- a/doc/tutorial/statistical_inference/settings.rst +++ b/doc/tutorial/statistical_inference/settings.rst @@ -12,7 +12,7 @@ list of multi-dimensional observations. We say that the first axis of these arrays is the **samples** axis, while the second is the **features** axis. -.. topic:: A simple example shipped with the scikit-learn: iris dataset +.. topic:: A simple example shipped with scikit-learn: iris dataset :: @@ -46,7 +46,7 @@ needs to be preprocessed in order to be used by scikit-learn. 
>>> plt.imshow(digits.images[-1], cmap=plt.cm.gray_r) #doctest: +SKIP - To use this dataset with the scikit-learn, we transform each 8x8 image into a + To use this dataset with scikit-learn, we transform each 8x8 image into a feature vector of length 64 :: >>> data = digits.images.reshape((digits.images.shape[0], -1)) diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index 9a88696787835..cef8fbe7809d7 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -171,7 +171,7 @@ Connectivity-constrained clustering ..................................... With agglomerative clustering, it is possible to specify which samples can be -clustered together by giving a connectivity graph. Graphs in the scikit-learn +clustered together by giving a connectivity graph. Graphs in scikit-learn are represented by their adjacency matrix. Often, a sparse matrix is used. This can be useful, for instance, to retrieve connected regions (sometimes also referred to as connected components) when From b4f047ee28ea774446024027545078e329e018d0 Mon Sep 17 00:00:00 2001 From: FarahSaeed Date: Tue, 14 Nov 2017 12:54:40 +0500 Subject: [PATCH 3/4] Replacing "the scikit" with "scikit-learn" in all folders --- doc/datasets/index.rst | 2 +- examples/README.txt | 2 +- examples/applications/wikipedia_principal_eigenvector.py | 2 +- sklearn/__check_build/__init__.py | 2 +- sklearn/__init__.py | 2 +- sklearn/preprocessing/label.py | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst index 62f5ffd91b375..1316d596f50f1 100644 --- a/doc/datasets/index.rst +++ b/doc/datasets/index.rst @@ -72,7 +72,7 @@ small to be representative of real world machine learning tasks. 
Sample images ============= -The scikit also embed a couple of sample JPEG images published under Creative +Scikit-learn also embeds a couple of sample JPEG images published under Creative Commons license by their authors. Those image can be useful to test algorithms and pipeline on 2D data. diff --git a/examples/README.txt b/examples/README.txt index 6c084d956fa1e..10b4c32021e73 100644 --- a/examples/README.txt +++ b/examples/README.txt @@ -3,4 +3,4 @@ General examples ---------------- -General-purpose and introductory examples for the scikit. +General-purpose and introductory examples for scikit-learn. diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index 175c10594440e..3ef921bb3d052 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -23,7 +23,7 @@ https://en.wikipedia.org/wiki/Power_iteration Here the computation is achieved thanks to Martinsson's Randomized SVD -algorithm implemented in the scikit. +algorithm implemented in scikit-learn. The graph data is fetched from the DBpedia dumps. DBpedia is an extraction of the latent structured data of the Wikipedia content. diff --git a/sklearn/__check_build/__init__.py b/sklearn/__check_build/__init__.py index 5a4018789a777..6c1cdfd9fc7b2 100644 --- a/sklearn/__check_build/__init__.py +++ b/sklearn/__check_build/__init__.py @@ -1,5 +1,5 @@ """ Module to give helpful messages to the user that did not -compile the scikit properly. +compile scikit-learn properly. 
""" import os diff --git a/sklearn/__init__.py b/sklearn/__init__.py index c45728106ad53..5f2278d1c8c37 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -127,7 +127,7 @@ def config_context(**new_config): if __SKLEARN_SETUP__: sys.stderr.write('Partial import of sklearn during the build process.\n') - # We are not importing the rest of the scikit during the build + # We are not importing the rest of scikit-learn during the build # process, as it may not be compiled yet else: from . import __check_build diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py index 530f376c19fa9..88f1774367670 100644 --- a/sklearn/preprocessing/label.py +++ b/sklearn/preprocessing/label.py @@ -160,7 +160,7 @@ class LabelBinarizer(BaseEstimator, TransformerMixin): """Binarize labels in a one-vs-all fashion Several regression and binary classification algorithms are - available in the scikit. A simple way to extend these algorithms + available in scikit-learn. A simple way to extend these algorithms to the multi-class classification case is to use the so-called one-vs-all scheme. @@ -393,7 +393,7 @@ def label_binarize(y, classes, neg_label=0, pos_label=1, sparse_output=False): """Binarize labels in a one-vs-all fashion Several regression and binary classification algorithms are - available in the scikit. A simple way to extend these algorithms + available in scikit-learn. A simple way to extend these algorithms to the multi-class classification case is to use the so-called one-vs-all scheme. 
From f0e8fbeeca51bc4963a3113bf2ad923490109baf Mon Sep 17 00:00:00 2001 From: FarahSaeed Date: Tue, 14 Nov 2017 14:47:22 +0500 Subject: [PATCH 4/4] Replacing "the scikit" with "scikit-learn" in all folders --- sklearn/tests/test_common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index dde6f4c41c3fb..908240cdaf024 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -77,7 +77,7 @@ def test_non_meta_estimators(): def test_configure(): # Smoke test the 'configure' step of setup, this tests all the - # 'configure' functions in the setup.pys in the scikit + # 'configure' functions in the setup.pys in scikit-learn cwd = os.getcwd() setup_path = os.path.abspath(os.path.join(sklearn.__path__[0], '..')) setup_filename = os.path.join(setup_path, 'setup.py')