From d5e88108a71c6c256b67948515f20955cc96fabf Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Tue, 18 Jul 2017 21:50:10 +0200
Subject: [PATCH 001/269] [WIP] Add Generalized Linear Model, issue #5975, initial commit

---
 sklearn/linear_model/__init__.py       |   7 +
 sklearn/linear_model/glm.py            | 872 +++++++++++++++++++++++++
 sklearn/linear_model/tests/test_glm.py |  73 +++
 3 files changed, 952 insertions(+)
 create mode 100644 sklearn/linear_model/glm.py
 create mode 100644 sklearn/linear_model/tests/test_glm.py

diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py
index 2e01990ccce8c..5acc51e9dc87f 100644
--- a/sklearn/linear_model/__init__.py
+++ b/sklearn/linear_model/__init__.py
@@ -18,6 +18,12 @@
                                  lasso_path, enet_path, MultiTaskLasso,
                                  MultiTaskElasticNet, MultiTaskElasticNetCV,
                                  MultiTaskLassoCV)
+from .glm import (Link, IdentityLink, LogLink,
+                  ExponentialDispersionModel, TweedieDistribution,
+                  NormalDistribution, GaussianDistribution,
+                  PoissonDistribution, GammaDistribution,
+                  InverseGaussianDistribution, GeneralizedHyperbolicSecand,
+                  GeneralizedLinearModel)
 from .huber import HuberRegressor
 from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber
 from .stochastic_gradient import SGDClassifier, SGDRegressor
@@ -38,6 +44,7 @@
            'BayesianRidge',
            'ElasticNet',
            'ElasticNetCV',
+           'GeneralizedLinearModel',
            'Hinge',
            'Huber',
            'HuberRegressor',
diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py
new file mode 100644
index 0000000000000..8b6eb8f3bf16c
--- /dev/null
+++ b/sklearn/linear_model/glm.py
@@ -0,0 +1,872 @@
+"""
+Generalized Linear Models with Exponential Dispersion Family
+"""
+
+# Author: Christian Lorentzen
+# License: BSD 3 clause
+
+# TODO: Which name? GeneralizedLinearModel vs GeneralizedLinearRegression.
+#       So far, it is GeneralizedLinearModel, since it could very easily
+#       extended by Bernoulli/Binomial distribution.
+# TODO: Which name/symbol for coefficients and weights in docu?
+#       sklearn.linear_models uses w for coefficients.
+#       So far, coefficients=beta and weight=w (as standard literature)
+# TODO: Add l2-penalty
+# TODO: Add l1-penalty (elastic net)
+# TODO: Add cross validation
+# TODO: Write docu and examples
+
+# Design Decisions:
+# - The link funtion (instance of class Link) is necessary for the evaluation
+#   of deviance, score, Fisher and Hessian matrix as functions of the
+#   coefficients, which is needed by optimizers.
+#   Solution: link as argument in those functions
+
+from __future__ import division
+from abc import ABCMeta, abstractmethod, abstractproperty
+import numbers
+import numpy as np
+from scipy import linalg, optimize, sparse
+import warnings
+from .base import LinearModel, LinearRegression
+from ..base import RegressorMixin
+from ..utils import check_X_y
+from ..utils.extmath import safe_sparse_dot
+from ..utils.optimize import newton_cg
+from ..utils.validation import check_is_fitted
+
+
+
+class Link(metaclass=ABCMeta):
+    """Abstract base class for Link funtions
+    """
+
+    @abstractmethod
+    def link(self, mu):
+        """The link function g(mu) with argument mu=E[Y] returns the
+        linear predictor.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def derivative(self, mu):
+        """Derivative of the link g'(mu).
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    def inverse(self, lin_pred):
+        """The inverse link function h(lin_pred) with the linear predictor as
+        argument returns mu=E[Y].
+ """ + raise NotImplementedError + + @abstractmethod + def inverse_derivative(self, lin_pred): + """Derivative of the inverse link function h'(lin_pred). + """ + raise NotImplementedError + + @abstractmethod + def inverse_derivative2(self, lin_pred): + """Second derivative of the inverse link function h''(lin_pred). + """ + raise NotImplementedError + +class IdentityLink(Link): + """The identity link function g(x)=x. + """ + + def link(self, mu): + return mu + + def derivative(self, mu): + return np.ones_like(mu) + + def inverse(self, lin_pred): + return lin_pred + + def inverse_derivative(self, lin_pred): + return np.ones_like(lin_pred) + + def inverse_derivative2(self, lin_pred): + return np.zeros_like(lin_pred) + + +class LogLink(Link): + """The log link function g(x)=log(x). + """ + + def link(self, mu): + return np.log(mu) + + def derivative(self, mu): + return 1./mu + + def inverse(self, lin_pred): + return np.exp(lin_pred) + + def inverse_derivative(self, lin_pred): + return np.exp(lin_pred) + + def inverse_derivative2(self, lin_pred): + return np.exp(lin_pred) + + +class ExponentialDispersionModel(metaclass=ABCMeta): + """Base class for reproductive Exponential Dispersion Models (EDM). + + The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by + + .. math:: p(y| \theta, \phi) = c(y, \phi) + \exp\left(\frac{\theta y-A(\theta)}{\phi}\right) + = \tilde{c}(y, \phi) + \exp\left(-\frac{d(y, \mu)}{2\phi}\right) + + with mean :math:`\mathrm{E}[Y] = A'(\theta) = \mu`, + variance :math:`\mathrm{Var}[Y] = \phi \cdot v(\mu)`, + unit variance :math:`v(\mu)` and + unit deviance :math:`d(y,\mu)`. + + Attributes + ---------- + lower_bound + upper_bound + + Methods + ------- + in_y_range + unit_variance + unit_variance_derivative + variance + variance_derivative + unit_deviance + unit_deviance_derivative + deviance + deviance_derivative + starting_mu + + _score + _fisher_matrix + _observed_information + _deviance + _deviance_derivative + _deviance_hessian + + References + ---------- + See https://en.wikipedia.org/wiki/Exponential_dispersion_model. + """ + + @abstractproperty + def lower_bound(self): + """The lower bound of values of Y~EDM. + """ + raise NotImplementedError() + + @abstractproperty + def upper_bound(self): + """The upper bound of values of Y~EDM. + """ + raise NotImplementedError() + + @abstractmethod + def in_y_range(self, x): + """Returns true if x is in the valid range of Y~EDM. + """ + raise NotImplementedError() + + @abstractmethod + def unit_variance(self, mu): + """The unit variance :math:`v(mu)` determines the variance as + a function of the mean mu by + :math:`\mathrm{Var}[Y_i] = \phi/w_i*v(\mu_i)`. + It can also be derived from the unit deviance :math:`d(y,\mu)` as + + .. math:: v(\mu) = \frac{2}{\frac{\partial^2 d(y,\mu)}{ + \partial\mu^2}}\big|_{y=\mu} + """ + raise NotImplementedError() + + @abstractmethod + def unit_variance_derivative(self, mu): + """The derivative of the unit variance w.r.t. mu, :math:`v'(\mu)`. + """ + raise NotImplementedError() + + def variance(self, mu, phi=1, weight=1): + """The variance of :math:`Y \sim \mathrm{EDM}(\mu,\phi)` is + :math:`\mathrm{Var}[Y_i]=\phi/w_i*v(\mu_i)`, + with unit variance v(mu). + """ + return phi/weight * self.unit_variance(mu) + + def variance_derivative(self, mu, phi=1, weight=1): + """The derivative of the variance w.r.t. mu, + :math:`\frac{\partial}{\partial\mu}\mathrm{Var}[Y_i] + =phi/w_i*v'(\mu_i)`, with unit variance v(mu). 
+ """ + return phi/weight * self.unit_variance_derivative(mu) + + @abstractmethod + def unit_deviance(self, y, mu): + """The unit_deviance :math:`d(y,\mu)`. + In terms of the log-likelihood it is given by + :math:`d(y,\mu) = -2\phi\cdot + \left(loglike(y,\mu,phi) - loglike(y,y,phi)\right).` + """ + raise NotImplementedError() + + def unit_deviance_derivative(self, y, mu): + """The derivative w.r.t. mu of the unit_deviance + :math:`\frac{d}{d\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` + with unit variance :math:`v(\mu)`. + + Returns + ------- + derivative: array, shape = (n_samples,) + """ + return -2*(y-mu)/self.unit_variance(mu) + + def deviance(self, y, mu, weight=1): + """The deviance is given by :math:`D = \sum_i w_i \cdot d(y, \mu) + with weight :math:`w_i` and unit_deviance :math:`d(y,mu)`. + In terms of the likelihood it is :math:`D = -2\phi\cdot + \left(loglike(y,\mu,\frac{phi}{w}) + - loglike(y,y,\frac{phi}{w})\right).` + """ + return np.sum(weight*self.unit_deviance(y,mu)) + + def _deviance(self, coef, X, y, weight, link): + """The deviance as a function of the coefficients ``coef`` + (:math:`beta`). + """ + lin_pred = safe_sparse_dot(X, coef, dense_output=True) + mu = link.inverse(lin_pred) + return self.deviance(y, mu, weight) + + def deviance_derivative(self, y, mu, weight=1): + """The derivative w.r.t. mu of the deviance.` + """ + return weight*self.unit_deviance_derivative(y,mu) + + def _score(self, coef, phi, X, y, weight, link): + """The score function :math:`s` is the derivative of the + log-likelihood w.r.t. the ``coef`` (:math:`\beta`). + It is given by + + .. math: + + \mathbf{s}(\boldsymbol{\beta}) = \mathbf{X}^T \mathbf{D} + \boldsymbol{\Sigma}^-1 (\mathbf{y} - \boldsymbol{\mu})\,, + + with :math:`\mathbf{D}=\mathrm{diag}(h'(\eta_1),\ldots)` and + :math:`\boldsymbol{\Sigma}=\mathrm{diag}(\mathbf{V}(y_1),\ldots)`. + """ + n_samples = X.shape[0] + lin_pred = safe_sparse_dot(X, coef, dense_output=True) + mu = link.inverse(lin_pred) + sigma_inv = 1/self.variance(mu, phi=phi, weight=weight) + d = link.inverse_derivative(lin_pred) + d_sigma_inv = sparse.dia_matrix((sigma_inv*d, 0), + shape=(n_samples, n_samples)) + temp = safe_sparse_dot(d_sigma_inv, (y-mu), dense_output=False) + score = safe_sparse_dot(X.T, temp, dense_output=False) + return score + + def _fisher_matrix(self, coef, phi, X, y, weight, link): + """The Fisher information matrix, also known as expected + information matrix. It is given by + + .. math: + + \mathbf{F}(\boldsymbol{\beta}) = \mathrm{E}\left[ + -\frac{\partial^2 loglike}{\partial\boldsymbol{\beta} + \partial\boldsymbol{\beta}^T}\right] + = \mathbf{X}^T W \mathbf{X} \,, + + with :math:`\mathbf{W} = \mathbf{D}^2 \boldsymbol{\Sigma}^{-1}`, + see score function. + """ + n_samples = X.shape[0] + lin_pred = safe_sparse_dot(X, coef, dense_output=True) + mu = link.inverse(lin_pred) + sigma_inv = 1/self.variance(mu, phi=phi, weight=weight) + d2 = link.inverse_derivative(lin_pred)**2 + d2_sigma_inv = sparse.dia_matrix((sigma_inv*d2, 0), + shape=(n_samples, n_samples)) + temp = safe_sparse_dot(d2_sigma_inv, X, dense_output=False) + fisher_matrix = safe_sparse_dot(X.T, temp, dense_output=False) + return fisher_matrix + + def _observed_information(self, coef, phi, X, y, weight, link): + """The observed information matrix, also known as the negative of + the Hessian matrix of the log-likelihood. It is given by + + .. 
math:
+
+            \mathbf{H}(\boldsymbol{\beta}) =
+            -\frac{\partial^2 loglike}{\partial\boldsymbol{\beta}
+            \partial\boldsymbol{\beta}^T}
+            = \mathbf{X}^T \left[
+            - \mathbf{D}' \mathbf{R}
+            + \mathbf{D}^2 \mathbf{V} \mathbf{R}
+            + \mathbf{D}^2
+            \right] \boldsymbol{\Sigma}^{-1} \mathbf{X} \,,
+
+        with :math:`\mathbf{R} = \mathrm{diag}(y_i - \mu_i)`,
+        :math:`\mathbf{V} = \mathrm{diag}\left(\frac{v'(\mu_i)}{
+            v(\mu_i)}
+            \right)`,
+        see score function and Fisher matrix.
+        """
+        n_samples = X.shape[0]
+        lin_pred = safe_sparse_dot(X, coef, dense_output=True)
+        mu = link.inverse(lin_pred)
+        sigma_inv = 1/self.variance(mu, phi=phi, weight=weight)
+        dp = link.inverse_derivative2(lin_pred)
+        d2 = link.inverse_derivative(lin_pred)**2
+        v = self.unit_variance_derivative(mu)/self.unit_variance(mu)
+        r = y - mu
+        temp = sparse.dia_matrix((sigma_inv*(-dp*r+d2*v*r+d2), 0),
+            shape=(n_samples, n_samples))
+        temp = safe_sparse_dot(temp, X, dense_output=False)
+        observed_information = safe_sparse_dot(X.T, temp, dense_output=False)
+        return observed_information
+
+    def _deviance_derivative(self, coef, X, y, weight, link):
+        """The derivative w.r.t. ``coef`` (:math:`\beta`) of the deviance as a
+        function of the coefficients ``coef``.
+        This is equivalent to :math:`-2\phi` times the score function
+        :math:`s` (derivative of the log-likelihood).
+        """
+        score = self._score(coef=coef, phi=1, X=X, y=y, weight=weight,
+            link=link)
+        return -2*score
+
+    def _deviance_hessian(self, coef, X, y, weight, link):
+        """The hessian matrix w.r.t. ``coef`` (:math:`\beta`) of the deviance
+        as a function of the coefficients ``coef``.
+        This is equivalent to :math:`+2\phi` times the observed information
+        matrix.
+        """
+        info_matrix = self._observed_information(coef=coef, phi=1,
+            X=X, y=y, weight=weight, link=link)
+        return 2*info_matrix
+
+    def starting_mu(self, y, weight=1):
+        """Starting values for the mean mu_i in IRLS."""
+        return (weight*y+np.mean(weight*y))/(2.*np.sum(np.ones_like(y)*weight))
+
+
+class TweedieDistribution(ExponentialDispersionModel):
+    """A class for the Tweedie distribution.
+    They have mu=E[X] and Var[X] \propto mu**power.
+
+    Attributes
+    ----------
+    power : float
+            The variance power of the unit_variance
+            :math:`v(mu) = mu^{power}`.
+ """ + def __init__(self, power=0): + self.power = power + self._upper_bound = np.Inf + self._upper_compare = lambda x: np.less(x, self.upper_bound) + if power < 0: + #Extreme Stable + self._lower_bound = -np.Inf + self._lower_compare = lambda x: np.greater(x, self.lower_bound) + elif power == 0: + #GaussianDistribution + self._lower_bound = -np.Inf + self._lower_compare = lambda x: np.greater(x, self.lower_bound) + elif (power > 0) and (power < 1): + raise ValueError('For 0 1) and (power < 2): + #Compound Poisson + self._lower_bound = 0 + self._lower_compare = ( + lambda x: np.greater_equal(x, self.lower_bound)) + elif power == 2: + #GammaDistribution + self._lower_bound = 0 + self._lower_compare = lambda x: np.greater(x, self.lower_bound) + elif (power > 2) and (power < 3): + #Positive Stable + self._lower_bound = 0 + self._lower_compare = lambda x: np.greater(x, self.lower_bound) + elif power == 3: + #InverseGaussianDistribution + self._lower_bound = 0 + self._lower_compare = lambda x: np.greater(x, self.lower_bound) + elif power > 3: + #Positive Stable + self._lower_bound = 0 + self._lower_compare = lambda x: np.greater(x, self.lower_bound) + + @property + def power(self): + return self._power + + @power.setter + def power(self, power): + if not isinstance(power, numbers.Real): + raise TypeError('power must be a real number, input was {0}' + .format(power)) + self._power = power + + @property + def lower_bound(self): + return self._lower_bound + + @property + def upper_bound(self): + return self._upper_bound + + def in_y_range(self, x): + return np.logical_and(self._lower_compare(x), self._upper_compare(x)) + + def unit_variance(self, mu): + """The unit variance of a Tweedie distribution is v(mu)=mu**power. + """ + return np.power(mu, self.power) + + def unit_variance_derivative(self, mu): + """The derivative of the unit variance of a Tweedie distribution is + v(mu)=power*mu**(power-1). + """ + return self.power*np.power(mu, self.power-1) + + def unit_deviance(self, y, mu): + p = self.power + if p == 0: + #NormalDistribution + return (y-mu)**2 + if p == 1: + #PoissonDistribution + return 2 * (np.where(y==0,0,y*np.log(y/mu))-y+mu) + elif p == 2: + #GammaDistribution + return 2 * (np.log(mu/y)+y/mu-1) + else: + #return 2 * (np.maximum(y,0)**(2-p)/((1-p)*(2-p)) + # - y*mu**(1-p)/(1-p) + mu**(2-p)/(2-p)) + return 2 * (np.power(np.maximum(y,0), 2-p)/((1-p)*(2-p)) + - y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) + + def likelihood(self, y, X, beta, phi, weight=1): + raise NotImplementedError('This function is not (yet) implemented.') + + +class NormalDistribution(TweedieDistribution): + """Class for the Normal (aka Gaussian) distribution""" + def __init__(self): + super(NormalDistribution, self).__init__(power=0) + +GaussianDistribution = NormalDistribution + +class PoissonDistribution(TweedieDistribution): + """Class for the scaled Poisson distribution""" + def __init__(self): + super(PoissonDistribution, self).__init__(power=1) + +class GammaDistribution(TweedieDistribution): + """Class for the Gamma distribution""" + def __init__(self): + super(GammaDistribution, self).__init__(power=2) + +class InverseGaussianDistribution(TweedieDistribution): + """Class for the scaled InverseGaussianDistribution distribution""" + def __init__(self): + super(InverseGaussianDistribution, self).__init__(power=3) + +class GeneralizedHyperbolicSecand(ExponentialDispersionModel): + """A class for the von Generalized Hyperbolic Secand (GHS) distribution. 
+ + The GHS distribution is for data y in (-inf, inf). + """ + def __init__(self): + self._lower_bound = -np.Inf + self._upper_bound = np.Inf + + @property + def lower_bound(self): + return self._lower_bound + + @property + def upper_bound(self): + return self._upper_bound + + def in_y_range(self, x): + np.logical_and( + np.greater(x, self.lower_bound), + np.less(x, self.lower_bound) + ) + + def unit_variance(self, mu): + return 1 + mu**2 + + def unit_variance_derivative(self, mu): + return 2*mu + + def unit_deviance(self, y, mu): + return (2*y*(np.arctan(y) - np.arctan(mu)) + + np.log((1+mu**2)/(1+y**2))) + + + +class GeneralizedLinearModel(LinearModel, RegressorMixin): + """ + Class to fit a Generalized Linear Model (GLM) based on reproductive + Exponential Dispersion Models (EDM). + + Assumptions: + + - The target values y_i are realizations of random variables + :math:`Y_i \sim \mathrm{EDM}(\mu_i, \frac{\phi}{w_i})` with dispersion + parameter :math:`\phi` and weights :math:`w_i`. + - The expectation of :math:`Y_i` is :math:`mu_i=\mathrm{E}[Y]=h(\eta_i)` + whith the linear predictor :math:`\eta=X*\beta`, inverse link function + :math:`h(\eta)`, design matrix :math:`X` and parameters :math:`\beta` + to be estimated. + + Note that the first assumption implies + :math:`\mathrm{Var}[Y_i]=\frac{\phi}{w_i} v(\mu_i)` with uni variance + function :math:`v(\mu)`. + + The fit itself does not need Y to be from an EDM, but only assumes + the first two moments :math:`E[Y_i]=\mu_i=h(\eta_i)` and + :math:`Var[Y_i]=\frac{\phi}{w_i} v(\mu_i)` + + The parameters :math:`\beta` are estimated by maximum likelihood which is + equivalent to minimizing the deviance. + + TODO: Estimation of the dispersion parameter phi. + + TODO: Notes on 'scaled' Poisson and weights + + Parameters + ---------- + fit_intercept : boolean, optional, default True + whether to calculate the intercept for this model. If set + to False, no intercept will be used in calculations + (e.g. data is expected to be already centered). + + family : ExponentialDispersionModel, optional, default NormalDistribution() + the distributional assumption of the GLM + + link : Link, optional, default IdentityLink() + the link function (class) of the GLM + + fit_dispersion : {None, 'chisqr', 'deviance'}, defaul 'chisqr' + method for estimation of the dispersion parameter phi. Whether to use + the chi squared statisic or the deviance statistic. If None, the + dispersion is not estimated. + + solver : {'irls', 'newton-cg', 'lbfgs'}, defaul 'irls' + Algorithm to use in the optimization problem. + + - 'irls' is iterated reweighted least squares. It is the standard + algorithm for GLMs. + + - 'newton-cg', 'lbfgs' + + max_iter : int, default 100 + TODO + + tol : float + Stopping criterion. For the irls, newton-cg and lbfgs solvers, + the iteration will stop when ``max{|g_i | i = 1, ..., n} <= tol`` + where ``g_i`` is the i-th component of the gradient (derivative of + the deviance). + + start_params : {array shape (n_features, ), 'ols'}, default None + sets the start values for coef_ in the fit. + If None, default values are taken. + If 'ols' the result of an ordinary least squares in the link space + (linear predictor) is taken. + If an array is given, these values are taken as coef_ to start with. + If fit_intercept is true, the first value is assumed to be the start + value for the intercept_. + + verbose : int, default: 0 + For the lbfgs solver set verbose to any positive + number for verbosity. 
+ + Attributes + ---------- + coef_ : array, shape (1, n_features) + Estimated coefficients for the linear predictor (X*coef_) in the GLM. + + intercept_ : float + Intercept (a.k.a. bias) added to linear predictor. + + dispersion_ : float + The dispersion parameter :math:`\phi` if fit_dispersion is set. + + n_iter_ : int + Actual number of iterations of the solver. + + Notes + ----- + + References + ---------- + TODO + """ + + def __init__(self, fit_intercept=True, family=NormalDistribution(), + link=IdentityLink(), fit_dispersion='chisqr', solver='irls', max_iter=100, + tol=1e-4, start_params=None, verbose=0): + self.fit_intercept = fit_intercept + self.family = family + self.link = link + self.fit_dispersion = fit_dispersion + self.solver = solver + self.max_iter = 100 + self.tol = tol + self.start_params = start_params + self.verbose = verbose + + def fit(self, X, y, weight=None): + """ + Fit a generalized linear model. + + Parameters + ---------- + X : numpy array or sparse matrix of shape [n_samples,n_features] + Training data + + y : numpy array of shape [n_samples] + Target values + + weight : numpy array of shape [n_samples] + Individual weights for each sample. + Var[Y_i]=phi/weight_i * v(mu) + If Y_i ~ EDM(mu, phi/w_i) then + sum(w*Y)/sum(w) ~ EDM(mu, phi/sum(w)) + + Returns + ------- + self : returns an instance of self. + """ + if not isinstance(self.family, ExponentialDispersionModel): + raise ValueError("The argument family must be an instance of class" + "ExponentialDispersionModel.") + if not isinstance(self.fit_intercept, bool): + raise ValueError("The argument fit_intercept must be bool," + " got {0}".format(self.fit_intercept)) + if not self.solver in ['irls', 'lbfgs', 'newton-cg']: + raise ValueError("GLM Regression supports only irls, lbfgs and" + "newton-cg solvers, got {0}".format(self.solver)) + if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: + raise ValueError("Maximum number of iteration must be positive;" + " got (max_iter={0!r})".format(self.max_iter)) + if not isinstance(self.tol, numbers.Number) or self.tol < 0: + raise ValueError("Tolerance for stopping criteria must be " + "positive; got (tol={0!r})".format(self.tol)) + start_params = self.start_params + if start_params is not None and start_params is not 'ols': + start_params = np.atleast_1d(start_params) + if start_params.shape[0] != X.shape[1] + self.fit_intercept: + raise ValueError("Start values for parameters must have the" + "right length; required length {0}, got {1}".format( + X.shape[1] + self.fit_intercept, start_params.shape[0])) + + X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], + y_numeric=True, multi_output=False) + y = y.astype(np.float64) + + if not np.all(self.family.in_y_range(y)): + raise ValueError("Some value(s) of y are out of the valid " + "range for family {0}".format(self.family.__class__.__name__)) + + if weight is None: + weight = np.ones_like(y) + elif np.isscalar(weight): + weight = weight*np.ones_like(y) + else: + weight = np.atleast_1d(weight) + if weight.ndim > 1: + raise ValueError("Weights must be 1D array or scalar") + elif weight.shape[0] != y.shape[0]: + raise ValueError("Weights must have the same length as y") + + + if self.fit_intercept: + #intercept is first column <=> coef[0] is for intecept + if sparse.issparse(X): + Xnew = sparse.hstack([np.ones([X.shape[0],1]), X]) + else: + Xnew = np.concatenate((np.ones((X.shape[0],1)), X), axis=1) + else: + Xnew = X + + n_samples, n_features = Xnew.shape + + #Note: Since phi does not enter 
the estimation of mu_i=E[y_i] + # set it to 1 where convenient. + + #set start values for coef + coef = None + if start_params is None: + #Use mu_start and apply one irls step to calculate coef + mu = self.family.starting_mu(y, weight) + #linear predictor + eta = self.link.link(mu) + #h'(eta) + hp = self.link.inverse_derivative(eta) + #working weights w, in principle a diagonal matrix + #therefore here just as 1d array + w = (hp**2 / self.family.variance(mu, phi=1, weight=weight)) + wroot = np.sqrt(w) + #working observations + yw = eta + (y-mu)/hp + #least squares rescaled with wroot + wroot = sparse.dia_matrix((wroot, 0), shape=(n_samples, n_samples)) + X_rescale = safe_sparse_dot(wroot, Xnew, dense_output=True) + yw_rescale = safe_sparse_dot(wroot, y, dense_output=True) + coef = linalg.lstsq(X_rescale, yw_rescale)[0] + elif start_params is 'ols': + reg = LinearRegression(copy_X=False, + fit_intercept=False) + reg.fit(Xnew, self.link.link(y)) + coef = reg.coef_ + else: + coef = start_params + + #algorithms for optimiation + #TODO: Parallelize it + self.n_iter_ = 0 + converged = False + if self.solver == 'irls': + #linear predictor + eta = safe_sparse_dot(Xnew, coef, dense_output=True) + mu = self.link.inverse(eta) + while self.n_iter_ < self.max_iter: + self.n_iter_ += 1 + #coef_old not used so far. + #coef_old = coef + #h'(eta) + hp = self.link.inverse_derivative(eta) + #working weights w, in principle a diagonal matrix + #therefore here just as 1d array + w = (hp**2 / self.family.variance(mu, phi=1, weight=weight)) + wroot = np.sqrt(w) + #working observations + yw = eta + (y-mu)/hp + #least squares rescaled with wroot + wroot = sparse.dia_matrix((wroot, 0), + shape=(n_samples, n_samples)) + X_rescale = safe_sparse_dot(wroot, Xnew, dense_output=True) + yw_rescale = safe_sparse_dot(wroot, yw, dense_output=True) + coef, residues, rank, singular_ = ( + linalg.lstsq(X_rescale, yw_rescale)) + + #updated linear predictor + #do it here for updated values for tolerance + eta = safe_sparse_dot(Xnew, coef, dense_output=True) + mu = self.link.inverse(eta) + + #which tolerace? |coef - coef_old| or gradient? + #use gradient for compliance with newton-cg and lbfgs + #TODO: faster computation of gradient, use mu and eta directly + gradient = self.family._deviance_derivative(coef=coef, + X=Xnew, y=y, weight=weight, link=self.link) + if (np.max(np.abs(gradient)) <= self.tol): + converged = True + break + + if not converged: + warnings.warn("irls failed to converge. Increase the number " + "of iterations (currently {0})".format(self.max_iter)) + + #TODO: performance: make one function return both deviance and gradient + elif self.solver == 'lbfgs': + func = self.family._deviance + fprime = self.family._deviance_derivative + args = (Xnew, y, weight, self.link) + coef, loss, info = optimize.fmin_l_bfgs_b( + func, coef, fprime=fprime, + args=args, + iprint=(self.verbose > 0) - 1, pgtol=self.tol, + maxiter=self.max_iter) + if self.verbose > 0: + if info["warnflag"] == 1: + warnings.warn("lbfgs failed to converge." 
+ " Increase the number of iterations.") + elif info["warnflag"] == 2: + warnings.warn("lbfgs failed for the reason: {0}".format( + info["task"])) + self.n_iter_ = info['nit'] + elif self.solver == 'newton-cg': + func = self.family._deviance + grad = self.family._deviance_derivative + def grad_hess(coef, X, y, weight, link): + grad = (self.family + ._deviance_derivative(coef, X, y, weight, link)) + hessian = (self.family + ._deviance_hessian(coef, X, y, weight,link)) + def Hs(s): + ret = np.dot(hessian, s) + return ret + return grad, Hs + hess = grad_hess + args = (Xnew, y, weight, self.link) + coef, n_iter_i = newton_cg(hess, func, grad, coef, args=args, + maxiter=self.max_iter, tol=self.tol) + self.coef_ = coef + + if self.fit_intercept is True: + self.intercept_ = coef[0] + self.coef_ = coef[1:] + else: + self.coef_ = coef + + if self.fit_dispersion in ['chisqr', 'deviance']: + self.dispersion_ = self.estimate_phi(y, X, weight) + + return self + + def predict(self, X, weight=1): + check_is_fitted(self, "coef_") + eta = safe_sparse_dot(X, self.coef_, dense_output=True) + if self.fit_intercept is True: + eta += self.intercept_ + mu = self.link.inverse(eta) + return mu*weight + + def estimate_phi(self, y, X, weight): + n_samples, n_features = X.shape + eta = safe_sparse_dot(X, self.coef_, dense_output=True) + if self.fit_intercept is True: + eta += self.intercept_ + mu = self.link.inverse(eta) + if self.fit_dispersion == 'chisqr': + chisq = np.sum(weight*(y-mu)**2/self.family.unit_variance(mu)) + return chisq/(n_samples - n_features) + elif self.fit_dispersion == 'deviance': + dev = self.family.deviance(y, mu, weight) + return dev/(n_samples - n_features) + + def score(self, X, y, weight=1): + """The natural score for a GLM is -deviance. + Returns the weight averaged negitive deviance (the better the score, + the better the fit). Maximum score is therefore 0. + """ + #RegressorMixin has R^2 score. 
+ #TODO: Make it more compatible with the score function in + # sklearn.metrics.regression.py + eta = safe_sparse_dot(X, self.coef_, dense_output=True) + if self.fit_intercept is True: + eta += self.intercept_ + mu = self.link.inverse(eta) + output_errors = self.family.unit_deviance(y,mu) + weight = weight * np.ones_like(y) + return np.average(output_errors, weights=weight) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py new file mode 100644 index 0000000000000..2a88a7ec899d9 --- /dev/null +++ b/sklearn/linear_model/tests/test_glm.py @@ -0,0 +1,73 @@ +import numpy as np + +from sklearn.linear_model.glm import (Link, IdentityLink, LogLink, + ExponentialDispersionModel, TweedieDistribution, + NormalDistribution, GaussianDistribution, + PoissonDistribution, GammaDistribution, + InverseGaussianDistribution, GeneralizedHyperbolicSecand, + GeneralizedLinearModel) + +from sklearn.utils.testing import (assert_equal, assert_array_equal, + assert_array_almost_equal) + +def test_family_bounds(): + """Test the valid range of distributions + """ + family = NormalDistribution() + result = family.in_y_range([-1,0,1]) + assert_array_equal(result, [True, True, True]) + + family = PoissonDistribution() + result = family.in_y_range([-1,0,1]) + assert_array_equal(result, [False, True, True]) + + family = TweedieDistribution(power=1.5) + result = family.in_y_range([-1,0,1]) + assert_array_equal(result, [False, True, True]) + + family = GammaDistribution() + result = family.in_y_range([-1,0,1]) + assert_array_equal(result, [False, False, True]) + + family = InverseGaussianDistribution() + result = family.in_y_range([-1,0,1]) + assert_array_equal(result, [False, False, True]) + + family = TweedieDistribution(power=4.5) + result = family.in_y_range([-1,0,1]) + assert_array_equal(result, [False, False, True]) + +def test_glm_identiy_regression(): + """Test linear regression on a simple dataset + """ + coef = [1,2] + X = np.array([[1,1,1,1,1],[0,1,2,3,4]]).T + y = np.dot(X, coef) + for solver in ['irls', 'lbfgs', 'newton-cg']: + for family in (GaussianDistribution(), PoissonDistribution(), + GammaDistribution(), InverseGaussianDistribution(), + TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)): + glm = GeneralizedLinearModel(family=family, + fit_intercept=False, solver=solver) + res = glm.fit(X, y) + assert_array_almost_equal(res.coef_, coef) + +def test_glm_log_regression(): + """Test linear regression on a simple dataset + """ + coef = [1,2] + X = np.array([[1,1,1,1,1],[0,1,2,3,4]]).T + y = np.exp(np.dot(X, coef)) + #for solver in ['irls', 'lbfgs', 'newton-cg']: + for solver in ['irls']: + #for family in [GaussianDistribution(), PoissonDistribution(), + # GammaDistribution(), InverseGaussianDistribution(), + # TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)]: + for family in [GaussianDistribution()]: + glm = GeneralizedLinearModel(family=family, + link=LogLink(), + fit_intercept=False, solver=solver, start_params='ols') + res = glm.fit(X, y) + assert_array_almost_equal(res.coef_, coef) + +#TODO: Test compatibility with R's glm, glmnet From 2fc189d8351c9710c1329750545539afe3e6e40c Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 19 Jul 2017 17:33:04 +0200 Subject: [PATCH 002/269] [WIP] Add Generalized Linear Models (#9405) * Fixed pep8 * Fixed flake8 * Rename GeneralizedLinearModel as GeneralizedLinearRegressor * Use of six.with_metaclass * PEP257: summary should be on same line as quotes * Docstring of class 
GeneralizedLinearRegressor: \ before mu * Arguments family and link accept strings * Use of ConvergenceWarning --- sklearn/linear_model/__init__.py | 13 +- sklearn/linear_model/glm.py | 231 ++++++++++++++----------- sklearn/linear_model/tests/test_glm.py | 75 ++++---- 3 files changed, 180 insertions(+), 139 deletions(-) diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index 5acc51e9dc87f..0c5840f343a3a 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -18,12 +18,8 @@ lasso_path, enet_path, MultiTaskLasso, MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLassoCV) -from .glm import (Link, IdentityLink, LogLink, - ExponentialDispersionModel, TweedieDistribution, - NormalDistribution, GaussianDistribution, - PoissonDistribution, GammaDistribution, - InverseGaussianDistribution, GeneralizedHyperbolicSecand, - GeneralizedLinearModel) +from .glm import (TweedieDistribution, + GeneralizedLinearRegressor) from .huber import HuberRegressor from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from .stochastic_gradient import SGDClassifier, SGDRegressor @@ -44,7 +40,6 @@ 'BayesianRidge', 'ElasticNet', 'ElasticNetCV', - 'GeneralizedLinearModel', 'Hinge', 'Huber', 'HuberRegressor', @@ -84,4 +79,6 @@ 'orthogonal_mp', 'orthogonal_mp_gram', 'ridge_regression', - 'RANSACRegressor'] + 'RANSACRegressor', + 'GeneralizedLinearRegressor', + 'TweedieDistribution'] diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 8b6eb8f3bf16c..cf91a64fafc12 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -5,9 +5,6 @@ # Author: Christian Lorentzen # License: BSD 3 clause -# TODO: Which name? GeneralizedLinearModel vs GeneralizedLinearRegression. -# So far, it is GeneralizedLinearModel, since it could very easily -# extended by Bernoulli/Binomial distribution. # TODO: Which name/symbol for coefficients and weights in docu? # sklearn.linear_models uses w for coefficients. # So far, coefficients=beta and weight=w (as standard literature) @@ -17,6 +14,10 @@ # TODO: Write docu and examples # Design Decisions: +# - Which name? GeneralizedLinearModel vs GeneralizedLinearRegressor. +# So far, it is GeneralizedLinearModel, since it could very easily +# extended by Bernoulli/Binomial distribution. +# Solution: GeneralizedLinearRegressor # - The link funtion (instance of class Link) is necessary for the evaluation # of deviance, score, Fisher and Hessian matrix as functions of the # coefficients, which is needed by optimizers. @@ -28,16 +29,17 @@ import numpy as np from scipy import linalg, optimize, sparse import warnings -from .base import LinearModel, LinearRegression -from ..base import RegressorMixin +from .base import LinearRegression +from ..base import BaseEstimator, RegressorMixin +from ..exceptions import ConvergenceWarning +from ..externals import six from ..utils import check_X_y from ..utils.extmath import safe_sparse_dot from ..utils.optimize import newton_cg from ..utils.validation import check_is_fitted - -class Link(metaclass=ABCMeta): +class Link(six.with_metaclass(ABCMeta)): """Abstract base class for Link funtions """ @@ -73,6 +75,7 @@ def inverse_derivative2(self, lin_pred): """ raise NotImplementedError + class IdentityLink(Link): """The identity link function g(x)=x. 
""" @@ -113,7 +116,7 @@ def inverse_derivative2(self, lin_pred): return np.exp(lin_pred) -class ExponentialDispersionModel(metaclass=ABCMeta): +class ExponentialDispersionModel(six.with_metaclass(ABCMeta)): """Base class for reproductive Exponential Dispersion Models (EDM). The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by @@ -235,7 +238,7 @@ def deviance(self, y, mu, weight=1): \left(loglike(y,\mu,\frac{phi}{w}) - loglike(y,y,\frac{phi}{w})\right).` """ - return np.sum(weight*self.unit_deviance(y,mu)) + return np.sum(weight*self.unit_deviance(y, mu)) def _deviance(self, coef, X, y, weight, link): """The deviance as a function of the coefficients ``coef`` @@ -248,7 +251,7 @@ def _deviance(self, coef, X, y, weight, link): def deviance_derivative(self, y, mu, weight=1): """The derivative w.r.t. mu of the deviance.` """ - return weight*self.unit_deviance_derivative(y,mu) + return weight*self.unit_deviance_derivative(y, mu) def _score(self, coef, phi, X, y, weight, link): """The score function :math:`s` is the derivative of the @@ -269,7 +272,7 @@ def _score(self, coef, phi, X, y, weight, link): sigma_inv = 1/self.variance(mu, phi=phi, weight=weight) d = link.inverse_derivative(lin_pred) d_sigma_inv = sparse.dia_matrix((sigma_inv*d, 0), - shape=(n_samples, n_samples)) + shape=(n_samples, n_samples)) temp = safe_sparse_dot(d_sigma_inv, (y-mu), dense_output=False) score = safe_sparse_dot(X.T, temp, dense_output=False) return score @@ -294,7 +297,7 @@ def _fisher_matrix(self, coef, phi, X, y, weight, link): sigma_inv = 1/self.variance(mu, phi=phi, weight=weight) d2 = link.inverse_derivative(lin_pred)**2 d2_sigma_inv = sparse.dia_matrix((sigma_inv*d2, 0), - shape=(n_samples, n_samples)) + shape=(n_samples, n_samples)) temp = safe_sparse_dot(d2_sigma_inv, X, dense_output=False) fisher_matrix = safe_sparse_dot(X.T, temp, dense_output=False) return fisher_matrix @@ -329,7 +332,7 @@ def _observed_information(self, coef, phi, X, y, weight, link): v = self.unit_variance_derivative(mu)/self.unit_variance(mu) r = y - mu temp = sparse.dia_matrix((sigma_inv*(-dp*r+d2*v*r+d2), 0), - shape=(n_samples, n_samples)) + shape=(n_samples, n_samples)) temp = safe_sparse_dot(temp, X, dense_output=False) observed_information = safe_sparse_dot(X.T, temp, dense_output=False) return observed_information @@ -341,7 +344,7 @@ def _deviance_derivative(self, coef, X, y, weight, link): :math:`s` (derivative of the log-likelihood). """ score = self._score(coef=coef, phi=1, X=X, y=y, weight=weight, - link=link) + link=link) return -2*score def _deviance_hessian(self, coef, X, y, weight, link): @@ -350,8 +353,8 @@ def _deviance_hessian(self, coef, X, y, weight, link): This is equivalent to :math:`+2\phi` times the observed information matrix. 
""" - info_matrix = self._observed_information(coef=coef, phi=1, - X=X, y=y, weight=weight, link=link) + info_matrix = self._observed_information(coef=coef, phi=1, X=X, y=y, + weight=weight, link=link) return 2*info_matrix def starting_mu(self, y, weight=1): @@ -374,39 +377,39 @@ def __init__(self, power=0): self._upper_bound = np.Inf self._upper_compare = lambda x: np.less(x, self.upper_bound) if power < 0: - #Extreme Stable + # Extreme Stable self._lower_bound = -np.Inf self._lower_compare = lambda x: np.greater(x, self.lower_bound) elif power == 0: - #GaussianDistribution + # GaussianDistribution self._lower_bound = -np.Inf self._lower_compare = lambda x: np.greater(x, self.lower_bound) elif (power > 0) and (power < 1): raise ValueError('For 0 1) and (power < 2): - #Compound Poisson + # Compound Poisson self._lower_bound = 0 self._lower_compare = ( lambda x: np.greater_equal(x, self.lower_bound)) elif power == 2: - #GammaDistribution + # GammaDistribution self._lower_bound = 0 self._lower_compare = lambda x: np.greater(x, self.lower_bound) elif (power > 2) and (power < 3): - #Positive Stable + # Positive Stable self._lower_bound = 0 self._lower_compare = lambda x: np.greater(x, self.lower_bound) elif power == 3: - #InverseGaussianDistribution + # InverseGaussianDistribution self._lower_bound = 0 self._lower_compare = lambda x: np.greater(x, self.lower_bound) elif power > 3: - #Positive Stable + # Positive Stable self._lower_bound = 0 self._lower_compare = lambda x: np.greater(x, self.lower_bound) @@ -418,7 +421,7 @@ def power(self): def power(self, power): if not isinstance(power, numbers.Real): raise TypeError('power must be a real number, input was {0}' - .format(power)) + .format(power)) self._power = power @property @@ -446,19 +449,19 @@ def unit_variance_derivative(self, mu): def unit_deviance(self, y, mu): p = self.power if p == 0: - #NormalDistribution + # NormalDistribution return (y-mu)**2 if p == 1: - #PoissonDistribution - return 2 * (np.where(y==0,0,y*np.log(y/mu))-y+mu) + # PoissonDistribution + return 2 * (np.where(y == 0, 0, y*np.log(y/mu))-y+mu) elif p == 2: - #GammaDistribution + # GammaDistribution return 2 * (np.log(mu/y)+y/mu-1) else: - #return 2 * (np.maximum(y,0)**(2-p)/((1-p)*(2-p)) + # return 2 * (np.maximum(y,0)**(2-p)/((1-p)*(2-p)) # - y*mu**(1-p)/(1-p) + mu**(2-p)/(2-p)) - return 2 * (np.power(np.maximum(y,0), 2-p)/((1-p)*(2-p)) - - y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) + return 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p)*(2-p)) - + y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) def likelihood(self, y, X, beta, phi, weight=1): raise NotImplementedError('This function is not (yet) implemented.') @@ -469,23 +472,25 @@ class NormalDistribution(TweedieDistribution): def __init__(self): super(NormalDistribution, self).__init__(power=0) -GaussianDistribution = NormalDistribution class PoissonDistribution(TweedieDistribution): """Class for the scaled Poisson distribution""" def __init__(self): super(PoissonDistribution, self).__init__(power=1) + class GammaDistribution(TweedieDistribution): """Class for the Gamma distribution""" def __init__(self): super(GammaDistribution, self).__init__(power=2) + class InverseGaussianDistribution(TweedieDistribution): """Class for the scaled InverseGaussianDistribution distribution""" def __init__(self): super(InverseGaussianDistribution, self).__init__(power=3) + class GeneralizedHyperbolicSecand(ExponentialDispersionModel): """A class for the von Generalized Hyperbolic Secand (GHS) distribution. 
@@ -516,12 +521,11 @@ def unit_variance_derivative(self, mu): return 2*mu def unit_deviance(self, y, mu): - return (2*y*(np.arctan(y) - np.arctan(mu)) - + np.log((1+mu**2)/(1+y**2))) - + return (2*y*(np.arctan(y) - np.arctan(mu)) + + np.log((1+mu**2)/(1+y**2))) -class GeneralizedLinearModel(LinearModel, RegressorMixin): +class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """ Class to fit a Generalized Linear Model (GLM) based on reproductive Exponential Dispersion Models (EDM). @@ -531,7 +535,7 @@ class GeneralizedLinearModel(LinearModel, RegressorMixin): - The target values y_i are realizations of random variables :math:`Y_i \sim \mathrm{EDM}(\mu_i, \frac{\phi}{w_i})` with dispersion parameter :math:`\phi` and weights :math:`w_i`. - - The expectation of :math:`Y_i` is :math:`mu_i=\mathrm{E}[Y]=h(\eta_i)` + - The expectation of :math:`Y_i` is :math:`\mu_i=\mathrm{E}[Y]=h(\eta_i)` whith the linear predictor :math:`\eta=X*\beta`, inverse link function :math:`h(\eta)`, design matrix :math:`X` and parameters :math:`\beta` to be estimated. @@ -549,7 +553,9 @@ class GeneralizedLinearModel(LinearModel, RegressorMixin): TODO: Estimation of the dispersion parameter phi. - TODO: Notes on 'scaled' Poisson and weights + TODO: Notes on weights and 'scaled' Poisson, e.g. fit y = x/w with + with x=counts and w=exposure (time, money, persons, ...) => y is a + ratio with weights w. Parameters ---------- @@ -558,10 +564,12 @@ class GeneralizedLinearModel(LinearModel, RegressorMixin): to False, no intercept will be used in calculations (e.g. data is expected to be already centered). - family : ExponentialDispersionModel, optional, default NormalDistribution() - the distributional assumption of the GLM + family : {'normal', 'poisson', 'gamma', 'inverse.gaussian'} or an instance + of a subclass of ExponentialDispersionModel, optional, default 'normal' + the distributional assumption of the GLM. - link : Link, optional, default IdentityLink() + link : {'identity', 'log'} or an instance of a subclass of Link, + optional, default IdentityLink() the link function (class) of the GLM fit_dispersion : {None, 'chisqr', 'deviance'}, defaul 'chisqr' @@ -622,8 +630,8 @@ class GeneralizedLinearModel(LinearModel, RegressorMixin): """ def __init__(self, fit_intercept=True, family=NormalDistribution(), - link=IdentityLink(), fit_dispersion='chisqr', solver='irls', max_iter=100, - tol=1e-4, start_params=None, verbose=0): + link=IdentityLink(), fit_dispersion='chisqr', solver='irls', + max_iter=100, tol=1e-4, start_params=None, verbose=0): self.fit_intercept = fit_intercept self.family = family self.link = link @@ -635,8 +643,7 @@ def __init__(self, fit_intercept=True, family=NormalDistribution(), self.verbose = verbose def fit(self, X, y, weight=None): - """ - Fit a generalized linear model. + """Fit a generalized linear model. Parameters ---------- @@ -657,12 +664,32 @@ def fit(self, X, y, weight=None): self : returns an instance of self. 
""" if not isinstance(self.family, ExponentialDispersionModel): - raise ValueError("The argument family must be an instance of class" - "ExponentialDispersionModel.") + if self.family == 'normal': + self.family = NormalDistribution() + elif self.family == 'poisson': + self.family = PoissonDistribution() + elif self.family == 'gamma': + self.family = GammaDistribution() + elif self.family == 'inverse.gaussian': + self.family = InverseGaussianDistribution() + else: + raise ValueError( + "The argument family must be an instance of class" + " ExponentialDispersionModel or an element of" + " ['normal', 'poisson', 'gamma', 'inverse.gaussian'].") + if not isinstance(self.link, Link): + if self.link == 'identity': + self.link = IdentityLink() + if self.link == 'log': + self.link = LogLink() + else: + raise ValueError( + "The argument link must be an instance of class Link or" + " an element of ['identity', 'log'].") if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool," " got {0}".format(self.fit_intercept)) - if not self.solver in ['irls', 'lbfgs', 'newton-cg']: + if self.solver not in ['irls', 'lbfgs', 'newton-cg']: raise ValueError("GLM Regression supports only irls, lbfgs and" "newton-cg solvers, got {0}".format(self.solver)) if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: @@ -676,8 +703,9 @@ def fit(self, X, y, weight=None): start_params = np.atleast_1d(start_params) if start_params.shape[0] != X.shape[1] + self.fit_intercept: raise ValueError("Start values for parameters must have the" - "right length; required length {0}, got {1}".format( - X.shape[1] + self.fit_intercept, start_params.shape[0])) + "right length; required length {0}, got {1}" + .format(X.shape[1] + self.fit_intercept, + start_params.shape[0])) X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], y_numeric=True, multi_output=False) @@ -685,7 +713,8 @@ def fit(self, X, y, weight=None): if not np.all(self.family.in_y_range(y)): raise ValueError("Some value(s) of y are out of the valid " - "range for family {0}".format(self.family.__class__.__name__)) + "range for family {0}" + .format(self.family.__class__.__name__)) if weight is None: weight = np.ones_like(y) @@ -698,96 +727,96 @@ def fit(self, X, y, weight=None): elif weight.shape[0] != y.shape[0]: raise ValueError("Weights must have the same length as y") - if self.fit_intercept: - #intercept is first column <=> coef[0] is for intecept + # intercept is first column <=> coef[0] is for intecept if sparse.issparse(X): - Xnew = sparse.hstack([np.ones([X.shape[0],1]), X]) + Xnew = sparse.hstack([np.ones([X.shape[0], 1]), X]) else: - Xnew = np.concatenate((np.ones((X.shape[0],1)), X), axis=1) + Xnew = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1) else: Xnew = X n_samples, n_features = Xnew.shape - #Note: Since phi does not enter the estimation of mu_i=E[y_i] - # set it to 1 where convenient. + # Note: Since dispersion_ alias phi does not enter the estimation + # of mu_i=E[y_i] set it to 1 where convenient. 
- #set start values for coef + # set start values for coef coef = None if start_params is None: - #Use mu_start and apply one irls step to calculate coef + # Use mu_start and apply one irls step to calculate coef mu = self.family.starting_mu(y, weight) - #linear predictor + # linear predictor eta = self.link.link(mu) - #h'(eta) + # h'(eta) hp = self.link.inverse_derivative(eta) - #working weights w, in principle a diagonal matrix - #therefore here just as 1d array + # working weights w, in principle a diagonal matrix + # therefore here just as 1d array w = (hp**2 / self.family.variance(mu, phi=1, weight=weight)) wroot = np.sqrt(w) - #working observations + # working observations yw = eta + (y-mu)/hp - #least squares rescaled with wroot + # least squares rescaled with wroot wroot = sparse.dia_matrix((wroot, 0), shape=(n_samples, n_samples)) X_rescale = safe_sparse_dot(wroot, Xnew, dense_output=True) yw_rescale = safe_sparse_dot(wroot, y, dense_output=True) coef = linalg.lstsq(X_rescale, yw_rescale)[0] elif start_params is 'ols': - reg = LinearRegression(copy_X=False, - fit_intercept=False) + reg = LinearRegression(copy_X=False, fit_intercept=False) reg.fit(Xnew, self.link.link(y)) coef = reg.coef_ else: coef = start_params - #algorithms for optimiation - #TODO: Parallelize it + # algorithms for optimiation + # TODO: Parallelize it self.n_iter_ = 0 converged = False if self.solver == 'irls': - #linear predictor + # linear predictor eta = safe_sparse_dot(Xnew, coef, dense_output=True) mu = self.link.inverse(eta) while self.n_iter_ < self.max_iter: self.n_iter_ += 1 - #coef_old not used so far. - #coef_old = coef - #h'(eta) + # coef_old not used so far. + # coef_old = coef + # h'(eta) hp = self.link.inverse_derivative(eta) - #working weights w, in principle a diagonal matrix - #therefore here just as 1d array + # working weights w, in principle a diagonal matrix + # therefore here just as 1d array w = (hp**2 / self.family.variance(mu, phi=1, weight=weight)) wroot = np.sqrt(w) - #working observations + # working observations yw = eta + (y-mu)/hp - #least squares rescaled with wroot + # least squares rescaled with wroot wroot = sparse.dia_matrix((wroot, 0), - shape=(n_samples, n_samples)) + shape=(n_samples, n_samples)) X_rescale = safe_sparse_dot(wroot, Xnew, dense_output=True) yw_rescale = safe_sparse_dot(wroot, yw, dense_output=True) - coef, residues, rank, singular_ = ( + coef, residues, rank, singular_ = ( linalg.lstsq(X_rescale, yw_rescale)) - #updated linear predictor - #do it here for updated values for tolerance + # updated linear predictor + # do it here for updated values for tolerance eta = safe_sparse_dot(Xnew, coef, dense_output=True) mu = self.link.inverse(eta) - #which tolerace? |coef - coef_old| or gradient? - #use gradient for compliance with newton-cg and lbfgs - #TODO: faster computation of gradient, use mu and eta directly - gradient = self.family._deviance_derivative(coef=coef, - X=Xnew, y=y, weight=weight, link=self.link) + # which tolerace? |coef - coef_old| or gradient? + # use gradient for compliance with newton-cg and lbfgs + # TODO: faster computation of gradient, use mu and eta directly + gradient = self.family._deviance_derivative( + coef=coef, X=Xnew, y=y, weight=weight, link=self.link) if (np.max(np.abs(gradient)) <= self.tol): converged = True break if not converged: warnings.warn("irls failed to converge. 
Increase the number " - "of iterations (currently {0})".format(self.max_iter)) + "of iterations (currently {0})" + .format(self.max_iter), ConvergenceWarning) - #TODO: performance: make one function return both deviance and gradient + # TODO: performance: make one function return both deviance and + # gradient of deviance elif self.solver == 'lbfgs': func = self.family._deviance fprime = self.family._deviance_derivative @@ -800,7 +829,8 @@ def fit(self, X, y, weight=None): if self.verbose > 0: if info["warnflag"] == 1: warnings.warn("lbfgs failed to converge." - " Increase the number of iterations.") + " Increase the number of iterations.", + ConvergenceWarning) elif info["warnflag"] == 2: warnings.warn("lbfgs failed for the reason: {0}".format( info["task"])) @@ -808,11 +838,13 @@ def fit(self, X, y, weight=None): elif self.solver == 'newton-cg': func = self.family._deviance grad = self.family._deviance_derivative + def grad_hess(coef, X, y, weight, link): - grad = (self.family - ._deviance_derivative(coef, X, y, weight, link)) - hessian = (self.family - ._deviance_hessian(coef, X, y, weight,link)) + grad = (self.family._deviance_derivative( + coef, X, y, weight, link)) + hessian = (self.family._deviance_hessian( + coef, X, y, weight, link)) + def Hs(s): ret = np.dot(hessian, s) return ret @@ -820,7 +852,7 @@ def Hs(s): hess = grad_hess args = (Xnew, y, weight, self.link) coef, n_iter_i = newton_cg(hess, func, grad, coef, args=args, - maxiter=self.max_iter, tol=self.tol) + maxiter=self.max_iter, tol=self.tol) self.coef_ = coef if self.fit_intercept is True: @@ -835,6 +867,9 @@ def Hs(s): return self def predict(self, X, weight=1): + """Prediction with features X. + If weights are given, returns prediction*weights. + """ check_is_fitted(self, "coef_") eta = safe_sparse_dot(X, self.coef_, dense_output=True) if self.fit_intercept is True: @@ -860,13 +895,13 @@ def score(self, X, y, weight=1): Returns the weight averaged negitive deviance (the better the score, the better the fit). Maximum score is therefore 0. """ - #RegressorMixin has R^2 score. - #TODO: Make it more compatible with the score function in + # RegressorMixin has R^2 score. 
+ # TODO: Make it more compatible with the score function in # sklearn.metrics.regression.py eta = safe_sparse_dot(X, self.coef_, dense_output=True) if self.fit_intercept is True: eta += self.intercept_ mu = self.link.inverse(eta) - output_errors = self.family.unit_deviance(y,mu) + output_errors = self.family.unit_deviance(y, mu) weight = weight * np.ones_like(y) return np.average(output_errors, weights=weight) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 2a88a7ec899d9..a4d4ea8650860 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -1,73 +1,82 @@ import numpy as np -from sklearn.linear_model.glm import (Link, IdentityLink, LogLink, - ExponentialDispersionModel, TweedieDistribution, - NormalDistribution, GaussianDistribution, - PoissonDistribution, GammaDistribution, - InverseGaussianDistribution, GeneralizedHyperbolicSecand, - GeneralizedLinearModel) +from sklearn.linear_model.glm import ( + # Link, IdentityLink, + LogLink, + TweedieDistribution, + NormalDistribution, PoissonDistribution, + GammaDistribution, InverseGaussianDistribution, + # GeneralizedHyperbolicSecand, + GeneralizedLinearRegressor) + +from sklearn.utils.testing import ( + # assert_equal, + assert_array_equal, assert_array_almost_equal) -from sklearn.utils.testing import (assert_equal, assert_array_equal, - assert_array_almost_equal) def test_family_bounds(): """Test the valid range of distributions """ family = NormalDistribution() - result = family.in_y_range([-1,0,1]) + result = family.in_y_range([-1, 0, 1]) assert_array_equal(result, [True, True, True]) family = PoissonDistribution() - result = family.in_y_range([-1,0,1]) + result = family.in_y_range([-1, 0, 1]) assert_array_equal(result, [False, True, True]) family = TweedieDistribution(power=1.5) - result = family.in_y_range([-1,0,1]) + result = family.in_y_range([-1, 0, 1]) assert_array_equal(result, [False, True, True]) family = GammaDistribution() - result = family.in_y_range([-1,0,1]) + result = family.in_y_range([-1, 0, 1]) assert_array_equal(result, [False, False, True]) family = InverseGaussianDistribution() - result = family.in_y_range([-1,0,1]) + result = family.in_y_range([-1, 0, 1]) assert_array_equal(result, [False, False, True]) family = TweedieDistribution(power=4.5) - result = family.in_y_range([-1,0,1]) + result = family.in_y_range([-1, 0, 1]) assert_array_equal(result, [False, False, True]) + def test_glm_identiy_regression(): """Test linear regression on a simple dataset """ - coef = [1,2] - X = np.array([[1,1,1,1,1],[0,1,2,3,4]]).T + coef = [1, 2] + X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.dot(X, coef) + families = ( + NormalDistribution(), PoissonDistribution(), + GammaDistribution(), InverseGaussianDistribution(), + TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)) for solver in ['irls', 'lbfgs', 'newton-cg']: - for family in (GaussianDistribution(), PoissonDistribution(), - GammaDistribution(), InverseGaussianDistribution(), - TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)): - glm = GeneralizedLinearModel(family=family, - fit_intercept=False, solver=solver) + for family in families: + glm = GeneralizedLinearRegressor( + family=family, fit_intercept=False, solver=solver) res = glm.fit(X, y) assert_array_almost_equal(res.coef_, coef) + def test_glm_log_regression(): """Test linear regression on a simple dataset """ - coef = [1,2] - X = np.array([[1,1,1,1,1],[0,1,2,3,4]]).T + coef = [1, 2] + X = 
np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.exp(np.dot(X, coef)) - #for solver in ['irls', 'lbfgs', 'newton-cg']: - for solver in ['irls']: - #for family in [GaussianDistribution(), PoissonDistribution(), - # GammaDistribution(), InverseGaussianDistribution(), - # TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)]: - for family in [GaussianDistribution()]: - glm = GeneralizedLinearModel(family=family, - link=LogLink(), - fit_intercept=False, solver=solver, start_params='ols') + families = ( + NormalDistribution(), PoissonDistribution(), + GammaDistribution(), InverseGaussianDistribution(), + TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)) + for solver in ['irls', 'lbfgs', 'newton-cg']: + for family in families: + glm = GeneralizedLinearRegressor( + family=family, link=LogLink(), fit_intercept=False, + solver=solver, start_params='ols') res = glm.fit(X, y) assert_array_almost_equal(res.coef_, coef) -#TODO: Test compatibility with R's glm, glmnet + +# TODO: Test compatibility with R's glm, glmnet From a6137d85401ef72976327c211f44d721d9f81e00 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 19 Jul 2017 17:41:49 +0200 Subject: [PATCH 003/269] [WIP] Add Generalized Linear Models (#9405) * GeneralizedLinearRegressor added to doc/modules/classes.rst --- doc/modules/classes.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index e09ca0422d8a7..2d451b6758eb1 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -720,6 +720,7 @@ Kernels: linear_model.BayesianRidge linear_model.ElasticNet linear_model.ElasticNetCV + linear_model.GeneralizedLinearRegressor linear_model.HuberRegressor linear_model.Lars linear_model.LarsCV From b0be167080588a35dc1f4b762d961edb897b1019 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 9 Aug 2017 13:38:49 +0200 Subject: [PATCH 004/269] [WIP] Add Generalized Linear Models (#9405) * fixed bug: init parameter max_iter * fix API for family and link: default parameter changed to string non public variables self._family_instance and self._link_instance * fixed bug in score, minus sign forgotten * added check_is_fitted to estimate_phi and score * added check_array(X) in predict * replaced lambda functions in TweedieDistribution * some documentation --- sklearn/linear_model/glm.py | 196 ++++++++++++++++++++++-------------- 1 file changed, 122 insertions(+), 74 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index cf91a64fafc12..0ee1564049329 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -15,9 +15,10 @@ # Design Decisions: # - Which name? GeneralizedLinearModel vs GeneralizedLinearRegressor. -# So far, it is GeneralizedLinearModel, since it could very easily -# extended by Bernoulli/Binomial distribution. -# Solution: GeneralizedLinearRegressor +# Estimators in sklearn are either regressors or classifiers. A Generalized +# Linear Model does both depending on the chosen distribution, e.g. Normal => +# regressor, Bernoulli/Binomial => classifier. +# Solution: GeneralizedLinearRegressor since this is the focus. # - The link funtion (instance of class Link) is necessary for the evaluation # of deviance, score, Fisher and Hessian matrix as functions of the # coefficients, which is needed by optimizers. 
@@ -33,7 +34,7 @@ from ..base import BaseEstimator, RegressorMixin from ..exceptions import ConvergenceWarning from ..externals import six -from ..utils import check_X_y +from ..utils import check_array, check_X_y from ..utils.extmath import safe_sparse_dot from ..utils.optimize import newton_cg from ..utils.validation import check_is_fitted @@ -372,46 +373,67 @@ class TweedieDistribution(ExponentialDispersionModel): The variance power of the unit_variance :math:`v(mu) = mu^{power}`. """ + def _less_upper_bound(self, x): + return np.less(x, self.upper_bound) + + def _less_equal_upper_bound(self, x): + return np.less_equal(x, self.upper_bound) + + def _greater_lower_bound(self, x): + return np.greater(x, self.lower_bound) + + def _greater_equal_lower_bound(self, x): + return np.greater_equal(x, self.lower_bound) + def __init__(self, power=0): self.power = power self._upper_bound = np.Inf - self._upper_compare = lambda x: np.less(x, self.upper_bound) + # self._upper_compare = lambda x: np.less(x, self.upper_bound) + self._upper_compare = self._less_upper_bound if power < 0: # Extreme Stable self._lower_bound = -np.Inf - self._lower_compare = lambda x: np.greater(x, self.lower_bound) + # self._lower_compare = lambda x: np.greater(x, self.lower_bound) + self._lower_compare = self._greater_lower_bound elif power == 0: - # GaussianDistribution + # NormalDistribution self._lower_bound = -np.Inf - self._lower_compare = lambda x: np.greater(x, self.lower_bound) + # self._lower_compare = lambda x: np.greater(x, self.lower_bound) + self._lower_compare = self._greater_lower_bound elif (power > 0) and (power < 1): raise ValueError('For 0 1) and (power < 2): # Compound Poisson self._lower_bound = 0 - self._lower_compare = ( - lambda x: np.greater_equal(x, self.lower_bound)) + # self._lower_compare = ( + # lambda x: np.greater_equal(x, self.lower_bound)) + self._lower_compare = self._greater_equal_lower_bound elif power == 2: # GammaDistribution self._lower_bound = 0 - self._lower_compare = lambda x: np.greater(x, self.lower_bound) + # self._lower_compare = lambda x: np.greater(x, self.lower_bound) + self._lower_compare = self._greater_lower_bound elif (power > 2) and (power < 3): # Positive Stable self._lower_bound = 0 - self._lower_compare = lambda x: np.greater(x, self.lower_bound) + # self._lower_compare = lambda x: np.greater(x, self.lower_bound) + self._lower_compare = self._greater_lower_bound elif power == 3: # InverseGaussianDistribution self._lower_bound = 0 - self._lower_compare = lambda x: np.greater(x, self.lower_bound) + # self._lower_compare = lambda x: np.greater(x, self.lower_bound) + self._lower_compare = self._greater_lower_bound elif power > 3: # Positive Stable self._lower_bound = 0 - self._lower_compare = lambda x: np.greater(x, self.lower_bound) + # self._lower_compare = lambda x: np.greater(x, self.lower_bound) + self._lower_compare = self._greater_lower_bound @property def power(self): @@ -530,6 +552,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Class to fit a Generalized Linear Model (GLM) based on reproductive Exponential Dispersion Models (EDM). + #TODO: This belongs to User Guide Assumptions: - The target values y_i are realizations of random variables @@ -559,25 +582,26 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Parameters ---------- - fit_intercept : boolean, optional, default True - whether to calculate the intercept for this model. If set - to False, no intercept will be used in calculations - (e.g. 
data is expected to be already centered). + fit_intercept : boolean, optional (default=True) + Specifies if a constant (a.k.a. bias or intercept) should be + added to the linear predictor (X*coef+intercept). family : {'normal', 'poisson', 'gamma', 'inverse.gaussian'} or an instance - of a subclass of ExponentialDispersionModel, optional, default 'normal' + of a subclass of ExponentialDispersionModel, optional + (default='normal') the distributional assumption of the GLM. link : {'identity', 'log'} or an instance of a subclass of Link, - optional, default IdentityLink() - the link function (class) of the GLM + optional (default='identity') + the link function of the GLM, i.e. mapping from linear predictor + (X*coef) to expectation (mu). - fit_dispersion : {None, 'chisqr', 'deviance'}, defaul 'chisqr' + fit_dispersion : {None, 'chisqr', 'deviance'}, optional (defaul='chisqr') method for estimation of the dispersion parameter phi. Whether to use the chi squared statisic or the deviance statistic. If None, the dispersion is not estimated. - solver : {'irls', 'newton-cg', 'lbfgs'}, defaul 'irls' + solver : {'irls', 'newton-cg', 'lbfgs'}, optional (defaul='irls') Algorithm to use in the optimization problem. - 'irls' is iterated reweighted least squares. It is the standard @@ -585,16 +609,16 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - 'newton-cg', 'lbfgs' - max_iter : int, default 100 + max_iter : int, optional (default=100) TODO - tol : float + tol : float, optional (default=1e-4) Stopping criterion. For the irls, newton-cg and lbfgs solvers, the iteration will stop when ``max{|g_i | i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient (derivative of the deviance). - start_params : {array shape (n_features, ), 'ols'}, default None + start_params : {array shape (n_features, ), 'ols'}, optional (default=None) sets the start values for coef_ in the fit. If None, default values are taken. If 'ols' the result of an ordinary least squares in the link space @@ -603,9 +627,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): If fit_intercept is true, the first value is assumed to be the start value for the intercept_. - verbose : int, default: 0 - For the lbfgs solver set verbose to any positive - number for verbosity. + verbose : int, optional (default=0) + For the lbfgs solver set verbose to any positive number for verbosity. Attributes ---------- @@ -629,15 +652,15 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): TODO """ - def __init__(self, fit_intercept=True, family=NormalDistribution(), - link=IdentityLink(), fit_dispersion='chisqr', solver='irls', + def __init__(self, fit_intercept=True, family='normal', + link='identity', fit_dispersion='chisqr', solver='irls', max_iter=100, tol=1e-4, start_params=None, verbose=0): self.fit_intercept = fit_intercept self.family = family self.link = link self.fit_dispersion = fit_dispersion self.solver = solver - self.max_iter = 100 + self.max_iter = max_iter self.tol = tol self.start_params = start_params self.verbose = verbose @@ -663,29 +686,38 @@ def fit(self, X, y, weight=None): ------- self : returns an instance of self. 
""" - if not isinstance(self.family, ExponentialDispersionModel): + # Garantee that self._family_instance is an instance of class + # ExponentialDispersionModel + if isinstance(self.family, ExponentialDispersionModel): + self._family_instance = self.family + else: if self.family == 'normal': - self.family = NormalDistribution() + self._family_instance = NormalDistribution() elif self.family == 'poisson': - self.family = PoissonDistribution() + self._family_instance = PoissonDistribution() elif self.family == 'gamma': - self.family = GammaDistribution() + self._family_instance = GammaDistribution() elif self.family == 'inverse.gaussian': - self.family = InverseGaussianDistribution() + self._family_instance = InverseGaussianDistribution() else: raise ValueError( - "The argument family must be an instance of class" + "The family must be an instance of class" " ExponentialDispersionModel or an element of" " ['normal', 'poisson', 'gamma', 'inverse.gaussian'].") - if not isinstance(self.link, Link): + + # Garantee that self._link_instance is set to an instance of class Link + if isinstance(self.link, Link): + self._link_instance = self.link + else: if self.link == 'identity': - self.link = IdentityLink() - if self.link == 'log': - self.link = LogLink() + self._link_instance = IdentityLink() + elif self.link == 'log': + self._link_instance = LogLink() else: raise ValueError( - "The argument link must be an instance of class Link or" + "The link must be an instance of class Link or" " an element of ['identity', 'log'].") + if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool," " got {0}".format(self.fit_intercept)) @@ -711,10 +743,13 @@ def fit(self, X, y, weight=None): y_numeric=True, multi_output=False) y = y.astype(np.float64) - if not np.all(self.family.in_y_range(y)): + family = self._family_instance + link = self._link_instance + + if not np.all(family.in_y_range(y)): raise ValueError("Some value(s) of y are out of the valid " "range for family {0}" - .format(self.family.__class__.__name__)) + .format(family.__class__.__name__)) if weight is None: weight = np.ones_like(y) @@ -745,14 +780,14 @@ def fit(self, X, y, weight=None): coef = None if start_params is None: # Use mu_start and apply one irls step to calculate coef - mu = self.family.starting_mu(y, weight) + mu = family.starting_mu(y, weight) # linear predictor - eta = self.link.link(mu) + eta = link.link(mu) # h'(eta) - hp = self.link.inverse_derivative(eta) + hp = link.inverse_derivative(eta) # working weights w, in principle a diagonal matrix # therefore here just as 1d array - w = (hp**2 / self.family.variance(mu, phi=1, weight=weight)) + w = (hp**2 / family.variance(mu, phi=1, weight=weight)) wroot = np.sqrt(w) # working observations yw = eta + (y-mu)/hp @@ -763,7 +798,7 @@ def fit(self, X, y, weight=None): coef = linalg.lstsq(X_rescale, yw_rescale)[0] elif start_params is 'ols': reg = LinearRegression(copy_X=False, fit_intercept=False) - reg.fit(Xnew, self.link.link(y)) + reg.fit(Xnew, link.link(y)) coef = reg.coef_ else: coef = start_params @@ -775,16 +810,16 @@ def fit(self, X, y, weight=None): if self.solver == 'irls': # linear predictor eta = safe_sparse_dot(Xnew, coef, dense_output=True) - mu = self.link.inverse(eta) + mu = link.inverse(eta) while self.n_iter_ < self.max_iter: self.n_iter_ += 1 # coef_old not used so far. 
# coef_old = coef # h'(eta) - hp = self.link.inverse_derivative(eta) + hp = link.inverse_derivative(eta) # working weights w, in principle a diagonal matrix # therefore here just as 1d array - w = (hp**2 / self.family.variance(mu, phi=1, weight=weight)) + w = (hp**2 / family.variance(mu, phi=1, weight=weight)) wroot = np.sqrt(w) # working observations yw = eta + (y-mu)/hp @@ -799,13 +834,13 @@ def fit(self, X, y, weight=None): # updated linear predictor # do it here for updated values for tolerance eta = safe_sparse_dot(Xnew, coef, dense_output=True) - mu = self.link.inverse(eta) + mu = link.inverse(eta) # which tolerace? |coef - coef_old| or gradient? # use gradient for compliance with newton-cg and lbfgs # TODO: faster computation of gradient, use mu and eta directly - gradient = self.family._deviance_derivative( - coef=coef, X=Xnew, y=y, weight=weight, link=self.link) + gradient = family._deviance_derivative( + coef=coef, X=Xnew, y=y, weight=weight, link=link) if (np.max(np.abs(gradient)) <= self.tol): converged = True break @@ -818,9 +853,9 @@ def fit(self, X, y, weight=None): # TODO: performance: make one function return both deviance and # gradient of deviance elif self.solver == 'lbfgs': - func = self.family._deviance - fprime = self.family._deviance_derivative - args = (Xnew, y, weight, self.link) + func = family._deviance + fprime = family._deviance_derivative + args = (Xnew, y, weight, link) coef, loss, info = optimize.fmin_l_bfgs_b( func, coef, fprime=fprime, args=args, @@ -836,13 +871,13 @@ def fit(self, X, y, weight=None): info["task"])) self.n_iter_ = info['nit'] elif self.solver == 'newton-cg': - func = self.family._deviance - grad = self.family._deviance_derivative + func = family._deviance + grad = family._deviance_derivative def grad_hess(coef, X, y, weight, link): - grad = (self.family._deviance_derivative( + grad = (family._deviance_derivative( coef, X, y, weight, link)) - hessian = (self.family._deviance_hessian( + hessian = (family._deviance_hessian( coef, X, y, weight, link)) def Hs(s): @@ -850,7 +885,7 @@ def Hs(s): return ret return grad, Hs hess = grad_hess - args = (Xnew, y, weight, self.link) + args = (Xnew, y, weight, link) coef, n_iter_i = newton_cg(hess, func, grad, coef, args=args, maxiter=self.max_iter, tol=self.tol) self.coef_ = coef @@ -871,37 +906,50 @@ def predict(self, X, weight=1): If weights are given, returns prediction*weights. """ check_is_fitted(self, "coef_") + X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) + # TODO: validation of weight eta = safe_sparse_dot(X, self.coef_, dense_output=True) if self.fit_intercept is True: eta += self.intercept_ - mu = self.link.inverse(eta) + mu = self._link_instance.inverse(eta) return mu*weight def estimate_phi(self, y, X, weight): + """Estimation of the dispersion parameter. + Returns the estimate. 
+ """ + check_is_fitted(self, "coef_") n_samples, n_features = X.shape eta = safe_sparse_dot(X, self.coef_, dense_output=True) if self.fit_intercept is True: eta += self.intercept_ - mu = self.link.inverse(eta) + mu = self._link_instance.inverse(eta) if self.fit_dispersion == 'chisqr': - chisq = np.sum(weight*(y-mu)**2/self.family.unit_variance(mu)) + chisq = np.sum(weight*(y-mu)**2 / + self._family_instance.unit_variance(mu)) return chisq/(n_samples - n_features) elif self.fit_dispersion == 'deviance': - dev = self.family.deviance(y, mu, weight) + dev = self._family_instance.deviance(y, mu, weight) return dev/(n_samples - n_features) +# TODO: Fix "AssertionError: -0.28014056555724598 not greater than 0.5" +# in check_estimator for score +# from sklearn.utils.estimator_checks import check_estimator +# from sklearn.linear_model import GeneralizedLinearRegressor +# check_estimator(GeneralizedLinearRegressor) def score(self, X, y, weight=1): """The natural score for a GLM is -deviance. - Returns the weight averaged negitive deviance (the better the score, + Returns the weight averaged negative deviance (the better the score, the better the fit). Maximum score is therefore 0. """ # RegressorMixin has R^2 score. # TODO: Make it more compatible with the score function in # sklearn.metrics.regression.py + check_is_fitted(self, "coef_") eta = safe_sparse_dot(X, self.coef_, dense_output=True) if self.fit_intercept is True: eta += self.intercept_ - mu = self.link.inverse(eta) - output_errors = self.family.unit_deviance(y, mu) + mu = self._link_instance.inverse(eta) + output_errors = self._family_instance.unit_deviance(y, mu) weight = weight * np.ones_like(y) - return np.average(output_errors, weights=weight) + return -np.average(output_errors, weights=weight) From 85c52ec9c6adb3b1f75650cfa7fe0b770393d24e Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 13 Aug 2017 01:46:16 +0200 Subject: [PATCH 005/269] [WIP] Add Generalized Linear Models (#9405) * make raw docstrings where appropriate * make ExponentialDispersionModel (i.e. TweedieDistribution) pickable: ExponentialDispersionModel has new properties include_lower_bound, method in_y_range is not abstract anymore. * set self.intercept_=0 if fit_intercept=False, such that it is always defined. * set score to D2, a generalized R2 with deviance instead of squared error, as does glmnet. This also solves issues with check_regressors_train(GeneralizedLinearRegressor), which assumes R2 score. * change of names: weight to weights in ExponentialDispersionModel and to sample_weight in GeneralizedLinearRegressor * add class method linear_predictor --- sklearn/linear_model/glm.py | 330 ++++++++++++++++++++++-------------- 1 file changed, 199 insertions(+), 131 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 0ee1564049329..b80842f817f4d 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -5,13 +5,16 @@ # Author: Christian Lorentzen # License: BSD 3 clause +# TODO: Write more tests # TODO: Which name/symbol for coefficients and weights in docu? # sklearn.linear_models uses w for coefficients. 
-# So far, coefficients=beta and weight=w (as standard literature) -# TODO: Add l2-penalty +# So far, coefficients=beta and weights=w (as standard literature) +# TODO: Add l2-penalty (maybe more general w.P.w with P penalty matrix) # TODO: Add l1-penalty (elastic net) # TODO: Add cross validation # TODO: Write docu and examples +# TODO: Make it as much consistent to other estimators in linear_model as +# possible # Design Decisions: # - Which name? GeneralizedLinearModel vs GeneralizedLinearRegressor. @@ -118,7 +121,7 @@ def inverse_derivative2(self, lin_pred): class ExponentialDispersionModel(six.with_metaclass(ABCMeta)): - """Base class for reproductive Exponential Dispersion Models (EDM). + r"""Base class for reproductive Exponential Dispersion Models (EDM). The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by @@ -136,6 +139,8 @@ class ExponentialDispersionModel(six.with_metaclass(ABCMeta)): ---------- lower_bound upper_bound + include_lower_bound + include_upper_bound Methods ------- @@ -174,15 +179,39 @@ def upper_bound(self): """ raise NotImplementedError() - @abstractmethod + @abstractproperty + def include_lower_bound(self): + """If True, values of y may equal lower bound: y >= lower_bound. + """ + raise NotImplementedError() + + @abstractproperty + def include_upper_bound(self): + """If True, values of y may equal upper bound: y <= upper_bound. + """ + raise NotImplementedError() + def in_y_range(self, x): """Returns true if x is in the valid range of Y~EDM. """ - raise NotImplementedError() + if self.include_lower_bound: + if self.include_upper_bound: + return np.logical_and(np.greater_equal(x, self.lower_bound), + np.less_equal(x, self.upper_bound)) + else: + return np.logical_and(np.greater_equal(x, self.lower_bound), + np.less(x, self.upper_bound)) + else: + if self.include_upper_bound: + return np.logical_and(np.greater(x, self.lower_bound), + np.less_equal(x, self.upper_bound)) + else: + return np.logical_and(np.greater(x, self.lower_bound), + np.less(x, self.upper_bound)) @abstractmethod def unit_variance(self, mu): - """The unit variance :math:`v(mu)` determines the variance as + r"""The unit variance :math:`v(mu)` determines the variance as a function of the mean mu by :math:`\mathrm{Var}[Y_i] = \phi/w_i*v(\mu_i)`. It can also be derived from the unit deviance :math:`d(y,\mu)` as @@ -194,27 +223,27 @@ def unit_variance(self, mu): @abstractmethod def unit_variance_derivative(self, mu): - """The derivative of the unit variance w.r.t. mu, :math:`v'(\mu)`. + r"""The derivative of the unit variance w.r.t. mu, :math:`v'(\mu)`. """ raise NotImplementedError() - def variance(self, mu, phi=1, weight=1): - """The variance of :math:`Y \sim \mathrm{EDM}(\mu,\phi)` is + def variance(self, mu, phi=1, weights=1): + r"""The variance of :math:`Y \sim \mathrm{EDM}(\mu,\phi)` is :math:`\mathrm{Var}[Y_i]=\phi/w_i*v(\mu_i)`, with unit variance v(mu). """ - return phi/weight * self.unit_variance(mu) + return phi/weights * self.unit_variance(mu) - def variance_derivative(self, mu, phi=1, weight=1): - """The derivative of the variance w.r.t. mu, + def variance_derivative(self, mu, phi=1, weights=1): + r"""The derivative of the variance w.r.t. mu, :math:`\frac{\partial}{\partial\mu}\mathrm{Var}[Y_i] =phi/w_i*v'(\mu_i)`, with unit variance v(mu). """ - return phi/weight * self.unit_variance_derivative(mu) + return phi/weights * self.unit_variance_derivative(mu) @abstractmethod def unit_deviance(self, y, mu): - """The unit_deviance :math:`d(y,\mu)`. + r"""The unit_deviance :math:`d(y,\mu)`. 
In terms of the log-likelihood it is given by :math:`d(y,\mu) = -2\phi\cdot \left(loglike(y,\mu,phi) - loglike(y,y,phi)\right).` @@ -222,7 +251,7 @@ def unit_deviance(self, y, mu): raise NotImplementedError() def unit_deviance_derivative(self, y, mu): - """The derivative w.r.t. mu of the unit_deviance + r"""The derivative w.r.t. mu of the unit_deviance :math:`\frac{d}{d\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` with unit variance :math:`v(\mu)`. @@ -232,30 +261,30 @@ def unit_deviance_derivative(self, y, mu): """ return -2*(y-mu)/self.unit_variance(mu) - def deviance(self, y, mu, weight=1): - """The deviance is given by :math:`D = \sum_i w_i \cdot d(y, \mu) - with weight :math:`w_i` and unit_deviance :math:`d(y,mu)`. + def deviance(self, y, mu, weights=1): + r"""The deviance is given by :math:`D = \sum_i w_i \cdot d(y, \mu) + with weights :math:`w_i` and unit_deviance :math:`d(y,mu)`. In terms of the likelihood it is :math:`D = -2\phi\cdot \left(loglike(y,\mu,\frac{phi}{w}) - loglike(y,y,\frac{phi}{w})\right).` """ - return np.sum(weight*self.unit_deviance(y, mu)) + return np.sum(weights*self.unit_deviance(y, mu)) - def _deviance(self, coef, X, y, weight, link): + def _deviance(self, coef, X, y, weights, link): """The deviance as a function of the coefficients ``coef`` (:math:`beta`). """ lin_pred = safe_sparse_dot(X, coef, dense_output=True) mu = link.inverse(lin_pred) - return self.deviance(y, mu, weight) + return self.deviance(y, mu, weights) - def deviance_derivative(self, y, mu, weight=1): + def deviance_derivative(self, y, mu, weights=1): """The derivative w.r.t. mu of the deviance.` """ - return weight*self.unit_deviance_derivative(y, mu) + return weights*self.unit_deviance_derivative(y, mu) - def _score(self, coef, phi, X, y, weight, link): - """The score function :math:`s` is the derivative of the + def _score(self, coef, phi, X, y, weights, link): + r"""The score function :math:`s` is the derivative of the log-likelihood w.r.t. the ``coef`` (:math:`\beta`). It is given by @@ -270,7 +299,7 @@ def _score(self, coef, phi, X, y, weight, link): n_samples = X.shape[0] lin_pred = safe_sparse_dot(X, coef, dense_output=True) mu = link.inverse(lin_pred) - sigma_inv = 1/self.variance(mu, phi=phi, weight=weight) + sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) d = link.inverse_derivative(lin_pred) d_sigma_inv = sparse.dia_matrix((sigma_inv*d, 0), shape=(n_samples, n_samples)) @@ -278,8 +307,8 @@ def _score(self, coef, phi, X, y, weight, link): score = safe_sparse_dot(X.T, temp, dense_output=False) return score - def _fisher_matrix(self, coef, phi, X, y, weight, link): - """The Fisher information matrix, also known as expected + def _fisher_matrix(self, coef, phi, X, y, weights, link): + r"""The Fisher information matrix, also known as expected information matrix. It is given by .. 
math: @@ -295,7 +324,7 @@ def _fisher_matrix(self, coef, phi, X, y, weight, link): n_samples = X.shape[0] lin_pred = safe_sparse_dot(X, coef, dense_output=True) mu = link.inverse(lin_pred) - sigma_inv = 1/self.variance(mu, phi=phi, weight=weight) + sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) d2 = link.inverse_derivative(lin_pred)**2 d2_sigma_inv = sparse.dia_matrix((sigma_inv*d2, 0), shape=(n_samples, n_samples)) @@ -303,8 +332,8 @@ def _fisher_matrix(self, coef, phi, X, y, weight, link): fisher_matrix = safe_sparse_dot(X.T, temp, dense_output=False) return fisher_matrix - def _observed_information(self, coef, phi, X, y, weight, link): - """The observed information matrix, also known as the negative of + def _observed_information(self, coef, phi, X, y, weights, link): + r"""The observed information matrix, also known as the negative of the Hessian matrix of the log-likelihood. It is given by .. math: @@ -327,7 +356,7 @@ def _observed_information(self, coef, phi, X, y, weight, link): n_samples = X.shape[0] lin_pred = safe_sparse_dot(X, coef, dense_output=True) mu = link.inverse(lin_pred) - sigma_inv = 1/self.variance(mu, phi=phi, weight=weight) + sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) dp = link.inverse_derivative2(lin_pred) d2 = link.inverse_derivative(lin_pred)**2 v = self.unit_variance_derivative(mu)/self.unit_variance(mu) @@ -338,33 +367,34 @@ def _observed_information(self, coef, phi, X, y, weight, link): observed_information = safe_sparse_dot(X.T, temp, dense_output=False) return observed_information - def _deviance_derivative(self, coef, X, y, weight, link): - """The derivative w.r.t. ``coef`` (:math:`\beta`) of the deviance as a + def _deviance_derivative(self, coef, X, y, weights, link): + r"""The derivative w.r.t. ``coef`` (:math:`\beta`) of the deviance as a function of the coefficients ``coef``. This is equivalent to :math:`-2\phi` times the score function :math:`s` (derivative of the log-likelihood). """ - score = self._score(coef=coef, phi=1, X=X, y=y, weight=weight, + score = self._score(coef=coef, phi=1, X=X, y=y, weights=weights, link=link) return -2*score - def _deviance_hessian(self, coef, X, y, weight, link): - """The hessian matrix w.r.t. ``coef`` (:math:`\beta`) of the deviance + def _deviance_hessian(self, coef, X, y, weights, link): + r"""The hessian matrix w.r.t. ``coef`` (:math:`\beta`) of the deviance as a function of the coefficients ``coef``. This is equivalent to :math:`+2\phi` times the observed information matrix. """ info_matrix = self._observed_information(coef=coef, phi=1, X=X, y=y, - weight=weight, link=link) + weights=weights, link=link) return 2*info_matrix - def starting_mu(self, y, weight=1): + def starting_mu(self, y, weights=1): """Starting values for the mean mu_i in IRLS.""" - return (weight*y+np.mean(weight*y))/(2.*np.sum(np.ones_like(y)*weight)) + return ((weights*y+np.mean(weights*y)) + / (2.*np.sum(np.ones_like(y)*weights))) class TweedieDistribution(ExponentialDispersionModel): - """A class for the Tweedie distribution. + r"""A class for the Tweedie distribution. They have mu=E[X] and Var[X] \propto mu**power. Attributes @@ -373,67 +403,44 @@ class TweedieDistribution(ExponentialDispersionModel): The variance power of the unit_variance :math:`v(mu) = mu^{power}`. 
""" - def _less_upper_bound(self, x): - return np.less(x, self.upper_bound) - - def _less_equal_upper_bound(self, x): - return np.less_equal(x, self.upper_bound) - - def _greater_lower_bound(self, x): - return np.greater(x, self.lower_bound) - - def _greater_equal_lower_bound(self, x): - return np.greater_equal(x, self.lower_bound) - def __init__(self, power=0): self.power = power self._upper_bound = np.Inf - # self._upper_compare = lambda x: np.less(x, self.upper_bound) - self._upper_compare = self._less_upper_bound + self._include_upper_bound = False if power < 0: # Extreme Stable self._lower_bound = -np.Inf - # self._lower_compare = lambda x: np.greater(x, self.lower_bound) - self._lower_compare = self._greater_lower_bound + self._include_lower_bound = False elif power == 0: # NormalDistribution self._lower_bound = -np.Inf - # self._lower_compare = lambda x: np.greater(x, self.lower_bound) - self._lower_compare = self._greater_lower_bound + self._include_lower_bound = False elif (power > 0) and (power < 1): raise ValueError('For 0 1) and (power < 2): # Compound Poisson self._lower_bound = 0 - # self._lower_compare = ( - # lambda x: np.greater_equal(x, self.lower_bound)) - self._lower_compare = self._greater_equal_lower_bound + self._include_lower_bound = True elif power == 2: # GammaDistribution self._lower_bound = 0 - # self._lower_compare = lambda x: np.greater(x, self.lower_bound) - self._lower_compare = self._greater_lower_bound + self._include_lower_bound = False elif (power > 2) and (power < 3): # Positive Stable self._lower_bound = 0 - # self._lower_compare = lambda x: np.greater(x, self.lower_bound) - self._lower_compare = self._greater_lower_bound + self._include_lower_bound = False elif power == 3: # InverseGaussianDistribution self._lower_bound = 0 - # self._lower_compare = lambda x: np.greater(x, self.lower_bound) - self._lower_compare = self._greater_lower_bound + self._include_lower_bound = False elif power > 3: # Positive Stable self._lower_bound = 0 - # self._lower_compare = lambda x: np.greater(x, self.lower_bound) - self._lower_compare = self._greater_lower_bound + self._include_lower_bound = False @property def power(self): @@ -454,8 +461,13 @@ def lower_bound(self): def upper_bound(self): return self._upper_bound - def in_y_range(self, x): - return np.logical_and(self._lower_compare(x), self._upper_compare(x)) + @property + def include_lower_bound(self): + return self._include_lower_bound + + @property + def include_upper_bound(self): + return self._include_upper_bound def unit_variance(self, mu): """The unit variance of a Tweedie distribution is v(mu)=mu**power. 
@@ -485,7 +497,7 @@ def unit_deviance(self, y, mu): return 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p)*(2-p)) - y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) - def likelihood(self, y, X, beta, phi, weight=1): + def likelihood(self, y, X, beta, phi, weights=1): raise NotImplementedError('This function is not (yet) implemented.') @@ -521,6 +533,8 @@ class GeneralizedHyperbolicSecand(ExponentialDispersionModel): def __init__(self): self._lower_bound = -np.Inf self._upper_bound = np.Inf + self._include_lower_bound = False + self._include_upper_bound = False @property def lower_bound(self): @@ -530,11 +544,13 @@ def lower_bound(self): def upper_bound(self): return self._upper_bound - def in_y_range(self, x): - np.logical_and( - np.greater(x, self.lower_bound), - np.less(x, self.lower_bound) - ) + @property + def include_lower_bound(self): + return self._include_lower_bound + + @property + def include_upper_bound(self): + return self._include_upper_bound def unit_variance(self, mu): return 1 + mu**2 @@ -548,7 +564,7 @@ def unit_deviance(self, y, mu): class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - """ + r""" Class to fit a Generalized Linear Model (GLM) based on reproductive Exponential Dispersion Models (EDM). @@ -665,7 +681,7 @@ def __init__(self, fit_intercept=True, family='normal', self.start_params = start_params self.verbose = verbose - def fit(self, X, y, weight=None): + def fit(self, X, y, sample_weight=None): """Fit a generalized linear model. Parameters @@ -676,11 +692,12 @@ def fit(self, X, y, weight=None): y : numpy array of shape [n_samples] Target values - weight : numpy array of shape [n_samples] + sample_weight : numpy array of shape [n_samples] Individual weights for each sample. Var[Y_i]=phi/weight_i * v(mu) If Y_i ~ EDM(mu, phi/w_i) then - sum(w*Y)/sum(w) ~ EDM(mu, phi/sum(w)) + sum(w*Y)/sum(w) ~ EDM(mu, phi/sum(w)), i.e. the mean of y is a + weighted average with weights=sample_weight. 
Returns ------- @@ -751,16 +768,17 @@ def fit(self, X, y, weight=None): "range for family {0}" .format(family.__class__.__name__)) - if weight is None: - weight = np.ones_like(y) - elif np.isscalar(weight): - weight = weight*np.ones_like(y) + if sample_weight is None: + weights = np.ones_like(y) + elif np.isscalar(sample_weight): + weights = sample_weight*np.ones_like(y) else: - weight = np.atleast_1d(weight) - if weight.ndim > 1: - raise ValueError("Weights must be 1D array or scalar") - elif weight.shape[0] != y.shape[0]: - raise ValueError("Weights must have the same length as y") + weights = np.atleast_1d(sample_weight) + if weights.ndim > 1: + raise ValueError("Sample weight must be 1D array or scalar") + elif weights.shape[0] != y.shape[0]: + raise ValueError("Sample weights must have the same length as" + " y") if self.fit_intercept: # intercept is first column <=> coef[0] is for intecept @@ -780,14 +798,14 @@ def fit(self, X, y, weight=None): coef = None if start_params is None: # Use mu_start and apply one irls step to calculate coef - mu = family.starting_mu(y, weight) + mu = family.starting_mu(y, weights) # linear predictor eta = link.link(mu) # h'(eta) hp = link.inverse_derivative(eta) # working weights w, in principle a diagonal matrix # therefore here just as 1d array - w = (hp**2 / family.variance(mu, phi=1, weight=weight)) + w = (hp**2 / family.variance(mu, phi=1, weights=weights)) wroot = np.sqrt(w) # working observations yw = eta + (y-mu)/hp @@ -819,7 +837,7 @@ def fit(self, X, y, weight=None): hp = link.inverse_derivative(eta) # working weights w, in principle a diagonal matrix # therefore here just as 1d array - w = (hp**2 / family.variance(mu, phi=1, weight=weight)) + w = (hp**2 / family.variance(mu, phi=1, weights=weights)) wroot = np.sqrt(w) # working observations yw = eta + (y-mu)/hp @@ -840,7 +858,7 @@ def fit(self, X, y, weight=None): # use gradient for compliance with newton-cg and lbfgs # TODO: faster computation of gradient, use mu and eta directly gradient = family._deviance_derivative( - coef=coef, X=Xnew, y=y, weight=weight, link=link) + coef=coef, X=Xnew, y=y, weights=weights, link=link) if (np.max(np.abs(gradient)) <= self.tol): converged = True break @@ -855,7 +873,7 @@ def fit(self, X, y, weight=None): elif self.solver == 'lbfgs': func = family._deviance fprime = family._deviance_derivative - args = (Xnew, y, weight, link) + args = (Xnew, y, weights, link) coef, loss, info = optimize.fmin_l_bfgs_b( func, coef, fprime=fprime, args=args, @@ -874,47 +892,72 @@ def fit(self, X, y, weight=None): func = family._deviance grad = family._deviance_derivative - def grad_hess(coef, X, y, weight, link): + def grad_hess(coef, X, y, weights, link): grad = (family._deviance_derivative( - coef, X, y, weight, link)) + coef, X, y, weights, link)) hessian = (family._deviance_hessian( - coef, X, y, weight, link)) + coef, X, y, weights, link)) def Hs(s): ret = np.dot(hessian, s) return ret return grad, Hs hess = grad_hess - args = (Xnew, y, weight, link) + args = (Xnew, y, weights, link) coef, n_iter_i = newton_cg(hess, func, grad, coef, args=args, maxiter=self.max_iter, tol=self.tol) self.coef_ = coef - if self.fit_intercept is True: + if self.fit_intercept: self.intercept_ = coef[0] self.coef_ = coef[1:] else: + self.intercept_ = 0. 
self.coef_ = coef if self.fit_dispersion in ['chisqr', 'deviance']: - self.dispersion_ = self.estimate_phi(y, X, weight) + self.dispersion_ = self.estimate_phi(y, X, weights) return self - def predict(self, X, weight=1): - """Prediction with features X. - If weights are given, returns prediction*weights. + def linear_predictor(self, X): + """The linear_predictor X*coef_ + intercept_. + + Parameters + ---------- + X : numpy array or sparse matrix of shape [n_samples,n_features] + Samples. + + Returns + ------- + C : array, shape = (n_samples) + Returns predicted values of linear predictor. """ check_is_fitted(self, "coef_") X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) - # TODO: validation of weight - eta = safe_sparse_dot(X, self.coef_, dense_output=True) - if self.fit_intercept is True: - eta += self.intercept_ + return safe_sparse_dot(X, self.coef_, + dense_output=True) + self.intercept_ + + def predict(self, X, sample_weight=1): + """Predict uing GLM with feature matrix X. + If sample_weight is given, returns prediction*sample_weight. + + Parameters + ---------- + X : numpy array or sparse matrix of shape [n_samples,n_features] + Samples. + + Returns + ------- + C : array, shape = (n_samples) + Returns predicted values times sample_weight. + """ + # TODO: validation of sample_weight + eta = self.linear_predictor(X) mu = self._link_instance.inverse(eta) - return mu*weight + return mu*sample_weight - def estimate_phi(self, y, X, weight): + def estimate_phi(self, y, X, sample_weight): """Estimation of the dispersion parameter. Returns the estimate. """ @@ -925,11 +968,11 @@ def estimate_phi(self, y, X, weight): eta += self.intercept_ mu = self._link_instance.inverse(eta) if self.fit_dispersion == 'chisqr': - chisq = np.sum(weight*(y-mu)**2 / + chisq = np.sum(sample_weight*(y-mu)**2 / self._family_instance.unit_variance(mu)) return chisq/(n_samples - n_features) elif self.fit_dispersion == 'deviance': - dev = self._family_instance.deviance(y, mu, weight) + dev = self._family_instance.deviance(y, mu, sample_weight) return dev/(n_samples - n_features) # TODO: Fix "AssertionError: -0.28014056555724598 not greater than 0.5" @@ -937,19 +980,44 @@ def estimate_phi(self, y, X, weight): # from sklearn.utils.estimator_checks import check_estimator # from sklearn.linear_model import GeneralizedLinearRegressor # check_estimator(GeneralizedLinearRegressor) - def score(self, X, y, weight=1): - """The natural score for a GLM is -deviance. - Returns the weight averaged negative deviance (the better the score, - the better the fit). Maximum score is therefore 0. + def score(self, X, y, sample_weight=None): + r"""Returns D^2, a generalization of the coefficient of determination + R^2, which uses deviance instead of squared error. + + D^2 is defined as + :math:`D^2 = 1-\frac{D(y_{true},y_{pred})}{D_{null}}`, :math:`D_{null}` + is the null deviance, i.e. the deviance of a model with intercept + alone which corresponds to :math:`y_{pred} = \bar{y}`. The mean + :math:`\bar{y}` is average by sample_weight. In the case of a Normal + distribution, this D^2 equals R^2. + Best possible score is 1.0 and it can be negative (because the + model can be arbitrarily worse). + + Parameters + ---------- + X : array-like, shape = (n_samples, n_features) + Test samples + + y : array-like of shape = (n_samples) + True valeus for X. + + sample_weight : array-like, shape = (n_samples), optional + Sample weights. + + Returns + ------- + score : float + D^2 of self.predict(X) wrt. y. """ - # RegressorMixin has R^2 score. 
- # TODO: Make it more compatible with the score function in - # sklearn.metrics.regression.py - check_is_fitted(self, "coef_") - eta = safe_sparse_dot(X, self.coef_, dense_output=True) - if self.fit_intercept is True: - eta += self.intercept_ - mu = self._link_instance.inverse(eta) - output_errors = self._family_instance.unit_deviance(y, mu) - weight = weight * np.ones_like(y) - return -np.average(output_errors, weights=weight) + # Note, default score defined in RegressorMixin is R^2 score. + # TODO: make D^2 a score function in module metrics (and thereby get + # input validation and so on) + if sample_weight is None: + weights = np.ones_like(y) + else: + weights = np.atleast_1d(sample_weight) + mu = self.predict(X) + dev = self._family_instance.deviance(y, mu, weights=weights) + y_mean = np.average(y, weights=weights) + dev_null = self._family_instance.deviance(y, y_mean, weights=weights) + return 1. - dev / dev_null From 0f4bdb3a8c5c45e80786b8156398da93bfc597e8 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Mon, 18 Sep 2017 23:41:19 +0200 Subject: [PATCH 006/269] [WIP] Add Generalized Linear Models (#9405) * added L2 penalty * api change: alpha, l1_ratio, P1, P2, warm_start, check_input, copy_X * added entry in user guide * improved docstrings * helper function _irls_step --- doc/modules/linear_model.rst | 113 +++- sklearn/linear_model/glm.py | 702 ++++++++++++++++++------- sklearn/linear_model/tests/test_glm.py | 115 +++- 3 files changed, 721 insertions(+), 209 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 75b95f6c7a44f..51b3821fa6207 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -786,7 +786,7 @@ non-smooth `penalty="l1"`. This is therefore the solver of choice for sparse multinomial logistic regression. It is also the only solver that supports `penalty="elasticnet"`. -The "lbfgs" is an optimization algorithm that approximates the +The "lbfgs" is an optimization algorithm that approximates the Broyden–Fletcher–Goldfarb–Shanno algorithm [8]_, which belongs to quasi-Newton methods. The "lbfgs" solver is recommended for use for small data-sets but for larger datasets its performance suffers. [9]_ @@ -874,6 +874,117 @@ to warm-starting (see :term:`Glossary `). .. [9] `"Performance Evaluation of Lbfgs vs other solvers" `_ +.. _Generalized_linear_regression: + +Generalized linear regression +============================= + +:class:`GeneralizedLinearRegressor` generalizes the :ref:`elastic_net` in two +ways [1]_. First, the predicted values :math:`\hat{y}` are linked to a linear +combination of the input variables :math:`X` via an inverse link function +:math:`h` as + +.. math:: \hat{y}(w, x) = h(xw) = h(w_0 + w_1 x_1 + ... + w_p x_p). + +Secondly, the squared loss function is replaced by the deviance :math:`D` of an +exponential dispersion model (EDM) [2]_. The objective function beeing minimized +becomes + +.. math:: \frac{1}{2s}D(y, \hat{y}) + \alpha \rho ||P_1w||_1 + +\frac{\alpha(1-\rho)}{2} w^T P_2 w + +with sample weights :math:`s`. +:math:`P_1` can be used to exclude some of the coefficients in the L1 +penalty, :math:`P_2` (must be positive semi-definite) allows for a more +versatile L2 penalty. + +Use cases, where a loss different from the squared loss might be appropriate, +are the following: + + * If the target values :math:`y` are counts (integer valued) or frequencies, you might try a Poisson deviance. 
+ + * If the target values are positive valued and skewed, you might try a Gamma deviance. + + * If the target values seem to be heavy tailed, you might try an Inverse Gaussian deviance (or even higher variance power of the Tweedie family). + +Since the linear predictor :math:`Xw` can be negative and +Poisson, Gamma and Inverse Gaussian distributions don't have negative values, +it is convenient to apply a link function different from the identity link +:math:`h(x)=x` that guarantees the non-negativeness, e.g. the log-link with +:math:`h(Xw)=\exp(Xw)`. + +Note that the feature matrix `X` should be standardized before fitting. This +ensures that the penalty treats features equally. + + >>> from sklearn import linear_model + >>> reg = linear_model.GeneralizedLinearRegressor(alpha=0.5, l1_ratio=0) + >>> reg = linear_model.GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') + >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) + >>> reg.coef_ + array([ 0.24630255, 0.43373521]) + >>> reg.intercept_ + -0.76383575123143277 + +Mathematical formulation +------------------------ + +In the unpenalized case, the assumptions are the folowing: + + * The target values :math:`y_i` are realizations of random variables + :math:`Y_i \overset{i.i.d}{\sim} \mathrm{EDM}(\mu_i, \frac{\phi}{s_i})` + with expectation :math:`\mu_i=\mathrm{E}[Y]`, dispersion parameter + :math:`\phi` and sample weights :math:`s_i`. + * The aim is to predict the expectation :math:`\mu_i` with + :math:`\hat{y_i} = h(\eta_i)`, linear predictor + :math:`\eta_i=(Xw)_i` and inverse link function :math:`h(\eta)`. + +Note that the first assumption implies +:math:`\mathrm{Var}[Y_i]=\frac{\phi}{s_i} v(\mu_i)` with unit variance +function :math:`v(\mu)`. Specifying a particular distribution of an EDM is the +same as specifying a unit variance function (they are one-to-one). + +Including penalties helps to avoid overfitting or, in case of L1 penalty, to +obtain sparse solutions. But there are also other motivations to include them, +e.g. accounting fo dependence structure of :math:`y`. + +The objective function, which is independent of :math:`\phi`, is minimized with +respect to the coefficients :math:`w`. + +The deviance is defined by + +.. math:: D(y, \mu) = -2\phi\cdot + \left(loglike(y,\mu,\frac{\phi}{s}) + - loglike(y,y,\frac{\phi}{s})\right) + +===================================== ================================= +Distribution Variance Function :math:`v(\mu)` +===================================== ================================= +Normal ("normal") :math:`1` +Poisson ("poisson") :math:`\mu` +Gamma ("gamma") :math:`\mu^2` +Inverse Gaussian ("inverse.gaussian") :math:`\mu^3` +===================================== ================================= + +Two remarks: + +* The deviances for at least Normal, Poisson and Gamma distributions are + strictly consistent scoring functions for the mean :math:`\mu`, see Eq. + (19)-(20) in [3]_. + +* If you want to model a frequency, i.e. counts per exposure (time, volume, ...) + you can do so by a Poisson distribution and passing + :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values together + with :math:`s=\mathrm{exposure}` as sample weights. + + +.. topic:: References: + + .. [1] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + + .. [2] Jørgensen, B. (1992). The theory of exponential dispersion models and analysis of deviance. Monografias de matemática, no. 51. 
+ See also `Exponential dispersion model. `_ + + .. [3] Gneiting, T. (2010). `Making and Evaluating Point Forecasts. `_ Stochastic Gradient Descent - SGD ================================= diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index b80842f817f4d..2db3c56d5e1c1 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -6,15 +6,15 @@ # License: BSD 3 clause # TODO: Write more tests -# TODO: Which name/symbol for coefficients and weights in docu? -# sklearn.linear_models uses w for coefficients. -# So far, coefficients=beta and weights=w (as standard literature) -# TODO: Add l2-penalty (maybe more general w.P.w with P penalty matrix) # TODO: Add l1-penalty (elastic net) +# TODO: deal with option self.copy_X +# TODO: Should the option `normalize` be included (like other linear models)? +# So far, it is not included. User must pass a normalized X. # TODO: Add cross validation -# TODO: Write docu and examples +# TODO: Write examples and more docu # TODO: Make it as much consistent to other estimators in linear_model as # possible +# TODO: options P1 and P2 in fit() or in __init__()??? # Design Decisions: # - Which name? GeneralizedLinearModel vs GeneralizedLinearRegressor. @@ -22,10 +22,21 @@ # Linear Model does both depending on the chosen distribution, e.g. Normal => # regressor, Bernoulli/Binomial => classifier. # Solution: GeneralizedLinearRegressor since this is the focus. +# - Allow for finer control of penalty terms: +# L1: ||P1*w||_1 with P1*w a componentwise product, this allows to exclude +# factors from the L1 penalty. +# L2: w*P2*w with P2 a (demi-) positive definite matrix, e.g. P2 could be +# a 1st or 2nd order difference matrix (compare B-spline penalties and +# Tikhonov regularization). # - The link funtion (instance of class Link) is necessary for the evaluation # of deviance, score, Fisher and Hessian matrix as functions of the # coefficients, which is needed by optimizers. # Solution: link as argument in those functions +# - Which name/symbol for sample_weight in docu? +# sklearn.linear_models uses w for coefficients, standard literature on +# GLMs use beta for coefficients and w for (sample) weights. +# So far, coefficients=w and sample weights=s. + from __future__ import division from abc import ABCMeta, abstractmethod, abstractproperty @@ -34,6 +45,8 @@ from scipy import linalg, optimize, sparse import warnings from .base import LinearRegression +from .coordinate_descent import ElasticNet +from .ridge import Ridge from ..base import BaseEstimator, RegressorMixin from ..exceptions import ConvergenceWarning from ..externals import six @@ -164,7 +177,8 @@ class ExponentialDispersionModel(six.with_metaclass(ABCMeta)): References ---------- - See https://en.wikipedia.org/wiki/Exponential_dispersion_model. + + https://en.wikipedia.org/wiki/Exponential_dispersion_model. """ @abstractproperty @@ -192,7 +206,7 @@ def include_upper_bound(self): raise NotImplementedError() def in_y_range(self, x): - """Returns true if x is in the valid range of Y~EDM. + """Returns true if `x` is in the valid range of Y~EDM. """ if self.include_lower_bound: if self.include_upper_bound: @@ -211,33 +225,36 @@ def in_y_range(self, x): @abstractmethod def unit_variance(self, mu): - r"""The unit variance :math:`v(mu)` determines the variance as - a function of the mean mu by - :math:`\mathrm{Var}[Y_i] = \phi/w_i*v(\mu_i)`. 
+ r"""The unit variance :math:`v(\mu)` determines the variance as + a function of the mean :math:`\mu` by + :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(\mu_i)`. It can also be derived from the unit deviance :math:`d(y,\mu)` as .. math:: v(\mu) = \frac{2}{\frac{\partial^2 d(y,\mu)}{ \partial\mu^2}}\big|_{y=\mu} + + See also :func:`variance`. """ raise NotImplementedError() @abstractmethod def unit_variance_derivative(self, mu): - r"""The derivative of the unit variance w.r.t. mu, :math:`v'(\mu)`. + r"""The derivative of the unit variance w.r.t. `mu`, :math:`v'(\mu)`. """ raise NotImplementedError() def variance(self, mu, phi=1, weights=1): - r"""The variance of :math:`Y \sim \mathrm{EDM}(\mu,\phi)` is - :math:`\mathrm{Var}[Y_i]=\phi/w_i*v(\mu_i)`, - with unit variance v(mu). + r"""The variance of :math:`Y_i \sim \mathrm{EDM}(\mu_i,\phi/s_i)` is + :math:`\mathrm{Var}[Y_i]=\phi/s_i*v(\mu_i)`, + with unit variance :math:`v(\mu)` and weights :math:`s_i`. """ return phi/weights * self.unit_variance(mu) def variance_derivative(self, mu, phi=1, weights=1): - r"""The derivative of the variance w.r.t. mu, + r"""The derivative of the variance w.r.t. `mu`, :math:`\frac{\partial}{\partial\mu}\mathrm{Var}[Y_i] - =phi/w_i*v'(\mu_i)`, with unit variance v(mu). + =phi/s_i*v'(\mu_i)`, with unit variance :math:`v(\mu)` + and weights :math:`s_i`. """ return phi/weights * self.unit_variance_derivative(mu) @@ -251,8 +268,8 @@ def unit_deviance(self, y, mu): raise NotImplementedError() def unit_deviance_derivative(self, y, mu): - r"""The derivative w.r.t. mu of the unit_deviance - :math:`\frac{d}{d\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` + r"""The derivative w.r.t. `mu` of the unit deviance + :math:`\frac{\partial}{\partial\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` with unit variance :math:`v(\mu)`. Returns @@ -262,39 +279,39 @@ def unit_deviance_derivative(self, y, mu): return -2*(y-mu)/self.unit_variance(mu) def deviance(self, y, mu, weights=1): - r"""The deviance is given by :math:`D = \sum_i w_i \cdot d(y, \mu) - with weights :math:`w_i` and unit_deviance :math:`d(y,mu)`. + r"""The deviance is given by :math:`D = \sum_i s_i \cdot d(y, \mu) + with weights :math:`s_i` and unit deviance :math:`d(y,\mu)`. In terms of the likelihood it is :math:`D = -2\phi\cdot - \left(loglike(y,\mu,\frac{phi}{w}) - - loglike(y,y,\frac{phi}{w})\right).` + \left(loglike(y,\mu,\frac{phi}{s}) + - loglike(y,y,\frac{phi}{s})\right)`. """ return np.sum(weights*self.unit_deviance(y, mu)) def _deviance(self, coef, X, y, weights, link): - """The deviance as a function of the coefficients ``coef`` - (:math:`beta`). + """The deviance as a function of the coefficients `coef` + (:math:`w`). """ lin_pred = safe_sparse_dot(X, coef, dense_output=True) mu = link.inverse(lin_pred) return self.deviance(y, mu, weights) def deviance_derivative(self, y, mu, weights=1): - """The derivative w.r.t. mu of the deviance.` + """The derivative w.r.t. `mu` of the deviance. """ return weights*self.unit_deviance_derivative(y, mu) def _score(self, coef, phi, X, y, weights, link): - r"""The score function :math:`s` is the derivative of the - log-likelihood w.r.t. the ``coef`` (:math:`\beta`). + r"""The score function is the derivative of the + log-likelihood w.r.t. `coef` (:math:`w`). It is given by .. 
math: - \mathbf{s}(\boldsymbol{\beta}) = \mathbf{X}^T \mathbf{D} + \mathbf{score}(\boldsymbol{w}) = \mathbf{X}^T \mathbf{D} \boldsymbol{\Sigma}^-1 (\mathbf{y} - \boldsymbol{\mu})\,, with :math:`\mathbf{D}=\mathrm{diag}(h'(\eta_1),\ldots)` and - :math:`\boldsymbol{\Sigma}=\mathrm{diag}(\mathbf{V}(y_1),\ldots)`. + :math:`\boldsymbol{\Sigma}=\mathrm{diag}(\mathbf{V}[y_1],\ldots)`. """ n_samples = X.shape[0] lin_pred = safe_sparse_dot(X, coef, dense_output=True) @@ -303,23 +320,27 @@ def _score(self, coef, phi, X, y, weights, link): d = link.inverse_derivative(lin_pred) d_sigma_inv = sparse.dia_matrix((sigma_inv*d, 0), shape=(n_samples, n_samples)) - temp = safe_sparse_dot(d_sigma_inv, (y-mu), dense_output=False) - score = safe_sparse_dot(X.T, temp, dense_output=False) + temp = safe_sparse_dot(d_sigma_inv, (y-mu), dense_output=True) + score = safe_sparse_dot(X.T, temp, dense_output=True) return score def _fisher_matrix(self, coef, phi, X, y, weights, link): - r"""The Fisher information matrix, also known as expected - information matrix. It is given by + r"""The Fisher information matrix. + The Fisher information matrix, also known as expected information + matrix is given by .. math: - \mathbf{F}(\boldsymbol{\beta}) = \mathrm{E}\left[ - -\frac{\partial^2 loglike}{\partial\boldsymbol{\beta} - \partial\boldsymbol{\beta}^T}\right] + \mathbf{F}(\boldsymbol{w}) = + \mathrm{E}\left[-\frac{\partial\mathbf{score}}{\partial + \boldsymbol{w}} \right] + = \mathrm{E}\left[ + -\frac{\partial^2 loglike}{\partial\boldsymbol{w} + \partial\boldsymbol{w}^T}\right] = \mathbf{X}^T W \mathbf{X} \,, with :math:`\mathbf{W} = \mathbf{D}^2 \boldsymbol{\Sigma}^{-1}`, - see score function. + see func:`score_function`. """ n_samples = X.shape[0] lin_pred = safe_sparse_dot(X, coef, dense_output=True) @@ -333,14 +354,15 @@ def _fisher_matrix(self, coef, phi, X, y, weights, link): return fisher_matrix def _observed_information(self, coef, phi, X, y, weights, link): - r"""The observed information matrix, also known as the negative of + r"""The observed information matrix. + The observed information matrix, also known as the negative of the Hessian matrix of the log-likelihood. It is given by .. math: - \mathbf{H}(\boldsymbol{\beta}) = - -\frac{\partial^2 loglike}{\partial\boldsymbol{\beta} - \partial\boldsymbol{\beta}^T} + \mathbf{H}(\boldsymbol{w}) = + -\frac{\partial^2 loglike}{\partial\boldsymbol{w} + \partial\boldsymbol{w}^T} = \mathbf{X}^T \legt[ - \mathbf{D}' \mathbf{R} + \mathbf{D}^2 \mathbf{V} \mathbf{R} @@ -351,7 +373,7 @@ def _observed_information(self, coef, phi, X, y, weights, link): :math:`\mathbf{V} = \mathrm{diag}\left(\frac{v'(\mu_i)}{ v(\mu_i)} \right)`, - see score function and Fisher matrix. + see :func:`score_` function and :func:`_fisher_matrix`. """ n_samples = X.shape[0] lin_pred = safe_sparse_dot(X, coef, dense_output=True) @@ -368,18 +390,18 @@ def _observed_information(self, coef, phi, X, y, weights, link): return observed_information def _deviance_derivative(self, coef, X, y, weights, link): - r"""The derivative w.r.t. ``coef`` (:math:`\beta`) of the deviance as a - function of the coefficients ``coef``. + r"""The derivative w.r.t. `coef` (:math:`w`) of the deviance as a + function of the coefficients `coef`. This is equivalent to :math:`-2\phi` times the score function - :math:`s` (derivative of the log-likelihood). + :func:`score_function` (derivative of the log-likelihood). 
""" score = self._score(coef=coef, phi=1, X=X, y=y, weights=weights, link=link) return -2*score def _deviance_hessian(self, coef, X, y, weights, link): - r"""The hessian matrix w.r.t. ``coef`` (:math:`\beta`) of the deviance - as a function of the coefficients ``coef``. + r"""The hessian matrix w.r.t. `coef` (:math:`w`) of the deviance + as a function of the coefficients `coef`. This is equivalent to :math:`+2\phi` times the observed information matrix. """ @@ -388,20 +410,21 @@ def _deviance_hessian(self, coef, X, y, weights, link): return 2*info_matrix def starting_mu(self, y, weights=1): - """Starting values for the mean mu_i in IRLS.""" - return ((weights*y+np.mean(weights*y)) - / (2.*np.sum(np.ones_like(y)*weights))) + """Starting values for the mean mu_i in (unpenalized) IRLS.""" + return ((weights*y+np.mean(weights*y)) / + (2.*np.sum(np.ones_like(y)*weights))) class TweedieDistribution(ExponentialDispersionModel): r"""A class for the Tweedie distribution. - They have mu=E[X] and Var[X] \propto mu**power. + They have :math:`\mu=\mathrm{E}[Y]` and + :math:`\mathrm{Var}[Y] \propto \mu^power. Attributes ---------- power : float The variance power of the unit_variance - :math:`v(mu) = mu^{power}`. + :math:`v(\mu) = \mu^{power}`. """ def __init__(self, power=0): self.power = power @@ -497,7 +520,7 @@ def unit_deviance(self, y, mu): return 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p)*(2-p)) - y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) - def likelihood(self, y, X, beta, phi, weights=1): + def likelihood(self, y, X, w, phi, weights=1): raise NotImplementedError('This function is not (yet) implemented.') @@ -563,51 +586,135 @@ def unit_deviance(self, y, mu): np.log((1+mu**2)/(1+y**2))) +def _irls_step(X, W, P2, z): + """One step in iteratively reweighted least squares + + Solve A w = b for w with + A = (X' W X + P2) + b = X' W z + z = eta + D^-1 (y-mu) + + See also fit method of :class:`GeneralizedLinearRegressor`. + + Parameters + ---------- + X : numpy array or sparse matrix of shape (n_samples, n_features) + Training data (with intercept included if present) + + W : numpy array of shape (n_samples, ) + + P2 : numpy array or sparse matrix of shape (n_features, n_features) + The l2-penalty matrix or vector (=diagonal matrix) + + z : numpy array of shape (n_samples, ) + Working observations + + Returns + ------- + coef: array, shape = (X.shape[1]) + """ + # TODO: scipy.linalg.solve if faster, but ordinary least squares uses + # scipy.linalg.lstsq. What is more appropriate? + n_samples, n_features = X.shape + if sparse.issparse(X): + W = sparse.dia_matrix((W, 0), shape=(n_samples, n_samples)).tocsr() + if P2.ndim == 1: + L2 = (sparse.dia_matrix((P2, 0), shape=(n_features, n_features)) + ).tocsr() + else: + L2 = sparse.csr_matrix(P2) + XtW = X.transpose() * W + A = XtW * X + L2 + b = XtW * z + coef = sparse.linalg.spsolve(A, b) + else: + XtW = (X.T * W) + A = XtW.dot(X) + if P2.ndim == 1: + A[np.diag_indices_from(A)] += P2 + else: + A += P2 + b = XtW.dot(z) + coef = linalg.solve(A, b) + return coef + + class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - r""" - Class to fit a Generalized Linear Model (GLM) based on reproductive - Exponential Dispersion Models (EDM). + """Regression via a Generalized Linear Model (GLM) based on reproductive + Exponential Dispersion Models (EDM) with combined L1 and L2 priors as + regularizer. 
+ + Minimizes the objective function:: + + 1/(2s) * deviance(y, h(X*w)) + + alpha * l1_ratio * ||P1*w||_1 + + 1/2 * alpha * (1 - l1_ratio) * w*P2*w + + with inverse link function `h` and s=sum of `sample_weight` (which equals + n_samples for `sample_weight=None`). + For `P1`=`P2`=identity, the penalty is the elastic net:: - #TODO: This belongs to User Guide - Assumptions: + alpha * l1_ratio * ||w||_1 + + 1/2 * alpha * (1 - l1_ratio) * ||w||_2^2 - - The target values y_i are realizations of random variables - :math:`Y_i \sim \mathrm{EDM}(\mu_i, \frac{\phi}{w_i})` with dispersion - parameter :math:`\phi` and weights :math:`w_i`. - - The expectation of :math:`Y_i` is :math:`\mu_i=\mathrm{E}[Y]=h(\eta_i)` - whith the linear predictor :math:`\eta=X*\beta`, inverse link function - :math:`h(\eta)`, design matrix :math:`X` and parameters :math:`\beta` - to be estimated. + If you are interested in controlling the L1 and L2 penalty + separately, keep in mind that this is equivalent to:: - Note that the first assumption implies - :math:`\mathrm{Var}[Y_i]=\frac{\phi}{w_i} v(\mu_i)` with uni variance - function :math:`v(\mu)`. + a * L1 + b * L2 + + where:: + + alpha = a + b and l1_ratio = a / (a + b) + + The parameter `l1_ratio` corresponds to alpha in the glmnet R package while + alpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio + = 1 is the lasso penalty. + + Read more in the :ref:`User Guide `. The fit itself does not need Y to be from an EDM, but only assumes - the first two moments :math:`E[Y_i]=\mu_i=h(\eta_i)` and - :math:`Var[Y_i]=\frac{\phi}{w_i} v(\mu_i)` + the first two moments :math:`E[Y_i]=\\mu_i=h(\\eta_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{w_i} v(\\mu_i)`. + + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + (penalized) maximum likelihood which is equivalent to minimizing the + deviance. - The parameters :math:`\beta` are estimated by maximum likelihood which is - equivalent to minimizing the deviance. + TODO: For `alpha` > 0, the feature matrix `X` is assumed to be + standardized. Call + :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. TODO: Estimation of the dispersion parameter phi. - TODO: Notes on weights and 'scaled' Poisson, e.g. fit y = x/w with - with x=counts and w=exposure (time, money, persons, ...) => y is a - ratio with weights w. + TODO: Notes on weights and 'scaled' distributions. For Poisson, this means + to fit y = z/w with z=counts and w=exposure (time, money, persons, ...) + => y is a ratio with weights w. Same for other distributions. Parameters ---------- + alpha : float, optional (default=1) + Constant that multiplies the penalty terms und thus determines the + regularization strength. + See the notes for the exact mathematical meaning of this + parameter.``alpha = 0`` is equivalent to unpenalized GLMs. In this + case, the design matrix X must have full column rank + (no collinearities). + + l1_ratio : float, optional (defaul=0) + The elastic net mixing parameter, with ``0 <= l1_ratio <= 1``. For + ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it + is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a + combination of L1 and L2. + fit_intercept : boolean, optional (default=True) Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). 
- family : {'normal', 'poisson', 'gamma', 'inverse.gaussian'} or an instance - of a subclass of ExponentialDispersionModel, optional - (default='normal') + family : {'normal', 'poisson', 'gamma', 'inverse.gaussian'} or an instance\ + of class ExponentialDispersionModel, optional(default='normal') the distributional assumption of the GLM. - link : {'identity', 'log'} or an instance of a subclass of Link, + link : {'identity', 'log'} or an instance of class Link, optional (default='identity') the link function of the GLM, i.e. mapping from linear predictor (X*coef) to expectation (mu). @@ -634,28 +741,41 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): where ``g_i`` is the i-th component of the gradient (derivative of the deviance). - start_params : {array shape (n_features, ), 'ols'}, optional (default=None) - sets the start values for coef_ in the fit. - If None, default values are taken. - If 'ols' the result of an ordinary least squares in the link space - (linear predictor) is taken. - If an array is given, these values are taken as coef_ to start with. - If fit_intercept is true, the first value is assumed to be the start - value for the intercept_. + warm_start : boolean, optional (default=False) + If set to ``True``, reuse the solution of the previous call to fit as + initialization for ``coef_`` and ``intercept_`` (supersedes option + ``start_params``). If set to ``True`` or if the attribute ``coef_`` + does not exit (first call to fit), option ``start_params`` sets the + starting values for ``coef_`` and ``intercept_``. + + start_params : None or array of shape (n_features, ) or 'least_squares'}, \ + optional (default=None) + If an array of size n_features is supplied, use these as start values + for ``coef_`` in the fit. If ``fit_intercept=True``, the first element + is assumed to be the start value for the ``intercept_``. + If 'least_squares' is set, the result of a least squares fit in the + link space (linear predictor) is taken. If ``None``, the start values + are calculated by setting mu to family.starting_mu(..) and one step of + irls. + This option only applies if ``warm_start=False`` or if fit is called + the first time (``self.coef_`` does not exist). + + copy_X : boolean, optional, default True + If ``True``, X will be copied; else, it may be overwritten. verbose : int, optional (default=0) For the lbfgs solver set verbose to any positive number for verbosity. Attributes ---------- - coef_ : array, shape (1, n_features) + coef_ : array, shape (n_features, ) Estimated coefficients for the linear predictor (X*coef_) in the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. dispersion_ : float - The dispersion parameter :math:`\phi` if fit_dispersion is set. + The dispersion parameter :math:`\\phi` if fit_dispersion is set. n_iter_ : int Actual number of iterations of the solver. 
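
    For concreteness, a minimal usage sketch of the estimator described above
    (toy data; only the L2 case is shown because no solver for the L1 part of
    the penalty is implemented at this stage)::

        import numpy as np
        from sklearn.linear_model.glm import GeneralizedLinearRegressor

        X = np.array([[0., 1.], [1., 1.], [2., 3.], [3., 4.]])   # toy design matrix
        y = np.array([1., 2., 5., 7.])                           # toy targets

        # pure L2 (ridge-like) penalty: l1_ratio=0
        reg = GeneralizedLinearRegressor(alpha=0.1, l1_ratio=0,
                                         family='normal', link='identity')
        reg.fit(X, y)
        print(reg.intercept_, reg.coef_, reg.n_iter_)
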
@@ -667,10 +787,13 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): ---------- TODO """ - - def __init__(self, fit_intercept=True, family='normal', - link='identity', fit_dispersion='chisqr', solver='irls', - max_iter=100, tol=1e-4, start_params=None, verbose=0): + def __init__(self, alpha=1.0, l1_ratio=0, + fit_intercept=True, family='normal', link='identity', + fit_dispersion='chisqr', solver='irls', max_iter=100, + tol=1e-4, warm_start=False, start_params=None, copy_X=True, + verbose=0): + self.alpha = alpha + self.l1_ratio = l1_ratio self.fit_intercept = fit_intercept self.family = family self.link = link @@ -678,31 +801,86 @@ def __init__(self, fit_intercept=True, family='normal', self.solver = solver self.max_iter = max_iter self.tol = tol + self.warm_start = warm_start self.start_params = start_params + self.copy_X = copy_X self.verbose = verbose - def fit(self, X, y, sample_weight=None): + def fit(self, X, y, sample_weight=None, P1=None, P2=None, + check_input=True): """Fit a generalized linear model. Parameters ---------- - X : numpy array or sparse matrix of shape [n_samples,n_features] + X : numpy array or sparse matrix of shape (n_samples, n_features) Training data - y : numpy array of shape [n_samples] + y : numpy array of shape (n_samples, ) Target values - sample_weight : numpy array of shape [n_samples] + sample_weight : array of shape (n_samples, ) or None,\ + optinal (default=None) Individual weights for each sample. Var[Y_i]=phi/weight_i * v(mu) If Y_i ~ EDM(mu, phi/w_i) then sum(w*Y)/sum(w) ~ EDM(mu, phi/sum(w)), i.e. the mean of y is a weighted average with weights=sample_weight. + P1 : None or array of shape (n_features*, ), optional\ + (default=None) + With this array, you can exclude coefficients from ths L1 penalty. + Set the corresponding value to 1 (include) or 0 (exclude). The + default value ``None`` is the same as an array of ones. + Note that n_features* = X.shape[1] = length of coef_ (intercept + always excluded from counting). + + P2 : None or array of shape (n_features*, n_features*) + With this square matrix the L2 penalty is calculated as `w P2 w`. + This gives a fine control over this penalty (Tikhonov + regularization). + Note that n_features* = X.shape[1] = length of coef_ (intercept + always excluded from counting). + + check_input : boolean, optional (default=True) + Allow to bypass several input checking. + Don't use this parameter unless you know what you do. + Returns ------- self : returns an instance of self. """ + ####################################################################### + # 1. 
input validation # + ####################################################################### + # 1.1 validate arguments of fit ####################################### + _dtype = [np.float64, np.float32] + X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], + dtype=_dtype, y_numeric=True, multi_output=False) + y = y.astype(np.float64) + + if sample_weight is None: + weights = np.ones_like(y) + elif np.isscalar(sample_weight): + weights = sample_weight*np.ones_like(y) + else: + weights = np.atleast_1d(sample_weight) + if weights.ndim > 1: + raise ValueError("Sample weight must be 1D array or scalar") + elif weights.shape[0] != y.shape[0]: + raise ValueError("Sample weights must have the same length as" + " y") + # IMPORTANT NOTE: Since we want to minimize + # 1/(2*sum(sample_weight)) * deviance + L1 + L2, + # deviance = sum(sample_weight * unit_deviance), + # we rescale weights such that sum(weights) = 1 and this becomes + # 1/2*deviance + L1 + L2 with deviance=sum(weights * unit_deviance) + weights = weights/np.sum(weights) + + if not isinstance(check_input, bool): + raise ValueError("The argument check_input must be bool; got " + "(check_input={0})".format(check_input)) + + # 1.2 validate arguments of __init__ ################################## # Garantee that self._family_instance is an instance of class # ExponentialDispersionModel if isinstance(self.family, ExponentialDispersionModel): @@ -720,7 +898,8 @@ def fit(self, X, y, sample_weight=None): raise ValueError( "The family must be an instance of class" " ExponentialDispersionModel or an element of" - " ['normal', 'poisson', 'gamma', 'inverse.gaussian'].") + " ['normal', 'poisson', 'gamma', 'inverse.gaussian'];" + " got (family={0})".format(self.family)) # Garantee that self._link_instance is set to an instance of class Link if isinstance(self.link, Link): @@ -733,132 +912,232 @@ def fit(self, X, y, sample_weight=None): else: raise ValueError( "The link must be an instance of class Link or" - " an element of ['identity', 'log'].") - + " an element of ['identity', 'log']; got (link={0})" + .format(self.link)) + + if not isinstance(self.alpha, numbers.Number) or self.alpha < 0: + raise ValueError("Penalty term must be non-negative;" + " got (alpha={0})".format(self.alpha)) + if (not isinstance(self.l1_ratio, numbers.Number) or + self.l1_ratio < 0 or self.l1_ratio > 1): + raise ValueError("l1_ratio must be in interval [0, 1]; got" + " (l1_ratio={0]})".format(self.l1_ratio)) if not isinstance(self.fit_intercept, bool): - raise ValueError("The argument fit_intercept must be bool," + raise ValueError("The argument fit_intercept must be bool;" " got {0}".format(self.fit_intercept)) if self.solver not in ['irls', 'lbfgs', 'newton-cg']: raise ValueError("GLM Regression supports only irls, lbfgs and" "newton-cg solvers, got {0}".format(self.solver)) + if self.alpha > 0: + if (self.l1_ratio > 0 and + self.solver not in []): + # TODO: Add solver for L1 + # raise ValueError("The solver option (solver={0}) is not " + # "appropriate for the chosen penalty which" + # " includes L1 (alpha={1})." 
+ # .format(self.solver, self.alpha)) + raise NotImplementedError("Currently, no solver is implemented" + " that can deal with L1 penalties.") if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: raise ValueError("Maximum number of iteration must be positive;" " got (max_iter={0!r})".format(self.max_iter)) if not isinstance(self.tol, numbers.Number) or self.tol < 0: raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol={0!r})".format(self.tol)) + if not isinstance(self.warm_start, bool): + raise ValueError("The argument warm_start must be bool;" + " got {0}".format(self.warm_start)) start_params = self.start_params - if start_params is not None and start_params is not 'ols': + if start_params is not None and start_params is not 'least_squares': start_params = np.atleast_1d(start_params) - if start_params.shape[0] != X.shape[1] + self.fit_intercept: + if ((start_params.shape[0] != X.shape[1] + self.fit_intercept) or + (start_params.ndim != 1)): raise ValueError("Start values for parameters must have the" - "right length; required length {0}, got {1}" + "right length and dimension; required (length" + "={0}, ndim=1), got (length={1}, ndim={2})." .format(X.shape[1] + self.fit_intercept, - start_params.shape[0])) - - X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], - y_numeric=True, multi_output=False) - y = y.astype(np.float64) + start_params.shape[0], + start_params.ndim)) + if not isinstance(self.copy_X, bool): + raise ValueError("The argument copy_X must be bool;" + " got {0}".format(self.copy_X)) + + if P1 is None: + P1 = np.ones(X.shape[1]) + else: + P1 = np.atleast_1d(P1) + if (P1.shape[0] != X.shape[1]) or (P1.ndim != 1): + raise ValueError("P1 must be either None or an 1D array with " + "the length of X.shape[1]; " + "got (P1.shape[0]={0}), " + "needed (X.shape[1]={1})." 
+ .format(P1.shape[0], X.shape[1])) + if P2 is None: + P2 = np.ones(X.shape[1]) + if sparse.issparse(X): + P2 = (sparse.dia_matrix((np.ones(X.shape[1]), 0), + shape=(X.shape[1], X.shape[1]))).tocsr() + else: + P2 = check_array(P2, accept_sparse=['csr', 'csc', 'coo'], + dtype="numeric", ensure_2d=True) + if ((P2.shape[0] != P2.shape[1]) or + (P2.shape[0] != X.shape[1]) or + (P2.ndim != 2)): + raise ValueError("P2 must be either None or an array of shape " + "(n_features, n_features) with " + "n_features=X.shape[1]; " + "got (P2.shape=({0},{1})), needed ({3},{3})" + .format(P2.shape[0], P2.shape[1], X.shape[1])) family = self._family_instance link = self._link_instance - if not np.all(family.in_y_range(y)): - raise ValueError("Some value(s) of y are out of the valid " - "range for family {0}" - .format(family.__class__.__name__)) - - if sample_weight is None: - weights = np.ones_like(y) - elif np.isscalar(sample_weight): - weights = sample_weight*np.ones_like(y) - else: - weights = np.atleast_1d(sample_weight) - if weights.ndim > 1: - raise ValueError("Sample weight must be 1D array or scalar") - elif weights.shape[0] != y.shape[0]: - raise ValueError("Sample weights must have the same length as" - " y") - if self.fit_intercept: # intercept is first column <=> coef[0] is for intecept if sparse.issparse(X): Xnew = sparse.hstack([np.ones([X.shape[0], 1]), X]) else: Xnew = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1) + P1 = np.concatenate((np.array([0]), P1)) + if P2.ndim == 1: + P2 = np.concatenate((np.array([0]), P2)) + elif sparse.issparse(P2): + P2 = sparse.block_diag((sparse.dia_matrix((1, 1)), P2), + dtype=P2.dtype).tocsr() + else: + P2 = np.block([[np.zeros((1, 1)), np.zeros((1, X.shape[1]))], + [np.zeros((X.shape[1], 1)), P2]]) else: Xnew = X n_samples, n_features = Xnew.shape - - # Note: Since dispersion_ alias phi does not enter the estimation - # of mu_i=E[y_i] set it to 1 where convenient. + l1 = self.alpha * self.l1_ratio + l2 = self.alpha * (1-self.l1_ratio) + P1 *= l1 + P2 *= l2 + + # 1.3 additional validations ########################################## + if check_input: + if not np.all(family.in_y_range(y)): + raise ValueError("Some value(s) of y are out of the valid " + "range for family {0}" + .format(family.__class__.__name__)) + # TODO: if alpha=0 check that Xnew is not rank deficient + # TODO: what else to check? + + ####################################################################### + # 2. initialization of coef = (intercept_, coef_) # + ####################################################################### + # Note: Since phi=self.dispersion_ does not enter the estimation + # of mu_i=E[y_i], set it to 1. 
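        # For illustration: family.starting_mu as defined above shrinks the
        # observations toward their mean, e.g. y = [0, 1, 4] with the default
        # weights gives (y + mean(y)) / (2*n) ~= [0.28, 0.44, 0.94]; the start
        # values stay strictly positive here even where y_i == 0, which keeps
        # the log link well defined in the first IRLS step.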
# set start values for coef coef = None - if start_params is None: - # Use mu_start and apply one irls step to calculate coef - mu = family.starting_mu(y, weights) - # linear predictor - eta = link.link(mu) - # h'(eta) - hp = link.inverse_derivative(eta) - # working weights w, in principle a diagonal matrix - # therefore here just as 1d array - w = (hp**2 / family.variance(mu, phi=1, weights=weights)) - wroot = np.sqrt(w) - # working observations - yw = eta + (y-mu)/hp - # least squares rescaled with wroot - wroot = sparse.dia_matrix((wroot, 0), shape=(n_samples, n_samples)) - X_rescale = safe_sparse_dot(wroot, Xnew, dense_output=True) - yw_rescale = safe_sparse_dot(wroot, y, dense_output=True) - coef = linalg.lstsq(X_rescale, yw_rescale)[0] - elif start_params is 'ols': - reg = LinearRegression(copy_X=False, fit_intercept=False) - reg.fit(Xnew, link.link(y)) - coef = reg.coef_ + if self.warm_start and hasattr(self, "coef_"): + if self.fit_intercept: + coef = np.concatenate((self.intercept_, self.coef_)) + else: + coef = self.coef_ + elif self.start_params is None: + if self.l1_ratio == 0: + # See 3.1 IRLS + # Use mu_start and apply one irls step to calculate coef + mu = family.starting_mu(y, weights) + # linear predictor + eta = link.link(mu) + # h'(eta) + hp = link.inverse_derivative(eta) + # working weights W, in principle a diagonal matrix + # therefore here just as 1d array + W = (hp**2 / family.variance(mu, phi=1, weights=weights)) + # working observations + z = eta + (y-mu)/hp + # solve A*coef = b + # A = X' W X + l2 P2, b = X' W z + coef = _irls_step(Xnew, W, P2, z) + else: + # with L1 penalty, start with coef = 0 + coef = np.zeros(n_features) + elif self.start_params is 'least_squares': + if self.alpha == 0: + reg = LinearRegression(copy_X=True, fit_intercept=False) + reg.fit(Xnew, link.link(y)) + coef = reg.coef_ + elif self.l1_ratio <= 0.01: + # ElasticNet says l1_ratio <= 0.01 is not reliable, use Ridge + reg = Ridge(copy_X=True, fit_intercept=False, + alpha=self.alpha) + reg.fit(Xnew, link.link(y)) + coef = reg.coef_ + else: + # TODO: Does this make sense? + reg = ElasticNet(copy_X=True, fit_intercept=False, + alpha=self.alpha, l1_ratio=self.l1_ratio) + reg.fit(Xnew, link.link(y)) + coef = reg.coef_ else: coef = start_params + ####################################################################### + # 3. fit # + ####################################################################### # algorithms for optimiation # TODO: Parallelize it self.n_iter_ = 0 converged = False + # 3.1 IRLS ############################################################ + # Solve Newton-Raphson (1): Obj'' (w - w_old) = -Obj' + # Obj = objective function = 1/2 Dev + l2/2 w P2 w + # Dev = deviance, s = normalized weights, variance V(mu) but phi=1 + # D = link.inverse_derivative(eta) = diag_matrix(h'(X w)) + # D2 = link.inverse_derivative(eta)^2 = D^2 + # W = D2/V(mu) + # l2 = alpha * (1 - l1_ratio) + # Obj' = d(Obj)/d(w) = 1/2 Dev' + P2 w + # = -X' D (y-mu)/V(mu) + l2 P2 w + # Obj''= d2(Obj)/d(w)d(w') = Hessian = -X'(...) X + l2 P2 + # Use Fisher matrix instead of full info matrix -X'(...) X, + # i.e. 
E[Dev''] with E[y-mu]=0: + # Obj'' ~ X' W X + l2 P2 + # (1): w = (X' W X + l2 P2)^-1 X' W z, with z = eta + D^-1 (y-mu) + # Note: P2 = l2*P2, see above if self.solver == 'irls': - # linear predictor + # eta = linear predictor eta = safe_sparse_dot(Xnew, coef, dense_output=True) mu = link.inverse(eta) + # D = h'(eta) + hp = link.inverse_derivative(eta) + V = family.variance(mu, phi=1, weights=weights) while self.n_iter_ < self.max_iter: self.n_iter_ += 1 # coef_old not used so far. # coef_old = coef - # h'(eta) - hp = link.inverse_derivative(eta) - # working weights w, in principle a diagonal matrix + # working weights W, in principle a diagonal matrix # therefore here just as 1d array - w = (hp**2 / family.variance(mu, phi=1, weights=weights)) - wroot = np.sqrt(w) + W = (hp**2 / V) # working observations - yw = eta + (y-mu)/hp - # least squares rescaled with wroot - wroot = sparse.dia_matrix((wroot, 0), - shape=(n_samples, n_samples)) - X_rescale = safe_sparse_dot(wroot, Xnew, dense_output=True) - yw_rescale = safe_sparse_dot(wroot, yw, dense_output=True) - coef, residues, rank, singular_ = ( - linalg.lstsq(X_rescale, yw_rescale)) + z = eta + (y-mu)/hp + # solve A*coef = b + # A = X' W X + l2 P2, b = X' W z + coef = _irls_step(Xnew, W, P2, z) # updated linear predictor # do it here for updated values for tolerance eta = safe_sparse_dot(Xnew, coef, dense_output=True) mu = link.inverse(eta) + hp = link.inverse_derivative(eta) + V = family.variance(mu, phi=1, weights=weights) # which tolerace? |coef - coef_old| or gradient? # use gradient for compliance with newton-cg and lbfgs - # TODO: faster computation of gradient, use mu and eta directly - gradient = family._deviance_derivative( - coef=coef, X=Xnew, y=y, weights=weights, link=link) + # gradient = family._deviance_derivative( + # coef=coef, X=Xnew, y=y, weights=weights, link=link) + # gradient = -X' D (y-mu)/V(mu) + l2 P2 w + gradient = -safe_sparse_dot(Xnew.T, hp*(y-mu)/V) + if P2.ndim == 1: + gradient += P2*coef + else: + gradient += safe_sparse_dot(P2, coef) if (np.max(np.abs(gradient)) <= self.tol): converged = True break @@ -868,50 +1147,73 @@ def fit(self, X, y, sample_weight=None): "of iterations (currently {0})" .format(self.max_iter), ConvergenceWarning) + # 3.2 L-BFGS and Newton-CG ############################################ # TODO: performance: make one function return both deviance and # gradient of deviance - elif self.solver == 'lbfgs': - func = family._deviance - fprime = family._deviance_derivative - args = (Xnew, y, weights, link) - coef, loss, info = optimize.fmin_l_bfgs_b( - func, coef, fprime=fprime, - args=args, - iprint=(self.verbose > 0) - 1, pgtol=self.tol, - maxiter=self.max_iter) - if self.verbose > 0: - if info["warnflag"] == 1: - warnings.warn("lbfgs failed to converge." 
- " Increase the number of iterations.", - ConvergenceWarning) - elif info["warnflag"] == 2: - warnings.warn("lbfgs failed for the reason: {0}".format( - info["task"])) - self.n_iter_ = info['nit'] - elif self.solver == 'newton-cg': - func = family._deviance - grad = family._deviance_derivative + elif self.solver in ['lbfgs', 'newton-cg']: + def func(coef, *args): + if P2.ndim == 1: + L2 = safe_sparse_dot(coef.T, P2*coef) + else: + L2 = safe_sparse_dot(coef.T, safe_sparse_dot(P2, coef)) + # A[np.diag_indices_from(A)] += P2 + return 0.5*family._deviance(coef, *args) + 0.5*L2 + + def fprime(coef, *args): + if P2.ndim == 1: + L2 = P2*coef + else: + L2 = safe_sparse_dot(P2, coef) + return 0.5*family._deviance_derivative(coef, *args) + L2 def grad_hess(coef, X, y, weights, link): - grad = (family._deviance_derivative( - coef, X, y, weights, link)) - hessian = (family._deviance_hessian( - coef, X, y, weights, link)) + if P2.ndim == 1: + L2 = P2*coef + else: + L2 = safe_sparse_dot(P2, coef) + grad = 0.5*family._deviance_derivative( + coef, X, y, weights, link) + L2 + hessian = 0.5*family._deviance_hessian( + coef, X, y, weights, link) + if P2.ndim == 1: + hessian[np.diag_indices_from(hessian)] += P2 + else: + hessian += P2 def Hs(s): - ret = np.dot(hessian, s) + ret = safe_sparse_dot(hessian, s) return ret return grad, Hs - hess = grad_hess + args = (Xnew, y, weights, link) - coef, n_iter_i = newton_cg(hess, func, grad, coef, args=args, - maxiter=self.max_iter, tol=self.tol) - self.coef_ = coef + if self.solver == 'lbfgs': + coef, loss, info = optimize.fmin_l_bfgs_b( + func, coef, fprime=fprime, args=args, + iprint=(self.verbose > 0) - 1, pgtol=self.tol, + maxiter=self.max_iter) + if self.verbose > 0: + if info["warnflag"] == 1: + warnings.warn("lbfgs failed to converge." + " Increase the number of iterations.", + ConvergenceWarning) + elif info["warnflag"] == 2: + warnings.warn("lbfgs failed for the reason: {0}" + .format(info["task"])) + self.n_iter_ = info['nit'] + elif self.solver == 'newton-cg': + coef, n_iter_i = newton_cg(grad_hess, func, fprime, coef, + args=args, maxiter=self.max_iter, + tol=self.tol) + + ####################################################################### + # 4. postprocessing # + ####################################################################### if self.fit_intercept: self.intercept_ = coef[0] self.coef_ = coef[1:] else: + # set intercept to zero as the other linear models do self.intercept_ = 0. self.coef_ = coef @@ -988,8 +1290,8 @@ def score(self, X, y, sample_weight=None): :math:`D^2 = 1-\frac{D(y_{true},y_{pred})}{D_{null}}`, :math:`D_{null}` is the null deviance, i.e. the deviance of a model with intercept alone which corresponds to :math:`y_{pred} = \bar{y}`. The mean - :math:`\bar{y}` is average by sample_weight. In the case of a Normal - distribution, this D^2 equals R^2. + :math:`\bar{y}` is averaged by sample_weight. In the case of a Normal + distribution, D^2 equals R^2. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). 
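
For illustration, D^2 can be computed by hand; a small sketch for the normal
family (made-up arrays, using the ``deviance`` method with its default uniform
weights), where it coincides with the usual R^2::

    import numpy as np
    from sklearn.linear_model.glm import NormalDistribution
    from sklearn.metrics import r2_score

    y_true = np.array([3.0, 2.5, 4.0, 7.1])
    y_pred = np.array([2.8, 3.0, 3.7, 7.0])

    family = NormalDistribution()
    dev = family.deviance(y_true, y_pred)                  # sum((y - mu)^2)
    dev_null = family.deviance(y_true,
                               np.full_like(y_true, y_true.mean()))
    d2 = 1 - dev / dev_null
    assert np.isclose(d2, r2_score(y_true, y_pred))        # equals R^2 here
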
diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index a4d4ea8650860..df0413b4d7836 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -1,19 +1,34 @@ import numpy as np from sklearn.linear_model.glm import ( - # Link, IdentityLink, + Link, + IdentityLink, LogLink, TweedieDistribution, NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, - # GeneralizedHyperbolicSecand, + GeneralizedHyperbolicSecand, GeneralizedLinearRegressor) +from sklearn.linear_model.ridge import Ridge from sklearn.utils.testing import ( - # assert_equal, + assert_equal, assert_almost_equal, assert_array_equal, assert_array_almost_equal) +def test_link_properties(): + """Test link inverse and derivative + """ + rng = np.random.RandomState(0) + x = rng.rand(100)*100 + from sklearn.linear_model.glm import Link + for link in vars()['Link'].__subclasses__(): + link = link() + assert_almost_equal(link.link(link.inverse(x)), x, decimal=10) + assert_almost_equal(link.inverse_derivative(link.link(x)), + 1/link.derivative(x), decimal=10) + + def test_family_bounds(): """Test the valid range of distributions """ @@ -42,8 +57,23 @@ def test_family_bounds(): assert_array_equal(result, [False, False, True]) +def test_deviance_zero(): + """Test deviance(y,y) = 0 for different families + """ + for family in [NormalDistribution(), PoissonDistribution(), + GammaDistribution(), InverseGaussianDistribution(), + TweedieDistribution(power=-2.5), + TweedieDistribution(power=-1), + TweedieDistribution(power=1.5), + TweedieDistribution(power=2.5), + TweedieDistribution(power=4), + GeneralizedHyperbolicSecand()]: + assert_almost_equal(family.deviance(0.1, 0.1), 0, decimal=10) + assert_almost_equal(family.deviance(1.5, 1.5), 0, decimal=10) + + def test_glm_identiy_regression(): - """Test linear regression on a simple dataset + """Test GLM regression with identity link on a simple dataset """ coef = [1, 2] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T @@ -55,13 +85,13 @@ def test_glm_identiy_regression(): for solver in ['irls', 'lbfgs', 'newton-cg']: for family in families: glm = GeneralizedLinearRegressor( - family=family, fit_intercept=False, solver=solver) + alpha=0, family=family, fit_intercept=False, solver=solver) res = glm.fit(X, y) assert_array_almost_equal(res.coef_, coef) def test_glm_log_regression(): - """Test linear regression on a simple dataset + """Test GLM regression with log link on a simple dataset """ coef = [1, 2] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T @@ -73,10 +103,79 @@ def test_glm_log_regression(): for solver in ['irls', 'lbfgs', 'newton-cg']: for family in families: glm = GeneralizedLinearRegressor( - family=family, link=LogLink(), fit_intercept=False, - solver=solver, start_params='ols') + alpha=0, family=family, link=LogLink(), fit_intercept=False, + solver=solver, start_params='least_squares') res = glm.fit(X, y) assert_array_almost_equal(res.coef_, coef) +def test_normal_ridge(): + """Test ridge regression for Normal distributions + + Compare to test_ridge in test_ridge.py. 
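    With uniform sample weights the GLM minimizes
    1/(2*n) * ||y - Xw||^2 + alpha/2 * ||w||^2, whereas Ridge minimizes
    ||y - Xw||^2 + alpha_ridge * ||w||^2; multiplying the first objective by
    2*n shows both have the same minimizer when alpha_ridge = alpha * n_samples,
    which is the scaling used below.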
+ """ + rng = np.random.RandomState(0) + alpha = 1.0 + + # With more samples than features + n_samples, n_features, n_predict = 6, 5, 10 + y = rng.randn(n_samples) + X = rng.randn(n_samples, n_features) + T = rng.randn(n_predict, n_features) + + # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True) + ridge.fit(X, y) + for solver in ['irls', 'lbfgs', 'newton-cg']: + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, + family='normal', link='identity', + fit_intercept=True, solver=solver) + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_) + assert_almost_equal(glm.intercept_, ridge.intercept_) + assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, normalize=False) + ridge.fit(X, y) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, + family='normal', link='identity', + fit_intercept=False, solver='irls') + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_) + assert_almost_equal(glm.intercept_, ridge.intercept_) + assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + + # With more features than samples + n_samples, n_features, n_predict = 5, 10, 10 + y = rng.randn(n_samples) + X = rng.randn(n_samples, n_features) + T = rng.randn(n_predict, n_features) + + # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True) + ridge.fit(X, y) + for solver in ['irls', 'lbfgs', 'newton-cg']: + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, + family='normal', link='identity', + fit_intercept=True, solver=solver) + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_) + assert_almost_equal(glm.intercept_, ridge.intercept_) + assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, normalize=False) + ridge.fit(X, y) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, + family='normal', link='identity', + fit_intercept=False, solver='irls') + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_) + assert_almost_equal(glm.intercept_, ridge.intercept_) + assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + + # TODO: Test compatibility with R's glm, glmnet From 5b46c23977a8e386987a2767b2c12d4296d332af Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 19 Sep 2017 00:40:34 +0200 Subject: [PATCH 007/269] [WIP] Add Generalized Linear Models (#9405) * fix some bugs in user guide linear_model.rst * fix some pep8 issues in test_glm.py --- doc/modules/linear_model.rst | 28 +++++++++++++++----------- sklearn/linear_model/tests/test_glm.py | 7 ++++--- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 51b3821fa6207..98736facd9b76 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -880,14 +880,14 @@ Generalized linear regression ============================= :class:`GeneralizedLinearRegressor` generalizes the :ref:`elastic_net` in two -ways [1]_. First, the predicted values :math:`\hat{y}` are linked to a linear +ways [8]_. 
First, the predicted values :math:`\hat{y}` are linked to a linear combination of the input variables :math:`X` via an inverse link function :math:`h` as .. math:: \hat{y}(w, x) = h(xw) = h(w_0 + w_1 x_1 + ... + w_p x_p). Secondly, the squared loss function is replaced by the deviance :math:`D` of an -exponential dispersion model (EDM) [2]_. The objective function beeing minimized +exponential dispersion model (EDM) [9]_. The objective function beeing minimized becomes .. math:: \frac{1}{2s}D(y, \hat{y}) + \alpha \rho ||P_1w||_1 @@ -914,16 +914,20 @@ it is convenient to apply a link function different from the identity link :math:`h(Xw)=\exp(Xw)`. Note that the feature matrix `X` should be standardized before fitting. This -ensures that the penalty treats features equally. +ensures that the penalty treats features equally. The estimator can be used as +follows:: - >>> from sklearn import linear_model - >>> reg = linear_model.GeneralizedLinearRegressor(alpha=0.5, l1_ratio=0) - >>> reg = linear_model.GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') + >>> from sklearn.linear_model import GeneralizedLinearRegressor + >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) + GeneralizedLinearRegressor(alpha=0.5, copy_X=True, family='poisson', + fit_dispersion='chisqr', fit_intercept=True, l1_ratio=0, + link='log', max_iter=100, solver='irls', start_params=None, + tol=0.0001, verbose=0, warm_start=False) >>> reg.coef_ array([ 0.24630255, 0.43373521]) - >>> reg.intercept_ - -0.76383575123143277 + >>> reg.intercept_ #doctest: +ELLIPSIS + -0.76383575... Mathematical formulation ------------------------ @@ -969,7 +973,7 @@ Two remarks: * The deviances for at least Normal, Poisson and Gamma distributions are strictly consistent scoring functions for the mean :math:`\mu`, see Eq. - (19)-(20) in [3]_. + (19)-(20) in [10]_. * If you want to model a frequency, i.e. counts per exposure (time, volume, ...) you can do so by a Poisson distribution and passing @@ -979,12 +983,12 @@ Two remarks: .. topic:: References: - .. [1] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + .. [8] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - .. [2] Jørgensen, B. (1992). The theory of exponential dispersion models and analysis of deviance. Monografias de matemática, no. 51. + .. [9] Jørgensen, B. (1992). The theory of exponential dispersion models and analysis of deviance. Monografias de matemática, no. 51. See also `Exponential dispersion model. `_ - .. [3] Gneiting, T. (2010). `Making and Evaluating Point Forecasts. `_ + .. [10] Gneiting, T. (2010). `Making and Evaluating Point Forecasts. 
`_ Stochastic Gradient Descent - SGD ================================= diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index df0413b4d7836..b62b51b5bcb9e 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -2,7 +2,7 @@ from sklearn.linear_model.glm import ( Link, - IdentityLink, + # IdentityLink, LogLink, TweedieDistribution, NormalDistribution, PoissonDistribution, @@ -21,8 +21,9 @@ def test_link_properties(): """ rng = np.random.RandomState(0) x = rng.rand(100)*100 - from sklearn.linear_model.glm import Link - for link in vars()['Link'].__subclasses__(): + # from sklearn.linear_model.glm import Link + # for link in vars()['Link'].__subclasses__(): + for link in Link.__subclasses__(): link = link() assert_almost_equal(link.link(link.inverse(x)), x, decimal=10) assert_almost_equal(link.inverse_derivative(link.link(x)), From 10dd14603a5fc04f53ca4920621434aaff662064 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 3 Dec 2017 19:54:57 +0100 Subject: [PATCH 008/269] [WIP] Add Generalized Linear Models (#9405) * added test: ridge poisson with log-link compared to glmnet * fix ValueError message for l1_ratio * fix ValueError message for P2 * string comparison: use '==' and '!=' instead of 'is' and 'is not' * fix RuntimeWarnings in unit_deviance of poisson: x*log(x) as xlogy * added test for fisher matrix * added test for family argument --- sklearn/linear_model/glm.py | 29 ++++++----- sklearn/linear_model/tests/test_glm.py | 72 ++++++++++++++++++++++++-- 2 files changed, 84 insertions(+), 17 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 2db3c56d5e1c1..93ce358a8a874 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -15,6 +15,8 @@ # TODO: Make it as much consistent to other estimators in linear_model as # possible # TODO: options P1 and P2 in fit() or in __init__()??? +# TODO: Include further classes in class.rst? ExponentialDispersionModel? +# TweedieDistribution? # Design Decisions: # - Which name? GeneralizedLinearModel vs GeneralizedLinearRegressor. @@ -42,7 +44,7 @@ from abc import ABCMeta, abstractmethod, abstractproperty import numbers import numpy as np -from scipy import linalg, optimize, sparse +from scipy import linalg, optimize, sparse, special import warnings from .base import LinearRegression from .coordinate_descent import ElasticNet @@ -340,7 +342,7 @@ def _fisher_matrix(self, coef, phi, X, y, weights, link): = \mathbf{X}^T W \mathbf{X} \,, with :math:`\mathbf{W} = \mathbf{D}^2 \boldsymbol{\Sigma}^{-1}`, - see func:`score_function`. + see func:`_score`. """ n_samples = X.shape[0] lin_pred = safe_sparse_dot(X, coef, dense_output=True) @@ -363,7 +365,7 @@ def _observed_information(self, coef, phi, X, y, weights, link): \mathbf{H}(\boldsymbol{w}) = -\frac{\partial^2 loglike}{\partial\boldsymbol{w} \partial\boldsymbol{w}^T} - = \mathbf{X}^T \legt[ + = \mathbf{X}^T \left[ - \mathbf{D}' \mathbf{R} + \mathbf{D}^2 \mathbf{V} \mathbf{R} + \mathbf{D}^2 @@ -393,7 +395,7 @@ def _deviance_derivative(self, coef, X, y, weights, link): r"""The derivative w.r.t. `coef` (:math:`w`) of the deviance as a function of the coefficients `coef`. This is equivalent to :math:`-2\phi` times the score function - :func:`score_function` (derivative of the log-likelihood). + :func:`_score` (derivative of the log-likelihood). 
""" score = self._score(coef=coef, phi=1, X=X, y=y, weights=weights, link=link) @@ -510,7 +512,8 @@ def unit_deviance(self, y, mu): return (y-mu)**2 if p == 1: # PoissonDistribution - return 2 * (np.where(y == 0, 0, y*np.log(y/mu))-y+mu) + # 2 * (y*log(y/mu) - y + mu), with y*log(y/mu)=0 if y=0 + return 2 * (special.xlogy(y, y/mu) - y + mu) elif p == 2: # GammaDistribution return 2 * (np.log(mu/y)+y/mu-1) @@ -921,7 +924,7 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, if (not isinstance(self.l1_ratio, numbers.Number) or self.l1_ratio < 0 or self.l1_ratio > 1): raise ValueError("l1_ratio must be in interval [0, 1]; got" - " (l1_ratio={0]})".format(self.l1_ratio)) + " (l1_ratio={0})".format(self.l1_ratio)) if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool;" " got {0}".format(self.fit_intercept)) @@ -948,7 +951,7 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, raise ValueError("The argument warm_start must be bool;" " got {0}".format(self.warm_start)) start_params = self.start_params - if start_params is not None and start_params is not 'least_squares': + if start_params is not None and start_params != 'least_squares': start_params = np.atleast_1d(start_params) if ((start_params.shape[0] != X.shape[1] + self.fit_intercept) or (start_params.ndim != 1)): @@ -986,7 +989,7 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, raise ValueError("P2 must be either None or an array of shape " "(n_features, n_features) with " "n_features=X.shape[1]; " - "got (P2.shape=({0},{1})), needed ({3},{3})" + "got (P2.shape=({0}, {1})), needed ({2}, {2})" .format(P2.shape[0], P2.shape[1], X.shape[1])) family = self._family_instance @@ -1058,7 +1061,7 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, else: # with L1 penalty, start with coef = 0 coef = np.zeros(n_features) - elif self.start_params is 'least_squares': + elif self.start_params == 'least_squares': if self.alpha == 0: reg = LinearRegression(copy_X=True, fit_intercept=False) reg.fit(Xnew, link.link(y)) @@ -1277,11 +1280,9 @@ def estimate_phi(self, y, X, sample_weight): dev = self._family_instance.deviance(y, mu, sample_weight) return dev/(n_samples - n_features) -# TODO: Fix "AssertionError: -0.28014056555724598 not greater than 0.5" -# in check_estimator for score -# from sklearn.utils.estimator_checks import check_estimator -# from sklearn.linear_model import GeneralizedLinearRegressor -# check_estimator(GeneralizedLinearRegressor) + # Note: check_estimator(GeneralizedLinearRegressor) might raise + # "AssertionError: -0.28014056555724598 not greater than 0.5" + # unless GeneralizedLinearRegressor has a score which passes the test. def score(self, X, y, sample_weight=None): r"""Returns D^2, a generalization of the coefficient of determination R^2, which uses deviance instead of squared error. diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index b62b51b5bcb9e..de7de90db967b 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -1,4 +1,6 @@ import numpy as np +from numpy.testing import assert_allclose +import scipy as sp from sklearn.linear_model.glm import ( Link, @@ -73,6 +75,46 @@ def test_deviance_zero(): assert_almost_equal(family.deviance(1.5, 1.5), 0, decimal=10) +def test_fisher_matrix(): + """Test the Fisher matrix numerically. 
+ Trick: Use numerical differentiation with y = mu""" + for family in [NormalDistribution(), PoissonDistribution(), + GammaDistribution(), InverseGaussianDistribution()]: + link = LogLink() + rng = np.random.RandomState(0) + coef = np.array([-2, 1, 0, 1, 2.5]) + phi = 0.5 + X = rng.randn(10, 5) + lin_pred = np.dot(X, coef) + mu = link.inverse(lin_pred) + weights = rng.randn(10)**2 + 1 + fisher = family._fisher_matrix(coef=coef, phi=phi, X=X, y=mu, + weights=weights, link=link) + approx = np.array([]).reshape(0, coef.shape[0]) + for i in range(coef.shape[0]): + def f(coef): + return -family._score(coef=coef, phi=phi, X=X, y=mu, + weights=weights, link=link)[i] + approx = np.vstack( + [approx, sp.optimize.approx_fprime(xk=coef, f=f, epsilon=1e-5)] + ) + assert_allclose(fisher, approx, rtol=1e-3) + + +def test_glm_family_argument(): + """Test GLM family argument set as string + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for (f, fam) in [('normal', NormalDistribution()), + ('poisson', PoissonDistribution()), + ('gamma', GammaDistribution()), + ('inverse.gaussian', InverseGaussianDistribution())]: + glm = GeneralizedLinearRegressor(family=f, fit_intercept=False, + alpha=0).fit(X, y) + assert_equal(type(glm._family_instance), type(fam)) + + def test_glm_identiy_regression(): """Test GLM regression with identity link on a simple dataset """ @@ -82,7 +124,8 @@ def test_glm_identiy_regression(): families = ( NormalDistribution(), PoissonDistribution(), GammaDistribution(), InverseGaussianDistribution(), - TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)) + TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), + GeneralizedHyperbolicSecand()) for solver in ['irls', 'lbfgs', 'newton-cg']: for family in families: glm = GeneralizedLinearRegressor( @@ -100,7 +143,8 @@ def test_glm_log_regression(): families = ( NormalDistribution(), PoissonDistribution(), GammaDistribution(), InverseGaussianDistribution(), - TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)) + TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), + GeneralizedHyperbolicSecand()) for solver in ['irls', 'lbfgs', 'newton-cg']: for family in families: glm = GeneralizedLinearRegressor( @@ -179,4 +223,26 @@ def test_normal_ridge(): assert_array_almost_equal(glm.predict(T), ridge.predict(T)) -# TODO: Test compatibility with R's glm, glmnet +def test_poisson_ridge(): + """Test ridge regression with poisson family and LogLink + + Compare to R's glmnet""" + # library("glmnet") + # options(digits=10) + # df <- data.frame(a=c(-2,-1,1,2), b=c(0,0,1,1), y=c(0,1,1,2)) + # x <- data.matrix(df[,c("a", "b")]) + # y <- df$y + # fit <- glmnet(x=x, y=y, alpha=0, intercept=T, family="poisson", + # standardize=F, thresh=1e-10, nlambda=10000) + # coef(fit, s=1) + # (Intercept) -0.12889386979 + # a 0.29019207995 + # b 0.03741173122 + X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T + y = np.array([0, 1, 1, 2]) + glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, family='poisson', + link='log', tol=1e-10) + glm.fit(X, y) + assert_almost_equal(glm.intercept_, -0.12889386979, decimal=7) + assert_array_almost_equal(glm.coef_, [0.29019207995, 0.03741173122], + decimal=7) From 72485b63e89879e65381bca12152b54600fd3970 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Mon, 8 Jan 2018 22:13:45 +0100 Subject: [PATCH 009/269] [WIP] Add Generalized Linear Models (#9405) * put arguments P1, P2 and check_input from fit to __init__ * added check_input test: is P2 positive definite? 
* added solver option: 'auto' --- sklearn/linear_model/glm.py | 181 +++++++++++++++++++++++------------- 1 file changed, 117 insertions(+), 64 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 93ce358a8a874..3f6b91026ef9b 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -14,7 +14,7 @@ # TODO: Write examples and more docu # TODO: Make it as much consistent to other estimators in linear_model as # possible -# TODO: options P1 and P2 in fit() or in __init__()??? +# TODO: which dtype to force for y and X? Which for P1, P2? # TODO: Include further classes in class.rst? ExponentialDispersionModel? # TweedieDistribution? @@ -709,6 +709,21 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2. + P1 : None or array of shape (n_features*, ), optional\ + (default=None) + With this array, you can exclude coefficients from the L1 penalty. + Set the corresponding value to 1 (include) or 0 (exclude). The + default value ``None`` is the same as an array of ones. + Note that n_features* = X.shape[1] = length of coef_ (intercept + always excluded from counting). + + P2 : None or array of shape (n_features*, n_features*) + With this square matrix the L2 penalty is calculated as `w P2 w`. + This gives a fine control over this penalty (Tikhonov + regularization). + Note that n_features* = X.shape[1] = length of coef_ (intercept + always excluded from counting). P2 must be positive semi-definite. + fit_intercept : boolean, optional (default=True) Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). @@ -727,13 +742,16 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): the chi squared statisic or the deviance statistic. If None, the dispersion is not estimated. - solver : {'irls', 'newton-cg', 'lbfgs'}, optional (defaul='irls') + solver : {'auto', 'irls', 'newton-cg', 'lbfgs'}, optional (defaul='auto') Algorithm to use in the optimization problem. - - 'irls' is iterated reweighted least squares. It is the standard - algorithm for GLMs. + - 'irls' is iterated reweighted least squares (Fisher scoring). + It is the standard algorithm for GLMs. Cannot deal with + L1 penalties. + + - 'newton-cg', 'lbfgs'. Cannot deal with L1 penalties. - - 'newton-cg', 'lbfgs' + - 'auto' sets 'irls'. max_iter : int, optional (default=100) TODO @@ -766,6 +784,11 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. + check_input : boolean, optional (default=True) + Allow to bypass several checks on input: y values in range of family, + sample_weights non-negative, P2 positive semi-definite. + Don't use this parameter unless you know what you do. + verbose : int, optional (default=0) For the lbfgs solver set verbose to any positive number for verbosity. 
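
As an illustration of the ``P2`` option documented above, a sketch with a
diagonal Tikhonov matrix (toy data; the penalty weights are arbitrary and
``P2`` must be positive semi-definite)::

    import numpy as np
    from sklearn.linear_model import GeneralizedLinearRegressor

    X = np.array([[0., 1.], [1., 1.], [2., 0.], [3., 2.]])
    y = np.array([1., 2., 2., 4.])

    # penalize the second coefficient ten times more strongly than the first
    P2 = np.diag([1., 10.])
    reg = GeneralizedLinearRegressor(alpha=0.5, l1_ratio=0, P2=P2,
                                     family='normal', link='identity')
    reg.fit(X, y)
    print(reg.coef_)
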
@@ -790,13 +813,15 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): ---------- TODO """ - def __init__(self, alpha=1.0, l1_ratio=0, + def __init__(self, alpha=1.0, l1_ratio=0, P1=None, P2=None, fit_intercept=True, family='normal', link='identity', - fit_dispersion='chisqr', solver='irls', max_iter=100, + fit_dispersion='chisqr', solver='auto', max_iter=100, tol=1e-4, warm_start=False, start_params=None, copy_X=True, - verbose=0): + check_input=True, verbose=0): self.alpha = alpha self.l1_ratio = l1_ratio + self.P1 = P1 + self.P2 = P2 self.fit_intercept = fit_intercept self.family = family self.link = link @@ -807,10 +832,10 @@ def __init__(self, alpha=1.0, l1_ratio=0, self.warm_start = warm_start self.start_params = start_params self.copy_X = copy_X + self.check_input = check_input self.verbose = verbose - def fit(self, X, y, sample_weight=None, P1=None, P2=None, - check_input=True): + def fit(self, X, y, sample_weight=None): """Fit a generalized linear model. Parameters @@ -823,31 +848,13 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, sample_weight : array of shape (n_samples, ) or None,\ optinal (default=None) - Individual weights for each sample. - Var[Y_i]=phi/weight_i * v(mu) - If Y_i ~ EDM(mu, phi/w_i) then + Individual weights w_i for each sample. Note that for an + Exponential Dispersion Model (EDM), one has + Var[Y_i]=phi/w_i * v(mu). + If Y_i ~ EDM(mu, phi/w_i), then sum(w*Y)/sum(w) ~ EDM(mu, phi/sum(w)), i.e. the mean of y is a weighted average with weights=sample_weight. - P1 : None or array of shape (n_features*, ), optional\ - (default=None) - With this array, you can exclude coefficients from ths L1 penalty. - Set the corresponding value to 1 (include) or 0 (exclude). The - default value ``None`` is the same as an array of ones. - Note that n_features* = X.shape[1] = length of coef_ (intercept - always excluded from counting). - - P2 : None or array of shape (n_features*, n_features*) - With this square matrix the L2 penalty is calculated as `w P2 w`. - This gives a fine control over this penalty (Tikhonov - regularization). - Note that n_features* = X.shape[1] = length of coef_ (intercept - always excluded from counting). - - check_input : boolean, optional (default=True) - Allow to bypass several input checking. - Don't use this parameter unless you know what you do. - Returns ------- self : returns an instance of self. 
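
To make the ``sample_weight`` semantics concrete, a sketch of the frequency
use case from the notes above, i.e. counts per exposure modelled with a
Poisson distribution and log link (all numbers are made up)::

    import numpy as np
    from sklearn.linear_model import GeneralizedLinearRegressor

    X = np.array([[0.], [1.], [2.], [3.]])
    counts = np.array([0., 1., 3., 8.])       # observed event counts
    exposure = np.array([1., 2., 1., 2.])     # e.g. observed time per row

    y = counts / exposure                     # frequency = counts / exposure
    reg = GeneralizedLinearRegressor(alpha=0.01, l1_ratio=0,
                                     family='poisson', link='log')
    reg.fit(X, y, sample_weight=exposure)
    print(reg.intercept_, reg.coef_)
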
@@ -872,16 +879,6 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, elif weights.shape[0] != y.shape[0]: raise ValueError("Sample weights must have the same length as" " y") - # IMPORTANT NOTE: Since we want to minimize - # 1/(2*sum(sample_weight)) * deviance + L1 + L2, - # deviance = sum(sample_weight * unit_deviance), - # we rescale weights such that sum(weights) = 1 and this becomes - # 1/2*deviance + L1 + L2 with deviance=sum(weights * unit_deviance) - weights = weights/np.sum(weights) - - if not isinstance(check_input, bool): - raise ValueError("The argument check_input must be bool; got " - "(check_input={0})".format(check_input)) # 1.2 validate arguments of __init__ ################################## # Garantee that self._family_instance is an instance of class @@ -928,17 +925,22 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool;" " got {0}".format(self.fit_intercept)) - if self.solver not in ['irls', 'lbfgs', 'newton-cg']: - raise ValueError("GLM Regression supports only irls, lbfgs and" - "newton-cg solvers, got {0}".format(self.solver)) + if self.solver == 'auto': + solver = 'irls' + else: + solver = self.solver + if solver not in ['irls', 'lbfgs', 'newton-cg']: + raise ValueError("GeneralizedLinearRegressor supports only irls, " + "lbfgs and newton-cg solvers, got {0}" + "".format(solver)) if self.alpha > 0: if (self.l1_ratio > 0 and - self.solver not in []): + solver not in []): # TODO: Add solver for L1 # raise ValueError("The solver option (solver={0}) is not " # "appropriate for the chosen penalty which" # " includes L1 (alpha={1})." - # .format(self.solver, self.alpha)) + # .format(solver, self.alpha)) raise NotImplementedError("Currently, no solver is implemented" " that can deal with L1 penalties.") if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: @@ -964,28 +966,32 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, if not isinstance(self.copy_X, bool): raise ValueError("The argument copy_X must be bool;" " got {0}".format(self.copy_X)) + if not isinstance(self.check_input, bool): + raise ValueError("The attribute check_input must be bool; got " + "(check_input={0})".format(self.check_input)) - if P1 is None: + if self.P1 is None: P1 = np.ones(X.shape[1]) else: - P1 = np.atleast_1d(P1) + P1 = np.atleast_1d(np.copy(self.P1)) if (P1.shape[0] != X.shape[1]) or (P1.ndim != 1): raise ValueError("P1 must be either None or an 1D array with " "the length of X.shape[1]; " "got (P1.shape[0]={0}), " "needed (X.shape[1]={1})." 
.format(P1.shape[0], X.shape[1])) - if P2 is None: + if self.P2 is None: P2 = np.ones(X.shape[1]) if sparse.issparse(X): P2 = (sparse.dia_matrix((np.ones(X.shape[1]), 0), shape=(X.shape[1], X.shape[1]))).tocsr() else: - P2 = check_array(P2, accept_sparse=['csr', 'csc', 'coo'], + P2 = check_array(self.P2, copy=True, + accept_sparse=['csr', 'csc', 'coo'], dtype="numeric", ensure_2d=True) - if ((P2.shape[0] != P2.shape[1]) or - (P2.shape[0] != X.shape[1]) or - (P2.ndim != 2)): + if ((P2.ndim != 2) or + (P2.shape[0] != P2.shape[1]) or + (P2.shape[0] != X.shape[1])): raise ValueError("P2 must be either None or an array of shape " "(n_features, n_features) with " "n_features=X.shape[1]; " @@ -1020,16 +1026,39 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, P2 *= l2 # 1.3 additional validations ########################################## - if check_input: + if self.check_input: if not np.all(family.in_y_range(y)): raise ValueError("Some value(s) of y are out of the valid " "range for family {0}" .format(family.__class__.__name__)) + if not np.all(weights >= 0): + raise ValueError("Sample weights must be non-negative.") + # check that P2 is positive semidefinite + # np.linalg.cholesky(P2) 'only' asserts positive definite + if self.P2 is not None: + if sparse.issparse(P2): + # TODO: check sparse P2 for non-negativeness + raise NotImplementedError("Check sparse P2 for " + "non-negaitveness is not yet " + "implemented.") + elif P2.ndim == 2: + if not np.all(np.linalg.eigvals(P2) >= -1e-15): + raise ValueError("P2 must be positive definite.") # TODO: if alpha=0 check that Xnew is not rank deficient # TODO: what else to check? ####################################################################### - # 2. initialization of coef = (intercept_, coef_) # + # 2. rescaling of weights (sample_weight) # + ####################################################################### + # IMPORTANT NOTE: Since we want to minimize + # 1/(2*sum(sample_weight)) * deviance + L1 + L2, + # deviance = sum(sample_weight * unit_deviance), + # we rescale weights such that sum(weights) = 1 and this becomes + # 1/2*deviance + L1 + L2 with deviance=sum(weights * unit_deviance) + weights = weights/np.sum(weights) + + ####################################################################### + # 3. initialization of coef = (intercept_, coef_) # ####################################################################### # Note: Since phi=self.dispersion_ does not enter the estimation # of mu_i=E[y_i], set it to 1. @@ -1082,13 +1111,13 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, coef = start_params ####################################################################### - # 3. fit # + # 4. 
fit # ####################################################################### # algorithms for optimiation # TODO: Parallelize it self.n_iter_ = 0 converged = False - # 3.1 IRLS ############################################################ + # 4.1 IRLS ############################################################ # Solve Newton-Raphson (1): Obj'' (w - w_old) = -Obj' # Obj = objective function = 1/2 Dev + l2/2 w P2 w # Dev = deviance, s = normalized weights, variance V(mu) but phi=1 @@ -1104,7 +1133,7 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, # Obj'' ~ X' W X + l2 P2 # (1): w = (X' W X + l2 P2)^-1 X' W z, with z = eta + D^-1 (y-mu) # Note: P2 = l2*P2, see above - if self.solver == 'irls': + if solver == 'irls': # eta = linear predictor eta = safe_sparse_dot(Xnew, coef, dense_output=True) mu = link.inverse(eta) @@ -1150,10 +1179,10 @@ def fit(self, X, y, sample_weight=None, P1=None, P2=None, "of iterations (currently {0})" .format(self.max_iter), ConvergenceWarning) - # 3.2 L-BFGS and Newton-CG ############################################ + # 4.2 L-BFGS and Newton-CG ############################################ # TODO: performance: make one function return both deviance and # gradient of deviance - elif self.solver in ['lbfgs', 'newton-cg']: + elif solver in ['lbfgs', 'newton-cg']: def func(coef, *args): if P2.ndim == 1: L2 = safe_sparse_dot(coef.T, P2*coef) @@ -1190,7 +1219,7 @@ def Hs(s): args = (Xnew, y, weights, link) - if self.solver == 'lbfgs': + if solver == 'lbfgs': coef, loss, info = optimize.fmin_l_bfgs_b( func, coef, fprime=fprime, args=args, iprint=(self.verbose > 0) - 1, pgtol=self.tol, @@ -1204,13 +1233,37 @@ def Hs(s): warnings.warn("lbfgs failed for the reason: {0}" .format(info["task"])) self.n_iter_ = info['nit'] - elif self.solver == 'newton-cg': + elif solver == 'newton-cg': coef, n_iter_i = newton_cg(grad_hess, func, fprime, coef, args=args, maxiter=self.max_iter, tol=self.tol) + # 4.3 coordinate descent ############################################## + # Reference: Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin + # An Improved GLMNET for L1-regularized Logistic Regression, + # Journal of Machine Learning Research 13 (2012) 1999-2030 + # Note: Use Fisher matrix instead of Hessian + # + # 1. find optimal descent direction d by minimizing + # min_d F(w+d) = min_d F(w+d) - F(w) + # F = f + g; f(w) = 1/2 dev; g(w) = 1/2*w*P2*w + ||P1*w||_1 + # 2. quadrdatic approx of f(w+d)-f(w): + # q(d) = f'(w)*d +1/2 d*H*d + # min_d q(d) + g(w+d) - g(w) + # 3. coordinate descent by updating coordinate j (d -> d+z*e_j): + # min_z q(d+z*e_j) + g(w+d+z*e_j) - g(w) + # = min_z q(d+z e_j) - q(d) + g(w+d+z*e_j) - g(w+d) + # TODO + # elif solver == 'cd': + # line search parameters + # (beta, sigma) = (0.5, 0.01) + # for iteration k from 1 to maxiter + # for coordinate j sample at random + # np.random.choice(coord, replace = False) + # + ####################################################################### - # 4. postprocessing # + # 5. 
postprocessing # ####################################################################### if self.fit_intercept: self.intercept_ = coef[0] From 5c1369bde863a73aff46a502acd70e58e06dcb85 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 24 Jan 2018 15:22:08 +0100 Subject: [PATCH 010/269] [WIP] Add Generalized Linear Models (#9405) * added coordinate descent solver * skip doctest for GeneralizedLinearRegressor example * symmetrize P2 => use P2 = 1/2 (P2+P2') * better validation of parameter start_params --- doc/modules/linear_model.rst | 21 +- sklearn/linear_model/glm.py | 353 ++++++++++++++++++++----- sklearn/linear_model/tests/test_glm.py | 82 ++++-- 3 files changed, 365 insertions(+), 91 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 98736facd9b76..834466e494a4a 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -894,9 +894,9 @@ becomes +\frac{\alpha(1-\rho)}{2} w^T P_2 w with sample weights :math:`s`. -:math:`P_1` can be used to exclude some of the coefficients in the L1 -penalty, :math:`P_2` (must be positive semi-definite) allows for a more -versatile L2 penalty. +:math:`P_1` (diagonal matrix) can be used to exclude some of the coefficients in +the L1 penalty, the matrix :math:`P_2` (must be positive semi-definite) allows +for a more versatile L2 penalty. Use cases, where a loss different from the squared loss might be appropriate, are the following: @@ -908,22 +908,23 @@ are the following: * If the target values seem to be heavy tailed, you might try an Inverse Gaussian deviance (or even higher variance power of the Tweedie family). Since the linear predictor :math:`Xw` can be negative and -Poisson, Gamma and Inverse Gaussian distributions don't have negative values, +Poisson, Gamma and Inverse Gaussian distributions don't support negative values, it is convenient to apply a link function different from the identity link :math:`h(x)=x` that guarantees the non-negativeness, e.g. the log-link with :math:`h(Xw)=\exp(Xw)`. Note that the feature matrix `X` should be standardized before fitting. This ensures that the penalty treats features equally. The estimator can be used as -follows:: +follows: >>> from sklearn.linear_model import GeneralizedLinearRegressor >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') - >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) - GeneralizedLinearRegressor(alpha=0.5, copy_X=True, family='poisson', - fit_dispersion='chisqr', fit_intercept=True, l1_ratio=0, - link='log', max_iter=100, solver='irls', start_params=None, - tol=0.0001, verbose=0, warm_start=False) + >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) # doctest: +SKIP + GeneralizedLinearRegressor(P1=None, P2=None, alpha=0.5, check_input=True, + copy_X=True, family='poisson', fit_dispersion='chisqr', + fit_intercept=True, l1_ratio=0, link='log', max_iter=100, + random_state=None, selection='random', solver='auto', + start_params=None, tol=0.0001, verbose=0, warm_start=False) >>> reg.coef_ array([ 0.24630255, 0.43373521]) >>> reg.intercept_ #doctest: +ELLIPSIS diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 3f6b91026ef9b..3de82c20f33cf 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -3,20 +3,21 @@ """ # Author: Christian Lorentzen +# some parts and tricks stolen from other sklearn files. 
# License: BSD 3 clause # TODO: Write more tests -# TODO: Add l1-penalty (elastic net) +# TODO: Write examples and more docu # TODO: deal with option self.copy_X # TODO: Should the option `normalize` be included (like other linear models)? # So far, it is not included. User must pass a normalized X. # TODO: Add cross validation -# TODO: Write examples and more docu -# TODO: Make it as much consistent to other estimators in linear_model as -# possible -# TODO: which dtype to force for y and X? Which for P1, P2? +# TODO: Should GeneralizedLinearRegressor inherit from LinearModel? +# So far, it does not. # TODO: Include further classes in class.rst? ExponentialDispersionModel? # TweedieDistribution? +# TODO: Negative values in P1 are not allowed so far. They could be used form +# group lasse. # Design Decisions: # - Which name? GeneralizedLinearModel vs GeneralizedLinearRegressor. @@ -52,10 +53,11 @@ from ..base import BaseEstimator, RegressorMixin from ..exceptions import ConvergenceWarning from ..externals import six +from ..externals.six.moves import xrange from ..utils import check_array, check_X_y from ..utils.extmath import safe_sparse_dot from ..utils.optimize import newton_cg -from ..utils.validation import check_is_fitted +from ..utils.validation import check_is_fitted, check_random_state class Link(six.with_metaclass(ABCMeta)): @@ -309,7 +311,9 @@ def _score(self, coef, phi, X, y, weights, link): .. math: - \mathbf{score}(\boldsymbol{w}) = \mathbf{X}^T \mathbf{D} + \mathbf{score}(\boldsymbol{w}) + = \frac{\partial loglike}{\partial\boldsymbol{w}} + = \mathbf{X}^T \mathbf{D} \boldsymbol{\Sigma}^-1 (\mathbf{y} - \boldsymbol{\mu})\,, with :math:`\mathbf{D}=\mathrm{diag}(h'(\eta_1),\ldots)` and @@ -411,6 +415,29 @@ def _deviance_hessian(self, coef, X, y, weights, link): weights=weights, link=link) return 2*info_matrix + def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link): + """Calculates eta (linear predictor), mu, score function (derivative + of log-likelihood) and Fisher matrix (all with phi=1) all in one go""" + n_samples, n_features = X.shape + # eta = linear predictor + eta = safe_sparse_dot(X, coef, dense_output=True) + mu = link.inverse(eta) + sigma_inv = 1./self.variance(mu, phi=phi, weights=weights) + d1 = link.inverse_derivative(eta) # = h'(eta) + # Alternatively: + # h'(eta) = h'(g(mu)) = 1/g'(mu), note that h is inverse of g + # d1 = 1./link.derivative(mu) + d1_sigma_inv = sparse.dia_matrix((sigma_inv*d1, 0), + shape=(n_samples, n_samples)) + temp = safe_sparse_dot(d1_sigma_inv, (y-mu), dense_output=True) + score = safe_sparse_dot(X.T, temp, dense_output=True) + # + d2_sigma_inv = sparse.dia_matrix((sigma_inv*(d1**2), 0), + shape=(n_samples, n_samples)) + temp = safe_sparse_dot(d2_sigma_inv, X, dense_output=False) + fisher = safe_sparse_dot(X.T, temp, dense_output=False) + return eta, mu, score, fisher + def starting_mu(self, y, weights=1): """Starting values for the mean mu_i in (unpenalized) IRLS.""" return ((weights*y+np.mean(weights*y)) / @@ -670,8 +697,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): alpha = a + b and l1_ratio = a / (a + b) The parameter `l1_ratio` corresponds to alpha in the glmnet R package while - alpha corresponds to the lambda parameter in glmnet. Specifically, l1_ratio - = 1 is the lasso penalty. + 'alpha' corresponds to the lambda parameter in glmnet. Specifically, + l1_ratio = 1 is the lasso penalty. Read more in the :ref:`User Guide `. 
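To make the penalty parametrization described above concrete, the following standalone sketch (illustrative only, not part of the patch; the helper name and the coefficient vector are made up) evaluates the combined term alpha * l1_ratio * ||P1 w||_1 + alpha * (1 - l1_ratio) / 2 * w^T P2 w that is added to half the deviance:

    import numpy as np

    def elastic_net_penalty(coef, alpha=1.0, l1_ratio=0.5, P1=None, P2=None):
        # per-feature L1 weights default to 1, the L2 (Tikhonov) matrix to the identity
        n_features = coef.shape[0]
        P1 = np.ones(n_features) if P1 is None else np.asarray(P1)
        P2 = np.eye(n_features) if P2 is None else np.asarray(P2)
        l1_term = alpha * l1_ratio * np.sum(np.abs(P1 * coef))       # lasso part
        l2_term = 0.5 * alpha * (1 - l1_ratio) * coef @ (P2 @ coef)  # ridge part
        return l1_term + l2_term

    w = np.array([0.5, -1.0, 0.0])
    # l1_ratio=1 gives a pure lasso penalty, l1_ratio=0 a pure ridge penalty
    print(elastic_net_penalty(w, alpha=0.5, l1_ratio=1.0))  # 0.75
    print(elastic_net_penalty(w, alpha=0.5, l1_ratio=0.0))  # 0.3125

With l1_ratio between 0 and 1 both terms contribute, which is exactly the alpha = a + b, l1_ratio = a / (a + b) reading given above.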
@@ -686,6 +713,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): TODO: For `alpha` > 0, the feature matrix `X` is assumed to be standardized. Call :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. + Otherwise, the strength of the penalty is different for the features. TODO: Estimation of the dispersion parameter phi. @@ -742,19 +770,23 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): the chi squared statisic or the deviance statistic. If None, the dispersion is not estimated. - solver : {'auto', 'irls', 'newton-cg', 'lbfgs'}, optional (defaul='auto') + solver : {'auto', 'irls', 'newton-cg', 'lbfgs', 'cd'}, \ + optional (defaul='auto') Algorithm to use in the optimization problem. + - 'auto' sets 'irls' if l1_ratio equals 0, else 'cd'. + - 'irls' is iterated reweighted least squares (Fisher scoring). It is the standard algorithm for GLMs. Cannot deal with L1 penalties. - 'newton-cg', 'lbfgs'. Cannot deal with L1 penalties. - - 'auto' sets 'irls'. + - 'cd' is the coordinate descent algorithm. It can deal with L1 and + L2 penalties. max_iter : int, optional (default=100) - TODO + The maximal number of iterations for solver algorithms. tol : float, optional (default=1e-4) Stopping criterion. For the irls, newton-cg and lbfgs solvers, @@ -781,6 +813,23 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): This option only applies if ``warm_start=False`` or if fit is called the first time (``self.coef_`` does not exist). + selection : str, optional (default='random') + For the solver 'cd' (coordinate descent), the coordinates (features) + can be updated in either cyclic or random order. + If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially by default. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4. + + random_state : int, RandomState instance or None, optional (default=None) + The seed of the pseudo random number generator that selects a random + feature to be updated for solver 'cd' (coordinate descent). + If int, random_state is the seed used by the random + number generator; if RandomState instance, random_state is the random + number generator; if None, the random number generator is the + RandomState instance used by `np.random`. Used when ``selection`` == + 'random'. + copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. 
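As a usage illustration of the solver-related options documented above, here is a hedged sketch; it assumes the estimator API exactly as proposed in this patch (not the released scikit-learn API), and the toy data are made up:

    import numpy as np
    from sklearn.linear_model import GeneralizedLinearRegressor

    X = np.array([[1.0, 0.5], [2.0, 1.0], [3.0, 1.5], [4.0, 3.0]])
    y = np.array([1.0, 2.0, 2.0, 4.0])

    # pure L2 penalty (l1_ratio=0): solver='auto' resolves to 'irls'
    reg_l2 = GeneralizedLinearRegressor(alpha=0.1, l1_ratio=0,
                                        family='poisson', link='log',
                                        solver='auto', max_iter=100, tol=1e-6)
    reg_l2.fit(X, y)

    # mixed L1/L2 penalty: only the coordinate descent solver handles the L1 part
    reg_enet = GeneralizedLinearRegressor(alpha=0.1, l1_ratio=0.5,
                                          family='poisson', link='log',
                                          solver='cd', selection='cyclic',
                                          random_state=0)
    reg_enet.fit(X, y)
    print(reg_l2.coef_, reg_enet.coef_)

Passing solver='irls', 'lbfgs' or 'newton-cg' together with a non-zero l1_ratio raises an error, since those solvers cannot handle the non-smooth L1 term.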
@@ -816,7 +865,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): def __init__(self, alpha=1.0, l1_ratio=0, P1=None, P2=None, fit_intercept=True, family='normal', link='identity', fit_dispersion='chisqr', solver='auto', max_iter=100, - tol=1e-4, warm_start=False, start_params=None, copy_X=True, + tol=1e-4, warm_start=False, start_params=None, + selection='random', random_state=None, copy_X=True, check_input=True, verbose=0): self.alpha = alpha self.l1_ratio = l1_ratio @@ -831,6 +881,8 @@ def __init__(self, alpha=1.0, l1_ratio=0, P1=None, P2=None, self.tol = tol self.warm_start = warm_start self.start_params = start_params + self.selection = selection + self.random_state = random_state self.copy_X = copy_X self.check_input = check_input self.verbose = verbose @@ -925,24 +977,21 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool;" " got {0}".format(self.fit_intercept)) - if self.solver == 'auto': - solver = 'irls' - else: - solver = self.solver - if solver not in ['irls', 'lbfgs', 'newton-cg']: + if self.solver not in ['auto', 'irls', 'lbfgs', 'newton-cg', 'cd']: raise ValueError("GeneralizedLinearRegressor supports only irls, " - "lbfgs and newton-cg solvers, got {0}" - "".format(solver)) - if self.alpha > 0: - if (self.l1_ratio > 0 and - solver not in []): - # TODO: Add solver for L1 - # raise ValueError("The solver option (solver={0}) is not " - # "appropriate for the chosen penalty which" - # " includes L1 (alpha={1})." - # .format(solver, self.alpha)) - raise NotImplementedError("Currently, no solver is implemented" - " that can deal with L1 penalties.") + "auto, lbfgs, newton-cg and cd solvers, got {0}" + "".format(self.solver)) + solver = self.solver + if self.solver == 'auto': + if self.l1_ratio == 0: + solver = 'irls' + else: + solver = 'cd' + if (self.alpha > 0 and self.l1_ratio > 0 and solver not in ['cd']): + raise ValueError("The chosen solver (solver={0}) can't deal " + "with L1 penalties, which are included with " + "(alpha={1}) and (l1_ratio={2})." 
+ .format(solver, self.alpha, self.l1_ratio)) if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: raise ValueError("Maximum number of iteration must be positive;" " got (max_iter={0!r})".format(self.max_iter)) @@ -953,7 +1002,14 @@ def fit(self, X, y, sample_weight=None): raise ValueError("The argument warm_start must be bool;" " got {0}".format(self.warm_start)) start_params = self.start_params - if start_params is not None and start_params != 'least_squares': + if start_params is None: + pass + elif isinstance(start_params, six.string_types): + if start_params not in ['least_squares']: + raise ValueError("The argument start_params must be None, " + "'least-squares' or an array of right length," + " got(start_params={0})".format(start_params)) + else: start_params = np.atleast_1d(start_params) if ((start_params.shape[0] != X.shape[1] + self.fit_intercept) or (start_params.ndim != 1)): @@ -963,6 +1019,12 @@ def fit(self, X, y, sample_weight=None): .format(X.shape[1] + self.fit_intercept, start_params.shape[0], start_params.ndim)) + + if self.selection not in ['cyclic', 'random']: + raise ValueError("The argument selection must be 'cyclic' or " + "'random', got (selection={0})" + .format(self.selection)) + random_state = check_random_state(self.random_state) if not isinstance(self.copy_X, bool): raise ValueError("The argument copy_X must be bool;" " got {0}".format(self.copy_X)) @@ -974,15 +1036,16 @@ def fit(self, X, y, sample_weight=None): P1 = np.ones(X.shape[1]) else: P1 = np.atleast_1d(np.copy(self.P1)) - if (P1.shape[0] != X.shape[1]) or (P1.ndim != 1): + if (P1.ndim != 1) or (P1.shape[0] != X.shape[1]): raise ValueError("P1 must be either None or an 1D array with " "the length of X.shape[1]; " "got (P1.shape[0]={0}), " "needed (X.shape[1]={1})." .format(P1.shape[0], X.shape[1])) if self.P2 is None: - P2 = np.ones(X.shape[1]) - if sparse.issparse(X): + if not sparse.issparse(X): + P2 = np.ones(X.shape[1]) + else: P2 = (sparse.dia_matrix((np.ones(X.shape[1]), 0), shape=(X.shape[1], X.shape[1]))).tocsr() else: @@ -1024,6 +1087,12 @@ def fit(self, X, y, sample_weight=None): l2 = self.alpha * (1-self.l1_ratio) P1 *= l1 P2 *= l2 + # one only ever needs the symmetrized L2 penalty matrix 1/2 (P2 + P2') + # reason: w' P2 w = (w' P2 w)', i.e. it is symmetric + if sparse.issparse(P2): + P2 = 0.5 * (P2 + P2.transpose()) + else: + P2 = 0.5 * (P2 + P2.T) # 1.3 additional validations ########################################## if self.check_input: @@ -1033,14 +1102,20 @@ def fit(self, X, y, sample_weight=None): .format(family.__class__.__name__)) if not np.all(weights >= 0): raise ValueError("Sample weights must be non-negative.") - # check that P2 is positive semidefinite + # check if P1 has only non-negative values, negative values might + # indicate group lasso in the future. 
+ if self.P1 is not None: + if not np.all(P1 >= 0): + raise ValueError("P1 must not have negative values.") + # check if P2 is positive semidefinite # np.linalg.cholesky(P2) 'only' asserts positive definite if self.P2 is not None: if sparse.issparse(P2): # TODO: check sparse P2 for non-negativeness - raise NotImplementedError("Check sparse P2 for " - "non-negaitveness is not yet " - "implemented.") + # raise NotImplementedError("Check sparse P2 for " + # "non-negaitveness is not yet " + # "implemented.") + pass elif P2.ndim == 2: if not np.all(np.linalg.eigvals(P2) >= -1e-15): raise ValueError("P2 must be positive definite.") @@ -1090,7 +1165,8 @@ def fit(self, X, y, sample_weight=None): else: # with L1 penalty, start with coef = 0 coef = np.zeros(n_features) - elif self.start_params == 'least_squares': + elif (isinstance(self.start_params, six.string_types) and + self.start_params == 'least_squares'): if self.alpha == 0: reg = LinearRegression(copy_X=True, fit_intercept=False) reg.fit(Xnew, link.link(y)) @@ -1102,7 +1178,7 @@ def fit(self, X, y, sample_weight=None): reg.fit(Xnew, link.link(y)) coef = reg.coef_ else: - # TODO: Does this make sense? + # TODO: Does this make sense at all? reg = ElasticNet(copy_X=True, fit_intercept=False, alpha=self.alpha, l1_ratio=self.l1_ratio) reg.fit(Xnew, link.link(y)) @@ -1125,14 +1201,17 @@ def fit(self, X, y, sample_weight=None): # D2 = link.inverse_derivative(eta)^2 = D^2 # W = D2/V(mu) # l2 = alpha * (1 - l1_ratio) - # Obj' = d(Obj)/d(w) = 1/2 Dev' + P2 w + # Obj' = d(Obj)/d(w) = 1/2 Dev' + l2 P2 w # = -X' D (y-mu)/V(mu) + l2 P2 w # Obj''= d2(Obj)/d(w)d(w') = Hessian = -X'(...) X + l2 P2 # Use Fisher matrix instead of full info matrix -X'(...) X, # i.e. E[Dev''] with E[y-mu]=0: # Obj'' ~ X' W X + l2 P2 - # (1): w = (X' W X + l2 P2)^-1 X' W z, with z = eta + D^-1 (y-mu) - # Note: P2 = l2*P2, see above + # (1): w = (X' W X + l2 P2)^-1 X' W z, + # with z = eta + D^-1 (y-mu) + # Note: we already set P2 = l2*P2, see above + # Note: we already symmetriezed P2 = 1/2 (P2 + P2') + # Note: ' denotes derivative, but also transpose for matrices if solver == 'irls': # eta = linear predictor eta = safe_sparse_dot(Xnew, coef, dense_output=True) @@ -1150,9 +1229,8 @@ def fit(self, X, y, sample_weight=None): # working observations z = eta + (y-mu)/hp # solve A*coef = b - # A = X' W X + l2 P2, b = X' W z + # A = X' W X + P2, b = X' W z coef = _irls_step(Xnew, W, P2, z) - # updated linear predictor # do it here for updated values for tolerance eta = safe_sparse_dot(Xnew, coef, dense_output=True) @@ -1242,25 +1320,182 @@ def Hs(s): # Reference: Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin # An Improved GLMNET for L1-regularized Logistic Regression, # Journal of Machine Learning Research 13 (2012) 1999-2030 - # Note: Use Fisher matrix instead of Hessian + # Note: Use Fisher matrix instead of Hessian for H # # 1. find optimal descent direction d by minimizing # min_d F(w+d) = min_d F(w+d) - F(w) - # F = f + g; f(w) = 1/2 dev; g(w) = 1/2*w*P2*w + ||P1*w||_1 - # 2. quadrdatic approx of f(w+d)-f(w): - # q(d) = f'(w)*d +1/2 d*H*d - # min_d q(d) + g(w+d) - g(w) + # F = f + g, f(w) = 1/2 deviance, g(w) = 1/2 w*P2*w + ||P1*w||_1 + # 2. quadrdatic approximation of F(w+d)-F(w) = q(d): + # using f(w+d) = f(w) + f'(w)*d + 1/2 d*H(w)*d + O(d^3) gives + # q(d) = (f'(w) + w*P2)*d + 1/2 d*(H(w)+P2)*d + # + ||P1*(w+d)||_1 - ||P1*w||_1 + # min_d q(d) # 3. 
coordinate descent by updating coordinate j (d -> d+z*e_j): - # min_z q(d+z*e_j) + g(w+d+z*e_j) - g(w) - # = min_z q(d+z e_j) - q(d) + g(w+d+z*e_j) - g(w+d) - # TODO - # elif solver == 'cd': + # min_z q(d+z*e_j) + # = min_z q(d+z*e_j) - q(d) + # = min_z A_j z + 1/2 B_jj z^2 + # + ||P1_j (w_j+d_j+z)||_1 - ||P1_j (w_j+d_j)||_1 + # A = f'(w) + d*H(w) + (w+d)*P2 + # B = H+P2 + # Note: we already set P2 = l2*P2, P1 = l1*P1, see above + # Note: we already symmetriezed P2 = 1/2 (P2 + P2') + # Note: f' = -score, H = Fisher matrix + elif solver == 'cd': # line search parameters - # (beta, sigma) = (0.5, 0.01) - # for iteration k from 1 to maxiter - # for coordinate j sample at random - # np.random.choice(coord, replace = False) - # + (beta, sigma) = (0.5, 0.01) + # max inner loops (cycles through all features) + max_inner_iter = 1000 + # some precalculations + eta, mu, score, fisher = family._eta_mu_score_fisher( + coef=coef, phi=1, X=Xnew, y=y, weights=weights, link=link) + # initial stopping tolerance of inner loop + # use L1-norm of minimum-norm of subgradient of F + # fp_wP2 = f'(w) + w*P2 + if P2.ndim == 1: + fp_wP2 = -score + coef*P2 + else: + fp_wP2 = -score + safe_sparse_dot(coef, P2) + inner_tol = (np.where(coef == 0, + np.sign(fp_wP2)*np.maximum(np.abs(fp_wP2)-P1, 0), + fp_wP2+np.sign(coef)*P1)) + inner_tol = linalg.norm(inner_tol, ord=1) + # outer loop + while self.n_iter_ < self.max_iter: + self.n_iter_ += 1 + # initialize search direction d (to be optimized) + d = np.zeros_like(coef) + # inner loop + # TODO: use sparsity (coefficient already 0 due to L1 penalty) + d = np.zeros_like(coef) + # A = f'(w) + d*H(w) + (w+d)*P2 + # B = H+P2 + # Note: f'=-score and H=fisher are updated at the end of outer + # iteration + B = fisher + if P2.ndim == 1: + coef_P2 = coef * P2 + B[np.diag_indices_from(B)] += P2 + else: + coef_P2 = safe_sparse_dot(coef, P2) + B += P2 + A = -score + coef_P2 # + d*(H+P2) but d=0 so far + inner_iter = 0 + while inner_iter < max_inner_iter: + inner_iter += 1 + if self.selection == 'random': + featurelist = random_state.permutation(n_features) + else: + featurelist = np.arange(n_features) + for j in featurelist: + # minimize_z: a z + 1/2 b z^2 + c |d+z| + # a = A_j + # b = B_jj > 0 + # c = |P1_j| = P1_j > 0, ee 1.3 + # d = w_j + d_j + # cf. https://arxiv.org/abs/0708.1485 Eqs. (3) - (4) + # with beta = z+d, beta_hat = d-a/b and gamma = c/b + # z = 1/b * S(bd-a,c) - d + # S(a,b) = sign(a) max(|a|-b, 0) soft thresholding + a = A[j] + b = B[j, j] + if P1[j] == 0: + if b == 0: + z = 0 + else: + z = -a/b + elif a + P1[j] < b * (coef[j]+d[j]): + if b == 0: + z = 0 + else: + z = -(a + P1[j])/b + elif a - P1[j] > b * (coef[j]+d[j]): + if b == 0: + z = 0 + else: + z = -(a - P1[j])/b + else: + z = -(coef[j] + d[j]) + # update direction d + d[j] += z + # update A because d_j is now d_j+z + # A = f'(w) + d*H(w) + (w+d)*P2 + # => A += (H+P2)*e_j z = B_j * z + # Note: B is symmetric B = B.transpose + if sparse.issparse(B): + if sparse.isspmatrix_csc(B): + # slice columns + A += B[:, j].toarray().ravel() * z + else: + # slice rows + A += B[j, :].toarray().ravel() * z + else: + A += B[j, :] * z + # end of cycle + # stopping criterion for inner loop + # sum_i(|minimum-norm subgrad of q(d)_i|) + mn_subgrad = (np.where(coef + d == 0, + np.sign(A)*np.maximum(np.abs(A)-P1, 0), + A+np.sign(coef+d)*P1)) + mn_subgrad = np.sum(np.abs(mn_subgrad)) + if mn_subgrad <= inner_tol: + if inner_iter == 1: + inner_tol = inner_tol/4. 
+ break + # end of inner loop + # line search by sequence beta^k, k=0, 1, .. + # F(w + lambda d) - F(w) <= lambda * bound + # bound = sigma * (f'(w)*d + w*P2*d + # +||P1 (w+d)||_1 - ||P1 w||_1) + P1w_1 = linalg.norm(P1*coef, ord=1) + # Note: coef_P2 already calculated and still valid + bound = sigma * ( + safe_sparse_dot(-score, d) + + safe_sparse_dot(coef_P2, d) + + linalg.norm(P1*(coef+d), ord=1) - + P1w_1) + Fw = (0.5 * family.deviance(y, mu, weights) + + 0.5 * safe_sparse_dot(coef_P2, coef) + + P1w_1) + la = 1./beta + for k in range(20): + la *= beta # starts with la=1 + mu_wd = link.inverse(safe_sparse_dot(Xnew, coef+la*d, + dense_output=True)) + Fwd = (0.5 * family.deviance(y, mu_wd, weights) + + linalg.norm(P1*(coef+la*d), ord=1)) + if P2.ndim == 1: + Fwd += 0.5 * safe_sparse_dot((coef+la*d)*P2, coef+la*d) + else: + Fwd += 0.5 * (safe_sparse_dot(coef+la*d, + safe_sparse_dot(P2, coef+la*d))) + if Fwd-Fw <= sigma*la*bound: + break + # update coefficients + # coef_old = coef.copy() + coef += la * d + # calculate eta, mu, score, Fisher matrix for next iteration + eta, mu, score, fisher = family._eta_mu_score_fisher( + coef=coef, phi=1, X=Xnew, y=y, weights=weights, link=link) + # stopping criterion for outer loop + # sum_i(|minimum-norm subgrad of F(w)_i|) + # fp_wP2 = f'(w) + w*P2 + # Note: eta, mu and score are already updated + if P2.ndim == 1: + fp_wP2 = -score + coef*P2 + else: + fp_wP2 = -score + safe_sparse_dot(coef, P2) + mn_subgrad = (np.where(coef == 0, + np.sign(fp_wP2)*np.maximum(np.abs(fp_wP2)-P1, 0), + fp_wP2+np.sign(coef)*P1)) + mn_subgrad = np.sum(np.abs(mn_subgrad)) + if mn_subgrad <= self.tol: + converged = True + break + # end of outer loop + if not converged: + warnings.warn("Coordinate descent failed to converge. Increase" + " the number of iterations (currently {0})" + .format(self.max_iter), ConvergenceWarning) ####################################################################### # 5. postprocessing # diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index de7de90db967b..87cc8bea45f5b 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -1,6 +1,7 @@ import numpy as np from numpy.testing import assert_allclose import scipy as sp +from scipy import sparse from sklearn.linear_model.glm import ( Link, @@ -126,7 +127,7 @@ def test_glm_identiy_regression(): GammaDistribution(), InverseGaussianDistribution(), TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), GeneralizedHyperbolicSecand()) - for solver in ['irls', 'lbfgs', 'newton-cg']: + for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: for family in families: glm = GeneralizedLinearRegressor( alpha=0, family=family, fit_intercept=False, solver=solver) @@ -162,28 +163,31 @@ def test_normal_ridge(): rng = np.random.RandomState(0) alpha = 1.0 - # With more samples than features + # 1. 
With more samples than features n_samples, n_features, n_predict = 6, 5, 10 y = rng.randn(n_samples) X = rng.randn(n_samples, n_features) T = rng.randn(n_predict, n_features) # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 - ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True) + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-6, + solver='svd', normalize=False) ridge.fit(X, y) - for solver in ['irls', 'lbfgs', 'newton-cg']: + for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', link='identity', - fit_intercept=True, solver=solver) + fit_intercept=True, tol=1e-6, + max_iter=100, solver=solver) glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) assert_array_almost_equal(glm.coef_, ridge.coef_) assert_almost_equal(glm.intercept_, ridge.intercept_) assert_array_almost_equal(glm.predict(T), ridge.predict(T)) - ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, normalize=False) + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-6, + solver='svd', normalize=False) ridge.fit(X, y) - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-6, family='normal', link='identity', fit_intercept=False, solver='irls') glm.fit(X, y) @@ -192,28 +196,30 @@ def test_normal_ridge(): assert_almost_equal(glm.intercept_, ridge.intercept_) assert_array_almost_equal(glm.predict(T), ridge.predict(T)) - # With more features than samples + # 2. With more features than samples and sparse n_samples, n_features, n_predict = 5, 10, 10 y = rng.randn(n_samples) - X = rng.randn(n_samples, n_features) - T = rng.randn(n_predict, n_features) + X = sparse.csr_matrix(rng.randn(n_samples, n_features)) + T = sparse.csr_matrix(rng.randn(n_predict, n_features)) # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 - ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True) + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-9, + solver='sag', normalize=False, max_iter=100000) ridge.fit(X, y) - for solver in ['irls', 'lbfgs', 'newton-cg']: - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, + for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-7, family='normal', link='identity', fit_intercept=True, solver=solver) glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) - assert_array_almost_equal(glm.coef_, ridge.coef_) - assert_almost_equal(glm.intercept_, ridge.intercept_) - assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=5) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=5) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=5) - ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, normalize=False) + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-6, + solver='sag', normalize=False, max_iter=1000) ridge.fit(X, y) - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-6, family='normal', link='identity', fit_intercept=False, solver='irls') glm.fit(X, y) @@ -240,9 +246,41 @@ def test_poisson_ridge(): # b 0.03741173122 X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T y = np.array([0, 1, 1, 2]) - glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, family='poisson', - link='log', tol=1e-10) + s_dec = {'irls': 7, 'lbfgs': 5, 'newton-cg': 7, 'cd': 7} + for solver in 
['irls', 'lbfgs', 'newton-cg', 'cd']: + glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, + fit_intercept=True, family='poisson', + link='log', tol=1e-7, + solver=solver, max_iter=200) + glm.fit(X, y) + assert_almost_equal(glm.intercept_, -0.12889386979, + decimal=s_dec[solver]) + assert_array_almost_equal(glm.coef_, [0.29019207995, 0.03741173122], + decimal=s_dec[solver]) + + +def test_poisson_enet(): + """Test elastic net regression with poisson family and LogLink + + Compare to R's glmnet""" + # library("glmnet") + # options(digits=10) + # library("glmnet") + # options(digits=10) + # df <- data.frame(a=c(-2,-1,1,2), b=c(0,0,1,1), y=c(0,1,1,2)) + # x <- data.matrix(df[,c("a", "b")]) + # y <- df$y + # fit <- glmnet(x=x, y=y, alpha=0.5, intercept=T, family="poisson", + # standardize=F, thresh=1e-10, nlambda=10000) + # coef(fit, s=1) + # (Intercept) -0.03550978409 + # a 0.16936423283 + # b . + X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T + y = np.array([0, 1, 1, 2]) + glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', + link='log', tol=1e-7) glm.fit(X, y) - assert_almost_equal(glm.intercept_, -0.12889386979, decimal=7) - assert_array_almost_equal(glm.coef_, [0.29019207995, 0.03741173122], + assert_almost_equal(glm.intercept_, -0.03550978409, decimal=7) + assert_array_almost_equal(glm.coef_, [0.16936423283, 0.], decimal=7) From 91497a2abc4824cdcb72f88dc26c9fd347d54b0d Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 24 Jan 2018 20:37:27 +0100 Subject: [PATCH 011/269] [WIP] Add Generalized Linear Models (#9405) * bug for sparse matrices for newton-cg solver, function grad_hess * reduce precision for solver newton-cg in test_poisson_ridge * remedy doctest issues in linear_model.rst for example of GeneralizedLinearRegressor * remove unused import of xrange from six --- doc/modules/linear_model.rst | 2 +- sklearn/linear_model/glm.py | 3 +-- sklearn/linear_model/tests/test_glm.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 834466e494a4a..1f0946e97b059 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -919,7 +919,7 @@ follows: >>> from sklearn.linear_model import GeneralizedLinearRegressor >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') - >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) # doctest: +SKIP + >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) # doctest: +NORMALIZE_WHITESPACE GeneralizedLinearRegressor(P1=None, P2=None, alpha=0.5, check_input=True, copy_X=True, family='poisson', fit_dispersion='chisqr', fit_intercept=True, l1_ratio=0, link='log', max_iter=100, diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 3de82c20f33cf..25f3ee1f52a2e 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -53,7 +53,6 @@ from ..base import BaseEstimator, RegressorMixin from ..exceptions import ConvergenceWarning from ..externals import six -from ..externals.six.moves import xrange from ..utils import check_array, check_X_y from ..utils.extmath import safe_sparse_dot from ..utils.optimize import newton_cg @@ -1288,7 +1287,7 @@ def grad_hess(coef, X, y, weights, link): if P2.ndim == 1: hessian[np.diag_indices_from(hessian)] += P2 else: - hessian += P2 + hessian = hessian + P2 def Hs(s): ret = safe_sparse_dot(hessian, s) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 87cc8bea45f5b..c48c59ebd0eda 100644 --- 
a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -246,7 +246,7 @@ def test_poisson_ridge(): # b 0.03741173122 X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T y = np.array([0, 1, 1, 2]) - s_dec = {'irls': 7, 'lbfgs': 5, 'newton-cg': 7, 'cd': 7} + s_dec = {'irls': 7, 'lbfgs': 5, 'newton-cg': 5, 'cd': 7} for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, fit_intercept=True, family='poisson', From b9e5105ddb011a2a4efd74eeb3033ebb824fa5a8 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 24 Jan 2018 21:44:10 +0100 Subject: [PATCH 012/269] [WIP] Add Generalized Linear Models (#9405) * bug in cd solver for sparse matrices * higer precision (smaller tol) in test_normal_ridge for sparse matrices * for each solver a separate precision (tol) in test_poisson_ridge --- sklearn/linear_model/glm.py | 2 +- sklearn/linear_model/tests/test_glm.py | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 25f3ee1f52a2e..b428ee7509d14 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -1376,7 +1376,7 @@ def Hs(s): B[np.diag_indices_from(B)] += P2 else: coef_P2 = safe_sparse_dot(coef, P2) - B += P2 + B = B + P2 A = -score + coef_P2 # + d*(H+P2) but d=0 so far inner_iter = 0 while inner_iter < max_inner_iter: diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index c48c59ebd0eda..baad852dfb945 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -207,19 +207,20 @@ def test_normal_ridge(): solver='sag', normalize=False, max_iter=100000) ridge.fit(X, y) for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-7, + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-8, family='normal', link='identity', - fit_intercept=True, solver=solver) + fit_intercept=True, solver=solver, + max_iter=300) glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=5) assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=5) assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=5) - ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-6, + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-7, solver='sag', normalize=False, max_iter=1000) ridge.fit(X, y) - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-6, + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-7, family='normal', link='identity', fit_intercept=False, solver='irls') glm.fit(X, y) @@ -247,11 +248,12 @@ def test_poisson_ridge(): X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T y = np.array([0, 1, 1, 2]) s_dec = {'irls': 7, 'lbfgs': 5, 'newton-cg': 5, 'cd': 7} + s_tol = {'irls': 1e-8, 'lbfgs': 1e-7, 'newton-cg': 1e-7, 'cd': 1e-8} for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, fit_intercept=True, family='poisson', - link='log', tol=1e-7, - solver=solver, max_iter=200) + link='log', tol=s_tol[solver], + solver=solver, max_iter=300) glm.fit(X, y) assert_almost_equal(glm.intercept_, -0.12889386979, decimal=s_dec[solver]) From e317422e9dd860c4ed5a3c6ac6191eb8e560c365 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 25 Jan 2018 21:44:04 +0100 Subject: [PATCH 013/269] [WIP] Add Generalized Linear Models 
(#9405) * improved documentation * additional option 'zero' for argument start_params * validation of sample_weight in function predict * input validation of estimate_phi * set default fit_dispersion=None * bug in estimate_phi because of weight rescaling * test for estimate_phi in normal ridge regression * extended tests for elastic net poisson --- sklearn/linear_model/glm.py | 116 ++++++++++++++++--------- sklearn/linear_model/tests/test_glm.py | 39 +++++++-- 2 files changed, 110 insertions(+), 45 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index b428ee7509d14..e5eda6108052c 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -11,13 +11,13 @@ # TODO: deal with option self.copy_X # TODO: Should the option `normalize` be included (like other linear models)? # So far, it is not included. User must pass a normalized X. -# TODO: Add cross validation +# TODO: Add cross validation support # TODO: Should GeneralizedLinearRegressor inherit from LinearModel? # So far, it does not. # TODO: Include further classes in class.rst? ExponentialDispersionModel? # TweedieDistribution? -# TODO: Negative values in P1 are not allowed so far. They could be used form -# group lasse. +# TODO: Negative values in P1 are not allowed so far. They could be used to +# for group lasso. # Design Decisions: # - Which name? GeneralizedLinearModel vs GeneralizedLinearRegressor. @@ -642,7 +642,7 @@ def _irls_step(X, W, P2, z): ------- coef: array, shape = (X.shape[1]) """ - # TODO: scipy.linalg.solve if faster, but ordinary least squares uses + # TODO: scipy.linalg.solve is faster, but ordinary least squares uses # scipy.linalg.lstsq. What is more appropriate? n_samples, n_features = X.shape if sparse.issparse(X): @@ -709,16 +709,20 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): (penalized) maximum likelihood which is equivalent to minimizing the deviance. - TODO: For `alpha` > 0, the feature matrix `X` is assumed to be - standardized. Call + For `alpha` > 0, the feature matrix `X` should be standardized in order to + penalize features equally strong. Call :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. - Otherwise, the strength of the penalty is different for the features. TODO: Estimation of the dispersion parameter phi. - TODO: Notes on weights and 'scaled' distributions. For Poisson, this means - to fit y = z/w with z=counts and w=exposure (time, money, persons, ...) - => y is a ratio with weights w. Same for other distributions. + If your target `y` is a ratio, you should also provide appropriate weights + `w`. As an example, consider Poission distributed counts `z` (integers) and + weights `w`=exposure (time, money, persons years, ...), then you fit + `y = z/w`, i.e. ``GeneralizedLinearModel(family='Poisson').fit(X, y, + sample_weight=w)``. You need the weights for the right mean, consider: + :math:`\bar(y) = \frac{\sum_i w_i y_i}{\sum_i w_i}`. + In this case one might say that y has a 'scaled' Poisson distributions. + The same holds for other distributions. Parameters ---------- @@ -800,8 +804,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): does not exit (first call to fit), option ``start_params`` sets the starting values for ``coef_`` and ``intercept_``. 
- start_params : None or array of shape (n_features, ) or 'least_squares'}, \ - optional (default=None) + start_params : {None, 'least_squares', 'zero'} or array of shape \ + (n_features, ) or }, optional (default=None) If an array of size n_features is supplied, use these as start values for ``coef_`` in the fit. If ``fit_intercept=True``, the first element is assumed to be the start value for the ``intercept_``. @@ -854,16 +858,18 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): n_iter_ : int Actual number of iterations of the solver. - Notes - ----- References ---------- - TODO + For the coordinate descent implementation: + .. [1] Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin + An Improved GLMNET for L1-regularized Logistic Regression, + Journal of Machine Learning Research 13 (2012) 1999-2030 + https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf """ def __init__(self, alpha=1.0, l1_ratio=0, P1=None, P2=None, fit_intercept=True, family='normal', link='identity', - fit_dispersion='chisqr', solver='auto', max_iter=100, + fit_dispersion=None, solver='auto', max_iter=100, tol=1e-4, warm_start=False, start_params=None, selection='random', random_state=None, copy_X=True, check_input=True, verbose=0): @@ -1004,9 +1010,10 @@ def fit(self, X, y, sample_weight=None): if start_params is None: pass elif isinstance(start_params, six.string_types): - if start_params not in ['least_squares']: + if start_params not in ['least_squares', 'zero']: raise ValueError("The argument start_params must be None, " - "'least-squares' or an array of right length," + "'least-squares', 'zero' or an array of right" + " length," " got(start_params={0})".format(start_params)) else: start_params = np.atleast_1d(start_params) @@ -1129,6 +1136,7 @@ def fit(self, X, y, sample_weight=None): # deviance = sum(sample_weight * unit_deviance), # we rescale weights such that sum(weights) = 1 and this becomes # 1/2*deviance + L1 + L2 with deviance=sum(weights * unit_deviance) + weights_sum = np.sum(weights) weights = weights/np.sum(weights) ####################################################################### @@ -1141,7 +1149,8 @@ def fit(self, X, y, sample_weight=None): coef = None if self.warm_start and hasattr(self, "coef_"): if self.fit_intercept: - coef = np.concatenate((self.intercept_, self.coef_)) + coef = np.concatenate((np.array([self.intercept_]), + self.coef_)) else: coef = self.coef_ elif self.start_params is None: @@ -1164,24 +1173,27 @@ def fit(self, X, y, sample_weight=None): else: # with L1 penalty, start with coef = 0 coef = np.zeros(n_features) - elif (isinstance(self.start_params, six.string_types) and - self.start_params == 'least_squares'): - if self.alpha == 0: - reg = LinearRegression(copy_X=True, fit_intercept=False) - reg.fit(Xnew, link.link(y)) - coef = reg.coef_ - elif self.l1_ratio <= 0.01: - # ElasticNet says l1_ratio <= 0.01 is not reliable, use Ridge - reg = Ridge(copy_X=True, fit_intercept=False, - alpha=self.alpha) - reg.fit(Xnew, link.link(y)) - coef = reg.coef_ - else: - # TODO: Does this make sense at all? 
- reg = ElasticNet(copy_X=True, fit_intercept=False, - alpha=self.alpha, l1_ratio=self.l1_ratio) - reg.fit(Xnew, link.link(y)) - coef = reg.coef_ + elif isinstance(self.start_params, six.string_types): + if self.start_params == 'zero': + coef = np.zeros(n_features) + elif self.start_params == 'least_squares': + if self.alpha == 0: + reg = LinearRegression(copy_X=True, fit_intercept=False) + reg.fit(Xnew, link.link(y)) + coef = reg.coef_ + elif self.l1_ratio <= 0.01: + # ElasticNet says l1_ratio <= 0.01 is not reliable + # => use Ridge + reg = Ridge(copy_X=True, fit_intercept=False, + alpha=self.alpha) + reg.fit(Xnew, link.link(y)) + coef = reg.coef_ + else: + # TODO: Does this make sense at all? + reg = ElasticNet(copy_X=True, fit_intercept=False, + alpha=self.alpha, l1_ratio=self.l1_ratio) + reg.fit(Xnew, link.link(y)) + coef = reg.coef_ else: coef = start_params @@ -1365,6 +1377,7 @@ def Hs(s): d = np.zeros_like(coef) # inner loop # TODO: use sparsity (coefficient already 0 due to L1 penalty) + # => active set of features for featurelist, see paper d = np.zeros_like(coef) # A = f'(w) + d*H(w) + (w+d)*P2 # B = H+P2 @@ -1508,7 +1521,8 @@ def Hs(s): self.coef_ = coef if self.fit_dispersion in ['chisqr', 'deviance']: - self.dispersion_ = self.estimate_phi(y, X, weights) + # attention because of rescaling of weights + self.dispersion_ = self.estimate_phi(y, X, weights)*weights_sum return self @@ -1544,9 +1558,23 @@ def predict(self, X, sample_weight=1): C : array, shape = (n_samples) Returns predicted values times sample_weight. """ - # TODO: validation of sample_weight eta = self.linear_predictor(X) mu = self._link_instance.inverse(eta) + if sample_weight is None: + return mu + elif np.isscalar(sample_weight): + if sample_weight <= 0: + raise ValueError("Sample weight must be positive, " + "got (sample_weight={0})." + .format(sample_weight)) + else: + sample_weights = np.atleast_1d(sample_weight) + if sample_weight.ndim > 1: + raise ValueError("Sample weight must be 1D array or scalar.") + elif sample_weight.shape[0] != mu.shape[0]: + raise ValueError("Sample weights must have the same length as" + " X.shape[1].") + return mu*sample_weight def estimate_phi(self, y, X, sample_weight): @@ -1554,10 +1582,20 @@ def estimate_phi(self, y, X, sample_weight): Returns the estimate. """ check_is_fitted(self, "coef_") + _dtype = [np.float64, np.float32] + X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], + dtype=_dtype, y_numeric=True, multi_output=False) n_samples, n_features = X.shape eta = safe_sparse_dot(X, self.coef_, dense_output=True) if self.fit_intercept is True: eta += self.intercept_ + n_features += 1 + if n_samples <= n_features: + raise ValueError("Estimation of dispersion parameter phi requires" + " more samples than features, got" + " samples=X.shape[0]={0} and" + " n_features=X.shape[1]+fit_intercept={1}." + .format(n_samples, n_features)) mu = self._link_instance.inverse(eta) if self.fit_dispersion == 'chisqr': chisq = np.sum(sample_weight*(y-mu)**2 / diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index baad852dfb945..ee90cd51c874d 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -164,7 +164,7 @@ def test_normal_ridge(): alpha = 1.0 # 1. 
With more samples than features - n_samples, n_features, n_predict = 6, 5, 10 + n_samples, n_features, n_predict = 10, 5, 10 y = rng.randn(n_samples) X = rng.randn(n_samples, n_features) T = rng.randn(n_predict, n_features) @@ -189,12 +189,16 @@ def test_normal_ridge(): ridge.fit(X, y) glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-6, family='normal', link='identity', - fit_intercept=False, solver='irls') + fit_intercept=False, solver='irls', + fit_dispersion='chisqr') glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) assert_array_almost_equal(glm.coef_, ridge.coef_) assert_almost_equal(glm.intercept_, ridge.intercept_) assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + mu = glm.predict(X) + assert_almost_equal(glm.dispersion_, + np.sum((y-mu)**2/(n_samples-n_features))) # 2. With more features than samples and sparse n_samples, n_features, n_predict = 5, 10, 10 @@ -278,11 +282,34 @@ def test_poisson_enet(): # (Intercept) -0.03550978409 # a 0.16936423283 # b . + glmnet_intercept = -0.03550978409 + glmnet_coef = [0.16936423283, 0.] X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T y = np.array([0, 1, 1, 2]) glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', - link='log', tol=1e-7) + link='log', solver='cd', tol=1e-7) + glm.fit(X, y) + assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=7) + assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=7) + + # same for start_params='zero' with reduced precision + glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', + link='log', solver='cd', tol=1e-5, + start_params='zero') + glm.fit(X, y) + assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=4) + assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=4) + + # start_params='least_squares' with different alpha + glm = GeneralizedLinearRegressor(alpha=0.005, l1_ratio=0.5, + family='poisson', + link='log', solver='cd', tol=1e-5, + start_params='zero') + glm.fit(X, y) + # warm start with original alpha and use of sparse matrices + glm.warm_start = True + glm.alpha = 1 + X = sparse.csr_matrix(X) glm.fit(X, y) - assert_almost_equal(glm.intercept_, -0.03550978409, decimal=7) - assert_array_almost_equal(glm.coef_, [0.16936423283, 0.], - decimal=7) + assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=4) + assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=4) From 9a9818441d605bf86547651997db81e969f41cdf Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 25 Jan 2018 22:59:46 +0100 Subject: [PATCH 014/269] [WIP] Add Generalized Linear Models (#9405) * new helper function _check_weights for validation of sample_weight * fix white space issue in doctest of linear_model.rst --- doc/modules/linear_model.rst | 8 ++--- sklearn/linear_model/glm.py | 59 +++++++++++++++++------------------- 2 files changed, 31 insertions(+), 36 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 1f0946e97b059..f7b0ca0cc7add 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -921,10 +921,10 @@ follows: >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) # doctest: +NORMALIZE_WHITESPACE GeneralizedLinearRegressor(P1=None, P2=None, alpha=0.5, check_input=True, - copy_X=True, family='poisson', fit_dispersion='chisqr', - fit_intercept=True, l1_ratio=0, link='log', max_iter=100, - random_state=None, selection='random', solver='auto', - start_params=None, tol=0.0001, 
verbose=0, warm_start=False) + copy_X=True, family='poisson', fit_dispersion='chisqr', + fit_intercept=True, l1_ratio=0, link='log', max_iter=100, + random_state=None, selection='random', solver='auto', + start_params=None, tol=0.0001, verbose=0, warm_start=False) >>> reg.coef_ array([ 0.24630255, 0.43373521]) >>> reg.intercept_ #doctest: +ELLIPSIS diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index e5eda6108052c..138830ea431c9 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -59,6 +59,26 @@ from ..utils.validation import check_is_fitted, check_random_state +def _check_weights(sample_weight, n_samples): + if sample_weight is None: + weights = np.ones(n_samples) + elif np.isscalar(sample_weight): + if sample_weight < 0: + raise ValueError("Sample weights must be non-negative.") + weights = sample_weight*np.ones(n_samples) + else: + weights = np.atleast_1d(sample_weight) + if weights.ndim > 1: + raise ValueError("Sample weight must be 1D array or scalar") + elif weights.shape[0] != n_samples: + raise ValueError("Sample weights must have the same length as" + " y") + if not np.all(sample_weight >= 0): + raise ValueError("Sample weights must be non-negative.") + + return weights + + class Link(six.with_metaclass(ABCMeta)): """Abstract base class for Link funtions """ @@ -925,17 +945,7 @@ def fit(self, X, y, sample_weight=None): dtype=_dtype, y_numeric=True, multi_output=False) y = y.astype(np.float64) - if sample_weight is None: - weights = np.ones_like(y) - elif np.isscalar(sample_weight): - weights = sample_weight*np.ones_like(y) - else: - weights = np.atleast_1d(sample_weight) - if weights.ndim > 1: - raise ValueError("Sample weight must be 1D array or scalar") - elif weights.shape[0] != y.shape[0]: - raise ValueError("Sample weights must have the same length as" - " y") + weights = _check_weights(sample_weight, y.shape[0]) # 1.2 validate arguments of __init__ ################################## # Garantee that self._family_instance is an instance of class @@ -1544,7 +1554,7 @@ def linear_predictor(self, X): return safe_sparse_dot(X, self.coef_, dense_output=True) + self.intercept_ - def predict(self, X, sample_weight=1): + def predict(self, X, sample_weight=None): """Predict uing GLM with feature matrix X. If sample_weight is given, returns prediction*sample_weight. @@ -1558,26 +1568,13 @@ def predict(self, X, sample_weight=1): C : array, shape = (n_samples) Returns predicted values times sample_weight. """ + weights = _check_weights(sample_weight, X.shape[0]) eta = self.linear_predictor(X) mu = self._link_instance.inverse(eta) - if sample_weight is None: - return mu - elif np.isscalar(sample_weight): - if sample_weight <= 0: - raise ValueError("Sample weight must be positive, " - "got (sample_weight={0})." - .format(sample_weight)) - else: - sample_weights = np.atleast_1d(sample_weight) - if sample_weight.ndim > 1: - raise ValueError("Sample weight must be 1D array or scalar.") - elif sample_weight.shape[0] != mu.shape[0]: - raise ValueError("Sample weights must have the same length as" - " X.shape[1].") - return mu*sample_weight + return mu*weights - def estimate_phi(self, y, X, sample_weight): + def estimate_phi(self, y, X, sample_weight=None): """Estimation of the dispersion parameter. Returns the estimate. 
""" @@ -1586,6 +1583,7 @@ def estimate_phi(self, y, X, sample_weight): X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], dtype=_dtype, y_numeric=True, multi_output=False) n_samples, n_features = X.shape + weights = _check_weights(sample_weight, n_samples) eta = safe_sparse_dot(X, self.coef_, dense_output=True) if self.fit_intercept is True: eta += self.intercept_ @@ -1640,10 +1638,7 @@ def score(self, X, y, sample_weight=None): # Note, default score defined in RegressorMixin is R^2 score. # TODO: make D^2 a score function in module metrics (and thereby get # input validation and so on) - if sample_weight is None: - weights = np.ones_like(y) - else: - weights = np.atleast_1d(sample_weight) + weights = _check_weights(sample_weight, y.shape[0]) mu = self.predict(X) dev = self._family_instance.deviance(y, mu, weights=weights) y_mean = np.average(y, weights=weights) From db9defe6b7637fe022034ca7f435f4fd37f6c118 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 26 Jan 2018 08:33:59 +0100 Subject: [PATCH 015/269] [WIP] Add Generalized Linear Models (#9405) * fit_dispersion default=None also in docs. * improved docs. * fixed input validation of predict * fixed bug for sample_weight in estimate_phi --- sklearn/linear_model/glm.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 138830ea431c9..22a8be9e50828 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -754,7 +754,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): case, the design matrix X must have full column rank (no collinearities). - l1_ratio : float, optional (defaul=0) + l1_ratio : float, optional (default=0) The elastic net mixing parameter, with ``0 <= l1_ratio <= 1``. For ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a @@ -768,7 +768,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Note that n_features* = X.shape[1] = length of coef_ (intercept always excluded from counting). - P2 : None or array of shape (n_features*, n_features*) + P2 : None or array of shape (n_features*, n_features*), optional\ + (default=None) With this square matrix the L2 penalty is calculated as `w P2 w`. This gives a fine control over this penalty (Tikhonov regularization). @@ -781,20 +782,21 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): family : {'normal', 'poisson', 'gamma', 'inverse.gaussian'} or an instance\ of class ExponentialDispersionModel, optional(default='normal') - the distributional assumption of the GLM. + the distributional assumption of the GLM, i.e. which loss function to + be minimized. link : {'identity', 'log'} or an instance of class Link, optional (default='identity') the link function of the GLM, i.e. mapping from linear predictor (X*coef) to expectation (mu). - fit_dispersion : {None, 'chisqr', 'deviance'}, optional (defaul='chisqr') + fit_dispersion : {None, 'chisqr', 'deviance'}, optional (defaul=None) method for estimation of the dispersion parameter phi. Whether to use the chi squared statisic or the deviance statistic. If None, the dispersion is not estimated. solver : {'auto', 'irls', 'newton-cg', 'lbfgs', 'cd'}, \ - optional (defaul='auto') + optional (default='auto') Algorithm to use in the optimization problem. - 'auto' sets 'irls' if l1_ratio equals 0, else 'cd'. 
@@ -830,11 +832,12 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): for ``coef_`` in the fit. If ``fit_intercept=True``, the first element is assumed to be the start value for the ``intercept_``. If 'least_squares' is set, the result of a least squares fit in the - link space (linear predictor) is taken. If ``None``, the start values - are calculated by setting mu to family.starting_mu(..) and one step of - irls. - This option only applies if ``warm_start=False`` or if fit is called - the first time (``self.coef_`` does not exist). + link space (linear predictor) is taken. + If 'zero' is set, all coefficients start with zero. + If ``None``, the start values are calculated by setting mu to + family.starting_mu(..) and one step of irls. + These options only apply if ``warm_start=False`` or if fit is called + the first time (``self.coef_`` does not yet exist). selection : str, optional (default='random') For the solver 'cd' (coordinate descent), the coordinates (features) @@ -1550,7 +1553,9 @@ def linear_predictor(self, X): Returns predicted values of linear predictor. """ check_is_fitted(self, "coef_") - X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) + X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], + dtype='numeric', copy=True, ensure_2d=True, + allow_nd=False) return safe_sparse_dot(X, self.coef_, dense_output=True) + self.intercept_ @@ -1568,9 +1573,10 @@ def predict(self, X, sample_weight=None): C : array, shape = (n_samples) Returns predicted values times sample_weight. """ - weights = _check_weights(sample_weight, X.shape[0]) + # validation of X in linear_predictor eta = self.linear_predictor(X) mu = self._link_instance.inverse(eta) + weights = _check_weights(sample_weight, X.shape[0]) return mu*weights @@ -1596,11 +1602,11 @@ def estimate_phi(self, y, X, sample_weight=None): .format(n_samples, n_features)) mu = self._link_instance.inverse(eta) if self.fit_dispersion == 'chisqr': - chisq = np.sum(sample_weight*(y-mu)**2 / + chisq = np.sum(weights*(y-mu)**2 / self._family_instance.unit_variance(mu)) return chisq/(n_samples - n_features) elif self.fit_dispersion == 'deviance': - dev = self._family_instance.deviance(y, mu, sample_weight) + dev = self._family_instance.deviance(y, mu, weights) return dev/(n_samples - n_features) # Note: check_estimator(GeneralizedLinearRegressor) might raise From dc7fdd7f8f6a3a2276f5f96aa6d5dd6ad3ce853e Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 26 Jan 2018 08:41:24 +0100 Subject: [PATCH 016/269] [WIP] Add Generalized Linear Models (#9405) * improved docs --- sklearn/linear_model/tests/test_glm.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index ee90cd51c874d..0af837c9c73f3 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -177,7 +177,8 @@ def test_normal_ridge(): glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', link='identity', fit_intercept=True, tol=1e-6, - max_iter=100, solver=solver) + max_iter=100, solver=solver, + random_state=42) glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) assert_array_almost_equal(glm.coef_, ridge.coef_) @@ -214,7 +215,7 @@ def test_normal_ridge(): glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-8, family='normal', link='identity', fit_intercept=True, solver=solver, - max_iter=300) + max_iter=300, random_state=42) glm.fit(X, y) 
assert_equal(glm.coef_.shape, (X.shape[1], )) assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=5) @@ -257,7 +258,8 @@ def test_poisson_ridge(): glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, fit_intercept=True, family='poisson', link='log', tol=s_tol[solver], - solver=solver, max_iter=300) + solver=solver, max_iter=300, + random_state=42) glm.fit(X, y) assert_almost_equal(glm.intercept_, -0.12889386979, decimal=s_dec[solver]) @@ -282,20 +284,23 @@ def test_poisson_enet(): # (Intercept) -0.03550978409 # a 0.16936423283 # b . + rand = 0 glmnet_intercept = -0.03550978409 glmnet_coef = [0.16936423283, 0.] X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T y = np.array([0, 1, 1, 2]) glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', - link='log', solver='cd', tol=1e-7) + link='log', solver='cd', tol=1e-7, + selection='random', random_state=42) glm.fit(X, y) assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=7) assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=7) - # same for start_params='zero' with reduced precision + # same for start_params='zero' and selection='cyclic' + # with reduced precision glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', link='log', solver='cd', tol=1e-5, - start_params='zero') + selection='cyclic', start_params='zero') glm.fit(X, y) assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=4) assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=4) From b11d06ba72865c14b0532c6d6c34d264a09d7ae4 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 26 Jan 2018 16:57:45 +0100 Subject: [PATCH 017/269] [WIP] Add Generalized Linear Models (#9405) * fixed input validation of X in predict --- sklearn/linear_model/glm.py | 4 +++- sklearn/linear_model/tests/test_glm.py | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 22a8be9e50828..032ded86816dd 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -1573,7 +1573,9 @@ def predict(self, X, sample_weight=None): C : array, shape = (n_samples) Returns predicted values times sample_weight. """ - # validation of X in linear_predictor + X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], + dtype='numeric', copy=True, ensure_2d=True, + allow_nd=False) eta = self.linear_predictor(X) mu = self._link_instance.inverse(eta) weights = _check_weights(sample_weight, X.shape[0]) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 0af837c9c73f3..776edd8aeec46 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -284,7 +284,6 @@ def test_poisson_enet(): # (Intercept) -0.03550978409 # a 0.16936423283 # b . - rand = 0 glmnet_intercept = -0.03550978409 glmnet_coef = [0.16936423283, 0.] 
X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T From 9e6c01378a4cb245824bcb9429b3d566652af743 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 26 Jan 2018 17:37:44 +0100 Subject: [PATCH 018/269] [WIP] Add Generalized Linear Models (#9405) * redundant line of code 'd = np.zeros_like(coef)' --- doc/modules/linear_model.rst | 2 +- sklearn/linear_model/glm.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index f7b0ca0cc7add..dcb35b6a5d941 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -921,7 +921,7 @@ follows: >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) # doctest: +NORMALIZE_WHITESPACE GeneralizedLinearRegressor(P1=None, P2=None, alpha=0.5, check_input=True, - copy_X=True, family='poisson', fit_dispersion='chisqr', + copy_X=True, family='poisson', fit_dispersion=None, fit_intercept=True, l1_ratio=0, link='log', max_iter=100, random_state=None, selection='random', solver='auto', start_params=None, tol=0.0001, verbose=0, warm_start=False) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 032ded86816dd..8856af8ec698a 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -73,7 +73,7 @@ def _check_weights(sample_weight, n_samples): elif weights.shape[0] != n_samples: raise ValueError("Sample weights must have the same length as" " y") - if not np.all(sample_weight >= 0): + if not np.all(weights >= 0): raise ValueError("Sample weights must be non-negative.") return weights @@ -1132,7 +1132,7 @@ def fit(self, X, y, sample_weight=None): if sparse.issparse(P2): # TODO: check sparse P2 for non-negativeness # raise NotImplementedError("Check sparse P2 for " - # "non-negaitveness is not yet " + # "non-negativeness is not yet " # "implemented.") pass elif P2.ndim == 2: @@ -1391,7 +1391,6 @@ def Hs(s): # inner loop # TODO: use sparsity (coefficient already 0 due to L1 penalty) # => active set of features for featurelist, see paper - d = np.zeros_like(coef) # A = f'(w) + d*H(w) + (w+d)*P2 # B = H+P2 # Note: f'=-score and H=fisher are updated at the end of outer From bad0190a22623eae3a2f6dfcdb0fd8caee625111 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 27 Jan 2018 20:38:02 +0100 Subject: [PATCH 019/269] [WIP] Add Generalized Linear Models (#9405) * added test to compare to ElasticNet * deleted identical comment lines --- sklearn/linear_model/tests/test_glm.py | 29 +++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 776edd8aeec46..9990cafe2cbcf 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -12,7 +12,7 @@ GammaDistribution, InverseGaussianDistribution, GeneralizedHyperbolicSecand, GeneralizedLinearRegressor) -from sklearn.linear_model.ridge import Ridge +from sklearn.linear_model import ElasticNet, Ridge from sklearn.utils.testing import ( assert_equal, assert_almost_equal, @@ -267,14 +267,37 @@ def test_poisson_ridge(): decimal=s_dec[solver]) +def test_normal_enet(): + """Tet elastic net regression with normal/gaussian family""" + rng = np.random.RandomState(0) + alpha, l1_ratio = 0.3, 0.7 + n_samples, n_features = 20, 2 + X = rng.randn(n_samples, n_features).copy(order='F') + beta = rng.randn(n_features) + y = 2 + np.dot(X, beta) + rng.randn(n_samples) + + glm 
= GeneralizedLinearRegressor(alpha=alpha, l1_ratio=l1_ratio, + family='normal', link='identity', + fit_intercept=True, tol=1e-7, + max_iter=100, selection='cyclic', + solver='cd', start_params='zero', + check_input=False) + glm.fit(X, y) + + enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=True, + normalize=False, tol=1e-7, copy_X=True) + enet.fit(X, y) + + assert_almost_equal(glm.intercept_, enet.intercept_) + assert_array_almost_equal(glm.coef_, enet.coef_) + + def test_poisson_enet(): """Test elastic net regression with poisson family and LogLink Compare to R's glmnet""" # library("glmnet") # options(digits=10) - # library("glmnet") - # options(digits=10) # df <- data.frame(a=c(-2,-1,1,2), b=c(0,0,1,1), y=c(0,1,1,2)) # x <- data.matrix(df[,c("a", "b")]) # y <- df$y From 48137d86079c9a8efd15d57e719e3ee35f1644c9 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 28 Jan 2018 11:52:19 +0100 Subject: [PATCH 020/269] [WIP] Add Generalized Linear Models (#9405) * increased precision in test_normal_enet --- sklearn/linear_model/tests/test_glm.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 9990cafe2cbcf..6f8bdd3a72f40 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -278,18 +278,18 @@ def test_normal_enet(): glm = GeneralizedLinearRegressor(alpha=alpha, l1_ratio=l1_ratio, family='normal', link='identity', - fit_intercept=True, tol=1e-7, + fit_intercept=True, tol=1e-8, max_iter=100, selection='cyclic', solver='cd', start_params='zero', check_input=False) glm.fit(X, y) enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=True, - normalize=False, tol=1e-7, copy_X=True) + normalize=False, tol=1e-8, copy_X=True) enet.fit(X, y) - assert_almost_equal(glm.intercept_, enet.intercept_) - assert_array_almost_equal(glm.coef_, enet.coef_) + assert_almost_equal(glm.intercept_, enet.intercept_, decimal=7) + assert_array_almost_equal(glm.coef_, enet.coef_, decimal=7) def test_poisson_enet(): From 2c2a077a2e8c57bdf3c945678526d6efdb5763e9 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 28 Jan 2018 12:42:03 +0100 Subject: [PATCH 021/269] [WIP] Add Generalized Linear Models (#9405) * better doc for heavy tailed distributions --- doc/modules/linear_model.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index dcb35b6a5d941..5cb8e54afbb06 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -905,7 +905,8 @@ are the following: * If the target values are positive valued and skewed, you might try a Gamma deviance. - * If the target values seem to be heavy tailed, you might try an Inverse Gaussian deviance (or even higher variance power of the Tweedie family). + * If the target values seem to be heavier tailed than a Gamma distribution, you might try an Inverse Gaussian deviance (or even higher variance powers of the Tweedie family). + Keep in mind that the mean is not a good measure for very heavy tailed distributions, cf. extreme value theory. 
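An illustrative (not doctested) sketch of how these choices map onto the estimator; per its docstring, the ``family`` parameter accepts either one of the family strings or an ``ExponentialDispersionModel`` instance such as ``TweedieDistribution``::

    from sklearn.linear_model import GeneralizedLinearRegressor
    from sklearn.linear_model.glm import TweedieDistribution

    # counts or frequencies: Poisson deviance with log-link
    reg_counts = GeneralizedLinearRegressor(family='poisson', link='log')

    # positive, skewed targets: Gamma deviance with log-link
    reg_skewed = GeneralizedLinearRegressor(family='gamma', link='log')

    # heavier tails: Inverse Gaussian deviance, here via its Tweedie power
    reg_heavy = GeneralizedLinearRegressor(family=TweedieDistribution(power=3),
                                           link='log')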
Since the linear predictor :math:`Xw` can be negative and Poisson, Gamma and Inverse Gaussian distributions don't support negative values, From 15931c3148b68c47aa3c3c19983525ae758a0981 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 28 Jan 2018 15:18:26 +0100 Subject: [PATCH 022/269] [WIP] Add Generalized Linear Models (#9405) * improved input validation and testing of them --- sklearn/linear_model/glm.py | 24 +++- sklearn/linear_model/tests/test_glm.py | 174 ++++++++++++++++++++++++- 2 files changed, 189 insertions(+), 9 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 8856af8ec698a..33df6b4c9b850 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -63,11 +63,14 @@ def _check_weights(sample_weight, n_samples): if sample_weight is None: weights = np.ones(n_samples) elif np.isscalar(sample_weight): - if sample_weight < 0: + if sample_weight <= 0: raise ValueError("Sample weights must be non-negative.") weights = sample_weight*np.ones(n_samples) else: - weights = np.atleast_1d(sample_weight) + _dtype = [np.float64, np.float32] + weights = check_array(sample_weight, accept_sparse='csr', + force_all_finite=True, ensure_2d=False, + dtype=_dtype) if weights.ndim > 1: raise ValueError("Sample weight must be 1D array or scalar") elif weights.shape[0] != n_samples: @@ -75,6 +78,9 @@ def _check_weights(sample_weight, n_samples): " y") if not np.all(weights >= 0): raise ValueError("Sample weights must be non-negative.") + elif not np.sum(weights) > 0: + raise ValueError("Sample weights must have at least one positive " + "element.") return weights @@ -1010,10 +1016,12 @@ def fit(self, X, y, sample_weight=None): "with L1 penalties, which are included with " "(alpha={1}) and (l1_ratio={2})." 
.format(solver, self.alpha, self.l1_ratio)) - if not isinstance(self.max_iter, numbers.Number) or self.max_iter < 0: - raise ValueError("Maximum number of iteration must be positive;" + if (not isinstance(self.max_iter, six.integer_types) + or self.max_iter <= 0): + raise ValueError("Maximum number of iteration must be a positive " + "integer;" " got (max_iter={0!r})".format(self.max_iter)) - if not isinstance(self.tol, numbers.Number) or self.tol < 0: + if not isinstance(self.tol, numbers.Number) or self.tol <= 0: raise ValueError("Tolerance for stopping criteria must be " "positive; got (tol={0!r})".format(self.tol)) if not isinstance(self.warm_start, bool): @@ -1029,7 +1037,9 @@ def fit(self, X, y, sample_weight=None): " length," " got(start_params={0})".format(start_params)) else: - start_params = np.atleast_1d(start_params) + start_params = check_array(start_params, accept_sparse='csr', + force_all_finite=True, ensure_2d=False, + dtype=_dtype, copy=True) if ((start_params.shape[0] != X.shape[1] + self.fit_intercept) or (start_params.ndim != 1)): raise ValueError("Start values for parameters must have the" @@ -1160,7 +1170,7 @@ def fit(self, X, y, sample_weight=None): # set start values for coef coef = None - if self.warm_start and hasattr(self, "coef_"): + if self.warm_start and hasattr(self, 'coef_'): if self.fit_intercept: coef = np.concatenate((np.array([self.intercept_]), self.coef_)) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 6f8bdd3a72f40..1abbcf0540e28 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -5,7 +5,7 @@ from sklearn.linear_model.glm import ( Link, - # IdentityLink, + IdentityLink, LogLink, TweedieDistribution, NormalDistribution, PoissonDistribution, @@ -16,7 +16,8 @@ from sklearn.utils.testing import ( assert_equal, assert_almost_equal, - assert_array_equal, assert_array_almost_equal) + assert_array_equal, assert_array_almost_equal, + assert_raises) def test_link_properties(): @@ -102,6 +103,34 @@ def f(coef): assert_allclose(fisher, approx, rtol=1e-3) +def test_sample_weights_validation(): + """Test the raised errors in the validation of sample_weight""" + # 1. scalar value but not positive + X = [[1]] + y = [1] + weights = 0 + glm = GeneralizedLinearRegressor(fit_intercept=False) + assert_raises(ValueError, glm.fit, X, y, weights) + + # 2. 2d array + weights = [[0]] + assert_raises(ValueError, glm.fit, X, y, weights) + + # 3. 1d but wrong length + weights = [1, 0] + assert_raises(ValueError, glm.fit, X, y, weights) + + # 4. 1d but only zeros (sum not greater than 0) + weights = [0, 0] + X = [[0], [1]] + y = [1, 2] + assert_raises(ValueError, glm.fit, X, y, weights) + + # 5. 
1d but weith a negative value + weights = [2, -1] + assert_raises(ValueError, glm.fit, X, y, weights) + + def test_glm_family_argument(): """Test GLM family argument set as string """ @@ -115,6 +144,147 @@ def test_glm_family_argument(): alpha=0).fit(X, y) assert_equal(type(glm._family_instance), type(fam)) + glm = GeneralizedLinearRegressor(family='not a family', + fit_intercept=False) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_link_argument(): + """Test GLM link argument set as string + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for (l, link) in [('identity', IdentityLink()), + ('log', LogLink())]: + glm = GeneralizedLinearRegressor(family='normal', fit_intercept=False, + link=l).fit(X, y) + assert_equal(type(glm._link_instance), type(link)) + + glm = GeneralizedLinearRegressor(family='normal', fit_intercept=False, + link='not a link') + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_alpha_argument(): + """Test GLM alpha argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for alpha in ['not a number', -4.2]: + glm = GeneralizedLinearRegressor(family='normal', fit_intercept=False, + alpha=alpha) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_l1_ratio_argument(): + """Test GLM l1_ratio argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for l1_ratio in ['not a number', -4.2, 1.1, [1]]: + glm = GeneralizedLinearRegressor(family='normal', fit_intercept=False, + l1_ratio=l1_ratio) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_fit_intercept_argument(): + """Test GLM fit_intercept argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for fit_intercept in ['not bool', 1, 0, [True]]: + glm = GeneralizedLinearRegressor(fit_intercept=fit_intercept) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_solver_argument(): + """Test GLM solver argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for solver in ['not a solver', 1, [1]]: + glm = GeneralizedLinearRegressor(solver=solver) + assert_raises(ValueError, glm.fit, X, y) + + # solver not suitable for L1 penalty + for solver in ['irls', 'lbfgs', 'newton-cg']: + glm = GeneralizedLinearRegressor(solver=solver, alpha=1, l1_ratio=0.1) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_max_iter_argument(): + """Test GLM max_iter argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for max_iter in ['not a number', 0, -1, 5.5, [1]]: + glm = GeneralizedLinearRegressor(max_iter=max_iter) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_tol_argument(): + """Test GLM tol argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for tol in ['not a number', 0, -1.0, [1e-3]]: + glm = GeneralizedLinearRegressor(tol=tol) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_warm_start_argument(): + """Test GLM warm_start argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for warm_start in ['not bool', 1, 0, [True]]: + glm = GeneralizedLinearRegressor(warm_start=warm_start) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_start_params_argument(): + """Test GLM start_params argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for start_params in ['not a start_params', ['zero'], [0, 0, 0], + [[0, 0]], ['a', 'b']]: + glm = GeneralizedLinearRegressor(start_params=start_params) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_selection_argument(): + """Test GLM selection argument + """ + y = np.array([1, 2]) + X = np.array([[1], 
[1]]) + for selection in ['not a selection', 1, 0, ['cyclic']]: + glm = GeneralizedLinearRegressor(selection=selection) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_check_input_argument(): + """Test GLM check_input argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for check_input in ['not bool', 1, 0, [True]]: + glm = GeneralizedLinearRegressor(check_input=check_input) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_random_state_argument(): + """Test GLM random_state argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for random_state in ['a string', 0.5, [0]]: + glm = GeneralizedLinearRegressor(random_state=random_state) + assert_raises(ValueError, glm.fit, X, y) + + +# TODO: check P1 and P2 +# TODO: check additional validations if check_input == True def test_glm_identiy_regression(): """Test GLM regression with identity link on a simple dataset From feedba379c2a8f53a7a9792e33041f29da7a4c95 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 30 Mar 2018 18:50:06 +0200 Subject: [PATCH 023/269] [MRG] Add Generalized Linear Models (#9405) * improved input validation and testing of P1 * test case for validation of argument P2 * test case for validation of argument copy_X --- sklearn/linear_model/glm.py | 3 ++ sklearn/linear_model/tests/test_glm.py | 48 +++++++++++++++++++++----- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 33df6b4c9b850..eae4a56ea1d95 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -1065,6 +1065,9 @@ def fit(self, X, y, sample_weight=None): P1 = np.ones(X.shape[1]) else: P1 = np.atleast_1d(np.copy(self.P1)) + if P1.dtype.kind not in ['b', 'i', 'u', 'f']: + raise ValueError("P1 must be a numeric value; " + "got (dtype={0}).".format(P1.dtype)) if (P1.ndim != 1) or (P1.shape[0] != X.shape[1]): raise ValueError("P1 must be either None or an 1D array with " "the length of X.shape[1]; " diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 1abbcf0540e28..edf579a416973 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -187,6 +187,27 @@ def test_glm_l1_ratio_argument(): assert_raises(ValueError, glm.fit, X, y) +def test_glm_P1_argument(): + """Test GLM P1 arguments + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for P1 in [['a string', 'a string'], [1, [2]], [1, 2, 3]]: + glm = GeneralizedLinearRegressor(P1=P1) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_P2_argument(): + """Test GLM P2 arguments + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for P2 in [np.full((2, 2), 'a string'), [[1, [2]], [3, 4]], [1, 2, 3], + [[1, 2]], [[1], [2]]]: + glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False) + assert_raises(ValueError, glm.fit, X, y) + + def test_glm_fit_intercept_argument(): """Test GLM fit_intercept argument """ @@ -263,27 +284,36 @@ def test_glm_selection_argument(): assert_raises(ValueError, glm.fit, X, y) -def test_glm_check_input_argument(): - """Test GLM check_input argument +def test_glm_random_state_argument(): + """Test GLM random_state argument """ y = np.array([1, 2]) X = np.array([[1], [1]]) - for check_input in ['not bool', 1, 0, [True]]: - glm = GeneralizedLinearRegressor(check_input=check_input) + for random_state in ['a string', 0.5, [0]]: + glm = GeneralizedLinearRegressor(random_state=random_state) assert_raises(ValueError, glm.fit, X, y) -def 
test_glm_random_state_argument(): - """Test GLM random_state argument +def test_glm_copy_X_argument(): + """Test GLM copy_X arguments """ y = np.array([1, 2]) X = np.array([[1], [1]]) - for random_state in ['a string', 0.5, [0]]: - glm = GeneralizedLinearRegressor(random_state=random_state) + for copy_X in ['not bool', 1, 0, [True]]: + glm = GeneralizedLinearRegressor(copy_X=copy_X) + assert_raises(ValueError, glm.fit, X, y) + + +def test_glm_check_input_argument(): + """Test GLM check_input argument + """ + y = np.array([1, 2]) + X = np.array([[1], [1]]) + for check_input in ['not bool', 1, 0, [True]]: + glm = GeneralizedLinearRegressor(check_input=check_input) assert_raises(ValueError, glm.fit, X, y) -# TODO: check P1 and P2 # TODO: check additional validations if check_input == True def test_glm_identiy_regression(): From 6fdfb47428571b4c8e89046a7b1f481711832f61 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 30 Mar 2018 19:41:09 +0200 Subject: [PATCH 024/269] [MRG] Add Generalized Linear Models (#9405) * fix doctest failure in example of linear_model.rst * fix dtype issue in test_glm_P2_argument --- doc/modules/linear_model.rst | 4 ++-- sklearn/linear_model/tests/test_glm.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 5cb8e54afbb06..9f85da771c6f1 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -926,8 +926,8 @@ follows: fit_intercept=True, l1_ratio=0, link='log', max_iter=100, random_state=None, selection='random', solver='auto', start_params=None, tol=0.0001, verbose=0, warm_start=False) - >>> reg.coef_ - array([ 0.24630255, 0.43373521]) + >>> reg.coef_ # doctest: +NORMALIZE_WHITESPACE + array([0.24630255, 0.43373521]) >>> reg.intercept_ #doctest: +ELLIPSIS -0.76383575... diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index edf579a416973..c5d132d35bdb9 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -202,8 +202,8 @@ def test_glm_P2_argument(): """ y = np.array([1, 2]) X = np.array([[1], [1]]) - for P2 in [np.full((2, 2), 'a string'), [[1, [2]], [3, 4]], [1, 2, 3], - [[1, 2]], [[1], [2]]]: + for P2 in [np.full((2, 2), 'a string', dtype=np.dtype(' Date: Sun, 5 Aug 2018 14:48:33 +0200 Subject: [PATCH 025/269] [MRG] Add Generalized Linear Models (#9405) * fix typos in doc --- doc/modules/linear_model.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 9f85da771c6f1..a204ccb080cc9 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -934,7 +934,7 @@ follows: Mathematical formulation ------------------------ -In the unpenalized case, the assumptions are the folowing: +In the unpenalized case, the assumptions are the following: * The target values :math:`y_i` are realizations of random variables :math:`Y_i \overset{i.i.d}{\sim} \mathrm{EDM}(\mu_i, \frac{\phi}{s_i})` @@ -951,7 +951,7 @@ same as specifying a unit variance function (they are one-to-one). Including penalties helps to avoid overfitting or, in case of L1 penalty, to obtain sparse solutions. But there are also other motivations to include them, -e.g. accounting fo dependence structure of :math:`y`. +e.g. accounting for the dependence structure of :math:`y`. The objective function, which is independent of :math:`\phi`, is minimized with respect to the coefficients :math:`w`. 
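For orientation, a sketch of the penalized objective in the notation of this section; the elastic-net form of the penalty follows the ``GeneralizedLinearRegressor`` docstring, while the exact scaling of the deviance term by the sample weights is assumed here rather than quoted:

.. math:: \min_{w} \quad \frac{1}{2\sum_i s_i}\sum_i s_i\, d(y_i, \mu_i)
          + \alpha \cdot \mathrm{l1\_ratio} \cdot \|P_1 w\|_1
          + \frac{\alpha\,(1-\mathrm{l1\_ratio})}{2}\, w^\top P_2 w,
          \qquad \mu_i = h((Xw)_i),

with unit deviance :math:`d(y,\mu)`, inverse link function :math:`h`, sample weights :math:`s_i`, L1 penalty weights :math:`P_1` and L2 penalty matrix :math:`P_2`.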
From 809e3a2747e451fc97b9a462cf682110d59fac25 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 26 Aug 2018 20:41:25 +0200 Subject: [PATCH 026/269] Remove test_glm_P2_argument --- sklearn/linear_model/tests/test_glm.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index c5d132d35bdb9..82b8ec2435543 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -197,15 +197,15 @@ def test_glm_P1_argument(): assert_raises(ValueError, glm.fit, X, y) -def test_glm_P2_argument(): - """Test GLM P2 arguments - """ - y = np.array([1, 2]) - X = np.array([[1], [1]]) - for P2 in [np.full((2, 2), 'a string', dtype=np.dtype(' Date: Thu, 30 Aug 2018 19:33:27 +0200 Subject: [PATCH 027/269] Filter out DeprecationWarning in old versions of scipy.sparse.linalg.spsolve about usage of umfpack --- sklearn/linear_model/tests/test_glm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 82b8ec2435543..e4be75ddb7a64 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -355,6 +355,7 @@ def test_glm_log_regression(): assert_array_almost_equal(res.coef_, coef) +@pytest.mark.filterwarnings('ignore:DeprecationWarning') def test_normal_ridge(): """Test ridge regression for Normal distributions From 46df5b6555938dc7c9acac67929b0633ea1354a3 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 30 Aug 2018 20:40:08 +0200 Subject: [PATCH 028/269] import pytest --- sklearn/linear_model/tests/test_glm.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index e4be75ddb7a64..dfa205407a193 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -1,5 +1,6 @@ import numpy as np from numpy.testing import assert_allclose +import pytest import scipy as sp from scipy import sparse From 21f2136c083b4057868b7886880f356e58703611 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 30 Aug 2018 21:17:53 +0200 Subject: [PATCH 029/269] Document arguments of abstact methods --- sklearn/linear_model/glm.py | 125 +++++++++++++++++++++++++++++++++--- 1 file changed, 115 insertions(+), 10 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index eae4a56ea1d95..825ee6bfe8c45 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -93,12 +93,22 @@ class Link(six.with_metaclass(ABCMeta)): def link(self, mu): """The link function g(mu) with argument mu=E[Y] returns the linear predictor. + + Parameters + ---------- + mu : array, shape (n_samples,) + Usually the predicted mean. """ raise NotImplementedError @abstractmethod def derivative(self, mu): """Derivative of the link g'(mu). + + Parameters + ---------- + mu : array, shape (n_samples,) + Usually the predicted mean. """ raise NotImplementedError @@ -106,18 +116,33 @@ def derivative(self, mu): def inverse(self, lin_pred): """The inverse link function h(lin_pred) with the linear predictor as argument returns mu=E[Y]. + + Parameters + ---------- + lin_pred : array, shape (n_samples,) + Usually the (predicted) linear predictor. """ raise NotImplementedError @abstractmethod def inverse_derivative(self, lin_pred): """Derivative of the inverse link function h'(lin_pred). 
+ + Parameters + ---------- + lin_pred : array, shape (n_samples,) + Usually the (predicted) linear predictor. """ raise NotImplementedError @abstractmethod def inverse_derivative2(self, lin_pred): """Second derivative of the inverse link function h''(lin_pred). + + Parameters + ---------- + lin_pred : array, shape (n_samples,) + Usually the (predicted) linear predictor. """ raise NotImplementedError @@ -236,6 +261,11 @@ def include_upper_bound(self): def in_y_range(self, x): """Returns true if `x` is in the valid range of Y~EDM. + + Parameters + ---------- + x : array, shape (n_samples,) + Target values. """ if self.include_lower_bound: if self.include_upper_bound: @@ -263,12 +293,22 @@ def unit_variance(self, mu): \partial\mu^2}}\big|_{y=\mu} See also :func:`variance`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. """ raise NotImplementedError() @abstractmethod def unit_variance_derivative(self, mu): r"""The derivative of the unit variance w.r.t. `mu`, :math:`v'(\mu)`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Target values. """ raise NotImplementedError() @@ -276,6 +316,17 @@ def variance(self, mu, phi=1, weights=1): r"""The variance of :math:`Y_i \sim \mathrm{EDM}(\mu_i,\phi/s_i)` is :math:`\mathrm{Var}[Y_i]=\phi/s_i*v(\mu_i)`, with unit variance :math:`v(\mu)` and weights :math:`s_i`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + + phi : float + Dispersion parameter. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. """ return phi/weights * self.unit_variance(mu) @@ -284,6 +335,17 @@ def variance_derivative(self, mu, phi=1, weights=1): :math:`\frac{\partial}{\partial\mu}\mathrm{Var}[Y_i] =phi/s_i*v'(\mu_i)`, with unit variance :math:`v(\mu)` and weights :math:`s_i`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + + phi : float (default=1) + Dispersion parameter. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. """ return phi/weights * self.unit_variance_derivative(mu) @@ -293,6 +355,14 @@ def unit_deviance(self, y, mu): In terms of the log-likelihood it is given by :math:`d(y,\mu) = -2\phi\cdot \left(loglike(y,\mu,phi) - loglike(y,y,phi)\right).` + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. """ raise NotImplementedError() @@ -301,9 +371,13 @@ def unit_deviance_derivative(self, y, mu): :math:`\frac{\partial}{\partial\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` with unit variance :math:`v(\mu)`. - Returns - ------- - derivative: array, shape = (n_samples,) + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. """ return -2*(y-mu)/self.unit_variance(mu) @@ -313,6 +387,17 @@ def deviance(self, y, mu, weights=1): In terms of the likelihood it is :math:`D = -2\phi\cdot \left(loglike(y,\mu,\frac{phi}{s}) - loglike(y,y,\frac{phi}{s})\right)`. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. """ return np.sum(weights*self.unit_deviance(y, mu)) @@ -326,6 +411,17 @@ def _deviance(self, coef, X, y, weights, link): def deviance_derivative(self, y, mu, weights=1): """The derivative w.r.t. 
`mu` of the deviance. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. """ return weights*self.unit_deviance_derivative(y, mu) @@ -464,7 +560,16 @@ def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link): return eta, mu, score, fisher def starting_mu(self, y, weights=1): - """Starting values for the mean mu_i in (unpenalized) IRLS.""" + """Starting values for the mean mu_i in (unpenalized) IRLS. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + """ return ((weights*y+np.mean(weights*y)) / (2.*np.sum(np.ones_like(y)*weights))) @@ -656,12 +761,12 @@ def _irls_step(X, W, P2, z): X : numpy array or sparse matrix of shape (n_samples, n_features) Training data (with intercept included if present) - W : numpy array of shape (n_samples, ) + W : numpy array of shape (n_samples,) P2 : numpy array or sparse matrix of shape (n_features, n_features) The l2-penalty matrix or vector (=diagonal matrix) - z : numpy array of shape (n_samples, ) + z : numpy array of shape (n_samples,) Working observations Returns @@ -927,12 +1032,12 @@ def fit(self, X, y, sample_weight=None): Parameters ---------- X : numpy array or sparse matrix of shape (n_samples, n_features) - Training data + Training data. - y : numpy array of shape (n_samples, ) - Target values + y : numpy array of shape (n_samples,) + Target values. - sample_weight : array of shape (n_samples, ) or None,\ + sample_weight : array of shape (n_samples,) or None,\ optinal (default=None) Individual weights w_i for each sample. Note that for an Exponential Dispersion Model (EDM), one has From 1faedf87eecfd2c6a668e2aec6e28a9fff8780ec Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 30 Aug 2018 21:18:51 +0200 Subject: [PATCH 030/269] Pytest filter warnings use two colons --- sklearn/linear_model/tests/test_glm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index dfa205407a193..1ac5ccd4d3d5c 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -356,7 +356,7 @@ def test_glm_log_regression(): assert_array_almost_equal(res.coef_, coef) -@pytest.mark.filterwarnings('ignore:DeprecationWarning') +@pytest.mark.filterwarnings('ignore::DeprecationWarning') def test_normal_ridge(): """Test ridge regression for Normal distributions From 992f9819838336a9372a7cadecc53eeafef439ff Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 30 Aug 2018 22:07:33 +0200 Subject: [PATCH 031/269] Improve documentation of arguments that were so far undocumented --- sklearn/linear_model/glm.py | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 825ee6bfe8c45..a0d0266fa0efe 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -579,6 +579,17 @@ class TweedieDistribution(ExponentialDispersionModel): They have :math:`\mu=\mathrm{E}[Y]` and :math:`\mathrm{Var}[Y] \propto \mu^power. 
+ Special cases are: + + ===== ================ + Power Distribution + ===== ================ + 0 Normal + 1 Poisson + (0,1) Compound Poisson + 2 Gamma + 3 Inverse Gaussian + Attributes ---------- power : float @@ -586,6 +597,12 @@ class TweedieDistribution(ExponentialDispersionModel): :math:`v(\mu) = \mu^{power}`. """ def __init__(self, power=0): + """ + Parameters + ---------- + power : float (default=0) + Power of (of mu) of the variance function. + """ self.power = power self._upper_bound = np.Inf self._include_upper_bound = False @@ -623,6 +640,9 @@ def __init__(self, power=0): # Positive Stable self._lower_bound = 0 self._include_lower_bound = False + else: + raise ValueError('The power must be a float, i.e. real number, ' + 'got (power={})'.format(power)) @property def power(self): @@ -653,12 +673,22 @@ def include_upper_bound(self): def unit_variance(self, mu): """The unit variance of a Tweedie distribution is v(mu)=mu**power. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. """ return np.power(mu, self.power) def unit_variance_derivative(self, mu): """The derivative of the unit variance of a Tweedie distribution is v(mu)=power*mu**(power-1). + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. """ return self.power*np.power(mu, self.power-1) @@ -680,9 +710,6 @@ def unit_deviance(self, y, mu): return 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p)*(2-p)) - y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) - def likelihood(self, y, X, w, phi, weights=1): - raise NotImplementedError('This function is not (yet) implemented.') - class NormalDistribution(TweedieDistribution): """Class for the Normal (aka Gaussian) distribution""" From 06b8451ea109040371615e9e7baaa8ff505197f0 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 31 Aug 2018 00:26:39 +0200 Subject: [PATCH 032/269] Further improve documentation of arguments --- sklearn/linear_model/glm.py | 43 +++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 14 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index a0d0266fa0efe..9688b1e0c9e5c 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -601,7 +601,7 @@ def __init__(self, power=0): Parameters ---------- power : float (default=0) - Power of (of mu) of the variance function. + Variance power of the `unit_variance` function. """ self.power = power self._upper_bound = np.Inf @@ -798,7 +798,7 @@ def _irls_step(X, W, P2, z): Returns ------- - coef: array, shape = (X.shape[1]) + coef: array, shape (X.shape[1]) """ # TODO: scipy.linalg.solve is faster, but ordinary least squares uses # scipy.linalg.lstsq. What is more appropriate? @@ -898,7 +898,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2. - P1 : None or array of shape (n_features*, ), optional\ + P1 : None or array of shape (n_features*,), optional\ (default=None) With this array, you can exclude coefficients from the L1 penalty. Set the corresponding value to 1 (include) or 0 (exclude). The @@ -1007,7 +1007,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Attributes ---------- - coef_ : array, shape (n_features, ) + coef_ : array, shape (n_features,) Estimated coefficients for the linear predictor (X*coef_) in the GLM. 
intercept_ : float @@ -1679,7 +1679,7 @@ def Hs(s): if self.fit_dispersion in ['chisqr', 'deviance']: # attention because of rescaling of weights - self.dispersion_ = self.estimate_phi(y, X, weights)*weights_sum + self.dispersion_ = self.estimate_phi(X, y, weights)*weights_sum return self @@ -1688,12 +1688,12 @@ def linear_predictor(self, X): Parameters ---------- - X : numpy array or sparse matrix of shape [n_samples,n_features] + X : numpy array or sparse matrix, shape (n_samples, n_features) Samples. Returns ------- - C : array, shape = (n_samples) + C : array, shape (n_samples) Returns predicted values of linear predictor. """ check_is_fitted(self, "coef_") @@ -1709,12 +1709,15 @@ def predict(self, X, sample_weight=None): Parameters ---------- - X : numpy array or sparse matrix of shape [n_samples,n_features] + X : numpy array or sparse matrix, shape (n_samples, n_features) Samples. + sample_weight : array of shape (n_samples,) or None , \ + (default=None) + Returns ------- - C : array, shape = (n_samples) + C : array, shape (n_samples,) Returns predicted values times sample_weight. """ X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], @@ -1726,9 +1729,21 @@ def predict(self, X, sample_weight=None): return mu*weights - def estimate_phi(self, y, X, sample_weight=None): - """Estimation of the dispersion parameter. + def estimate_phi(self, X, y, sample_weight=None): + """Estimation of the dispersion parameter phi. Returns the estimate. + + Parameters + ---------- + X : numpy array or sparse matrix of shape (n_samples, n_features) + Training data. + + y : numpy array, shape (n_samples,) + Target values. + + sample_weight : array of shape (n_samples,) or None,\ + optinal (default=None) + Sample weights. """ check_is_fitted(self, "coef_") _dtype = [np.float64, np.float32] @@ -1773,13 +1788,13 @@ def score(self, X, y, sample_weight=None): Parameters ---------- - X : array-like, shape = (n_samples, n_features) + X : array-like, shape (n_samples, n_features) Test samples - y : array-like of shape = (n_samples) + y : array-like, shape (n_samples,) True valeus for X. - sample_weight : array-like, shape = (n_samples), optional + sample_weight : array-like, shape = (n_samples,), optional Sample weights. Returns From c93f60d9e98a5be0d493d513e37c3c9de5167542 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 31 Aug 2018 08:25:01 +0200 Subject: [PATCH 033/269] Remove parameters docstring for __init__ --- sklearn/linear_model/glm.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 9688b1e0c9e5c..021927b598822 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -597,12 +597,6 @@ class TweedieDistribution(ExponentialDispersionModel): :math:`v(\mu) = \mu^{power}`. """ def __init__(self, power=0): - """ - Parameters - ---------- - power : float (default=0) - Variance power of the `unit_variance` function. 
- """ self.power = power self._upper_bound = np.Inf self._include_upper_bound = False From 66ec63b5157026f8541e2761c29ef3225d89a44c Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 31 Aug 2018 19:24:23 +0200 Subject: [PATCH 034/269] Fix typos in docstring of TweedieDistribution --- sklearn/linear_model/glm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 021927b598822..65abb42b043d5 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -577,7 +577,7 @@ def starting_mu(self, y, weights=1): class TweedieDistribution(ExponentialDispersionModel): r"""A class for the Tweedie distribution. They have :math:`\mu=\mathrm{E}[Y]` and - :math:`\mathrm{Var}[Y] \propto \mu^power. + :math:`\mathrm{Var}[Y] \propto \mu^power`. Special cases are: @@ -593,7 +593,7 @@ class TweedieDistribution(ExponentialDispersionModel): Attributes ---------- power : float - The variance power of the unit_variance + The variance power of the `unit_variance` :math:`v(\mu) = \mu^{power}`. """ def __init__(self, power=0): From 53c69702c790223d3940cd650b9d52a61e39b244 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Fri, 31 Aug 2018 22:20:43 +0200 Subject: [PATCH 035/269] Change docstring section of TweedieDistribution from Attributes to Parameters --- sklearn/linear_model/glm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 65abb42b043d5..fcb6c9754b826 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -590,7 +590,7 @@ class TweedieDistribution(ExponentialDispersionModel): 2 Gamma 3 Inverse Gaussian - Attributes + Parameters ---------- power : float The variance power of the `unit_variance` From 87d5ba38c5663224edd7ec10bc7efdf27172e7f8 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 7 Oct 2018 19:43:42 +0200 Subject: [PATCH 036/269] Minor doc improvements of GeneralizedLinearRegressor --- sklearn/linear_model/glm.py | 87 +++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 42 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index fcb6c9754b826..01d5420773ffc 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -6,17 +6,16 @@ # some parts and tricks stolen from other sklearn files. # License: BSD 3 clause -# TODO: Write more tests -# TODO: Write examples and more docu -# TODO: deal with option self.copy_X +# TODO: Write examples +# TODO: Make option self.copy_X more meaningfull than just for start values. # TODO: Should the option `normalize` be included (like other linear models)? # So far, it is not included. User must pass a normalized X. -# TODO: Add cross validation support +# TODO: Add cross validation support? # TODO: Should GeneralizedLinearRegressor inherit from LinearModel? # So far, it does not. # TODO: Include further classes in class.rst? ExponentialDispersionModel? # TweedieDistribution? -# TODO: Negative values in P1 are not allowed so far. They could be used to +# TODO: Negative values in P1 are not allowed so far. They could be used # for group lasso. # Design Decisions: @@ -26,7 +25,7 @@ # regressor, Bernoulli/Binomial => classifier. # Solution: GeneralizedLinearRegressor since this is the focus. 
# - Allow for finer control of penalty terms: -# L1: ||P1*w||_1 with P1*w a componentwise product, this allows to exclude +# L1: ||P1*w||_1 with P1*w as element-wise product, this allows to exclude # factors from the L1 penalty. # L2: w*P2*w with P2 a (demi-) positive definite matrix, e.g. P2 could be # a 1st or 2nd order difference matrix (compare B-spline penalties and @@ -322,7 +321,7 @@ def variance(self, mu, phi=1, weights=1): mu : array, shape (n_samples,) Predicted mean. - phi : float + phi : float (default=1) Dispersion parameter. weights : array, shape (n_samples,) (default=1) @@ -592,7 +591,7 @@ class TweedieDistribution(ExponentialDispersionModel): Parameters ---------- - power : float + power : float (default=0) The variance power of the `unit_variance` :math:`v(\mu) = \mu^{power}`. """ @@ -779,22 +778,22 @@ def _irls_step(X, W, P2, z): Parameters ---------- - X : numpy array or sparse matrix of shape (n_samples, n_features) + X : {numpy array, sparse matrix}, shape (n_samples, n_features) Training data (with intercept included if present) - W : numpy array of shape (n_samples,) + W : numpy array, shape (n_samples,) - P2 : numpy array or sparse matrix of shape (n_features, n_features) - The l2-penalty matrix or vector (=diagonal matrix) + P2 : {numpy array, sparse matrix}, shape (n_features, n_features) + The L2-penalty matrix or vector (=diagonal matrix) - z : numpy array of shape (n_samples,) + z : numpy array, shape (n_samples,) Working observations Returns ------- coef: array, shape (X.shape[1]) """ - # TODO: scipy.linalg.solve is faster, but ordinary least squares uses + # TODO: scipy.linalg.solve seems faster, but ordinary least squares uses # scipy.linalg.lstsq. What is more appropriate? n_samples, n_features = X.shape if sparse.issparse(X): @@ -892,19 +891,20 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2. - P1 : None or array of shape (n_features*,), optional\ + P1 : {None, array-like}, shape (n_features*,), optional\ (default=None) With this array, you can exclude coefficients from the L1 penalty. Set the corresponding value to 1 (include) or 0 (exclude). The - default value ``None`` is the same as an array of ones. + default value ``None`` is the same as a 1d array of ones. Note that n_features* = X.shape[1] = length of coef_ (intercept always excluded from counting). - P2 : None or array of shape (n_features*, n_features*), optional\ - (default=None) + P2 : {None, array-like, sparse matrix}, shape \ + (n_features*, n_features*), optional (default=None) With this square matrix the L2 penalty is calculated as `w P2 w`. This gives a fine control over this penalty (Tikhonov regularization). + The default value ``None`` is the same as the idendity matrix. Note that n_features* = X.shape[1] = length of coef_ (intercept always excluded from counting). P2 must be positive semi-definite. @@ -939,8 +939,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - 'newton-cg', 'lbfgs'. Cannot deal with L1 penalties. - - 'cd' is the coordinate descent algorithm. It can deal with L1 and - L2 penalties. + - 'cd' is the coordinate descent algorithm. It can + deal with L1 as well as L2 penalties. max_iter : int, optional (default=100) The maximal number of iterations for solver algorithms. 
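A hedged sketch of the finer penalty control described for ``P1`` and ``P2``; the difference-based ``P2`` mirrors the construction used in the P-spline example added later in this series, and the data here is invented for illustration::

    import numpy as np
    from scipy.linalg import toeplitz
    from sklearn.linear_model import GeneralizedLinearRegressor

    n_features = 5
    # P1: exclude the last coefficient from the L1 penalty
    P1 = np.ones(n_features)
    P1[-1] = 0
    # P2: difference-based Tikhonov penalty matrix (positive semi-definite)
    P2 = toeplitz([2, -1] + [0] * (n_features - 2)).astype(float)
    P2[0, 0] = P2[-1, -1] = 1

    rng = np.random.RandomState(0)
    X = rng.rand(20, n_features)
    y = rng.rand(20) + 1.0

    reg = GeneralizedLinearRegressor(alpha=0.1, l1_ratio=0.5,
                                     P1=P1, P2=P2, solver='cd')
    reg.fit(X, y)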
@@ -958,8 +958,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): does not exit (first call to fit), option ``start_params`` sets the starting values for ``coef_`` and ``intercept_``. - start_params : {None, 'least_squares', 'zero'} or array of shape \ - (n_features, ) or }, optional (default=None) + start_params : {None, 'least_squares', 'zero', array of shape \ + (n_features, )}, optional (default=None) If an array of size n_features is supplied, use these as start values for ``coef_`` in the fit. If ``fit_intercept=True``, the first element is assumed to be the start value for the ``intercept_``. @@ -979,7 +979,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4. - random_state : int, RandomState instance or None, optional (default=None) + random_state : {int, RandomState instance, None}, optional (default=None) The seed of the pseudo random number generator that selects a random feature to be updated for solver 'cd' (coordinate descent). If int, random_state is the seed used by the random @@ -1052,13 +1052,13 @@ def fit(self, X, y, sample_weight=None): Parameters ---------- - X : numpy array or sparse matrix of shape (n_samples, n_features) + X : {array-like, sparse matrix}, shape (n_samples, n_features) Training data. - y : numpy array of shape (n_samples,) + y : array-like, shape (n_samples,) Target values. - sample_weight : array of shape (n_samples,) or None,\ + sample_weight : {None, array-like}, shape (n_samples,),\ optinal (default=None) Individual weights w_i for each sample. Note that for an Exponential Dispersion Model (EDM), one has @@ -1190,12 +1190,12 @@ def fit(self, X, y, sample_weight=None): if self.P1 is None: P1 = np.ones(X.shape[1]) else: - P1 = np.atleast_1d(np.copy(self.P1)) + P1 = np.copy(np.atleast_1d(self.P1)) if P1.dtype.kind not in ['b', 'i', 'u', 'f']: raise ValueError("P1 must be a numeric value; " "got (dtype={0}).".format(P1.dtype)) if (P1.ndim != 1) or (P1.shape[0] != X.shape[1]): - raise ValueError("P1 must be either None or an 1D array with " + raise ValueError("P1 must be either None or a 1d array with " "the length of X.shape[1]; " "got (P1.shape[0]={0}), " "needed (X.shape[1]={1})." @@ -1324,6 +1324,7 @@ def fit(self, X, y, sample_weight=None): coef = _irls_step(Xnew, W, P2, z) else: # with L1 penalty, start with coef = 0 + # TODO: Are there better options? coef = np.zeros(n_features) elif isinstance(self.start_params, six.string_types): if self.start_params == 'zero': @@ -1353,7 +1354,7 @@ def fit(self, X, y, sample_weight=None): # 4. fit # ####################################################################### # algorithms for optimiation - # TODO: Parallelize it + # TODO: Parallelize it? self.n_iter_ = 0 converged = False # 4.1 IRLS ############################################################ @@ -1682,12 +1683,12 @@ def linear_predictor(self, X): Parameters ---------- - X : numpy array or sparse matrix, shape (n_samples, n_features) + X : {array-like, sparse matrix}, shape (n_samples, n_features) Samples. Returns ------- - C : array, shape (n_samples) + C : array, shape (n_samples,) Returns predicted values of linear predictor. """ check_is_fitted(self, "coef_") @@ -1703,17 +1704,18 @@ def predict(self, X, sample_weight=None): Parameters ---------- - X : numpy array or sparse matrix, shape (n_samples, n_features) + X : {array-like, sparse matrix}, shape (n_samples, n_features) Samples. 
- sample_weight : array of shape (n_samples,) or None , \ - (default=None) + sample_weight : {None, array-like}, shape (n_samples,), optional \ + (default=None) Returns ------- C : array, shape (n_samples,) Returns predicted values times sample_weight. """ + # TODO: Is copy=True necessary? X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype='numeric', copy=True, ensure_2d=True, allow_nd=False) @@ -1729,14 +1731,14 @@ def estimate_phi(self, X, y, sample_weight=None): Parameters ---------- - X : numpy array or sparse matrix of shape (n_samples, n_features) + X : {array-like, sparse matrix}, shape (n_samples, n_features) Training data. - y : numpy array, shape (n_samples,) + y : array-like, shape (n_samples,) Target values. - sample_weight : array of shape (n_samples,) or None,\ - optinal (default=None) + sample_weight : {None, array-like}, shape (n_samples,), optional \ + (default=None) Sample weights. """ check_is_fitted(self, "coef_") @@ -1782,13 +1784,14 @@ def score(self, X, y, sample_weight=None): Parameters ---------- - X : array-like, shape (n_samples, n_features) - Test samples + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Test samples. y : array-like, shape (n_samples,) - True valeus for X. + True values of target. - sample_weight : array-like, shape = (n_samples,), optional + sample_weight : {None, array-like}, shape (n_samples,), optional \ + (default=None) Sample weights. Returns From a9ae023ec331e782d7a23de18f7e6fbb0dd1f57d Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Mon, 8 Oct 2018 20:21:35 +0200 Subject: [PATCH 037/269] Double escape in doctring of GeneralizedLinearRegressor --- sklearn/linear_model/glm.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 01d5420773ffc..535af60289f8e 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -866,13 +866,13 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): TODO: Estimation of the dispersion parameter phi. - If your target `y` is a ratio, you should also provide appropriate weights - `w`. As an example, consider Poission distributed counts `z` (integers) and - weights `w`=exposure (time, money, persons years, ...), then you fit + If the target `y` is a ratio, appropriate weights `w` should be provided. + As an example, consider Poission distributed counts `z` (integers) and + weights `w`=exposure (time, money, persons years, ...). Then you fit `y = z/w`, i.e. ``GeneralizedLinearModel(family='Poisson').fit(X, y, - sample_weight=w)``. You need the weights for the right mean, consider: - :math:`\bar(y) = \frac{\sum_i w_i y_i}{\sum_i w_i}`. - In this case one might say that y has a 'scaled' Poisson distributions. + sample_weight=w)``. The weights are necessary for the right mean, consider: + :math:`\\bar(y) = \\frac{\\sum_i w_i y_i}{\\sum_i w_i}`. + In this case one might say that 'y' has a 'scaled' Poisson distributions. The same holds for other distributions. Parameters @@ -891,7 +891,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2. - P1 : {None, array-like}, shape (n_features*,), optional\ + P1 : {None, array-like}, shape (n_features*,), optional \ (default=None) With this array, you can exclude coefficients from the L1 penalty. Set the corresponding value to 1 (include) or 0 (exclude). 
The From bb62485166412d8ba6393e5ba753b015b806867c Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Mon, 31 Dec 2018 16:54:40 +0100 Subject: [PATCH 038/269] Add example for GeneralizedLinearRegressor * add example * improve docstring of GeneralizedLinearRegressor * improve user guide for GeneralizedLinearRegressor --- doc/modules/linear_model.rst | 34 +++++--- .../plot_poisson_spline_regression.py | 83 +++++++++++++++++++ sklearn/linear_model/glm.py | 22 ++--- 3 files changed, 115 insertions(+), 24 deletions(-) create mode 100644 examples/linear_model/plot_poisson_spline_regression.py diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index a204ccb080cc9..d65f7ed121f8e 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -901,12 +901,15 @@ for a more versatile L2 penalty. Use cases, where a loss different from the squared loss might be appropriate, are the following: - * If the target values :math:`y` are counts (integer valued) or frequencies, you might try a Poisson deviance. + * If the target values :math:`y` are counts (non-negative integer valued) or + frequencies (non-negative), you might use a Poisson deviance with log-link. - * If the target values are positive valued and skewed, you might try a Gamma deviance. + * If the target values are positive valued and skewed, you might try a + Gamma deviance with log-link. - * If the target values seem to be heavier tailed than a Gamma distribution, you might try an Inverse Gaussian deviance (or even higher variance powers of the Tweedie family). - Keep in mind that the mean is not a good measure for very heavy tailed distributions, cf. extreme value theory. + * If the target values seem to be heavier tailed than a Gamma distribution, + you might try an Inverse Gaussian deviance (or even higher variance powers + of the Tweedie family). Since the linear predictor :math:`Xw` can be negative and Poisson, Gamma and Inverse Gaussian distributions don't support negative values, @@ -931,6 +934,11 @@ follows: >>> reg.intercept_ #doctest: +ELLIPSIS -0.76383575... + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_spline_regression.py` + Mathematical formulation ------------------------ @@ -956,20 +964,20 @@ e.g. accounting for the dependence structure of :math:`y`. The objective function, which is independent of :math:`\phi`, is minimized with respect to the coefficients :math:`w`. -The deviance is defined by +The deviance is defined by the log of the EDM likelihood as .. 
math:: D(y, \mu) = -2\phi\cdot \left(loglike(y,\mu,\frac{\phi}{s}) - loglike(y,y,\frac{\phi}{s})\right) -===================================== ================================= -Distribution Variance Function :math:`v(\mu)` -===================================== ================================= -Normal ("normal") :math:`1` -Poisson ("poisson") :math:`\mu` -Gamma ("gamma") :math:`\mu^2` -Inverse Gaussian ("inverse.gaussian") :math:`\mu^3` -===================================== ================================= +===================================== =============================== ================================= ============================================ +Distribution Target Domain Variance Function :math:`v(\mu)` Deviance :math:`D(y, \mu)` +===================================== =============================== ================================= ============================================ +Normal ("normal") :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` +Poisson ("poisson") :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{/mu}-y+\mu)` +Gamma ("gamma") :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` +Inverse Gaussian ("inverse.gaussian") :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` +===================================== =============================== ================================= ============================================ Two remarks: diff --git a/examples/linear_model/plot_poisson_spline_regression.py b/examples/linear_model/plot_poisson_spline_regression.py new file mode 100644 index 0000000000000..b98bca5d8f867 --- /dev/null +++ b/examples/linear_model/plot_poisson_spline_regression.py @@ -0,0 +1,83 @@ +""" +================================= +Poisson Regression with B-Splines +================================= + +As in the :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` +example, a Poisson regression with penalized B-splines (P-splines) [1]_ is +fitted on slightly different sinusodial, Poisson distributed data and +compared to an AdaBoost model with decision trees. +One can see, that this is a hard problem for both estimators. + +.. [1] Eilers, Paul H. C.; Marx, Brian D. "Flexible smoothing with B -splines + and penalties". Statist. Sci. 11 (1996), no. 2, 89--121. 
+ `doi:10.1214/ss/1038425655 + `_ + +""" +print(__doc__) + +# Author: Christian Lorentzen +# based on the AdaBoost regression example from Noel Dawe +# License: BSD 3 clause + +# importing necessary libraries +import numpy as np +from scipy.linalg import toeplitz +from scipy.interpolate import BSpline +import matplotlib.pyplot as plt +from sklearn.tree import DecisionTreeRegressor +from sklearn.ensemble import AdaBoostRegressor +from sklearn.linear_model import GeneralizedLinearRegressor + + +# Create the dataset +xmin, xmax = 0, 6 +rng = np.random.RandomState(1) +X = np.linspace(xmin, xmax, 500)[:, np.newaxis] +y_true = 0.5 * (2.1 + np.sin(X).ravel() + np.sin(6 * X).ravel()) +y = rng.poisson(y_true, X.shape[0]) + +# b-spline basis +nknots, degree = 40, 3 +ns = nknots - degree - 1 # number of base spline functions +dx = (xmax - xmin) / (nknots - 1 - 2 * degree) +knots = np.linspace(xmin - degree * dx, 6 + degree * dx, nknots) +coef = np.zeros(ns) +splineBasis = np.empty((X.shape[0], ns), dtype=float) +for i in range(ns): + coef[i] = 1 + splineBasis[:, i] = BSpline(knots, coef, degree, extrapolate=False)(X) \ + .ravel() + coef[i] = 0 + +# second order difference matrix +P2 = toeplitz([2, -1] + [0] * (ns - 2)).astype(float) +P2[0, 0] = P2[-1, -1] = 1 + +# Fit regression model +regr_1 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), + n_estimators=10, random_state=rng) + +regr_2 = GeneralizedLinearRegressor(family='poisson', link='log', + fit_intercept=True, alpha=0.02, + l1_ratio=0.1, P2=P2) + +regr_1.fit(X, y) +regr_2.fit(splineBasis, y) + +# Predict +y_1 = regr_1.predict(X) +y_2 = regr_2.predict(splineBasis) + +# Plot the results +plt.figure() +plt.plot(X, y_true, c="b", label="true mean") +plt.scatter(X, y, c="k", marker='.', label="training samples") +plt.plot(X, y_1, c="g", label="AdaBoost n_estimator=10", linewidth=2) +plt.plot(X, y_2, c="r", label="Poisson GLM with B-splines", linewidth=2) +plt.xlabel("data") +plt.ylabel("target") +plt.title("Regression Comparison") +plt.legend() +plt.show() diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 535af60289f8e..37afc8da2d6db 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -6,7 +6,7 @@ # some parts and tricks stolen from other sklearn files. # License: BSD 3 clause -# TODO: Write examples +# TODO: Write more examples. # TODO: Make option self.copy_X more meaningfull than just for start values. # TODO: Should the option `normalize` be included (like other linear models)? # So far, it is not included. User must pass a normalized X. @@ -832,7 +832,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): with inverse link function `h` and s=sum of `sample_weight` (which equals n_samples for `sample_weight=None`). - For `P1`=`P2`=identity, the penalty is the elastic net:: + For `P1=P2=identity`, the penalty is the elastic net:: alpha * l1_ratio * ||w||_1 + 1/2 * alpha * (1 - l1_ratio) * ||w||_2^2 @@ -868,11 +868,11 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): If the target `y` is a ratio, appropriate weights `w` should be provided. As an example, consider Poission distributed counts `z` (integers) and - weights `w`=exposure (time, money, persons years, ...). Then you fit - `y = z/w`, i.e. ``GeneralizedLinearModel(family='Poisson').fit(X, y, - sample_weight=w)``. The weights are necessary for the right mean, consider: - :math:`\\bar(y) = \\frac{\\sum_i w_i y_i}{\\sum_i w_i}`. 
- In this case one might say that 'y' has a 'scaled' Poisson distributions. + weights `w=exposure` (time, money, persons years, ...). Then you fit + `y = z/w`, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, + sample_weight=w)``. The weights are necessary for the right meanself. + Consider :math:`\\bar{y} = \\frac{\\sum_i w_i y_i}{\\sum_i w_i}`, + in this case one might say that `y` has a 'scaled' Poisson distributions. The same holds for other distributions. Parameters @@ -1017,10 +1017,10 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): References ---------- For the coordinate descent implementation: - .. [1] Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin - An Improved GLMNET for L1-regularized Logistic Regression, - Journal of Machine Learning Research 13 (2012) 1999-2030 - https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf + * Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin + An Improved GLMNET for L1-regularized Logistic Regression, + Journal of Machine Learning Research 13 (2012) 1999-2030 + https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf """ def __init__(self, alpha=1.0, l1_ratio=0, P1=None, P2=None, fit_intercept=True, family='normal', link='identity', From 16d064db7cee1d59569d21631cc2fa41be8b3b14 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 1 Jan 2019 11:58:41 +0100 Subject: [PATCH 039/269] Resolve merge conflicts * resolve merge conflicts in linear_model.rst * replace BSpline by splev to support older scipy versions --- doc/modules/linear_model.rst | 14 +++++++------- .../linear_model/plot_poisson_spline_regression.py | 8 +++++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index d65f7ed121f8e..09f14735c2907 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -876,18 +876,18 @@ to warm-starting (see :term:`Glossary `). .. _Generalized_linear_regression: -Generalized linear regression +Generalized Linear Regression ============================= :class:`GeneralizedLinearRegressor` generalizes the :ref:`elastic_net` in two -ways [8]_. First, the predicted values :math:`\hat{y}` are linked to a linear +ways [10]_. First, the predicted values :math:`\hat{y}` are linked to a linear combination of the input variables :math:`X` via an inverse link function :math:`h` as .. math:: \hat{y}(w, x) = h(xw) = h(w_0 + w_1 x_1 + ... + w_p x_p). Secondly, the squared loss function is replaced by the deviance :math:`D` of an -exponential dispersion model (EDM) [9]_. The objective function beeing minimized +exponential dispersion model (EDM) [11]_. The objective function beeing minimized becomes .. math:: \frac{1}{2s}D(y, \hat{y}) + \alpha \rho ||P_1w||_1 @@ -983,7 +983,7 @@ Two remarks: * The deviances for at least Normal, Poisson and Gamma distributions are strictly consistent scoring functions for the mean :math:`\mu`, see Eq. - (19)-(20) in [10]_. + (19)-(20) in [12]_. * If you want to model a frequency, i.e. counts per exposure (time, volume, ...) you can do so by a Poisson distribution and passing @@ -993,12 +993,12 @@ Two remarks: .. topic:: References: - .. [8] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - .. [9] Jørgensen, B. (1992). 
The theory of exponential dispersion models and analysis of deviance. Monografias de matemática, no. 51. + .. [11] Jørgensen, B. (1992). The theory of exponential dispersion models and analysis of deviance. Monografias de matemática, no. 51. See also `Exponential dispersion model. `_ - .. [10] Gneiting, T. (2010). `Making and Evaluating Point Forecasts. `_ + .. [12] Gneiting, T. (2010). `Making and Evaluating Point Forecasts. `_ Stochastic Gradient Descent - SGD ================================= diff --git a/examples/linear_model/plot_poisson_spline_regression.py b/examples/linear_model/plot_poisson_spline_regression.py index b98bca5d8f867..fce85fae1ea8c 100644 --- a/examples/linear_model/plot_poisson_spline_regression.py +++ b/examples/linear_model/plot_poisson_spline_regression.py @@ -24,7 +24,8 @@ # importing necessary libraries import numpy as np from scipy.linalg import toeplitz -from scipy.interpolate import BSpline +# from scipy.interpolate import BSpline +from scipy.interpolate import splev import matplotlib.pyplot as plt from sklearn.tree import DecisionTreeRegressor from sklearn.ensemble import AdaBoostRegressor @@ -47,8 +48,9 @@ splineBasis = np.empty((X.shape[0], ns), dtype=float) for i in range(ns): coef[i] = 1 - splineBasis[:, i] = BSpline(knots, coef, degree, extrapolate=False)(X) \ - .ravel() +# splineBasis[:, i] = BSpline(knots, coef, degree, extrapolate=False)(X) \ +# .ravel() + splineBasis[:, i] = splev(X, (knots, coef, degree)).ravel() coef[i] = 0 # second order difference matrix From 1a02a901d1a6d99484241ca205178ac61fc47846 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 1 Jan 2019 12:59:30 +0100 Subject: [PATCH 040/269] Adapt for minimum numpy version * replace np.block --- sklearn/linear_model/glm.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 37afc8da2d6db..851767055c61c 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -1235,8 +1235,11 @@ def fit(self, X, y, sample_weight=None): P2 = sparse.block_diag((sparse.dia_matrix((1, 1)), P2), dtype=P2.dtype).tocsr() else: - P2 = np.block([[np.zeros((1, 1)), np.zeros((1, X.shape[1]))], - [np.zeros((X.shape[1], 1)), P2]]) + # as of numpy 1.13 this would work: + # P2 = np.block([[np.zeros((1, 1)), np.zeros((1, X.shape[1]))], + # [np.zeros((X.shape[1], 1)), P2]]) + P2 = np.hstack((np.zeros((X.shape[1], 1)), P2)) + P2 = np.vstack((np.zeros((1, X.shape[1]+1)), P2)) else: Xnew = X From 177eb4cc017a7262e472070b4a920250711a099c Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 6 Jan 2019 19:46:19 +0100 Subject: [PATCH 041/269] Remove six dependencies as in #12639 * replace six.with_metaclass(ABCMeta) by metaclass=ABCMeta * replace six.integer_types by int * replace six.string_types by str * rebase * correct email address --- sklearn/linear_model/glm.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 851767055c61c..aca49ec7edf28 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -2,12 +2,13 @@ Generalized Linear Models with Exponential Dispersion Family """ -# Author: Christian Lorentzen +# Author: Christian Lorentzen # some parts and tricks stolen from other sklearn files. # License: BSD 3 clause # TODO: Write more examples. -# TODO: Make option self.copy_X more meaningfull than just for start values. +# TODO: Make option self.copy_X more meaningful. 
+# So far, fit uses Xnew instead of X. # TODO: Should the option `normalize` be included (like other linear models)? # So far, it is not included. User must pass a normalized X. # TODO: Add cross validation support? @@ -51,7 +52,6 @@ from .ridge import Ridge from ..base import BaseEstimator, RegressorMixin from ..exceptions import ConvergenceWarning -from ..externals import six from ..utils import check_array, check_X_y from ..utils.extmath import safe_sparse_dot from ..utils.optimize import newton_cg @@ -84,7 +84,7 @@ def _check_weights(sample_weight, n_samples): return weights -class Link(six.with_metaclass(ABCMeta)): +class Link(metaclass=ABCMeta): """Abstract base class for Link funtions """ @@ -186,7 +186,7 @@ def inverse_derivative2(self, lin_pred): return np.exp(lin_pred) -class ExponentialDispersionModel(six.with_metaclass(ABCMeta)): +class ExponentialDispersionModel(metaclass=ABCMeta): r"""Base class for reproductive Exponential Dispersion Models (EDM). The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by @@ -1142,7 +1142,7 @@ def fit(self, X, y, sample_weight=None): "with L1 penalties, which are included with " "(alpha={1}) and (l1_ratio={2})." .format(solver, self.alpha, self.l1_ratio)) - if (not isinstance(self.max_iter, six.integer_types) + if (not isinstance(self.max_iter, int) or self.max_iter <= 0): raise ValueError("Maximum number of iteration must be a positive " "integer;" @@ -1156,7 +1156,7 @@ def fit(self, X, y, sample_weight=None): start_params = self.start_params if start_params is None: pass - elif isinstance(start_params, six.string_types): + elif isinstance(start_params, str): if start_params not in ['least_squares', 'zero']: raise ValueError("The argument start_params must be None, " "'least-squares', 'zero' or an array of right" @@ -1329,7 +1329,7 @@ def fit(self, X, y, sample_weight=None): # with L1 penalty, start with coef = 0 # TODO: Are there better options? coef = np.zeros(n_features) - elif isinstance(self.start_params, six.string_types): + elif isinstance(self.start_params, str): if self.start_params == 'zero': coef = np.zeros(n_features) elif self.start_params == 'least_squares': From 3d4c784df6797c244b31dd3393083d2c63114bf2 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 3 Feb 2019 21:38:24 +0100 Subject: [PATCH 042/269] Improve user guide, doc and fix penalty parameter for Ridge * move parts of docstring to new Notes section * improve user guide and doc * fix typos * fix scaling of penalty parameter in Ridge() * docstring for _check_weights * reduce tol for parameter initialization --- doc/modules/linear_model.rst | 18 +++--- sklearn/linear_model/glm.py | 114 +++++++++++++++++++---------------- 2 files changed, 73 insertions(+), 59 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 09f14735c2907..174d1e4eddae4 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -890,7 +890,7 @@ Secondly, the squared loss function is replaced by the deviance :math:`D` of an exponential dispersion model (EDM) [11]_. The objective function beeing minimized becomes -.. math:: \frac{1}{2s}D(y, \hat{y}) + \alpha \rho ||P_1w||_1 +.. math:: \frac{1}{2\mathrm{sum}(s)}D(y, \hat{y}; s) + \alpha \rho ||P_1w||_1 +\frac{\alpha(1-\rho)}{2} w^T P_2 w with sample weights :math:`s`. 
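(Editorial note, not part of the patch series: the penalized objective in the hunk above can be written out directly. The sketch below assumes a Poisson deviance with log-link and dense numpy inputs; `glm_objective`, `rho` and the other names are illustrative only, not the estimator's API, and the special handling of the intercept, which the actual fit keeps out of both penalty terms, is ignored here.)

import numpy as np
from scipy.special import xlogy

def glm_objective(w, X, y, s, alpha, rho, P1, P2):
    # mean via the log-link, h(Xw) = exp(Xw), so mu stays positive
    mu = np.exp(X @ w)
    # weighted Poisson deviance D(y, mu; s) with unit deviance 2*(y*log(y/mu) - y + mu);
    # xlogy implements the convention y*log(y/mu) = 0 for y = 0
    dev = np.sum(s * 2 * (xlogy(y, y / mu) - y + mu))
    return (dev / (2 * np.sum(s))
            + alpha * rho * np.sum(np.abs(P1 * w))        # alpha*rho*||P1*w||_1, element-wise product
            + 0.5 * alpha * (1 - rho) * (w @ P2 @ w))     # alpha*(1-rho)/2 * w'P2w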
@@ -914,7 +914,7 @@ are the following: Since the linear predictor :math:`Xw` can be negative and Poisson, Gamma and Inverse Gaussian distributions don't support negative values, it is convenient to apply a link function different from the identity link -:math:`h(x)=x` that guarantees the non-negativeness, e.g. the log-link with +:math:`h(Xw)=Xw` that guarantees the non-negativeness, e.g. the log-link with :math:`h(Xw)=\exp(Xw)`. Note that the feature matrix `X` should be standardized before fitting. This @@ -964,17 +964,19 @@ e.g. accounting for the dependence structure of :math:`y`. The objective function, which is independent of :math:`\phi`, is minimized with respect to the coefficients :math:`w`. -The deviance is defined by the log of the EDM likelihood as +The deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` +likelihood as -.. math:: D(y, \mu) = -2\phi\cdot - \left(loglike(y,\mu,\frac{\phi}{s}) - - loglike(y,y,\frac{\phi}{s})\right) +.. math:: d(y, \mu) = -2\phi\cdot + \left(loglike(y,\mu,\phi) + - loglike(y,y,\phi)\right) \\ + D(y, \mu; s) = \sum_i s_i \cdot d(y_i, \mu_i) ===================================== =============================== ================================= ============================================ -Distribution Target Domain Variance Function :math:`v(\mu)` Deviance :math:`D(y, \mu)` +Distribution Target Domain Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` ===================================== =============================== ================================= ============================================ Normal ("normal") :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` -Poisson ("poisson") :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{/mu}-y+\mu)` +Poisson ("poisson") :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{\mu}-y+\mu)` Gamma ("gamma") :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` Inverse Gaussian ("inverse.gaussian") :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` ===================================== =============================== ================================= ============================================ diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index aca49ec7edf28..5fc869f81195f 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -8,7 +8,7 @@ # TODO: Write more examples. # TODO: Make option self.copy_X more meaningful. -# So far, fit uses Xnew instead of X. +# So far, fit uses Xnew instead of X. # TODO: Should the option `normalize` be included (like other linear models)? # So far, it is not included. User must pass a normalized X. # TODO: Add cross validation support? @@ -28,7 +28,7 @@ # - Allow for finer control of penalty terms: # L1: ||P1*w||_1 with P1*w as element-wise product, this allows to exclude # factors from the L1 penalty. -# L2: w*P2*w with P2 a (demi-) positive definite matrix, e.g. P2 could be +# L2: w*P2*w with P2 a (semi-) positive definite matrix, e.g. P2 could be # a 1st or 2nd order difference matrix (compare B-spline penalties and # Tikhonov regularization). 
# - The link funtion (instance of class Link) is necessary for the evaluation @@ -59,6 +59,8 @@ def _check_weights(sample_weight, n_samples): + """Check that weights are non-negative and have the right shape + """ if sample_weight is None: weights = np.ones(n_samples) elif np.isscalar(sample_weight): @@ -594,6 +596,7 @@ class TweedieDistribution(ExponentialDispersionModel): power : float (default=0) The variance power of the `unit_variance` :math:`v(\mu) = \mu^{power}`. + For ``0`. - The fit itself does not need Y to be from an EDM, but only assumes - the first two moments :math:`E[Y_i]=\\mu_i=h(\\eta_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{w_i} v(\\mu_i)`. - - The parameters :math:`w` (`coef_` and `intercept_`) are estimated by - (penalized) maximum likelihood which is equivalent to minimizing the - deviance. - - For `alpha` > 0, the feature matrix `X` should be standardized in order to - penalize features equally strong. Call - :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. - - TODO: Estimation of the dispersion parameter phi. - - If the target `y` is a ratio, appropriate weights `w` should be provided. - As an example, consider Poission distributed counts `z` (integers) and - weights `w=exposure` (time, money, persons years, ...). Then you fit - `y = z/w`, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, - sample_weight=w)``. The weights are necessary for the right meanself. - Consider :math:`\\bar{y} = \\frac{\\sum_i w_i y_i}{\\sum_i w_i}`, - in this case one might say that `y` has a 'scaled' Poisson distributions. - The same holds for other distributions. - Parameters ---------- alpha : float, optional (default=1) @@ -891,22 +871,21 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2. - P1 : {None, array-like}, shape (n_features*,), optional \ + P1 : {None, array-like}, shape (n_features,), optional \ (default=None) With this array, you can exclude coefficients from the L1 penalty. Set the corresponding value to 1 (include) or 0 (exclude). The default value ``None`` is the same as a 1d array of ones. - Note that n_features* = X.shape[1] = length of coef_ (intercept - always excluded from counting). + Note that n_features = X.shape[1]. P2 : {None, array-like, sparse matrix}, shape \ - (n_features*, n_features*), optional (default=None) + (n_features, n_features), optional (default=None) With this square matrix the L2 penalty is calculated as `w P2 w`. This gives a fine control over this penalty (Tikhonov - regularization). - The default value ``None`` is the same as the idendity matrix. - Note that n_features* = X.shape[1] = length of coef_ (intercept - always excluded from counting). P2 must be positive semi-definite. + regularization). The diagonal zeros of a diagonal P2, for example, + exclude all corresponding coefficients from the L2 penalty. + The default value ``None`` is the same as the identity matrix. + Note that n_features = X.shape[1]. P2 must be positive semi-definite. fit_intercept : boolean, optional (default=True) Specifies if a constant (a.k.a. bias or intercept) should be @@ -929,18 +908,22 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): solver : {'auto', 'irls', 'newton-cg', 'lbfgs', 'cd'}, \ optional (default='auto') - Algorithm to use in the optimization problem. + Algorithm to use in the optimization problem: - - 'auto' sets 'irls' if l1_ratio equals 0, else 'cd'. 
+ 'auto' + Sets 'irls' if l1_ratio equals 0, else 'cd'. - - 'irls' is iterated reweighted least squares (Fisher scoring). + 'irls' + iterated reweighted least squares (Fisher scoring). It is the standard algorithm for GLMs. Cannot deal with L1 penalties. - - 'newton-cg', 'lbfgs'. Cannot deal with L1 penalties. + 'newton-cg', 'lbfgs' + Cannot deal with L1 penalties. - - 'cd' is the coordinate descent algorithm. It can - deal with L1 as well as L2 penalties. + 'cd' + coordinate descent algorithm. It can deal with L1 as well as L2 + penalties. max_iter : int, optional (default=100) The maximal number of iterations for solver algorithms. @@ -959,10 +942,12 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): starting values for ``coef_`` and ``intercept_``. start_params : {None, 'least_squares', 'zero', array of shape \ - (n_features, )}, optional (default=None) - If an array of size n_features is supplied, use these as start values + (n_features*, )}, optional (default=None) + If an array of size n_features* is supplied, use it as start values for ``coef_`` in the fit. If ``fit_intercept=True``, the first element is assumed to be the start value for the ``intercept_``. + Note that n_features* = X.shape[1] + fit_intercept includes the + intercept in counting. If 'least_squares' is set, the result of a least squares fit in the link space (linear predictor) is taken. If 'zero' is set, all coefficients start with zero. @@ -1013,6 +998,30 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): n_iter_ : int Actual number of iterations of the solver. + Notes + ----- + The fit itself does not need Y to be from an EDM, but only assumes + the first two moments :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. + + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + (penalized) maximum likelihood which is equivalent to minimizing the + deviance. + + For `alpha` > 0, the feature matrix `X` should be standardized in order to + penalize features equally strong. Call + :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. + + If the target `y` is a ratio, appropriate sample weights `s` should be + provided. + As an example, consider Poission distributed counts `z` (integers) and + weights `s=exposure` (time, money, persons years, ...). Then you fit + `y = z/s`, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, + sample_weight=s)``. The weights are necessary for the right (finite + sample) mean. + Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, + in this case one might say that `y` has a 'scaled' Poisson distributions. + The same holds for other distributions. References ---------- @@ -1138,10 +1147,10 @@ def fit(self, X, y, sample_weight=None): else: solver = 'cd' if (self.alpha > 0 and self.l1_ratio > 0 and solver not in ['cd']): - raise ValueError("The chosen solver (solver={0}) can't deal " - "with L1 penalties, which are included with " - "(alpha={1}) and (l1_ratio={2})." - .format(solver, self.alpha, self.l1_ratio)) + raise ValueError("The chosen solver (solver={0}) can't deal " + "with L1 penalties, which are included with " + "(alpha={1}) and (l1_ratio={2})." 
+ .format(solver, self.alpha, self.l1_ratio)) if (not isinstance(self.max_iter, int) or self.max_iter <= 0): raise ValueError("Maximum number of iteration must be a positive " @@ -1340,14 +1349,17 @@ def fit(self, X, y, sample_weight=None): elif self.l1_ratio <= 0.01: # ElasticNet says l1_ratio <= 0.01 is not reliable # => use Ridge + # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 reg = Ridge(copy_X=True, fit_intercept=False, - alpha=self.alpha) + alpha=self.alpha*n_samples, + tol=np.max([self.tol, np.sqrt(self.tol)])) reg.fit(Xnew, link.link(y)) coef = reg.coef_ else: # TODO: Does this make sense at all? reg = ElasticNet(copy_X=True, fit_intercept=False, - alpha=self.alpha, l1_ratio=self.l1_ratio) + alpha=self.alpha, l1_ratio=self.l1_ratio, + tol=np.max([self.tol, np.sqrt(self.tol)])) reg.fit(Xnew, link.link(y)) coef = reg.coef_ else: @@ -1557,7 +1569,7 @@ def Hs(s): # minimize_z: a z + 1/2 b z^2 + c |d+z| # a = A_j # b = B_jj > 0 - # c = |P1_j| = P1_j > 0, ee 1.3 + # c = |P1_j| = P1_j > 0, see 1.3 # d = w_j + d_j # cf. https://arxiv.org/abs/0708.1485 Eqs. (3) - (4) # with beta = z+d, beta_hat = d-a/b and gamma = c/b From 919912c3f98e6d1190737e344c14f31c2eef9077 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 17 Feb 2019 18:38:36 +0100 Subject: [PATCH 043/269] Smarter intercept initialization and docstring improvements * smarter initialization of intercept * PEP 257 -- Docstring Conventions * minor docstring changes --- sklearn/linear_model/glm.py | 265 +++++++++++++++++++++--------------- 1 file changed, 153 insertions(+), 112 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 5fc869f81195f..d69ccd0a66486 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -59,14 +59,13 @@ def _check_weights(sample_weight, n_samples): - """Check that weights are non-negative and have the right shape - """ + """Check that weights are non-negative and have the right shape.""" if sample_weight is None: weights = np.ones(n_samples) elif np.isscalar(sample_weight): if sample_weight <= 0: raise ValueError("Sample weights must be non-negative.") - weights = sample_weight*np.ones(n_samples) + weights = sample_weight * np.ones(n_samples) else: _dtype = [np.float64, np.float32] weights = check_array(sample_weight, accept_sparse='csr', @@ -75,8 +74,8 @@ def _check_weights(sample_weight, n_samples): if weights.ndim > 1: raise ValueError("Sample weight must be 1D array or scalar") elif weights.shape[0] != n_samples: - raise ValueError("Sample weights must have the same length as" - " y") + raise ValueError("Sample weights must have the same length as " + "y") if not np.all(weights >= 0): raise ValueError("Sample weights must be non-negative.") elif not np.sum(weights) > 0: @@ -87,70 +86,72 @@ def _check_weights(sample_weight, n_samples): class Link(metaclass=ABCMeta): - """Abstract base class for Link funtions - """ + """Abstract base class for Link funtions.""" @abstractmethod def link(self, mu): - """The link function g(mu) with argument mu=E[Y] returns the - linear predictor. + """Compute the link function g(mu). + + The link function links the mean mu=E[Y] to the so called linear + predictor (X*w), i.e. g(mu) = linear predictor. Parameters ---------- mu : array, shape (n_samples,) - Usually the predicted mean. + Usually the (predicted) mean. """ raise NotImplementedError @abstractmethod def derivative(self, mu): - """Derivative of the link g'(mu). + """Compute the derivative of the link g'(mu). 
Parameters ---------- mu : array, shape (n_samples,) - Usually the predicted mean. + Usually the (predicted) mean. """ raise NotImplementedError @abstractmethod def inverse(self, lin_pred): - """The inverse link function h(lin_pred) with the linear predictor as - argument returns mu=E[Y]. + """Compute the inverse link function h(lin_pred). + + Gives the inverse relationship between linkear predictor and the mean + mu=E[Y], i.e. h(linear predictor) = mu. Parameters ---------- lin_pred : array, shape (n_samples,) - Usually the (predicted) linear predictor. + Usually the (fitted) linear predictor. """ raise NotImplementedError @abstractmethod def inverse_derivative(self, lin_pred): - """Derivative of the inverse link function h'(lin_pred). + """Compute the derivative of the inverse link function h'(lin_pred). Parameters ---------- lin_pred : array, shape (n_samples,) - Usually the (predicted) linear predictor. + Usually the (fitted) linear predictor. """ raise NotImplementedError @abstractmethod def inverse_derivative2(self, lin_pred): - """Second derivative of the inverse link function h''(lin_pred). + """Compute 2nd derivative of the inverse link function h''(lin_pred). Parameters ---------- lin_pred : array, shape (n_samples,) - Usually the (predicted) linear predictor. + Usually the (fitted) linear predictor. """ raise NotImplementedError class IdentityLink(Link): - """The identity link function g(x)=x. - """ + """The identity link function g(x)=x.""" def link(self, mu): return mu @@ -169,8 +170,7 @@ def inverse_derivative2(self, lin_pred): class LogLink(Link): - """The log link function g(x)=log(x). - """ + """The log link function g(x)=log(x).""" def link(self, mu): return np.log(mu) @@ -238,26 +238,22 @@ class ExponentialDispersionModel(metaclass=ABCMeta): @abstractproperty def lower_bound(self): - """The lower bound of values of Y~EDM. - """ + """The lower bound of values of Y~EDM.""" raise NotImplementedError() @abstractproperty def upper_bound(self): - """The upper bound of values of Y~EDM. - """ + """The upper bound of values of Y~EDM.""" raise NotImplementedError() @abstractproperty def include_lower_bound(self): - """If True, values of y may equal lower bound: y >= lower_bound. - """ + """If True, values of y may equal lower bound: y >= lower_bound.""" raise NotImplementedError() @abstractproperty def include_upper_bound(self): - """If True, values of y may equal upper bound: y <= upper_bound. - """ + """If True, values of y may equal upper bound: y <= upper_bound.""" raise NotImplementedError() def in_y_range(self, x): @@ -285,7 +281,9 @@ def in_y_range(self, x): @abstractmethod def unit_variance(self, mu): - r"""The unit variance :math:`v(\mu)` determines the variance as + r"""Compute the unit variance function. + + The unit variance :math:`v(\mu)` determines the variance as a function of the mean :math:`\mu` by :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(\mu_i)`. It can also be derived from the unit deviance :math:`d(y,\mu)` as @@ -304,7 +302,9 @@ def unit_variance(self, mu): @abstractmethod def unit_variance_derivative(self, mu): - r"""The derivative of the unit variance w.r.t. `mu`, :math:`v'(\mu)`. + r"""Compute the derivative of the unit variance w.r.t. mu. + + Return :math:`v'(\mu)`. Parameters ---------- @@ -314,7 +314,9 @@ def unit_variance_derivative(self, mu): raise NotImplementedError() def variance(self, mu, phi=1, weights=1): - r"""The variance of :math:`Y_i \sim \mathrm{EDM}(\mu_i,\phi/s_i)` is + r"""Compute the variance function. 
+ + The variance of :math:`Y_i \sim \mathrm{EDM}(\mu_i,\phi/s_i)` is :math:`\mathrm{Var}[Y_i]=\phi/s_i*v(\mu_i)`, with unit variance :math:`v(\mu)` and weights :math:`s_i`. @@ -332,7 +334,9 @@ def variance(self, mu, phi=1, weights=1): return phi/weights * self.unit_variance(mu) def variance_derivative(self, mu, phi=1, weights=1): - r"""The derivative of the variance w.r.t. `mu`, + r"""Compute the derivative of the variance w.r.t. mu. + + Returns :math:`\frac{\partial}{\partial\mu}\mathrm{Var}[Y_i] =phi/s_i*v'(\mu_i)`, with unit variance :math:`v(\mu)` and weights :math:`s_i`. @@ -352,10 +356,12 @@ def variance_derivative(self, mu, phi=1, weights=1): @abstractmethod def unit_deviance(self, y, mu): - r"""The unit_deviance :math:`d(y,\mu)`. - In terms of the log-likelihood it is given by + r"""Compute the unit deviance. + + The unit_deviance :math:`d(y,\mu)` can be defined by the + log-likelihood as :math:`d(y,\mu) = -2\phi\cdot - \left(loglike(y,\mu,phi) - loglike(y,y,phi)\right).` + \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right).` Parameters ---------- @@ -368,7 +374,9 @@ def unit_deviance(self, y, mu): raise NotImplementedError() def unit_deviance_derivative(self, y, mu): - r"""The derivative w.r.t. `mu` of the unit deviance + r"""Compute the derivative of the unit deviance w.r.t. mu. + + The derivative of the unit deviance is given by :math:`\frac{\partial}{\partial\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` with unit variance :math:`v(\mu)`. @@ -383,9 +391,12 @@ def unit_deviance_derivative(self, y, mu): return -2*(y-mu)/self.unit_variance(mu) def deviance(self, y, mu, weights=1): - r"""The deviance is given by :math:`D = \sum_i s_i \cdot d(y, \mu) + r"""Compute the deviance. + + The deviance is a weighted sum of the per sample unit deviances, + :math:`D = \sum_i s_i \cdot d(y_i, \mu_i)` with weights :math:`s_i` and unit deviance :math:`d(y,\mu)`. - In terms of the likelihood it is :math:`D = -2\phi\cdot + In terms of the log-likelihood it is :math:`D = -2\phi\cdot \left(loglike(y,\mu,\frac{phi}{s}) - loglike(y,y,\frac{phi}{s})\right)`. @@ -403,15 +414,15 @@ def deviance(self, y, mu, weights=1): return np.sum(weights*self.unit_deviance(y, mu)) def _deviance(self, coef, X, y, weights, link): - """The deviance as a function of the coefficients `coef` - (:math:`w`). - """ + """Compute the deviance as a function of the coefficients and data.""" lin_pred = safe_sparse_dot(X, coef, dense_output=True) mu = link.inverse(lin_pred) return self.deviance(y, mu, weights) def deviance_derivative(self, y, mu, weights=1): - """The derivative w.r.t. `mu` of the deviance. + """Compute the derivative of the deviance w.r.t. mu. + + It gives :math:`\\frac{\\partial}{\\partial\\mu} D(y, \\mu; weights)`. Parameters ---------- @@ -427,7 +438,9 @@ def deviance_derivative(self, y, mu, weights=1): return weights*self.unit_deviance_derivative(y, mu) def _score(self, coef, phi, X, y, weights, link): - r"""The score function is the derivative of the + r"""Compute the score function. + + The score function is the derivative of the log-likelihood w.r.t. `coef` (:math:`w`). It is given by @@ -453,7 +466,8 @@ def _score(self, coef, phi, X, y, weights, link): return score def _fisher_matrix(self, coef, phi, X, y, weights, link): - r"""The Fisher information matrix. + r"""Compute the Fisher information matrix. 
+ The Fisher information matrix, also known as expected information matrix is given by @@ -482,9 +496,10 @@ def _fisher_matrix(self, coef, phi, X, y, weights, link): return fisher_matrix def _observed_information(self, coef, phi, X, y, weights, link): - r"""The observed information matrix. + r"""Compute the observed information matrix. + The observed information matrix, also known as the negative of - the Hessian matrix of the log-likelihood. It is given by + the Hessian matrix of the log-likelihood, is given by .. math: @@ -518,8 +533,10 @@ def _observed_information(self, coef, phi, X, y, weights, link): return observed_information def _deviance_derivative(self, coef, X, y, weights, link): - r"""The derivative w.r.t. `coef` (:math:`w`) of the deviance as a - function of the coefficients `coef`. + r"""Compute the derivative of the deviance w.r.t. coef. + + The derivative of the deviance w.r.t. `coef` (:math:`w`) as a + function of the coefficients `coef` and the data. This is equivalent to :math:`-2\phi` times the score function :func:`_score` (derivative of the log-likelihood). """ @@ -528,9 +545,11 @@ def _deviance_derivative(self, coef, X, y, weights, link): return -2*score def _deviance_hessian(self, coef, X, y, weights, link): - r"""The hessian matrix w.r.t. `coef` (:math:`w`) of the deviance - as a function of the coefficients `coef`. - This is equivalent to :math:`+2\phi` times the observed information + r"""Compute the hessian matrix of the deviance w.r.t. coef. + + The hessian of the deviance w.r.t. `coef` (:math:`w`) is evaluated as + a function of the coefficients `coef` and the data. + It is equivalent to :math:`+2\phi` times the observed information matrix. """ info_matrix = self._observed_information(coef=coef, phi=1, X=X, y=y, @@ -538,8 +557,12 @@ def _deviance_hessian(self, coef, X, y, weights, link): return 2*info_matrix def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link): - """Calculates eta (linear predictor), mu, score function (derivative - of log-likelihood) and Fisher matrix (all with phi=1) all in one go""" + """Compute linear predictor, mean, score function and fisher matrix. + + It calculates the linear predictor, the mean, score function + (derivative of log-likelihood) and Fisher information matrix + all in one go as function of `coef` (:math:`w`) and the data. + """ n_samples, n_features = X.shape # eta = linear predictor eta = safe_sparse_dot(X, coef, dense_output=True) @@ -561,7 +584,9 @@ def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link): return eta, mu, score, fisher def starting_mu(self, y, weights=1): - """Starting values for the mean mu_i in (unpenalized) IRLS. + """Set starting values for the mean mu. + + These may be good starting points for the (unpenalized) IRLS solver. Parameters ---------- @@ -577,7 +602,9 @@ def starting_mu(self, y, weights=1): class TweedieDistribution(ExponentialDispersionModel): r"""A class for the Tweedie distribution. - They have :math:`\mu=\mathrm{E}[Y]` and + + A Tweedie distribution with mean :math:`\mu=\mathrm{E}[Y]` is uniquely + defined by it's mean-variance relationship :math:`\mathrm{Var}[Y] \propto \mu^power`. Special cases are: @@ -668,7 +695,7 @@ def include_upper_bound(self): return self._include_upper_bound def unit_variance(self, mu): - """The unit variance of a Tweedie distribution is v(mu)=mu**power. + """Compute the unit variance of a Tweedie distribution v(mu)=mu**power. 
Parameters ---------- @@ -678,8 +705,8 @@ def unit_variance(self, mu): return np.power(mu, self.power) def unit_variance_derivative(self, mu): - """The derivative of the unit variance of a Tweedie distribution is - v(mu)=power*mu**(power-1). + """Compute the derivative of the unit variance of a Tweedie + distribution v(mu)=power*mu**(power-1). Parameters ---------- @@ -732,9 +759,9 @@ def __init__(self): class GeneralizedHyperbolicSecand(ExponentialDispersionModel): - """A class for the von Generalized Hyperbolic Secand (GHS) distribution. + """A class for the Generalized Hyperbolic Secand (GHS) distribution. - The GHS distribution is for data y in (-inf, inf). + The GHS distribution is for tagets y in (-inf, inf). """ def __init__(self): self._lower_bound = -np.Inf @@ -770,7 +797,7 @@ def unit_deviance(self, y, mu): def _irls_step(X, W, P2, z): - """One step in iteratively reweighted least squares + """Compute one step in iteratively reweighted least squares. Solve A w = b for w with A = (X' W X + P2) @@ -823,18 +850,18 @@ def _irls_step(X, W, P2, z): class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - """Regression via a Generalized Linear Model (GLM) based on reproductive - Exponential Dispersion Models (EDM) with combined L1 and L2 priors as - regularizer. + """Regression via a Generalized Linear Model (GLM) with penalties. - Minimizes the objective function:: + GLMs based on a reproductive Exponential Dispersion Model (EDM) with + combined L1 and L2 priors as regularizer minimizes the following objective + function:: 1/(2*sum(s)) * deviance(y, h(X*w); s) + alpha * l1_ratio * ||P1*w||_1 + 1/2 * alpha * (1 - l1_ratio) * w*P2*w with inverse link function `h` and s=`sample_weight` (for - `sample_weight=Nones` one has s=1 and sum(s) equals `n_samples`). + `sample_weight=None`, one has s=1 and sum(s)=`n_samples`). For `P1=P2=identity`, the penalty is the elastic net:: alpha * l1_ratio * ||w||_1 @@ -893,16 +920,16 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): family : {'normal', 'poisson', 'gamma', 'inverse.gaussian'} or an instance\ of class ExponentialDispersionModel, optional(default='normal') - the distributional assumption of the GLM, i.e. which loss function to - be minimized. + The distributional assumption of the GLM, i.e. which distribution from + the EDM, specifies the loss function to be minimized. link : {'identity', 'log'} or an instance of class Link, optional (default='identity') - the link function of the GLM, i.e. mapping from linear predictor + The link function of the GLM, i.e. mapping from linear predictor (X*coef) to expectation (mu). fit_dispersion : {None, 'chisqr', 'deviance'}, optional (defaul=None) - method for estimation of the dispersion parameter phi. Whether to use + Method for estimation of the dispersion parameter phi. Whether to use the chi squared statisic or the deviance statistic. If None, the dispersion is not estimated. @@ -914,15 +941,15 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Sets 'irls' if l1_ratio equals 0, else 'cd'. 'irls' - iterated reweighted least squares (Fisher scoring). - It is the standard algorithm for GLMs. Cannot deal with + Iterated reweighted least squares (with Fisher scoring). + It is the standard algorithm for GLMs. It cannot deal with L1 penalties. 'newton-cg', 'lbfgs' Cannot deal with L1 penalties. 'cd' - coordinate descent algorithm. It can deal with L1 as well as L2 + Coordinate descent algorithm. It can deal with L1 as well as L2 penalties. 
max_iter : int, optional (default=100) @@ -930,15 +957,15 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): tol : float, optional (default=1e-4) Stopping criterion. For the irls, newton-cg and lbfgs solvers, - the iteration will stop when ``max{|g_i | i = 1, ..., n} <= tol`` + the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient (derivative of - the deviance). + the objective function). warm_start : boolean, optional (default=False) - If set to ``True``, reuse the solution of the previous call to fit as - initialization for ``coef_`` and ``intercept_`` (supersedes option + If set to ``True``, reuse the solution of the previous call to ``fit`` + as initialization for ``coef_`` and ``intercept_`` (supersedes option ``start_params``). If set to ``True`` or if the attribute ``coef_`` - does not exit (first call to fit), option ``start_params`` sets the + does not exit (first call to ``fit``), option ``start_params`` sets the starting values for ``coef_`` and ``intercept_``. start_params : {None, 'least_squares', 'zero', array of shape \ @@ -946,8 +973,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): If an array of size n_features* is supplied, use it as start values for ``coef_`` in the fit. If ``fit_intercept=True``, the first element is assumed to be the start value for the ``intercept_``. - Note that n_features* = X.shape[1] + fit_intercept includes the - intercept in counting. + Note that n_features* = X.shape[1] + fit_intercept, i.e. it includes + the intercept in counting. If 'least_squares' is set, the result of a least squares fit in the link space (linear predictor) is taken. If 'zero' is set, all coefficients start with zero. @@ -960,7 +987,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): For the solver 'cd' (coordinate descent), the coordinates (features) can be updated in either cyclic or random order. If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially by default. This + rather than looping over features sequentially in the same order. This (setting to 'random') often leads to significantly faster convergence especially when tol is higher than 1e-4. @@ -1057,7 +1084,7 @@ def __init__(self, alpha=1.0, l1_ratio=0, P1=None, P2=None, self.verbose = verbose def fit(self, X, y, sample_weight=None): - """Fit a generalized linear model. + """Fit a Generalized Linear Model. Parameters ---------- @@ -1087,12 +1114,14 @@ def fit(self, X, y, sample_weight=None): _dtype = [np.float64, np.float32] X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], dtype=_dtype, y_numeric=True, multi_output=False) + # Without converting y to float, deviance might raise + # ValueError: Integers to negative integer powers are not allowed. 
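        # (Editor's illustration, not from the patch: with an integer dtype,
        #  np.array([1, 2]) ** (-1) raises exactly this ValueError, whereas
        #  np.array([1.0, 2.0]) ** (-1) returns array([1. , 0.5]); hence the
        #  explicit cast of y to float64 below.)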
y = y.astype(np.float64) weights = _check_weights(sample_weight, y.shape[0]) # 1.2 validate arguments of __init__ ################################## - # Garantee that self._family_instance is an instance of class + # Guarantee that self._family_instance is an instance of class # ExponentialDispersionModel if isinstance(self.family, ExponentialDispersionModel): self._family_instance = self.family @@ -1112,7 +1141,8 @@ def fit(self, X, y, sample_weight=None): " ['normal', 'poisson', 'gamma', 'inverse.gaussian'];" " got (family={0})".format(self.family)) - # Garantee that self._link_instance is set to an instance of class Link + # Guarantee that self._link_instance is set to an instance of + # class Link if isinstance(self.link, Link): self._link_instance = self.link else: @@ -1127,19 +1157,19 @@ def fit(self, X, y, sample_weight=None): .format(self.link)) if not isinstance(self.alpha, numbers.Number) or self.alpha < 0: - raise ValueError("Penalty term must be non-negative;" + raise ValueError("Penalty term must be a non-negative number;" " got (alpha={0})".format(self.alpha)) if (not isinstance(self.l1_ratio, numbers.Number) or self.l1_ratio < 0 or self.l1_ratio > 1): - raise ValueError("l1_ratio must be in interval [0, 1]; got" - " (l1_ratio={0})".format(self.l1_ratio)) + raise ValueError("l1_ratio must be a number in interval [0, 1];" + " got (l1_ratio={0})".format(self.l1_ratio)) if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool;" " got {0}".format(self.fit_intercept)) if self.solver not in ['auto', 'irls', 'lbfgs', 'newton-cg', 'cd']: - raise ValueError("GeneralizedLinearRegressor supports only irls, " - "auto, lbfgs, newton-cg and cd solvers, got {0}" - "".format(self.solver)) + raise ValueError("GeneralizedLinearRegressor supports only solvers" + " 'auto', 'irls', 'lbfgs', 'newton-cg' and 'cd';" + " got {0}".format(self.solver)) solver = self.solver if self.solver == 'auto': if self.l1_ratio == 0: @@ -1168,8 +1198,8 @@ def fit(self, X, y, sample_weight=None): elif isinstance(start_params, str): if start_params not in ['least_squares', 'zero']: raise ValueError("The argument start_params must be None, " - "'least-squares', 'zero' or an array of right" - " length," + "'least-squares', 'zero' or an array of " + " correct length;" " got(start_params={0})".format(start_params)) else: start_params = check_array(start_params, accept_sparse='csr', @@ -1179,21 +1209,21 @@ def fit(self, X, y, sample_weight=None): (start_params.ndim != 1)): raise ValueError("Start values for parameters must have the" "right length and dimension; required (length" - "={0}, ndim=1), got (length={1}, ndim={2})." + "={0}, ndim=1); got (length={1}, ndim={2})." 
.format(X.shape[1] + self.fit_intercept, start_params.shape[0], start_params.ndim)) if self.selection not in ['cyclic', 'random']: raise ValueError("The argument selection must be 'cyclic' or " - "'random', got (selection={0})" + "'random'; got (selection={0})" .format(self.selection)) random_state = check_random_state(self.random_state) if not isinstance(self.copy_X, bool): raise ValueError("The argument copy_X must be bool;" " got {0}".format(self.copy_X)) if not isinstance(self.check_input, bool): - raise ValueError("The attribute check_input must be bool; got " + raise ValueError("The argument check_input must be bool; got " "(check_input={0})".format(self.check_input)) if self.P1 is None: @@ -1232,7 +1262,7 @@ def fit(self, X, y, sample_weight=None): link = self._link_instance if self.fit_intercept: - # intercept is first column <=> coef[0] is for intecept + # Note: intercept is first column <=> coef[0] is for intecept if sparse.issparse(X): Xnew = sparse.hstack([np.ones([X.shape[0], 1]), X]) else: @@ -1259,10 +1289,11 @@ def fit(self, X, y, sample_weight=None): P2 *= l2 # one only ever needs the symmetrized L2 penalty matrix 1/2 (P2 + P2') # reason: w' P2 w = (w' P2 w)', i.e. it is symmetric - if sparse.issparse(P2): - P2 = 0.5 * (P2 + P2.transpose()) - else: - P2 = 0.5 * (P2 + P2.T) + if P2.ndim == 2: + if sparse.issparse(P2): + P2 = 0.5 * (P2 + P2.transpose()) + else: + P2 = 0.5 * (P2 + P2.T) # 1.3 additional validations ########################################## if self.check_input: @@ -1301,7 +1332,7 @@ def fit(self, X, y, sample_weight=None): # we rescale weights such that sum(weights) = 1 and this becomes # 1/2*deviance + L1 + L2 with deviance=sum(weights * unit_deviance) weights_sum = np.sum(weights) - weights = weights/np.sum(weights) + weights = weights/weights_sum ####################################################################### # 3. initialization of coef = (intercept_, coef_) # @@ -1338,6 +1369,8 @@ def fit(self, X, y, sample_weight=None): # with L1 penalty, start with coef = 0 # TODO: Are there better options? coef = np.zeros(n_features) + if self.fit_intercept: + coef[0] = link.link(np.mean(y)) elif isinstance(self.start_params, str): if self.start_params == 'zero': coef = np.zeros(n_features) @@ -1546,6 +1579,8 @@ def Hs(s): # inner loop # TODO: use sparsity (coefficient already 0 due to L1 penalty) # => active set of features for featurelist, see paper + # of Improved GLMNET or Gap Safe Screening Rules + # https://arxiv.org/abs/1611.05780 # A = f'(w) + d*H(w) + (w+d)*P2 # B = H+P2 # Note: f'=-score and H=fisher are updated at the end of outer @@ -1694,7 +1729,7 @@ def Hs(s): return self def linear_predictor(self, X): - """The linear_predictor X*coef_ + intercept_. + """Compute the linear_predictor = X*coef_ + intercept_. Parameters ---------- @@ -1741,8 +1776,7 @@ def predict(self, X, sample_weight=None): return mu*weights def estimate_phi(self, X, y, sample_weight=None): - """Estimation of the dispersion parameter phi. - Returns the estimate. + """Estimate/fit the dispersion parameter phi. Parameters ---------- @@ -1755,6 +1789,11 @@ def estimate_phi(self, X, y, sample_weight=None): sample_weight : {None, array-like}, shape (n_samples,), optional \ (default=None) Sample weights. + + Returns + ------- + phi : float + Dispersion parameter. 
""" check_is_fitted(self, "coef_") _dtype = [np.float64, np.float32] @@ -1785,15 +1824,17 @@ def estimate_phi(self, X, y, sample_weight=None): # "AssertionError: -0.28014056555724598 not greater than 0.5" # unless GeneralizedLinearRegressor has a score which passes the test. def score(self, X, y, sample_weight=None): - r"""Returns D^2, a generalization of the coefficient of determination - R^2, which uses deviance instead of squared error. + r"""Compute D^2, the percentage of deviance explained. + + D^2 is a generalization of the coefficient of determination R^2. + R^2 uses squared error and D^2 deviance. Note that those two are equal + for family='normal'. D^2 is defined as :math:`D^2 = 1-\frac{D(y_{true},y_{pred})}{D_{null}}`, :math:`D_{null}` is the null deviance, i.e. the deviance of a model with intercept alone which corresponds to :math:`y_{pred} = \bar{y}`. The mean - :math:`\bar{y}` is averaged by sample_weight. In the case of a Normal - distribution, D^2 equals R^2. + :math:`\bar{y}` is averaged by sample_weight. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). @@ -1812,7 +1853,7 @@ def score(self, X, y, sample_weight=None): Returns ------- score : float - D^2 of self.predict(X) wrt. y. + D^2 of self.predict(X) w.r.t. y. """ # Note, default score defined in RegressorMixin is R^2 score. # TODO: make D^2 a score function in module metrics (and thereby get From 01033e36d913756f7ff5e2214189cf1d7426dee1 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 20 Feb 2019 17:20:09 +0100 Subject: [PATCH 044/269] Fix false formula in starting_mu and improve start_params --- sklearn/linear_model/glm.py | 108 ++++++++++++++++++++---------------- 1 file changed, 60 insertions(+), 48 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index d69ccd0a66486..fad7492acc2fb 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -583,7 +583,7 @@ def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link): fisher = safe_sparse_dot(X.T, temp, dense_output=False) return eta, mu, score, fisher - def starting_mu(self, y, weights=1): + def starting_mu(self, y, weights=1, ind_weight=0.5): """Set starting values for the mean mu. These may be good starting points for the (unpenalized) IRLS solver. @@ -595,9 +595,13 @@ def starting_mu(self, y, weights=1): weights : array, shape (n_samples,) (default=1) Weights or exposure to which variance is inverse proportional. + + ind_weight : float (default=0.5) + Must be between 0 and 1. Specifies how much weight is given to the + individual observations instead of the mean of y. """ - return ((weights*y+np.mean(weights*y)) / - (2.*np.sum(np.ones_like(y)*weights))) + return (ind_weight * y + + (1. - ind_weight) * np.average(y, weights=weights)) class TweedieDistribution(ExponentialDispersionModel): @@ -852,17 +856,19 @@ def _irls_step(X, W, P2, z): class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a Generalized Linear Model (GLM) with penalties. - GLMs based on a reproductive Exponential Dispersion Model (EDM) with - combined L1 and L2 priors as regularizer minimizes the following objective - function:: + GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at + fitting and predicting the mean `mu=h(X*w)`. 
Therefore the fit minimizes + the following objective function with combined L1 and L2 priors as + regularizer:: 1/(2*sum(s)) * deviance(y, h(X*w); s) + alpha * l1_ratio * ||P1*w||_1 + 1/2 * alpha * (1 - l1_ratio) * w*P2*w with inverse link function `h` and s=`sample_weight` (for - `sample_weight=None`, one has s=1 and sum(s)=`n_samples`). - For `P1=P2=identity`, the penalty is the elastic net:: + ``sample_weight=None``, one has s=1 and sum(s)=`n_samples`). + For `P1=P2=identity` (``P1=None``, ``P2=None``), the penalty is the + elastic net:: alpha * l1_ratio * ||w||_1 + 1/2 * alpha * (1 - l1_ratio) * ||w||_2^2 @@ -966,24 +972,34 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): as initialization for ``coef_`` and ``intercept_`` (supersedes option ``start_params``). If set to ``True`` or if the attribute ``coef_`` does not exit (first call to ``fit``), option ``start_params`` sets the - starting values for ``coef_`` and ``intercept_``. + start values for ``coef_`` and ``intercept_``. + + start_params : {'irls', 'least_squares', 'zero', array of shape \ + (n_features*, )}, optional (default='irls') + Relevant only if ``warm_start=False`` or if fit is called + the first time (``self.coef_`` does not yet exist). + + 'irls' + Start values of mu are calculated by family.starting_mu(..). Then, + one step of irls obtains start values for ``coef_`. This gives + usually good results. - start_params : {None, 'least_squares', 'zero', array of shape \ - (n_features*, )}, optional (default=None) - If an array of size n_features* is supplied, use it as start values - for ``coef_`` in the fit. If ``fit_intercept=True``, the first element + 'least_squares' + Start values for ``coef_`` are obtained by a least squares fit in the + link space (y is transformed to the space of the linear predictor). + + 'zero' + All coefficients are set to zero. If ``fit_intercept=True``, the + start value for the intercept is obtained by the average of y. + + array + The array of size n_features* is directly used as start values + for ``coef_``. If ``fit_intercept=True``, the first element is assumed to be the start value for the ``intercept_``. Note that n_features* = X.shape[1] + fit_intercept, i.e. it includes the intercept in counting. - If 'least_squares' is set, the result of a least squares fit in the - link space (linear predictor) is taken. - If 'zero' is set, all coefficients start with zero. - If ``None``, the start values are calculated by setting mu to - family.starting_mu(..) and one step of irls. - These options only apply if ``warm_start=False`` or if fit is called - the first time (``self.coef_`` does not yet exist). - selection : str, optional (default='random') + selection : str, optional (default='cyclic') For the solver 'cd' (coordinate descent), the coordinates (features) can be updated in either cyclic or random order. If set to 'random', a random coefficient is updated every iteration @@ -1005,7 +1021,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): check_input : boolean, optional (default=True) Allow to bypass several checks on input: y values in range of family, - sample_weights non-negative, P2 positive semi-definite. + sample_weight non-negative, P2 positive semi-definite. Don't use this parameter unless you know what you do. 
verbose : int, optional (default=0) @@ -1061,8 +1077,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): def __init__(self, alpha=1.0, l1_ratio=0, P1=None, P2=None, fit_intercept=True, family='normal', link='identity', fit_dispersion=None, solver='auto', max_iter=100, - tol=1e-4, warm_start=False, start_params=None, - selection='random', random_state=None, copy_X=True, + tol=1e-4, warm_start=False, start_params='irls', + selection='cyclic', random_state=None, copy_X=True, check_input=True, verbose=0): self.alpha = alpha self.l1_ratio = l1_ratio @@ -1193,11 +1209,9 @@ def fit(self, X, y, sample_weight=None): raise ValueError("The argument warm_start must be bool;" " got {0}".format(self.warm_start)) start_params = self.start_params - if start_params is None: - pass - elif isinstance(start_params, str): - if start_params not in ['least_squares', 'zero']: - raise ValueError("The argument start_params must be None, " + if isinstance(start_params, str): + if start_params not in ['irls', 'least_squares', 'zero']: + raise ValueError("The argument start_params must be 'irls', " "'least-squares', 'zero' or an array of " " correct length;" " got(start_params={0})".format(start_params)) @@ -1348,11 +1362,11 @@ def fit(self, X, y, sample_weight=None): self.coef_)) else: coef = self.coef_ - elif self.start_params is None: - if self.l1_ratio == 0: + elif isinstance(start_params, str): + if start_params == 'irls': # See 3.1 IRLS # Use mu_start and apply one irls step to calculate coef - mu = family.starting_mu(y, weights) + mu = family.starting_mu(y, weights=weights) # linear predictor eta = link.link(mu) # h'(eta) @@ -1365,16 +1379,9 @@ def fit(self, X, y, sample_weight=None): # solve A*coef = b # A = X' W X + l2 P2, b = X' W z coef = _irls_step(Xnew, W, P2, z) - else: - # with L1 penalty, start with coef = 0 - # TODO: Are there better options? - coef = np.zeros(n_features) - if self.fit_intercept: - coef[0] = link.link(np.mean(y)) - elif isinstance(self.start_params, str): - if self.start_params == 'zero': - coef = np.zeros(n_features) - elif self.start_params == 'least_squares': + elif start_params == 'least_squares': + # less restrictive tolerance for finding start values + tol = np.max([self.tol, np.sqrt(self.tol)]) if self.alpha == 0: reg = LinearRegression(copy_X=True, fit_intercept=False) reg.fit(Xnew, link.link(y)) @@ -1384,18 +1391,21 @@ def fit(self, X, y, sample_weight=None): # => use Ridge # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 reg = Ridge(copy_X=True, fit_intercept=False, - alpha=self.alpha*n_samples, - tol=np.max([self.tol, np.sqrt(self.tol)])) + alpha=self.alpha*n_samples, tol=tol) reg.fit(Xnew, link.link(y)) coef = reg.coef_ else: # TODO: Does this make sense at all? 
reg = ElasticNet(copy_X=True, fit_intercept=False, alpha=self.alpha, l1_ratio=self.l1_ratio, - tol=np.max([self.tol, np.sqrt(self.tol)])) + tol=tol) reg.fit(Xnew, link.link(y)) coef = reg.coef_ - else: + else: # start_params == 'zero' + coef = np.zeros(n_features) + if self.fit_intercept: + coef[0] = link.link(np.average(y, weights=weights)) + else: # assign given array as start values coef = start_params ####################################################################### @@ -1560,6 +1570,8 @@ def Hs(s): # some precalculations eta, mu, score, fisher = family._eta_mu_score_fisher( coef=coef, phi=1, X=Xnew, y=y, weights=weights, link=link) + # set up space for search direction d for inner loop + d = np.zeros_like(coef) # initial stopping tolerance of inner loop # use L1-norm of minimum-norm of subgradient of F # fp_wP2 = f'(w) + w*P2 @@ -1574,8 +1586,8 @@ def Hs(s): # outer loop while self.n_iter_ < self.max_iter: self.n_iter_ += 1 - # initialize search direction d (to be optimized) - d = np.zeros_like(coef) + # initialize search direction d (to be optimized) with zero + d.fill(0) # inner loop # TODO: use sparsity (coefficient already 0 due to L1 penalty) # => active set of features for featurelist, see paper From 4071a8a54de0112fd1afd6d8fc5b5585708c84ea Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 20 Feb 2019 21:15:04 +0100 Subject: [PATCH 045/269] Improve argument handling of P1 and P2 * P2 also accepts 1d array and interprets it as diagonal matrix * improved input checks for P1 and P2 --- sklearn/linear_model/glm.py | 103 ++++++++++++++++--------- sklearn/linear_model/tests/test_glm.py | 31 ++++---- 2 files changed, 81 insertions(+), 53 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index fad7492acc2fb..33e0d75730e3a 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -45,7 +45,10 @@ from abc import ABCMeta, abstractmethod, abstractproperty import numbers import numpy as np -from scipy import linalg, optimize, sparse, special +from scipy import linalg, sparse +import scipy.sparse.linalg as splinalg +from scipy.optimize import fmin_l_bfgs_b +from scipy.special import xlogy import warnings from .base import LinearRegression from .coordinate_descent import ElasticNet @@ -727,7 +730,7 @@ def unit_deviance(self, y, mu): if p == 1: # PoissonDistribution # 2 * (y*log(y/mu) - y + mu), with y*log(y/mu)=0 if y=0 - return 2 * (special.xlogy(y, y/mu) - y + mu) + return 2 * (xlogy(y, y/mu) - y + mu) elif p == 2: # GammaDistribution return 2 * (np.log(mu/y)+y/mu-1) @@ -840,7 +843,7 @@ def _irls_step(X, W, P2, z): XtW = X.transpose() * W A = XtW * X + L2 b = XtW * z - coef = sparse.linalg.spsolve(A, b) + coef = splinalg.spsolve(A, b) else: XtW = (X.T * W) A = XtW.dot(X) @@ -867,7 +870,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): with inverse link function `h` and s=`sample_weight` (for ``sample_weight=None``, one has s=1 and sum(s)=`n_samples`). - For `P1=P2=identity` (``P1=None``, ``P2=None``), the penalty is the + For ``P1=P2='identity'`` (``P1=None``, ``P2=None``), the penalty is the elastic net:: alpha * l1_ratio * ||w||_1 @@ -904,21 +907,24 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a combination of L1 and L2. 
- P1 : {None, array-like}, shape (n_features,), optional \ - (default=None) + P1 : {'identity', array-like}, shape (n_features,), optional \ + (default='identity') With this array, you can exclude coefficients from the L1 penalty. Set the corresponding value to 1 (include) or 0 (exclude). The - default value ``None`` is the same as a 1d array of ones. + default value ``'identity'`` is the same as a 1d array of ones. Note that n_features = X.shape[1]. - P2 : {None, array-like, sparse matrix}, shape \ - (n_features, n_features), optional (default=None) - With this square matrix the L2 penalty is calculated as `w P2 w`. - This gives a fine control over this penalty (Tikhonov - regularization). The diagonal zeros of a diagonal P2, for example, - exclude all corresponding coefficients from the L2 penalty. - The default value ``None`` is the same as the identity matrix. - Note that n_features = X.shape[1]. P2 must be positive semi-definite. + P2 : {'identity', array-like, sparse matrix}, shape \ + (n_features,) or (n_features, n_features), optional \ + (default='identity') + With this option, you can set the P2 matrix in the L2 penalty `w*P2*w`. + This gives a fine control over this penalty (Tikhonov regularization). + A 2d array is directly used as the square matrix P2. A 1d array is + interpreted as diagonal (square) matrix. The default 'identity' sets + the identity matrix, which gives the usual squared L2-norm. If you just + want to exclude certain coefficients, pass a 1d array filled with 1, + and 0 for the coefficients to be excluded. + Note that P2 must be positive semi-definite. fit_intercept : boolean, optional (default=True) Specifies if a constant (a.k.a. bias or intercept) should be @@ -1074,7 +1080,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Journal of Machine Learning Research 13 (2012) 1999-2030 https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf """ - def __init__(self, alpha=1.0, l1_ratio=0, P1=None, P2=None, + def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', fit_intercept=True, family='normal', link='identity', fit_dispersion=None, solver='auto', max_iter=100, tol=1e-4, warm_start=False, start_params='irls', @@ -1240,20 +1246,23 @@ def fit(self, X, y, sample_weight=None): raise ValueError("The argument check_input must be bool; got " "(check_input={0})".format(self.check_input)) - if self.P1 is None: + if isinstance(self.P1, str) and self.P1 == 'identity': P1 = np.ones(X.shape[1]) else: - P1 = np.copy(np.atleast_1d(self.P1)) - if P1.dtype.kind not in ['b', 'i', 'u', 'f']: - raise ValueError("P1 must be a numeric value; " - "got (dtype={0}).".format(P1.dtype)) + P1 = np.atleast_1d(self.P1) + try: + P1 = P1.astype(np.float64, casting='safe', copy=True) + except TypeError: + raise TypeError("The given P1 cannot be converted to a numeric" + "array; got (P1.dtype={0})." + .format(P1.dtype)) if (P1.ndim != 1) or (P1.shape[0] != X.shape[1]): - raise ValueError("P1 must be either None or a 1d array with " - "the length of X.shape[1]; " + raise ValueError("P1 must be either 'identity' or a 1d array " + "with the length of X.shape[1]; " "got (P1.shape[0]={0}), " "needed (X.shape[1]={1})." 
.format(P1.shape[0], X.shape[1])) - if self.P2 is None: + if isinstance(self.P2, str) and self.P2 == 'identity': if not sparse.issparse(X): P2 = np.ones(X.shape[1]) else: @@ -1262,8 +1271,15 @@ def fit(self, X, y, sample_weight=None): else: P2 = check_array(self.P2, copy=True, accept_sparse=['csr', 'csc', 'coo'], - dtype="numeric", ensure_2d=True) - if ((P2.ndim != 2) or + dtype=_dtype, ensure_2d=False) + if P2.ndim == 1: + if P2.shape[0] != X.shape[1]: + raise ValueError("P2 should be a 1d array of shape " + "(n_features,) with " + "n_features=X.shape[1]; " + "got (P2.shape=({0},)), needed ({1},)" + .format(P2.shape[0], X.shape[1])) + elif ((P2.ndim != 2) or (P2.shape[0] != P2.shape[1]) or (P2.shape[0] != X.shape[1])): raise ValueError("P2 must be either None or an array of shape " @@ -1319,21 +1335,32 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Sample weights must be non-negative.") # check if P1 has only non-negative values, negative values might # indicate group lasso in the future. - if self.P1 is not None: + if self.P1 != 'identity': if not np.all(P1 >= 0): raise ValueError("P1 must not have negative values.") # check if P2 is positive semidefinite # np.linalg.cholesky(P2) 'only' asserts positive definite - if self.P2 is not None: - if sparse.issparse(P2): - # TODO: check sparse P2 for non-negativeness - # raise NotImplementedError("Check sparse P2 for " - # "non-negativeness is not yet " - # "implemented.") - pass - elif P2.ndim == 2: - if not np.all(np.linalg.eigvals(P2) >= -1e-15): - raise ValueError("P2 must be positive definite.") + if self.P2 != 'identity': + # due to numerical precision, we allow eigenvalues to be a + # tiny bit negative + epsneg = 10 * np.finfo(P2.dtype).epsneg + if P2.ndim == 1 or P2.shape[0] == 1: + if not np.all(P2 >= 0): + raise ValueError("1d array P2 must not have negative " + "values.") + elif sparse.issparse(P2): + # for sparse matrices, not all eigenvals can be computed + # efficiently, use only half of n_features + # k = how many eigenvals to compute + k = np.min([10, n_features // 10 + 1]) + sigma = 0 # start searching near this value + which = 'SA' # find smallest algebraic eigenvalues first + if not np.all(splinalg.eigsh(P2, k=k, sigma=sigma, + which=which) >= epsneg): + raise ValueError("P2 must be positive semi-definite.") + else: + if not np.all(linalg.eigvalsh(P2) >= epsneg): + raise ValueError("P2 must be positive semi-definite.") # TODO: if alpha=0 check that Xnew is not rank deficient # TODO: what else to check? 
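A usage sketch of the per-coefficient penalties documented in this commit (a 0 in P1 drops a coefficient from the L1 term, a 0 on the 1d, i.e. diagonal, P2 drops it from the L2 term). Illustrative only: it assumes the estimator exactly as defined at this point in the series, and the data, penalty strengths and the choice of the coordinate-descent solver are made up for the example:

    import numpy as np
    from sklearn.linear_model import GeneralizedLinearRegressor

    X = np.array([[1., 0., 2.],
                  [2., 1., 3.],
                  [3., 4., 5.],
                  [4., 3., 1.]])
    y = np.array([1., 2., 4., 3.])

    # exclude the third coefficient from both penalty terms
    P1 = np.array([1., 1., 0.])
    P2 = np.array([1., 1., 0.])

    reg = GeneralizedLinearRegressor(alpha=0.1, l1_ratio=0.5, P1=P1, P2=P2,
                                     family='normal', link='identity',
                                     solver='cd')
    reg.fit(X, y)
    print(reg.intercept_, reg.coef_)

With both penalty weights set to zero, the third coefficient is fitted without regularization while the first two are shrunk by the elastic-net term.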
@@ -1520,7 +1547,7 @@ def Hs(s): args = (Xnew, y, weights, link) if solver == 'lbfgs': - coef, loss, info = optimize.fmin_l_bfgs_b( + coef, loss, info = fmin_l_bfgs_b( func, coef, fprime=fprime, args=args, iprint=(self.verbose > 0) - 1, pgtol=self.tol, maxiter=self.max_iter) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 1ac5ccd4d3d5c..fde1604ad16e3 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -188,25 +188,26 @@ def test_glm_l1_ratio_argument(): assert_raises(ValueError, glm.fit, X, y) -def test_glm_P1_argument(): - """Test GLM P1 arguments - """ +@pytest.mark.parametrize('P1', [['a string', 'a string'], [1, [2]], [1, 2, 3]]) +def test_glm_P1_argument(P1): + """Test GLM P1 arguments.""" y = np.array([1, 2]) X = np.array([[1], [1]]) - for P1 in [['a string', 'a string'], [1, [2]], [1, 2, 3]]: - glm = GeneralizedLinearRegressor(P1=P1) - assert_raises(ValueError, glm.fit, X, y) + glm = GeneralizedLinearRegressor(P1=P1) + with pytest.raises((ValueError, TypeError)): + glm.fit(X, y) -# def test_glm_P2_argument(): -# """Test GLM P2 arguments -# """ -# y = np.array([1, 2]) -# X = np.array([[1], [1]]) -# for P2 in [np.full((2, 2), 'a string', dtype=np.dtype(' Date: Wed, 20 Feb 2019 22:48:14 +0100 Subject: [PATCH 046/269] Fix doctest, test_poisson_enet, change IRLS to use lstsq, fix input checks * adapt examples of GeneralizedLinearModel to new defaults for P1, P2 and selection * fix precision/decimal issue in test_poisson_enet * use more robust least squares instead of solve in IRLS * fix sign error in input checks --- doc/modules/linear_model.rst | 16 +++++++++------- sklearn/linear_model/glm.py | 25 ++++++++++++++++--------- sklearn/linear_model/tests/test_glm.py | 24 ++++++++++++++++++++---- 3 files changed, 45 insertions(+), 20 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 174d1e4eddae4..e60e9e84a4747 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -924,15 +924,17 @@ follows: >>> from sklearn.linear_model import GeneralizedLinearRegressor >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) # doctest: +NORMALIZE_WHITESPACE - GeneralizedLinearRegressor(P1=None, P2=None, alpha=0.5, check_input=True, - copy_X=True, family='poisson', fit_dispersion=None, - fit_intercept=True, l1_ratio=0, link='log', max_iter=100, - random_state=None, selection='random', solver='auto', - start_params=None, tol=0.0001, verbose=0, warm_start=False) + GeneralizedLinearRegressor(P1='identity', P2='identity', alpha=0.5, + check_input=True, copy_X=True, family='poisson', + fit_dispersion=None, fit_intercept=True, l1_ratio=0, + link='log', max_iter=100, random_state=None, + selection='cyclic', solver='auto', + start_params='irls', tol=0.0001, verbose=0, + warm_start=False) >>> reg.coef_ # doctest: +NORMALIZE_WHITESPACE - array([0.24630255, 0.43373521]) + array([0.24630169, 0.43373464]) >>> reg.intercept_ #doctest: +ELLIPSIS - -0.76383575... + -0.76383633... .. topic:: Examples: diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 33e0d75730e3a..bc1a0434fa3b0 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -830,8 +830,9 @@ def _irls_step(X, W, P2, z): ------- coef: array, shape (X.shape[1]) """ - # TODO: scipy.linalg.solve seems faster, but ordinary least squares uses - # scipy.linalg.lstsq. 
What is more appropriate? + # Note: solve vs least squares, what is more appropriate? + # scipy.linalg.solve seems faster, but scipy.linalg.lstsq + # is more robust. n_samples, n_features = X.shape if sparse.issparse(X): W = sparse.dia_matrix((W, 0), shape=(n_samples, n_samples)).tocsr() @@ -843,7 +844,8 @@ def _irls_step(X, W, P2, z): XtW = X.transpose() * W A = XtW * X + L2 b = XtW * z - coef = splinalg.spsolve(A, b) + # coef = splinalg.spsolve(A, b) + coef, *_ = splinalg.lsmr(A, b) else: XtW = (X.T * W) A = XtW.dot(X) @@ -852,7 +854,8 @@ def _irls_step(X, W, P2, z): else: A += P2 b = XtW.dot(z) - coef = linalg.solve(A, b) + # coef = linalg.solve(A, b, overwrite_a=True, overwrite_b=True) + coef, *_ = linalg.lstsq(A, b, overwrite_a=True, overwrite_b=True) return coef @@ -1340,12 +1343,15 @@ def fit(self, X, y, sample_weight=None): raise ValueError("P1 must not have negative values.") # check if P2 is positive semidefinite # np.linalg.cholesky(P2) 'only' asserts positive definite - if self.P2 != 'identity': + if not isinstance(self.P2, str): # self.P2 != 'identity' # due to numerical precision, we allow eigenvalues to be a # tiny bit negative - epsneg = 10 * np.finfo(P2.dtype).epsneg + epsneg = -10 * np.finfo(P2.dtype).epsneg if P2.ndim == 1 or P2.shape[0] == 1: - if not np.all(P2 >= 0): + p2 = P2 + if sparse.issparse(P2): + p2 = P2.toarray() + if not np.all(p2 >= 0): raise ValueError("1d array P2 must not have negative " "values.") elif sparse.issparse(P2): @@ -1360,6 +1366,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("P2 must be positive semi-definite.") else: if not np.all(linalg.eigvalsh(P2) >= epsneg): + return P2 raise ValueError("P2 must be positive semi-definite.") # TODO: if alpha=0 check that Xnew is not rank deficient # TODO: what else to check? @@ -1689,7 +1696,7 @@ def Hs(s): mn_subgrad = (np.where(coef + d == 0, np.sign(A)*np.maximum(np.abs(A)-P1, 0), A+np.sign(coef+d)*P1)) - mn_subgrad = np.sum(np.abs(mn_subgrad)) + mn_subgrad = linalg.norm(mn_subgrad, ord=1) if mn_subgrad <= inner_tol: if inner_iter == 1: inner_tol = inner_tol/4. 
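The quantity whose L1 norm is taken in the hunk above is the minimum-norm subgradient of the penalized objective F(w) = f(w) + ||P1*w||_1, evaluated with fp_wP2 = f'(w) + w*P2 as the gradient of the smooth part; the inner loop applies the same formula at coef + d. A minimal standalone sketch (the function name and the numbers are illustrative, not part of the module):

    import numpy as np

    def min_norm_subgradient(grad, coef, P1):
        # element-wise minimum-norm subgradient of f(w) + ||P1*w||_1;
        # at coef[j] == 0 the subdifferential is grad[j] + P1[j]*[-1, 1] and
        # its element closest to zero is sign(grad[j])*max(|grad[j]|-P1[j], 0)
        return np.where(coef == 0,
                        np.sign(grad) * np.maximum(np.abs(grad) - P1, 0),
                        grad + np.sign(coef) * P1)

    grad = np.array([0.3, -0.05, 0.2])   # stands in for fp_wP2
    coef = np.array([0.0, 0.0, -1.0])
    P1 = np.array([0.1, 0.1, 0.1])
    print(np.linalg.norm(min_norm_subgradient(grad, coef, P1), ord=1))
    # 0.2 + 0.0 + 0.1 = 0.3; the solver stops once this norm drops below tol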
@@ -1740,7 +1747,7 @@ def Hs(s): mn_subgrad = (np.where(coef == 0, np.sign(fp_wP2)*np.maximum(np.abs(fp_wP2)-P1, 0), fp_wP2+np.sign(coef)*P1)) - mn_subgrad = np.sum(np.abs(mn_subgrad)) + mn_subgrad = linalg.norm(mn_subgrad, ord=1) if mn_subgrad <= self.tol: converged = True break diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index fde1604ad16e3..8893028d0176a 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -2,7 +2,7 @@ from numpy.testing import assert_allclose import pytest import scipy as sp -from scipy import sparse +from scipy import sparse, optimize from sklearn.linear_model.glm import ( Link, @@ -199,8 +199,7 @@ def test_glm_P1_argument(P1): @pytest.mark.parametrize('P2', ['a string', [1, 2, 3], [[2, 3]], - sparse.csr_matrix([1, 2, 3]), - sparse.lil_matrix([[1]])]) + sparse.csr_matrix([1, 2, 3])]) def test_glm_P2_argument(P2): """Test GLM P2 arguments.""" y = np.array([1, 2]) @@ -515,12 +514,29 @@ def test_poisson_enet(): X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T y = np.array([0, 1, 1, 2]) glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', - link='log', solver='cd', tol=1e-7, + link='log', solver='cd', tol=1e-8, selection='random', random_state=42) glm.fit(X, y) assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=7) assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=7) + # test results with general optimization procedure + def obj(coef): + pd = PoissonDistribution() + link = LogLink() + N = y.shape[0] + mu = link.inverse(X @ coef[1:]+coef[0]) + alpha, l1_ratio = (1, 0.5) + return 1./(2.*N) * pd.deviance(y, mu) \ + + 0.5 * alpha * (1-l1_ratio) * (coef[1:]**2).sum() \ + + alpha * l1_ratio * np.sum(np.abs(coef[1:])) + res = optimize.minimize(obj, [0, 0, 0], method='nelder-mead', tol=1e-10, + options={'maxiter': 1000, 'disp': False}) + assert_almost_equal(glm.intercept_, res.x[0], decimal=5) + assert_almost_equal(glm.coef_, res.x[1:], decimal=5) + assert_almost_equal(obj(np.concatenate(([glm.intercept_], glm.coef_))), + res.fun, decimal=8) + # same for start_params='zero' and selection='cyclic' # with reduced precision glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', From ed8e74f97d2b1921af4b8c2907c9e30629788bdc Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 23 Feb 2019 14:13:22 +0100 Subject: [PATCH 047/269] Use pytest decorators and pytest.raises --- sklearn/linear_model/glm.py | 7 +- sklearn/linear_model/tests/test_glm.py | 510 ++++++++++++------------- 2 files changed, 257 insertions(+), 260 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index bc1a0434fa3b0..f583e17433ee3 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -765,8 +765,8 @@ def __init__(self): super(InverseGaussianDistribution, self).__init__(power=3) -class GeneralizedHyperbolicSecand(ExponentialDispersionModel): - """A class for the Generalized Hyperbolic Secand (GHS) distribution. +class GeneralizedHyperbolicSecant(ExponentialDispersionModel): + """A class for the Generalized Hyperbolic Secant (GHS) distribution. The GHS distribution is for tagets y in (-inf, inf). """ @@ -1338,7 +1338,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Sample weights must be non-negative.") # check if P1 has only non-negative values, negative values might # indicate group lasso in the future. 
- if self.P1 != 'identity': + if not isinstance(self.P1, str): # if self.P1 != 'identity': if not np.all(P1 >= 0): raise ValueError("P1 must not have negative values.") # check if P2 is positive semidefinite @@ -1366,7 +1366,6 @@ def fit(self, X, y, sample_weight=None): raise ValueError("P2 must be positive semi-definite.") else: if not np.all(linalg.eigvalsh(P2) >= epsneg): - return P2 raise ValueError("P2 must be positive semi-definite.") # TODO: if alpha=0 check that Xnew is not rank deficient # TODO: what else to check? diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 8893028d0176a..361a237f2cc9f 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -2,7 +2,7 @@ from numpy.testing import assert_allclose import pytest import scipy as sp -from scipy import sparse, optimize +from scipy import linalg, optimize, sparse from sklearn.linear_model.glm import ( Link, @@ -11,354 +11,355 @@ TweedieDistribution, NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, - GeneralizedHyperbolicSecand, + GeneralizedHyperbolicSecant, GeneralizedLinearRegressor) from sklearn.linear_model import ElasticNet, Ridge from sklearn.utils.testing import ( assert_equal, assert_almost_equal, - assert_array_equal, assert_array_almost_equal, - assert_raises) + assert_array_equal, assert_array_almost_equal) -def test_link_properties(): - """Test link inverse and derivative - """ +@pytest.mark.parametrize('link', Link.__subclasses__()) +def test_link_properties(link): + """Test link inverse and derivative.""" rng = np.random.RandomState(0) x = rng.rand(100)*100 - # from sklearn.linear_model.glm import Link - # for link in vars()['Link'].__subclasses__(): - for link in Link.__subclasses__(): - link = link() - assert_almost_equal(link.link(link.inverse(x)), x, decimal=10) - assert_almost_equal(link.inverse_derivative(link.link(x)), - 1/link.derivative(x), decimal=10) - - -def test_family_bounds(): - """Test the valid range of distributions - """ - family = NormalDistribution() - result = family.in_y_range([-1, 0, 1]) - assert_array_equal(result, [True, True, True]) - - family = PoissonDistribution() + link = link() # instatiate object + assert_almost_equal(link.link(link.inverse(x)), x, decimal=10) + assert_almost_equal(link.inverse_derivative(link.link(x)), + 1/link.derivative(x), decimal=10) + + +@pytest.mark.parametrize( + 'family, expected', + [(NormalDistribution(), [True, True, True]), + (PoissonDistribution(), [False, True, True]), + (TweedieDistribution(power=1.5), [False, True, True]), + (GammaDistribution(), [False, False, True]), + (InverseGaussianDistribution(), [False, False, True]), + (TweedieDistribution(power=4.5), [False, False, True])]) +def test_family_bounds(family, expected): + """Test the valid range of distributions at -1, 0, 1.""" result = family.in_y_range([-1, 0, 1]) - assert_array_equal(result, [False, True, True]) - - family = TweedieDistribution(power=1.5) - result = family.in_y_range([-1, 0, 1]) - assert_array_equal(result, [False, True, True]) - - family = GammaDistribution() - result = family.in_y_range([-1, 0, 1]) - assert_array_equal(result, [False, False, True]) - - family = InverseGaussianDistribution() - result = family.in_y_range([-1, 0, 1]) - assert_array_equal(result, [False, False, True]) - - family = TweedieDistribution(power=4.5) - result = family.in_y_range([-1, 0, 1]) - assert_array_equal(result, [False, False, True]) - - -def test_deviance_zero(): 
- """Test deviance(y,y) = 0 for different families - """ - for family in [NormalDistribution(), PoissonDistribution(), - GammaDistribution(), InverseGaussianDistribution(), - TweedieDistribution(power=-2.5), - TweedieDistribution(power=-1), - TweedieDistribution(power=1.5), - TweedieDistribution(power=2.5), - TweedieDistribution(power=4), - GeneralizedHyperbolicSecand()]: - assert_almost_equal(family.deviance(0.1, 0.1), 0, decimal=10) - assert_almost_equal(family.deviance(1.5, 1.5), 0, decimal=10) - - -def test_fisher_matrix(): + assert_array_equal(result, expected) + + +@pytest.mark.parametrize( + 'family, chk_values', + [(NormalDistribution(), [-1.5, -0.1, 0.1, 2.5]), + (PoissonDistribution(), [0.1, 1.5]), + (GammaDistribution(), [0.1, 1.5]), + (InverseGaussianDistribution(), [0.1, 1.5]), + (TweedieDistribution(power=-2.5), [0.1, 1.5]), + (TweedieDistribution(power=-1), [0.1, 1.5]), + (TweedieDistribution(power=1.5), [0.1, 1.5]), + (TweedieDistribution(power=2.5), [0.1, 1.5]), + (TweedieDistribution(power=-4), [0.1, 1.5]), + (GeneralizedHyperbolicSecant(), [0.1, 1.5])]) +def test_deviance_zero(family, chk_values): + """Test deviance(y,y) = 0 for different families.""" + for x in chk_values: + assert_almost_equal(family.deviance(x, x), 0, decimal=10) + + +@pytest.mark.parametrize( + 'family, link', + [(NormalDistribution(), IdentityLink()), + (PoissonDistribution(), LogLink()), + (GammaDistribution(), LogLink()), + (InverseGaussianDistribution(), LogLink()), + (TweedieDistribution(power=1.5), LogLink()), + (TweedieDistribution(power=4.5), LogLink())]) +def test_fisher_matrix(family, link): """Test the Fisher matrix numerically. Trick: Use numerical differentiation with y = mu""" - for family in [NormalDistribution(), PoissonDistribution(), - GammaDistribution(), InverseGaussianDistribution()]: - link = LogLink() - rng = np.random.RandomState(0) - coef = np.array([-2, 1, 0, 1, 2.5]) - phi = 0.5 - X = rng.randn(10, 5) - lin_pred = np.dot(X, coef) - mu = link.inverse(lin_pred) - weights = rng.randn(10)**2 + 1 - fisher = family._fisher_matrix(coef=coef, phi=phi, X=X, y=mu, - weights=weights, link=link) - approx = np.array([]).reshape(0, coef.shape[0]) - for i in range(coef.shape[0]): - def f(coef): - return -family._score(coef=coef, phi=phi, X=X, y=mu, - weights=weights, link=link)[i] - approx = np.vstack( - [approx, sp.optimize.approx_fprime(xk=coef, f=f, epsilon=1e-5)] - ) - assert_allclose(fisher, approx, rtol=1e-3) + rng = np.random.RandomState(0) + coef = np.array([-2, 1, 0, 1, 2.5]) + phi = 0.5 + X = rng.randn(10, 5) + lin_pred = np.dot(X, coef) + mu = link.inverse(lin_pred) + weights = rng.randn(10)**2 + 1 + fisher = family._fisher_matrix(coef=coef, phi=phi, X=X, y=mu, + weights=weights, link=link) + approx = np.array([]).reshape(0, coef.shape[0]) + for i in range(coef.shape[0]): + def f(coef): + return -family._score(coef=coef, phi=phi, X=X, y=mu, + weights=weights, link=link)[i] + approx = np.vstack( + [approx, sp.optimize.approx_fprime(xk=coef, f=f, epsilon=1e-5)]) + assert_allclose(fisher, approx, rtol=1e-3) def test_sample_weights_validation(): - """Test the raised errors in the validation of sample_weight""" + """Test the raised errors in the validation of sample_weight.""" # 1. scalar value but not positive X = [[1]] y = [1] weights = 0 glm = GeneralizedLinearRegressor(fit_intercept=False) - assert_raises(ValueError, glm.fit, X, y, weights) + with pytest.raises(ValueError): + glm.fit(X, y, weights) # 2. 
2d array weights = [[0]] - assert_raises(ValueError, glm.fit, X, y, weights) + with pytest.raises(ValueError): + glm.fit(X, y, weights) # 3. 1d but wrong length weights = [1, 0] - assert_raises(ValueError, glm.fit, X, y, weights) + with pytest.raises(ValueError): + glm.fit(X, y, weights) # 4. 1d but only zeros (sum not greater than 0) weights = [0, 0] X = [[0], [1]] y = [1, 2] - assert_raises(ValueError, glm.fit, X, y, weights) + with pytest.raises(ValueError): + glm.fit(X, y, weights) # 5. 1d but weith a negative value weights = [2, -1] - assert_raises(ValueError, glm.fit, X, y, weights) + with pytest.raises(ValueError): + glm.fit(X, y, weights) def test_glm_family_argument(): - """Test GLM family argument set as string - """ + """Test GLM family argument set as string.""" y = np.array([1, 2]) - X = np.array([[1], [1]]) + X = np.array([[1], [2]]) for (f, fam) in [('normal', NormalDistribution()), ('poisson', PoissonDistribution()), ('gamma', GammaDistribution()), ('inverse.gaussian', InverseGaussianDistribution())]: - glm = GeneralizedLinearRegressor(family=f, fit_intercept=False, - alpha=0).fit(X, y) + glm = GeneralizedLinearRegressor(family=f, alpha=0).fit(X, y) assert_equal(type(glm._family_instance), type(fam)) glm = GeneralizedLinearRegressor(family='not a family', fit_intercept=False) - assert_raises(ValueError, glm.fit, X, y) + with pytest.raises(ValueError): + glm.fit(X, y) def test_glm_link_argument(): - """Test GLM link argument set as string - """ + """Test GLM link argument set as string.""" y = np.array([1, 2]) - X = np.array([[1], [1]]) + X = np.array([[1], [2]]) for (l, link) in [('identity', IdentityLink()), ('log', LogLink())]: - glm = GeneralizedLinearRegressor(family='normal', fit_intercept=False, - link=l).fit(X, y) + glm = GeneralizedLinearRegressor(family='normal', link=l).fit(X, y) assert_equal(type(glm._link_instance), type(link)) - glm = GeneralizedLinearRegressor(family='normal', fit_intercept=False, - link='not a link') - assert_raises(ValueError, glm.fit, X, y) + glm = GeneralizedLinearRegressor(family='normal', link='not a link') + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_alpha_argument(): - """Test GLM alpha argument - """ +@pytest.mark.parametrize('alpha', ['not a number', -4.2]) +def test_glm_alpha_argument(alpha): + """Test GLM for invalid alpha argument.""" y = np.array([1, 2]) - X = np.array([[1], [1]]) - for alpha in ['not a number', -4.2]: - glm = GeneralizedLinearRegressor(family='normal', fit_intercept=False, - alpha=alpha) - assert_raises(ValueError, glm.fit, X, y) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(family='normal', alpha=alpha) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_l1_ratio_argument(): - """Test GLM l1_ratio argument - """ +@pytest.mark.parametrize('l1_ratio', ['not a number', -4.2, 1.1, [1]]) +def test_glm_l1_ratio_argument(l1_ratio): + """Test GLM for invalid l1_ratio argument.""" y = np.array([1, 2]) - X = np.array([[1], [1]]) - for l1_ratio in ['not a number', -4.2, 1.1, [1]]: - glm = GeneralizedLinearRegressor(family='normal', fit_intercept=False, - l1_ratio=l1_ratio) - assert_raises(ValueError, glm.fit, X, y) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(family='normal', l1_ratio=l1_ratio) + with pytest.raises(ValueError): + glm.fit(X, y) -@pytest.mark.parametrize('P1', [['a string', 'a string'], [1, [2]], [1, 2, 3]]) +@pytest.mark.parametrize('P1', [['a string', 'a string'], [1, [2]], [1, 2, 3], + [-1]]) def test_glm_P1_argument(P1): - """Test GLM P1 
arguments.""" + """Test GLM for invalid P1 argument.""" y = np.array([1, 2]) - X = np.array([[1], [1]]) - glm = GeneralizedLinearRegressor(P1=P1) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(P1=P1, l1_ratio=0.5, check_input=True) with pytest.raises((ValueError, TypeError)): glm.fit(X, y) @pytest.mark.parametrize('P2', ['a string', [1, 2, 3], [[2, 3]], - sparse.csr_matrix([1, 2, 3])]) + sparse.csr_matrix([1, 2, 3]), [-1]]) def test_glm_P2_argument(P2): - """Test GLM P2 arguments.""" + """Test GLM for invalid P2 argument.""" y = np.array([1, 2]) - X = np.array([[1], [1]]) - glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False) - with pytest.raises((ValueError, TypeError)): + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(P2=P2, check_input=True) + with pytest.raises(ValueError): glm.fit(X, y) -def test_glm_fit_intercept_argument(): - """Test GLM fit_intercept argument - """ - y = np.array([1, 2]) - X = np.array([[1], [1]]) - for fit_intercept in ['not bool', 1, 0, [True]]: - glm = GeneralizedLinearRegressor(fit_intercept=fit_intercept) - assert_raises(ValueError, glm.fit, X, y) +def test_glm_P2_positive_semidefinite(): + """Test GLM for a positive semi-definite P2 argument.""" + n_samples, n_features = 10, 5 + rng = np.random.RandomState(42) + y = np.arange(n_samples) + X = np.zeros((n_samples, n_features)) + P2 = np.diag([100, 10, 5, 0, -1E-5]) + # construct random orthogonal matrix Q + Q, R = linalg.qr(rng.randn(n_features, n_features)) + P2 = Q.T @ P2 @ Q + glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False, + check_input=True) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_solver_argument(): - """Test GLM solver argument - """ +@pytest.mark.parametrize('fit_intercept', ['not bool', 1, 0, [True]]) +def test_glm_fit_intercept_argument(fit_intercept): + """Test GLM for invalid fit_intercept argument.""" y = np.array([1, 2]) X = np.array([[1], [1]]) - for solver in ['not a solver', 1, [1]]: - glm = GeneralizedLinearRegressor(solver=solver) - assert_raises(ValueError, glm.fit, X, y) + glm = GeneralizedLinearRegressor(fit_intercept=fit_intercept) + with pytest.raises(ValueError): + glm.fit(X, y) - # solver not suitable for L1 penalty - for solver in ['irls', 'lbfgs', 'newton-cg']: - glm = GeneralizedLinearRegressor(solver=solver, alpha=1, l1_ratio=0.1) - assert_raises(ValueError, glm.fit, X, y) +@pytest.mark.parametrize('solver, l1_ratio', + [('not a solver', 0), (1, 0), ([1], 0), + ('irls', 0.5), ('lbfgs', 0.5), ('newton-cg', 0.5)]) +def test_glm_solver_argument(solver, l1_ratio): + """Test GLM for invalid solver argument.""" + y = np.array([1, 2]) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(solver=solver, l1_ratio=l1_ratio) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_max_iter_argument(): - """Test GLM max_iter argument - """ + +@pytest.mark.parametrize('max_iter', ['not a number', 0, -1, 5.5, [1]]) +def test_glm_max_iter_argument(max_iter): + """Test GLM for invalid max_iter argument.""" y = np.array([1, 2]) - X = np.array([[1], [1]]) - for max_iter in ['not a number', 0, -1, 5.5, [1]]: - glm = GeneralizedLinearRegressor(max_iter=max_iter) - assert_raises(ValueError, glm.fit, X, y) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(max_iter=max_iter) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_tol_argument(): - """Test GLM tol argument - """ +@pytest.mark.parametrize('tol', ['not a number', 0, -1.0, [1e-3]]) +def test_glm_tol_argument(tol): + """Test GLM for 
invalid tol argument.""" y = np.array([1, 2]) - X = np.array([[1], [1]]) - for tol in ['not a number', 0, -1.0, [1e-3]]: - glm = GeneralizedLinearRegressor(tol=tol) - assert_raises(ValueError, glm.fit, X, y) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(tol=tol) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_warm_start_argument(): - """Test GLM warm_start argument - """ +@pytest.mark.parametrize('warm_start', ['not bool', 1, 0, [True]]) +def test_glm_warm_start_argument(warm_start): + """Test GLM for invalid warm_start argument.""" y = np.array([1, 2]) X = np.array([[1], [1]]) - for warm_start in ['not bool', 1, 0, [True]]: - glm = GeneralizedLinearRegressor(warm_start=warm_start) - assert_raises(ValueError, glm.fit, X, y) + glm = GeneralizedLinearRegressor(warm_start=warm_start) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_start_params_argument(): - """Test GLM start_params argument - """ +@pytest.mark.parametrize('start_params', + ['not a start_params', ['zero'], [0, 0, 0], + [[0, 0]], ['a', 'b']]) +def test_glm_start_params_argument(start_params): + """Test GLM for invalid start_params argument.""" y = np.array([1, 2]) X = np.array([[1], [1]]) - for start_params in ['not a start_params', ['zero'], [0, 0, 0], - [[0, 0]], ['a', 'b']]: - glm = GeneralizedLinearRegressor(start_params=start_params) - assert_raises(ValueError, glm.fit, X, y) + glm = GeneralizedLinearRegressor(start_params=start_params) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_selection_argument(): - """Test GLM selection argument - """ +@pytest.mark.parametrize('selection', ['not a selection', 1, 0, ['cyclic']]) +def test_glm_selection_argument(selection): + """Test GLM for invalid selection argument""" y = np.array([1, 2]) X = np.array([[1], [1]]) - for selection in ['not a selection', 1, 0, ['cyclic']]: - glm = GeneralizedLinearRegressor(selection=selection) - assert_raises(ValueError, glm.fit, X, y) + glm = GeneralizedLinearRegressor(selection=selection) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_random_state_argument(): - """Test GLM random_state argument - """ +@pytest.mark.parametrize('random_state', ['a string', 0.5, [0]]) +def test_glm_random_state_argument(random_state): + """Test GLM for invalid random_state argument.""" y = np.array([1, 2]) X = np.array([[1], [1]]) - for random_state in ['a string', 0.5, [0]]: - glm = GeneralizedLinearRegressor(random_state=random_state) - assert_raises(ValueError, glm.fit, X, y) + glm = GeneralizedLinearRegressor(random_state=random_state) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_copy_X_argument(): - """Test GLM copy_X arguments - """ +@pytest.mark.parametrize('copy_X', ['not bool', 1, 0, [True]]) +def test_glm_copy_X_argument(copy_X): + """Test GLM for invalid copy_X arguments.""" y = np.array([1, 2]) X = np.array([[1], [1]]) - for copy_X in ['not bool', 1, 0, [True]]: - glm = GeneralizedLinearRegressor(copy_X=copy_X) - assert_raises(ValueError, glm.fit, X, y) + glm = GeneralizedLinearRegressor(copy_X=copy_X) + with pytest.raises(ValueError): + glm.fit(X, y) -def test_glm_check_input_argument(): - """Test GLM check_input argument - """ +@pytest.mark.parametrize('check_input', ['not bool', 1, 0, [True]]) +def test_glm_check_input_argument(check_input): + """Test GLM for invalid check_input argument.""" y = np.array([1, 2]) X = np.array([[1], [1]]) - for check_input in ['not bool', 1, 0, [True]]: - glm = GeneralizedLinearRegressor(check_input=check_input) - 
assert_raises(ValueError, glm.fit, X, y) - + glm = GeneralizedLinearRegressor(check_input=check_input) + with pytest.raises(ValueError): + glm.fit(X, y) -# TODO: check additional validations if check_input == True -def test_glm_identiy_regression(): - """Test GLM regression with identity link on a simple dataset - """ +@pytest.mark.parametrize( + 'family', + [NormalDistribution(), PoissonDistribution(), + GammaDistribution(), InverseGaussianDistribution(), + TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), + GeneralizedHyperbolicSecant()]) +@pytest.mark.parametrize('solver', ['irls', 'lbfgs', 'newton-cg', 'cd']) +def test_glm_identiy_regression(family, solver): + """Test GLM regression with identity link on a simple dataset.""" coef = [1, 2] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.dot(X, coef) - families = ( - NormalDistribution(), PoissonDistribution(), - GammaDistribution(), InverseGaussianDistribution(), - TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), - GeneralizedHyperbolicSecand()) - for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: - for family in families: - glm = GeneralizedLinearRegressor( - alpha=0, family=family, fit_intercept=False, solver=solver) - res = glm.fit(X, y) - assert_array_almost_equal(res.coef_, coef) - - -def test_glm_log_regression(): - """Test GLM regression with log link on a simple dataset - """ + glm = GeneralizedLinearRegressor(alpha=0, family=family, + fit_intercept=False, solver=solver) + res = glm.fit(X, y) + assert_array_almost_equal(res.coef_, coef) + + +@pytest.mark.parametrize( + 'family', + [NormalDistribution(), PoissonDistribution(), + GammaDistribution(), InverseGaussianDistribution(), + TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), + GeneralizedHyperbolicSecant()]) +@pytest.mark.parametrize('solver', ['irls', 'lbfgs', 'newton-cg', 'cd']) +def test_glm_log_regression(family, solver): + """Test GLM regression with log link on a simple dataset.""" coef = [1, 2] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.exp(np.dot(X, coef)) - families = ( - NormalDistribution(), PoissonDistribution(), - GammaDistribution(), InverseGaussianDistribution(), - TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), - GeneralizedHyperbolicSecand()) - for solver in ['irls', 'lbfgs', 'newton-cg']: - for family in families: - glm = GeneralizedLinearRegressor( + glm = GeneralizedLinearRegressor( alpha=0, family=family, link=LogLink(), fit_intercept=False, solver=solver, start_params='least_squares') - res = glm.fit(X, y) - assert_array_almost_equal(res.coef_, coef) + res = glm.fit(X, y) + assert_array_almost_equal(res.coef_, coef) @pytest.mark.filterwarnings('ignore::DeprecationWarning') -def test_normal_ridge(): - """Test ridge regression for Normal distributions +@pytest.mark.parametrize('solver', ['irls', 'lbfgs', 'newton-cg', 'cd']) +def test_normal_ridge(solver): + """Test ridge regression for Normal distributions. Compare to test_ridge in test_ridge.py. 
""" @@ -375,25 +376,23 @@ def test_normal_ridge(): ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-6, solver='svd', normalize=False) ridge.fit(X, y) - for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, - family='normal', link='identity', - fit_intercept=True, tol=1e-6, - max_iter=100, solver=solver, - random_state=42) - glm.fit(X, y) - assert_equal(glm.coef_.shape, (X.shape[1], )) - assert_array_almost_equal(glm.coef_, ridge.coef_) - assert_almost_equal(glm.intercept_, ridge.intercept_) - assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', + link='identity', fit_intercept=True, + tol=1e-6, max_iter=100, solver=solver, + random_state=42) + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_) + assert_almost_equal(glm.intercept_, ridge.intercept_) + assert_array_almost_equal(glm.predict(T), ridge.predict(T)) ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-6, solver='svd', normalize=False) ridge.fit(X, y) - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-6, - family='normal', link='identity', - fit_intercept=False, solver='irls', - fit_dispersion='chisqr') + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', + link='identity', fit_intercept=False, + tol=1e-6, max_iter=100, solver=solver, + random_state=42, fit_dispersion='chisqr') glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) assert_array_almost_equal(glm.coef_, ridge.coef_) @@ -413,23 +412,22 @@ def test_normal_ridge(): ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-9, solver='sag', normalize=False, max_iter=100000) ridge.fit(X, y) - for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-8, - family='normal', link='identity', - fit_intercept=True, solver=solver, - max_iter=300, random_state=42) - glm.fit(X, y) - assert_equal(glm.coef_.shape, (X.shape[1], )) - assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=5) - assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=5) - assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=5) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-8, + family='normal', link='identity', + fit_intercept=True, solver=solver, + max_iter=300, random_state=42) + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=5) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=5) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=5) ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-7, solver='sag', normalize=False, max_iter=1000) ridge.fit(X, y) glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-7, family='normal', link='identity', - fit_intercept=False, solver='irls') + fit_intercept=False, solver=solver) glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) assert_array_almost_equal(glm.coef_, ridge.coef_) @@ -438,7 +436,7 @@ def test_normal_ridge(): def test_poisson_ridge(): - """Test ridge regression with poisson family and LogLink + """Test ridge regression with poisson family and LogLink. 
Compare to R's glmnet""" # library("glmnet") @@ -470,7 +468,7 @@ def test_poisson_ridge(): def test_normal_enet(): - """Tet elastic net regression with normal/gaussian family""" + """Test elastic net regression with normal/gaussian family.""" rng = np.random.RandomState(0) alpha, l1_ratio = 0.3, 0.7 n_samples, n_features = 20, 2 @@ -495,7 +493,7 @@ def test_normal_enet(): def test_poisson_enet(): - """Test elastic net regression with poisson family and LogLink + """Test elastic net regression with poisson family and LogLink. Compare to R's glmnet""" # library("glmnet") From fe876da908a7d5aefe8fa9ac56f4c5130ccf83df Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 24 Feb 2019 12:45:55 +0100 Subject: [PATCH 048/269] Add Logistic regression=Binomial + Logit * add Binomial distribution * add Logit link * tests for binomial against LogisticRegression * option 'auto' for link * reduce code duplication by replacing @abstractproperty by @property --- sklearn/linear_model/glm.py | 160 +++++++++++++++---------- sklearn/linear_model/tests/test_glm.py | 57 +++++++-- 2 files changed, 149 insertions(+), 68 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index f583e17433ee3..01e40b322946c 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -42,13 +42,12 @@ from __future__ import division -from abc import ABCMeta, abstractmethod, abstractproperty +from abc import ABCMeta, abstractmethod import numbers import numpy as np -from scipy import linalg, sparse +from scipy import linalg, sparse, special import scipy.sparse.linalg as splinalg from scipy.optimize import fmin_l_bfgs_b -from scipy.special import xlogy import warnings from .base import LinearRegression from .coordinate_descent import ElasticNet @@ -191,6 +190,28 @@ def inverse_derivative2(self, lin_pred): return np.exp(lin_pred) +class LogitLink(Link): + """The logit link function g(x)=logit(x).""" + + def link(self, mu): + return special.logit(mu) + + def derivative(self, mu): + return 1. / (mu * (1 - mu)) + + def inverse(self, lin_pred): + return special.expit(lin_pred) + + def inverse_derivative(self, lin_pred): + ep = special.expit(lin_pred) + return ep * (1. - ep) + + def inverse_derivative2(self, lin_pred): + ep = special.expit(lin_pred) + ep = special.expit(lin_pred) + return ep * (1. - ep) * (1. - 2 * ep) + + class ExponentialDispersionModel(metaclass=ABCMeta): r"""Base class for reproductive Exponential Dispersion Models (EDM). @@ -238,26 +259,25 @@ class ExponentialDispersionModel(metaclass=ABCMeta): https://en.wikipedia.org/wiki/Exponential_dispersion_model. 
""" - - @abstractproperty + @property def lower_bound(self): - """The lower bound of values of Y~EDM.""" - raise NotImplementedError() + """Get the lower bound of values for Y~EDM.""" + return self._lower_bound - @abstractproperty + @property def upper_bound(self): - """The upper bound of values of Y~EDM.""" - raise NotImplementedError() + """Get the upper bound of values for Y~EDM.""" + return self._upper_bound - @abstractproperty + @property def include_lower_bound(self): - """If True, values of y may equal lower bound: y >= lower_bound.""" - raise NotImplementedError() + """Get True if lower bound for y is included: y >= lower_bound.""" + return self._include_lower_bound - @abstractproperty + @property def include_upper_bound(self): - """If True, values of y may equal upper bound: y <= upper_bound.""" - raise NotImplementedError() + """Get True if upper bound for y is includede: y <= upper_bound.""" + return self._include_upper_bound def in_y_range(self, x): """Returns true if `x` is in the valid range of Y~EDM. @@ -685,22 +705,6 @@ def power(self, power): .format(power)) self._power = power - @property - def lower_bound(self): - return self._lower_bound - - @property - def upper_bound(self): - return self._upper_bound - - @property - def include_lower_bound(self): - return self._include_lower_bound - - @property - def include_upper_bound(self): - return self._include_upper_bound - def unit_variance(self, mu): """Compute the unit variance of a Tweedie distribution v(mu)=mu**power. @@ -730,7 +734,7 @@ def unit_deviance(self, y, mu): if p == 1: # PoissonDistribution # 2 * (y*log(y/mu) - y + mu), with y*log(y/mu)=0 if y=0 - return 2 * (xlogy(y, y/mu) - y + mu) + return 2 * (special.xlogy(y, y/mu) - y + mu) elif p == 2: # GammaDistribution return 2 * (np.log(mu/y)+y/mu-1) @@ -776,22 +780,6 @@ def __init__(self): self._include_lower_bound = False self._include_upper_bound = False - @property - def lower_bound(self): - return self._lower_bound - - @property - def upper_bound(self): - return self._upper_bound - - @property - def include_lower_bound(self): - return self._include_lower_bound - - @property - def include_upper_bound(self): - return self._include_upper_bound - def unit_variance(self, mu): return 1 + mu**2 @@ -803,6 +791,27 @@ def unit_deviance(self, y, mu): np.log((1+mu**2)/(1+y**2))) +class BinomialDistribution(ExponentialDispersionModel): + """A class for the Binomial distribution. + + The Binomial distribution is for tagets y in [0, 1]. + """ + def __init__(self): + self._lower_bound = 0 + self._upper_bound = 1 + self._include_lower_bound = True + self._include_upper_bound = True + + def unit_variance(self, mu): + return mu * (1 - mu) + + def unit_variance_derivative(self, mu): + return 1 - 2 * mu + + def unit_deviance(self, y, mu): + return 2*(special.xlogy(y, y/mu) + special.xlogy(1-y, (1-y)/(1-mu))) + + def _irls_step(X, W, P2, z): """Compute one step in iteratively reweighted least squares. @@ -933,15 +942,23 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). - family : {'normal', 'poisson', 'gamma', 'inverse.gaussian'} or an instance\ - of class ExponentialDispersionModel, optional(default='normal') + family : {'normal', 'poisson', 'gamma', 'inverse.gaussian', 'binomial'} \ + or an instance of class ExponentialDispersionModel, \ + optional(default='normal') The distributional assumption of the GLM, i.e. 
which distribution from the EDM, specifies the loss function to be minimized. - link : {'identity', 'log'} or an instance of class Link, - optional (default='identity') + link : {'auto', 'identity', 'log', 'logit'} or an instance of class Link, + optional (default='auto') The link function of the GLM, i.e. mapping from linear predictor - (X*coef) to expectation (mu). + (X*coef) to expectation (mu). Option 'auto' sets the link depending on + the chosen family as follows: + + - 'identity' for family 'normal' + + - 'log' for families 'poisson', 'gamma', 'inverse.gaussian' + + - 'logit' for family 'binomial' fit_dispersion : {None, 'chisqr', 'deviance'}, optional (defaul=None) Method for estimation of the dispersion parameter phi. Whether to use @@ -1084,7 +1101,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf """ def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', - fit_intercept=True, family='normal', link='identity', + fit_intercept=True, family='normal', link='auto', fit_dispersion=None, solver='auto', max_iter=100, tol=1e-4, warm_start=False, start_params='irls', selection='cyclic', random_state=None, copy_X=True, @@ -1159,27 +1176,48 @@ def fit(self, X, y, sample_weight=None): self._family_instance = GammaDistribution() elif self.family == 'inverse.gaussian': self._family_instance = InverseGaussianDistribution() + elif self.family == 'binomial': + self._family_instance = BinomialDistribution() else: raise ValueError( "The family must be an instance of class" " ExponentialDispersionModel or an element of" - " ['normal', 'poisson', 'gamma', 'inverse.gaussian'];" - " got (family={0})".format(self.family)) + " ['normal', 'poisson', 'gamma', 'inverse.gaussian', " + "'binomial']; got (family={0})".format(self.family)) # Guarantee that self._link_instance is set to an instance of # class Link if isinstance(self.link, Link): self._link_instance = self.link else: - if self.link == 'identity': + if self.link == 'auto': + if isinstance(self._family_instance, TweedieDistribution): + if self._family_instance.power <= 0: + self._link_instance = IdentityLink() + if self._family_instance.power >= 1: + self._link_instance = LogLink() + elif isinstance(self._family_instance, + GeneralizedHyperbolicSecant): + self._link_instance = IdentityLink() + elif isinstance(self._family_instance, BinomialDistribution): + self._link_instance = LogitLink() + else: + raise ValueError("No default link known for the " + "specified distribution family. Please " + "set link manually, i.e. 
not to 'auto'; " + "got (link='auto', family={}" + .format(self.family)) + elif self.link == 'identity': self._link_instance = IdentityLink() elif self.link == 'log': self._link_instance = LogLink() + elif self.link == 'logit': + self._link_instance = LogitLink() else: raise ValueError( - "The link must be an instance of class Link or" - " an element of ['identity', 'log']; got (link={0})" - .format(self.link)) + "The link must be an instance of class Link or " + "an element of ['auto', 'identity', 'log', 'logit']; " + "got (link={0})".format(self.link)) if not isinstance(self.alpha, numbers.Number) or self.alpha < 0: raise ValueError("Penalty term must be a non-negative number;" diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 361a237f2cc9f..de0857a34fe3a 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -4,16 +4,18 @@ import scipy as sp from scipy import linalg, optimize, sparse +from sklearn.datasets import make_classification from sklearn.linear_model.glm import ( Link, IdentityLink, LogLink, + LogitLink, TweedieDistribution, NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, - GeneralizedHyperbolicSecant, + GeneralizedHyperbolicSecant, BinomialDistribution, GeneralizedLinearRegressor) -from sklearn.linear_model import ElasticNet, Ridge +from sklearn.linear_model import ElasticNet, LogisticRegression, Ridge from sklearn.utils.testing import ( assert_equal, assert_almost_equal, @@ -26,9 +28,19 @@ def test_link_properties(link): rng = np.random.RandomState(0) x = rng.rand(100)*100 link = link() # instatiate object - assert_almost_equal(link.link(link.inverse(x)), x, decimal=10) - assert_almost_equal(link.inverse_derivative(link.link(x)), - 1/link.derivative(x), decimal=10) + decimal = 10 + if isinstance(link, LogitLink): + # careful for large x, note expit(36) = 1 + # limit max eta to 15 + x = x / 100 * 15 + decimal = 8 + assert_almost_equal(link.link(link.inverse(x)), x, decimal=decimal) + # if f(g(x)) = x, then f'(g(x)) = 1/g'(x) + assert_almost_equal(link.derivative(link.inverse(x)), + 1./link.inverse_derivative(x), decimal=decimal) + # for LogitLink, in the following x should be between 0 and 1. 
+ # assert_almost_equal(link.inverse_derivative(link.link(x)), + # 1./link.derivative(x), decimal=decimal) @pytest.mark.parametrize( @@ -214,6 +226,12 @@ def test_glm_P2_positive_semidefinite(): with pytest.raises(ValueError): glm.fit(X, y) + P2 = sparse.csr_matrix(P2) + glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False, + check_input=True) + with pytest.raises(ValueError): + glm.fit(X, y) + @pytest.mark.parametrize('fit_intercept', ['not bool', 1, 0, [True]]) def test_glm_fit_intercept_argument(fit_intercept): @@ -331,7 +349,7 @@ def test_glm_identiy_regression(family, solver): coef = [1, 2] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.dot(X, coef) - glm = GeneralizedLinearRegressor(alpha=0, family=family, + glm = GeneralizedLinearRegressor(alpha=0, family=family, link='identity', fit_intercept=False, solver=solver) res = glm.fit(X, y) assert_array_almost_equal(res.coef_, coef) @@ -350,7 +368,7 @@ def test_glm_log_regression(family, solver): X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.exp(np.dot(X, coef)) glm = GeneralizedLinearRegressor( - alpha=0, family=family, link=LogLink(), fit_intercept=False, + alpha=0, family=family, link='log', fit_intercept=False, solver=solver, start_params='least_squares') res = glm.fit(X, y) assert_array_almost_equal(res.coef_, coef) @@ -557,3 +575,28 @@ def obj(coef): glm.fit(X, y) assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=4) assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=4) + + +@pytest.mark.parametrize('alpha', [0.01, 0.1, 1, 10]) +def test_binomial_enet(alpha): + """Test elastic net regression with binomial family and LogitLink. + + Compare to LogisticRegression. + """ + l1_ratio = 0.5 + n_samples = 500 + X, y = make_classification(n_samples=n_samples, n_classes=2, n_features=6, + n_informative=5, n_redundant=0, n_repeated=0, + random_state=0) + log = LogisticRegression( + penalty='elasticnet', random_state=0, fit_intercept=False, tol=1e-6, + max_iter=1000, l1_ratio=l1_ratio, C=1./(n_samples * alpha), + solver='saga') + log.fit(X, y) + glm = GeneralizedLinearRegressor( + family=BinomialDistribution(), link=LogitLink(), fit_intercept=False, + alpha=alpha, l1_ratio=l1_ratio, solver='cd', selection='cyclic', + tol=1e-7) + glm.fit(X, y) + assert_almost_equal(log.intercept_[0], glm.intercept_, decimal=6) + assert_array_almost_equal(log.coef_[0, :], glm.coef_, decimal=6) From 2993e03dbfc89b068373718c82f65957639767ac Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 7 Apr 2019 15:33:27 +0200 Subject: [PATCH 049/269] More efficient sparse matrices and refactor of irls and cd solver * refactor into function _irls_solver * refactor into function _cd_solver * replace of safe_sparse_dot by matmul operator @ * more efficient handling of fisher matrix * sparse coo matrices are converted to csc or csr * sample weights don't except sparse matrices * minor doc changes --- sklearn/linear_model/glm.py | 1101 ++++++++++++++---------- sklearn/linear_model/tests/test_glm.py | 85 +- 2 files changed, 717 insertions(+), 469 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 01e40b322946c..b2de866a4b69d 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -55,13 +55,12 @@ from ..base import BaseEstimator, RegressorMixin from ..exceptions import ConvergenceWarning from ..utils import check_array, check_X_y -from ..utils.extmath import safe_sparse_dot from ..utils.optimize import newton_cg from ..utils.validation import check_is_fitted, 
check_random_state def _check_weights(sample_weight, n_samples): - """Check that weights are non-negative and have the right shape.""" + """Check that sample weights are non-negative and have the right shape.""" if sample_weight is None: weights = np.ones(n_samples) elif np.isscalar(sample_weight): @@ -70,7 +69,7 @@ def _check_weights(sample_weight, n_samples): weights = sample_weight * np.ones(n_samples) else: _dtype = [np.float64, np.float32] - weights = check_array(sample_weight, accept_sparse='csr', + weights = check_array(sample_weight, accept_sparse=False, force_all_finite=True, ensure_2d=False, dtype=_dtype) if weights.ndim > 1: @@ -247,12 +246,11 @@ class ExponentialDispersionModel(metaclass=ABCMeta): deviance_derivative starting_mu + _mu_deviance_derivative _score _fisher_matrix _observed_information - _deviance - _deviance_derivative - _deviance_hessian + _eta_mu_score_fisher References ---------- @@ -280,7 +278,7 @@ def include_upper_bound(self): return self._include_upper_bound def in_y_range(self, x): - """Returns true if `x` is in the valid range of Y~EDM. + """Returns ``True`` if x is in the valid range of Y~EDM. Parameters ---------- @@ -411,7 +409,7 @@ def unit_deviance_derivative(self, y, mu): mu : array, shape (n_samples,) Predicted mean. """ - return -2*(y-mu)/self.unit_variance(mu) + return -2 * (y - mu) / self.unit_variance(mu) def deviance(self, y, mu, weights=1): r"""Compute the deviance. @@ -434,13 +432,7 @@ def deviance(self, y, mu, weights=1): weights : array, shape (n_samples,) (default=1) Weights or exposure to which variance is inverse proportional. """ - return np.sum(weights*self.unit_deviance(y, mu)) - - def _deviance(self, coef, X, y, weights, link): - """Compute the deviance as a function of the coefficients and data.""" - lin_pred = safe_sparse_dot(X, coef, dense_output=True) - mu = link.inverse(lin_pred) - return self.deviance(y, mu, weights) + return np.sum(weights * self.unit_deviance(y, mu)) def deviance_derivative(self, y, mu, weights=1): """Compute the derivative of the deviance w.r.t. mu. @@ -458,7 +450,36 @@ def deviance_derivative(self, y, mu, weights=1): weights : array, shape (n_samples,) (default=1) Weights or exposure to which variance is inverse proportional. """ - return weights*self.unit_deviance_derivative(y, mu) + return weights * self.unit_deviance_derivative(y, mu) + + def starting_mu(self, y, weights=1, ind_weight=0.5): + """Set starting values for the mean mu. + + These may be good starting points for the (unpenalized) IRLS solver. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + + ind_weight : float (default=0.5) + Must be between 0 and 1. Specifies how much weight is given to the + individual observations instead of the mean of y. + """ + return (ind_weight * y + + (1. - ind_weight) * np.average(y, weights=weights)) + + def _mu_deviance_derivative(self, coef, X, y, weights, link): + """Compute mu, the deviance and it's derivative w.r.t coef.""" + lin_pred = X @ coef + mu = link.inverse(lin_pred) + dev = self.deviance(y, mu, weights) + d1 = link.inverse_derivative(lin_pred) + devp = X.T @ (d1 * self.deviance_derivative(y, mu, weights)) + return mu, dev, devp def _score(self, coef, phi, X, y, weights, link): r"""Compute the score function. 
@@ -476,16 +497,14 @@ def _score(self, coef, phi, X, y, weights, link): with :math:`\mathbf{D}=\mathrm{diag}(h'(\eta_1),\ldots)` and :math:`\boldsymbol{\Sigma}=\mathrm{diag}(\mathbf{V}[y_1],\ldots)`. + Note: The derivative of the deviance w.r.t. coef equals -2 * score. """ - n_samples = X.shape[0] - lin_pred = safe_sparse_dot(X, coef, dense_output=True) + lin_pred = X @ coef mu = link.inverse(lin_pred) sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) d = link.inverse_derivative(lin_pred) - d_sigma_inv = sparse.dia_matrix((sigma_inv*d, 0), - shape=(n_samples, n_samples)) - temp = safe_sparse_dot(d_sigma_inv, (y-mu), dense_output=True) - score = safe_sparse_dot(X.T, temp, dense_output=True) + temp = sigma_inv * d * (y - mu) + score = X.T @ temp return score def _fisher_matrix(self, coef, phi, X, y, weights, link): @@ -508,14 +527,13 @@ def _fisher_matrix(self, coef, phi, X, y, weights, link): see func:`_score`. """ n_samples = X.shape[0] - lin_pred = safe_sparse_dot(X, coef, dense_output=True) + lin_pred = X @ coef mu = link.inverse(lin_pred) sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) d2 = link.inverse_derivative(lin_pred)**2 d2_sigma_inv = sparse.dia_matrix((sigma_inv*d2, 0), shape=(n_samples, n_samples)) - temp = safe_sparse_dot(d2_sigma_inv, X, dense_output=False) - fisher_matrix = safe_sparse_dot(X.T, temp, dense_output=False) + fisher_matrix = X.T @ d2_sigma_inv @ X return fisher_matrix def _observed_information(self, coef, phi, X, y, weights, link): @@ -542,7 +560,7 @@ def _observed_information(self, coef, phi, X, y, weights, link): see :func:`score_` function and :func:`_fisher_matrix`. """ n_samples = X.shape[0] - lin_pred = safe_sparse_dot(X, coef, dense_output=True) + lin_pred = X @ coef mu = link.inverse(lin_pred) sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) dp = link.inverse_derivative2(lin_pred) @@ -551,80 +569,59 @@ def _observed_information(self, coef, phi, X, y, weights, link): r = y - mu temp = sparse.dia_matrix((sigma_inv*(-dp*r+d2*v*r+d2), 0), shape=(n_samples, n_samples)) - temp = safe_sparse_dot(temp, X, dense_output=False) - observed_information = safe_sparse_dot(X.T, temp, dense_output=False) + observed_information = X.T @ temp @ X return observed_information - def _deviance_derivative(self, coef, X, y, weights, link): - r"""Compute the derivative of the deviance w.r.t. coef. - - The derivative of the deviance w.r.t. `coef` (:math:`w`) as a - function of the coefficients `coef` and the data. - This is equivalent to :math:`-2\phi` times the score function - :func:`_score` (derivative of the log-likelihood). - """ - score = self._score(coef=coef, phi=1, X=X, y=y, weights=weights, - link=link) - return -2*score - - def _deviance_hessian(self, coef, X, y, weights, link): - r"""Compute the hessian matrix of the deviance w.r.t. coef. - - The hessian of the deviance w.r.t. `coef` (:math:`w`) is evaluated as - a function of the coefficients `coef` and the data. - It is equivalent to :math:`+2\phi` times the observed information - matrix. - """ - info_matrix = self._observed_information(coef=coef, phi=1, X=X, y=y, - weights=weights, link=link) - return 2*info_matrix - - def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link): + def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link, + diag_fisher=False): """Compute linear predictor, mean, score function and fisher matrix. 
It calculates the linear predictor, the mean, score function (derivative of log-likelihood) and Fisher information matrix all in one go as function of `coef` (:math:`w`) and the data. + + Parameters + ---------- + diag_fisher : boolean, optional (default=False) + If ``True``, returns only an array d such that + fisher = X.T @ np.diag(d) @ X. + + Returns + ------- + (eta, mu, score, fisher) : tuple with 4 elements + The 4 elements are: + + * eta: ndarray, shape (X.shape[0],) + * mu: ndarray, shape (X.shape[0],) + * score: ndarray, shape (X.shape[0],) + * fisher: + + * If diag_fisher is ``False``, the full fisher matrix, + an array of shape (X.shape[1], X.shape[1]) + * If diag_fisher is ``True`, an array of shape (X.shape[0]) """ n_samples, n_features = X.shape # eta = linear predictor - eta = safe_sparse_dot(X, coef, dense_output=True) + eta = X @ coef mu = link.inverse(eta) sigma_inv = 1./self.variance(mu, phi=phi, weights=weights) d1 = link.inverse_derivative(eta) # = h'(eta) # Alternatively: # h'(eta) = h'(g(mu)) = 1/g'(mu), note that h is inverse of g # d1 = 1./link.derivative(mu) - d1_sigma_inv = sparse.dia_matrix((sigma_inv*d1, 0), - shape=(n_samples, n_samples)) - temp = safe_sparse_dot(d1_sigma_inv, (y-mu), dense_output=True) - score = safe_sparse_dot(X.T, temp, dense_output=True) + score = X.T @ (sigma_inv * d1 * (y - mu)) # - d2_sigma_inv = sparse.dia_matrix((sigma_inv*(d1**2), 0), - shape=(n_samples, n_samples)) - temp = safe_sparse_dot(d2_sigma_inv, X, dense_output=False) - fisher = safe_sparse_dot(X.T, temp, dense_output=False) - return eta, mu, score, fisher - - def starting_mu(self, y, weights=1, ind_weight=0.5): - """Set starting values for the mean mu. - - These may be good starting points for the (unpenalized) IRLS solver. - - Parameters - ---------- - y : array, shape (n_samples,) - Target values. - - weights : array, shape (n_samples,) (default=1) - Weights or exposure to which variance is inverse proportional. - - ind_weight : float (default=0.5) - Must be between 0 and 1. Specifies how much weight is given to the - individual observations instead of the mean of y. - """ - return (ind_weight * y + - (1. - ind_weight) * np.average(y, weights=weights)) + d2_sigma_inv = sigma_inv * (d1**2) + if diag_fisher: + return eta, mu, score, d2_sigma_inv + else: + if sparse.issparse(X): + d2_sigma_inv = sparse.dia_matrix((d2_sigma_inv, 0), + shape=(n_samples, n_samples)) + fisher = (X.T @ d2_sigma_inv @ X).toarray() + else: + fisher = (X.T * d2_sigma_inv) @ X + return eta, mu, score, fisher class TweedieDistribution(ExponentialDispersionModel): @@ -724,20 +721,20 @@ def unit_variance_derivative(self, mu): mu : array, shape (n_samples,) Predicted mean. 
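The note above that the derivative of the deviance w.r.t. coef equals -2 times the score can be checked numerically. A minimal sketch for the normal family with identity link (made-up data, plain numpy, not the classes from this patch)::

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(40, 3)
    coef = rng.randn(3)
    y = rng.randn(40)

    # normal family, identity link: deviance D(coef) = sum((y - X @ coef)**2)
    def deviance(c):
        return np.sum((y - X @ c) ** 2)

    # score with phi=1, weights=1: sigma_inv = 1 and h'(eta) = 1 here
    score = X.T @ (y - X @ coef)

    # finite-difference check of d(deviance)/d(coef) == -2 * score
    eps = 1e-6
    num_grad = np.array([(deviance(coef + eps * e) - deviance(coef - eps * e)) / (2 * eps)
                         for e in np.eye(3)])
    assert np.allclose(num_grad, -2 * score, rtol=1e-5, atol=1e-6)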
""" - return self.power*np.power(mu, self.power-1) + return self.power * np.power(mu, self.power - 1) def unit_deviance(self, y, mu): p = self.power if p == 0: # NormalDistribution - return (y-mu)**2 + return (y - mu)**2 if p == 1: # PoissonDistribution # 2 * (y*log(y/mu) - y + mu), with y*log(y/mu)=0 if y=0 return 2 * (special.xlogy(y, y/mu) - y + mu) elif p == 2: # GammaDistribution - return 2 * (np.log(mu/y)+y/mu-1) + return 2 * (np.log(mu/y) + y/mu - 1) else: # return 2 * (np.maximum(y,0)**(2-p)/((1-p)*(2-p)) # - y*mu**(1-p)/(1-p) + mu**(2-p)/(2-p)) @@ -784,11 +781,11 @@ def unit_variance(self, mu): return 1 + mu**2 def unit_variance_derivative(self, mu): - return 2*mu + return 2 * mu def unit_deviance(self, y, mu): - return (2*y*(np.arctan(y) - np.arctan(mu)) + - np.log((1+mu**2)/(1+y**2))) + return (2 * y * (np.arctan(y) - np.arctan(mu)) + + np.log((1 + mu**2)/(1 + y**2))) class BinomialDistribution(ExponentialDispersionModel): @@ -809,7 +806,7 @@ def unit_variance_derivative(self, mu): return 1 - 2 * mu def unit_deviance(self, y, mu): - return 2*(special.xlogy(y, y/mu) + special.xlogy(1-y, (1-y)/(1-mu))) + return 2 * (special.xlogy(y, y/mu) + special.xlogy(1-y, (1-y)/(1-mu))) def _irls_step(X, W, P2, z): @@ -824,20 +821,20 @@ def _irls_step(X, W, P2, z): Parameters ---------- - X : {numpy array, sparse matrix}, shape (n_samples, n_features) + X : {ndarray, sparse matrix}, shape (n_samples, n_features) Training data (with intercept included if present) - W : numpy array, shape (n_samples,) + W : ndarray, shape (n_samples,) - P2 : {numpy array, sparse matrix}, shape (n_features, n_features) + P2 : {ndarray, sparse matrix}, shape (n_features, n_features) The L2-penalty matrix or vector (=diagonal matrix) - z : numpy array, shape (n_samples,) + z : ndarray, shape (n_samples,) Working observations Returns ------- - coef: array, shape (X.shape[1]) + coef: ndarray, shape (X.shape[1]) """ # Note: solve vs least squares, what is more appropriate? # scipy.linalg.solve seems faster, but scipy.linalg.lstsq @@ -868,27 +865,422 @@ def _irls_step(X, W, P2, z): return coef +def _irls_solver(coef, X, y, weights, P2, family, link, max_iter, tol): + """Solve GLM with L2 penalty by IRLS algorithm. + + Note: If X is sparse, P2 must also be sparse. + """ + # Solve Newton-Raphson (1): Obj'' (w - w_old) = -Obj' + # Obj = objective function = 1/2 Dev + l2/2 w P2 w + # Dev = deviance, s = normalized weights, variance V(mu) but phi=1 + # D = link.inverse_derivative(eta) = diag_matrix(h'(X w)) + # D2 = link.inverse_derivative(eta)^2 = D^2 + # W = D2/V(mu) + # l2 = alpha * (1 - l1_ratio) + # Obj' = d(Obj)/d(w) = 1/2 Dev' + l2 P2 w + # = -X' D (y-mu)/V(mu) + l2 P2 w + # Obj''= d2(Obj)/d(w)d(w') = Hessian = -X'(...) X + l2 P2 + # Use Fisher matrix instead of full info matrix -X'(...) X, + # i.e. E[Dev''] with E[y-mu]=0: + # Obj'' ~ X' W X + l2 P2 + # (1): w = (X' W X + l2 P2)^-1 X' W z, + # with z = eta + D^-1 (y-mu) + # Note: P2 must be symmetrized + # Note: ' denotes derivative, but also transpose for matrices + + # eta = linear predictor + eta = X @ coef + mu = link.inverse(eta) + # D = h'(eta) + hp = link.inverse_derivative(eta) + V = family.variance(mu, phi=1, weights=weights) + n_iter = 0 + while n_iter < max_iter: + n_iter += 1 + # coef_old not used so far. 
+ # coef_old = coef + # working weights W, in principle a diagonal matrix + # therefore here just as 1d array + W = hp**2 / V + # working observations + z = eta + (y - mu) / hp + # solve A*coef = b + # A = X' W X + P2, b = X' W z + coef = _irls_step(X, W, P2, z) + # updated linear predictor + # do it here for updated values for tolerance + eta = X @ coef + mu = link.inverse(eta) + hp = link.inverse_derivative(eta) + V = family.variance(mu, phi=1, weights=weights) + + # which tolerace? |coef - coef_old| or gradient? + # use gradient for compliance with newton-cg and lbfgs + # gradient = -X' D (y-mu)/V(mu) + l2 P2 w + gradient = -(X.T @ (hp*(y-mu)/V)) + if P2.ndim == 1: + gradient += P2*coef + else: + gradient += P2 @ coef + if (np.max(np.abs(gradient)) <= tol): + converged = True + break + + if not converged: + warnings.warn("irls failed to converge. Increase the number " + "of iterations (currently {0})" + .format(max_iter), ConvergenceWarning) + + return coef, n_iter + + +def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, + max_inner_iter=1000, selection='cyclic', + random_state=None, diag_fisher=False): + """Compute inner loop of coordinate descent = cycles through features. + + Minimization of 1-d subproblems:: + + min_z q(d+z*e_j) - q(d) + = min_z A_j z + 1/2 B_jj z^2 + ||P1_j (w_j+d_j+z)||_1 + + A = f'(w) + d*H(w) + (w+d)*P2 + B = H+P2 + Note: f'=-score and H=fisher are updated at the end of outer iteration. + """ + # TODO: use sparsity (coefficient already 0 due to L1 penalty) + # => active set of features for featurelist, see paper + # of Improved GLMNET or Gap Safe Screening Rules + # https://arxiv.org/abs/1611.05780 + n_samples, n_features = X.shape + B = fisher + if P2.ndim == 1: + coef_P2 = coef * P2 + if not diag_fisher: + B[np.diag_indices_from(B)] += P2 + else: + coef_P2 = P2 @ coef # P2 is symmetric, mat @ vec is usually faster + if not diag_fisher: + if sparse.issparse(P2): + B += P2.toarray() + else: + B += P2 + A = -score + coef_P2 # + d @ (H+P2) but d=0 so far + # inner loop + inner_iter = 0 + while inner_iter < max_inner_iter: + inner_iter += 1 + n_cycles += 1 + if selection == 'random': + featurelist = random_state.permutation(n_features) + else: + featurelist = np.arange(n_features) + for j in featurelist: + # minimize_z: a z + 1/2 b z^2 + c |d+z| + # a = A_j + # b = B_jj > 0 + # c = |P1_j| = P1_j > 0, see 1.3 + # d = w_j + d_j + # cf. https://arxiv.org/abs/0708.1485 Eqs. 
(3) - (4) + # with beta = z+d, beta_hat = d-a/b and gamma = c/b + # z = 1/b * S(bd-a,c) - d + # S(a,b) = sign(a) max(|a|-b, 0) soft thresholding + a = A[j] + if diag_fisher: + if sparse.issparse(X): + xj = X[:, j] + b = xj.transpose() @ xj.multiply(fisher[:, np.newaxis]) + b = b[0, 0] + else: + b = X[:, j] @ (fisher * X[:, j]) + + if P2.ndim == 1: + b += P2[j] + else: + b += P2[j, j] + else: + b = B[j, j] + + if b <= 0: + z = 0 + elif P1[j] == 0: + z = -a/b + elif a + P1[j] < b * (coef[j] + d[j]): + z = -(a + P1[j])/b + elif a - P1[j] > b * (coef[j] + d[j]): + z = -(a - P1[j])/b + else: + z = -(coef[j] + d[j]) + + # update direction d + d[j] += z + # update A because d_j is now d_j+z + # A = f'(w) + d*H(w) + (w+d)*P2 + # => A += (H+P2)*e_j z = B_j * z + # Note: B is symmetric B = B.transpose + if diag_fisher: + if sparse.issparse(X): + A += (X.transpose() @ + X[:, j].multiply(fisher[:, np.newaxis]) + ).toarray().ravel() * z + else: + # A += (X.T @ (fisher * X[:, j])) * z + # same without transpose of X + A += ((fisher * X[:, j]) @ X) * z + + if P2.ndim == 1: + A[j] += P2[j] * z + elif sparse.issparse(P2): + # slice columns as P2 is csc + A += P2[:, j].toarray().ravel() * z + else: + A += P2[:, j] * z + else: + # B is symmetric, C- or F-contiguous, but never sparse + if B.flags['F_CONTIGUOUS']: + # slice columns like for sparse csc + A += B[:, j] * z + else: # B.flags['C_CONTIGUOUS'] might be true + # slice rows + A += B[j, :] * z + # end of cycle + # stopping criterion for inner loop + # sum_i(|minimum of norm of subgrad of q(d)_i|) + mn_subgrad = np.where(coef + d == 0, + np.sign(A) * np.maximum(np.abs(A) - P1, 0), + A + np.sign(coef + d) * P1) + mn_subgrad = linalg.norm(mn_subgrad, ord=1) + if mn_subgrad <= inner_tol: + if inner_iter == 1: + inner_tol = inner_tol/4. + break + # end of inner loop + return d, coef_P2, n_cycles, inner_tol + + +def _cd_solver(coef, X, y, weights, P1, P2, family, link, + max_iter=100, max_inner_iter=1000, tol=1e-4, + selection='cyclic ', random_state=None, + diag_fisher=False, copy_X=True): + """Solve GLM with L1 and L2 penalty by coordinate descent algorithm. + + The objective beeing minimized in the coefficients w=coef is:: + + F = f + g, f(w) = 1/2 deviance, g = 1/2 w*P2*w + ||P1*w||_1 + + An Improved GLMNET for L1-regularized Logistic Regression: + + 1. Find optimal descent direction d by minimizing + min_d F(w+d) = min_d F(w+d) - F(w) + 2. Quadrdatic approximation of F(w+d)-F(w) = q(d): + using f(w+d) = f(w) + f'(w)*d + 1/2 d*H(w)*d + O(d^3) gives: + q(d) = (f'(w) + w*P2)*d + 1/2 d*(H(w)+P2)*d + + ||P1*(w+d)||_1 - ||P1*w||_1 + Then minimize q(d): min_d q(d) + 3. Coordinate descent by updating coordinate j (d -> d+z*e_j): + min_z q(d+z*e_j) + = min_z q(d+z*e_j) - q(d) + = min_z A_j z + 1/2 B_jj z^2 + + ||P1_j (w_j+d_j+z)||_1 - ||P1_j (w_j+d_j)||_1 + A = f'(w) + d*H(w) + (w+d)*P2 + B = H+P2 + + Repeat steps 1-3 until convergence. + Note: Use Fisher matrix instead of Hessian for H. + Note: f' = -score, H = Fisher matrix + + Parameters + ---------- + coef: ndarray, shape (n_features,) + + X : {ndarray, csc sparse matrix}, shape (n_samples, n_features) + Training data (with intercept included if present). If not sparse, + pass directly as Fortran-contiguous data to avoid + unnecessary memory duplication. + + y : ndarray, shape (n_samples,) + Target values. + + weights: ndarray, shape (n_samples,) + Sample weights with which the deviance is weighted. The weights must + bee normalized and sum to 1. 
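The coordinate update above has the closed form z = S(b*d - a, c)/b - d with the soft-thresholding operator S(u, c) = sign(u)*max(|u| - c, 0), where d denotes w_j + d_j. A minimal sketch checking this closed form against a brute-force grid search (made-up numbers, plain numpy)::

    import numpy as np

    def soft_threshold(u, c):
        return np.sign(u) * max(abs(u) - c, 0.0)

    # 1d subproblem: min_z  a*z + 0.5*b*z**2 + c*|w + z|
    a, b, c, w = 0.7, 2.0, 0.5, -0.3
    z_closed = soft_threshold(b * w - a, c) / b - w

    # brute-force check on a fine grid
    zs = np.linspace(-5, 5, 200001)
    obj = a * zs + 0.5 * b * zs ** 2 + c * np.abs(w + zs)
    assert abs(z_closed - zs[np.argmin(obj)]) < 1e-4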
+ + P1 : {ndarray}, shape (n_features,) + The L1-penalty vector (=diagonal matrix) + + P2 : {ndarray, csc sparse matrix}, shape (n_features, n_features) + The L2-penalty matrix or vector (=diagonal matrix). If a matrix is + passed, it must be symmetric. If X is sparse, P2 must also be sparse. + + family : ExponentialDispersionModel + + link : Link + + max_iter : int, optional (default=100) + Maximum numer of outer (Newton) iterations. + + max_inner_iter : int, optional (default=1000) + Maximum number of iterations, i.e. cycles over all features, in inner + loop. + + tol : float, optional (default=1e-4) + Covergence criterion is + sum_i(|minimum of norm of subgrad of objective_i|)<=tol. + + selection : str, optional (default='cyclic') + If 'random', randomly chose features in inner loop. + + random_state : {int, RandomState instance, None}, optional (default=None) + + diag_fisher : boolean, optional (default=False) + 'False' calculates full fisher matrix, 'True' only diagonal matrix s.t. + fisher = X.T @ diag @ X. This saves storage but needs more + matrix-vector multiplications. + + copy_X : boolean, optional (default=True) + If ``True``, X will be copied; else, it may be overwritten. + + Returns + ------- + coef : ndarray, shape (n_features,) + + n_iter : numer of outer iterations = newton iterations + + n_cycles : number of cycles over features + + References + ---------- + Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin + An Improved GLMNET for L1-regularized Logistic Regression, + Journal of Machine Learning Research 13 (2012) 1999-2030 + https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf + """ + X = check_array(X, 'csc', dtype=[np.float64, np.float32], + order='F', copy=copy_X) + if P2.ndim == 2: + P2 = check_array(P2, 'csc', dtype=[np.float64, np.float32], + order='F', copy=copy_X) + if sparse.issparse(X): + if not sparse.isspmatrix_csc(X): + raise ValueError("If X is sparse, it must be in csc format" + "; got (format={})".format(X.format)) + if not sparse.isspmatrix_csc(P2): + raise ValueError("If X is sparse, P2 must also be sparse csc" + "format. Got P2 not sparse.") + random_state = check_random_state(random_state) + # Note: we already set P2 = l2*P2, P1 = l1*P1 + # Note: we already symmetriezed P2 = 1/2 (P2 + P2') + n_iter = 0 # number of outer iterations + n_cycles = 0 # number of (complete) cycles over features + converged = False + n_samples, n_features = X.shape + # line search parameters + (beta, sigma) = (0.5, 0.01) + # some precalculations + # Note: For diag_fisher=False, fisher = X.T @ fisher @ X and fisher is a + # 1d array representing a diagonal matrix. + eta, mu, score, fisher = family._eta_mu_score_fisher( + coef=coef, phi=1, X=X, y=y, weights=weights, link=link, + diag_fisher=diag_fisher) + # set up space for search direction d for inner loop + d = np.zeros_like(coef) + # initial stopping tolerance of inner loop + # use L1-norm of minimum of norm of subgradient of F + # fp_wP2 = f'(w) + w*P2 + if P2.ndim == 1: + fp_wP2 = -score + coef * P2 + else: + # Note: P2 is symmetric and matrix @ vector is faster for sparse + # matrices. 
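Both the inner and outer stopping criteria above measure the L1-norm of the minimum-norm subgradient of the objective, which vanishes exactly at an optimum. A minimal sketch on a toy problem with a known solution (plain numpy, independent of this patch)::

    import numpy as np

    def min_norm_subgrad(w, grad_smooth, P1):
        # elementwise minimum-norm subgradient of smooth(w) + ||P1 * w||_1
        return np.where(w == 0,
                        np.sign(grad_smooth) * np.maximum(np.abs(grad_smooth) - P1, 0),
                        grad_smooth + np.sign(w) * P1)

    # toy problem: min_w 0.5*||w - t||^2 + sum(P1*|w|), optimum = soft threshold of t
    t = np.array([2.0, 0.3])
    P1 = np.array([0.5, 0.5])
    w_opt = np.sign(t) * np.maximum(np.abs(t) - P1, 0)   # = [1.5, 0.0]

    subgrad = min_norm_subgrad(w_opt, w_opt - t, P1)     # grad of smooth part is w - t
    assert np.linalg.norm(subgrad, ord=1) < 1e-12        # criterion met at the optimum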
+ fp_wP2 = -score + P2 @ coef + inner_tol = np.where(coef == 0, + np.sign(fp_wP2) * np.maximum(np.abs(fp_wP2) - P1, 0), + fp_wP2 + np.sign(coef) * P1) + inner_tol = linalg.norm(inner_tol, ord=1) + # outer loop + while n_iter < max_iter: + n_iter += 1 + # initialize search direction d (to be optimized) with zero + d.fill(0) + # inner loop = _cd_cycle + d, coef_P2, n_cycles, inner_tol = \ + _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, + max_inner_iter=max_inner_iter, selection=selection, + random_state=random_state, diag_fisher=diag_fisher) + # line search by sequence beta^k, k=0, 1, .. + # F(w + lambda d) - F(w) <= lambda * bound + # bound = sigma * (f'(w)*d + w*P2*d + # +||P1 (w+d)||_1 - ||P1 w||_1) + P1w_1 = linalg.norm(P1 * coef, ord=1) + # Note: coef_P2 already calculated and still valid + bound = sigma * (-(score @ d) + coef_P2 @ d + + linalg.norm(P1 * (coef + d), ord=1) - P1w_1) + Fw = (0.5 * family.deviance(y, mu, weights) + + 0.5 * (coef_P2 @ coef) + P1w_1) + la = 1./beta + for k in range(20): + la *= beta # starts with la=1 + coef_wd = coef + la * d + mu_wd = link.inverse(X @ coef_wd) + Fwd = (0.5 * family.deviance(y, mu_wd, weights) + + linalg.norm(P1 * coef_wd, ord=1)) + if P2.ndim == 1: + Fwd += 0.5 * ((coef_wd * P2) @ coef_wd) + else: + Fwd += 0.5 * (coef_wd @ (P2 @ coef_wd)) + if Fwd - Fw <= sigma * la * bound: + break + # update coefficients + # coef_old = coef.copy() + coef += la * d + # calculate eta, mu, score, Fisher matrix for next iteration + eta, mu, score, fisher = family._eta_mu_score_fisher( + coef=coef, phi=1, X=X, y=y, weights=weights, link=link, + diag_fisher=diag_fisher) + # stopping criterion for outer loop + # sum_i(|minimum of norm of subgrad of F(w)_i|) + # fp_wP2 = f'(w) + w*P2 + # Note: eta, mu and score are already updated + if P2.ndim == 1: + fp_wP2 = -score + coef * P2 + else: + fp_wP2 = -score + P2 @ coef # P2 is symmetric, mat @ vec is faster + mn_subgrad = np.where(coef == 0, + np.sign(fp_wP2)*np.maximum(np.abs(fp_wP2)-P1, 0), + fp_wP2 + np.sign(coef) * P1) + mn_subgrad = linalg.norm(mn_subgrad, ord=1) + if mn_subgrad <= tol: + converged = True + break + # end of outer loop + if not converged: + warnings.warn("Coordinate descent failed to converge. Increase" + " the number of iterations (currently {0})" + .format(max_iter), ConvergenceWarning) + + return coef, n_iter, n_cycles + + class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a Generalized Linear Model (GLM) with penalties. GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean `mu=h(X*w)`. Therefore the fit minimizes - the following objective function with combined L1 and L2 priors as - regularizer:: + fitting and predicting the mean of the target y as mu=h(X*w). Therefore, + the fit minimizes the following objective function with combined L1 and L2 + priors as regularizer:: 1/(2*sum(s)) * deviance(y, h(X*w); s) + alpha * l1_ratio * ||P1*w||_1 + 1/2 * alpha * (1 - l1_ratio) * w*P2*w - with inverse link function `h` and s=`sample_weight` (for - ``sample_weight=None``, one has s=1 and sum(s)=`n_samples`). - For ``P1=P2='identity'`` (``P1=None``, ``P2=None``), the penalty is the - elastic net:: + with inverse link function h and s=sample_weight. Note that for + ``sample_weight=None``, one has s_i=1 and sum(s)=n_samples). 
+ For ``P1=P2='identity'``, the penalty is the elastic net:: alpha * l1_ratio * ||w||_1 + 1/2 * alpha * (1 - l1_ratio) * ||w||_2^2 - If you are interested in controlling the L1 and L2 penalty + If you are interested in controlling the L1 and L2 penalties separately, keep in mind that this is equivalent to:: a * L1 + b * L2 @@ -897,9 +1289,9 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): alpha = a + b and l1_ratio = a / (a + b) - The parameter `l1_ratio` corresponds to alpha in the glmnet R package while - 'alpha' corresponds to the lambda parameter in glmnet. Specifically, - l1_ratio = 1 is the lasso penalty. + The parameter ``l1_ratio`` corresponds to alpha in the R package glmnet, + while ``alpha`` corresponds to the lambda parameter in glmnet. + Specifically, l1_ratio = 1 is the lasso penalty. Read more in the :ref:`User Guide `. @@ -948,8 +1340,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): The distributional assumption of the GLM, i.e. which distribution from the EDM, specifies the loss function to be minimized. - link : {'auto', 'identity', 'log', 'logit'} or an instance of class Link, - optional (default='auto') + link : {'auto', 'identity', 'log', 'logit'} or an instance of class Link, \ + optional (default='auto') The link function of the GLM, i.e. mapping from linear predictor (X*coef) to expectation (mu). Option 'auto' sets the link depending on the chosen family as follows: @@ -982,7 +1374,10 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): 'cd' Coordinate descent algorithm. It can deal with L1 as well as L2 - penalties. + penalties. Note that in order to avoid unnecessary memory + duplication of the X argument in the ``fit`` method, X should be + directly passed as a Fortran-contiguous numpy array or sparse csc + matrix. max_iter : int, optional (default=100) The maximal number of iterations for solver algorithms. @@ -990,8 +1385,10 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): tol : float, optional (default=1e-4) Stopping criterion. For the irls, newton-cg and lbfgs solvers, the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` - where ``g_i`` is the i-th component of the gradient (derivative of - the objective function). + where g_i is the i-th component of the gradient (derivative) of + the objective function. For the cd solver, covergence is reached + when ``sum_i(|minimum of norm of g_i|)``, where g_i is the + subgradient of the objective. warm_start : boolean, optional (default=False) If set to ``True``, reuse the solution of the previous call to ``fit`` @@ -1007,7 +1404,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): 'irls' Start values of mu are calculated by family.starting_mu(..). Then, - one step of irls obtains start values for ``coef_`. This gives + one step of irls obtains start values for ``coef_``. This gives usually good results. 'least_squares' @@ -1042,6 +1439,16 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): RandomState instance used by `np.random`. Used when ``selection`` == 'random'. + diag_fisher : boolean, (default=False) + Only relevant for solver 'cd'. If ``False``, the full Fisher matrix + (expected Hessian) is computed in each outer iteretion (Newton + iteration). If ``True``, only a diagonal matrix (stored as 1d array) is + computed, such that fisher = X.T @ diag @ X. This saves memory and + matrix-matrix multiplications, but needs more matrix-vector + multiplications. 
If you use large sparse X or if you have many + features, i.e. n_features >> n_samples, you might set this option to + ``True``. + copy_X : boolean, optional, default True If ``True``, X will be copied; else, it may be overwritten. @@ -1056,40 +1463,43 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Attributes ---------- coef_ : array, shape (n_features,) - Estimated coefficients for the linear predictor (X*coef_) in the GLM. + Estimated coefficients for the linear predictor (X*coef_+intercept_) in + the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. dispersion_ : float - The dispersion parameter :math:`\\phi` if fit_dispersion is set. + The dispersion parameter :math:`\\phi` if ``fit_dispersion`` was set. n_iter_ : int - Actual number of iterations of the solver. + Actual number of iterations used in solver. Notes ----- The fit itself does not need Y to be from an EDM, but only assumes - the first two moments :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. + the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function + :math:`v(\\mu_i)` is a property of and given by the specific EDM, see + :ref:`User Guide `. The parameters :math:`w` (`coef_` and `intercept_`) are estimated by - (penalized) maximum likelihood which is equivalent to minimizing the - deviance. + minimizing the deviance plus penalty term, which is equivalent to + (penalized) maximum likelihood estimation. - For `alpha` > 0, the feature matrix `X` should be standardized in order to + For alpha > 0, the feature matrix X should be standardized in order to penalize features equally strong. Call :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. - If the target `y` is a ratio, appropriate sample weights `s` should be + If the target y is a ratio, appropriate sample weights s should be provided. - As an example, consider Poission distributed counts `z` (integers) and - weights `s=exposure` (time, money, persons years, ...). Then you fit - `y = z/s`, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, + As an example, consider Poission distributed counts z (integers) and + weights s=exposure (time, money, persons years, ...). Then you fit + y = z/s, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, sample_weight=s)``. The weights are necessary for the right (finite sample) mean. Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, - in this case one might say that `y` has a 'scaled' Poisson distributions. + in this case one might say that y has a 'scaled' Poisson distributions. The same holds for other distributions. 
References @@ -1104,8 +1514,8 @@ def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', fit_intercept=True, family='normal', link='auto', fit_dispersion=None, solver='auto', max_iter=100, tol=1e-4, warm_start=False, start_params='irls', - selection='cyclic', random_state=None, copy_X=True, - check_input=True, verbose=0): + selection='cyclic', random_state=None, diag_fisher=False, + copy_X=True, check_input=True, verbose=0): self.alpha = alpha self.l1_ratio = l1_ratio self.P1 = P1 @@ -1121,6 +1531,7 @@ def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', self.start_params = start_params self.selection = selection self.random_state = random_state + self.diag_fisher = diag_fisher self.copy_X = copy_X self.check_input = check_input self.verbose = verbose @@ -1154,11 +1565,13 @@ def fit(self, X, y, sample_weight=None): ####################################################################### # 1.1 validate arguments of fit ####################################### _dtype = [np.float64, np.float32] - X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], - dtype=_dtype, y_numeric=True, multi_output=False) + X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'], + dtype=_dtype, y_numeric=True, multi_output=False, + copy=self.copy_X) # Without converting y to float, deviance might raise # ValueError: Integers to negative integer powers are not allowed. - y = y.astype(np.float64) + # Also, y must not be sparse. + y = np.asarray(y, dtype=np.float64) weights = _check_weights(sample_weight, y.shape[0]) @@ -1263,7 +1676,7 @@ def fit(self, X, y, sample_weight=None): " correct length;" " got(start_params={0})".format(start_params)) else: - start_params = check_array(start_params, accept_sparse='csr', + start_params = check_array(start_params, accept_sparse=False, force_all_finite=True, ensure_2d=False, dtype=_dtype, copy=True) if ((start_params.shape[0] != X.shape[1] + self.fit_intercept) or @@ -1274,12 +1687,14 @@ def fit(self, X, y, sample_weight=None): .format(X.shape[1] + self.fit_intercept, start_params.shape[0], start_params.ndim)) - if self.selection not in ['cyclic', 'random']: raise ValueError("The argument selection must be 'cyclic' or " "'random'; got (selection={0})" .format(self.selection)) random_state = check_random_state(self.random_state) + if not isinstance(self.diag_fisher, bool): + raise ValueError("The argument diag_fisher must be bool;" + " got {0}".format(self.diag_fisher)) if not isinstance(self.copy_X, bool): raise ValueError("The argument copy_X must be bool;" " got {0}".format(self.copy_X)) @@ -1303,26 +1718,34 @@ def fit(self, X, y, sample_weight=None): "got (P1.shape[0]={0}), " "needed (X.shape[1]={1})." .format(P1.shape[0], X.shape[1])) + # If X is sparse, make P2 sparse, too. 
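The P1/P2 validation here accepts feature-wise penalties: P1 as a 1d array and P2 as a 1d array (interpreted as a diagonal matrix) or a symmetric matrix, with the intercept never penalized. A hypothetical sketch that leaves the first feature unpenalized, assuming this branch is installed; all values are made up::

    import numpy as np
    from sklearn.linear_model import GeneralizedLinearRegressor  # added by this branch

    rng = np.random.RandomState(0)
    X = rng.randn(60, 3)
    y = X @ np.array([1.0, 0.5, 0.0]) + 0.1 * rng.randn(60)

    P1 = np.array([0.0, 1.0, 1.0])   # no L1 penalty on the first coefficient
    P2 = np.array([0.0, 1.0, 1.0])   # diagonal L2 penalty, first coefficient unpenalized
    glm = GeneralizedLinearRegressor(alpha=0.5, l1_ratio=0.5, P1=P1, P2=P2,
                                     family='normal', link='identity', solver='cd')
    glm.fit(X, y)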
if isinstance(self.P2, str) and self.P2 == 'identity': - if not sparse.issparse(X): - P2 = np.ones(X.shape[1]) - else: + if sparse.issparse(X): P2 = (sparse.dia_matrix((np.ones(X.shape[1]), 0), shape=(X.shape[1], X.shape[1]))).tocsr() + else: + P2 = np.ones(X.shape[1]) else: P2 = check_array(self.P2, copy=True, - accept_sparse=['csr', 'csc', 'coo'], + accept_sparse=['csr', 'csc'], dtype=_dtype, ensure_2d=False) if P2.ndim == 1: + P2 = np.asarray(P2) if P2.shape[0] != X.shape[1]: raise ValueError("P2 should be a 1d array of shape " "(n_features,) with " "n_features=X.shape[1]; " "got (P2.shape=({0},)), needed ({1},)" .format(P2.shape[0], X.shape[1])) - elif ((P2.ndim != 2) or - (P2.shape[0] != P2.shape[1]) or - (P2.shape[0] != X.shape[1])): + if sparse.issparse(X): + P2 = (sparse.dia_matrix((P2, 0), + shape=(X.shape[1], X.shape[1]))).tocsr() + elif (P2.ndim == 2 and P2.shape[0] == P2.shape[1] and + P2.shape[0] == X.shape[1]): + if sparse.issparse(X): + P2 = (sparse.dia_matrix((P2, 0), + shape=(X.shape[1], X.shape[1]))).tocsr() + else: raise ValueError("P2 must be either None or an array of shape " "(n_features, n_features) with " "n_features=X.shape[1]; " @@ -1335,7 +1758,8 @@ def fit(self, X, y, sample_weight=None): if self.fit_intercept: # Note: intercept is first column <=> coef[0] is for intecept if sparse.issparse(X): - Xnew = sparse.hstack([np.ones([X.shape[0], 1]), X]) + Xnew = sparse.hstack([np.ones([X.shape[0], 1]), X], + format=X.format) else: Xnew = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1) P1 = np.concatenate((np.array([0]), P1)) @@ -1343,6 +1767,7 @@ def fit(self, X, y, sample_weight=None): P2 = np.concatenate((np.array([0]), P2)) elif sparse.issparse(P2): P2 = sparse.block_diag((sparse.dia_matrix((1, 1)), P2), + format=P2.format, dtype=P2.dtype).tocsr() else: # as of numpy 1.13 this would work: @@ -1355,7 +1780,7 @@ def fit(self, X, y, sample_weight=None): n_samples, n_features = Xnew.shape l1 = self.alpha * self.l1_ratio - l2 = self.alpha * (1-self.l1_ratio) + l2 = self.alpha * (1 - self.l1_ratio) P1 *= l1 P2 *= l2 # one only ever needs the symmetrized L2 penalty matrix 1/2 (P2 + P2') @@ -1484,315 +1909,114 @@ def fit(self, X, y, sample_weight=None): ####################################################################### # algorithms for optimiation # TODO: Parallelize it? - self.n_iter_ = 0 - converged = False + # 4.1 IRLS ############################################################ - # Solve Newton-Raphson (1): Obj'' (w - w_old) = -Obj' - # Obj = objective function = 1/2 Dev + l2/2 w P2 w - # Dev = deviance, s = normalized weights, variance V(mu) but phi=1 - # D = link.inverse_derivative(eta) = diag_matrix(h'(X w)) - # D2 = link.inverse_derivative(eta)^2 = D^2 - # W = D2/V(mu) - # l2 = alpha * (1 - l1_ratio) - # Obj' = d(Obj)/d(w) = 1/2 Dev' + l2 P2 w - # = -X' D (y-mu)/V(mu) + l2 P2 w - # Obj''= d2(Obj)/d(w)d(w') = Hessian = -X'(...) X + l2 P2 - # Use Fisher matrix instead of full info matrix -X'(...) X, - # i.e. 
E[Dev''] with E[y-mu]=0: - # Obj'' ~ X' W X + l2 P2 - # (1): w = (X' W X + l2 P2)^-1 X' W z, - # with z = eta + D^-1 (y-mu) # Note: we already set P2 = l2*P2, see above # Note: we already symmetriezed P2 = 1/2 (P2 + P2') - # Note: ' denotes derivative, but also transpose for matrices if solver == 'irls': - # eta = linear predictor - eta = safe_sparse_dot(Xnew, coef, dense_output=True) - mu = link.inverse(eta) - # D = h'(eta) - hp = link.inverse_derivative(eta) - V = family.variance(mu, phi=1, weights=weights) - while self.n_iter_ < self.max_iter: - self.n_iter_ += 1 - # coef_old not used so far. - # coef_old = coef - # working weights W, in principle a diagonal matrix - # therefore here just as 1d array - W = (hp**2 / V) - # working observations - z = eta + (y-mu)/hp - # solve A*coef = b - # A = X' W X + P2, b = X' W z - coef = _irls_step(Xnew, W, P2, z) - # updated linear predictor - # do it here for updated values for tolerance - eta = safe_sparse_dot(Xnew, coef, dense_output=True) - mu = link.inverse(eta) - hp = link.inverse_derivative(eta) - V = family.variance(mu, phi=1, weights=weights) - - # which tolerace? |coef - coef_old| or gradient? - # use gradient for compliance with newton-cg and lbfgs - # gradient = family._deviance_derivative( - # coef=coef, X=Xnew, y=y, weights=weights, link=link) - # gradient = -X' D (y-mu)/V(mu) + l2 P2 w - gradient = -safe_sparse_dot(Xnew.T, hp*(y-mu)/V) + coef, self.n_iter_ = \ + _irls_solver(coef=coef, X=Xnew, y=y, weights=weights, P2=P2, + family=family, link=link, max_iter=self.max_iter, + tol=self.tol) + + # 4.2 L-BFGS ########################################################## + elif solver == 'lbfgs': + def func(coef, X, y, weights, P2, family, link): + mu, dev, devp = \ + family._mu_deviance_derivative(coef, X, y, weights, link) if P2.ndim == 1: - gradient += P2*coef + L2 = P2 * coef else: - gradient += safe_sparse_dot(P2, coef) - if (np.max(np.abs(gradient)) <= self.tol): - converged = True - break - - if not converged: - warnings.warn("irls failed to converge. Increase the number " - "of iterations (currently {0})" - .format(self.max_iter), ConvergenceWarning) - - # 4.2 L-BFGS and Newton-CG ############################################ - # TODO: performance: make one function return both deviance and - # gradient of deviance - elif solver in ['lbfgs', 'newton-cg']: - def func(coef, *args): + L2 = P2 @ coef + obj = 0.5 * dev + 0.5 * (coef @ L2) + objp = 0.5 * devp + L2 + return obj, objp + + args = (Xnew, y, weights, P2, family, link) + coef, loss, info = fmin_l_bfgs_b( + func, coef, fprime=None, args=args, + iprint=(self.verbose > 0) - 1, pgtol=self.tol, + maxiter=self.max_iter) + if self.verbose > 0: + if info["warnflag"] == 1: + warnings.warn("lbfgs failed to converge." + " Increase the number of iterations.", + ConvergenceWarning) + elif info["warnflag"] == 2: + warnings.warn("lbfgs failed for the reason: {0}" + .format(info["task"])) + self.n_iter_ = info['nit'] + + # 4.3 Newton-CG ####################################################### + # We use again the fisher matrix instead of the hessian. More + # precisely, expected hessian of deviance. 
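The Newton-CG branch below never forms the expected Hessian explicitly; it only needs Hessian-vector products of the form X.T @ (diag * (X @ s)). A minimal standalone sketch of that matrix-free product (made-up data, plain numpy)::

    import numpy as np

    rng = np.random.RandomState(0)
    X = rng.randn(100, 5)
    diag = rng.rand(100)               # plays the role of d1**2 / variance
    s = rng.randn(5)

    # matrix-free product: two matrix-vector multiplications, no (p, p) matrix built
    Hs_free = X.T @ (diag * (X @ s))

    # reference: build the full expected Hessian first
    H = X.T @ (diag[:, np.newaxis] * X)
    assert np.allclose(Hs_free, H @ s)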
+ elif solver == 'newton-cg': + def func(coef, X, y, weights, P2, family, link): if P2.ndim == 1: - L2 = safe_sparse_dot(coef.T, P2*coef) + L2 = coef @ (P2 * coef) else: - L2 = safe_sparse_dot(coef.T, safe_sparse_dot(P2, coef)) - # A[np.diag_indices_from(A)] += P2 - return 0.5*family._deviance(coef, *args) + 0.5*L2 + L2 = coef @ (P2 @ coef) + mu = link.inverse(X @ coef) + return 0.5 * family.deviance(y, mu, weights) + 0.5 * L2 - def fprime(coef, *args): + def grad(coef, X, y, weights, P2, family, link): if P2.ndim == 1: - L2 = P2*coef + L2 = P2 * coef else: - L2 = safe_sparse_dot(P2, coef) - return 0.5*family._deviance_derivative(coef, *args) + L2 + L2 = P2 @ coef + eta = X @ coef + mu = link.inverse(eta) + d1 = link.inverse_derivative(eta) + grad = X.T @ (d1 * family.deviance_derivative(y, mu, weights)) + return 0.5 * grad + L2 - def grad_hess(coef, X, y, weights, link): + def grad_hess(coef, X, y, weights, P2, family, link): if P2.ndim == 1: - L2 = P2*coef + L2 = P2 * coef else: - L2 = safe_sparse_dot(P2, coef) - grad = 0.5*family._deviance_derivative( - coef, X, y, weights, link) + L2 - hessian = 0.5*family._deviance_hessian( - coef, X, y, weights, link) - if P2.ndim == 1: - hessian[np.diag_indices_from(hessian)] += P2 - else: - hessian = hessian + P2 + L2 = P2 @ coef + eta = X @ coef + mu = link.inverse(eta) + d1 = link.inverse_derivative(eta) + grad = 0.5 * \ + (X.T @ (d1 * family.deviance_derivative(y, mu, weights))) \ + + L2 + # expected hessian = X.T @ diag_matrix @ X + # calculate only diag_matrix + diag = d1**2 / family.variance(mu, phi=1, weights=weights) def Hs(s): - ret = safe_sparse_dot(hessian, s) + ret = 0.5 * (X.T @ (diag * (X @ s))) + if P2.ndim == 1: + ret += P2 * s + else: + ret += P2 @ s return ret + return grad, Hs - args = (Xnew, y, weights, link) - - if solver == 'lbfgs': - coef, loss, info = fmin_l_bfgs_b( - func, coef, fprime=fprime, args=args, - iprint=(self.verbose > 0) - 1, pgtol=self.tol, - maxiter=self.max_iter) - if self.verbose > 0: - if info["warnflag"] == 1: - warnings.warn("lbfgs failed to converge." - " Increase the number of iterations.", - ConvergenceWarning) - elif info["warnflag"] == 2: - warnings.warn("lbfgs failed for the reason: {0}" - .format(info["task"])) - self.n_iter_ = info['nit'] - elif solver == 'newton-cg': - coef, n_iter_i = newton_cg(grad_hess, func, fprime, coef, - args=args, maxiter=self.max_iter, - tol=self.tol) - - # 4.3 coordinate descent ############################################## - # Reference: Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin - # An Improved GLMNET for L1-regularized Logistic Regression, - # Journal of Machine Learning Research 13 (2012) 1999-2030 - # Note: Use Fisher matrix instead of Hessian for H - # - # 1. find optimal descent direction d by minimizing - # min_d F(w+d) = min_d F(w+d) - F(w) - # F = f + g, f(w) = 1/2 deviance, g(w) = 1/2 w*P2*w + ||P1*w||_1 - # 2. quadrdatic approximation of F(w+d)-F(w) = q(d): - # using f(w+d) = f(w) + f'(w)*d + 1/2 d*H(w)*d + O(d^3) gives - # q(d) = (f'(w) + w*P2)*d + 1/2 d*(H(w)+P2)*d - # + ||P1*(w+d)||_1 - ||P1*w||_1 - # min_d q(d) - # 3. 
coordinate descent by updating coordinate j (d -> d+z*e_j): - # min_z q(d+z*e_j) - # = min_z q(d+z*e_j) - q(d) - # = min_z A_j z + 1/2 B_jj z^2 - # + ||P1_j (w_j+d_j+z)||_1 - ||P1_j (w_j+d_j)||_1 - # A = f'(w) + d*H(w) + (w+d)*P2 - # B = H+P2 - # Note: we already set P2 = l2*P2, P1 = l1*P1, see above + args = (Xnew, y, weights, P2, family, link) + coef, n_iter_i = newton_cg(grad_hess, func, grad, coef, + args=args, maxiter=self.max_iter, + tol=self.tol) + + # 4.4 coordinate descent ############################################## + # Note: we already set P1 = l1*P1, see above + # Note: we already set P2 = l2*P2, see above # Note: we already symmetriezed P2 = 1/2 (P2 + P2') - # Note: f' = -score, H = Fisher matrix elif solver == 'cd': - # line search parameters - (beta, sigma) = (0.5, 0.01) - # max inner loops (cycles through all features) - max_inner_iter = 1000 - # some precalculations - eta, mu, score, fisher = family._eta_mu_score_fisher( - coef=coef, phi=1, X=Xnew, y=y, weights=weights, link=link) - # set up space for search direction d for inner loop - d = np.zeros_like(coef) - # initial stopping tolerance of inner loop - # use L1-norm of minimum-norm of subgradient of F - # fp_wP2 = f'(w) + w*P2 - if P2.ndim == 1: - fp_wP2 = -score + coef*P2 - else: - fp_wP2 = -score + safe_sparse_dot(coef, P2) - inner_tol = (np.where(coef == 0, - np.sign(fp_wP2)*np.maximum(np.abs(fp_wP2)-P1, 0), - fp_wP2+np.sign(coef)*P1)) - inner_tol = linalg.norm(inner_tol, ord=1) - # outer loop - while self.n_iter_ < self.max_iter: - self.n_iter_ += 1 - # initialize search direction d (to be optimized) with zero - d.fill(0) - # inner loop - # TODO: use sparsity (coefficient already 0 due to L1 penalty) - # => active set of features for featurelist, see paper - # of Improved GLMNET or Gap Safe Screening Rules - # https://arxiv.org/abs/1611.05780 - # A = f'(w) + d*H(w) + (w+d)*P2 - # B = H+P2 - # Note: f'=-score and H=fisher are updated at the end of outer - # iteration - B = fisher - if P2.ndim == 1: - coef_P2 = coef * P2 - B[np.diag_indices_from(B)] += P2 - else: - coef_P2 = safe_sparse_dot(coef, P2) - B = B + P2 - A = -score + coef_P2 # + d*(H+P2) but d=0 so far - inner_iter = 0 - while inner_iter < max_inner_iter: - inner_iter += 1 - if self.selection == 'random': - featurelist = random_state.permutation(n_features) - else: - featurelist = np.arange(n_features) - for j in featurelist: - # minimize_z: a z + 1/2 b z^2 + c |d+z| - # a = A_j - # b = B_jj > 0 - # c = |P1_j| = P1_j > 0, see 1.3 - # d = w_j + d_j - # cf. https://arxiv.org/abs/0708.1485 Eqs. 
(3) - (4) - # with beta = z+d, beta_hat = d-a/b and gamma = c/b - # z = 1/b * S(bd-a,c) - d - # S(a,b) = sign(a) max(|a|-b, 0) soft thresholding - a = A[j] - b = B[j, j] - if P1[j] == 0: - if b == 0: - z = 0 - else: - z = -a/b - elif a + P1[j] < b * (coef[j]+d[j]): - if b == 0: - z = 0 - else: - z = -(a + P1[j])/b - elif a - P1[j] > b * (coef[j]+d[j]): - if b == 0: - z = 0 - else: - z = -(a - P1[j])/b - else: - z = -(coef[j] + d[j]) - # update direction d - d[j] += z - # update A because d_j is now d_j+z - # A = f'(w) + d*H(w) + (w+d)*P2 - # => A += (H+P2)*e_j z = B_j * z - # Note: B is symmetric B = B.transpose - if sparse.issparse(B): - if sparse.isspmatrix_csc(B): - # slice columns - A += B[:, j].toarray().ravel() * z - else: - # slice rows - A += B[j, :].toarray().ravel() * z - else: - A += B[j, :] * z - # end of cycle - # stopping criterion for inner loop - # sum_i(|minimum-norm subgrad of q(d)_i|) - mn_subgrad = (np.where(coef + d == 0, - np.sign(A)*np.maximum(np.abs(A)-P1, 0), - A+np.sign(coef+d)*P1)) - mn_subgrad = linalg.norm(mn_subgrad, ord=1) - if mn_subgrad <= inner_tol: - if inner_iter == 1: - inner_tol = inner_tol/4. - break - # end of inner loop - # line search by sequence beta^k, k=0, 1, .. - # F(w + lambda d) - F(w) <= lambda * bound - # bound = sigma * (f'(w)*d + w*P2*d - # +||P1 (w+d)||_1 - ||P1 w||_1) - P1w_1 = linalg.norm(P1*coef, ord=1) - # Note: coef_P2 already calculated and still valid - bound = sigma * ( - safe_sparse_dot(-score, d) + - safe_sparse_dot(coef_P2, d) + - linalg.norm(P1*(coef+d), ord=1) - - P1w_1) - Fw = (0.5 * family.deviance(y, mu, weights) + - 0.5 * safe_sparse_dot(coef_P2, coef) + - P1w_1) - la = 1./beta - for k in range(20): - la *= beta # starts with la=1 - mu_wd = link.inverse(safe_sparse_dot(Xnew, coef+la*d, - dense_output=True)) - Fwd = (0.5 * family.deviance(y, mu_wd, weights) + - linalg.norm(P1*(coef+la*d), ord=1)) - if P2.ndim == 1: - Fwd += 0.5 * safe_sparse_dot((coef+la*d)*P2, coef+la*d) - else: - Fwd += 0.5 * (safe_sparse_dot(coef+la*d, - safe_sparse_dot(P2, coef+la*d))) - if Fwd-Fw <= sigma*la*bound: - break - # update coefficients - # coef_old = coef.copy() - coef += la * d - # calculate eta, mu, score, Fisher matrix for next iteration - eta, mu, score, fisher = family._eta_mu_score_fisher( - coef=coef, phi=1, X=Xnew, y=y, weights=weights, link=link) - # stopping criterion for outer loop - # sum_i(|minimum-norm subgrad of F(w)_i|) - # fp_wP2 = f'(w) + w*P2 - # Note: eta, mu and score are already updated - if P2.ndim == 1: - fp_wP2 = -score + coef*P2 - else: - fp_wP2 = -score + safe_sparse_dot(coef, P2) - mn_subgrad = (np.where(coef == 0, - np.sign(fp_wP2)*np.maximum(np.abs(fp_wP2)-P1, 0), - fp_wP2+np.sign(coef)*P1)) - mn_subgrad = linalg.norm(mn_subgrad, ord=1) - if mn_subgrad <= self.tol: - converged = True - break - # end of outer loop - if not converged: - warnings.warn("Coordinate descent failed to converge. 
Increase" - " the number of iterations (currently {0})" - .format(self.max_iter), ConvergenceWarning) + # For coordinate descent, if X is sparse, it should be csc format + # If X is sparse, P2 must also be csc + if sparse.issparse(Xnew): + Xnew = Xnew.tocsc(copy=self.copy_X) + P2 = sparse.csc_matrix(P2) + + coef, self.n_iter_, self._n_cycles = \ + _cd_solver(coef=coef, X=Xnew, y=y, weights=weights, P1=P1, + P2=P2, family=family, link=link, + max_iter=self.max_iter, tol=self.tol, + selection=self.selection, random_state=random_state, + diag_fisher=self.diag_fisher, copy_X=self.copy_X) ####################################################################### # 5. postprocessing # @@ -1828,8 +2052,7 @@ def linear_predictor(self, X): X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype='numeric', copy=True, ensure_2d=True, allow_nd=False) - return safe_sparse_dot(X, self.coef_, - dense_output=True) + self.intercept_ + return X @ self.coef_ + self.intercept_ def predict(self, X, sample_weight=None): """Predict uing GLM with feature matrix X. @@ -1884,7 +2107,7 @@ def estimate_phi(self, X, y, sample_weight=None): dtype=_dtype, y_numeric=True, multi_output=False) n_samples, n_features = X.shape weights = _check_weights(sample_weight, n_samples) - eta = safe_sparse_dot(X, self.coef_, dense_output=True) + eta = X @ self.coef_ if self.fit_intercept is True: eta += self.intercept_ n_features += 1 @@ -1907,19 +2130,19 @@ def estimate_phi(self, X, y, sample_weight=None): # "AssertionError: -0.28014056555724598 not greater than 0.5" # unless GeneralizedLinearRegressor has a score which passes the test. def score(self, X, y, sample_weight=None): - r"""Compute D^2, the percentage of deviance explained. + """Compute D^2, the percentage of deviance explained. D^2 is a generalization of the coefficient of determination R^2. R^2 uses squared error and D^2 deviance. Note that those two are equal for family='normal'. D^2 is defined as - :math:`D^2 = 1-\frac{D(y_{true},y_{pred})}{D_{null}}`, :math:`D_{null}` - is the null deviance, i.e. the deviance of a model with intercept - alone which corresponds to :math:`y_{pred} = \bar{y}`. The mean - :math:`\bar{y}` is averaged by sample_weight. - Best possible score is 1.0 and it can be negative (because the - model can be arbitrarily worse). + :math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`, + :math:`D_{null}` is the null deviance, i.e. the deviance of a model + with intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`. + The mean :math:`\\bar{y}` is averaged by sample_weight. + Best possible score is 1.0 and it can be negative (because the model + can be arbitrarily worse). 
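D^2 as defined above reduces to the ordinary R^2 for the normal family, since the unit deviance is then the squared error. A minimal sketch of the computation (made-up data, plain numpy)::

    import numpy as np

    rng = np.random.RandomState(0)
    y = 10 * rng.rand(20)
    y_pred = y + rng.randn(20)

    dev = np.sum((y - y_pred) ** 2)            # deviance of the fitted values
    dev_null = np.sum((y - y.mean()) ** 2)     # intercept-only model predicts mean(y)
    d2 = 1 - dev / dev_null
    # for family='normal' this equals sklearn.metrics.r2_score(y, y_pred)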
Parameters ---------- diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index de0857a34fe3a..ae8a5f4cfc5e7 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -138,16 +138,18 @@ def test_sample_weights_validation(): glm.fit(X, y, weights) -def test_glm_family_argument(): +@pytest.mark.parametrize('f, fam', + [('normal', NormalDistribution()), + ('poisson', PoissonDistribution()), + ('gamma', GammaDistribution()), + ('inverse.gaussian', InverseGaussianDistribution()), + ('binomial', BinomialDistribution())]) +def test_glm_family_argument(f, fam): """Test GLM family argument set as string.""" - y = np.array([1, 2]) + y = np.array([0.1, 0.5]) # in range of all distributions X = np.array([[1], [2]]) - for (f, fam) in [('normal', NormalDistribution()), - ('poisson', PoissonDistribution()), - ('gamma', GammaDistribution()), - ('inverse.gaussian', InverseGaussianDistribution())]: - glm = GeneralizedLinearRegressor(family=f, alpha=0).fit(X, y) - assert_equal(type(glm._family_instance), type(fam)) + glm = GeneralizedLinearRegressor(family=f, alpha=0).fit(X, y) + assert_equal(type(glm._family_instance), type(fam)) glm = GeneralizedLinearRegressor(family='not a family', fit_intercept=False) @@ -155,14 +157,16 @@ def test_glm_family_argument(): glm.fit(X, y) -def test_glm_link_argument(): +@pytest.mark.parametrize('l, link', + [('identity', IdentityLink()), + ('log', LogLink()), + ('logit', LogitLink())]) +def test_glm_link_argument(l, link): """Test GLM link argument set as string.""" - y = np.array([1, 2]) + y = np.array([0.1, 0.5]) # in range of all distributions X = np.array([[1], [2]]) - for (l, link) in [('identity', IdentityLink()), - ('log', LogLink())]: - glm = GeneralizedLinearRegressor(family='normal', link=l).fit(X, y) - assert_equal(type(glm._link_instance), type(link)) + glm = GeneralizedLinearRegressor(family='normal', link=l).fit(X, y) + assert_equal(type(glm._link_instance), type(link)) glm = GeneralizedLinearRegressor(family='normal', link='not a link') with pytest.raises(ValueError): @@ -317,6 +321,16 @@ def test_glm_random_state_argument(random_state): glm.fit(X, y) +@pytest.mark.parametrize('diag_fisher', ['not bool', 1, 0, [True]]) +def test_glm_diag_fisher_argument(diag_fisher): + """Test GLM for invalid diag_fisher arguments.""" + y = np.array([1, 2]) + X = np.array([[1], [1]]) + glm = GeneralizedLinearRegressor(diag_fisher=diag_fisher) + with pytest.raises(ValueError): + glm.fit(X, y) + + @pytest.mark.parametrize('copy_X', ['not bool', 1, 0, [True]]) def test_glm_copy_X_argument(copy_X): """Test GLM for invalid copy_X arguments.""" @@ -453,7 +467,12 @@ def test_normal_ridge(solver): assert_array_almost_equal(glm.predict(T), ridge.predict(T)) -def test_poisson_ridge(): +@pytest.mark.parametrize('solver, decimal, tol', + [('irls', 7, 1e-8), + ('lbfgs', 5, 1e-7), + ('newton-cg', 5, 1e-7), + ('cd', 7, 1e-8)]) +def test_poisson_ridge(solver, decimal, tol): """Test ridge regression with poisson family and LogLink. 
Compare to R's glmnet""" @@ -470,22 +489,20 @@ def test_poisson_ridge(): # b 0.03741173122 X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T y = np.array([0, 1, 1, 2]) - s_dec = {'irls': 7, 'lbfgs': 5, 'newton-cg': 5, 'cd': 7} - s_tol = {'irls': 1e-8, 'lbfgs': 1e-7, 'newton-cg': 1e-7, 'cd': 1e-8} - for solver in ['irls', 'lbfgs', 'newton-cg', 'cd']: - glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, - fit_intercept=True, family='poisson', - link='log', tol=s_tol[solver], - solver=solver, max_iter=300, - random_state=42) - glm.fit(X, y) - assert_almost_equal(glm.intercept_, -0.12889386979, - decimal=s_dec[solver]) - assert_array_almost_equal(glm.coef_, [0.29019207995, 0.03741173122], - decimal=s_dec[solver]) + glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, + fit_intercept=True, family='poisson', + link='log', tol=tol, + solver=solver, max_iter=300, + random_state=42) + glm.fit(X, y) + assert_almost_equal(glm.intercept_, -0.12889386979, + decimal=decimal) + assert_array_almost_equal(glm.coef_, [0.29019207995, 0.03741173122], + decimal=decimal) -def test_normal_enet(): +@pytest.mark.parametrize('diag_fisher', [False, True]) +def test_normal_enet(diag_fisher): """Test elastic net regression with normal/gaussian family.""" rng = np.random.RandomState(0) alpha, l1_ratio = 0.3, 0.7 @@ -494,12 +511,14 @@ def test_normal_enet(): beta = rng.randn(n_features) y = 2 + np.dot(X, beta) + rng.randn(n_samples) + # 1. test normal enet on dense data glm = GeneralizedLinearRegressor(alpha=alpha, l1_ratio=l1_ratio, family='normal', link='identity', fit_intercept=True, tol=1e-8, max_iter=100, selection='cyclic', solver='cd', start_params='zero', - check_input=False) + check_input=False, + diag_fisher=diag_fisher) glm.fit(X, y) enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=True, @@ -509,6 +528,12 @@ def test_normal_enet(): assert_almost_equal(glm.intercept_, enet.intercept_, decimal=7) assert_array_almost_equal(glm.coef_, enet.coef_, decimal=7) + # 2. test normal enet on sparse data + X = sparse.csc_matrix(X) + glm.fit(X, y) + assert_almost_equal(glm.intercept_, enet.intercept_, decimal=7) + assert_array_almost_equal(glm.coef_, enet.coef_, decimal=7) + def test_poisson_enet(): """Test elastic net regression with poisson family and LogLink. From a6f9f13db9dd11d01d1dcab93819e67d5ca18b9d Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 20 Apr 2019 15:59:39 +0200 Subject: [PATCH 050/269] Treat the intercept separately, i.e. X, P1, P2 never include intercept --- sklearn/linear_model/glm.py | 637 +++++++++++++++---------- sklearn/linear_model/tests/test_glm.py | 6 +- 2 files changed, 386 insertions(+), 257 deletions(-) diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index b2de866a4b69d..2afd1ddf8c79c 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -7,8 +7,6 @@ # License: BSD 3 clause # TODO: Write more examples. -# TODO: Make option self.copy_X more meaningful. -# So far, fit uses Xnew instead of X. # TODO: Should the option `normalize` be included (like other linear models)? # So far, it is not included. User must pass a normalized X. # TODO: Add cross validation support? @@ -39,6 +37,7 @@ # sklearn.linear_models uses w for coefficients, standard literature on # GLMs use beta for coefficients and w for (sample) weights. # So far, coefficients=w and sample weights=s. +# - The intercept term is the first index, i.e. 
coef[0] from __future__ import division @@ -86,6 +85,83 @@ def _check_weights(sample_weight, n_samples): return weights +def _safe_lin_pred(X, coef): + """Compute the linear predictor taking care if intercept is present.""" + if coef.size == X.shape[1] + 1: + return X @ coef[1:] + coef[0] + else: + return X @ coef + + +def _safe_sandwich_dot(X, d, intercept=False): + """Compute sandwich product X.T @ diag(d) @ X. + + With ``intercept=True``, X is treated as if a column of 1 were appended as + first column of X. + X can be sparse, d must be an ndarray. Always returns a ndarray.""" + if sparse.issparse(X): + temp = (X.transpose().multiply(d) @ X).toarray() + else: + temp = (X.T * d) @ X + if intercept: + dim = X.shape[1] + 1 + if sparse.issparse(X): + order = 'F' if sparse.isspmatrix_csc(X) else 'C' + else: + order = 'F' if X.flags['F_CONTIGUOUS'] else 'C' + res = np.empty((dim, dim), dtype=max(X.dtype, d.dtype), order=order) + res[0, 0] = d.sum() + res[1:, 0] = d @ X + res[0, 1:] = res[1:, 0] + res[1:, 1:] = temp + else: + res = temp + return res + + +def _min_norm_sugrad(coef, grad, P2, P1): + """Compute the gradient of all subgradients with minimal L2-norm. + + subgrad = grad + P2 * coef + P1 * subgrad(|coef|_1) + + g_i = grad_i + (P2*coef)_i + + if coef_i > 0: g_i + P1_i + if coef_i < 0: g_i - P1_i + if coef_i = 0: sign(g_i) * max(|g_i|-P1_i, 0) + + Parameters + ---------- + coef : ndarray + coef[0] may be intercept. + + grad : ndarray, shape=coef.shape + + P2 : {1d or 2d array, None} + always without intercept, ``None`` means P2 = 0 + + P1 : ndarray + always without intercept + """ + intercept = (coef.size == P1.size + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept + # compute grad + coef @ P2 without intercept + grad_wP2 = grad[idx:].copy() + if P2 is None: + pass + elif P2.ndim == 1: + grad_wP2 += coef[idx:] * P2 + else: + grad_wP2 += coef[idx:] @ P2 + res = np.where(coef[idx:] == 0, + np.sign(grad_wP2) * np.maximum(np.abs(grad_wP2) - P1, 0), + grad_wP2 + np.sign(coef[idx:]) * P1) + if intercept: + return np.concatenate(([grad[0]], res)) + else: + return res + + class Link(metaclass=ABCMeta): """Abstract base class for Link funtions.""" @@ -473,13 +549,16 @@ def starting_mu(self, y, weights=1, ind_weight=0.5): (1. - ind_weight) * np.average(y, weights=weights)) def _mu_deviance_derivative(self, coef, X, y, weights, link): - """Compute mu, the deviance and it's derivative w.r.t coef.""" - lin_pred = X @ coef + """Compute mu and the derivative of the deviance w.r.t coef.""" + lin_pred = _safe_lin_pred(X, coef) mu = link.inverse(lin_pred) - dev = self.deviance(y, mu, weights) d1 = link.inverse_derivative(lin_pred) - devp = X.T @ (d1 * self.deviance_derivative(y, mu, weights)) - return mu, dev, devp + temp = d1 * self.deviance_derivative(y, mu, weights) + if coef.size == X.shape[1] + 1: + devp = np.concatenate(([temp.sum()], temp @ X)) + else: + devp = temp @ X # sampe as X.T @ temp + return mu, devp def _score(self, coef, phi, X, y, weights, link): r"""Compute the score function. @@ -499,12 +578,15 @@ def _score(self, coef, phi, X, y, weights, link): :math:`\boldsymbol{\Sigma}=\mathrm{diag}(\mathbf{V}[y_1],\ldots)`. Note: The derivative of the deviance w.r.t. coef equals -2 * score. 
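As a reading aid (not part of the patch): the helpers above keep the intercept out of X, with coef[0] holding the intercept. The throwaway numpy check below shows that the concatenated gradient [temp.sum(), temp @ X] matches the usual formulation with an explicit column of ones; all names here are illustrative.

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(8, 3)
temp = rng.randn(8)                 # e.g. d1 * deviance_derivative
coef = rng.randn(4)                 # coef[0] is the intercept

# what _safe_lin_pred and the concatenated gradient compute
lin_pred = X @ coef[1:] + coef[0]
devp = np.concatenate(([temp.sum()], temp @ X))

# same result with an explicit column of ones prepended to X
X1 = np.hstack([np.ones((X.shape[0], 1)), X])
assert np.allclose(lin_pred, X1 @ coef)
assert np.allclose(devp, X1.T @ temp)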
""" - lin_pred = X @ coef + lin_pred = _safe_lin_pred(X, coef) mu = link.inverse(lin_pred) sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) d = link.inverse_derivative(lin_pred) temp = sigma_inv * d * (y - mu) - score = X.T @ temp + if coef.size == X.shape[1] + 1: + score = np.concatenate(([temp.sum()], temp @ X)) + else: + score = temp @ X # sampe as X.T @ temp return score def _fisher_matrix(self, coef, phi, X, y, weights, link): @@ -526,14 +608,14 @@ def _fisher_matrix(self, coef, phi, X, y, weights, link): with :math:`\mathbf{W} = \mathbf{D}^2 \boldsymbol{\Sigma}^{-1}`, see func:`_score`. """ - n_samples = X.shape[0] - lin_pred = X @ coef + lin_pred = _safe_lin_pred(X, coef) mu = link.inverse(lin_pred) sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) - d2 = link.inverse_derivative(lin_pred)**2 - d2_sigma_inv = sparse.dia_matrix((sigma_inv*d2, 0), - shape=(n_samples, n_samples)) - fisher_matrix = X.T @ d2_sigma_inv @ X + d = link.inverse_derivative(lin_pred) + d2_sigma_inv = sigma_inv * d * d + intercept = (coef.size == X.shape[1] + 1) + fisher_matrix = _safe_sandwich_dot(X, d2_sigma_inv, + intercept=intercept) return fisher_matrix def _observed_information(self, coef, phi, X, y, weights, link): @@ -559,17 +641,17 @@ def _observed_information(self, coef, phi, X, y, weights, link): \right)`, see :func:`score_` function and :func:`_fisher_matrix`. """ - n_samples = X.shape[0] - lin_pred = X @ coef + lin_pred = _safe_lin_pred(X, coef) mu = link.inverse(lin_pred) sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) dp = link.inverse_derivative2(lin_pred) d2 = link.inverse_derivative(lin_pred)**2 v = self.unit_variance_derivative(mu)/self.unit_variance(mu) r = y - mu - temp = sparse.dia_matrix((sigma_inv*(-dp*r+d2*v*r+d2), 0), - shape=(n_samples, n_samples)) - observed_information = X.T @ temp @ X + temp = sigma_inv * (-dp * r + d2 * v * r + d2) + intercept = (coef.size == X.shape[1] + 1) + observed_information = _safe_sandwich_dot(X, temp, + intercept=intercept) return observed_information def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link, @@ -600,28 +682,29 @@ def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link, an array of shape (X.shape[1], X.shape[1]) * If diag_fisher is ``True`, an array of shape (X.shape[0]) """ - n_samples, n_features = X.shape + intercept = (coef.size == X.shape[1] + 1) # eta = linear predictor - eta = X @ coef + eta = _safe_lin_pred(X, coef) mu = link.inverse(eta) sigma_inv = 1./self.variance(mu, phi=phi, weights=weights) d1 = link.inverse_derivative(eta) # = h'(eta) # Alternatively: # h'(eta) = h'(g(mu)) = 1/g'(mu), note that h is inverse of g # d1 = 1./link.derivative(mu) - score = X.T @ (sigma_inv * d1 * (y - mu)) - # - d2_sigma_inv = sigma_inv * (d1**2) + d1_sigma_inv = d1 * sigma_inv + temp = d1_sigma_inv * (y - mu) + if intercept: + score = np.concatenate(([temp.sum()], temp @ X)) + else: + score = temp @ X + + d2_sigma_inv = d1 * d1_sigma_inv if diag_fisher: - return eta, mu, score, d2_sigma_inv + fisher_matrix = d2_sigma_inv else: - if sparse.issparse(X): - d2_sigma_inv = sparse.dia_matrix((d2_sigma_inv, 0), - shape=(n_samples, n_samples)) - fisher = (X.T @ d2_sigma_inv @ X).toarray() - else: - fisher = (X.T * d2_sigma_inv) @ X - return eta, mu, score, fisher + fisher_matrix = _safe_sandwich_dot(X, d2_sigma_inv, + intercept=intercept) + return eta, mu, score, fisher_matrix class TweedieDistribution(ExponentialDispersionModel): @@ -809,7 +892,7 @@ def unit_deviance(self, y, mu): return 2 * (special.xlogy(y, y/mu) 
+ special.xlogy(1-y, (1-y)/(1-mu))) -def _irls_step(X, W, P2, z): +def _irls_step(X, W, P2, z, fit_intercept=True): """Compute one step in iteratively reweighted least squares. Solve A w = b for w with @@ -829,43 +912,57 @@ def _irls_step(X, W, P2, z): P2 : {ndarray, sparse matrix}, shape (n_features, n_features) The L2-penalty matrix or vector (=diagonal matrix) - z : ndarray, shape (n_samples,) + z : ndarray, shape (n_samples,) Working observations + fit_intercept : boolean, optional (default=True) + Returns ------- - coef: ndarray, shape (X.shape[1]) + coef : ndarray, shape (c,) + If fit_intercept=False, shape c=X.shape[1]. + If fit_intercept=True, then c=X.shapee[1] + 1. """ # Note: solve vs least squares, what is more appropriate? # scipy.linalg.solve seems faster, but scipy.linalg.lstsq # is more robust. - n_samples, n_features = X.shape - if sparse.issparse(X): - W = sparse.dia_matrix((W, 0), shape=(n_samples, n_samples)).tocsr() + # Note: X.T @ W @ X is not sparse, even when X is sparse. + # Sparse solver would splinalg.spsolve(A, b) or splinalg.lsmr(A, b) + if fit_intercept: + Wz = W * z + if sparse.issparse(X): + b = np.concatenate(([Wz.sum()], X.transpose() @ Wz)) + else: + b = np.concatenate(([Wz.sum()], X.T @ Wz)) + A = _safe_sandwich_dot(X, W, intercept=fit_intercept) if P2.ndim == 1: - L2 = (sparse.dia_matrix((P2, 0), shape=(n_features, n_features)) - ).tocsr() + idx = np.arange(start=1, stop=A.shape[0]) + A[(idx, idx)] += P2 # add to diag elements without intercept + elif sparse.issparse(P2): + A[1:, 1:] += P2.toarray() else: - L2 = sparse.csr_matrix(P2) - XtW = X.transpose() * W - A = XtW * X + L2 - b = XtW * z - # coef = splinalg.spsolve(A, b) - coef, *_ = splinalg.lsmr(A, b) + A[1:, 1:] += P2 else: - XtW = (X.T * W) - A = XtW.dot(X) + if sparse.issparse(X): + XtW = X.transpose().multiply(W) + A = (XtW @ X).toarray() + else: + XtW = (X.T * W) + A = XtW @ X + b = XtW @ z if P2.ndim == 1: A[np.diag_indices_from(A)] += P2 + elif sparse.issparse(P2): + A += P2.toarray() else: A += P2 - b = XtW.dot(z) - # coef = linalg.solve(A, b, overwrite_a=True, overwrite_b=True) - coef, *_ = linalg.lstsq(A, b, overwrite_a=True, overwrite_b=True) + # coef = linalg.solve(A, b, overwrite_a=True, overwrite_b=True) + coef, *_ = linalg.lstsq(A, b, overwrite_a=True, overwrite_b=True) return coef -def _irls_solver(coef, X, y, weights, P2, family, link, max_iter, tol): +def _irls_solver(coef, X, y, weights, P2, fit_intercept, family, link, + max_iter, tol): """Solve GLM with L2 penalty by IRLS algorithm. Note: If X is sparse, P2 must also be sparse. @@ -889,7 +986,7 @@ def _irls_solver(coef, X, y, weights, P2, family, link, max_iter, tol): # Note: ' denotes derivative, but also transpose for matrices # eta = linear predictor - eta = X @ coef + eta = _safe_lin_pred(X, coef) mu = link.inverse(eta) # D = h'(eta) hp = link.inverse_derivative(eta) @@ -906,10 +1003,10 @@ def _irls_solver(coef, X, y, weights, P2, family, link, max_iter, tol): z = eta + (y - mu) / hp # solve A*coef = b # A = X' W X + P2, b = X' W z - coef = _irls_step(X, W, P2, z) + coef = _irls_step(X, W, P2, z, fit_intercept=fit_intercept) # updated linear predictor # do it here for updated values for tolerance - eta = X @ coef + eta = _safe_lin_pred(X, coef) mu = link.inverse(eta) hp = link.inverse_derivative(eta) V = family.variance(mu, phi=1, weights=weights) @@ -917,11 +1014,18 @@ def _irls_solver(coef, X, y, weights, P2, family, link, max_iter, tol): # which tolerace? |coef - coef_old| or gradient? 
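For orientation, a minimal standalone sketch of the working weights W and working observations z used above, written for a Poisson GLM with log link and a tiny ridge penalty. Unlike the solver above it uses an explicit intercept column for brevity; it is illustrative only and not the code path of _irls_solver.

import numpy as np

X = np.array([[1., 0.], [1., 1.], [1., 2.], [1., 3.]])  # explicit intercept column
y = np.array([0., 1., 1., 2.])
P2 = np.diag([0., 1e-3])                                 # no penalty on the intercept
coef = np.zeros(2)

for _ in range(20):
    eta = X @ coef
    mu = np.exp(eta)            # inverse of the log link
    hp = mu                     # h'(eta) = exp(eta)
    W = hp**2 / mu              # unit variance of Poisson is mu
    z = eta + (y - mu) / hp
    A = X.T @ (W[:, None] * X) + P2
    b = X.T @ (W * z)
    coef_new = np.linalg.solve(A, b)
    if np.max(np.abs(coef_new - coef)) < 1e-10:
        coef = coef_new
        break
    coef = coef_new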
# use gradient for compliance with newton-cg and lbfgs # gradient = -X' D (y-mu)/V(mu) + l2 P2 w - gradient = -(X.T @ (hp*(y-mu)/V)) + temp = hp * (y - mu) / V + if sparse.issparse(X): + gradient = -(X.transpose() @ temp) + else: + gradient = -(X.T @ temp) + idx = 1 if fit_intercept else 0 # offset if coef[0] is intercept if P2.ndim == 1: - gradient += P2*coef + gradient += P2 * coef[idx:] else: - gradient += P2 @ coef + gradient += P2 @ coef[idx:] + if fit_intercept: + gradient = np.concatenate(([-temp.sum()], gradient)) if (np.max(np.abs(gradient)) <= tol): converged = True break @@ -937,7 +1041,7 @@ def _irls_solver(coef, X, y, weights, P2, family, link, max_iter, tol): def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, max_inner_iter=1000, selection='cyclic', random_state=None, diag_fisher=False): - """Compute inner loop of coordinate descent = cycles through features. + """Compute inner loop of coordinate descent, i.e. cycles through features. Minimization of 1-d subproblems:: @@ -953,24 +1057,31 @@ def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, # of Improved GLMNET or Gap Safe Screening Rules # https://arxiv.org/abs/1611.05780 n_samples, n_features = X.shape + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept B = fisher if P2.ndim == 1: - coef_P2 = coef * P2 + coef_P2 = coef[idx:] * P2 if not diag_fisher: - B[np.diag_indices_from(B)] += P2 + idiag = np.arange(start=idx, stop=B.shape[0]) + # B[np.diag_indices_from(B)] += P2 + B[(idiag, idiag)] += P2 else: - coef_P2 = P2 @ coef # P2 is symmetric, mat @ vec is usually faster + coef_P2 = coef[idx:] @ P2 if not diag_fisher: if sparse.issparse(P2): - B += P2.toarray() + B[idx:, idx:] += P2.toarray() else: - B += P2 - A = -score + coef_P2 # + d @ (H+P2) but d=0 so far + B[idx:, idx:] += P2 + # A = -score + coef_P2 + A = -score + A[idx:] += coef_P2 + # A += d @ (H+P2) but so far d=0 # inner loop - inner_iter = 0 - while inner_iter < max_inner_iter: + for inner_iter in range(1, max_inner_iter+1): inner_iter += 1 n_cycles += 1 + # cycle through features, update intercept separately at the end if selection == 'random': featurelist = random_state.permutation(n_features) else: @@ -985,70 +1096,85 @@ def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, # with beta = z+d, beta_hat = d-a/b and gamma = c/b # z = 1/b * S(bd-a,c) - d # S(a,b) = sign(a) max(|a|-b, 0) soft thresholding - a = A[j] + jdx = j+idx # index for arrays containing entries for intercept + a = A[jdx] if diag_fisher: + # Note: fisher is ndarray of shape (n_samples,) => no idx + # Calculate Bj = B[j, :] = B[:, j] as it is needed later anyway + Bj = np.zeros_like(A) + if intercept: + Bj[0] = fisher.sum() if sparse.issparse(X): - xj = X[:, j] - b = xj.transpose() @ xj.multiply(fisher[:, np.newaxis]) - b = b[0, 0] + Bj[idx:] = (X[:, j].transpose().multiply(fisher) @ X + ).toarray().ravel() else: - b = X[:, j] @ (fisher * X[:, j]) + Bj[idx:] = (fisher * X[:, j]) @ X if P2.ndim == 1: - b += P2[j] + Bj[idx:] += P2[j] else: - b += P2[j, j] + if sparse.issparse(P2): + # slice columns as P2 is csc + Bj[idx:] += P2[:, j].toarray().ravel() + else: + Bj[idx:] += P2[:, j] + b = Bj[jdx] else: - b = B[j, j] + b = B[jdx, jdx] + # those ten lines aree what it is all about if b <= 0: z = 0 elif P1[j] == 0: z = -a/b - elif a + P1[j] < b * (coef[j] + d[j]): + elif a + P1[j] < b * (coef[jdx] + d[jdx]): z = -(a + P1[j])/b - elif a - P1[j] > b * (coef[j] + d[j]): + elif a - P1[j] > b * 
(coef[jdx] + d[jdx]): z = -(a - P1[j])/b else: - z = -(coef[j] + d[j]) + z = -(coef[jdx] + d[jdx]) # update direction d - d[j] += z + d[jdx] += z # update A because d_j is now d_j+z # A = f'(w) + d*H(w) + (w+d)*P2 # => A += (H+P2)*e_j z = B_j * z # Note: B is symmetric B = B.transpose if diag_fisher: - if sparse.issparse(X): - A += (X.transpose() @ - X[:, j].multiply(fisher[:, np.newaxis]) - ).toarray().ravel() * z - else: - # A += (X.T @ (fisher * X[:, j])) * z - # same without transpose of X - A += ((fisher * X[:, j]) @ X) * z - - if P2.ndim == 1: - A[j] += P2[j] * z - elif sparse.issparse(P2): - # slice columns as P2 is csc - A += P2[:, j].toarray().ravel() * z - else: - A += P2[:, j] * z + # Bj = B[:, j] calculated above, still valid + A += Bj * z else: # B is symmetric, C- or F-contiguous, but never sparse if B.flags['F_CONTIGUOUS']: # slice columns like for sparse csc - A += B[:, j] * z + A += B[:, jdx] * z else: # B.flags['C_CONTIGUOUS'] might be true # slice rows - A += B[j, :] * z - # end of cycle + A += B[jdx, :] * z + # end of cycle over features + # update intercept + if intercept: + if diag_fisher: + Bj = np.zeros_like(A) + Bj[0] = fisher.sum() + Bj[1:] = fisher @ X + b = Bj[0] + else: + b = B[0, 0] + z = 0 if b <= 0 else -A[0]/b + d[0] += z + if diag_fisher: + A += Bj * z + else: + if B.flags['F_CONTIGUOUS']: + A += B[:, 0] * z + else: + A += B[0, :] * z + # end of complete cycle # stopping criterion for inner loop # sum_i(|minimum of norm of subgrad of q(d)_i|) - mn_subgrad = np.where(coef + d == 0, - np.sign(A) * np.maximum(np.abs(A) - P1, 0), - A + np.sign(coef + d) * P1) + # subgrad q(d) = A + subgrad ||P1*(w+d)||_1 + mn_subgrad = _min_norm_sugrad(coef=coef + d, grad=A, P2=None, P1=P1) mn_subgrad = linalg.norm(mn_subgrad, ord=1) if mn_subgrad <= inner_tol: if inner_iter == 1: @@ -1058,7 +1184,7 @@ def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, return d, coef_P2, n_cycles, inner_tol -def _cd_solver(coef, X, y, weights, P1, P2, family, link, +def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, max_iter=100, max_inner_iter=1000, tol=1e-4, selection='cyclic ', random_state=None, diag_fisher=False, copy_X=True): @@ -1083,7 +1209,7 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, = min_z A_j z + 1/2 B_jj z^2 + ||P1_j (w_j+d_j+z)||_1 - ||P1_j (w_j+d_j)||_1 A = f'(w) + d*H(w) + (w+d)*P2 - B = H+P2 + B = H + P2 Repeat steps 1-3 until convergence. Note: Use Fisher matrix instead of Hessian for H. @@ -1091,7 +1217,9 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, Parameters ---------- - coef: ndarray, shape (n_features,) + coef : ndarray, shape (c,) + If fit_intercept=False, shape c=X.shape[1]. + If fit_intercept=True, then c=X.shapee[1] + 1. X : {ndarray, csc sparse matrix}, shape (n_samples, n_features) Training data (with intercept included if present). If not sparse, @@ -1112,6 +1240,10 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, The L2-penalty matrix or vector (=diagonal matrix). If a matrix is passed, it must be symmetric. If X is sparse, P2 must also be sparse. + fit_intercept : boolean, optional (default=True) + Specifies if a constant (a.k.a. bias or intercept) should be + added to the linear predictor (X*coef+intercept). + family : ExponentialDispersionModel link : Link @@ -1120,8 +1252,8 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, Maximum numer of outer (Newton) iterations. max_inner_iter : int, optional (default=1000) - Maximum number of iterations, i.e. 
cycles over all features, in inner - loop. + Maximum number of iterations in each inner loop, i.e. max number of + cycles over all features per inner loop. tol : float, optional (default=1e-4) Covergence criterion is @@ -1133,8 +1265,8 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, random_state : {int, RandomState instance, None}, optional (default=None) diag_fisher : boolean, optional (default=False) - 'False' calculates full fisher matrix, 'True' only diagonal matrix s.t. - fisher = X.T @ diag @ X. This saves storage but needs more + ``False`` calculates full fisher matrix, ``True`` only diagonal matrix + s.t. fisher = X.T @ diag @ X. This saves storage but needs more matrix-vector multiplications. copy_X : boolean, optional (default=True) @@ -1142,7 +1274,9 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, Returns ------- - coef : ndarray, shape (n_features,) + coef : ndarray, shape (c,) + If fit_intercept=False, shape c=X.shape[1]. + If fit_intercept=True, then c=X.shapee[1] + 1. n_iter : numer of outer iterations = newton iterations @@ -1174,6 +1308,7 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, n_cycles = 0 # number of (complete) cycles over features converged = False n_samples, n_features = X.shape + idx = 1 if fit_intercept else 0 # offset if coef[0] is intercept # line search parameters (beta, sigma) = (0.5, 0.01) # some precalculations @@ -1186,16 +1321,7 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, d = np.zeros_like(coef) # initial stopping tolerance of inner loop # use L1-norm of minimum of norm of subgradient of F - # fp_wP2 = f'(w) + w*P2 - if P2.ndim == 1: - fp_wP2 = -score + coef * P2 - else: - # Note: P2 is symmetric and matrix @ vector is faster for sparse - # matrices. - fp_wP2 = -score + P2 @ coef - inner_tol = np.where(coef == 0, - np.sign(fp_wP2) * np.maximum(np.abs(fp_wP2) - P1, 0), - fp_wP2 + np.sign(coef) * P1) + inner_tol = _min_norm_sugrad(coef=coef, grad=-score, P2=P2, P1=P1) inner_tol = linalg.norm(inner_tol, ord=1) # outer loop while n_iter < max_iter: @@ -1211,23 +1337,23 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, # F(w + lambda d) - F(w) <= lambda * bound # bound = sigma * (f'(w)*d + w*P2*d # +||P1 (w+d)||_1 - ||P1 w||_1) - P1w_1 = linalg.norm(P1 * coef, ord=1) + P1w_1 = linalg.norm(P1 * coef[idx:], ord=1) + P1wd_1 = linalg.norm(P1 * (coef + d)[idx:], ord=1) # Note: coef_P2 already calculated and still valid - bound = sigma * (-(score @ d) + coef_P2 @ d + - linalg.norm(P1 * (coef + d), ord=1) - P1w_1) + bound = sigma * (-(score @ d) + coef_P2 @ d[idx:] + P1wd_1 - P1w_1) Fw = (0.5 * family.deviance(y, mu, weights) + - 0.5 * (coef_P2 @ coef) + P1w_1) + 0.5 * (coef_P2 @ coef[idx:]) + P1w_1) la = 1./beta for k in range(20): la *= beta # starts with la=1 coef_wd = coef + la * d - mu_wd = link.inverse(X @ coef_wd) + mu_wd = link.inverse(_safe_lin_pred(X, coef_wd)) Fwd = (0.5 * family.deviance(y, mu_wd, weights) + - linalg.norm(P1 * coef_wd, ord=1)) + linalg.norm(P1 * coef_wd[idx:], ord=1)) if P2.ndim == 1: - Fwd += 0.5 * ((coef_wd * P2) @ coef_wd) + Fwd += 0.5 * ((coef_wd[idx:] * P2) @ coef_wd[idx:]) else: - Fwd += 0.5 * (coef_wd @ (P2 @ coef_wd)) + Fwd += 0.5 * (coef_wd[idx:] @ (P2 @ coef_wd[idx:])) if Fwd - Fw <= sigma * la * bound: break # update coefficients @@ -1238,16 +1364,10 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, coef=coef, phi=1, X=X, y=y, weights=weights, link=link, diag_fisher=diag_fisher) # stopping criterion for outer loop - # sum_i(|minimum of norm of 
subgrad of F(w)_i|) + # sum_i(|minimum-norm of subgrad of F(w)_i|) # fp_wP2 = f'(w) + w*P2 # Note: eta, mu and score are already updated - if P2.ndim == 1: - fp_wP2 = -score + coef * P2 - else: - fp_wP2 = -score + P2 @ coef # P2 is symmetric, mat @ vec is faster - mn_subgrad = np.where(coef == 0, - np.sign(fp_wP2)*np.maximum(np.abs(fp_wP2)-P1, 0), - fp_wP2 + np.sign(coef) * P1) + mn_subgrad = _min_norm_sugrad(coef=coef, grad=-score, P2=P2, P1=P1) mn_subgrad = linalg.norm(mn_subgrad, ord=1) if mn_subgrad <= tol: converged = True @@ -1255,8 +1375,8 @@ def _cd_solver(coef, X, y, weights, P1, P2, family, link, # end of outer loop if not converged: warnings.warn("Coordinate descent failed to converge. Increase" - " the number of iterations (currently {0})" - .format(max_iter), ConvergenceWarning) + " the maximum number of iterations max_iter" + " (currently {0})".format(max_iter), ConvergenceWarning) return coef, n_iter, n_cycles @@ -1387,8 +1507,9 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` where g_i is the i-th component of the gradient (derivative) of the objective function. For the cd solver, covergence is reached - when ``sum_i(|minimum of norm of g_i|)``, where g_i is the - subgradient of the objective. + when ``sum_i(|minimum-norm of g_i|)``, where g_i is the + subgradient of the objective and minimum-norm of g_i is the element of + the subgradient g_i with the smallest L2-norm. warm_start : boolean, optional (default=False) If set to ``True``, reuse the solution of the previous call to ``fit`` @@ -1563,19 +1684,7 @@ def fit(self, X, y, sample_weight=None): ####################################################################### # 1. input validation # ####################################################################### - # 1.1 validate arguments of fit ####################################### - _dtype = [np.float64, np.float32] - X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'], - dtype=_dtype, y_numeric=True, multi_output=False, - copy=self.copy_X) - # Without converting y to float, deviance might raise - # ValueError: Integers to negative integer powers are not allowed. - # Also, y must not be sparse. - y = np.asarray(y, dtype=np.float64) - - weights = _check_weights(sample_weight, y.shape[0]) - - # 1.2 validate arguments of __init__ ################################## + # 1.1 validate arguments of __init__ ################################## # Guarantee that self._family_instance is an instance of class # ExponentialDispersionModel if isinstance(self.family, ExponentialDispersionModel): @@ -1668,25 +1777,6 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.warm_start, bool): raise ValueError("The argument warm_start must be bool;" " got {0}".format(self.warm_start)) - start_params = self.start_params - if isinstance(start_params, str): - if start_params not in ['irls', 'least_squares', 'zero']: - raise ValueError("The argument start_params must be 'irls', " - "'least-squares', 'zero' or an array of " - " correct length;" - " got(start_params={0})".format(start_params)) - else: - start_params = check_array(start_params, accept_sparse=False, - force_all_finite=True, ensure_2d=False, - dtype=_dtype, copy=True) - if ((start_params.shape[0] != X.shape[1] + self.fit_intercept) or - (start_params.ndim != 1)): - raise ValueError("Start values for parameters must have the" - "right length and dimension; required (length" - "={0}, ndim=1); got (length={1}, ndim={2})." 
- .format(X.shape[1] + self.fit_intercept, - start_params.shape[0], - start_params.ndim)) if self.selection not in ['cyclic', 'random']: raise ValueError("The argument selection must be 'cyclic' or " "'random'; got (selection={0})" @@ -1702,36 +1792,59 @@ def fit(self, X, y, sample_weight=None): raise ValueError("The argument check_input must be bool; got " "(check_input={0})".format(self.check_input)) + family = self._family_instance + link = self._link_instance + + # 1.2 validate arguments of fit ####################################### + _dtype = [np.float64, np.float32] + if solver == 'cd': + _stype = ['csc'] + else: + _stype = ['csc', 'csr'] + X, y = check_X_y(X, y, accept_sparse=_stype, + dtype=_dtype, y_numeric=True, multi_output=False, + copy=self.copy_X) + # Without converting y to float, deviance might raise + # ValueError: Integers to negative integer powers are not allowed. + # Also, y must not be sparse. + y = np.asarray(y, dtype=np.float64) + + weights = _check_weights(sample_weight, y.shape[0]) + + n_samples, n_features = X.shape + + # 1.3 arguments to take special care ################################## + # P1, P2, start_params if isinstance(self.P1, str) and self.P1 == 'identity': - P1 = np.ones(X.shape[1]) + P1 = np.ones(n_features) else: P1 = np.atleast_1d(self.P1) try: - P1 = P1.astype(np.float64, casting='safe', copy=True) + P1 = P1.astype(np.float64, casting='safe', copy=False) except TypeError: raise TypeError("The given P1 cannot be converted to a numeric" "array; got (P1.dtype={0})." .format(P1.dtype)) - if (P1.ndim != 1) or (P1.shape[0] != X.shape[1]): + if (P1.ndim != 1) or (P1.shape[0] != n_features): raise ValueError("P1 must be either 'identity' or a 1d array " "with the length of X.shape[1]; " "got (P1.shape[0]={0}), " "needed (X.shape[1]={1})." - .format(P1.shape[0], X.shape[1])) + .format(P1.shape[0], n_features)) # If X is sparse, make P2 sparse, too. 
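For readers skimming the P1/P2 handling in this hunk: with the defaults P1='identity' and P2='identity', the penalties reduce to a plain elastic net on the non-intercept coefficients. A hedged sketch of the resulting objective, using the normal family with identity link for concreteness (function and variable names are hypothetical):

import numpy as np

def objective(coef, X, y, alpha, l1_ratio):
    """0.5 * mean deviance (normal family) + elastic net penalty on coef[1:]."""
    mu = coef[0] + X @ coef[1:]                        # identity link, coef[0] = intercept
    dev = np.sum((y - mu) ** 2)                        # normal deviance
    P1 = alpha * l1_ratio * np.ones(X.shape[1])        # 'identity' P1 scaled by l1
    P2 = alpha * (1 - l1_ratio) * np.ones(X.shape[1])  # 'identity' P2 (diagonal) scaled by l2
    return (0.5 * dev / X.shape[0]
            + 0.5 * coef[1:] @ (P2 * coef[1:])
            + np.sum(P1 * np.abs(coef[1:])))

X = np.array([[0., 1.], [1., 1.], [2., 0.]])
y = np.array([0.5, 1.0, 1.5])
print(objective(np.array([0.1, 0.4, -0.2]), X, y, alpha=0.1, l1_ratio=0.5))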
if isinstance(self.P2, str) and self.P2 == 'identity': if sparse.issparse(X): - P2 = (sparse.dia_matrix((np.ones(X.shape[1]), 0), - shape=(X.shape[1], X.shape[1]))).tocsr() + P2 = (sparse.dia_matrix((np.ones(n_features), 0), + shape=(n_features, n_features))).tocsc() else: - P2 = np.ones(X.shape[1]) + P2 = np.ones(n_features) else: P2 = check_array(self.P2, copy=True, - accept_sparse=['csr', 'csc'], + accept_sparse=_stype, dtype=_dtype, ensure_2d=False) if P2.ndim == 1: P2 = np.asarray(P2) - if P2.shape[0] != X.shape[1]: + if P2.shape[0] != n_features: raise ValueError("P2 should be a 1d array of shape " "(n_features,) with " "n_features=X.shape[1]; " @@ -1739,12 +1852,12 @@ def fit(self, X, y, sample_weight=None): .format(P2.shape[0], X.shape[1])) if sparse.issparse(X): P2 = (sparse.dia_matrix((P2, 0), - shape=(X.shape[1], X.shape[1]))).tocsr() + shape=(n_features, n_features))).tocsc() elif (P2.ndim == 2 and P2.shape[0] == P2.shape[1] and P2.shape[0] == X.shape[1]): if sparse.issparse(X): P2 = (sparse.dia_matrix((P2, 0), - shape=(X.shape[1], X.shape[1]))).tocsr() + shape=(n_features, n_features))).tocsc() else: raise ValueError("P2 must be either None or an array of shape " "(n_features, n_features) with " @@ -1752,37 +1865,31 @@ def fit(self, X, y, sample_weight=None): "got (P2.shape=({0}, {1})), needed ({2}, {2})" .format(P2.shape[0], P2.shape[1], X.shape[1])) - family = self._family_instance - link = self._link_instance - - if self.fit_intercept: - # Note: intercept is first column <=> coef[0] is for intecept - if sparse.issparse(X): - Xnew = sparse.hstack([np.ones([X.shape[0], 1]), X], - format=X.format) - else: - Xnew = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1) - P1 = np.concatenate((np.array([0]), P1)) - if P2.ndim == 1: - P2 = np.concatenate((np.array([0]), P2)) - elif sparse.issparse(P2): - P2 = sparse.block_diag((sparse.dia_matrix((1, 1)), P2), - format=P2.format, - dtype=P2.dtype).tocsr() - else: - # as of numpy 1.13 this would work: - # P2 = np.block([[np.zeros((1, 1)), np.zeros((1, X.shape[1]))], - # [np.zeros((X.shape[1], 1)), P2]]) - P2 = np.hstack((np.zeros((X.shape[1], 1)), P2)) - P2 = np.vstack((np.zeros((1, X.shape[1]+1)), P2)) + start_params = self.start_params + if isinstance(start_params, str): + if start_params not in ['irls', 'least_squares', 'zero']: + raise ValueError("The argument start_params must be 'irls', " + "'least-squares', 'zero' or an array of " + " correct length;" + " got(start_params={0})".format(start_params)) else: - Xnew = X + start_params = check_array(start_params, accept_sparse=False, + force_all_finite=True, ensure_2d=False, + dtype=_dtype, copy=True) + if ((start_params.shape[0] != X.shape[1] + self.fit_intercept) or + (start_params.ndim != 1)): + raise ValueError("Start values for parameters must have the" + "right length and dimension; required (length" + "={0}, ndim=1); got (length={1}, ndim={2})." + .format(X.shape[1] + self.fit_intercept, + start_params.shape[0], + start_params.ndim)) - n_samples, n_features = Xnew.shape l1 = self.alpha * self.l1_ratio l2 = self.alpha * (1 - self.l1_ratio) - P1 *= l1 - P2 *= l2 + # P1 and P2 are now for sure copies + P1 = l1 * P1 + P2 = l2 * P2 # one only ever needs the symmetrized L2 penalty matrix 1/2 (P2 + P2') # reason: w' P2 w = (w' P2 w)', i.e. 
it is symmetric if P2.ndim == 2: @@ -1791,14 +1898,12 @@ def fit(self, X, y, sample_weight=None): else: P2 = 0.5 * (P2 + P2.T) - # 1.3 additional validations ########################################## + # 1.4 additional validations ########################################## if self.check_input: if not np.all(family.in_y_range(y)): raise ValueError("Some value(s) of y are out of the valid " "range for family {0}" .format(family.__class__.__name__)) - if not np.all(weights >= 0): - raise ValueError("Sample weights must be non-negative.") # check if P1 has only non-negative values, negative values might # indicate group lasso in the future. if not isinstance(self.P1, str): # if self.P1 != 'identity': @@ -1830,7 +1935,7 @@ def fit(self, X, y, sample_weight=None): else: if not np.all(linalg.eigvalsh(P2) >= epsneg): raise ValueError("P2 must be positive semi-definite.") - # TODO: if alpha=0 check that Xnew is not rank deficient + # TODO: if alpha=0 check that X is not rank deficient # TODO: what else to check? ####################################################################### @@ -1874,13 +1979,14 @@ def fit(self, X, y, sample_weight=None): z = eta + (y-mu)/hp # solve A*coef = b # A = X' W X + l2 P2, b = X' W z - coef = _irls_step(Xnew, W, P2, z) + coef = _irls_step(X, W, P2, z, + fit_intercept=self.fit_intercept) elif start_params == 'least_squares': # less restrictive tolerance for finding start values tol = np.max([self.tol, np.sqrt(self.tol)]) if self.alpha == 0: reg = LinearRegression(copy_X=True, fit_intercept=False) - reg.fit(Xnew, link.link(y)) + reg.fit(X, link.link(y)) coef = reg.coef_ elif self.l1_ratio <= 0.01: # ElasticNet says l1_ratio <= 0.01 is not reliable @@ -1888,19 +1994,21 @@ def fit(self, X, y, sample_weight=None): # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 reg = Ridge(copy_X=True, fit_intercept=False, alpha=self.alpha*n_samples, tol=tol) - reg.fit(Xnew, link.link(y)) + reg.fit(X, link.link(y)) coef = reg.coef_ else: # TODO: Does this make sense at all? 
reg = ElasticNet(copy_X=True, fit_intercept=False, alpha=self.alpha, l1_ratio=self.l1_ratio, tol=tol) - reg.fit(Xnew, link.link(y)) + reg.fit(X, link.link(y)) coef = reg.coef_ else: # start_params == 'zero' - coef = np.zeros(n_features) if self.fit_intercept: + coef = np.zeros(n_features+1) coef[0] = link.link(np.average(y, weights=weights)) + else: + coef = np.zeros(n_features) else: # assign given array as start values coef = start_params @@ -1915,24 +2023,28 @@ def fit(self, X, y, sample_weight=None): # Note: we already symmetriezed P2 = 1/2 (P2 + P2') if solver == 'irls': coef, self.n_iter_ = \ - _irls_solver(coef=coef, X=Xnew, y=y, weights=weights, P2=P2, - family=family, link=link, max_iter=self.max_iter, - tol=self.tol) + _irls_solver(coef=coef, X=X, y=y, weights=weights, P2=P2, + fit_intercept=self.fit_intercept, family=family, + link=link, max_iter=self.max_iter, tol=self.tol) # 4.2 L-BFGS ########################################################## elif solver == 'lbfgs': def func(coef, X, y, weights, P2, family, link): - mu, dev, devp = \ + mu, devp = \ family._mu_deviance_derivative(coef, X, y, weights, link) + dev = family.deviance(y, mu, weights) + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept if P2.ndim == 1: - L2 = P2 * coef + L2 = P2 * coef[idx:] else: - L2 = P2 @ coef - obj = 0.5 * dev + 0.5 * (coef @ L2) - objp = 0.5 * devp + L2 + L2 = P2 @ coef[idx:] + obj = 0.5 * dev + 0.5 * (coef[idx:] @ L2) + objp = 0.5 * devp + objp[idx:] += L2 return obj, objp - args = (Xnew, y, weights, P2, family, link) + args = (X, y, weights, P2, family, link) coef, loss, info = fmin_l_bfgs_b( func, coef, fprime=None, args=args, iprint=(self.verbose > 0) - 1, pgtol=self.tol, @@ -1952,50 +2064,66 @@ def func(coef, X, y, weights, P2, family, link): # precisely, expected hessian of deviance. 
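The func used by the lbfgs branch above returns half the deviance plus half the quadratic penalty together with its gradient, with the intercept excluded from the penalty. A small self-contained check of that pattern, assuming the normal family with identity link so that the deviance is a plain sum of squares (threshold and names are illustrative):

import numpy as np
from scipy.optimize import check_grad

rng = np.random.RandomState(0)
X = rng.randn(20, 3)
y = rng.randn(20)
P2 = np.array([1.0, 2.0, 3.0])      # diagonal L2 penalty, no intercept entry

def obj(coef):
    mu = coef[0] + X @ coef[1:]
    return 0.5 * np.sum((y - mu) ** 2) + 0.5 * coef[1:] @ (P2 * coef[1:])

def grad(coef):
    mu = coef[0] + X @ coef[1:]
    devp = -2 * np.concatenate(([np.sum(y - mu)], (y - mu) @ X))
    g = 0.5 * devp
    g[1:] += P2 * coef[1:]
    return g

assert check_grad(obj, grad, rng.randn(4)) < 1e-4   # analytic and numeric gradients agree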
elif solver == 'newton-cg': def func(coef, X, y, weights, P2, family, link): + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept if P2.ndim == 1: - L2 = coef @ (P2 * coef) + L2 = coef[idx:] @ (P2 * coef[idx:]) else: - L2 = coef @ (P2 @ coef) - mu = link.inverse(X @ coef) + L2 = coef[idx:] @ (P2 @ coef[idx:]) + mu = link.inverse(_safe_lin_pred(X, coef)) return 0.5 * family.deviance(y, mu, weights) + 0.5 * L2 def grad(coef, X, y, weights, P2, family, link): + mu, devp = \ + family._mu_deviance_derivative(coef, X, y, weights, link) + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept if P2.ndim == 1: - L2 = P2 * coef + L2 = P2 * coef[idx:] else: - L2 = P2 @ coef - eta = X @ coef - mu = link.inverse(eta) - d1 = link.inverse_derivative(eta) - grad = X.T @ (d1 * family.deviance_derivative(y, mu, weights)) - return 0.5 * grad + L2 + L2 = P2 @ coef[idx:] + objp = 0.5 * devp + objp[idx:] += L2 + return objp def grad_hess(coef, X, y, weights, P2, family, link): + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept if P2.ndim == 1: - L2 = P2 * coef + L2 = P2 * coef[idx:] else: - L2 = P2 @ coef - eta = X @ coef + L2 = P2 @ coef[idx:] + eta = _safe_lin_pred(X, coef) mu = link.inverse(eta) d1 = link.inverse_derivative(eta) - grad = 0.5 * \ - (X.T @ (d1 * family.deviance_derivative(y, mu, weights))) \ - + L2 - # expected hessian = X.T @ diag_matrix @ X + temp = d1 * family.deviance_derivative(y, mu, weights) + if intercept: + grad = np.concatenate(([0.5 * temp.sum()], + 0.5 * temp @ X + L2)) + else: + grad = 0.5 * temp @ X + L2 # sampe as 0.5* X.T @ temp + L2 + + # expected hessian = fisher = X.T @ diag_matrix @ X # calculate only diag_matrix diag = d1**2 / family.variance(mu, phi=1, weights=weights) - def Hs(s): - ret = 0.5 * (X.T @ (diag * (X @ s))) + def Hs(coef): + # return (0.5 * fisher + P2) @ coef + # ret = 0.5 * (X.T @ (diag * (X @ coef))) + ret = 0.5 * ((diag * (X @ coef[idx:])) @ X) if P2.ndim == 1: - ret += P2 * s + ret += P2 * coef[idx:] else: - ret += P2 @ s + ret += P2 @ coef[idx:] + if intercept: + h0i = np.concatenate(([diag.sum()], diag @ X)) + ret = np.concatenate(([0.5 * (h0i @ coef)], + ret + 0.5 * coef[0] * h0i[1:])) return ret return grad, Hs - args = (Xnew, y, weights, P2, family, link) + args = (X, y, weights, P2, family, link) coef, n_iter_i = newton_cg(grad_hess, func, grad, coef, args=args, maxiter=self.max_iter, tol=self.tol) @@ -2007,13 +2135,14 @@ def Hs(s): elif solver == 'cd': # For coordinate descent, if X is sparse, it should be csc format # If X is sparse, P2 must also be csc - if sparse.issparse(Xnew): - Xnew = Xnew.tocsc(copy=self.copy_X) + if sparse.issparse(X): + X = X.tocsc(copy=self.copy_X) P2 = sparse.csc_matrix(P2) coef, self.n_iter_, self._n_cycles = \ - _cd_solver(coef=coef, X=Xnew, y=y, weights=weights, P1=P1, - P2=P2, family=family, link=link, + _cd_solver(coef=coef, X=X, y=y, weights=weights, P1=P1, + P2=P2, fit_intercept=self.fit_intercept, + family=family, link=link, max_iter=self.max_iter, tol=self.tol, selection=self.selection, random_state=random_state, diag_fisher=self.diag_fisher, copy_X=self.copy_X) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index ae8a5f4cfc5e7..cdac151b77de6 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -566,7 +566,7 @@ def obj(coef): pd = PoissonDistribution() link = 
LogLink() N = y.shape[0] - mu = link.inverse(X @ coef[1:]+coef[0]) + mu = link.inverse(X @ coef[1:] + coef[0]) alpha, l1_ratio = (1, 0.5) return 1./(2.*N) * pd.deviance(y, mu) \ + 0.5 * alpha * (1-l1_ratio) * (coef[1:]**2).sum() \ @@ -587,9 +587,9 @@ def obj(coef): assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=4) assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=4) - # start_params='least_squares' with different alpha + # check warm_start, therefore start with different alpha glm = GeneralizedLinearRegressor(alpha=0.005, l1_ratio=0.5, - family='poisson', + family='poisson', max_iter=300, link='log', solver='cd', tol=1e-5, start_params='zero') glm.fit(X, y) From c9a7a95e89deaadc40b92edd8e1208d550998a72 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 21 Apr 2019 17:03:07 +0200 Subject: [PATCH 051/269] Revised option start_params * renamed option irls into guess * removed option least_squares * updated tests --- doc/modules/linear_model.rst | 12 +- sklearn/linear_model/glm.py | 223 ++++++++++++++----------- sklearn/linear_model/tests/test_glm.py | 140 ++++++++-------- 3 files changed, 207 insertions(+), 168 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index e60e9e84a4747..4bede17af581a 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -925,12 +925,12 @@ follows: >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) # doctest: +NORMALIZE_WHITESPACE GeneralizedLinearRegressor(P1='identity', P2='identity', alpha=0.5, - check_input=True, copy_X=True, family='poisson', - fit_dispersion=None, fit_intercept=True, l1_ratio=0, - link='log', max_iter=100, random_state=None, - selection='cyclic', solver='auto', - start_params='irls', tol=0.0001, verbose=0, - warm_start=False) + check_input=True, copy_X=True, diag_fisher=False, + family='poisson', fit_dispersion=None, + fit_intercept=True, l1_ratio=0, link='log', + max_iter=100, random_state=None, selection='cyclic', + solver='auto', start_params='guess', tol=0.0001, + verbose=0, warm_start=False) >>> reg.coef_ # doctest: +NORMALIZE_WHITESPACE array([0.24630169, 0.43373464]) >>> reg.intercept_ #doctest: +ELLIPSIS diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index 2afd1ddf8c79c..a53cc39ecd307 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -48,9 +48,6 @@ import scipy.sparse.linalg as splinalg from scipy.optimize import fmin_l_bfgs_b import warnings -from .base import LinearRegression -from .coordinate_descent import ElasticNet -from .ridge import Ridge from ..base import BaseEstimator, RegressorMixin from ..exceptions import ConvergenceWarning from ..utils import check_array, check_X_y @@ -93,6 +90,14 @@ def _safe_lin_pred(X, coef): return X @ coef +def _safe_toarray(X): + """Returns a numpy array.""" + if sparse.issparse(X): + return X.toarray() + else: + return np.asarray(X) + + def _safe_sandwich_dot(X, d, intercept=False): """Compute sandwich product X.T @ diag(d) @ X. @@ -100,7 +105,9 @@ def _safe_sandwich_dot(X, d, intercept=False): first column of X. X can be sparse, d must be an ndarray. 
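A throwaway check (not part of the patch) that the dense and sparse formulations of this sandwich product agree; on older scipy/numpy the sparse product may come back as np.matrix, which is why the result is funnelled through _safe_toarray in the code:

import numpy as np
from scipy import sparse

rng = np.random.RandomState(0)
X = rng.randn(10, 4)
d = rng.rand(10)

dense = (X.T * d) @ X                                  # dense path
Xs = sparse.csc_matrix(X)
sp = Xs.transpose() @ Xs.multiply(d[:, np.newaxis])    # sparse path
sp = sp.toarray() if sparse.issparse(sp) else np.asarray(sp)
assert np.allclose(dense, sp)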
Always returns a ndarray.""" if sparse.issparse(X): - temp = (X.transpose().multiply(d) @ X).toarray() + temp = (X.transpose() @ X.multiply(d[:, np.newaxis])) + # for older versions of numpy and scipy, temp may be a np.matrix + temp = _safe_toarray(temp) else: temp = (X.T * d) @ X if intercept: @@ -945,7 +952,8 @@ def _irls_step(X, W, P2, z, fit_intercept=True): else: if sparse.issparse(X): XtW = X.transpose().multiply(W) - A = (XtW @ X).toarray() + # for older versions of numpy and scipy, A may be a np.matrix + A = _safe_toarray(XtW @ X) else: XtW = (X.T * W) A = XtW @ X @@ -1105,8 +1113,9 @@ def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, if intercept: Bj[0] = fisher.sum() if sparse.issparse(X): - Bj[idx:] = (X[:, j].transpose().multiply(fisher) @ X - ).toarray().ravel() + Bj[idx:] = _safe_toarray(X[:, j].transpose() @ + X.multiply(fisher[:, np.newaxis]) + ).ravel() else: Bj[idx:] = (fisher * X[:, j]) @ X @@ -1477,27 +1486,32 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): the chi squared statisic or the deviance statistic. If None, the dispersion is not estimated. - solver : {'auto', 'irls', 'newton-cg', 'lbfgs', 'cd'}, \ + solver : {'auto', 'cd', 'irls', 'lbfgs', 'newton-cg'}, \ optional (default='auto') Algorithm to use in the optimization problem: 'auto' Sets 'irls' if l1_ratio equals 0, else 'cd'. + 'cd' + Coordinate descent algorithm. It can deal with L1 as well as L2 + penalties. Note that in order to avoid unnecessary memory + duplication of X in the ``fit`` method, X should be directly passed + as a Fortran-contiguous numpy array or sparse csc matrix. + 'irls' - Iterated reweighted least squares (with Fisher scoring). + Iterated reweighted least squares. It is the standard algorithm for GLMs. It cannot deal with L1 penalties. + 'lbfgs' + Calls scipy's L-BFGS-B optimizer. It cannot deal with L1 penalties. + 'newton-cg', 'lbfgs' - Cannot deal with L1 penalties. + Newton conjugate gradient algorithm cannot deal with L1 penalties. - 'cd' - Coordinate descent algorithm. It can deal with L1 as well as L2 - penalties. Note that in order to avoid unnecessary memory - duplication of the X argument in the ``fit`` method, X should be - directly passed as a Fortran-contiguous numpy array or sparse csc - matrix. + Note that all solvers except lbfgs use the fisher matrix, i.e. the + expected Hessian instead of the Hessian matrix. max_iter : int, optional (default=100) The maximal number of iterations for solver algorithms. @@ -1505,11 +1519,11 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): tol : float, optional (default=1e-4) Stopping criterion. For the irls, newton-cg and lbfgs solvers, the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` - where g_i is the i-th component of the gradient (derivative) of + where ``g_i`` is the i-th component of the gradient (derivative) of the objective function. For the cd solver, covergence is reached - when ``sum_i(|minimum-norm of g_i|)``, where g_i is the - subgradient of the objective and minimum-norm of g_i is the element of - the subgradient g_i with the smallest L2-norm. + when ``sum_i(|minimum-norm of g_i|)``, where ``g_i`` is the + subgradient of the objective and minimum-norm of ``g_i`` is the element + of the subgradient ``g_i`` with the smallest L2-norm. 
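The "minimum-norm of g_i" in the tol description refers to the element of the subgradient with the smallest absolute value, which is what _min_norm_sugrad computes. A compact sketch without the P2 term (names hypothetical) showing that it vanishes at the optimum of a one-dimensional lasso problem:

import numpy as np

def min_norm_subgrad(coef, grad, P1):
    """Element-wise minimum-norm subgradient of f(w) + ||P1 * w||_1."""
    return np.where(coef == 0,
                    np.sign(grad) * np.maximum(np.abs(grad) - P1, 0),
                    grad + np.sign(coef) * P1)

# f(w) = 0.5 * (w - 3)**2 with penalty P1 = 1: optimum is w* = 2, where f'(w*) = -1
P1 = np.array([1.0])
w_opt = np.array([2.0])
print(min_norm_subgrad(w_opt, w_opt - 3.0, P1))   # [0.] -> stopping criterion satisfied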
warm_start : boolean, optional (default=False) If set to ``True``, reuse the solution of the previous call to ``fit`` @@ -1518,23 +1532,21 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): does not exit (first call to ``fit``), option ``start_params`` sets the start values for ``coef_`` and ``intercept_``. - start_params : {'irls', 'least_squares', 'zero', array of shape \ - (n_features*, )}, optional (default='irls') + start_params : {'guess', 'zero', array of shape (n_features*, )}, \ + optional (default='guess') Relevant only if ``warm_start=False`` or if fit is called the first time (``self.coef_`` does not yet exist). - 'irls' + 'guess' Start values of mu are calculated by family.starting_mu(..). Then, - one step of irls obtains start values for ``coef_``. This gives - usually good results. - - 'least_squares' - Start values for ``coef_`` are obtained by a least squares fit in the - link space (y is transformed to the space of the linear predictor). + one Newton step obtains start values for ``coef_``. If + ``solver='irls'``, it uses one irls step, else the Newton step is + calculated by the cd solver. + This gives usually good starting values. 'zero' All coefficients are set to zero. If ``fit_intercept=True``, the - start value for the intercept is obtained by the average of y. + start value for the intercept is obtained by the weighted average of y. array The array of size n_features* is directly used as start values @@ -1560,17 +1572,17 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): RandomState instance used by `np.random`. Used when ``selection`` == 'random'. - diag_fisher : boolean, (default=False) - Only relevant for solver 'cd'. If ``False``, the full Fisher matrix - (expected Hessian) is computed in each outer iteretion (Newton - iteration). If ``True``, only a diagonal matrix (stored as 1d array) is - computed, such that fisher = X.T @ diag @ X. This saves memory and - matrix-matrix multiplications, but needs more matrix-vector - multiplications. If you use large sparse X or if you have many - features, i.e. n_features >> n_samples, you might set this option to - ``True``. - - copy_X : boolean, optional, default True + diag_fisher : boolean, optional, (default=False) + Only relevant for solver 'cd' (see also ``start_params='guess'``). + If ``False``, the full Fisher matrix (expected Hessian) is computed in + each outer iteration (Newton iteration). If ``True``, only a diagonal + matrix (stored as 1d array) is computed, such that + fisher = X.T @ diag @ X. This saves memory and matrix-matrix + multiplications, but needs more matrix-vector multiplications. If you + use large sparse X or if you have many features, + i.e. n_features >> n_samples, you might set this option to ``True``. + + copy_X : boolean, optional, (default=True) If ``True``, X will be copied; else, it may be overwritten. 
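To illustrate the diag_fisher trade-off described a few lines above: keeping only the diagonal d is enough for Fisher-vector products, at the cost of extra matrix-vector multiplications instead of one stored matrix. A minimal standalone check (not the estimator's code path):

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 6)
d = rng.rand(50)          # diagonal such that fisher = X.T @ diag(d) @ X
v = rng.randn(6)

full = (X.T * d) @ X                                 # full (6, 6) fisher matrix
assert np.allclose(full @ v, X.T @ (d * (X @ v)))    # same product without forming it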
check_input : boolean, optional (default=True) @@ -1634,7 +1646,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', fit_intercept=True, family='normal', link='auto', fit_dispersion=None, solver='auto', max_iter=100, - tol=1e-4, warm_start=False, start_params='irls', + tol=1e-4, warm_start=False, start_params='guess', selection='cyclic', random_state=None, diag_fisher=False, copy_X=True, check_input=True, verbose=0): self.alpha = alpha @@ -1867,11 +1879,10 @@ def fit(self, X, y, sample_weight=None): start_params = self.start_params if isinstance(start_params, str): - if start_params not in ['irls', 'least_squares', 'zero']: - raise ValueError("The argument start_params must be 'irls', " - "'least-squares', 'zero' or an array of " - " correct length;" - " got(start_params={0})".format(start_params)) + if start_params not in ['guess', 'zero']: + raise ValueError("The argument start_params must be 'guess', " + "'zero' or an array of correct length; " + "got(start_params={0})".format(start_params)) else: start_params = check_array(start_params, accept_sparse=False, force_all_finite=True, ensure_2d=False, @@ -1894,10 +1905,17 @@ def fit(self, X, y, sample_weight=None): # reason: w' P2 w = (w' P2 w)', i.e. it is symmetric if P2.ndim == 2: if sparse.issparse(P2): - P2 = 0.5 * (P2 + P2.transpose()) + if sparse.isspmatrix_csc(P2): + P2 = 0.5 * (P2 + P2.transpose()).tocsc() + else: + P2 = 0.5 * (P2 + P2.transpose()).tocsr() else: P2 = 0.5 * (P2 + P2.T) + # For coordinate descent, if X is sparse, P2 must also be csc + if solver == 'cd' and sparse.issparse(X): + P2 = sparse.csc_matrix(P2) + # 1.4 additional validations ########################################## if self.check_input: if not np.all(family.in_y_range(y)): @@ -1964,45 +1982,63 @@ def fit(self, X, y, sample_weight=None): else: coef = self.coef_ elif isinstance(start_params, str): - if start_params == 'irls': - # See 3.1 IRLS - # Use mu_start and apply one irls step to calculate coef + if start_params == 'guess': + # Set mu=starting_mu of the family and do one Newton step + # If solver=cd use cd, else irls mu = family.starting_mu(y, weights=weights) - # linear predictor - eta = link.link(mu) - # h'(eta) - hp = link.inverse_derivative(eta) - # working weights W, in principle a diagonal matrix - # therefore here just as 1d array - W = (hp**2 / family.variance(mu, phi=1, weights=weights)) - # working observations - z = eta + (y-mu)/hp - # solve A*coef = b - # A = X' W X + l2 P2, b = X' W z - coef = _irls_step(X, W, P2, z, - fit_intercept=self.fit_intercept) - elif start_params == 'least_squares': - # less restrictive tolerance for finding start values - tol = np.max([self.tol, np.sqrt(self.tol)]) - if self.alpha == 0: - reg = LinearRegression(copy_X=True, fit_intercept=False) - reg.fit(X, link.link(y)) - coef = reg.coef_ - elif self.l1_ratio <= 0.01: - # ElasticNet says l1_ratio <= 0.01 is not reliable - # => use Ridge - # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 - reg = Ridge(copy_X=True, fit_intercept=False, - alpha=self.alpha*n_samples, tol=tol) - reg.fit(X, link.link(y)) - coef = reg.coef_ + eta = link.link(mu) # linear predictor + if solver in ['cd', 'lbfgs', 'newton-cg']: + # see function _cd_solver + sigma_inv = 1/family.variance(mu, phi=1, weights=weights) + d1 = link.inverse_derivative(eta) + temp = sigma_inv * d1 * (y - mu) + if self.fit_intercept: + score = np.concatenate(([temp.sum()], temp @ X)) + else: + score = temp @ X # sampe as 
X.T @ temp + + d2_sigma_inv = d1 * d1 * sigma_inv + diag_fisher = self.diag_fisher + if diag_fisher: + fisher = d2_sigma_inv + else: + fisher = \ + _safe_sandwich_dot(X, d2_sigma_inv, + intercept=self.fit_intercept) + # set up space for search direction d for inner loop + if self.fit_intercept: + coef = np.zeros(n_features+1) + else: + coef = np.zeros(n_features) + d = np.zeros_like(coef) + # initial stopping tolerance of inner loop + # use L1-norm of minimum of norm of subgradient of F + # use less restrictive tolerance for initial guess + inner_tol = _min_norm_sugrad(coef=coef, grad=-score, P2=P2, + P1=P1) + inner_tol = 4 * linalg.norm(inner_tol, ord=1) + # just one outer loop = Newton step + n_cycles = 0 + d, coef_P2, n_cycles, inner_tol = \ + _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, + inner_tol, max_inner_iter=1000, + selection=self.selection, + random_state=self.random_state, + diag_fisher=self.diag_fisher) + coef += d # for simplicity no line search here else: - # TODO: Does this make sense at all? - reg = ElasticNet(copy_X=True, fit_intercept=False, - alpha=self.alpha, l1_ratio=self.l1_ratio, - tol=tol) - reg.fit(X, link.link(y)) - coef = reg.coef_ + # See _irls_solver + # h'(eta) + hp = link.inverse_derivative(eta) + # working weights W, in principle a diagonal matrix + # therefore here just as 1d array + W = (hp**2 / family.variance(mu, phi=1, weights=weights)) + # working observations + z = eta + (y-mu)/hp + # solve A*coef = b + # A = X' W X + l2 P2, b = X' W z + coef = _irls_step(X, W, P2, z, + fit_intercept=self.fit_intercept) else: # start_params == 'zero' if self.fit_intercept: coef = np.zeros(n_features+1) @@ -2048,7 +2084,7 @@ def func(coef, X, y, weights, P2, family, link): coef, loss, info = fmin_l_bfgs_b( func, coef, fprime=None, args=args, iprint=(self.verbose > 0) - 1, pgtol=self.tol, - maxiter=self.max_iter) + maxiter=self.max_iter, factr=1e3) if self.verbose > 0: if info["warnflag"] == 1: warnings.warn("lbfgs failed to converge." 
@@ -2106,6 +2142,8 @@ def grad_hess(coef, X, y, weights, P2, family, link): # expected hessian = fisher = X.T @ diag_matrix @ X # calculate only diag_matrix diag = d1**2 / family.variance(mu, phi=1, weights=weights) + if intercept: + h0i = np.concatenate(([diag.sum()], diag @ X)) def Hs(coef): # return (0.5 * fisher + P2) @ coef @@ -2116,7 +2154,6 @@ def Hs(coef): else: ret += P2 @ coef[idx:] if intercept: - h0i = np.concatenate(([diag.sum()], diag @ X)) ret = np.concatenate(([0.5 * (h0i @ coef)], ret + 0.5 * coef[0] * h0i[1:])) return ret @@ -2124,21 +2161,15 @@ def Hs(coef): return grad, Hs args = (X, y, weights, P2, family, link) - coef, n_iter_i = newton_cg(grad_hess, func, grad, coef, - args=args, maxiter=self.max_iter, - tol=self.tol) + coef, self.n_iter_ = newton_cg(grad_hess, func, grad, coef, + args=args, maxiter=self.max_iter, + tol=self.tol) # 4.4 coordinate descent ############################################## # Note: we already set P1 = l1*P1, see above # Note: we already set P2 = l2*P2, see above # Note: we already symmetriezed P2 = 1/2 (P2 + P2') elif solver == 'cd': - # For coordinate descent, if X is sparse, it should be csc format - # If X is sparse, P2 must also be csc - if sparse.issparse(X): - X = X.tocsc(copy=self.copy_X) - P2 = sparse.csc_matrix(P2) - coef, self.n_iter_, self._n_cycles = \ _cd_solver(coef=coef, X=X, y=y, weights=weights, P1=P1, P2=P2, fit_intercept=self.fit_intercept, diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index cdac151b77de6..17535c067bbb6 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -4,7 +4,7 @@ import scipy as sp from scipy import linalg, optimize, sparse -from sklearn.datasets import make_classification +from sklearn.datasets import make_classification, make_regression from sklearn.linear_model.glm import ( Link, IdentityLink, @@ -22,10 +22,12 @@ assert_array_equal, assert_array_almost_equal) +rng = np.random.RandomState(42) + + @pytest.mark.parametrize('link', Link.__subclasses__()) def test_link_properties(link): """Test link inverse and derivative.""" - rng = np.random.RandomState(0) x = rng.rand(100)*100 link = link() # instatiate object decimal = 10 @@ -86,7 +88,6 @@ def test_deviance_zero(family, chk_values): def test_fisher_matrix(family, link): """Test the Fisher matrix numerically. 
Trick: Use numerical differentiation with y = mu""" - rng = np.random.RandomState(0) coef = np.array([-2, 1, 0, 1, 2.5]) phi = 0.5 X = rng.randn(10, 5) @@ -218,7 +219,6 @@ def test_glm_P2_argument(P2): def test_glm_P2_positive_semidefinite(): """Test GLM for a positive semi-definite P2 argument.""" n_samples, n_features = 10, 5 - rng = np.random.RandomState(42) y = np.arange(n_samples) X = np.zeros((n_samples, n_features)) P2 = np.diag([100, 10, 5, 0, -1E-5]) @@ -351,20 +351,15 @@ def test_glm_check_input_argument(check_input): glm.fit(X, y) -@pytest.mark.parametrize( - 'family', - [NormalDistribution(), PoissonDistribution(), - GammaDistribution(), InverseGaussianDistribution(), - TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), - GeneralizedHyperbolicSecant()]) @pytest.mark.parametrize('solver', ['irls', 'lbfgs', 'newton-cg', 'cd']) -def test_glm_identiy_regression(family, solver): +def test_glm_identiy_regression(solver): """Test GLM regression with identity link on a simple dataset.""" coef = [1, 2] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.dot(X, coef) - glm = GeneralizedLinearRegressor(alpha=0, family=family, link='identity', - fit_intercept=False, solver=solver) + glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', + fit_intercept=False, solver=solver, + start_params='zero', tol=1e-7) res = glm.fit(X, y) assert_array_almost_equal(res.coef_, coef) @@ -375,34 +370,42 @@ def test_glm_identiy_regression(family, solver): GammaDistribution(), InverseGaussianDistribution(), TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), GeneralizedHyperbolicSecant()]) -@pytest.mark.parametrize('solver', ['irls', 'lbfgs', 'newton-cg', 'cd']) -def test_glm_log_regression(family, solver): +@pytest.mark.parametrize('solver, tol, dec', [('irls', 1e-6, 6), + ('lbfgs', 1e-6, 6), + ('newton-cg', 1e-7, 6), + ('cd', 1e-7, 6)]) +def test_glm_log_regression(family, solver, tol, dec): """Test GLM regression with log link on a simple dataset.""" - coef = [1, 2] + coef = [0.2, -0.1] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.exp(np.dot(X, coef)) glm = GeneralizedLinearRegressor( alpha=0, family=family, link='log', fit_intercept=False, - solver=solver, start_params='least_squares') + solver=solver, start_params='guess', tol=tol) res = glm.fit(X, y) - assert_array_almost_equal(res.coef_, coef) + assert_array_almost_equal(res.coef_, coef, decimal=dec) @pytest.mark.filterwarnings('ignore::DeprecationWarning') -@pytest.mark.parametrize('solver', ['irls', 'lbfgs', 'newton-cg', 'cd']) -def test_normal_ridge(solver): +@pytest.mark.parametrize('solver, tol, dec', [('irls', 1e-6, 6), + ('lbfgs', 1e-6, 5), + ('newton-cg', 1e-6, 5), + ('cd', 1e-6, 6)]) +def test_normal_ridge(solver, tol, dec): """Test ridge regression for Normal distributions. Compare to test_ridge in test_ridge.py. """ - rng = np.random.RandomState(0) alpha = 1.0 # 1. 
With more samples than features - n_samples, n_features, n_predict = 10, 5, 10 - y = rng.randn(n_samples) - X = rng.randn(n_samples, n_features) - T = rng.randn(n_predict, n_features) + n_samples, n_features, n_predict = 100, 7, 10 + X, y, coef = make_regression(n_samples=n_samples+n_predict, + n_features=n_features, + n_informative=n_features-2, noise=0.5, + coef=True, random_state=rng) + y = y[0:n_samples] + X, T = X[0:n_samples], X[n_samples:] # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-6, @@ -410,69 +413,74 @@ def test_normal_ridge(solver): ridge.fit(X, y) glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', link='identity', fit_intercept=True, - tol=1e-6, max_iter=100, solver=solver, - random_state=42) + tol=tol, max_iter=100, solver=solver, + check_input=False, random_state=rng) glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) - assert_array_almost_equal(glm.coef_, ridge.coef_) - assert_almost_equal(glm.intercept_, ridge.intercept_) - assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-6, solver='svd', normalize=False) ridge.fit(X, y) glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', link='identity', fit_intercept=False, - tol=1e-6, max_iter=100, solver=solver, - random_state=42, fit_dispersion='chisqr') + tol=tol, max_iter=100, solver=solver, + check_input=False, random_state=rng, + fit_dispersion='chisqr') glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) - assert_array_almost_equal(glm.coef_, ridge.coef_) - assert_almost_equal(glm.intercept_, ridge.intercept_) - assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) mu = glm.predict(X) assert_almost_equal(glm.dispersion_, np.sum((y-mu)**2/(n_samples-n_features))) # 2. 
With more features than samples and sparse - n_samples, n_features, n_predict = 5, 10, 10 - y = rng.randn(n_samples) - X = sparse.csr_matrix(rng.randn(n_samples, n_features)) - T = sparse.csr_matrix(rng.randn(n_predict, n_features)) + n_samples, n_features, n_predict = 10, 100, 10 + X, y, coef = make_regression(n_samples=n_samples+n_predict, + n_features=n_features, + n_informative=n_features-2, noise=0.5, + coef=True, random_state=rng) + y = y[0:n_samples] + X, T = X[0:n_samples], X[n_samples:] # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-9, solver='sag', normalize=False, max_iter=100000) ridge.fit(X, y) - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-8, - family='normal', link='identity', - fit_intercept=True, solver=solver, - max_iter=300, random_state=42) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', + link='identity', fit_intercept=True, + tol=tol, max_iter=300, solver=solver, + check_input=False, random_state=rng) glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) - assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=5) - assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=5) - assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=5) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-7, solver='sag', normalize=False, max_iter=1000) ridge.fit(X, y) - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, tol=1e-7, - family='normal', link='identity', - fit_intercept=False, solver=solver) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', + link='identity', fit_intercept=False, + tol=tol*2, max_iter=300, solver=solver, + check_input=False, random_state=rng) glm.fit(X, y) assert_equal(glm.coef_.shape, (X.shape[1], )) - assert_array_almost_equal(glm.coef_, ridge.coef_) - assert_almost_equal(glm.intercept_, ridge.intercept_) - assert_array_almost_equal(glm.predict(T), ridge.predict(T)) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec-1) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec-1) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec-2) -@pytest.mark.parametrize('solver, decimal, tol', - [('irls', 7, 1e-8), - ('lbfgs', 5, 1e-7), - ('newton-cg', 5, 1e-7), - ('cd', 7, 1e-8)]) -def test_poisson_ridge(solver, decimal, tol): +@pytest.mark.parametrize('solver, tol, dec', + [('irls', 1e-7, 6), + ('lbfgs', 1e-7, 5), + ('newton-cg', 1e-7, 5), + ('cd', 1e-7, 7)]) +def test_poisson_ridge(solver, tol, dec): """Test ridge regression with poisson family and LogLink. 
Compare to R's glmnet""" @@ -493,18 +501,17 @@ def test_poisson_ridge(solver, decimal, tol): fit_intercept=True, family='poisson', link='log', tol=tol, solver=solver, max_iter=300, - random_state=42) + random_state=rng) glm.fit(X, y) assert_almost_equal(glm.intercept_, -0.12889386979, - decimal=decimal) + decimal=dec) assert_array_almost_equal(glm.coef_, [0.29019207995, 0.03741173122], - decimal=decimal) + decimal=dec) @pytest.mark.parametrize('diag_fisher', [False, True]) def test_normal_enet(diag_fisher): """Test elastic net regression with normal/gaussian family.""" - rng = np.random.RandomState(0) alpha, l1_ratio = 0.3, 0.7 n_samples, n_features = 20, 2 X = rng.randn(n_samples, n_features).copy(order='F') @@ -556,7 +563,8 @@ def test_poisson_enet(): y = np.array([0, 1, 1, 2]) glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', link='log', solver='cd', tol=1e-8, - selection='random', random_state=42) + selection='random', random_state=rng, + start_params='guess') glm.fit(X, y) assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=7) assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=7) @@ -591,7 +599,7 @@ def obj(coef): glm = GeneralizedLinearRegressor(alpha=0.005, l1_ratio=0.5, family='poisson', max_iter=300, link='log', solver='cd', tol=1e-5, - start_params='zero') + selection='cyclic', start_params='zero') glm.fit(X, y) # warm start with original alpha and use of sparse matrices glm.warm_start = True @@ -612,9 +620,9 @@ def test_binomial_enet(alpha): n_samples = 500 X, y = make_classification(n_samples=n_samples, n_classes=2, n_features=6, n_informative=5, n_redundant=0, n_repeated=0, - random_state=0) + random_state=rng) log = LogisticRegression( - penalty='elasticnet', random_state=0, fit_intercept=False, tol=1e-6, + penalty='elasticnet', random_state=rng, fit_intercept=False, tol=1e-6, max_iter=1000, l1_ratio=l1_ratio, C=1./(n_samples * alpha), solver='saga') log.fit(X, y) From a7755de2cdc5022b6d41285730ed12bd41628e66 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 4 Jun 2019 17:34:11 +0200 Subject: [PATCH 052/269] Fix a few typos --- doc/modules/linear_model.rst | 2 +- .../plot_poisson_spline_regression.py | 2 +- sklearn/linear_model/glm.py | 53 ++++++++++--------- sklearn/linear_model/tests/test_glm.py | 4 +- 4 files changed, 31 insertions(+), 30 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 4bede17af581a..8f17c67d950de 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -887,7 +887,7 @@ combination of the input variables :math:`X` via an inverse link function .. math:: \hat{y}(w, x) = h(xw) = h(w_0 + w_1 x_1 + ... + w_p x_p). Secondly, the squared loss function is replaced by the deviance :math:`D` of an -exponential dispersion model (EDM) [11]_. The objective function beeing minimized +exponential dispersion model (EDM) [11]_. The objective function being minimized becomes .. 
math:: \frac{1}{2\mathrm{sum}(s)}D(y, \hat{y}; s) + \alpha \rho ||P_1w||_1 diff --git a/examples/linear_model/plot_poisson_spline_regression.py b/examples/linear_model/plot_poisson_spline_regression.py index fce85fae1ea8c..30b5881bba1f5 100644 --- a/examples/linear_model/plot_poisson_spline_regression.py +++ b/examples/linear_model/plot_poisson_spline_regression.py @@ -5,7 +5,7 @@ As in the :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` example, a Poisson regression with penalized B-splines (P-splines) [1]_ is -fitted on slightly different sinusodial, Poisson distributed data and +fitted on slightly different sinusoidal, Poisson distributed data and compared to an AdaBoost model with decision trees. One can see, that this is a hard problem for both estimators. diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py index a53cc39ecd307..a35c68828de81 100644 --- a/sklearn/linear_model/glm.py +++ b/sklearn/linear_model/glm.py @@ -29,7 +29,7 @@ # L2: w*P2*w with P2 a (semi-) positive definite matrix, e.g. P2 could be # a 1st or 2nd order difference matrix (compare B-spline penalties and # Tikhonov regularization). -# - The link funtion (instance of class Link) is necessary for the evaluation +# - The link function (instance of class Link) is necessary for the evaluation # of deviance, score, Fisher and Hessian matrix as functions of the # coefficients, which is needed by optimizers. # Solution: link as argument in those functions @@ -170,7 +170,7 @@ def _min_norm_sugrad(coef, grad, P2, P1): class Link(metaclass=ABCMeta): - """Abstract base class for Link funtions.""" + """Abstract base class for Link functions.""" @abstractmethod def link(self, mu): @@ -201,7 +201,7 @@ def derivative(self, mu): def inverse(self, lin_pred): """Compute the inverse link function h(lin_pred). - Gives the inverse relationship between linkear predictor and the mean + Gives the inverse relationship between linker predictor and the mean mu=E[Y], i.e. h(linear predictor) = mu. Parameters @@ -357,7 +357,7 @@ def include_lower_bound(self): @property def include_upper_bound(self): - """Get True if upper bound for y is includede: y <= upper_bound.""" + """Get True if upper bound for y is included: y <= upper_bound.""" return self._include_upper_bound def in_y_range(self, x): @@ -859,7 +859,7 @@ def __init__(self): class GeneralizedHyperbolicSecant(ExponentialDispersionModel): """A class for the Generalized Hyperbolic Secant (GHS) distribution. - The GHS distribution is for tagets y in (-inf, inf). + The GHS distribution is for targets y in (-inf, inf). """ def __init__(self): self._lower_bound = -np.Inf @@ -881,7 +881,7 @@ def unit_deviance(self, y, mu): class BinomialDistribution(ExponentialDispersionModel): """A class for the Binomial distribution. - The Binomial distribution is for tagets y in [0, 1]. + The Binomial distribution is for targets y in [0, 1]. """ def __init__(self): self._lower_bound = 0 @@ -1131,7 +1131,7 @@ def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, else: b = B[jdx, jdx] - # those ten lines aree what it is all about + # those ten lines are what it is all about if b <= 0: z = 0 elif P1[j] == 0: @@ -1199,7 +1199,7 @@ def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, diag_fisher=False, copy_X=True): """Solve GLM with L1 and L2 penalty by coordinate descent algorithm. 
- The objective beeing minimized in the coefficients w=coef is:: + The objective being minimized in the coefficients w=coef is:: F = f + g, f(w) = 1/2 deviance, g = 1/2 w*P2*w + ||P1*w||_1 @@ -1207,7 +1207,7 @@ def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, 1. Find optimal descent direction d by minimizing min_d F(w+d) = min_d F(w+d) - F(w) - 2. Quadrdatic approximation of F(w+d)-F(w) = q(d): + 2. Quadratic approximation of F(w+d)-F(w) = q(d): using f(w+d) = f(w) + f'(w)*d + 1/2 d*H(w)*d + O(d^3) gives: q(d) = (f'(w) + w*P2)*d + 1/2 d*(H(w)+P2)*d + ||P1*(w+d)||_1 - ||P1*w||_1 @@ -1228,7 +1228,7 @@ def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, ---------- coef : ndarray, shape (c,) If fit_intercept=False, shape c=X.shape[1]. - If fit_intercept=True, then c=X.shapee[1] + 1. + If fit_intercept=True, then c=X.shape[1] + 1. X : {ndarray, csc sparse matrix}, shape (n_samples, n_features) Training data (with intercept included if present). If not sparse, @@ -1265,7 +1265,7 @@ def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, cycles over all features per inner loop. tol : float, optional (default=1e-4) - Covergence criterion is + Convergence criterion is sum_i(|minimum of norm of subgrad of objective_i|)<=tol. selection : str, optional (default='cyclic') @@ -1285,9 +1285,9 @@ def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, ------- coef : ndarray, shape (c,) If fit_intercept=False, shape c=X.shape[1]. - If fit_intercept=True, then c=X.shapee[1] + 1. + If fit_intercept=True, then c=X.shape[1] + 1. - n_iter : numer of outer iterations = newton iterations + n_iter : number of outer iterations = newton iterations n_cycles : number of cycles over features @@ -1312,7 +1312,7 @@ def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, "format. Got P2 not sparse.") random_state = check_random_state(random_state) # Note: we already set P2 = l2*P2, P1 = l1*P1 - # Note: we already symmetriezed P2 = 1/2 (P2 + P2') + # Note: we already symmetrized P2 = 1/2 (P2 + P2') n_iter = 0 # number of outer iterations n_cycles = 0 # number of (complete) cycles over features converged = False @@ -1427,7 +1427,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Parameters ---------- alpha : float, optional (default=1) - Constant that multiplies the penalty terms und thus determines the + Constant that multiplies the penalty terms and thus determines the regularization strength. See the notes for the exact mathematical meaning of this parameter.``alpha = 0`` is equivalent to unpenalized GLMs. In this @@ -1481,9 +1481,9 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - 'logit' for family 'binomial' - fit_dispersion : {None, 'chisqr', 'deviance'}, optional (defaul=None) + fit_dispersion : {None, 'chisqr', 'deviance'}, optional (default=None) Method for estimation of the dispersion parameter phi. Whether to use - the chi squared statisic or the deviance statistic. If None, the + the chi squared statistic or the deviance statistic. If None, the dispersion is not estimated. solver : {'auto', 'cd', 'irls', 'lbfgs', 'newton-cg'}, \ @@ -1520,7 +1520,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Stopping criterion. For the irls, newton-cg and lbfgs solvers, the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient (derivative) of - the objective function. 
For the cd solver, covergence is reached + the objective function. For the cd solver, convergence is reached when ``sum_i(|minimum-norm of g_i|)``, where ``g_i`` is the subgradient of the objective and minimum-norm of ``g_i`` is the element of the subgradient ``g_i`` with the smallest L2-norm. @@ -1626,7 +1626,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): If the target y is a ratio, appropriate sample weights s should be provided. - As an example, consider Poission distributed counts z (integers) and + As an example, consider Poisson distributed counts z (integers) and weights s=exposure (time, money, persons years, ...). Then you fit y = z/s, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, sample_weight=s)``. The weights are necessary for the right (finite @@ -1681,7 +1681,7 @@ def fit(self, X, y, sample_weight=None): Target values. sample_weight : {None, array-like}, shape (n_samples,),\ - optinal (default=None) + optional (default=None) Individual weights w_i for each sample. Note that for an Exponential Dispersion Model (EDM), one has Var[Y_i]=phi/w_i * v(mu). @@ -1995,7 +1995,7 @@ def fit(self, X, y, sample_weight=None): if self.fit_intercept: score = np.concatenate(([temp.sum()], temp @ X)) else: - score = temp @ X # sampe as X.T @ temp + score = temp @ X # same as X.T @ temp d2_sigma_inv = d1 * d1 * sigma_inv diag_fisher = self.diag_fisher @@ -2051,12 +2051,12 @@ def fit(self, X, y, sample_weight=None): ####################################################################### # 4. fit # ####################################################################### - # algorithms for optimiation + # algorithms for optimization # TODO: Parallelize it? # 4.1 IRLS ############################################################ # Note: we already set P2 = l2*P2, see above - # Note: we already symmetriezed P2 = 1/2 (P2 + P2') + # Note: we already symmetrized P2 = 1/2 (P2 + P2') if solver == 'irls': coef, self.n_iter_ = \ _irls_solver(coef=coef, X=X, y=y, weights=weights, P2=P2, @@ -2137,7 +2137,7 @@ def grad_hess(coef, X, y, weights, P2, family, link): grad = np.concatenate(([0.5 * temp.sum()], 0.5 * temp @ X + L2)) else: - grad = 0.5 * temp @ X + L2 # sampe as 0.5* X.T @ temp + L2 + grad = 0.5 * temp @ X + L2 # same as 0.5* X.T @ temp + L2 # expected hessian = fisher = X.T @ diag_matrix @ X # calculate only diag_matrix @@ -2168,7 +2168,7 @@ def Hs(coef): # 4.4 coordinate descent ############################################## # Note: we already set P1 = l1*P1, see above # Note: we already set P2 = l2*P2, see above - # Note: we already symmetriezed P2 = 1/2 (P2 + P2') + # Note: we already symmetrized P2 = 1/2 (P2 + P2') elif solver == 'cd': coef, self.n_iter_, self._n_cycles = \ _cd_solver(coef=coef, X=X, y=y, weights=weights, P1=P1, @@ -2215,7 +2215,8 @@ def linear_predictor(self, X): return X @ self.coef_ + self.intercept_ def predict(self, X, sample_weight=None): - """Predict uing GLM with feature matrix X. + """Predict using GLM with feature matrix X. + If sample_weight is given, returns prediction*sample_weight. 
Parameters diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 17535c067bbb6..6172824cf1b79 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -29,7 +29,7 @@ def test_link_properties(link): """Test link inverse and derivative.""" x = rng.rand(100)*100 - link = link() # instatiate object + link = link() # instantiate object decimal = 10 if isinstance(link, LogitLink): # careful for large x, note expit(36) = 1 @@ -133,7 +133,7 @@ def test_sample_weights_validation(): with pytest.raises(ValueError): glm.fit(X, y, weights) - # 5. 1d but weith a negative value + # 5. 1d but with a negative value weights = [2, -1] with pytest.raises(ValueError): glm.fit(X, y, weights) From 9aa1fc41d726e5ac9007d9c867546aa70ea79ba7 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 4 Jun 2019 17:40:08 +0200 Subject: [PATCH 053/269] Make module private --- sklearn/linear_model/__init__.py | 4 ++-- sklearn/linear_model/{glm.py => _glm.py} | 0 sklearn/linear_model/tests/test_glm.py | 9 +++++++-- 3 files changed, 9 insertions(+), 4 deletions(-) rename sklearn/linear_model/{glm.py => _glm.py} (100%) diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index 0c5840f343a3a..0f7856fcc2046 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -18,8 +18,8 @@ lasso_path, enet_path, MultiTaskLasso, MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLassoCV) -from .glm import (TweedieDistribution, - GeneralizedLinearRegressor) +from ._glm import (TweedieDistribution, + GeneralizedLinearRegressor) from .huber import HuberRegressor from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from .stochastic_gradient import SGDClassifier, SGDRegressor diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/_glm.py similarity index 100% rename from sklearn/linear_model/glm.py rename to sklearn/linear_model/_glm.py diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 6172824cf1b79..a148d11cb2632 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -1,3 +1,7 @@ +# Authors: Christian Lorentzen +# +# License: BSD 3 clause + import numpy as np from numpy.testing import assert_allclose import pytest @@ -5,7 +9,8 @@ from scipy import linalg, optimize, sparse from sklearn.datasets import make_classification, make_regression -from sklearn.linear_model.glm import ( +from sklearn.linear_model import GeneralizedLinearRegressor +from sklearn.linear_model._glm import ( Link, IdentityLink, LogLink, @@ -14,7 +19,7 @@ NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, GeneralizedHyperbolicSecant, BinomialDistribution, - GeneralizedLinearRegressor) +) from sklearn.linear_model import ElasticNet, LogisticRegression, Ridge from sklearn.utils.testing import ( From ca3eae24b4db9931eeac9fb925dba4f48199976c Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 4 Jun 2019 21:35:11 +0200 Subject: [PATCH 054/269] Working on tests --- sklearn/linear_model/tests/test_glm.py | 52 +++++++++++++------------- 1 file changed, 25 insertions(+), 27 deletions(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index a148d11cb2632..2c8a9c3d2c72c 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -23,7 +23,7 @@ from sklearn.linear_model import ElasticNet, 
LogisticRegression, Ridge from sklearn.utils.testing import ( - assert_equal, assert_almost_equal, + assert_almost_equal, assert_array_equal, assert_array_almost_equal) @@ -35,16 +35,14 @@ def test_link_properties(link): """Test link inverse and derivative.""" x = rng.rand(100)*100 link = link() # instantiate object - decimal = 10 if isinstance(link, LogitLink): # careful for large x, note expit(36) = 1 # limit max eta to 15 x = x / 100 * 15 - decimal = 8 - assert_almost_equal(link.link(link.inverse(x)), x, decimal=decimal) + assert_allclose(link.link(link.inverse(x)), x) # if f(g(x)) = x, then f'(g(x)) = 1/g'(x) - assert_almost_equal(link.derivative(link.inverse(x)), - 1./link.inverse_derivative(x), decimal=decimal) + assert_allclose(link.derivative(link.inverse(x)), + 1./link.inverse_derivative(x)) # for LogitLink, in the following x should be between 0 and 1. # assert_almost_equal(link.inverse_derivative(link.link(x)), # 1./link.derivative(x), decimal=decimal) @@ -79,7 +77,7 @@ def test_family_bounds(family, expected): def test_deviance_zero(family, chk_values): """Test deviance(y,y) = 0 for different families.""" for x in chk_values: - assert_almost_equal(family.deviance(x, x), 0, decimal=10) + assert_allclose(family.deviance(x, x), 0, atol=1e-9) @pytest.mark.parametrize( @@ -155,7 +153,7 @@ def test_glm_family_argument(f, fam): y = np.array([0.1, 0.5]) # in range of all distributions X = np.array([[1], [2]]) glm = GeneralizedLinearRegressor(family=f, alpha=0).fit(X, y) - assert_equal(type(glm._family_instance), type(fam)) + assert isinstance(glm._family_instance, fam.__class__) glm = GeneralizedLinearRegressor(family='not a family', fit_intercept=False) @@ -172,7 +170,7 @@ def test_glm_link_argument(l, link): y = np.array([0.1, 0.5]) # in range of all distributions X = np.array([[1], [2]]) glm = GeneralizedLinearRegressor(family='normal', link=l).fit(X, y) - assert_equal(type(glm._link_instance), type(link)) + assert isinstance(glm._link_instance, link.__class__) glm = GeneralizedLinearRegressor(family='normal', link='not a link') with pytest.raises(ValueError): @@ -366,7 +364,7 @@ def test_glm_identiy_regression(solver): fit_intercept=False, solver=solver, start_params='zero', tol=1e-7) res = glm.fit(X, y) - assert_array_almost_equal(res.coef_, coef) + assert_allclose(res.coef_, coef) @pytest.mark.parametrize( @@ -375,11 +373,11 @@ def test_glm_identiy_regression(solver): GammaDistribution(), InverseGaussianDistribution(), TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), GeneralizedHyperbolicSecant()]) -@pytest.mark.parametrize('solver, tol, dec', [('irls', 1e-6, 6), - ('lbfgs', 1e-6, 6), - ('newton-cg', 1e-7, 6), - ('cd', 1e-7, 6)]) -def test_glm_log_regression(family, solver, tol, dec): +@pytest.mark.parametrize('solver, tol', [('irls', 1e-6), + ('lbfgs', 1e-6), + ('newton-cg', 1e-7), + ('cd', 1e-7)]) +def test_glm_log_regression(family, solver, tol): """Test GLM regression with log link on a simple dataset.""" coef = [0.2, -0.1] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T @@ -388,7 +386,7 @@ def test_glm_log_regression(family, solver, tol, dec): alpha=0, family=family, link='log', fit_intercept=False, solver=solver, start_params='guess', tol=tol) res = glm.fit(X, y) - assert_array_almost_equal(res.coef_, coef, decimal=dec) + assert_allclose(res.coef_, coef) @pytest.mark.filterwarnings('ignore::DeprecationWarning') @@ -421,10 +419,10 @@ def test_normal_ridge(solver, tol, dec): tol=tol, max_iter=100, solver=solver, check_input=False, random_state=rng) 
glm.fit(X, y) - assert_equal(glm.coef_.shape, (X.shape[1], )) - assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) - assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) - assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) + assert glm.coef_.shape == (X.shape[1], ) + assert_allclose(glm.coef_, ridge.coef_) + assert glm.intercept_ == pytest.approx(ridge.intercept_) + assert_allclose(glm.predict(T), ridge.predict(T)) ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-6, solver='svd', normalize=False) @@ -435,13 +433,13 @@ def test_normal_ridge(solver, tol, dec): check_input=False, random_state=rng, fit_dispersion='chisqr') glm.fit(X, y) - assert_equal(glm.coef_.shape, (X.shape[1], )) - assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) + assert glm.coef_.shape == (X.shape[1], ) + assert_allclose(glm.coef_, ridge.coef_) assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) - assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) + assert_allclose(glm.predict(T), ridge.predict(T)) mu = glm.predict(X) - assert_almost_equal(glm.dispersion_, - np.sum((y-mu)**2/(n_samples-n_features))) + assert_allclose(glm.dispersion_, + np.sum((y-mu)**2/(n_samples-n_features))) # 2. With more features than samples and sparse n_samples, n_features, n_predict = 10, 100, 10 @@ -461,7 +459,7 @@ def test_normal_ridge(solver, tol, dec): tol=tol, max_iter=300, solver=solver, check_input=False, random_state=rng) glm.fit(X, y) - assert_equal(glm.coef_.shape, (X.shape[1], )) + assert glm.coef_.shape == (X.shape[1], ) assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) @@ -474,7 +472,7 @@ def test_normal_ridge(solver, tol, dec): tol=tol*2, max_iter=300, solver=solver, check_input=False, random_state=rng) glm.fit(X, y) - assert_equal(glm.coef_.shape, (X.shape[1], )) + assert glm.coef_.shape == (X.shape[1], ) assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec-1) assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec-1) assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec-2) From 61bc6b8e2b1e227f539656744e9b4a4fa9f514f2 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 5 Jun 2019 18:45:20 +0200 Subject: [PATCH 055/269] Improve tests --- sklearn/linear_model/tests/test_glm.py | 82 +++++++++++++------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 2c8a9c3d2c72c..7d747c23ae441 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -22,9 +22,7 @@ ) from sklearn.linear_model import ElasticNet, LogisticRegression, Ridge -from sklearn.utils.testing import ( - assert_almost_equal, - assert_array_equal, assert_array_almost_equal) +from sklearn.utils.testing import assert_array_equal rng = np.random.RandomState(42) @@ -355,16 +353,16 @@ def test_glm_check_input_argument(check_input): @pytest.mark.parametrize('solver', ['irls', 'lbfgs', 'newton-cg', 'cd']) -def test_glm_identiy_regression(solver): +def test_glm_identity_regression(solver): """Test GLM regression with identity link on a simple dataset.""" - coef = [1, 2] + coef = [1., 2.] 
X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.dot(X, coef) glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', fit_intercept=False, solver=solver, start_params='zero', tol=1e-7) res = glm.fit(X, y) - assert_allclose(res.coef_, coef) + assert_allclose(res.coef_, coef, rtol=1e-6) @pytest.mark.parametrize( @@ -386,7 +384,7 @@ def test_glm_log_regression(family, solver, tol): alpha=0, family=family, link='log', fit_intercept=False, solver=solver, start_params='guess', tol=tol) res = glm.fit(X, y) - assert_allclose(res.coef_, coef) + assert_allclose(res.coef_, coef, rtol=5e-6) @pytest.mark.filterwarnings('ignore::DeprecationWarning') @@ -420,9 +418,9 @@ def test_normal_ridge(solver, tol, dec): check_input=False, random_state=rng) glm.fit(X, y) assert glm.coef_.shape == (X.shape[1], ) - assert_allclose(glm.coef_, ridge.coef_) - assert glm.intercept_ == pytest.approx(ridge.intercept_) - assert_allclose(glm.predict(T), ridge.predict(T)) + assert_allclose(glm.coef_, ridge.coef_, rtol=1e-6) + assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-5) + assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-6) ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-6, solver='svd', normalize=False) @@ -434,9 +432,9 @@ def test_normal_ridge(solver, tol, dec): fit_dispersion='chisqr') glm.fit(X, y) assert glm.coef_.shape == (X.shape[1], ) - assert_allclose(glm.coef_, ridge.coef_) - assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) - assert_allclose(glm.predict(T), ridge.predict(T)) + assert_allclose(glm.coef_, ridge.coef_, rtol=1e-5) + assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-6) + assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-6) mu = glm.predict(X) assert_allclose(glm.dispersion_, np.sum((y-mu)**2/(n_samples-n_features))) @@ -452,7 +450,8 @@ def test_normal_ridge(solver, tol, dec): # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-9, - solver='sag', normalize=False, max_iter=100000) + solver='sag', normalize=False, max_iter=100000, + random_state=42) ridge.fit(X, y) glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', link='identity', fit_intercept=True, @@ -460,22 +459,24 @@ def test_normal_ridge(solver, tol, dec): check_input=False, random_state=rng) glm.fit(X, y) assert glm.coef_.shape == (X.shape[1], ) - assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) - assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) - assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) + assert_allclose(glm.coef_, ridge.coef_, rtol=1e-6) + assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-6) + assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-5) ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-7, - solver='sag', normalize=False, max_iter=1000) + solver='sag', normalize=False, max_iter=1000, + random_state=42) ridge.fit(X, y) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', link='identity', fit_intercept=False, tol=tol*2, max_iter=300, solver=solver, check_input=False, random_state=rng) glm.fit(X, y) assert glm.coef_.shape == (X.shape[1], ) - assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec-1) - assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec-1) - assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec-2) + assert_allclose(glm.coef_, ridge.coef_, rtol=1e-4) + assert_allclose(glm.intercept_, 
ridge.intercept_, rtol=1e-5) + assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-5) @pytest.mark.parametrize('solver, tol, dec', @@ -506,10 +507,8 @@ def test_poisson_ridge(solver, tol, dec): solver=solver, max_iter=300, random_state=rng) glm.fit(X, y) - assert_almost_equal(glm.intercept_, -0.12889386979, - decimal=dec) - assert_array_almost_equal(glm.coef_, [0.29019207995, 0.03741173122], - decimal=dec) + assert_allclose(glm.intercept_, -0.12889386979, rtol=1e-5) + assert_allclose(glm.coef_, [0.29019207995, 0.03741173122], rtol=1e-6) @pytest.mark.parametrize('diag_fisher', [False, True]) @@ -535,14 +534,14 @@ def test_normal_enet(diag_fisher): normalize=False, tol=1e-8, copy_X=True) enet.fit(X, y) - assert_almost_equal(glm.intercept_, enet.intercept_, decimal=7) - assert_array_almost_equal(glm.coef_, enet.coef_, decimal=7) + assert_allclose(glm.intercept_, enet.intercept_, rtol=2e-7) + assert_allclose(glm.coef_, enet.coef_, rtol=5e-5) # 2. test normal enet on sparse data X = sparse.csc_matrix(X) glm.fit(X, y) - assert_almost_equal(glm.intercept_, enet.intercept_, decimal=7) - assert_array_almost_equal(glm.coef_, enet.coef_, decimal=7) + assert_allclose(glm.intercept_, enet.intercept_, rtol=2e-7) + assert_allclose(glm.coef_, enet.coef_, rtol=5e-5) def test_poisson_enet(): @@ -569,8 +568,8 @@ def test_poisson_enet(): selection='random', random_state=rng, start_params='guess') glm.fit(X, y) - assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=7) - assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=7) + assert_allclose(glm.intercept_, glmnet_intercept, rtol=2e-6) + assert_allclose(glm.coef_, glmnet_coef, rtol=2e-7) # test results with general optimization procedure def obj(coef): @@ -584,10 +583,10 @@ def obj(coef): + alpha * l1_ratio * np.sum(np.abs(coef[1:])) res = optimize.minimize(obj, [0, 0, 0], method='nelder-mead', tol=1e-10, options={'maxiter': 1000, 'disp': False}) - assert_almost_equal(glm.intercept_, res.x[0], decimal=5) - assert_almost_equal(glm.coef_, res.x[1:], decimal=5) - assert_almost_equal(obj(np.concatenate(([glm.intercept_], glm.coef_))), - res.fun, decimal=8) + assert_allclose(glm.intercept_, res.x[0], rtol=1e-5) + assert_allclose(glm.coef_, res.x[1:], rtol=1e-5, atol=1e-9) + assert_allclose(obj(np.concatenate(([glm.intercept_], glm.coef_))), + res.fun, rtol=1e-8) # same for start_params='zero' and selection='cyclic' # with reduced precision @@ -595,8 +594,8 @@ def obj(coef): link='log', solver='cd', tol=1e-5, selection='cyclic', start_params='zero') glm.fit(X, y) - assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=4) - assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=4) + assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-4) + assert_allclose(glm.coef_, glmnet_coef, rtol=1e-4) # check warm_start, therefore start with different alpha glm = GeneralizedLinearRegressor(alpha=0.005, l1_ratio=0.5, @@ -609,8 +608,8 @@ def obj(coef): glm.alpha = 1 X = sparse.csr_matrix(X) glm.fit(X, y) - assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=4) - assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=4) + assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-4) + assert_allclose(glm.coef_, glmnet_coef, rtol=1e-4) @pytest.mark.parametrize('alpha', [0.01, 0.1, 1, 10]) @@ -629,10 +628,11 @@ def test_binomial_enet(alpha): max_iter=1000, l1_ratio=l1_ratio, C=1./(n_samples * alpha), solver='saga') log.fit(X, y) + glm = GeneralizedLinearRegressor( family=BinomialDistribution(), link=LogitLink(), fit_intercept=False, 
alpha=alpha, l1_ratio=l1_ratio, solver='cd', selection='cyclic', tol=1e-7) glm.fit(X, y) - assert_almost_equal(log.intercept_[0], glm.intercept_, decimal=6) - assert_array_almost_equal(log.coef_[0, :], glm.coef_, decimal=6) + assert_allclose(log.intercept_[0], glm.intercept_, rtol=1e-6) + assert_allclose(log.coef_[0, :], glm.coef_, rtol=2e-6) From b24a7cab9fc7d9f5dcec5b9d7657fee2d0a94283 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 5 Jun 2019 18:53:58 +0200 Subject: [PATCH 056/269] Remove unused dec parameter in tests --- sklearn/linear_model/tests/test_glm.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 7d747c23ae441..a3e943403a7a7 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -388,11 +388,11 @@ def test_glm_log_regression(family, solver, tol): @pytest.mark.filterwarnings('ignore::DeprecationWarning') -@pytest.mark.parametrize('solver, tol, dec', [('irls', 1e-6, 6), - ('lbfgs', 1e-6, 5), - ('newton-cg', 1e-6, 5), - ('cd', 1e-6, 6)]) -def test_normal_ridge(solver, tol, dec): +@pytest.mark.parametrize('solver, tol', [('irls', 1e-6), + ('lbfgs', 1e-6), + ('newton-cg', 1e-6), + ('cd', 1e-6)]) +def test_normal_ridge(solver, tol): """Test ridge regression for Normal distributions. Compare to test_ridge in test_ridge.py. @@ -479,12 +479,12 @@ def test_normal_ridge(solver, tol, dec): assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-5) -@pytest.mark.parametrize('solver, tol, dec', - [('irls', 1e-7, 6), - ('lbfgs', 1e-7, 5), - ('newton-cg', 1e-7, 5), - ('cd', 1e-7, 7)]) -def test_poisson_ridge(solver, tol, dec): +@pytest.mark.parametrize('solver, tol', + [('irls', 1e-7), + ('lbfgs', 1e-7), + ('newton-cg', 1e-7), + ('cd', 1e-7)]) +def test_poisson_ridge(solver, tol): """Test ridge regression with poisson family and LogLink. 
Compare to R's glmnet""" From f95b390c04fd4c0a333f08a649d9450ec44f0395 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 18 Jul 2017 21:50:10 +0200 Subject: [PATCH 057/269] ENH: add Generalized Linear Models, issue #5975 * new estimator GeneralizedLinearRegressor * loss functions for Tweedie family and Binomial * elasitc net penalties * control of penalties by matrix P2 and vector P1 * new solvers: coordinate descent, irls * tests * documentation * example for Poisson regression --- doc/modules/classes.rst | 1 + doc/modules/linear_model.rst | 129 + .../plot_poisson_spline_regression.py | 85 + sklearn/linear_model/__init__.py | 6 +- sklearn/linear_model/glm.py | 2331 +++++++++++++++++ sklearn/linear_model/tests/test_glm.py | 640 +++++ 6 files changed, 3191 insertions(+), 1 deletion(-) create mode 100644 examples/linear_model/plot_poisson_spline_regression.py create mode 100644 sklearn/linear_model/glm.py create mode 100644 sklearn/linear_model/tests/test_glm.py diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 2dcf582a6ab39..4158e34e8bb8c 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -727,6 +727,7 @@ Kernels: linear_model.BayesianRidge linear_model.ElasticNet linear_model.ElasticNetCV + linear_model.GeneralizedLinearRegressor linear_model.HuberRegressor linear_model.Lars linear_model.LarsCV diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 02f406f629e04..888566fab3601 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -893,6 +893,135 @@ to warm-starting (see :term:`Glossary `). .. [9] `"Performance Evaluation of Lbfgs vs other solvers" `_ +.. _Generalized_linear_regression: + +Generalized Linear Regression +============================= + +:class:`GeneralizedLinearRegressor` generalizes the :ref:`elastic_net` in two +ways [10]_. First, the predicted values :math:`\hat{y}` are linked to a linear +combination of the input variables :math:`X` via an inverse link function +:math:`h` as + +.. math:: \hat{y}(w, x) = h(xw) = h(w_0 + w_1 x_1 + ... + w_p x_p). + +Secondly, the squared loss function is replaced by the deviance :math:`D` of an +exponential dispersion model (EDM) [11]_. The objective function beeing minimized +becomes + +.. math:: \frac{1}{2\mathrm{sum}(s)}D(y, \hat{y}; s) + \alpha \rho ||P_1w||_1 + +\frac{\alpha(1-\rho)}{2} w^T P_2 w + +with sample weights :math:`s`. +:math:`P_1` (diagonal matrix) can be used to exclude some of the coefficients in +the L1 penalty, the matrix :math:`P_2` (must be positive semi-definite) allows +for a more versatile L2 penalty. + +Use cases, where a loss different from the squared loss might be appropriate, +are the following: + + * If the target values :math:`y` are counts (non-negative integer valued) or + frequencies (non-negative), you might use a Poisson deviance with log-link. + + * If the target values are positive valued and skewed, you might try a + Gamma deviance with log-link. + + * If the target values seem to be heavier tailed than a Gamma distribution, + you might try an Inverse Gaussian deviance (or even higher variance powers + of the Tweedie family). + +Since the linear predictor :math:`Xw` can be negative and +Poisson, Gamma and Inverse Gaussian distributions don't support negative values, +it is convenient to apply a link function different from the identity link +:math:`h(Xw)=Xw` that guarantees the non-negativeness, e.g. the log-link with +:math:`h(Xw)=\exp(Xw)`. 
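For concreteness, the penalized objective written out above can be evaluated directly for the Poisson family with log link. The following sketch is illustrative only and is not part of this patch or of the estimator's code; the array names ``X, y, s, w, P1, P2`` and the scalars ``alpha, rho`` simply mirror the symbols in the formula and are assumptions of the example::

    # Minimal sketch: evaluate the objective
    #   1/(2*sum(s)) * D(y, yhat; s) + alpha*rho*||P1*w||_1
    #   + alpha*(1-rho)/2 * w' P2 w
    # for a Poisson GLM with log link (no intercept), using the Poisson
    # unit deviance d(y, mu) = 2*(y*log(y/mu) - y + mu).
    import numpy as np
    from scipy import special

    def poisson_objective(w, X, y, s, alpha, rho, P1, P2):
        mu = np.exp(X @ w)                      # inverse log link: h(Xw) = exp(Xw)
        # xlogy handles y = 0 (counts) by returning 0 for y*log(y/mu)
        unit_dev = 2 * (special.xlogy(y, y / mu) - y + mu)
        D = np.sum(s * unit_dev)                # weighted deviance D(y, mu; s)
        l1 = alpha * rho * np.sum(np.abs(P1 * w))          # ||P1*w||_1 term
        l2 = 0.5 * alpha * (1 - rho) * (w @ P2 @ w)        # w' P2 w term
        return D / (2 * np.sum(s)) + l1 + l2

With ``alpha=0`` this reduces to half the mean Poisson deviance, which is the quantity the solvers minimize in the unpenalized case.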
+ +Note that the feature matrix `X` should be standardized before fitting. This +ensures that the penalty treats features equally. The estimator can be used as +follows: + + >>> from sklearn.linear_model import GeneralizedLinearRegressor + >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') + >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) # doctest: +NORMALIZE_WHITESPACE + GeneralizedLinearRegressor(P1='identity', P2='identity', alpha=0.5, + check_input=True, copy_X=True, diag_fisher=False, + family='poisson', fit_dispersion=None, + fit_intercept=True, l1_ratio=0, link='log', + max_iter=100, random_state=None, selection='cyclic', + solver='auto', start_params='guess', tol=0.0001, + verbose=0, warm_start=False) + >>> reg.coef_ # doctest: +NORMALIZE_WHITESPACE + array([0.24630169, 0.43373464]) + >>> reg.intercept_ #doctest: +ELLIPSIS + -0.76383633... + + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_spline_regression.py` + +Mathematical formulation +------------------------ + +In the unpenalized case, the assumptions are the following: + + * The target values :math:`y_i` are realizations of random variables + :math:`Y_i \overset{i.i.d}{\sim} \mathrm{EDM}(\mu_i, \frac{\phi}{s_i})` + with expectation :math:`\mu_i=\mathrm{E}[Y]`, dispersion parameter + :math:`\phi` and sample weights :math:`s_i`. + * The aim is to predict the expectation :math:`\mu_i` with + :math:`\hat{y_i} = h(\eta_i)`, linear predictor + :math:`\eta_i=(Xw)_i` and inverse link function :math:`h(\eta)`. + +Note that the first assumption implies +:math:`\mathrm{Var}[Y_i]=\frac{\phi}{s_i} v(\mu_i)` with unit variance +function :math:`v(\mu)`. Specifying a particular distribution of an EDM is the +same as specifying a unit variance function (they are one-to-one). + +Including penalties helps to avoid overfitting or, in case of L1 penalty, to +obtain sparse solutions. But there are also other motivations to include them, +e.g. accounting for the dependence structure of :math:`y`. + +The objective function, which is independent of :math:`\phi`, is minimized with +respect to the coefficients :math:`w`. + +The deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` +likelihood as + +.. math:: d(y, \mu) = -2\phi\cdot + \left(loglike(y,\mu,\phi) + - loglike(y,y,\phi)\right) \\ + D(y, \mu; s) = \sum_i s_i \cdot d(y_i, \mu_i) + +===================================== =============================== ================================= ============================================ +Distribution Target Domain Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` +===================================== =============================== ================================= ============================================ +Normal ("normal") :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` +Poisson ("poisson") :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{\mu}-y+\mu)` +Gamma ("gamma") :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` +Inverse Gaussian ("inverse.gaussian") :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` +===================================== =============================== ================================= ============================================ + +Two remarks: + +* The deviances for at least Normal, Poisson and Gamma distributions are + strictly consistent scoring functions for the mean :math:`\mu`, see Eq. + (19)-(20) in [12]_. 
+ +* If you want to model a frequency, i.e. counts per exposure (time, volume, ...) + you can do so by a Poisson distribution and passing + :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values together + with :math:`s=\mathrm{exposure}` as sample weights. + + +.. topic:: References: + + .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + + .. [11] Jørgensen, B. (1992). The theory of exponential dispersion models and analysis of deviance. Monografias de matemática, no. 51. + See also `Exponential dispersion model. `_ + + .. [12] Gneiting, T. (2010). `Making and Evaluating Point Forecasts. `_ Stochastic Gradient Descent - SGD ================================= diff --git a/examples/linear_model/plot_poisson_spline_regression.py b/examples/linear_model/plot_poisson_spline_regression.py new file mode 100644 index 0000000000000..fce85fae1ea8c --- /dev/null +++ b/examples/linear_model/plot_poisson_spline_regression.py @@ -0,0 +1,85 @@ +""" +================================= +Poisson Regression with B-Splines +================================= + +As in the :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` +example, a Poisson regression with penalized B-splines (P-splines) [1]_ is +fitted on slightly different sinusodial, Poisson distributed data and +compared to an AdaBoost model with decision trees. +One can see, that this is a hard problem for both estimators. + +.. [1] Eilers, Paul H. C.; Marx, Brian D. "Flexible smoothing with B -splines + and penalties". Statist. Sci. 11 (1996), no. 2, 89--121. + `doi:10.1214/ss/1038425655 + `_ + +""" +print(__doc__) + +# Author: Christian Lorentzen +# based on the AdaBoost regression example from Noel Dawe +# License: BSD 3 clause + +# importing necessary libraries +import numpy as np +from scipy.linalg import toeplitz +# from scipy.interpolate import BSpline +from scipy.interpolate import splev +import matplotlib.pyplot as plt +from sklearn.tree import DecisionTreeRegressor +from sklearn.ensemble import AdaBoostRegressor +from sklearn.linear_model import GeneralizedLinearRegressor + + +# Create the dataset +xmin, xmax = 0, 6 +rng = np.random.RandomState(1) +X = np.linspace(xmin, xmax, 500)[:, np.newaxis] +y_true = 0.5 * (2.1 + np.sin(X).ravel() + np.sin(6 * X).ravel()) +y = rng.poisson(y_true, X.shape[0]) + +# b-spline basis +nknots, degree = 40, 3 +ns = nknots - degree - 1 # number of base spline functions +dx = (xmax - xmin) / (nknots - 1 - 2 * degree) +knots = np.linspace(xmin - degree * dx, 6 + degree * dx, nknots) +coef = np.zeros(ns) +splineBasis = np.empty((X.shape[0], ns), dtype=float) +for i in range(ns): + coef[i] = 1 +# splineBasis[:, i] = BSpline(knots, coef, degree, extrapolate=False)(X) \ +# .ravel() + splineBasis[:, i] = splev(X, (knots, coef, degree)).ravel() + coef[i] = 0 + +# second order difference matrix +P2 = toeplitz([2, -1] + [0] * (ns - 2)).astype(float) +P2[0, 0] = P2[-1, -1] = 1 + +# Fit regression model +regr_1 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), + n_estimators=10, random_state=rng) + +regr_2 = GeneralizedLinearRegressor(family='poisson', link='log', + fit_intercept=True, alpha=0.02, + l1_ratio=0.1, P2=P2) + +regr_1.fit(X, y) +regr_2.fit(splineBasis, y) + +# Predict +y_1 = regr_1.predict(X) +y_2 = regr_2.predict(splineBasis) + +# Plot the results +plt.figure() +plt.plot(X, y_true, c="b", label="true mean") +plt.scatter(X, y, c="k", marker='.', label="training samples") +plt.plot(X, y_1, 
c="g", label="AdaBoost n_estimator=10", linewidth=2) +plt.plot(X, y_2, c="r", label="Poisson GLM with B-splines", linewidth=2) +plt.xlabel("data") +plt.ylabel("target") +plt.title("Regression Comparison") +plt.legend() +plt.show() diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index 770a1a49b600e..cbb2ad8826358 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -18,6 +18,8 @@ lasso_path, enet_path, MultiTaskLasso, MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLassoCV) +from .glm import (TweedieDistribution, + GeneralizedLinearRegressor) from .huber import HuberRegressor from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from .stochastic_gradient import SGDClassifier, SGDRegressor @@ -78,4 +80,6 @@ 'orthogonal_mp', 'orthogonal_mp_gram', 'ridge_regression', - 'RANSACRegressor'] + 'RANSACRegressor', + 'GeneralizedLinearRegressor', + 'TweedieDistribution'] diff --git a/sklearn/linear_model/glm.py b/sklearn/linear_model/glm.py new file mode 100644 index 0000000000000..ac0007c1789a8 --- /dev/null +++ b/sklearn/linear_model/glm.py @@ -0,0 +1,2331 @@ +""" +Generalized Linear Models with Exponential Dispersion Family +""" + +# Author: Christian Lorentzen +# some parts and tricks stolen from other sklearn files. +# License: BSD 3 clause + +# TODO: Should the option `normalize` be included (like other linear models)? +# So far, it is not included. User must pass a normalized X. +# TODO: Add cross validation support, e.g. GCV? +# TODO: Should GeneralizedLinearRegressor inherit from LinearModel? +# So far, it does not. +# TODO: Include further classes in class.rst? ExponentialDispersionModel? +# TweedieDistribution? +# TODO: Negative values in P1 are not allowed so far. They could be used +# for group lasso. + +# Design Decisions: +# - Which name? GeneralizedLinearModel vs GeneralizedLinearRegressor. +# Estimators in sklearn are either regressors or classifiers. A GLM can do +# both depending on the distr (Normal => regressor, Binomial => classifier). +# Solution: GeneralizedLinearRegressor since this is the focus. +# - Allow for finer control of penalty terms: +# L1: ||P1*w||_1 with P1*w as element-wise product, this allows to exclude +# factors from the L1 penalty. +# L2: w*P2*w with P2 a positive (semi-) definite matrix, e.g. P2 could be +# a 1st or 2nd order difference matrix (compare B-spline penalties and +# Tikhonov regularization). +# - The link funtion (instance of class Link) is necessary for the evaluation +# of deviance, score, Fisher and Hessian matrix as a functions of the +# coefficients, which is needed by optimizers. +# Solution: link as argument in those functions +# - Which name/symbol for sample_weight in docu? +# sklearn.linear_models uses w for coefficients, standard literature on +# GLMs use beta for coefficients and w for (sample) weights. +# So far, coefficients=w and sample weights=s. +# - The intercept term is the first index, i.e. 
coef[0] + + +from __future__ import division +from abc import ABCMeta, abstractmethod +import numbers +import numpy as np +from scipy import linalg, sparse, special +import scipy.sparse.linalg as splinalg +from scipy.optimize import fmin_l_bfgs_b +import warnings +from ..base import BaseEstimator, RegressorMixin +from ..exceptions import ConvergenceWarning +from ..utils import check_array, check_X_y +from ..utils.optimize import newton_cg +from ..utils.validation import check_is_fitted, check_random_state + + +def _check_weights(sample_weight, n_samples): + """Check that sample weights are non-negative and have the right shape.""" + if sample_weight is None: + weights = np.ones(n_samples) + elif np.isscalar(sample_weight): + if sample_weight <= 0: + raise ValueError("Sample weights must be non-negative.") + weights = sample_weight * np.ones(n_samples) + else: + _dtype = [np.float64, np.float32] + weights = check_array(sample_weight, accept_sparse=False, + force_all_finite=True, ensure_2d=False, + dtype=_dtype) + if weights.ndim > 1: + raise ValueError("Sample weight must be 1D array or scalar") + elif weights.shape[0] != n_samples: + raise ValueError("Sample weights must have the same length as " + "y") + if not np.all(weights >= 0): + raise ValueError("Sample weights must be non-negative.") + elif not np.sum(weights) > 0: + raise ValueError("Sample weights must have at least one positive " + "element.") + + return weights + + +def _safe_lin_pred(X, coef): + """Compute the linear predictor taking care if intercept is present.""" + if coef.size == X.shape[1] + 1: + return X @ coef[1:] + coef[0] + else: + return X @ coef + + +def _safe_toarray(X): + """Returns a numpy array.""" + if sparse.issparse(X): + return X.toarray() + else: + return np.asarray(X) + + +def _safe_sandwich_dot(X, d, intercept=False): + """Compute sandwich product X.T @ diag(d) @ X. + + With ``intercept=True``, X is treated as if a column of 1 were appended as + first column of X. + X can be sparse, d must be an ndarray. Always returns a ndarray.""" + if sparse.issparse(X): + temp = (X.transpose() @ X.multiply(d[:, np.newaxis])) + # for older versions of numpy and scipy, temp may be a np.matrix + temp = _safe_toarray(temp) + else: + temp = (X.T * d) @ X + if intercept: + dim = X.shape[1] + 1 + if sparse.issparse(X): + order = 'F' if sparse.isspmatrix_csc(X) else 'C' + else: + order = 'F' if X.flags['F_CONTIGUOUS'] else 'C' + res = np.empty((dim, dim), dtype=max(X.dtype, d.dtype), order=order) + res[0, 0] = d.sum() + res[1:, 0] = d @ X + res[0, 1:] = res[1:, 0] + res[1:, 1:] = temp + else: + res = temp + return res + + +def _min_norm_sugrad(coef, grad, P2, P1): + """Compute the gradient of all subgradients with minimal L2-norm. + + subgrad = grad + P2 * coef + P1 * subgrad(|coef|_1) + + g_i = grad_i + (P2*coef)_i + + if coef_i > 0: g_i + P1_i + if coef_i < 0: g_i - P1_i + if coef_i = 0: sign(g_i) * max(|g_i|-P1_i, 0) + + Parameters + ---------- + coef : ndarray + coef[0] may be intercept. 
+ + grad : ndarray, shape=coef.shape + + P2 : {1d or 2d array, None} + always without intercept, ``None`` means P2 = 0 + + P1 : ndarray + always without intercept + """ + intercept = (coef.size == P1.size + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept + # compute grad + coef @ P2 without intercept + grad_wP2 = grad[idx:].copy() + if P2 is None: + pass + elif P2.ndim == 1: + grad_wP2 += coef[idx:] * P2 + else: + grad_wP2 += coef[idx:] @ P2 + res = np.where(coef[idx:] == 0, + np.sign(grad_wP2) * np.maximum(np.abs(grad_wP2) - P1, 0), + grad_wP2 + np.sign(coef[idx:]) * P1) + if intercept: + return np.concatenate(([grad[0]], res)) + else: + return res + + +class Link(metaclass=ABCMeta): + """Abstract base class for Link funtions.""" + + @abstractmethod + def link(self, mu): + """Compute the link function g(mu). + + The link function links the mean mu=E[Y] to the so called linear + predictor (X*w), i.e. g(mu) = linear predictor. + + Parameters + ---------- + mu : array, shape (n_samples,) + Usually the (predicted) mean. + """ + raise NotImplementedError + + @abstractmethod + def derivative(self, mu): + """Compute the derivative of the link g'(mu). + + Parameters + ---------- + mu : array, shape (n_samples,) + Usually the (predicted) mean. + """ + raise NotImplementedError + + @abstractmethod + def inverse(self, lin_pred): + """Compute the inverse link function h(lin_pred). + + Gives the inverse relationship between linkear predictor and the mean + mu=E[Y], i.e. h(linear predictor) = mu. + + Parameters + ---------- + lin_pred : array, shape (n_samples,) + Usually the (fitted) linear predictor. + """ + raise NotImplementedError + + @abstractmethod + def inverse_derivative(self, lin_pred): + """Compute the derivative of the inverse link function h'(lin_pred). + + Parameters + ---------- + lin_pred : array, shape (n_samples,) + Usually the (fitted) linear predictor. + """ + raise NotImplementedError + + @abstractmethod + def inverse_derivative2(self, lin_pred): + """Compute 2nd derivative of the inverse link function h''(lin_pred). + + Parameters + ---------- + lin_pred : array, shape (n_samples,) + Usually the (fitted) linear predictor. + """ + raise NotImplementedError + + +class IdentityLink(Link): + """The identity link function g(x)=x.""" + + def link(self, mu): + return mu + + def derivative(self, mu): + return np.ones_like(mu) + + def inverse(self, lin_pred): + return lin_pred + + def inverse_derivative(self, lin_pred): + return np.ones_like(lin_pred) + + def inverse_derivative2(self, lin_pred): + return np.zeros_like(lin_pred) + + +class LogLink(Link): + """The log link function g(x)=log(x).""" + + def link(self, mu): + return np.log(mu) + + def derivative(self, mu): + return 1./mu + + def inverse(self, lin_pred): + return np.exp(lin_pred) + + def inverse_derivative(self, lin_pred): + return np.exp(lin_pred) + + def inverse_derivative2(self, lin_pred): + return np.exp(lin_pred) + + +class LogitLink(Link): + """The logit link function g(x)=logit(x).""" + + def link(self, mu): + return special.logit(mu) + + def derivative(self, mu): + return 1. / (mu * (1 - mu)) + + def inverse(self, lin_pred): + return special.expit(lin_pred) + + def inverse_derivative(self, lin_pred): + ep = special.expit(lin_pred) + return ep * (1. - ep) + + def inverse_derivative2(self, lin_pred): + ep = special.expit(lin_pred) + ep = special.expit(lin_pred) + return ep * (1. - ep) * (1. 
- 2 * ep) + + +class ExponentialDispersionModel(metaclass=ABCMeta): + r"""Base class for reproductive Exponential Dispersion Models (EDM). + + The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by + + .. math:: p(y| \theta, \phi) = c(y, \phi) + \exp\left(\frac{\theta y-A(\theta)}{\phi}\right) + = \tilde{c}(y, \phi) + \exp\left(-\frac{d(y, \mu)}{2\phi}\right) + + with mean :math:`\mathrm{E}[Y] = A'(\theta) = \mu`, + variance :math:`\mathrm{Var}[Y] = \phi \cdot v(\mu)`, + unit variance :math:`v(\mu)` and + unit deviance :math:`d(y,\mu)`. + + Attributes + ---------- + lower_bound + upper_bound + include_lower_bound + include_upper_bound + + Methods + ------- + in_y_range + unit_variance + unit_variance_derivative + variance + variance_derivative + unit_deviance + unit_deviance_derivative + deviance + deviance_derivative + starting_mu + + _mu_deviance_derivative + _score + _fisher_matrix + _observed_information + _eta_mu_score_fisher + + References + ---------- + + https://en.wikipedia.org/wiki/Exponential_dispersion_model. + """ + @property + def lower_bound(self): + """Get the lower bound of values for Y~EDM.""" + return self._lower_bound + + @property + def upper_bound(self): + """Get the upper bound of values for Y~EDM.""" + return self._upper_bound + + @property + def include_lower_bound(self): + """Get True if lower bound for y is included: y >= lower_bound.""" + return self._include_lower_bound + + @property + def include_upper_bound(self): + """Get True if upper bound for y is includede: y <= upper_bound.""" + return self._include_upper_bound + + def in_y_range(self, x): + """Returns ``True`` if x is in the valid range of Y~EDM. + + Parameters + ---------- + x : array, shape (n_samples,) + Target values. + """ + if self.include_lower_bound: + if self.include_upper_bound: + return np.logical_and(np.greater_equal(x, self.lower_bound), + np.less_equal(x, self.upper_bound)) + else: + return np.logical_and(np.greater_equal(x, self.lower_bound), + np.less(x, self.upper_bound)) + else: + if self.include_upper_bound: + return np.logical_and(np.greater(x, self.lower_bound), + np.less_equal(x, self.upper_bound)) + else: + return np.logical_and(np.greater(x, self.lower_bound), + np.less(x, self.upper_bound)) + + @abstractmethod + def unit_variance(self, mu): + r"""Compute the unit variance function. + + The unit variance :math:`v(\mu)` determines the variance as + a function of the mean :math:`\mu` by + :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(\mu_i)`. + It can also be derived from the unit deviance :math:`d(y,\mu)` as + + .. math:: v(\mu) = \frac{2}{\frac{\partial^2 d(y,\mu)}{ + \partial\mu^2}}\big|_{y=\mu} + + See also :func:`variance`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + """ + raise NotImplementedError() + + @abstractmethod + def unit_variance_derivative(self, mu): + r"""Compute the derivative of the unit variance w.r.t. mu. + + Return :math:`v'(\mu)`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Target values. + """ + raise NotImplementedError() + + def variance(self, mu, phi=1, weights=1): + r"""Compute the variance function. + + The variance of :math:`Y_i \sim \mathrm{EDM}(\mu_i,\phi/s_i)` is + :math:`\mathrm{Var}[Y_i]=\phi/s_i*v(\mu_i)`, + with unit variance :math:`v(\mu)` and weights :math:`s_i`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + + phi : float (default=1) + Dispersion parameter. 
+ + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + """ + return phi/weights * self.unit_variance(mu) + + def variance_derivative(self, mu, phi=1, weights=1): + r"""Compute the derivative of the variance w.r.t. mu. + + Returns + :math:`\frac{\partial}{\partial\mu}\mathrm{Var}[Y_i] + =phi/s_i*v'(\mu_i)`, with unit variance :math:`v(\mu)` + and weights :math:`s_i`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + + phi : float (default=1) + Dispersion parameter. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + """ + return phi/weights * self.unit_variance_derivative(mu) + + @abstractmethod + def unit_deviance(self, y, mu): + r"""Compute the unit deviance. + + The unit_deviance :math:`d(y,\mu)` can be defined by the + log-likelihood as + :math:`d(y,\mu) = -2\phi\cdot + \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right).` + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + """ + raise NotImplementedError() + + def unit_deviance_derivative(self, y, mu): + r"""Compute the derivative of the unit deviance w.r.t. mu. + + The derivative of the unit deviance is given by + :math:`\frac{\partial}{\partial\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` + with unit variance :math:`v(\mu)`. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + """ + return -2 * (y - mu) / self.unit_variance(mu) + + def deviance(self, y, mu, weights=1): + r"""Compute the deviance. + + The deviance is a weighted sum of the per sample unit deviances, + :math:`D = \sum_i s_i \cdot d(y_i, \mu_i)` + with weights :math:`s_i` and unit deviance :math:`d(y,\mu)`. + In terms of the log-likelihood it is :math:`D = -2\phi\cdot + \left(loglike(y,\mu,\frac{phi}{s}) + - loglike(y,y,\frac{phi}{s})\right)`. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + """ + return np.sum(weights * self.unit_deviance(y, mu)) + + def deviance_derivative(self, y, mu, weights=1): + """Compute the derivative of the deviance w.r.t. mu. + + It gives :math:`\\frac{\\partial}{\\partial\\mu} D(y, \\mu; weights)`. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + """ + return weights * self.unit_deviance_derivative(y, mu) + + def starting_mu(self, y, weights=1, ind_weight=0.5): + """Set starting values for the mean mu. + + These may be good starting points for the (unpenalized) IRLS solver. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + + ind_weight : float (default=0.5) + Must be between 0 and 1. Specifies how much weight is given to the + individual observations instead of the mean of y. + """ + return (ind_weight * y + + (1. 
- ind_weight) * np.average(y, weights=weights)) + + def _mu_deviance_derivative(self, coef, X, y, weights, link): + """Compute mu and the derivative of the deviance w.r.t coef.""" + lin_pred = _safe_lin_pred(X, coef) + mu = link.inverse(lin_pred) + d1 = link.inverse_derivative(lin_pred) + temp = d1 * self.deviance_derivative(y, mu, weights) + if coef.size == X.shape[1] + 1: + devp = np.concatenate(([temp.sum()], temp @ X)) + else: + devp = temp @ X # sampe as X.T @ temp + return mu, devp + + def _score(self, coef, phi, X, y, weights, link): + r"""Compute the score function. + + The score function is the derivative of the + log-likelihood w.r.t. `coef` (:math:`w`). + It is given by + + .. math: + + \mathbf{score}(\boldsymbol{w}) + = \frac{\partial loglike}{\partial\boldsymbol{w}} + = \mathbf{X}^T \mathbf{D} + \boldsymbol{\Sigma}^-1 (\mathbf{y} - \boldsymbol{\mu})\,, + + with :math:`\mathbf{D}=\mathrm{diag}(h'(\eta_1),\ldots)` and + :math:`\boldsymbol{\Sigma}=\mathrm{diag}(\mathbf{V}[y_1],\ldots)`. + Note: The derivative of the deviance w.r.t. coef equals -2 * score. + """ + lin_pred = _safe_lin_pred(X, coef) + mu = link.inverse(lin_pred) + sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) + d = link.inverse_derivative(lin_pred) + temp = sigma_inv * d * (y - mu) + if coef.size == X.shape[1] + 1: + score = np.concatenate(([temp.sum()], temp @ X)) + else: + score = temp @ X # sampe as X.T @ temp + return score + + def _fisher_matrix(self, coef, phi, X, y, weights, link): + r"""Compute the Fisher information matrix. + + The Fisher information matrix, also known as expected information + matrix is given by + + .. math: + + \mathbf{F}(\boldsymbol{w}) = + \mathrm{E}\left[-\frac{\partial\mathbf{score}}{\partial + \boldsymbol{w}} \right] + = \mathrm{E}\left[ + -\frac{\partial^2 loglike}{\partial\boldsymbol{w} + \partial\boldsymbol{w}^T}\right] + = \mathbf{X}^T W \mathbf{X} \,, + + with :math:`\mathbf{W} = \mathbf{D}^2 \boldsymbol{\Sigma}^{-1}`, + see func:`_score`. + """ + lin_pred = _safe_lin_pred(X, coef) + mu = link.inverse(lin_pred) + sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) + d = link.inverse_derivative(lin_pred) + d2_sigma_inv = sigma_inv * d * d + intercept = (coef.size == X.shape[1] + 1) + fisher_matrix = _safe_sandwich_dot(X, d2_sigma_inv, + intercept=intercept) + return fisher_matrix + + def _observed_information(self, coef, phi, X, y, weights, link): + r"""Compute the observed information matrix. + + The observed information matrix, also known as the negative of + the Hessian matrix of the log-likelihood, is given by + + .. math: + + \mathbf{H}(\boldsymbol{w}) = + -\frac{\partial^2 loglike}{\partial\boldsymbol{w} + \partial\boldsymbol{w}^T} + = \mathbf{X}^T \left[ + - \mathbf{D}' \mathbf{R} + + \mathbf{D}^2 \mathbf{V} \mathbf{R} + + \mathbf{D}^2 + \right] \boldsymbol{\Sigma}^{-1} \mathbf{X} \,, + + with :math:`\mathbf{R} = \mathrm{diag}(y_i - \mu_i)`, + :math:`\mathbf{V} = \mathrm{diag}\left(\frac{v'(\mu_i)}{ + v(\mu_i)} + \right)`, + see :func:`score_` function and :func:`_fisher_matrix`. 
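+
+        Note: for a canonical link (e.g. the log link for the Poisson
+        distribution) one has :math:`h'(\eta_i) = v(\mu_i)`, so the two terms
+        involving :math:`\mathbf{R}` cancel exactly and the observed
+        information coincides with the Fisher matrix of :func:`_fisher_matrix`.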
+ """ + lin_pred = _safe_lin_pred(X, coef) + mu = link.inverse(lin_pred) + sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) + dp = link.inverse_derivative2(lin_pred) + d2 = link.inverse_derivative(lin_pred)**2 + v = self.unit_variance_derivative(mu)/self.unit_variance(mu) + r = y - mu + temp = sigma_inv * (-dp * r + d2 * v * r + d2) + intercept = (coef.size == X.shape[1] + 1) + observed_information = _safe_sandwich_dot(X, temp, + intercept=intercept) + return observed_information + + def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link, + diag_fisher=False): + """Compute linear predictor, mean, score function and fisher matrix. + + It calculates the linear predictor, the mean, score function + (derivative of log-likelihood) and Fisher information matrix + all in one go as function of `coef` (:math:`w`) and the data. + + Parameters + ---------- + diag_fisher : boolean, optional (default=False) + If ``True``, returns only an array d such that + fisher = X.T @ np.diag(d) @ X. + + Returns + ------- + (eta, mu, score, fisher) : tuple with 4 elements + The 4 elements are: + + * eta: ndarray, shape (X.shape[0],) + * mu: ndarray, shape (X.shape[0],) + * score: ndarray, shape (X.shape[0],) + * fisher: + + * If diag_fisher is ``False``, the full fisher matrix, + an array of shape (X.shape[1], X.shape[1]) + * If diag_fisher is ``True`, an array of shape (X.shape[0]) + """ + intercept = (coef.size == X.shape[1] + 1) + # eta = linear predictor + eta = _safe_lin_pred(X, coef) + mu = link.inverse(eta) + sigma_inv = 1./self.variance(mu, phi=phi, weights=weights) + d1 = link.inverse_derivative(eta) # = h'(eta) + # Alternatively: + # h'(eta) = h'(g(mu)) = 1/g'(mu), note that h is inverse of g + # d1 = 1./link.derivative(mu) + d1_sigma_inv = d1 * sigma_inv + temp = d1_sigma_inv * (y - mu) + if intercept: + score = np.concatenate(([temp.sum()], temp @ X)) + else: + score = temp @ X + + d2_sigma_inv = d1 * d1_sigma_inv + if diag_fisher: + fisher_matrix = d2_sigma_inv + else: + fisher_matrix = _safe_sandwich_dot(X, d2_sigma_inv, + intercept=intercept) + return eta, mu, score, fisher_matrix + + +class TweedieDistribution(ExponentialDispersionModel): + r"""A class for the Tweedie distribution. + + A Tweedie distribution with mean :math:`\mu=\mathrm{E}[Y]` is uniquely + defined by it's mean-variance relationship + :math:`\mathrm{Var}[Y] \propto \mu^power`. + + Special cases are: + + ===== ================ + Power Distribution + ===== ================ + 0 Normal + 1 Poisson + (0,1) Compound Poisson + 2 Gamma + 3 Inverse Gaussian + + Parameters + ---------- + power : float (default=0) + The variance power of the `unit_variance` + :math:`v(\mu) = \mu^{power}`. + For ``0 0) and (power < 1): + raise ValueError('For 0 1) and (power < 2): + # Compound Poisson + self._lower_bound = 0 + self._include_lower_bound = True + elif power == 2: + # GammaDistribution + self._lower_bound = 0 + self._include_lower_bound = False + elif (power > 2) and (power < 3): + # Positive Stable + self._lower_bound = 0 + self._include_lower_bound = False + elif power == 3: + # InverseGaussianDistribution + self._lower_bound = 0 + self._include_lower_bound = False + elif power > 3: + # Positive Stable + self._lower_bound = 0 + self._include_lower_bound = False + else: + raise ValueError('The power must be a float, i.e. 
real number, ' + 'got (power={})'.format(power)) + + @property + def power(self): + return self._power + + @power.setter + def power(self, power): + if not isinstance(power, numbers.Real): + raise TypeError('power must be a real number, input was {0}' + .format(power)) + self._power = power + + def unit_variance(self, mu): + """Compute the unit variance of a Tweedie distribution v(mu)=mu**power. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + """ + return np.power(mu, self.power) + + def unit_variance_derivative(self, mu): + """Compute the derivative of the unit variance of a Tweedie + distribution v(mu)=power*mu**(power-1). + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + """ + return self.power * np.power(mu, self.power - 1) + + def unit_deviance(self, y, mu): + p = self.power + if p == 0: + # NormalDistribution + return (y - mu)**2 + if p == 1: + # PoissonDistribution + # 2 * (y*log(y/mu) - y + mu), with y*log(y/mu)=0 if y=0 + return 2 * (special.xlogy(y, y/mu) - y + mu) + elif p == 2: + # GammaDistribution + return 2 * (np.log(mu/y) + y/mu - 1) + else: + # return 2 * (np.maximum(y,0)**(2-p)/((1-p)*(2-p)) + # - y*mu**(1-p)/(1-p) + mu**(2-p)/(2-p)) + return 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p)*(2-p)) - + y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) + + +class NormalDistribution(TweedieDistribution): + """Class for the Normal (aka Gaussian) distribution""" + def __init__(self): + super(NormalDistribution, self).__init__(power=0) + + +class PoissonDistribution(TweedieDistribution): + """Class for the scaled Poisson distribution""" + def __init__(self): + super(PoissonDistribution, self).__init__(power=1) + + +class GammaDistribution(TweedieDistribution): + """Class for the Gamma distribution""" + def __init__(self): + super(GammaDistribution, self).__init__(power=2) + + +class InverseGaussianDistribution(TweedieDistribution): + """Class for the scaled InverseGaussianDistribution distribution""" + def __init__(self): + super(InverseGaussianDistribution, self).__init__(power=3) + + +class GeneralizedHyperbolicSecant(ExponentialDispersionModel): + """A class for the Generalized Hyperbolic Secant (GHS) distribution. + + The GHS distribution is for tagets y in (-inf, inf). + """ + def __init__(self): + self._lower_bound = -np.Inf + self._upper_bound = np.Inf + self._include_lower_bound = False + self._include_upper_bound = False + + def unit_variance(self, mu): + return 1 + mu**2 + + def unit_variance_derivative(self, mu): + return 2 * mu + + def unit_deviance(self, y, mu): + return (2 * y * (np.arctan(y) - np.arctan(mu)) + + np.log((1 + mu**2)/(1 + y**2))) + + +class BinomialDistribution(ExponentialDispersionModel): + """A class for the Binomial distribution. + + The Binomial distribution is for tagets y in [0, 1]. + """ + def __init__(self): + self._lower_bound = 0 + self._upper_bound = 1 + self._include_lower_bound = True + self._include_upper_bound = True + + def unit_variance(self, mu): + return mu * (1 - mu) + + def unit_variance_derivative(self, mu): + return 1 - 2 * mu + + def unit_deviance(self, y, mu): + return 2 * (special.xlogy(y, y/mu) + special.xlogy(1-y, (1-y)/(1-mu))) + + +def _irls_step(X, W, P2, z, fit_intercept=True): + """Compute one step in iteratively reweighted least squares. + + Solve A w = b for w with + A = (X' W X + P2) + b = X' W z + z = eta + D^-1 (y-mu) + + See also fit method of :class:`GeneralizedLinearRegressor`. 
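+
+    Here ``W = h'(eta)**2 / v(mu)`` are the working weights (with ``phi=1``)
+    and ``z`` are the working observations; see the notes in
+    :func:`_irls_solver` for the derivation of this Fisher scoring step.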
+ + Parameters + ---------- + X : {ndarray, sparse matrix}, shape (n_samples, n_features) + Training data (with intercept included if present) + + W : ndarray, shape (n_samples,) + + P2 : {ndarray, sparse matrix}, shape (n_features, n_features) + The L2-penalty matrix or vector (=diagonal matrix) + + z : ndarray, shape (n_samples,) + Working observations + + fit_intercept : boolean, optional (default=True) + + Returns + ------- + coef : ndarray, shape (c,) + If fit_intercept=False, shape c=X.shape[1]. + If fit_intercept=True, then c=X.shapee[1] + 1. + """ + # Note: solve vs least squares, what is more appropriate? + # scipy.linalg.solve seems faster, but scipy.linalg.lstsq + # is more robust. + # Note: X.T @ W @ X is not sparse, even when X is sparse. + # Sparse solver would splinalg.spsolve(A, b) or splinalg.lsmr(A, b) + if fit_intercept: + Wz = W * z + if sparse.issparse(X): + b = np.concatenate(([Wz.sum()], X.transpose() @ Wz)) + else: + b = np.concatenate(([Wz.sum()], X.T @ Wz)) + A = _safe_sandwich_dot(X, W, intercept=fit_intercept) + if P2.ndim == 1: + idx = np.arange(start=1, stop=A.shape[0]) + A[(idx, idx)] += P2 # add to diag elements without intercept + elif sparse.issparse(P2): + A[1:, 1:] += P2.toarray() + else: + A[1:, 1:] += P2 + else: + if sparse.issparse(X): + XtW = X.transpose().multiply(W) + # for older versions of numpy and scipy, A may be a np.matrix + A = _safe_toarray(XtW @ X) + else: + XtW = (X.T * W) + A = XtW @ X + b = XtW @ z + if P2.ndim == 1: + A[np.diag_indices_from(A)] += P2 + elif sparse.issparse(P2): + A += P2.toarray() + else: + A += P2 + # coef = linalg.solve(A, b, overwrite_a=True, overwrite_b=True) + coef, *_ = linalg.lstsq(A, b, overwrite_a=True, overwrite_b=True) + return coef + + +def _irls_solver(coef, X, y, weights, P2, fit_intercept, family, link, + max_iter, tol): + """Solve GLM with L2 penalty by IRLS algorithm. + + Note: If X is sparse, P2 must also be sparse. + """ + # Solve Newton-Raphson (1): Obj'' (w - w_old) = -Obj' + # Obj = objective function = 1/2 Dev + l2/2 w P2 w + # Dev = deviance, s = normalized weights, variance V(mu) but phi=1 + # D = link.inverse_derivative(eta) = diag_matrix(h'(X w)) + # D2 = link.inverse_derivative(eta)^2 = D^2 + # W = D2/V(mu) + # l2 = alpha * (1 - l1_ratio) + # Obj' = d(Obj)/d(w) = 1/2 Dev' + l2 P2 w + # = -X' D (y-mu)/V(mu) + l2 P2 w + # Obj''= d2(Obj)/d(w)d(w') = Hessian = -X'(...) X + l2 P2 + # Use Fisher matrix instead of full info matrix -X'(...) X, + # i.e. E[Dev''] with E[y-mu]=0: + # Obj'' ~ X' W X + l2 P2 + # (1): w = (X' W X + l2 P2)^-1 X' W z, + # with z = eta + D^-1 (y-mu) + # Note: P2 must be symmetrized + # Note: ' denotes derivative, but also transpose for matrices + + # eta = linear predictor + eta = _safe_lin_pred(X, coef) + mu = link.inverse(eta) + # D = h'(eta) + hp = link.inverse_derivative(eta) + V = family.variance(mu, phi=1, weights=weights) + n_iter = 0 + while n_iter < max_iter: + n_iter += 1 + # coef_old not used so far. + # coef_old = coef + # working weights W, in principle a diagonal matrix + # therefore here just as 1d array + W = hp**2 / V + # working observations + z = eta + (y - mu) / hp + # solve A*coef = b + # A = X' W X + P2, b = X' W z + coef = _irls_step(X, W, P2, z, fit_intercept=fit_intercept) + # updated linear predictor + # do it here for updated values for tolerance + eta = _safe_lin_pred(X, coef) + mu = link.inverse(eta) + hp = link.inverse_derivative(eta) + V = family.variance(mu, phi=1, weights=weights) + + # which tolerace? |coef - coef_old| or gradient? 
+ # use gradient for compliance with newton-cg and lbfgs + # gradient = -X' D (y-mu)/V(mu) + l2 P2 w + temp = hp * (y - mu) / V + if sparse.issparse(X): + gradient = -(X.transpose() @ temp) + else: + gradient = -(X.T @ temp) + idx = 1 if fit_intercept else 0 # offset if coef[0] is intercept + if P2.ndim == 1: + gradient += P2 * coef[idx:] + else: + gradient += P2 @ coef[idx:] + if fit_intercept: + gradient = np.concatenate(([-temp.sum()], gradient)) + if (np.max(np.abs(gradient)) <= tol): + converged = True + break + + if not converged: + warnings.warn("irls failed to converge. Increase the number " + "of iterations (currently {0})" + .format(max_iter), ConvergenceWarning) + + return coef, n_iter + + +def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, + max_inner_iter=1000, selection='cyclic', + random_state=None, diag_fisher=False): + """Compute inner loop of coordinate descent, i.e. cycles through features. + + Minimization of 1-d subproblems:: + + min_z q(d+z*e_j) - q(d) + = min_z A_j z + 1/2 B_jj z^2 + ||P1_j (w_j+d_j+z)||_1 + + A = f'(w) + d*H(w) + (w+d)*P2 + B = H+P2 + Note: f'=-score and H=fisher are updated at the end of outer iteration. + """ + # TODO: use sparsity (coefficient already 0 due to L1 penalty) + # => active set of features for featurelist, see paper + # of Improved GLMNET or Gap Safe Screening Rules + # https://arxiv.org/abs/1611.05780 + n_samples, n_features = X.shape + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept + B = fisher + if P2.ndim == 1: + coef_P2 = coef[idx:] * P2 + if not diag_fisher: + idiag = np.arange(start=idx, stop=B.shape[0]) + # B[np.diag_indices_from(B)] += P2 + B[(idiag, idiag)] += P2 + else: + coef_P2 = coef[idx:] @ P2 + if not diag_fisher: + if sparse.issparse(P2): + B[idx:, idx:] += P2.toarray() + else: + B[idx:, idx:] += P2 + # A = -score + coef_P2 + A = -score + A[idx:] += coef_P2 + # A += d @ (H+P2) but so far d=0 + # inner loop + for inner_iter in range(1, max_inner_iter+1): + inner_iter += 1 + n_cycles += 1 + # cycle through features, update intercept separately at the end + if selection == 'random': + featurelist = random_state.permutation(n_features) + else: + featurelist = np.arange(n_features) + for j in featurelist: + # minimize_z: a z + 1/2 b z^2 + c |d+z| + # a = A_j + # b = B_jj > 0 + # c = |P1_j| = P1_j > 0, see 1.3 + # d = w_j + d_j + # cf. https://arxiv.org/abs/0708.1485 Eqs. 
(3) - (4) + # with beta = z+d, beta_hat = d-a/b and gamma = c/b + # z = 1/b * S(bd-a,c) - d + # S(a,b) = sign(a) max(|a|-b, 0) soft thresholding + jdx = j+idx # index for arrays containing entries for intercept + a = A[jdx] + if diag_fisher: + # Note: fisher is ndarray of shape (n_samples,) => no idx + # Calculate Bj = B[j, :] = B[:, j] as it is needed later anyway + Bj = np.zeros_like(A) + if intercept: + Bj[0] = fisher.sum() + if sparse.issparse(X): + Bj[idx:] = _safe_toarray(X[:, j].transpose() @ + X.multiply(fisher[:, np.newaxis]) + ).ravel() + else: + Bj[idx:] = (fisher * X[:, j]) @ X + + if P2.ndim == 1: + Bj[idx:] += P2[j] + else: + if sparse.issparse(P2): + # slice columns as P2 is csc + Bj[idx:] += P2[:, j].toarray().ravel() + else: + Bj[idx:] += P2[:, j] + b = Bj[jdx] + else: + b = B[jdx, jdx] + + # those ten lines aree what it is all about + if b <= 0: + z = 0 + elif P1[j] == 0: + z = -a/b + elif a + P1[j] < b * (coef[jdx] + d[jdx]): + z = -(a + P1[j])/b + elif a - P1[j] > b * (coef[jdx] + d[jdx]): + z = -(a - P1[j])/b + else: + z = -(coef[jdx] + d[jdx]) + + # update direction d + d[jdx] += z + # update A because d_j is now d_j+z + # A = f'(w) + d*H(w) + (w+d)*P2 + # => A += (H+P2)*e_j z = B_j * z + # Note: B is symmetric B = B.transpose + if diag_fisher: + # Bj = B[:, j] calculated above, still valid + A += Bj * z + else: + # B is symmetric, C- or F-contiguous, but never sparse + if B.flags['F_CONTIGUOUS']: + # slice columns like for sparse csc + A += B[:, jdx] * z + else: # B.flags['C_CONTIGUOUS'] might be true + # slice rows + A += B[jdx, :] * z + # end of cycle over features + # update intercept + if intercept: + if diag_fisher: + Bj = np.zeros_like(A) + Bj[0] = fisher.sum() + Bj[1:] = fisher @ X + b = Bj[0] + else: + b = B[0, 0] + z = 0 if b <= 0 else -A[0]/b + d[0] += z + if diag_fisher: + A += Bj * z + else: + if B.flags['F_CONTIGUOUS']: + A += B[:, 0] * z + else: + A += B[0, :] * z + # end of complete cycle + # stopping criterion for inner loop + # sum_i(|minimum of norm of subgrad of q(d)_i|) + # subgrad q(d) = A + subgrad ||P1*(w+d)||_1 + mn_subgrad = _min_norm_sugrad(coef=coef + d, grad=A, P2=None, P1=P1) + mn_subgrad = linalg.norm(mn_subgrad, ord=1) + if mn_subgrad <= inner_tol: + if inner_iter == 1: + inner_tol = inner_tol/4. + break + # end of inner loop + return d, coef_P2, n_cycles, inner_tol + + +def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, + max_iter=100, max_inner_iter=1000, tol=1e-4, + selection='cyclic ', random_state=None, + diag_fisher=False, copy_X=True): + """Solve GLM with L1 and L2 penalty by coordinate descent algorithm. + + The objective beeing minimized in the coefficients w=coef is:: + + F = f + g, f(w) = 1/2 deviance, g = 1/2 w*P2*w + ||P1*w||_1 + + An Improved GLMNET for L1-regularized Logistic Regression: + + 1. Find optimal descent direction d by minimizing + min_d F(w+d) = min_d F(w+d) - F(w) + 2. Quadrdatic approximation of F(w+d)-F(w) = q(d): + using f(w+d) = f(w) + f'(w)*d + 1/2 d*H(w)*d + O(d^3) gives: + q(d) = (f'(w) + w*P2)*d + 1/2 d*(H(w)+P2)*d + + ||P1*(w+d)||_1 - ||P1*w||_1 + Then minimize q(d): min_d q(d) + 3. Coordinate descent by updating coordinate j (d -> d+z*e_j): + min_z q(d+z*e_j) + = min_z q(d+z*e_j) - q(d) + = min_z A_j z + 1/2 B_jj z^2 + + ||P1_j (w_j+d_j+z)||_1 - ||P1_j (w_j+d_j)||_1 + A = f'(w) + d*H(w) + (w+d)*P2 + B = H + P2 + + Repeat steps 1-3 until convergence. + Note: Use Fisher matrix instead of Hessian for H. 
+ Note: f' = -score, H = Fisher matrix + + Parameters + ---------- + coef : ndarray, shape (c,) + If fit_intercept=False, shape c=X.shape[1]. + If fit_intercept=True, then c=X.shapee[1] + 1. + + X : {ndarray, csc sparse matrix}, shape (n_samples, n_features) + Training data (with intercept included if present). If not sparse, + pass directly as Fortran-contiguous data to avoid + unnecessary memory duplication. + + y : ndarray, shape (n_samples,) + Target values. + + weights: ndarray, shape (n_samples,) + Sample weights with which the deviance is weighted. The weights must + bee normalized and sum to 1. + + P1 : {ndarray}, shape (n_features,) + The L1-penalty vector (=diagonal matrix) + + P2 : {ndarray, csc sparse matrix}, shape (n_features, n_features) + The L2-penalty matrix or vector (=diagonal matrix). If a matrix is + passed, it must be symmetric. If X is sparse, P2 must also be sparse. + + fit_intercept : boolean, optional (default=True) + Specifies if a constant (a.k.a. bias or intercept) should be + added to the linear predictor (X*coef+intercept). + + family : ExponentialDispersionModel + + link : Link + + max_iter : int, optional (default=100) + Maximum numer of outer (Newton) iterations. + + max_inner_iter : int, optional (default=1000) + Maximum number of iterations in each inner loop, i.e. max number of + cycles over all features per inner loop. + + tol : float, optional (default=1e-4) + Covergence criterion is + sum_i(|minimum of norm of subgrad of objective_i|)<=tol. + + selection : str, optional (default='cyclic') + If 'random', randomly chose features in inner loop. + + random_state : {int, RandomState instance, None}, optional (default=None) + + diag_fisher : boolean, optional (default=False) + ``False`` calculates full fisher matrix, ``True`` only diagonal matrix + s.t. fisher = X.T @ diag @ X. This saves storage but needs more + matrix-vector multiplications. + + copy_X : boolean, optional (default=True) + If ``True``, X will be copied; else, it may be overwritten. + + Returns + ------- + coef : ndarray, shape (c,) + If fit_intercept=False, shape c=X.shape[1]. + If fit_intercept=True, then c=X.shapee[1] + 1. + + n_iter : numer of outer iterations = newton iterations + + n_cycles : number of cycles over features + + References + ---------- + Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin + An Improved GLMNET for L1-regularized Logistic Regression, + Journal of Machine Learning Research 13 (2012) 1999-2030 + https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf + """ + X = check_array(X, 'csc', dtype=[np.float64, np.float32], + order='F', copy=copy_X) + if P2.ndim == 2: + P2 = check_array(P2, 'csc', dtype=[np.float64, np.float32], + order='F', copy=copy_X) + if sparse.issparse(X): + if not sparse.isspmatrix_csc(X): + raise ValueError("If X is sparse, it must be in csc format" + "; got (format={})".format(X.format)) + if not sparse.isspmatrix_csc(P2): + raise ValueError("If X is sparse, P2 must also be sparse csc" + "format. 
Got P2 not sparse.") + random_state = check_random_state(random_state) + # Note: we already set P2 = l2*P2, P1 = l1*P1 + # Note: we already symmetriezed P2 = 1/2 (P2 + P2') + n_iter = 0 # number of outer iterations + n_cycles = 0 # number of (complete) cycles over features + converged = False + n_samples, n_features = X.shape + idx = 1 if fit_intercept else 0 # offset if coef[0] is intercept + # line search parameters + (beta, sigma) = (0.5, 0.01) + # some precalculations + # Note: For diag_fisher=False, fisher = X.T @ fisher @ X and fisher is a + # 1d array representing a diagonal matrix. + eta, mu, score, fisher = family._eta_mu_score_fisher( + coef=coef, phi=1, X=X, y=y, weights=weights, link=link, + diag_fisher=diag_fisher) + # set up space for search direction d for inner loop + d = np.zeros_like(coef) + # initial stopping tolerance of inner loop + # use L1-norm of minimum of norm of subgradient of F + inner_tol = _min_norm_sugrad(coef=coef, grad=-score, P2=P2, P1=P1) + inner_tol = linalg.norm(inner_tol, ord=1) + # outer loop + while n_iter < max_iter: + n_iter += 1 + # initialize search direction d (to be optimized) with zero + d.fill(0) + # inner loop = _cd_cycle + d, coef_P2, n_cycles, inner_tol = \ + _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, + max_inner_iter=max_inner_iter, selection=selection, + random_state=random_state, diag_fisher=diag_fisher) + # line search by sequence beta^k, k=0, 1, .. + # F(w + lambda d) - F(w) <= lambda * bound + # bound = sigma * (f'(w)*d + w*P2*d + # +||P1 (w+d)||_1 - ||P1 w||_1) + P1w_1 = linalg.norm(P1 * coef[idx:], ord=1) + P1wd_1 = linalg.norm(P1 * (coef + d)[idx:], ord=1) + # Note: coef_P2 already calculated and still valid + bound = sigma * (-(score @ d) + coef_P2 @ d[idx:] + P1wd_1 - P1w_1) + Fw = (0.5 * family.deviance(y, mu, weights) + + 0.5 * (coef_P2 @ coef[idx:]) + P1w_1) + la = 1./beta + for k in range(20): + la *= beta # starts with la=1 + coef_wd = coef + la * d + mu_wd = link.inverse(_safe_lin_pred(X, coef_wd)) + Fwd = (0.5 * family.deviance(y, mu_wd, weights) + + linalg.norm(P1 * coef_wd[idx:], ord=1)) + if P2.ndim == 1: + Fwd += 0.5 * ((coef_wd[idx:] * P2) @ coef_wd[idx:]) + else: + Fwd += 0.5 * (coef_wd[idx:] @ (P2 @ coef_wd[idx:])) + if Fwd - Fw <= sigma * la * bound: + break + # update coefficients + # coef_old = coef.copy() + coef += la * d + # calculate eta, mu, score, Fisher matrix for next iteration + eta, mu, score, fisher = family._eta_mu_score_fisher( + coef=coef, phi=1, X=X, y=y, weights=weights, link=link, + diag_fisher=diag_fisher) + # stopping criterion for outer loop + # sum_i(|minimum-norm of subgrad of F(w)_i|) + # fp_wP2 = f'(w) + w*P2 + # Note: eta, mu and score are already updated + mn_subgrad = _min_norm_sugrad(coef=coef, grad=-score, P2=P2, P1=P1) + mn_subgrad = linalg.norm(mn_subgrad, ord=1) + if mn_subgrad <= tol: + converged = True + break + # end of outer loop + if not converged: + warnings.warn("Coordinate descent failed to converge. Increase" + " the maximum number of iterations max_iter" + " (currently {0})".format(max_iter), ConvergenceWarning) + + return coef, n_iter, n_cycles + + +class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): + """Regression via a Generalized Linear Model (GLM) with penalties. + + GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at + fitting and predicting the mean of the target y as mu=h(X*w). 
Therefore, + the fit minimizes the following objective function with combined L1 and L2 + priors as regularizer:: + + 1/(2*sum(s)) * deviance(y, h(X*w); s) + + alpha * l1_ratio * ||P1*w||_1 + + 1/2 * alpha * (1 - l1_ratio) * w*P2*w + + with inverse link function h and s=sample_weight. Note that for + ``sample_weight=None``, one has s_i=1 and sum(s)=n_samples). + For ``P1=P2='identity'``, the penalty is the elastic net:: + + alpha * l1_ratio * ||w||_1 + + 1/2 * alpha * (1 - l1_ratio) * ||w||_2^2 + + If you are interested in controlling the L1 and L2 penalties + separately, keep in mind that this is equivalent to:: + + a * L1 + b * L2 + + where:: + + alpha = a + b and l1_ratio = a / (a + b) + + The parameter ``l1_ratio`` corresponds to alpha in the R package glmnet, + while ``alpha`` corresponds to the lambda parameter in glmnet. + Specifically, l1_ratio = 1 is the lasso penalty. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + alpha : float, optional (default=1) + Constant that multiplies the penalty terms und thus determines the + regularization strength. + See the notes for the exact mathematical meaning of this + parameter.``alpha = 0`` is equivalent to unpenalized GLMs. In this + case, the design matrix X must have full column rank + (no collinearities). + + l1_ratio : float, optional (default=0) + The elastic net mixing parameter, with ``0 <= l1_ratio <= 1``. For + ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it + is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a + combination of L1 and L2. + + P1 : {'identity', array-like}, shape (n_features,), optional \ + (default='identity') + With this array, you can exclude coefficients from the L1 penalty. + Set the corresponding value to 1 (include) or 0 (exclude). The + default value ``'identity'`` is the same as a 1d array of ones. + Note that n_features = X.shape[1]. + + P2 : {'identity', array-like, sparse matrix}, shape \ + (n_features,) or (n_features, n_features), optional \ + (default='identity') + With this option, you can set the P2 matrix in the L2 penalty `w*P2*w`. + This gives a fine control over this penalty (Tikhonov regularization). + A 2d array is directly used as the square matrix P2. A 1d array is + interpreted as diagonal (square) matrix. The default 'identity' sets + the identity matrix, which gives the usual squared L2-norm. If you just + want to exclude certain coefficients, pass a 1d array filled with 1, + and 0 for the coefficients to be excluded. + Note that P2 must be positive semi-definite. + + fit_intercept : boolean, optional (default=True) + Specifies if a constant (a.k.a. bias or intercept) should be + added to the linear predictor (X*coef+intercept). + + family : {'normal', 'poisson', 'gamma', 'inverse.gaussian', 'binomial'} \ + or an instance of class ExponentialDispersionModel, \ + optional(default='normal') + The distributional assumption of the GLM, i.e. which distribution from + the EDM, specifies the loss function to be minimized. + + link : {'auto', 'identity', 'log', 'logit'} or an instance of class Link, \ + optional (default='auto') + The link function of the GLM, i.e. mapping from linear predictor + (X*coef) to expectation (mu). 
Option 'auto' sets the link depending on + the chosen family as follows: + + - 'identity' for family 'normal' + + - 'log' for families 'poisson', 'gamma', 'inverse.gaussian' + + - 'logit' for family 'binomial' + + fit_dispersion : {None, 'chisqr', 'deviance'}, optional (defaul=None) + Method for estimation of the dispersion parameter phi. Whether to use + the chi squared statisic or the deviance statistic. If None, the + dispersion is not estimated. + + solver : {'auto', 'cd', 'irls', 'lbfgs', 'newton-cg'}, \ + optional (default='auto') + Algorithm to use in the optimization problem: + + 'auto' + Sets 'irls' if l1_ratio equals 0, else 'cd'. + + 'cd' + Coordinate descent algorithm. It can deal with L1 as well as L2 + penalties. Note that in order to avoid unnecessary memory + duplication of X in the ``fit`` method, X should be directly passed + as a Fortran-contiguous numpy array or sparse csc matrix. + + 'irls' + Iterated reweighted least squares. + It is the standard algorithm for GLMs. It cannot deal with + L1 penalties. + + 'lbfgs' + Calls scipy's L-BFGS-B optimizer. It cannot deal with L1 penalties. + + 'newton-cg', 'lbfgs' + Newton conjugate gradient algorithm cannot deal with L1 penalties. + + Note that all solvers except lbfgs use the fisher matrix, i.e. the + expected Hessian instead of the Hessian matrix. + + max_iter : int, optional (default=100) + The maximal number of iterations for solver algorithms. + + tol : float, optional (default=1e-4) + Stopping criterion. For the irls, newton-cg and lbfgs solvers, + the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` + where ``g_i`` is the i-th component of the gradient (derivative) of + the objective function. For the cd solver, covergence is reached + when ``sum_i(|minimum-norm of g_i|)``, where ``g_i`` is the + subgradient of the objective and minimum-norm of ``g_i`` is the element + of the subgradient ``g_i`` with the smallest L2-norm. + + warm_start : boolean, optional (default=False) + If set to ``True``, reuse the solution of the previous call to ``fit`` + as initialization for ``coef_`` and ``intercept_`` (supersedes option + ``start_params``). If set to ``True`` or if the attribute ``coef_`` + does not exit (first call to ``fit``), option ``start_params`` sets the + start values for ``coef_`` and ``intercept_``. + + start_params : {'guess', 'zero', array of shape (n_features*, )}, \ + optional (default='guess') + Relevant only if ``warm_start=False`` or if fit is called + the first time (``self.coef_`` does not yet exist). + + 'guess' + Start values of mu are calculated by family.starting_mu(..). Then, + one Newton step obtains start values for ``coef_``. If + ``solver='irls'``, it uses one irls step, else the Newton step is + calculated by the cd solver. + This gives usually good starting values. + + 'zero' + All coefficients are set to zero. If ``fit_intercept=True``, the + start value for the intercept is obtained by the weighted average of y. + + array + The array of size n_features* is directly used as start values + for ``coef_``. If ``fit_intercept=True``, the first element + is assumed to be the start value for the ``intercept_``. + Note that n_features* = X.shape[1] + fit_intercept, i.e. it includes + the intercept in counting. + + selection : str, optional (default='cyclic') + For the solver 'cd' (coordinate descent), the coordinates (features) + can be updated in either cyclic or random order. 
+ If set to 'random', a random coefficient is updated every iteration + rather than looping over features sequentially in the same order. This + (setting to 'random') often leads to significantly faster convergence + especially when tol is higher than 1e-4. + + random_state : {int, RandomState instance, None}, optional (default=None) + The seed of the pseudo random number generator that selects a random + feature to be updated for solver 'cd' (coordinate descent). + If int, random_state is the seed used by the random + number generator; if RandomState instance, random_state is the random + number generator; if None, the random number generator is the + RandomState instance used by `np.random`. Used when ``selection`` == + 'random'. + + diag_fisher : boolean, optional, (default=False) + Only relevant for solver 'cd' (see also ``start_params='guess'``). + If ``False``, the full Fisher matrix (expected Hessian) is computed in + each outer iteration (Newton iteration). If ``True``, only a diagonal + matrix (stored as 1d array) is computed, such that + fisher = X.T @ diag @ X. This saves memory and matrix-matrix + multiplications, but needs more matrix-vector multiplications. If you + use large sparse X or if you have many features, + i.e. n_features >> n_samples, you might set this option to ``True``. + + copy_X : boolean, optional, (default=True) + If ``True``, X will be copied; else, it may be overwritten. + + check_input : boolean, optional (default=True) + Allow to bypass several checks on input: y values in range of family, + sample_weight non-negative, P2 positive semi-definite. + Don't use this parameter unless you know what you do. + + verbose : int, optional (default=0) + For the lbfgs solver set verbose to any positive number for verbosity. + + Attributes + ---------- + coef_ : array, shape (n_features,) + Estimated coefficients for the linear predictor (X*coef_+intercept_) in + the GLM. + + intercept_ : float + Intercept (a.k.a. bias) added to linear predictor. + + dispersion_ : float + The dispersion parameter :math:`\\phi` if ``fit_dispersion`` was set. + + n_iter_ : int + Actual number of iterations used in solver. + + Notes + ----- + The fit itself does not need Y to be from an EDM, but only assumes + the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function + :math:`v(\\mu_i)` is a property of and given by the specific EDM, see + :ref:`User Guide `. + + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + minimizing the deviance plus penalty term, which is equivalent to + (penalized) maximum likelihood estimation. + + For alpha > 0, the feature matrix X should be standardized in order to + penalize features equally strong. Call + :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. + + If the target y is a ratio, appropriate sample weights s should be + provided. + As an example, consider Poission distributed counts z (integers) and + weights s=exposure (time, money, persons years, ...). Then you fit + y = z/s, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, + sample_weight=s)``. The weights are necessary for the right (finite + sample) mean. + Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, + in this case one might say that y has a 'scaled' Poisson distributions. + The same holds for other distributions. 
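+
+    An illustrative sketch of this weighted Poisson setup (editorial example
+    with made-up numbers; the public import path is assumed and not part of
+    the original patch)::
+
+        >>> import numpy as np
+        >>> from sklearn.linear_model import GeneralizedLinearRegressor
+        >>> X = np.array([[1.], [2.], [3.], [4.]])
+        >>> exposure = np.array([10., 20., 30., 40.])  # weights s
+        >>> counts = np.array([3., 11., 14., 27.])     # Poisson counts z
+        >>> y = counts / exposure                      # y = z/s
+        >>> reg = GeneralizedLinearRegressor(family='poisson')
+        >>> reg.fit(X, y, sample_weight=exposure)  # doctest: +SKIP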
+ + References + ---------- + For the coordinate descent implementation: + * Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin + An Improved GLMNET for L1-regularized Logistic Regression, + Journal of Machine Learning Research 13 (2012) 1999-2030 + https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf + """ + def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', + fit_intercept=True, family='normal', link='auto', + fit_dispersion=None, solver='auto', max_iter=100, + tol=1e-4, warm_start=False, start_params='guess', + selection='cyclic', random_state=None, diag_fisher=False, + copy_X=True, check_input=True, verbose=0): + self.alpha = alpha + self.l1_ratio = l1_ratio + self.P1 = P1 + self.P2 = P2 + self.fit_intercept = fit_intercept + self.family = family + self.link = link + self.fit_dispersion = fit_dispersion + self.solver = solver + self.max_iter = max_iter + self.tol = tol + self.warm_start = warm_start + self.start_params = start_params + self.selection = selection + self.random_state = random_state + self.diag_fisher = diag_fisher + self.copy_X = copy_X + self.check_input = check_input + self.verbose = verbose + + def fit(self, X, y, sample_weight=None): + """Fit a Generalized Linear Model. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Training data. + + y : array-like, shape (n_samples,) + Target values. + + sample_weight : {None, array-like}, shape (n_samples,),\ + optinal (default=None) + Individual weights w_i for each sample. Note that for an + Exponential Dispersion Model (EDM), one has + Var[Y_i]=phi/w_i * v(mu). + If Y_i ~ EDM(mu, phi/w_i), then + sum(w*Y)/sum(w) ~ EDM(mu, phi/sum(w)), i.e. the mean of y is a + weighted average with weights=sample_weight. + + Returns + ------- + self : returns an instance of self. + """ + ####################################################################### + # 1. 
input validation # + ####################################################################### + # 1.1 validate arguments of __init__ ################################## + # Guarantee that self._family_instance is an instance of class + # ExponentialDispersionModel + if isinstance(self.family, ExponentialDispersionModel): + self._family_instance = self.family + else: + if self.family == 'normal': + self._family_instance = NormalDistribution() + elif self.family == 'poisson': + self._family_instance = PoissonDistribution() + elif self.family == 'gamma': + self._family_instance = GammaDistribution() + elif self.family == 'inverse.gaussian': + self._family_instance = InverseGaussianDistribution() + elif self.family == 'binomial': + self._family_instance = BinomialDistribution() + else: + raise ValueError( + "The family must be an instance of class" + " ExponentialDispersionModel or an element of" + " ['normal', 'poisson', 'gamma', 'inverse.gaussian', " + "'binomial']; got (family={0})".format(self.family)) + + # Guarantee that self._link_instance is set to an instance of + # class Link + if isinstance(self.link, Link): + self._link_instance = self.link + else: + if self.link == 'auto': + if isinstance(self._family_instance, TweedieDistribution): + if self._family_instance.power <= 0: + self._link_instance = IdentityLink() + if self._family_instance.power >= 1: + self._link_instance = LogLink() + elif isinstance(self._family_instance, + GeneralizedHyperbolicSecant): + self._link_instance = IdentityLink() + elif isinstance(self._family_instance, BinomialDistribution): + self._link_instance = LogitLink() + else: + raise ValueError("No default link known for the " + "specified distribution family. Please " + "set link manually, i.e. not to 'auto'; " + "got (link='auto', family={}" + .format(self.family)) + elif self.link == 'identity': + self._link_instance = IdentityLink() + elif self.link == 'log': + self._link_instance = LogLink() + elif self.link == 'logit': + self._link_instance = LogitLink() + else: + raise ValueError( + "The link must be an instance of class Link or " + "an element of ['auto', 'identity', 'log', 'logit']; " + "got (link={0})".format(self.link)) + + # validate further arguments + if not isinstance(self.alpha, numbers.Number) or self.alpha < 0: + raise ValueError("Penalty term must be a non-negative number;" + " got (alpha={0})".format(self.alpha)) + if (not isinstance(self.l1_ratio, numbers.Number) or + self.l1_ratio < 0 or self.l1_ratio > 1): + raise ValueError("l1_ratio must be a number in interval [0, 1];" + " got (l1_ratio={0})".format(self.l1_ratio)) + if not isinstance(self.fit_intercept, bool): + raise ValueError("The argument fit_intercept must be bool;" + " got {0}".format(self.fit_intercept)) + if self.solver not in ['auto', 'irls', 'lbfgs', 'newton-cg', 'cd']: + raise ValueError("GeneralizedLinearRegressor supports only solvers" + " 'auto', 'irls', 'lbfgs', 'newton-cg' and 'cd';" + " got {0}".format(self.solver)) + solver = self.solver + if self.solver == 'auto': + if self.l1_ratio == 0: + solver = 'irls' + else: + solver = 'cd' + if (self.alpha > 0 and self.l1_ratio > 0 and solver not in ['cd']): + raise ValueError("The chosen solver (solver={0}) can't deal " + "with L1 penalties, which are included with " + "(alpha={1}) and (l1_ratio={2})." 
+ .format(solver, self.alpha, self.l1_ratio)) + if (not isinstance(self.max_iter, int) + or self.max_iter <= 0): + raise ValueError("Maximum number of iteration must be a positive " + "integer;" + " got (max_iter={0!r})".format(self.max_iter)) + if not isinstance(self.tol, numbers.Number) or self.tol <= 0: + raise ValueError("Tolerance for stopping criteria must be " + "positive; got (tol={0!r})".format(self.tol)) + if not isinstance(self.warm_start, bool): + raise ValueError("The argument warm_start must be bool;" + " got {0}".format(self.warm_start)) + if self.selection not in ['cyclic', 'random']: + raise ValueError("The argument selection must be 'cyclic' or " + "'random'; got (selection={0})" + .format(self.selection)) + random_state = check_random_state(self.random_state) + if not isinstance(self.diag_fisher, bool): + raise ValueError("The argument diag_fisher must be bool;" + " got {0}".format(self.diag_fisher)) + if not isinstance(self.copy_X, bool): + raise ValueError("The argument copy_X must be bool;" + " got {0}".format(self.copy_X)) + if not isinstance(self.check_input, bool): + raise ValueError("The argument check_input must be bool; got " + "(check_input={0})".format(self.check_input)) + + family = self._family_instance + link = self._link_instance + + # 1.2 validate arguments of fit ####################################### + _dtype = [np.float64, np.float32] + if solver == 'cd': + _stype = ['csc'] + else: + _stype = ['csc', 'csr'] + X, y = check_X_y(X, y, accept_sparse=_stype, + dtype=_dtype, y_numeric=True, multi_output=False, + copy=self.copy_X) + # Without converting y to float, deviance might raise + # ValueError: Integers to negative integer powers are not allowed. + # Also, y must not be sparse. + y = np.asarray(y, dtype=np.float64) + + weights = _check_weights(sample_weight, y.shape[0]) + + n_samples, n_features = X.shape + + # 1.3 arguments to take special care ################################## + # P1, P2, start_params + if isinstance(self.P1, str) and self.P1 == 'identity': + P1 = np.ones(n_features) + else: + P1 = np.atleast_1d(self.P1) + try: + P1 = P1.astype(np.float64, casting='safe', copy=False) + except TypeError: + raise TypeError("The given P1 cannot be converted to a numeric" + "array; got (P1.dtype={0})." + .format(P1.dtype)) + if (P1.ndim != 1) or (P1.shape[0] != n_features): + raise ValueError("P1 must be either 'identity' or a 1d array " + "with the length of X.shape[1]; " + "got (P1.shape[0]={0}), " + "needed (X.shape[1]={1})." + .format(P1.shape[0], n_features)) + # If X is sparse, make P2 sparse, too. 
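+        # Illustration (editorial, not part of the original patch): with
+        # n_features == 3, P1 = np.array([1., 1., 0.]) removes the L1 penalty
+        # from the third coefficient only, and a 1d P2 with the same entries
+        # likewise removes it from the quadratic term w*P2*w.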
+ if isinstance(self.P2, str) and self.P2 == 'identity': + if sparse.issparse(X): + P2 = (sparse.dia_matrix((np.ones(n_features), 0), + shape=(n_features, n_features))).tocsc() + else: + P2 = np.ones(n_features) + else: + P2 = check_array(self.P2, copy=True, + accept_sparse=_stype, + dtype=_dtype, ensure_2d=False) + if P2.ndim == 1: + P2 = np.asarray(P2) + if P2.shape[0] != n_features: + raise ValueError("P2 should be a 1d array of shape " + "(n_features,) with " + "n_features=X.shape[1]; " + "got (P2.shape=({0},)), needed ({1},)" + .format(P2.shape[0], X.shape[1])) + if sparse.issparse(X): + P2 = (sparse.dia_matrix((P2, 0), + shape=(n_features, n_features))).tocsc() + elif (P2.ndim == 2 and P2.shape[0] == P2.shape[1] and + P2.shape[0] == X.shape[1]): + if sparse.issparse(X): + P2 = (sparse.dia_matrix((P2, 0), + shape=(n_features, n_features))).tocsc() + else: + raise ValueError("P2 must be either None or an array of shape " + "(n_features, n_features) with " + "n_features=X.shape[1]; " + "got (P2.shape=({0}, {1})), needed ({2}, {2})" + .format(P2.shape[0], P2.shape[1], X.shape[1])) + + start_params = self.start_params + if isinstance(start_params, str): + if start_params not in ['guess', 'zero']: + raise ValueError("The argument start_params must be 'guess', " + "'zero' or an array of correct length; " + "got(start_params={0})".format(start_params)) + else: + start_params = check_array(start_params, accept_sparse=False, + force_all_finite=True, ensure_2d=False, + dtype=_dtype, copy=True) + if ((start_params.shape[0] != X.shape[1] + self.fit_intercept) or + (start_params.ndim != 1)): + raise ValueError("Start values for parameters must have the" + "right length and dimension; required (length" + "={0}, ndim=1); got (length={1}, ndim={2})." + .format(X.shape[1] + self.fit_intercept, + start_params.shape[0], + start_params.ndim)) + + l1 = self.alpha * self.l1_ratio + l2 = self.alpha * (1 - self.l1_ratio) + # P1 and P2 are now for sure copies + P1 = l1 * P1 + P2 = l2 * P2 + # one only ever needs the symmetrized L2 penalty matrix 1/2 (P2 + P2') + # reason: w' P2 w = (w' P2 w)', i.e. it is symmetric + if P2.ndim == 2: + if sparse.issparse(P2): + if sparse.isspmatrix_csc(P2): + P2 = 0.5 * (P2 + P2.transpose()).tocsc() + else: + P2 = 0.5 * (P2 + P2.transpose()).tocsr() + else: + P2 = 0.5 * (P2 + P2.T) + + # For coordinate descent, if X is sparse, P2 must also be csc + if solver == 'cd' and sparse.issparse(X): + P2 = sparse.csc_matrix(P2) + + # 1.4 additional validations ########################################## + if self.check_input: + if not np.all(family.in_y_range(y)): + raise ValueError("Some value(s) of y are out of the valid " + "range for family {0}" + .format(family.__class__.__name__)) + # check if P1 has only non-negative values, negative values might + # indicate group lasso in the future. 
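+            # (editorial note: negative entries in P1 would reward large
+            # coefficients and make the penalty term non-convex, hence the
+            # check below)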
+            if not isinstance(self.P1, str):  # if self.P1 != 'identity':
+                if not np.all(P1 >= 0):
+                    raise ValueError("P1 must not have negative values.")
+            # check if P2 is positive semidefinite
+            # np.linalg.cholesky(P2) 'only' asserts positive definite
+            if not isinstance(self.P2, str):  # self.P2 != 'identity'
+                # due to numerical precision, we allow eigenvalues to be a
+                # tiny bit negative
+                epsneg = -10 * np.finfo(P2.dtype).epsneg
+                if P2.ndim == 1 or P2.shape[0] == 1:
+                    p2 = P2
+                    if sparse.issparse(P2):
+                        p2 = P2.toarray()
+                    if not np.all(p2 >= 0):
+                        raise ValueError("1d array P2 must not have negative "
+                                         "values.")
+                elif sparse.issparse(P2):
+                    # for sparse matrices, not all eigenvals can be computed
+                    # efficiently, use only half of n_features
+                    # k = how many eigenvals to compute
+                    k = np.min([10, n_features // 10 + 1])
+                    sigma = 0  # start searching near this value
+                    which = 'SA'  # find smallest algebraic eigenvalues first
+                    # return_eigenvectors=False so that eigsh returns only the
+                    # eigenvalues; otherwise it returns a (values, vectors)
+                    # tuple and the comparison below would fail
+                    if not np.all(splinalg.eigsh(P2, k=k, sigma=sigma,
+                                                 which=which,
+                                                 return_eigenvectors=False
+                                                 ) >= epsneg):
+                        raise ValueError("P2 must be positive semi-definite.")
+                else:
+                    if not np.all(linalg.eigvalsh(P2) >= epsneg):
+                        raise ValueError("P2 must be positive semi-definite.")
+            # TODO: if alpha=0 check that X is not rank deficient
+            # TODO: what else to check?
+
+        #######################################################################
+        # 2. rescaling of weights (sample_weight)                             #
+        #######################################################################
+        # IMPORTANT NOTE: Since we want to minimize
+        # 1/(2*sum(sample_weight)) * deviance + L1 + L2,
+        # deviance = sum(sample_weight * unit_deviance),
+        # we rescale weights such that sum(weights) = 1 and this becomes
+        # 1/2*deviance + L1 + L2 with deviance=sum(weights * unit_deviance)
+        weights_sum = np.sum(weights)
+        weights = weights/weights_sum
+
+        #######################################################################
+        # 3. initialization of coef = (intercept_, coef_)                     #
+        #######################################################################
+        # Note: Since phi=self.dispersion_ does not enter the estimation
+        # of mu_i=E[y_i], set it to 1.
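+        # Summary of the initialization branches below (editorial):
+        #   * warm_start with an existing coef_: reuse the previous solution
+        #   * start_params='guess': mu = family.starting_mu(y), then one
+        #     IRLS step (solver='irls') or one cd/Newton step otherwise
+        #   * start_params='zero': coef = 0 and, if fit_intercept, the
+        #     intercept is set to link(weighted average of y)
+        #   * array-like start_params: used directly as start values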
+ + # set start values for coef + coef = None + if self.warm_start and hasattr(self, 'coef_'): + if self.fit_intercept: + coef = np.concatenate((np.array([self.intercept_]), + self.coef_)) + else: + coef = self.coef_ + elif isinstance(start_params, str): + if start_params == 'guess': + # Set mu=starting_mu of the family and do one Newton step + # If solver=cd use cd, else irls + mu = family.starting_mu(y, weights=weights) + eta = link.link(mu) # linear predictor + if solver in ['cd', 'lbfgs', 'newton-cg']: + # see function _cd_solver + sigma_inv = 1/family.variance(mu, phi=1, weights=weights) + d1 = link.inverse_derivative(eta) + temp = sigma_inv * d1 * (y - mu) + if self.fit_intercept: + score = np.concatenate(([temp.sum()], temp @ X)) + else: + score = temp @ X # sampe as X.T @ temp + + d2_sigma_inv = d1 * d1 * sigma_inv + diag_fisher = self.diag_fisher + if diag_fisher: + fisher = d2_sigma_inv + else: + fisher = \ + _safe_sandwich_dot(X, d2_sigma_inv, + intercept=self.fit_intercept) + # set up space for search direction d for inner loop + if self.fit_intercept: + coef = np.zeros(n_features+1) + else: + coef = np.zeros(n_features) + d = np.zeros_like(coef) + # initial stopping tolerance of inner loop + # use L1-norm of minimum of norm of subgradient of F + # use less restrictive tolerance for initial guess + inner_tol = _min_norm_sugrad(coef=coef, grad=-score, P2=P2, + P1=P1) + inner_tol = 4 * linalg.norm(inner_tol, ord=1) + # just one outer loop = Newton step + n_cycles = 0 + d, coef_P2, n_cycles, inner_tol = \ + _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, + inner_tol, max_inner_iter=1000, + selection=self.selection, + random_state=self.random_state, + diag_fisher=self.diag_fisher) + coef += d # for simplicity no line search here + else: + # See _irls_solver + # h'(eta) + hp = link.inverse_derivative(eta) + # working weights W, in principle a diagonal matrix + # therefore here just as 1d array + W = (hp**2 / family.variance(mu, phi=1, weights=weights)) + # working observations + z = eta + (y-mu)/hp + # solve A*coef = b + # A = X' W X + l2 P2, b = X' W z + coef = _irls_step(X, W, P2, z, + fit_intercept=self.fit_intercept) + else: # start_params == 'zero' + if self.fit_intercept: + coef = np.zeros(n_features+1) + coef[0] = link.link(np.average(y, weights=weights)) + else: + coef = np.zeros(n_features) + else: # assign given array as start values + coef = start_params + + ####################################################################### + # 4. fit # + ####################################################################### + # algorithms for optimiation + # TODO: Parallelize it? 
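+        # Solver dispatch in sections 4.1-4.4 below (editorial summary):
+        #   * 'irls'     : iteratively reweighted least squares, L2 penalty only
+        #   * 'lbfgs'    : scipy's fmin_l_bfgs_b on obj = 1/2 dev + 1/2 w*P2*w
+        #   * 'newton-cg': same objective; Hessian-vector products use the
+        #                  Fisher matrix (expected Hessian)
+        #   * 'cd'       : coordinate descent (Improved GLMNET), the only
+        #                  solver that also handles the L1 term P1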
+ + # 4.1 IRLS ############################################################ + # Note: we already set P2 = l2*P2, see above + # Note: we already symmetriezed P2 = 1/2 (P2 + P2') + if solver == 'irls': + coef, self.n_iter_ = \ + _irls_solver(coef=coef, X=X, y=y, weights=weights, P2=P2, + fit_intercept=self.fit_intercept, family=family, + link=link, max_iter=self.max_iter, tol=self.tol) + + # 4.2 L-BFGS ########################################################## + elif solver == 'lbfgs': + def func(coef, X, y, weights, P2, family, link): + mu, devp = \ + family._mu_deviance_derivative(coef, X, y, weights, link) + dev = family.deviance(y, mu, weights) + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept + if P2.ndim == 1: + L2 = P2 * coef[idx:] + else: + L2 = P2 @ coef[idx:] + obj = 0.5 * dev + 0.5 * (coef[idx:] @ L2) + objp = 0.5 * devp + objp[idx:] += L2 + return obj, objp + + args = (X, y, weights, P2, family, link) + coef, loss, info = fmin_l_bfgs_b( + func, coef, fprime=None, args=args, + iprint=(self.verbose > 0) - 1, pgtol=self.tol, + maxiter=self.max_iter, factr=1e3) + if self.verbose > 0: + if info["warnflag"] == 1: + warnings.warn("lbfgs failed to converge." + " Increase the number of iterations.", + ConvergenceWarning) + elif info["warnflag"] == 2: + warnings.warn("lbfgs failed for the reason: {0}" + .format(info["task"])) + self.n_iter_ = info['nit'] + + # 4.3 Newton-CG ####################################################### + # We use again the fisher matrix instead of the hessian. More + # precisely, expected hessian of deviance. + elif solver == 'newton-cg': + def func(coef, X, y, weights, P2, family, link): + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept + if P2.ndim == 1: + L2 = coef[idx:] @ (P2 * coef[idx:]) + else: + L2 = coef[idx:] @ (P2 @ coef[idx:]) + mu = link.inverse(_safe_lin_pred(X, coef)) + return 0.5 * family.deviance(y, mu, weights) + 0.5 * L2 + + def grad(coef, X, y, weights, P2, family, link): + mu, devp = \ + family._mu_deviance_derivative(coef, X, y, weights, link) + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept + if P2.ndim == 1: + L2 = P2 * coef[idx:] + else: + L2 = P2 @ coef[idx:] + objp = 0.5 * devp + objp[idx:] += L2 + return objp + + def grad_hess(coef, X, y, weights, P2, family, link): + intercept = (coef.size == X.shape[1] + 1) + idx = 1 if intercept else 0 # offset if coef[0] is intercept + if P2.ndim == 1: + L2 = P2 * coef[idx:] + else: + L2 = P2 @ coef[idx:] + eta = _safe_lin_pred(X, coef) + mu = link.inverse(eta) + d1 = link.inverse_derivative(eta) + temp = d1 * family.deviance_derivative(y, mu, weights) + if intercept: + grad = np.concatenate(([0.5 * temp.sum()], + 0.5 * temp @ X + L2)) + else: + grad = 0.5 * temp @ X + L2 # sampe as 0.5* X.T @ temp + L2 + + # expected hessian = fisher = X.T @ diag_matrix @ X + # calculate only diag_matrix + diag = d1**2 / family.variance(mu, phi=1, weights=weights) + if intercept: + h0i = np.concatenate(([diag.sum()], diag @ X)) + + def Hs(coef): + # return (0.5 * fisher + P2) @ coef + # ret = 0.5 * (X.T @ (diag * (X @ coef))) + ret = 0.5 * ((diag * (X @ coef[idx:])) @ X) + if P2.ndim == 1: + ret += P2 * coef[idx:] + else: + ret += P2 @ coef[idx:] + if intercept: + ret = np.concatenate(([0.5 * (h0i @ coef)], + ret + 0.5 * coef[0] * h0i[1:])) + return ret + + return grad, Hs + + args = (X, y, weights, P2, family, link) + coef, self.n_iter_ = 
newton_cg(grad_hess, func, grad, coef, + args=args, maxiter=self.max_iter, + tol=self.tol) + + # 4.4 coordinate descent ############################################## + # Note: we already set P1 = l1*P1, see above + # Note: we already set P2 = l2*P2, see above + # Note: we already symmetriezed P2 = 1/2 (P2 + P2') + elif solver == 'cd': + coef, self.n_iter_, self._n_cycles = \ + _cd_solver(coef=coef, X=X, y=y, weights=weights, P1=P1, + P2=P2, fit_intercept=self.fit_intercept, + family=family, link=link, + max_iter=self.max_iter, tol=self.tol, + selection=self.selection, random_state=random_state, + diag_fisher=self.diag_fisher, copy_X=self.copy_X) + + ####################################################################### + # 5. postprocessing # + ####################################################################### + if self.fit_intercept: + self.intercept_ = coef[0] + self.coef_ = coef[1:] + else: + # set intercept to zero as the other linear models do + self.intercept_ = 0. + self.coef_ = coef + + if self.fit_dispersion in ['chisqr', 'deviance']: + # attention because of rescaling of weights + self.dispersion_ = self.estimate_phi(X, y, weights)*weights_sum + + return self + + def linear_predictor(self, X): + """Compute the linear_predictor = X*coef_ + intercept_. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Samples. + + Returns + ------- + C : array, shape (n_samples,) + Returns predicted values of linear predictor. + """ + check_is_fitted(self, "coef_") + X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], + dtype='numeric', copy=True, ensure_2d=True, + allow_nd=False) + return X @ self.coef_ + self.intercept_ + + def predict(self, X, sample_weight=None): + """Predict uing GLM with feature matrix X. + If sample_weight is given, returns prediction*sample_weight. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Samples. + + sample_weight : {None, array-like}, shape (n_samples,), optional \ + (default=None) + + Returns + ------- + C : array, shape (n_samples,) + Returns predicted values times sample_weight. + """ + # TODO: Is copy=True necessary? + X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], + dtype='numeric', copy=True, ensure_2d=True, + allow_nd=False) + eta = self.linear_predictor(X) + mu = self._link_instance.inverse(eta) + weights = _check_weights(sample_weight, X.shape[0]) + + return mu*weights + + def estimate_phi(self, X, y, sample_weight=None): + """Estimate/fit the dispersion parameter phi. + + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Training data. + + y : array-like, shape (n_samples,) + Target values. + + sample_weight : {None, array-like}, shape (n_samples,), optional \ + (default=None) + Sample weights. + + Returns + ------- + phi : float + Dispersion parameter. + """ + check_is_fitted(self, "coef_") + _dtype = [np.float64, np.float32] + X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], + dtype=_dtype, y_numeric=True, multi_output=False) + n_samples, n_features = X.shape + weights = _check_weights(sample_weight, n_samples) + eta = X @ self.coef_ + if self.fit_intercept is True: + eta += self.intercept_ + n_features += 1 + if n_samples <= n_features: + raise ValueError("Estimation of dispersion parameter phi requires" + " more samples than features, got" + " samples=X.shape[0]={0} and" + " n_features=X.shape[1]+fit_intercept={1}." 
+ .format(n_samples, n_features)) + mu = self._link_instance.inverse(eta) + if self.fit_dispersion == 'chisqr': + chisq = np.sum(weights*(y-mu)**2 / + self._family_instance.unit_variance(mu)) + return chisq/(n_samples - n_features) + elif self.fit_dispersion == 'deviance': + dev = self._family_instance.deviance(y, mu, weights) + return dev/(n_samples - n_features) + + # Note: check_estimator(GeneralizedLinearRegressor) might raise + # "AssertionError: -0.28014056555724598 not greater than 0.5" + # unless GeneralizedLinearRegressor has a score which passes the test. + def score(self, X, y, sample_weight=None): + """Compute D^2, the percentage of deviance explained. + + D^2 is a generalization of the coefficient of determination R^2. + R^2 uses squared error and D^2 deviance. Note that those two are equal + for family='normal'. + + D^2 is defined as + :math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`, + :math:`D_{null}` is the null deviance, i.e. the deviance of a model + with intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`. + The mean :math:`\\bar{y}` is averaged by sample_weight. + Best possible score is 1.0 and it can be negative (because the model + can be arbitrarily worse). + + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Test samples. + + y : array-like, shape (n_samples,) + True values of target. + + sample_weight : {None, array-like}, shape (n_samples,), optional \ + (default=None) + Sample weights. + + Returns + ------- + score : float + D^2 of self.predict(X) w.r.t. y. + """ + # Note, default score defined in RegressorMixin is R^2 score. + # TODO: make D^2 a score function in module metrics (and thereby get + # input validation and so on) + weights = _check_weights(sample_weight, y.shape[0]) + mu = self.predict(X) + dev = self._family_instance.deviance(y, mu, weights=weights) + y_mean = np.average(y, weights=weights) + dev_null = self._family_instance.deviance(y, y_mean, weights=weights) + return 1. 
- dev / dev_null diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py new file mode 100644 index 0000000000000..7cb3f4a5b5969 --- /dev/null +++ b/sklearn/linear_model/tests/test_glm.py @@ -0,0 +1,640 @@ +import numpy as np +from numpy.testing import assert_allclose +import pytest +import scipy as sp +from scipy import linalg, optimize, sparse + +from sklearn.datasets import make_classification, make_regression +from sklearn.linear_model.glm import ( + Link, + IdentityLink, + LogLink, + LogitLink, + TweedieDistribution, + NormalDistribution, PoissonDistribution, + GammaDistribution, InverseGaussianDistribution, + GeneralizedHyperbolicSecant, BinomialDistribution, + GeneralizedLinearRegressor) +from sklearn.linear_model import ElasticNet, LogisticRegression, Ridge + +from sklearn.utils.testing import ( + assert_equal, assert_almost_equal, + assert_array_equal, assert_array_almost_equal) + + +@pytest.mark.parametrize('link', Link.__subclasses__()) +def test_link_properties(link): + """Test link inverse and derivative.""" + rng = np.random.RandomState(42) + x = rng.rand(100)*100 + link = link() # instatiate object + decimal = 10 + if isinstance(link, LogitLink): + # careful for large x, note expit(36) = 1 + # limit max eta to 15 + x = x / 100 * 15 + decimal = 8 + assert_almost_equal(link.link(link.inverse(x)), x, decimal=decimal) + # if f(g(x)) = x, then f'(g(x)) = 1/g'(x) + assert_almost_equal(link.derivative(link.inverse(x)), + 1./link.inverse_derivative(x), decimal=decimal) + # for LogitLink, in the following x should be between 0 and 1. + # assert_almost_equal(link.inverse_derivative(link.link(x)), + # 1./link.derivative(x), decimal=decimal) + + +@pytest.mark.parametrize( + 'family, expected', + [(NormalDistribution(), [True, True, True]), + (PoissonDistribution(), [False, True, True]), + (TweedieDistribution(power=1.5), [False, True, True]), + (GammaDistribution(), [False, False, True]), + (InverseGaussianDistribution(), [False, False, True]), + (TweedieDistribution(power=4.5), [False, False, True])]) +def test_family_bounds(family, expected): + """Test the valid range of distributions at -1, 0, 1.""" + result = family.in_y_range([-1, 0, 1]) + assert_array_equal(result, expected) + + +@pytest.mark.parametrize( + 'family, chk_values', + [(NormalDistribution(), [-1.5, -0.1, 0.1, 2.5]), + (PoissonDistribution(), [0.1, 1.5]), + (GammaDistribution(), [0.1, 1.5]), + (InverseGaussianDistribution(), [0.1, 1.5]), + (TweedieDistribution(power=-2.5), [0.1, 1.5]), + (TweedieDistribution(power=-1), [0.1, 1.5]), + (TweedieDistribution(power=1.5), [0.1, 1.5]), + (TweedieDistribution(power=2.5), [0.1, 1.5]), + (TweedieDistribution(power=-4), [0.1, 1.5]), + (GeneralizedHyperbolicSecant(), [0.1, 1.5])]) +def test_deviance_zero(family, chk_values): + """Test deviance(y,y) = 0 for different families.""" + for x in chk_values: + assert_almost_equal(family.deviance(x, x), 0, decimal=10) + + +@pytest.mark.parametrize( + 'family, link', + [(NormalDistribution(), IdentityLink()), + (PoissonDistribution(), LogLink()), + (GammaDistribution(), LogLink()), + (InverseGaussianDistribution(), LogLink()), + (TweedieDistribution(power=1.5), LogLink()), + (TweedieDistribution(power=4.5), LogLink())]) +def test_fisher_matrix(family, link): + """Test the Fisher matrix numerically. 
+ Trick: Use numerical differentiation with y = mu""" + coef = np.array([-2, 1, 0, 1, 2.5]) + phi = 0.5 + rng = np.random.RandomState(42) + X = rng.randn(10, 5) + lin_pred = np.dot(X, coef) + mu = link.inverse(lin_pred) + weights = rng.randn(10)**2 + 1 + fisher = family._fisher_matrix(coef=coef, phi=phi, X=X, y=mu, + weights=weights, link=link) + approx = np.array([]).reshape(0, coef.shape[0]) + for i in range(coef.shape[0]): + def f(coef): + return -family._score(coef=coef, phi=phi, X=X, y=mu, + weights=weights, link=link)[i] + approx = np.vstack( + [approx, sp.optimize.approx_fprime(xk=coef, f=f, epsilon=1e-5)]) + assert_allclose(fisher, approx, rtol=1e-3) + + +def test_sample_weights_validation(): + """Test the raised errors in the validation of sample_weight.""" + # 1. scalar value but not positive + X = [[1]] + y = [1] + weights = 0 + glm = GeneralizedLinearRegressor(fit_intercept=False) + with pytest.raises(ValueError): + glm.fit(X, y, weights) + + # 2. 2d array + weights = [[0]] + with pytest.raises(ValueError): + glm.fit(X, y, weights) + + # 3. 1d but wrong length + weights = [1, 0] + with pytest.raises(ValueError): + glm.fit(X, y, weights) + + # 4. 1d but only zeros (sum not greater than 0) + weights = [0, 0] + X = [[0], [1]] + y = [1, 2] + with pytest.raises(ValueError): + glm.fit(X, y, weights) + + # 5. 1d but weith a negative value + weights = [2, -1] + with pytest.raises(ValueError): + glm.fit(X, y, weights) + + +@pytest.mark.parametrize('f, fam', + [('normal', NormalDistribution()), + ('poisson', PoissonDistribution()), + ('gamma', GammaDistribution()), + ('inverse.gaussian', InverseGaussianDistribution()), + ('binomial', BinomialDistribution())]) +def test_glm_family_argument(f, fam): + """Test GLM family argument set as string.""" + y = np.array([0.1, 0.5]) # in range of all distributions + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(family=f, alpha=0).fit(X, y) + assert_equal(type(glm._family_instance), type(fam)) + + glm = GeneralizedLinearRegressor(family='not a family', + fit_intercept=False) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('l, link', + [('identity', IdentityLink()), + ('log', LogLink()), + ('logit', LogitLink())]) +def test_glm_link_argument(l, link): + """Test GLM link argument set as string.""" + y = np.array([0.1, 0.5]) # in range of all distributions + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(family='normal', link=l).fit(X, y) + assert_equal(type(glm._link_instance), type(link)) + + glm = GeneralizedLinearRegressor(family='normal', link='not a link') + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('alpha', ['not a number', -4.2]) +def test_glm_alpha_argument(alpha): + """Test GLM for invalid alpha argument.""" + y = np.array([1, 2]) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(family='normal', alpha=alpha) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('l1_ratio', ['not a number', -4.2, 1.1, [1]]) +def test_glm_l1_ratio_argument(l1_ratio): + """Test GLM for invalid l1_ratio argument.""" + y = np.array([1, 2]) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(family='normal', l1_ratio=l1_ratio) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('P1', [['a string', 'a string'], [1, [2]], [1, 2, 3], + [-1]]) +def test_glm_P1_argument(P1): + """Test GLM for invalid P1 argument.""" + y = np.array([1, 2]) + X = np.array([[1], [2]]) + glm = 
GeneralizedLinearRegressor(P1=P1, l1_ratio=0.5, check_input=True) + with pytest.raises((ValueError, TypeError)): + glm.fit(X, y) + + +@pytest.mark.parametrize('P2', ['a string', [1, 2, 3], [[2, 3]], + sparse.csr_matrix([1, 2, 3]), [-1]]) +def test_glm_P2_argument(P2): + """Test GLM for invalid P2 argument.""" + y = np.array([1, 2]) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(P2=P2, check_input=True) + with pytest.raises(ValueError): + glm.fit(X, y) + + +def test_glm_P2_positive_semidefinite(): + """Test GLM for a positive semi-definite P2 argument.""" + n_samples, n_features = 10, 5 + y = np.arange(n_samples) + X = np.zeros((n_samples, n_features)) + P2 = np.diag([100, 10, 5, 0, -1E-5]) + rng = np.random.RandomState(42) + # construct random orthogonal matrix Q + Q, R = linalg.qr(rng.randn(n_features, n_features)) + P2 = Q.T @ P2 @ Q + glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False, + check_input=True) + with pytest.raises(ValueError): + glm.fit(X, y) + + P2 = sparse.csr_matrix(P2) + glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False, + check_input=True) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('fit_intercept', ['not bool', 1, 0, [True]]) +def test_glm_fit_intercept_argument(fit_intercept): + """Test GLM for invalid fit_intercept argument.""" + y = np.array([1, 2]) + X = np.array([[1], [1]]) + glm = GeneralizedLinearRegressor(fit_intercept=fit_intercept) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('solver, l1_ratio', + [('not a solver', 0), (1, 0), ([1], 0), + ('irls', 0.5), ('lbfgs', 0.5), ('newton-cg', 0.5)]) +def test_glm_solver_argument(solver, l1_ratio): + """Test GLM for invalid solver argument.""" + y = np.array([1, 2]) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(solver=solver, l1_ratio=l1_ratio) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('max_iter', ['not a number', 0, -1, 5.5, [1]]) +def test_glm_max_iter_argument(max_iter): + """Test GLM for invalid max_iter argument.""" + y = np.array([1, 2]) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(max_iter=max_iter) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('tol', ['not a number', 0, -1.0, [1e-3]]) +def test_glm_tol_argument(tol): + """Test GLM for invalid tol argument.""" + y = np.array([1, 2]) + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(tol=tol) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('warm_start', ['not bool', 1, 0, [True]]) +def test_glm_warm_start_argument(warm_start): + """Test GLM for invalid warm_start argument.""" + y = np.array([1, 2]) + X = np.array([[1], [1]]) + glm = GeneralizedLinearRegressor(warm_start=warm_start) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('start_params', + ['not a start_params', ['zero'], [0, 0, 0], + [[0, 0]], ['a', 'b']]) +def test_glm_start_params_argument(start_params): + """Test GLM for invalid start_params argument.""" + y = np.array([1, 2]) + X = np.array([[1], [1]]) + glm = GeneralizedLinearRegressor(start_params=start_params) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('selection', ['not a selection', 1, 0, ['cyclic']]) +def test_glm_selection_argument(selection): + """Test GLM for invalid selection argument""" + y = np.array([1, 2]) + X = np.array([[1], [1]]) + glm = GeneralizedLinearRegressor(selection=selection) + with pytest.raises(ValueError): + 
glm.fit(X, y) + + +@pytest.mark.parametrize('random_state', ['a string', 0.5, [0]]) +def test_glm_random_state_argument(random_state): + """Test GLM for invalid random_state argument.""" + y = np.array([1, 2]) + X = np.array([[1], [1]]) + glm = GeneralizedLinearRegressor(random_state=random_state) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('diag_fisher', ['not bool', 1, 0, [True]]) +def test_glm_diag_fisher_argument(diag_fisher): + """Test GLM for invalid diag_fisher arguments.""" + y = np.array([1, 2]) + X = np.array([[1], [1]]) + glm = GeneralizedLinearRegressor(diag_fisher=diag_fisher) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('copy_X', ['not bool', 1, 0, [True]]) +def test_glm_copy_X_argument(copy_X): + """Test GLM for invalid copy_X arguments.""" + y = np.array([1, 2]) + X = np.array([[1], [1]]) + glm = GeneralizedLinearRegressor(copy_X=copy_X) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('check_input', ['not bool', 1, 0, [True]]) +def test_glm_check_input_argument(check_input): + """Test GLM for invalid check_input argument.""" + y = np.array([1, 2]) + X = np.array([[1], [1]]) + glm = GeneralizedLinearRegressor(check_input=check_input) + with pytest.raises(ValueError): + glm.fit(X, y) + + +@pytest.mark.parametrize('solver', ['irls', 'lbfgs', 'newton-cg', 'cd']) +def test_glm_identiy_regression(solver): + """Test GLM regression with identity link on a simple dataset.""" + coef = [1, 2] + X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T + y = np.dot(X, coef) + glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', + fit_intercept=False, solver=solver, + start_params='zero', tol=1e-7) + res = glm.fit(X, y) + assert_array_almost_equal(res.coef_, coef) + + +@pytest.mark.parametrize( + 'family', + [NormalDistribution(), PoissonDistribution(), + GammaDistribution(), InverseGaussianDistribution(), + TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), + GeneralizedHyperbolicSecant()]) +@pytest.mark.parametrize('solver, tol, dec', [('irls', 1e-6, 6), + ('lbfgs', 1e-6, 6), + ('newton-cg', 1e-7, 6), + ('cd', 1e-7, 6)]) +def test_glm_log_regression(family, solver, tol, dec): + """Test GLM regression with log link on a simple dataset.""" + coef = [0.2, -0.1] + X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T + y = np.exp(np.dot(X, coef)) + glm = GeneralizedLinearRegressor( + alpha=0, family=family, link='log', fit_intercept=False, + solver=solver, start_params='guess', tol=tol) + res = glm.fit(X, y) + assert_array_almost_equal(res.coef_, coef, decimal=dec) + + +@pytest.mark.filterwarnings('ignore::DeprecationWarning') +@pytest.mark.parametrize('solver, tol, dec', [('irls', 1e-6, 5), + ('lbfgs', 1e-6, 5), + ('newton-cg', 1e-5, 5), + ('cd', 1e-6, 6)]) +def test_normal_ridge(solver, tol, dec): + """Test ridge regression for Normal distributions. + + Compare to test_ridge in test_ridge.py. + """ + rng = np.random.RandomState(42) + alpha = 1.0 + + # 1. 
With more samples than features + n_samples, n_features, n_predict = 100, 7, 10 + X, y, coef = make_regression(n_samples=n_samples+n_predict, + n_features=n_features, + n_informative=n_features-2, noise=0.5, + coef=True, random_state=rng) + y = y[0:n_samples] + X, T = X[0:n_samples], X[n_samples:] + + # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-6, + solver='svd', normalize=False) + ridge.fit(X, y) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', + link='identity', fit_intercept=True, + tol=tol, max_iter=100, solver=solver, + check_input=False, random_state=rng) + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) + + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-6, + solver='svd', normalize=False) + ridge.fit(X, y) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', + link='identity', fit_intercept=False, + tol=tol, max_iter=100, solver=solver, + check_input=False, random_state=rng, + fit_dispersion='chisqr') + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) + mu = glm.predict(X) + assert_almost_equal(glm.dispersion_, + np.sum((y-mu)**2/(n_samples-n_features))) + + # 2. With more features than samples and sparse + n_samples, n_features, n_predict = 10, 100, 10 + X, y, coef = make_regression(n_samples=n_samples+n_predict, + n_features=n_features, + n_informative=n_features-2, noise=0.5, + coef=True, random_state=rng) + y = y[0:n_samples] + X, T = X[0:n_samples], X[n_samples:] + + # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-9, + solver='sag', normalize=False, max_iter=100000) + ridge.fit(X, y) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', + link='identity', fit_intercept=True, + tol=tol, max_iter=300, solver=solver, + check_input=False, random_state=rng) + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec) + + ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-7, + solver='sag', normalize=False, max_iter=1000) + ridge.fit(X, y) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', + link='identity', fit_intercept=False, + tol=tol*2, max_iter=300, solver=solver, + check_input=False, random_state=rng) + glm.fit(X, y) + assert_equal(glm.coef_.shape, (X.shape[1], )) + assert_array_almost_equal(glm.coef_, ridge.coef_, decimal=dec-1) + assert_almost_equal(glm.intercept_, ridge.intercept_, decimal=dec-1) + assert_array_almost_equal(glm.predict(T), ridge.predict(T), decimal=dec-2) + + +@pytest.mark.parametrize('solver, tol, dec', + [('irls', 1e-7, 6), + ('lbfgs', 1e-7, 5), + ('newton-cg', 1e-7, 5), + ('cd', 1e-7, 7)]) +def test_poisson_ridge(solver, tol, dec): + """Test ridge regression with poisson family and LogLink. 
+ + Compare to R's glmnet""" + # library("glmnet") + # options(digits=10) + # df <- data.frame(a=c(-2,-1,1,2), b=c(0,0,1,1), y=c(0,1,1,2)) + # x <- data.matrix(df[,c("a", "b")]) + # y <- df$y + # fit <- glmnet(x=x, y=y, alpha=0, intercept=T, family="poisson", + # standardize=F, thresh=1e-10, nlambda=10000) + # coef(fit, s=1) + # (Intercept) -0.12889386979 + # a 0.29019207995 + # b 0.03741173122 + X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T + y = np.array([0, 1, 1, 2]) + rng = np.random.RandomState(42) + glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, + fit_intercept=True, family='poisson', + link='log', tol=tol, + solver=solver, max_iter=300, + random_state=rng) + glm.fit(X, y) + assert_almost_equal(glm.intercept_, -0.12889386979, + decimal=dec) + assert_array_almost_equal(glm.coef_, [0.29019207995, 0.03741173122], + decimal=dec) + + +@pytest.mark.parametrize('diag_fisher', [False, True]) +def test_normal_enet(diag_fisher): + """Test elastic net regression with normal/gaussian family.""" + alpha, l1_ratio = 0.3, 0.7 + n_samples, n_features = 20, 2 + rng = np.random.RandomState(42) + X = rng.randn(n_samples, n_features).copy(order='F') + beta = rng.randn(n_features) + y = 2 + np.dot(X, beta) + rng.randn(n_samples) + + # 1. test normal enet on dense data + glm = GeneralizedLinearRegressor(alpha=alpha, l1_ratio=l1_ratio, + family='normal', link='identity', + fit_intercept=True, tol=1e-8, + max_iter=100, selection='cyclic', + solver='cd', start_params='zero', + check_input=False, + diag_fisher=diag_fisher) + glm.fit(X, y) + + enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=True, + normalize=False, tol=1e-8, copy_X=True) + enet.fit(X, y) + + assert_almost_equal(glm.intercept_, enet.intercept_, decimal=7) + assert_array_almost_equal(glm.coef_, enet.coef_, decimal=7) + + # 2. test normal enet on sparse data + X = sparse.csc_matrix(X) + glm.fit(X, y) + assert_almost_equal(glm.intercept_, enet.intercept_, decimal=7) + assert_array_almost_equal(glm.coef_, enet.coef_, decimal=7) + + +def test_poisson_enet(): + """Test elastic net regression with poisson family and LogLink. + + Compare to R's glmnet""" + # library("glmnet") + # options(digits=10) + # df <- data.frame(a=c(-2,-1,1,2), b=c(0,0,1,1), y=c(0,1,1,2)) + # x <- data.matrix(df[,c("a", "b")]) + # y <- df$y + # fit <- glmnet(x=x, y=y, alpha=0.5, intercept=T, family="poisson", + # standardize=F, thresh=1e-10, nlambda=10000) + # coef(fit, s=1) + # (Intercept) -0.03550978409 + # a 0.16936423283 + # b . + glmnet_intercept = -0.03550978409 + glmnet_coef = [0.16936423283, 0.] 
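The glmnet reference values above are checked further below against directly minimizing 1/(2*N) * deviance + 0.5 * alpha * (1 - l1_ratio) * ||w||_2^2 + alpha * l1_ratio * ||w||_1. As a standalone sketch of the Poisson deviance entering that objective (plain NumPy, using the usual convention that a y=0 observation contributes 2*mu):

    import numpy as np

    def poisson_deviance(y, mu):
        # 2 * sum(y * log(y / mu) - (y - mu)), with the y = 0 term taken as 0
        safe_y = np.where(y > 0, y, 1.0)
        term = np.where(y > 0, y * np.log(safe_y / mu), 0.0)
        return 2.0 * np.sum(term - (y - mu))

    y = np.array([0., 1., 1., 2.])
    mu = np.full_like(y, y.mean())
    print(poisson_deviance(y, mu))   # deviance of an intercept-only fit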
+ X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T + y = np.array([0, 1, 1, 2]) + rng = np.random.RandomState(42) + glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', + link='log', solver='cd', tol=1e-8, + selection='random', random_state=rng, + start_params='guess') + glm.fit(X, y) + assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=7) + assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=7) + + # test results with general optimization procedure + def obj(coef): + pd = PoissonDistribution() + link = LogLink() + N = y.shape[0] + mu = link.inverse(X @ coef[1:] + coef[0]) + alpha, l1_ratio = (1, 0.5) + return 1./(2.*N) * pd.deviance(y, mu) \ + + 0.5 * alpha * (1-l1_ratio) * (coef[1:]**2).sum() \ + + alpha * l1_ratio * np.sum(np.abs(coef[1:])) + res = optimize.minimize(obj, [0, 0, 0], method='nelder-mead', tol=1e-10, + options={'maxiter': 1000, 'disp': False}) + assert_almost_equal(glm.intercept_, res.x[0], decimal=5) + assert_almost_equal(glm.coef_, res.x[1:], decimal=5) + assert_almost_equal(obj(np.concatenate(([glm.intercept_], glm.coef_))), + res.fun, decimal=8) + + # same for start_params='zero' and selection='cyclic' + # with reduced precision + glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', + link='log', solver='cd', tol=1e-5, + selection='cyclic', start_params='zero') + glm.fit(X, y) + assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=4) + assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=4) + + # check warm_start, therefore start with different alpha + glm = GeneralizedLinearRegressor(alpha=0.005, l1_ratio=0.5, + family='poisson', max_iter=300, + link='log', solver='cd', tol=1e-5, + selection='cyclic', start_params='zero') + glm.fit(X, y) + # warm start with original alpha and use of sparse matrices + glm.warm_start = True + glm.alpha = 1 + X = sparse.csr_matrix(X) + glm.fit(X, y) + assert_almost_equal(glm.intercept_, glmnet_intercept, decimal=4) + assert_array_almost_equal(glm.coef_, glmnet_coef, decimal=4) + + +@pytest.mark.parametrize('alpha', [0.01, 0.1, 1, 10]) +def test_binomial_enet(alpha): + """Test elastic net regression with binomial family and LogitLink. + + Compare to LogisticRegression. 
+ """ + l1_ratio = 0.5 + n_samples = 500 + rng = np.random.RandomState(42) + X, y = make_classification(n_samples=n_samples, n_classes=2, n_features=6, + n_informative=5, n_redundant=0, n_repeated=0, + random_state=rng) + log = LogisticRegression( + penalty='elasticnet', random_state=rng, fit_intercept=False, tol=1e-6, + max_iter=1000, l1_ratio=l1_ratio, C=1./(n_samples * alpha), + solver='saga') + log.fit(X, y) + glm = GeneralizedLinearRegressor( + family=BinomialDistribution(), link=LogitLink(), fit_intercept=False, + alpha=alpha, l1_ratio=l1_ratio, solver='cd', selection='cyclic', + tol=1e-7) + glm.fit(X, y) + assert_almost_equal(log.intercept_[0], glm.intercept_, decimal=6) + assert_array_almost_equal(log.coef_[0, :], glm.coef_, decimal=6) From 9b574bdc0b3a4a1081f73563d0468c864fcbbb22 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 11 Jun 2019 07:30:52 -0500 Subject: [PATCH 058/269] Fix docstrings for the new print_changed_only=True by default --- doc/modules/linear_model.rst | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 52f28346cc047..b9f0e96734ae5 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -942,17 +942,11 @@ follows: >>> from sklearn.linear_model import GeneralizedLinearRegressor >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') - >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) # doctest: +NORMALIZE_WHITESPACE - GeneralizedLinearRegressor(P1='identity', P2='identity', alpha=0.5, - check_input=True, copy_X=True, diag_fisher=False, - family='poisson', fit_dispersion=None, - fit_intercept=True, l1_ratio=0, link='log', - max_iter=100, random_state=None, selection='cyclic', - solver='auto', start_params='guess', tol=0.0001, - verbose=0, warm_start=False) - >>> reg.coef_ # doctest: +NORMALIZE_WHITESPACE + >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) + GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') + >>> reg.coef_ array([0.24630169, 0.43373464]) - >>> reg.intercept_ #doctest: +ELLIPSIS + >>> reg.intercept_ -0.76383633... From 90299fdd8a4745965eb8f9dbe26ddb685f1531f1 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 12 Jun 2019 04:31:50 -0500 Subject: [PATCH 059/269] Increase coverage --- sklearn/linear_model/_glm.py | 22 +++----- sklearn/linear_model/tests/test_glm.py | 78 ++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 17 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index 8152e84ac7253..055aacf26d747 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -182,7 +182,7 @@ def link(self, mu): mu : array, shape (n_samples,) Usually the (predicted) mean. """ - raise NotImplementedError + pass @abstractmethod def derivative(self, mu): @@ -193,7 +193,7 @@ def derivative(self, mu): mu : array, shape (n_samples,) Usually the (predicted) mean. """ - raise NotImplementedError + pass @abstractmethod def inverse(self, lin_pred): @@ -207,7 +207,7 @@ def inverse(self, lin_pred): lin_pred : array, shape (n_samples,) Usually the (fitted) linear predictor. """ - raise NotImplementedError + pass @abstractmethod def inverse_derivative(self, lin_pred): @@ -218,7 +218,7 @@ def inverse_derivative(self, lin_pred): lin_pred : array, shape (n_samples,) Usually the (fitted) linear predictor. 
""" - raise NotImplementedError + pass @abstractmethod def inverse_derivative2(self, lin_pred): @@ -229,7 +229,7 @@ def inverse_derivative2(self, lin_pred): lin_pred : array, shape (n_samples,) Usually the (fitted) linear predictor. """ - raise NotImplementedError + pass class IdentityLink(Link): @@ -400,7 +400,7 @@ def unit_variance(self, mu): mu : array, shape (n_samples,) Predicted mean. """ - raise NotImplementedError() + pass @abstractmethod def unit_variance_derivative(self, mu): @@ -413,7 +413,7 @@ def unit_variance_derivative(self, mu): mu : array, shape (n_samples,) Target values. """ - raise NotImplementedError() + pass def variance(self, mu, phi=1, weights=1): r"""Compute the variance function. @@ -473,7 +473,7 @@ def unit_deviance(self, y, mu): mu : array, shape (n_samples,) Predicted mean. """ - raise NotImplementedError() + pass def unit_deviance_derivative(self, y, mu): r"""Compute the derivative of the unit deviance w.r.t. mu. @@ -1079,7 +1079,6 @@ def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, B[idx:, idx:] += P2.toarray() else: B[idx:, idx:] += P2 - # A = -score + coef_P2 A = -score A[idx:] += coef_P2 # A += d @ (H+P2) but so far d=0 @@ -1302,9 +1301,6 @@ def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, P2 = check_array(P2, 'csc', dtype=[np.float64, np.float32], order='F', copy=copy_X) if sparse.issparse(X): - if not sparse.isspmatrix_csc(X): - raise ValueError("If X is sparse, it must be in csc format" - "; got (format={})".format(X.format)) if not sparse.isspmatrix_csc(P2): raise ValueError("If X is sparse, P2 must also be sparse csc" "format. Got P2 not sparse.") @@ -2021,7 +2017,7 @@ def fit(self, X, y, sample_weight=None): _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, max_inner_iter=1000, selection=self.selection, - random_state=self.random_state, + random_state=random_state, diag_fisher=self.diag_fisher) coef += d # for simplicity no line search here else: diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index d42a8739f6aa0..230bbdabae201 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -21,10 +21,20 @@ GeneralizedHyperbolicSecant, BinomialDistribution, ) from sklearn.linear_model import ElasticNet, LogisticRegression, Ridge +from sklearn.metrics import mean_absolute_error from sklearn.utils.testing import assert_array_equal +@pytest.fixture(scope="module") +def regression_data(): + X, y = make_regression(n_samples=107, + n_features=10, + n_informative=80, noise=0.5, + random_state=2) + return X, y + + @pytest.mark.parametrize('link', Link.__subclasses__()) def test_link_properties(link): """Test link inverse and derivative.""" @@ -39,6 +49,10 @@ def test_link_properties(link): # if f(g(x)) = x, then f'(g(x)) = 1/g'(x) assert_allclose(link.derivative(link.inverse(x)), 1./link.inverse_derivative(x)) + + assert ( + link.inverse_derivative2(x).shape == link.inverse_derivative(x).shape) + # for LogitLink, in the following x should be between 0 and 1. # assert_almost_equal(link.inverse_derivative(link.link(x)), # 1./link.derivative(x), decimal=decimal) @@ -108,7 +122,7 @@ def f(coef): def test_sample_weights_validation(): """Test the raised errors in the validation of sample_weight.""" - # 1. scalar value but not positive + # scalar value but not positive X = [[1]] y = [1] weights = 0 @@ -116,17 +130,20 @@ def test_sample_weights_validation(): with pytest.raises(ValueError): glm.fit(X, y, weights) - # 2. 
2d array + # Positive weights are accepted + glm.fit(X, y, sample_weight=1) + + # 2d array weights = [[0]] with pytest.raises(ValueError): glm.fit(X, y, weights) - # 3. 1d but wrong length + # 1d but wrong length weights = [1, 0] with pytest.raises(ValueError): glm.fit(X, y, weights) - # 4. 1d but only zeros (sum not greater than 0) + # 1d but only zeros (sum not greater than 0) weights = [0, 0] X = [[0], [1]] y = [1, 2] @@ -643,3 +660,56 @@ def test_binomial_enet(alpha): glm.fit(X, y) assert_allclose(log.intercept_[0], glm.intercept_, rtol=1e-6) assert_allclose(log.coef_[0, :], glm.coef_, rtol=5e-6) + + +@pytest.mark.parametrize( + "params", + [ + {"solver": "irls", "start_params": "guess"}, + {"solver": "irls", "start_params": "zero"}, + {"solver": "lbfgs", "start_params": "guess"}, + {"solver": "lbfgs", "start_params": "zero"}, + {"solver": "newton-cg"}, + {"solver": "cd", "selection": "cyclic", "diag_fisher": False}, + {"solver": "cd", "selection": "cyclic", "diag_fisher": True}, + {"solver": "cd", "selection": "random", "diag_fisher": False}, + ], + ids=lambda params: ', '.join("%s=%s" % (key, val) + for key, val in params.items()) +) +def test_solver_equivalence(params, regression_data): + X, y = regression_data + est_ref = GeneralizedLinearRegressor(random_state=2) + est_ref.fit(X, y) + + estimator = GeneralizedLinearRegressor(**params) + estimator.set_params(random_state=2) + + estimator.fit(X, y) + + assert_allclose(estimator.intercept_, est_ref.intercept_, rtol=1e-4) + assert_allclose(estimator.coef_, est_ref.coef_, rtol=1e-4) + assert_allclose( + mean_absolute_error(estimator.predict(X), y), + mean_absolute_error(est_ref.predict(X), y), + rtol=1e-4 + ) + + +def test_fit_dispersion(regression_data): + X, y = regression_data + + est1 = GeneralizedLinearRegressor(random_state=2) + est1.fit(X, y) + assert not hasattr(est1, "dispersion_") + + est2 = GeneralizedLinearRegressor(random_state=2, fit_dispersion="chisqr") + est2.fit(X, y) + assert isinstance(est2.dispersion_, float) + + est3 = GeneralizedLinearRegressor( + random_state=2, fit_dispersion="deviance") + est3.fit(X, y) + assert isinstance(est3.dispersion_, float) + + assert_allclose(est2.dispersion_, est3.dispersion_) From e3a5a9aa6855928d89bdc5dad7f3a46ba934fb8e Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 12 Jun 2019 12:00:28 -0500 Subject: [PATCH 060/269] More tests and addressing some review comments --- sklearn/linear_model/_glm.py | 51 +++++++++++++------------- sklearn/linear_model/tests/test_glm.py | 43 +++++++++++++++++++++- 2 files changed, 67 insertions(+), 27 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index 055aacf26d747..acce438b2b5e2 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -6,8 +6,6 @@ # some parts and tricks stolen from other sklearn files. # License: BSD 3 clause -# TODO: Should the option `normalize` be included (like other linear models)? -# So far, it is not included. User must pass a normalized X. # TODO: Add cross validation support, e.g. GCV? # TODO: Should GeneralizedLinearRegressor inherit from LinearModel? # So far, it does not. @@ -287,7 +285,6 @@ def inverse_derivative(self, lin_pred): return ep * (1. - ep) def inverse_derivative2(self, lin_pred): - ep = special.expit(lin_pred) ep = special.expit(lin_pred) return ep * (1. - ep) * (1. 
- 2 * ep) @@ -738,7 +735,19 @@ class TweedieDistribution(ExponentialDispersionModel): For ``0 0) - 1, pgtol=self.tol, maxiter=self.max_iter, factr=1e3) - if self.verbose > 0: - if info["warnflag"] == 1: - warnings.warn("lbfgs failed to converge." - " Increase the number of iterations.", - ConvergenceWarning) - elif info["warnflag"] == 2: - warnings.warn("lbfgs failed for the reason: {0}" - .format(info["task"])) + if info["warnflag"] == 1: + warnings.warn("lbfgs failed to converge." + " Increase the number of iterations.", + ConvergenceWarning) + elif info["warnflag"] == 2: + warnings.warn("lbfgs failed for the reason: {0}" + .format(info["task"])) self.n_iter_ = info['nit'] # 4.3 Newton-CG ####################################################### diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 230bbdabae201..7229f21840829 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -22,6 +22,7 @@ ) from sklearn.linear_model import ElasticNet, LogisticRegression, Ridge from sklearn.metrics import mean_absolute_error +from sklearn.exceptions import ConvergenceWarning from sklearn.utils.testing import assert_array_equal @@ -72,6 +73,23 @@ def test_family_bounds(family, expected): assert_array_equal(result, expected) +def test_tweedie_distribution_power(): + with pytest.raises(ValueError, match="no distribution exists"): + TweedieDistribution(power=0.5) + + with pytest.raises(TypeError, match="must be a real number"): + TweedieDistribution(power=1j) + + with pytest.raises(TypeError, match="must be a real number"): + dist = TweedieDistribution() + dist.power = 1j + + dist = TweedieDistribution() + assert dist._include_lower_bound is False + dist.power = 1 + assert dist._include_lower_bound is True + + @pytest.mark.parametrize( 'family, chk_values', [(NormalDistribution(), [-1.5, -0.1, 0.1, 2.5]), @@ -97,7 +115,8 @@ def test_deviance_zero(family, chk_values): (GammaDistribution(), LogLink()), (InverseGaussianDistribution(), LogLink()), (TweedieDistribution(power=1.5), LogLink()), - (TweedieDistribution(power=4.5), LogLink())]) + (TweedieDistribution(power=4.5), LogLink())], + ids=lambda args: args.__class__.__name__) def test_fisher_matrix(family, link): """Test the Fisher matrix numerically. 
Trick: Use numerical differentiation with y = mu""" @@ -110,6 +129,11 @@ def test_fisher_matrix(family, link): weights = rng.randn(10)**2 + 1 fisher = family._fisher_matrix(coef=coef, phi=phi, X=X, y=mu, weights=weights, link=link) + # check that the Fisher matrix is square and positive definite + assert fisher.ndim == 2 + assert fisher.shape[0] == fisher.shape[1] + assert np.all(np.linalg.eigvals(fisher) >= 0) + approx = np.array([]).reshape(0, coef.shape[0]) for i in range(coef.shape[0]): def f(coef): @@ -119,6 +143,13 @@ def f(coef): [approx, sp.optimize.approx_fprime(xk=coef, f=f, epsilon=1e-5)]) assert_allclose(fisher, approx, rtol=1e-3) + # check the observed information matrix + oim = family._observed_information(coef=coef, phi=phi, X=X, y=mu, + weights=weights, link=link) + assert oim.ndim == 2 + assert oim.shape == fisher.shape + assert_allclose(oim, fisher) + def test_sample_weights_validation(): """Test the raised errors in the validation of sample_weight.""" @@ -713,3 +744,13 @@ def test_fit_dispersion(regression_data): assert isinstance(est3.dispersion_, float) assert_allclose(est2.dispersion_, est3.dispersion_) + + +@pytest.mark.parametrize("solver", ["irls", "lbfgs", "newton-cg", "cd"]) +def test_convergence_warning(solver, regression_data): + X, y = regression_data + + est = GeneralizedLinearRegressor(solver=solver, random_state=2, + max_iter=1, tol=1e-20) + with pytest.warns(ConvergenceWarning): + est.fit(X, y) From 54b80b8db780fbd3b438c6d5dc0001cdafe7a47e Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 13 Jun 2019 09:50:53 -0500 Subject: [PATCH 061/269] TST More specific checks of error messages in tests --- sklearn/linear_model/_glm.py | 6 ++-- sklearn/linear_model/tests/test_glm.py | 44 ++++++++++++++------------ 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index acce438b2b5e2..c4f8cf7a975d3 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -1941,8 +1941,10 @@ def fit(self, X, y, sample_weight=None): k = np.min([10, n_features // 10 + 1]) sigma = 0 # start searching near this value which = 'SA' # find smallest algebraic eigenvalues first - if not np.all(splinalg.eigsh(P2, k=k, sigma=sigma, - which=which) >= epsneg): + eigenvalues = splinalg.eigsh(P2, k=k, sigma=sigma, + which=which, + return_eigenvectors=False) + if not np.all(eigenvalues >= epsneg): raise ValueError("P2 must be positive semi-definite.") else: if not np.all(linalg.eigvalsh(P2) >= epsneg): diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 7229f21840829..de1a5262b36ce 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -158,7 +158,7 @@ def test_sample_weights_validation(): y = [1] weights = 0 glm = GeneralizedLinearRegressor(fit_intercept=False) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="weights must be non-negative"): glm.fit(X, y, weights) # Positive weights are accepted @@ -166,24 +166,26 @@ def test_sample_weights_validation(): # 2d array weights = [[0]] - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="must be 1D array or scalar"): glm.fit(X, y, weights) # 1d but wrong length weights = [1, 0] - with pytest.raises(ValueError): + with pytest.raises(ValueError, + match="weights must have the same length as y"): glm.fit(X, y, weights) # 1d but only zeros (sum not greater than 0) weights = [0, 0] X = [[0], [1]] y = [1, 2] - with 
pytest.raises(ValueError): + with pytest.raises(ValueError, + match="must have at least one positive element"): glm.fit(X, y, weights) # 5. 1d but with a negative value weights = [2, -1] - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="weights must be non-negative"): glm.fit(X, y, weights) @@ -202,7 +204,7 @@ def test_glm_family_argument(f, fam): glm = GeneralizedLinearRegressor(family='not a family', fit_intercept=False) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="family must be"): glm.fit(X, y) @@ -218,7 +220,7 @@ def test_glm_link_argument(l, link): assert isinstance(glm._link_instance, link.__class__) glm = GeneralizedLinearRegressor(family='normal', link='not a link') - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="link must be"): glm.fit(X, y) @@ -228,7 +230,8 @@ def test_glm_alpha_argument(alpha): y = np.array([1, 2]) X = np.array([[1], [2]]) glm = GeneralizedLinearRegressor(family='normal', alpha=alpha) - with pytest.raises(ValueError): + with pytest.raises(ValueError, + match="Penalty term must be a non-negative"): glm.fit(X, y) @@ -238,7 +241,8 @@ def test_glm_l1_ratio_argument(l1_ratio): y = np.array([1, 2]) X = np.array([[1], [2]]) glm = GeneralizedLinearRegressor(family='normal', l1_ratio=l1_ratio) - with pytest.raises(ValueError): + with pytest.raises(ValueError, + match="l1_ratio must be a number in interval.*0, 1"): glm.fit(X, y) @@ -276,13 +280,13 @@ def test_glm_P2_positive_semidefinite(): P2 = Q.T @ P2 @ Q glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False, check_input=True) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="P2 must be positive semi-definite"): glm.fit(X, y) P2 = sparse.csr_matrix(P2) glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False, check_input=True) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="P2 must be positive semi-definite"): glm.fit(X, y) @@ -292,7 +296,7 @@ def test_glm_fit_intercept_argument(fit_intercept): y = np.array([1, 2]) X = np.array([[1], [1]]) glm = GeneralizedLinearRegressor(fit_intercept=fit_intercept) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="fit_intercept must be bool"): glm.fit(X, y) @@ -314,7 +318,7 @@ def test_glm_max_iter_argument(max_iter): y = np.array([1, 2]) X = np.array([[1], [2]]) glm = GeneralizedLinearRegressor(max_iter=max_iter) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="must be a positive integer"): glm.fit(X, y) @@ -324,7 +328,7 @@ def test_glm_tol_argument(tol): y = np.array([1, 2]) X = np.array([[1], [2]]) glm = GeneralizedLinearRegressor(tol=tol) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="stopping criteria must be positive"): glm.fit(X, y) @@ -334,7 +338,7 @@ def test_glm_warm_start_argument(warm_start): y = np.array([1, 2]) X = np.array([[1], [1]]) glm = GeneralizedLinearRegressor(warm_start=warm_start) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="warm_start must be bool"): glm.fit(X, y) @@ -356,7 +360,7 @@ def test_glm_selection_argument(selection): y = np.array([1, 2]) X = np.array([[1], [1]]) glm = GeneralizedLinearRegressor(selection=selection) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="argument selection must be"): glm.fit(X, y) @@ -366,7 +370,7 @@ def test_glm_random_state_argument(random_state): y = np.array([1, 2]) X = np.array([[1], [1]]) glm = GeneralizedLinearRegressor(random_state=random_state) - with 
pytest.raises(ValueError): + with pytest.raises(ValueError, match="cannot be used to seed"): glm.fit(X, y) @@ -376,7 +380,7 @@ def test_glm_diag_fisher_argument(diag_fisher): y = np.array([1, 2]) X = np.array([[1], [1]]) glm = GeneralizedLinearRegressor(diag_fisher=diag_fisher) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="diag_fisher must be bool"): glm.fit(X, y) @@ -386,7 +390,7 @@ def test_glm_copy_X_argument(copy_X): y = np.array([1, 2]) X = np.array([[1], [1]]) glm = GeneralizedLinearRegressor(copy_X=copy_X) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="copy_X must be bool"): glm.fit(X, y) @@ -396,7 +400,7 @@ def test_glm_check_input_argument(check_input): y = np.array([1, 2]) X = np.array([[1], [1]]) glm = GeneralizedLinearRegressor(check_input=check_input) - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="check_input must be bool"): glm.fit(X, y) From 7db0320f460676d8bed8e01bf64657a89532e2cb Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 14 Jun 2019 10:44:16 -0500 Subject: [PATCH 062/269] Add PoissonRegressor alias --- sklearn/linear_model/__init__.py | 5 +- sklearn/linear_model/_glm.py | 163 +++++++++++++++++++++++++++++++ 2 files changed, 166 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index e5ede64413eb5..121418f901a1a 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -19,7 +19,7 @@ MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLassoCV) from ._glm import (TweedieDistribution, - GeneralizedLinearRegressor) + GeneralizedLinearRegressor, PoissonRegressor) from .huber import HuberRegressor from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from .stochastic_gradient import SGDClassifier, SGDRegressor @@ -82,4 +82,5 @@ 'ridge_regression', 'RANSACRegressor', 'GeneralizedLinearRegressor', - 'TweedieDistribution'] + 'TweedieDistribution', + 'PoissonRegressor'] diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index c4f8cf7a975d3..b18731e73f328 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -2326,3 +2326,166 @@ def score(self, X, y, sample_weight=None): y_mean = np.average(y, weights=weights) dev_null = self._family_instance.deviance(y, y_mean, weights=weights) return 1. - dev / dev_null + + def _more_tags(self): + return {"requires_positive_y": True} + + +class PoissonRegressor(GeneralizedLinearRegressor): + """Regression with the response variable y following a Poisson distribution + + GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at + fitting and predicting the mean of the target y as mu=h(X*w). + The fit minimizes the following objective function with L2 regularization:: + + 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * ||w||_2^2 + + with inverse link function h and s=sample_weight. Note that for + ``sample_weight=None``, one has s_i=1 and sum(s)=n_samples). + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + alpha : float, optional (default=1) + Constant that multiplies the penalty terms and thus determines the + regularization strength. + See the notes for the exact mathematical meaning of this + parameter.``alpha = 0`` is equivalent to unpenalized GLMs. In this + case, the design matrix X must have full column rank + (no collinearities). + + fit_intercept : boolean, optional (default=True) + Specifies if a constant (a.k.a. 
bias or intercept) should be + added to the linear predictor (X*coef+intercept). + + fit_dispersion : {None, 'chisqr', 'deviance'}, optional (default=None) + Method for estimation of the dispersion parameter phi. Whether to use + the chi squared statistic or the deviance statistic. If None, the + dispersion is not estimated. + + solver : {'irls', 'lbfgs', 'newton-cg'}, optional (default='irls') + Algorithm to use in the optimization problem: + + 'irls' + Iterated reweighted least squares. It is the standard algorithm + for GLMs. + + 'lbfgs' + Calls scipy's L-BFGS-B optimizer. + + 'newton-cg' + Newton conjugate gradient algorithm. + + Note that all solvers except lbfgs use the fisher matrix, i.e. the + expected Hessian instead of the Hessian matrix. + + max_iter : int, optional (default=100) + The maximal number of iterations for solver algorithms. + + tol : float, optional (default=1e-4) + Stopping criterion. For the irls, newton-cg and lbfgs solvers, + the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` + where ``g_i`` is the i-th component of the gradient (derivative) of + the objective function. + + warm_start : boolean, optional (default=False) + If set to ``True``, reuse the solution of the previous call to ``fit`` + as initialization for ``coef_`` and ``intercept_`` (supersedes option + ``start_params``). If set to ``True`` or if the attribute ``coef_`` + does not exit (first call to ``fit``), option ``start_params`` sets the + start values for ``coef_`` and ``intercept_``. + + start_params : {'guess', 'zero', array of shape (n_features*, )}, \ + optional (default='guess') + Relevant only if ``warm_start=False`` or if fit is called + the first time (``self.coef_`` does not yet exist). + + 'guess' + Start values of mu are calculated by family.starting_mu(..). Then, + one Newton step obtains start values for ``coef_``. If + ``solver='irls'``, it uses one irls step. This gives usually good + starting values. + + 'zero' + All coefficients are set to zero. If ``fit_intercept=True``, the + start value for the intercept is obtained by the weighted average of y. + + array + The array of size n_features* is directly used as start values + for ``coef_``. If ``fit_intercept=True``, the first element + is assumed to be the start value for the ``intercept_``. + Note that n_features* = X.shape[1] + fit_intercept, i.e. it includes + the intercept in counting. + + random_state : {int, RandomState instance, None}, optional (default=None) + If int, random_state is the seed used by the random + number generator; if RandomState instance, random_state is the random + number generator; if None, the random number generator is the + RandomState instance used by `np.random`. Used when ``selection`` == + 'random'. + + copy_X : boolean, optional, (default=True) + If ``True``, X will be copied; else, it may be overwritten. + + verbose : int, optional (default=0) + For the lbfgs solver set verbose to any positive number for verbosity. + + Attributes + ---------- + coef_ : array, shape (n_features,) + Estimated coefficients for the linear predictor (X*coef_+intercept_) in + the GLM. + + intercept_ : float + Intercept (a.k.a. bias) added to linear predictor. + + dispersion_ : float + The dispersion parameter :math:`\\phi` if ``fit_dispersion`` was set. + + n_iter_ : int + Actual number of iterations used in solver. 
+ + Notes + ----- + The fit itself does not need Y to be from an EDM, but only assumes + the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function + :math:`v(\\mu_i)` is a property of and given by the specific EDM, see + :ref:`User Guide `. + + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + minimizing the deviance plus penalty term, which is equivalent to + (penalized) maximum likelihood estimation. + + For alpha > 0, the feature matrix X should be standardized in order to + penalize features equally strong. + + If the target y is a ratio, appropriate sample weights s should be + provided. + As an example, consider Poisson distributed counts z (integers) and + weights s=exposure (time, money, persons years, ...). Then you fit + y = z/s, i.e. ``PoissonRegressor().fit(X, y, sample_weight=s)``. + The weights are necessary for the right (finite sample) mean. + Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, + in this case one might say that y has a 'scaled' Poisson distributions. + + References + ---------- + For the coordinate descent implementation: + * Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin + An Improved GLMNET for L1-regularized Logistic Regression, + Journal of Machine Learning Research 13 (2012) 1999-2030 + https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf + """ + def __init__(self, alpha=1.0, fit_intercept=True, fit_dispersion=None, + solver='irls', max_iter=100, + tol=1e-4, warm_start=False, start_params='guess', + random_state=None, copy_X=True, check_input=True, verbose=0): + + super().__init__(alpha=alpha, fit_intercept=fit_intercept, + family="poisson", link='log', + fit_dispersion=fit_dispersion, solver=solver, + max_iter=max_iter, tol=tol, warm_start=warm_start, + start_params=start_params, random_state=random_state, + copy_X=copy_X, verbose=verbose) From dcfe9edaf83509d2b6d98ee840c28f674ea4f496 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 27 Jun 2019 08:41:04 -0500 Subject: [PATCH 063/269] TST Simplify comparison with ridge --- sklearn/linear_model/tests/test_glm.py | 94 ++++++-------------------- 1 file changed, 22 insertions(+), 72 deletions(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index de1a5262b36ce..1416bdcfad680 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -26,6 +26,8 @@ from sklearn.utils.testing import assert_array_equal +GLM_SOLVERS = ['irls', 'lbfgs', 'newton-cg', 'cd'] + @pytest.fixture(scope="module") def regression_data(): @@ -404,7 +406,7 @@ def test_glm_check_input_argument(check_input): glm.fit(X, y) -@pytest.mark.parametrize('solver', ['irls', 'lbfgs', 'newton-cg', 'cd']) +@pytest.mark.parametrize('solver', GLM_SOLVERS) def test_glm_identity_regression(solver): """Test GLM regression with identity link on a simple dataset.""" coef = [1., 2.] 
@@ -442,97 +444,45 @@ def test_glm_log_regression(family, solver, tol): # newton-cg may issue a LineSearchWarning, which we filter out @pytest.mark.filterwarnings('ignore:The line search algorithm') @pytest.mark.filterwarnings('ignore:Line Search failed') -@pytest.mark.parametrize('solver, tol', [('irls', 1e-6), - ('lbfgs', 1e-6), - ('newton-cg', 1e-6), - ('cd', 1e-6)]) -def test_normal_ridge(solver, tol): +@pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)]) +@pytest.mark.parametrize('fit_intercept', [True, False]) +@pytest.mark.parametrize('solver', GLM_SOLVERS) +def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): """Test ridge regression for Normal distributions. + Case n_samples >> n_features + Compare to test_ridge in test_ridge.py. """ - rng = np.random.RandomState(42) alpha = 1.0 - - # 1. With more samples than features - n_samples, n_features, n_predict = 100, 7, 10 + n_predict = 10 X, y, coef = make_regression(n_samples=n_samples+n_predict, n_features=n_features, n_informative=n_features-2, noise=0.5, - coef=True, random_state=rng) + coef=True, random_state=42) y = y[0:n_samples] X, T = X[0:n_samples], X[n_samples:] - # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 - ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-6, - solver='svd', normalize=False) - ridge.fit(X, y) - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', - link='identity', fit_intercept=True, - tol=tol, max_iter=100, solver=solver, - check_input=False, random_state=rng) - glm.fit(X, y) - assert glm.coef_.shape == (X.shape[1], ) - assert_allclose(glm.coef_, ridge.coef_, rtol=1e-6) - assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-5) - assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-6) - - ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-6, - solver='svd', normalize=False) - ridge.fit(X, y) - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', - link='identity', fit_intercept=False, - tol=tol, max_iter=100, solver=solver, - check_input=False, random_state=rng, - fit_dispersion='chisqr') - glm.fit(X, y) - assert glm.coef_.shape == (X.shape[1], ) - assert_allclose(glm.coef_, ridge.coef_, rtol=1e-5) - assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-6) - assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-6) - mu = glm.predict(X) - assert_allclose(glm.dispersion_, - np.sum((y-mu)**2/(n_samples-n_features))) - - # 2. 
With more features than samples and sparse - n_samples, n_features, n_predict = 10, 100, 10 - X, y, coef = make_regression(n_samples=n_samples+n_predict, - n_features=n_features, - n_informative=n_features-2, noise=0.5, - coef=True, random_state=rng) - y = y[0:n_samples] - X, T = X[0:n_samples], X[n_samples:] + if n_samples > n_features: + ridge_params = {"solver": "svd"} + else: + ridge_params = {"solver": "sag", "max_iter": 10000, "tol": 1e-9} # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 - ridge = Ridge(alpha=alpha*n_samples, fit_intercept=True, tol=1e-9, - solver='sag', normalize=False, max_iter=100000, - random_state=42) + ridge = Ridge(alpha=alpha*n_samples, normalize=False, + random_state=42, **ridge_params) ridge.fit(X, y) + glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', link='identity', fit_intercept=True, - tol=tol, max_iter=300, solver=solver, - check_input=False, random_state=rng) + max_iter=300, solver=solver, tol=1e-6, + check_input=False, random_state=42) glm.fit(X, y) assert glm.coef_.shape == (X.shape[1], ) assert_allclose(glm.coef_, ridge.coef_, rtol=5e-6) assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-6) assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-5) - ridge = Ridge(alpha=alpha*n_samples, fit_intercept=False, tol=1e-7, - solver='sag', normalize=False, max_iter=1000, - random_state=42) - ridge.fit(X, y) - - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', - link='identity', fit_intercept=False, - tol=tol*2, max_iter=300, solver=solver, - check_input=False, random_state=rng) - glm.fit(X, y) - assert glm.coef_.shape == (X.shape[1], ) - assert_allclose(glm.coef_, ridge.coef_, rtol=1e-4) - assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-5) - assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-5) - @pytest.mark.parametrize('solver, tol', [('irls', 1e-7), @@ -559,7 +509,7 @@ def test_poisson_ridge(solver, tol): rng = np.random.RandomState(42) glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, fit_intercept=True, family='poisson', - link='log', tol=tol, + link='log', tol=1e-7, solver=solver, max_iter=300, random_state=rng) glm.fit(X, y) @@ -750,7 +700,7 @@ def test_fit_dispersion(regression_data): assert_allclose(est2.dispersion_, est3.dispersion_) -@pytest.mark.parametrize("solver", ["irls", "lbfgs", "newton-cg", "cd"]) +@pytest.mark.parametrize("solver", GLM_SOLVERS) def test_convergence_warning(solver, regression_data): X, y = regression_data From 4879bb6e057a95c38ee4950d13ef2dbd98da0a19 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 28 Jun 2019 09:54:59 -0500 Subject: [PATCH 064/269] EXA Add plot_tweedie_regression_insurance_claims.py --- ...lot_tweedie_regression_insurance_claims.py | 500 ++++++++++++++++++ 1 file changed, 500 insertions(+) create mode 100644 examples/linear_model/plot_tweedie_regression_insurance_claims.py diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py new file mode 100644 index 0000000000000..cb2ff667e8379 --- /dev/null +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -0,0 +1,500 @@ +""" +====================================== +Tweedie regression on insurance claims +====================================== + +This example illustrate the use Poisson, Gamma and Tweedie regression +on the French Motor Third-Party Liability Claims dataset, and is inspired +by an R tutorial [1]. 
+ +Insurance claims data consist of the number of claims and the total claim +amount. Often, the final goal is to predict the expected value, i.e. the mean, +of the total claim amount. There are several possibilities to do that, two of +which are: + +1. Model the number of claims with a Poisson distribution, the average + claim amount as a Gamma distribution and multiply the predictions, to get + the total claim amount. +2. Model total claim amount directly, typically with a Tweedie distribution. + +In this example we will illustrate both approaches. We start by defining a few +helper functions for loading the data and visualizing results. + + +.. [1] A. Noll, R. Salzmann and M.V. Wuthrich, Case Study: French Motor + Third-Party Liability Claims (November 8, 2018). + `doi:10.2139/ssrn.3164764 `_ + +""" +print(__doc__) + +# Authors: Christian Lorentzen +# Roman Yurchak +# License: BSD 3 clause +from functools import partial + +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd + +from sklearn.compose import ColumnTransformer +from sklearn.linear_model import GeneralizedLinearRegressor +from sklearn.linear_model._glm import TweedieDistribution +from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import FunctionTransformer, OneHotEncoder +from sklearn.preprocessing import StandardScaler, KBinsDiscretizer + +from sklearn.metrics import mean_absolute_error + + +def load_mtpl2(n_samples=100000): + """Fetcher for French Motor Third-Party Liability Claims dataset + + Parameters + ---------- + n_samples: int, default=100000 + number of samples to select (for faster run time). + """ + + # Note: this should use the OpenML DataFrame fetcher in the future + df_freq = pd.read_csv( + "https://www.openml.org/data/get_csv/20649148/freMTPL2freq.csv", + dtype={"IDpol": np.int}, + index_col=0, + ) + + df_sev = pd.read_csv( + "https://www.openml.org/data/get_csv/20649149/freMTPL2sev.arff", + index_col=0, + ) + + # sum ClaimAmount over identical IDs + df_sev = df_sev.groupby(level=0).sum() + + df = df_freq.join(df_sev, how="left") + df["ClaimAmount"].fillna(0, inplace=True) + + # unquote string fields + for column_name in df.columns[df.dtypes.values == np.object]: + df[column_name] = df[column_name].str.strip("'") + return df.iloc[:n_samples] + + +def plot_obs_pred(df, feature, observed, y_predicted, weight, y_label=None, + title=None, kind_weight=None, ax=None): + """Plot observed and predicted - aggregated per feature level. 
+ + Parameters + ---------- + df : DataFrame with at least one column named feature + observed : str + a column name of the observed target + predicted : frame + a dataframe, with the same index as df, with the predicted target + weight : str + column name with the values of weights/exposure + """ + # aggregate observed and predicted variables by feature level + df_ = df.loc[:, [feature, weight]].copy() + df_["observed"] = df[observed] * df[weight] + df_["predicted"] = y_predicted * df[weight] + df_ = ( + df_.groupby([feature])[weight, "observed", "predicted"] + .sum() + .assign(observed=lambda x: x["observed"] / x[weight]) + .assign(predicted=lambda x: x["predicted"] / x[weight]) + ) + + ax = df_.loc[:, ["observed", "predicted"]].plot(style=".", ax=ax) + y_max = df_.loc[:, ["observed", "predicted"]].values.max() * 0.8 + ax.fill_between( + df_.index, + 0, + y_max * df_[weight] / df_[weight].values.max(), + color="g", + alpha=0.1, + ) + ax.set( + ylabel=y_label if y_label is not None else None, + title=title if title is not None else "Train: Observed vs Predicted", + ) + + +############################################################################## +# +# 1. Loading datasets and pre-processing +# -------------------------------------- +# +# We construct the freMTPL2 dataset by joining the freMTPL2freq table, +# containing the number of claims (``ClaimNb``) with the freMTPL2sev table +# containing the claim amount (``ClaimAmount``) for the same user ids. + +df = load_mtpl2(n_samples=100000) + +# Note: filter out claims with zero amount, as the severity model +# requires a strictly positive target values. +df.loc[(df.ClaimAmount == 0) & (df.ClaimNb >= 1), "ClaimNb"] = 0 + +# correct for unreasonable observations (that might be data error) +df["ClaimNb"].clip(upper=4, inplace=True) +df["Exposure"].clip(upper=1, inplace=True) + +column_trans = ColumnTransformer( + [ + ("Veh_Driv_Age", KBinsDiscretizer(n_bins=10), ["VehAge", "DrivAge"]), + ( + "Veh_Brand_Gas_Region", + OneHotEncoder(), + ["VehBrand", "VehPower", "VehGas", "Region", "Area"], + ), + ("BonusMalus", "passthrough", ["BonusMalus"]), + ( + "Density_log", + make_pipeline( + FunctionTransformer(np.log, validate=False), StandardScaler() + ), + ["Density"], + ), + ], + remainder="drop", +) +X = column_trans.fit_transform(df) + + +df["Frequency"] = df.ClaimNb / df.Exposure +df["AvgClaimAmount"] = df.ClaimAmount / np.fmax(df.ClaimNb, 1) + +print(df[df.ClaimAmount > 0].head()) + +############################################################################## +# +# 2. Frequency model -- Poisson distribution +# ------------------------------------------- +# +# The number of claims (``ClaimNb``) is a positive integer that can be modeled +# as a Poisson distribution. It is then assumed to be the number of discrete +# events occuring with a constant rate in a given time interval (``Exposure``). +# Here we model the frequency ``y = ClaimNb / Exposure``, +# which is still a (scaled) Poisson distribution. +# +# A very important property of the Poisson distribution is its mean-variance +# relation: The variance is proportional to the mean. 
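A quick numerical illustration of this mean-variance relation (a standalone sketch, not part of the example script): for simulated Poisson counts the empirical variance matches the empirical mean, and dividing by an exposure-like constant keeps the variance proportional to the mean.

    import numpy as np

    rng_check = np.random.RandomState(0)
    counts = rng_check.poisson(lam=3.0, size=100000)
    print(counts.mean(), counts.var())   # both close to 3.0
    freq = counts / 2.0                  # a "scaled" Poisson variable
    print(freq.mean(), freq.var())       # ~1.5 and ~0.75, variance still proportional to mean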
+ +df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=2) + +glm_freq = GeneralizedLinearRegressor(family="poisson", alpha=0) +glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) + + +def mean_deviance(estimator, y, y_pred, weights): + if hasattr(estimator, "_family_instance"): + return estimator._family_instance.deviance(y, y_pred, weights) / len(y) + else: + return np.nan + + +def score_estimator( + estimator, X_train, X_test, df_train, df_test, target, weights +): + res = [] + + for subset_label, X, df in [ + ("train", X_train, df_train), + ("test", X_test, df_test), + ]: + y, _weights = df[target], df[weights] + + for score_label, metric in [ + ("D² explaned", None), + ("mean deviance", partial(mean_deviance, estimator)), + ("mean abs. error", mean_absolute_error), + ]: + if estimator.__class__.__name__ == "ClaimProdEstimator": + # ClaimProdEstimator is the product of the frequency and + # severity models, together with a denormalized by the exposure + # values. It does not fully follow the scikit-learn API and we + # must handle it separately. + y_pred = estimator.predict(X, exposure=df.Exposure.values) + else: + y_pred = estimator.predict(X) + if metric is None: + if not hasattr(estimator, "score"): + continue + score = estimator.score(X, y, _weights) + else: + score = metric(y, y_pred, _weights) + + res.append( + {"subset": subset_label, "metric": score_label, "score": score} + ) + + res = ( + pd.DataFrame(res) + .set_index(["metric", "subset"]) + .score.unstack(-1) + .round(3) + ) + return res + + +scores = score_estimator( + glm_freq, + X_train, + X_test, + df_train, + df_test, + target="Frequency", + weights="Exposure", +) +print(scores) + +############################################################################## +# +# We can visually compare observed and predicted values, aggregated by +# the drivers age (``DrivAge``), vehicle age (``VehAge``) and the insurance +# bonus/penalty (``BonusMalus``), + +fig, ax = plt.subplots(2, 2, figsize=(16, 8)) +fig.subplots_adjust(hspace=0.3, wspace=0.2) + +plot_obs_pred( + df_train, + "DrivAge", + "Frequency", + glm_freq.predict(X_train), + weight="Exposure", + y_label="Claim Frequency", + title="train data", + ax=ax[0, 0], +) + +plot_obs_pred( + df_test, + "DrivAge", + "Frequency", + glm_freq.predict(X_test), + weight="Exposure", + y_label="Claim Frequency", + title="test data", + ax=ax[0, 1], +) + +plot_obs_pred( + df_test, + "VehAge", + "Frequency", + glm_freq.predict(X_test), + weight="Exposure", + y_label="Claim Frequency", + title="test data", + ax=ax[1, 0], +) + +plot_obs_pred( + df_test, + "BonusMalus", + "Frequency", + glm_freq.predict(X_test), + weight="Exposure", + y_label="Claim Frequency", + title="test data", + ax=ax[1, 1], +) + + +############################################################################## +# +# 3. Severity model - Gamma Distribution +# --------------------------------------- +# The mean claim amount or severity (`AvgClaimAmount`) can be empirically +# shown to follow a Gamma distribution. We fit a GLM model for the severity +# with the same features as the frequency model. +# +# Note: +# - We filter out ``ClaimAmount == 0``` as the Gamma distribution as support +# on :math:`(0, \infty)` not :math:`[0, \infty)`. +# - We use ``ClaimNb`` as sample weights. 
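The second note can be made concrete with a small sketch (using the columns built above; not part of the example script): weighting the per-policy averages ``AvgClaimAmount`` by ``ClaimNb`` recovers the mean severity per individual claim.

    m = df["ClaimNb"] > 0
    # weighted mean of the per-policy average claim amounts ...
    w_mean = np.average(df.loc[m, "AvgClaimAmount"], weights=df.loc[m, "ClaimNb"])
    # ... equals the total claim amount divided by the total number of claims
    per_claim_mean = df.loc[m, "ClaimAmount"].sum() / df.loc[m, "ClaimNb"].sum()
    print(w_mean, per_claim_mean)  # identical up to floating point rounding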
+ +mask_train = df_train["ClaimAmount"] > 0 +mask_test = df_test["ClaimAmount"] > 0 + +glm_sev = GeneralizedLinearRegressor(family="gamma", alpha=1) + +glm_sev.fit( + X_train[mask_train.values], + df_train.loc[mask_train, "AvgClaimAmount"], + sample_weight=df_train.loc[mask_train, "ClaimNb"], +) + + +scores = score_estimator( + glm_sev, + X_train[mask_train.values], + X_test[mask_test.values], + df_train[mask_train], + df_test[mask_test], + target="AvgClaimAmount", + weights="ClaimNb", +) +print(scores) + +############################################################################## +# +# Note that the resulting model is conditional on having at least one claim, +# and cannot be used to predict the average claim amount in general, + +print( + "Mean AvgClaim Amount: %.2f " + % df_train.AvgClaimAmount.mean() +) +print( + "Mean AvgClaim Amount | NbClaim > 0: %.2f" + % df_train.AvgClaimAmount[df_train.AvgClaimAmount > 0].mean() +) +print( + "Predicted Mean AvgClaim Amount: %.2f" + % glm_sev.predict(X_train).mean() +) + + +############################################################################## +# +# We can visually compare observed and predicted values, aggregated for +# the drivers age (``Driv Age``), + +fig, ax = plt.subplots(1, 2, figsize=(16, 4)) + +# plot DivAge +plot_obs_pred( + df_train.loc[mask_train], + "DrivAge", + "AvgClaimAmount", + glm_sev.predict(X_train[mask_train.values]), + weight="Exposure", + y_label="Average Claim Severity", + title="train data", + ax=ax[0], +) + +plot_obs_pred( + df_test.loc[mask_test], + "DrivAge", + "AvgClaimAmount", + glm_sev.predict(X_test[mask_test.values]), + weight="Exposure", + y_label="Average Claim Severity", + title="test data", + ax=ax[1], +) + + +############################################################################## +# +# 3. Total Claims Amount -- Compound Poisson distribution +# ------------------------------------------------------- +# +# As mentionned in the introduction, the total claim amount can be modeled +# either as the product of the frequency model by the severity model. + + +class ClaimProdEstimator: + """Total claim amount estimator + + Computed as the product of the frequency model by the serverity model, + denormalized by exposure. + """ + + def __init__(self, est_freq, est_sev): + self.est_freq = est_freq + self.est_sev = est_sev + + def predict(self, X, exposure): + """Predict the total claim amount + + The predict method is not compatible with the scikit-learn API. + """ + return exposure * self.est_freq.predict(X) * self.est_sev.predict(X) + + +est_prod = ClaimProdEstimator(glm_freq, glm_sev) + +scores = score_estimator( + est_prod, + X_train, + X_test, + df_train, + df_test, + target="ClaimAmount", + weights="Exposure", +) +print(scores) + + +############################################################################## +# +# or as a unique Compound Poisson model, also corresponding to a Tweedie model +# with a power :math:`p \in (1, 2)`. 
We determine the optimal hyperparameter +# ``p`` with a grid search, + +from sklearn.model_selection import GridSearchCV + +# this takes a while +params = { + "family": [ + TweedieDistribution(power=power) for power in np.linspace(1, 2, 8) + ] +} + +glm_total = GridSearchCV( + GeneralizedLinearRegressor(), cv=3, param_grid=params, n_jobs=-1 +) +glm_total.fit( + X_train, df_train["ClaimAmount"], sample_weight=df_train["Exposure"] +) + + +print( + "Best hyperparameters: power=%.2f\n" + % glm_total.best_estimator_.family.power +) + +scores = score_estimator( + glm_total.best_estimator_, + X_train, + X_test, + df_train, + df_test, + target="ClaimAmount", + weights="Exposure", +) +print(scores) + +############################################################################## +# +# In this example, the mean absolute error is lower for the Compound Poisson +# model than when using separate models for frequency and severity. +# +# We can additionally validate these models by comparing observed and predicted +# total claim amount over the test and train subsets. + +res = [] +for subset_label, X, df in [ + ("train", X_train, df_train), + ("test", X_test, df_test), +]: + res.append( + { + "subset": subset_label, + "observed": df.ClaimAmount.values.sum(), + "predicted, frequency*severity model": np.sum( + est_prod.predict(X, exposure=df.Exposure.values) + ), + "predicted, tweedie, p=%.2f" + % glm_total.best_estimator_.family.power: np.sum( + glm_total.best_estimator_.predict(X) + ), + } + ) + +print(pd.DataFrame(res).set_index("subset").T) From 56069e5b3f5b453d9e8a487c9d27e20900ce4d63 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 28 Jun 2019 10:11:37 -0500 Subject: [PATCH 065/269] EXA Fix issues with older pandas versions in example --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index cb2ff667e8379..063d12e6e291b 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -137,8 +137,8 @@ def plot_obs_pred(df, feature, observed, y_predicted, weight, y_label=None, df.loc[(df.ClaimAmount == 0) & (df.ClaimNb >= 1), "ClaimNb"] = 0 # correct for unreasonable observations (that might be data error) -df["ClaimNb"].clip(upper=4, inplace=True) -df["Exposure"].clip(upper=1, inplace=True) +df["ClaimNb"] = df["ClaimNb"].clip(upper=4) +df["Exposure"] = df["Exposure"].clip(upper=1) column_trans = ColumnTransformer( [ From 53f3c5f6670bbdefcd5ddf6d63e405401909cf06 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 09:14:30 -0500 Subject: [PATCH 066/269] DOC Add second poisson regression example --- ...plot_poisson_regression_non_normal_loss.py | 257 ++++++++++++++++++ 1 file changed, 257 insertions(+) create mode 100644 examples/linear_model/plot_poisson_regression_non_normal_loss.py diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py new file mode 100644 index 0000000000000..b06adcb787560 --- /dev/null +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -0,0 +1,257 @@ +""" +====================================== +Poisson regression and non normal loss +====================================== + +This example illustrate the use linear Poisson regression +on the French 
Motor Third-Party Liability Claims dataset [1] and compare +it with learning models with least squared error. + + +We start by defining a few helper functions for loading the data and +visualizing results. + + +.. [1] A. Noll, R. Salzmann and M.V. Wuthrich, Case Study: French Motor + Third-Party Liability Claims (November 8, 2018). + `doi:10.2139/ssrn.3164764 `_ + +""" +print(__doc__) + +# Authors: Christian Lorentzen +# Roman Yurchak +# License: BSD 3 clause +import numpy as np +import matplotlib.pyplot as plt +import pandas as pd +from scipy.special import xlogy + +from sklearn.compose import ColumnTransformer +from sklearn.linear_model import GeneralizedLinearRegressor, LinearRegression +from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import FunctionTransformer, OneHotEncoder +from sklearn.preprocessing import StandardScaler, KBinsDiscretizer +from sklearn.ensemble import GradientBoostingRegressor + +from sklearn.metrics import mean_squared_error, mean_absolute_error + + +def load_mtpl2(n_samples=100000): + """Fetcher for French Motor Third-Party Liability Claims dataset + + Parameters + ---------- + n_samples: int, default=100000 + number of samples to select (for faster run time). + """ + + # Note: this should use the OpenML DataFrame fetcher in the future + df_freq = pd.read_csv( + "https://www.openml.org/data/get_csv/20649148/freMTPL2freq.csv", + dtype={"IDpol": np.int}, + index_col=0, + ) + + df_sev = pd.read_csv( + "https://www.openml.org/data/get_csv/20649149/freMTPL2sev.arff", + index_col=0, + ) + + # sum ClaimAmount over identical IDs + df_sev = df_sev.groupby(level=0).sum() + + df = df_freq.join(df_sev, how="left") + df["ClaimAmount"].fillna(0, inplace=True) + + # unquote string fields + for column_name in df.columns[df.dtypes.values == np.object]: + df[column_name] = df[column_name].str.strip("'") + return df.iloc[:n_samples] + + +############################################################################## +# +# 1. Loading datasets and pre-processing +# -------------------------------------- +# +# We construct the freMTPL2 dataset by joining the freMTPL2freq table, +# containing the number of claims (``ClaimNb``) with the freMTPL2sev table +# containing the claim amount (``ClaimAmount``) for the same user ids. + +df = load_mtpl2(n_samples=100000) + +# Note: filter out claims with zero amount, as the severity model +# requires a strictly positive target values. +df.loc[(df.ClaimAmount == 0) & (df.ClaimNb >= 1), "ClaimNb"] = 0 + +# correct for unreasonable observations (that might be data error) +df["ClaimNb"] = df["ClaimNb"].clip(upper=4) +df["Exposure"] = df["Exposure"].clip(upper=1) + +column_trans = ColumnTransformer( + [ + ("Veh_Driv_Age", KBinsDiscretizer(n_bins=10), ["VehAge", "DrivAge"]), + ( + "Veh_Brand_Gas_Region", + OneHotEncoder(), + ["VehBrand", "VehPower", "VehGas", "Region", "Area"], + ), + ("BonusMalus", "passthrough", ["BonusMalus"]), + ( + "Density_log", + make_pipeline( + FunctionTransformer(np.log, validate=False), StandardScaler() + ), + ["Density"], + ), + ], + remainder="drop", +) +X = column_trans.fit_transform(df) + +############################################################################## +# +# The number of claims (``ClaimNb``) is a positive integer that can be modeled +# as a Poisson distribution. It is then assumed to be the number of discrete +# events occurring with a constant rate in a given time interval +# (``Exposure``). 
Here we model the frequency ``y = ClaimNb / Exposure``, +# which is still a (scaled) Poisson distribution. +# +# A very important property of the Poisson distribution is its mean-variance +# relation: The variance is proportional to the mean. + +df["Frequency"] = df.ClaimNb / df.Exposure + +print( + pd.cut(df.Frequency, [-1e-6, 1e-6, 1, 2, 3, 4, 5]).value_counts() +) + +############################################################################## +# +# It worth noting that 96 % of users have 0 claims, and if we were to convert +# this problem into a binary classification task, it would be significantly +# imbalanced. +# +# To evaluate the pertinence of the used metrics, we will consider as a +# baseline an estimator that returns 0 for any input. + +df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=2) + + +def mean_poisson_deviance_score(y_true, y_pred, sample_weights=None): + y_true = np.atleast_1d(y_true) + y_pred = np.atleast_1d(y_pred) + dev = 2 * (xlogy(y_true, y_true/y_pred) - y_true + y_pred) + return np.average(dev, weights=sample_weights) + + +eps = 1e-5 +print("MSE: %.3f" % mean_squared_error( + df_test.Frequency.values, np.zeros(len(df_test)), + df_test.Exposure.values)) +print("MAE: %.3f" % mean_absolute_error( + df_test.Frequency.values, np.zeros(len(df_test)), + df_test.Exposure.values)) +print("mean Poisson deviance: %.3f" % mean_poisson_deviance_score( + df_test.Frequency.values, eps + np.zeros(len(df_test)), + df_test.Exposure.values)) + + +############################################################################## +# +# We start by modeling the target variable with the least squares linear +# regression model, + + +linregr = LinearRegression() +linregr.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) + +print("LinearRegression") +print("MSE: %.3f" % mean_squared_error( + df_test.Frequency.values, linregr.predict(X_test), + df_test.Exposure.values)) +print("MSE: %.3f" % mean_absolute_error( + df_test.Frequency.values, linregr.predict(X_test), + df_test.Exposure.values)) +print("mean Poisson deviance: %.3f" % mean_poisson_deviance_score( + df_test.Frequency.values, np.fmax(linregr.predict(X_test), eps), + df_test.Exposure.values)) + +############################################################################## +# +# The Poisson deviance cannot be computed because negative values are +# predicted by the model, + +print('Number Negatives: %s / total: %s' % ( + (linregr.predict(X_test) < 0).sum(), X_test.shape[0])) + +############################################################################## +# +# Next we fit the Poisson regressor on the target variable, + +glm_freq = GeneralizedLinearRegressor(family="poisson", alpha=0) +glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) + +print("PoissonRegressor") +print("MSE: %.3f" % mean_squared_error( + df_test.Frequency.values, glm_freq.predict(X_test), + df_test.Exposure.values)) +print("MAE: %.3f" % mean_absolute_error( + df_test.Frequency.values, glm_freq.predict(X_test), + df_test.Exposure.values)) +print("mean Poisson deviance: %.3f" % mean_poisson_deviance_score( + df_test.Frequency.values, glm_freq.predict(X_test), + df_test.Exposure.values)) + +############################################################################## +# +# Finally we will consider a non linear model with Gradient boosting that +# still minimizes the least square error. 
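Before turning to the gradient boosting model below, note that the single train/test comparison above could also be cross-validated. A rough sketch with a Poisson-deviance scorer (it reuses the ``mean_poisson_deviance_score`` helper defined above and, for brevity, ignores the exposure weights):

    from sklearn.metrics import make_scorer
    from sklearn.model_selection import cross_val_score

    # lower deviance is better, hence greater_is_better=False
    poisson_dev_scorer = make_scorer(mean_poisson_deviance_score,
                                     greater_is_better=False)
    cv_dev = -cross_val_score(glm_freq, X, df.Frequency,
                              scoring=poisson_dev_scorer, cv=3)
    print(cv_dev.mean(), cv_dev.std())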
+ + +gbr = GradientBoostingRegressor(max_depth=3) +gbr.fit(X_train, df_train.Frequency.values, + sample_weight=df_train.Exposure.values) + + +print("GradientBoostingRegressor") +print("MSE: %.3f" % mean_squared_error( + df_test.Frequency.values, gbr.predict(X_test), df_test.Exposure.values)) +print("MAE: %.3f" % mean_absolute_error( + df_test.Frequency.values, gbr.predict(X_test), df_test.Exposure.values)) +print("mean Poisson deviance: %.3f" % mean_poisson_deviance_score( + df_test.Frequency.values, gbr.predict(X_test), df_test.Exposure.values)) + +############################################################################## +# +# In this example, although Gradient boosting minimizes the least square error, +# because of a higher predictive power it also results in a smaller Poisson +# deviance than the Poisson regression model. +# +# Evaluating models with a single train / test split is prone to numerical +# errors, we can verify that we would also get equivalent resuts with the +# cross-validation score. +# +# The difference between these models can also be visualized by comparing the +# histogram of observed target values with that of predicted values, + + +fig, ax = plt.subplots(1, 4, figsize=(16, 3)) + +df_train.Frequency.hist(bins=np.linspace(-1, 10, 50), ax=ax[0]) + +ax[0].set_title('Experimental data') + +for idx, model in enumerate([linregr, glm_freq, gbr]): + y_pred = model.predict(X_train) + + pd.Series(y_pred).hist(bins=np.linspace(-1, 8, 50), ax=ax[idx+1]) + ax[idx+1].set_title(model.__class__.__name__) + +for axi in ax: + axi.set( + yscale='log', + xlabel="y (Frequency)" + ) From be5a3c485684ae45835258a71a1870a59549fbda Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 09:55:57 -0500 Subject: [PATCH 067/269] Add GeneralizedHyperbolicSecant and BinomialDistributions --- sklearn/linear_model/_glm.py | 109 ++++--------------------- sklearn/linear_model/tests/test_glm.py | 34 +------- 2 files changed, 18 insertions(+), 125 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index b18731e73f328..736e50960dcda 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -6,36 +6,6 @@ # some parts and tricks stolen from other sklearn files. # License: BSD 3 clause -# TODO: Add cross validation support, e.g. GCV? -# TODO: Should GeneralizedLinearRegressor inherit from LinearModel? -# So far, it does not. -# TODO: Include further classes in class.rst? ExponentialDispersionModel? -# TweedieDistribution? -# TODO: Negative values in P1 are not allowed so far. They could be used -# for group lasso. - -# Design Decisions: -# - Which name? GeneralizedLinearModel vs GeneralizedLinearRegressor. -# Estimators in sklearn are either regressors or classifiers. A GLM can do -# both depending on the distr (Normal => regressor, Binomial => classifier). -# Solution: GeneralizedLinearRegressor since this is the focus. -# - Allow for finer control of penalty terms: -# L1: ||P1*w||_1 with P1*w as element-wise product, this allows to exclude -# factors from the L1 penalty. -# L2: w*P2*w with P2 a positive (semi-) definite matrix, e.g. P2 could be -# a 1st or 2nd order difference matrix (compare B-spline penalties and -# Tikhonov regularization). -# - The link function (instance of class Link) is necessary for the evaluation -# of deviance, score, Fisher and Hessian matrix as functions of the -# coefficients, which is needed by optimizers. 
-# Solution: link as argument in those functions -# - Which name/symbol for sample_weight in docu? -# sklearn.linear_models uses w for coefficients, standard literature on -# GLMs use beta for coefficients and w for (sample) weights. -# So far, coefficients=w and sample weights=s. -# - The intercept term is the first index, i.e. coef[0] - - from __future__ import division from abc import ABCMeta, abstractmethod import numbers @@ -51,6 +21,7 @@ from ..utils.validation import check_is_fitted, check_random_state + def _check_weights(sample_weight, n_samples): """Check that sample weights are non-negative and have the right shape.""" if sample_weight is None: @@ -854,47 +825,12 @@ def __init__(self): super(InverseGaussianDistribution, self).__init__(power=3) -class GeneralizedHyperbolicSecant(ExponentialDispersionModel): - """A class for the Generalized Hyperbolic Secant (GHS) distribution. - - The GHS distribution is for targets y in (-inf, inf). - """ - def __init__(self): - self._lower_bound = -np.Inf - self._upper_bound = np.Inf - self._include_lower_bound = False - self._include_upper_bound = False - - def unit_variance(self, mu): - return 1 + mu**2 - - def unit_variance_derivative(self, mu): - return 2 * mu - - def unit_deviance(self, y, mu): - return (2 * y * (np.arctan(y) - np.arctan(mu)) + - np.log((1 + mu**2)/(1 + y**2))) - - -class BinomialDistribution(ExponentialDispersionModel): - """A class for the Binomial distribution. - - The Binomial distribution is for targets y in [0, 1]. - """ - def __init__(self): - self._lower_bound = 0 - self._upper_bound = 1 - self._include_lower_bound = True - self._include_upper_bound = True - - def unit_variance(self, mu): - return mu * (1 - mu) - - def unit_variance_derivative(self, mu): - return 1 - 2 * mu - - def unit_deviance(self, y, mu): - return 2 * (special.xlogy(y, y/mu) + special.xlogy(1-y, (1-y)/(1-mu))) +EDM_DISTRIBUTIONS = { + 'normal': NormalDistribution, + 'poisson': PoissonDistribution, + 'gamma': GammaDistribution, + 'inverse.gaussian': InverseGaussianDistribution, +} def _irls_step(X, W, P2, z, fit_intercept=True): @@ -1690,28 +1626,19 @@ def fit(self, X, y, sample_weight=None): ####################################################################### # 1. 
input validation # ####################################################################### - # 1.1 validate arguments of __init__ ################################## + # 1.1 validate arguments of __init__ # Guarantee that self._family_instance is an instance of class # ExponentialDispersionModel if isinstance(self.family, ExponentialDispersionModel): self._family_instance = self.family + elif self.family in EDM_DISTRIBUTIONS: + self._family_instance = EDM_DISTRIBUTIONS[self.family]() else: - if self.family == 'normal': - self._family_instance = NormalDistribution() - elif self.family == 'poisson': - self._family_instance = PoissonDistribution() - elif self.family == 'gamma': - self._family_instance = GammaDistribution() - elif self.family == 'inverse.gaussian': - self._family_instance = InverseGaussianDistribution() - elif self.family == 'binomial': - self._family_instance = BinomialDistribution() - else: - raise ValueError( - "The family must be an instance of class" - " ExponentialDispersionModel or an element of" - " ['normal', 'poisson', 'gamma', 'inverse.gaussian', " - "'binomial']; got (family={0})".format(self.family)) + raise ValueError( + "The family must be an instance of class" + " ExponentialDispersionModel or an element of" + " ['normal', 'poisson', 'gamma', 'inverse.gaussian', " + "'binomial']; got (family={0})".format(self.family)) # Guarantee that self._link_instance is set to an instance of # class Link @@ -1724,11 +1651,6 @@ def fit(self, X, y, sample_weight=None): self._link_instance = IdentityLink() if self._family_instance.power >= 1: self._link_instance = LogLink() - elif isinstance(self._family_instance, - GeneralizedHyperbolicSecant): - self._link_instance = IdentityLink() - elif isinstance(self._family_instance, BinomialDistribution): - self._link_instance = LogitLink() else: raise ValueError("No default link known for the " "specified distribution family. Please " @@ -2048,7 +1970,6 @@ def fit(self, X, y, sample_weight=None): # 4. fit # ####################################################################### # algorithms for optimization - # TODO: Parallelize it? 
# 4.1 IRLS ############################################################ # Note: we already set P2 = l2*P2, see above diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 1416bdcfad680..f51f630ebae7e 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -18,7 +18,6 @@ TweedieDistribution, NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, - GeneralizedHyperbolicSecant, BinomialDistribution, ) from sklearn.linear_model import ElasticNet, LogisticRegression, Ridge from sklearn.metrics import mean_absolute_error @@ -103,7 +102,7 @@ def test_tweedie_distribution_power(): (TweedieDistribution(power=1.5), [0.1, 1.5]), (TweedieDistribution(power=2.5), [0.1, 1.5]), (TweedieDistribution(power=-4), [0.1, 1.5]), - (GeneralizedHyperbolicSecant(), [0.1, 1.5])]) +]) def test_deviance_zero(family, chk_values): """Test deviance(y,y) = 0 for different families.""" for x in chk_values: @@ -196,7 +195,7 @@ def test_sample_weights_validation(): ('poisson', PoissonDistribution()), ('gamma', GammaDistribution()), ('inverse.gaussian', InverseGaussianDistribution()), - ('binomial', BinomialDistribution())]) +]) def test_glm_family_argument(f, fam): """Test GLM family argument set as string.""" y = np.array([0.1, 0.5]) # in range of all distributions @@ -424,7 +423,7 @@ def test_glm_identity_regression(solver): [NormalDistribution(), PoissonDistribution(), GammaDistribution(), InverseGaussianDistribution(), TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), - GeneralizedHyperbolicSecant()]) +]) @pytest.mark.parametrize('solver, tol', [('irls', 1e-6), ('lbfgs', 1e-6), ('newton-cg', 1e-7), @@ -620,33 +619,6 @@ def obj(coef): assert_allclose(glm.coef_, glmnet_coef, rtol=1e-4) -@pytest.mark.parametrize('alpha', [0.01, 0.1, 1, 10]) -def test_binomial_enet(alpha): - """Test elastic net regression with binomial family and LogitLink. - - Compare to LogisticRegression. 
- """ - l1_ratio = 0.5 - n_samples = 500 - rng = np.random.RandomState(42) - X, y = make_classification(n_samples=n_samples, n_classes=2, n_features=6, - n_informative=5, n_redundant=0, n_repeated=0, - random_state=rng) - log = LogisticRegression( - penalty='elasticnet', random_state=rng, fit_intercept=False, tol=1e-6, - max_iter=1000, l1_ratio=l1_ratio, C=1./(n_samples * alpha), - solver='saga') - log.fit(X, y) - - glm = GeneralizedLinearRegressor( - family=BinomialDistribution(), link=LogitLink(), fit_intercept=False, - alpha=alpha, l1_ratio=l1_ratio, solver='cd', selection='cyclic', - tol=1e-7) - glm.fit(X, y) - assert_allclose(log.intercept_[0], glm.intercept_, rtol=1e-6) - assert_allclose(log.coef_[0, :], glm.coef_, rtol=5e-6) - - @pytest.mark.parametrize( "params", [ From e67fecb9bc1ee056ad7934803818fd46a0a1f8b3 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 10:00:24 -0500 Subject: [PATCH 068/269] Remove start params option --- sklearn/linear_model/_glm.py | 157 +++-------------------------------- 1 file changed, 13 insertions(+), 144 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index 736e50960dcda..18eba80080670 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -1457,33 +1457,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): warm_start : boolean, optional (default=False) If set to ``True``, reuse the solution of the previous call to ``fit`` - as initialization for ``coef_`` and ``intercept_`` (supersedes option - ``start_params``). If set to ``True`` or if the attribute ``coef_`` - does not exit (first call to ``fit``), option ``start_params`` sets the - start values for ``coef_`` and ``intercept_``. - - start_params : {'guess', 'zero', array of shape (n_features*, )}, \ - optional (default='guess') - Relevant only if ``warm_start=False`` or if fit is called - the first time (``self.coef_`` does not yet exist). - - 'guess' - Start values of mu are calculated by family.starting_mu(..). Then, - one Newton step obtains start values for ``coef_``. If - ``solver='irls'``, it uses one irls step, else the Newton step is - calculated by the cd solver. - This gives usually good starting values. - - 'zero' - All coefficients are set to zero. If ``fit_intercept=True``, the - start value for the intercept is obtained by the weighted average of y. - - array - The array of size n_features* is directly used as start values - for ``coef_``. If ``fit_intercept=True``, the first element - is assumed to be the start value for the ``intercept_``. - Note that n_features* = X.shape[1] + fit_intercept, i.e. it includes - the intercept in counting. + as initialization for ``coef_`` and ``intercept_``. selection : str, optional (default='cyclic') For the solver 'cd' (coordinate descent), the coordinates (features) @@ -1503,7 +1477,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): 'random'. diag_fisher : boolean, optional, (default=False) - Only relevant for solver 'cd' (see also ``start_params='guess'``). + Only relevant for solver 'cd'. If ``False``, the full Fisher matrix (expected Hessian) is computed in each outer iteration (Newton iteration). 
If ``True``, only a diagonal matrix (stored as 1d array) is computed, such that @@ -1576,7 +1550,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', fit_intercept=True, family='normal', link='auto', fit_dispersion=None, solver='auto', max_iter=100, - tol=1e-4, warm_start=False, start_params='guess', + tol=1e-4, warm_start=False, selection='cyclic', random_state=None, diag_fisher=False, copy_X=True, check_input=True, verbose=0): self.alpha = alpha @@ -1591,7 +1565,6 @@ def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', self.max_iter = max_iter self.tol = tol self.warm_start = warm_start - self.start_params = start_params self.selection = selection self.random_state = random_state self.diag_fisher = diag_fisher @@ -1742,7 +1715,7 @@ def fit(self, X, y, sample_weight=None): n_samples, n_features = X.shape # 1.3 arguments to take special care ################################## - # P1, P2, start_params + # P1, P2 if isinstance(self.P1, str) and self.P1 == 'identity': P1 = np.ones(n_features) else: @@ -1793,25 +1766,6 @@ def fit(self, X, y, sample_weight=None): "got (P2.shape=({0}, {1})), needed ({2}, {2})" .format(P2.shape[0], P2.shape[1], X.shape[1])) - start_params = self.start_params - if isinstance(start_params, str): - if start_params not in ['guess', 'zero']: - raise ValueError("The argument start_params must be 'guess', " - "'zero' or an array of correct length; " - "got(start_params={0})".format(start_params)) - else: - start_params = check_array(start_params, accept_sparse=False, - force_all_finite=True, ensure_2d=False, - dtype=_dtype, copy=True) - if ((start_params.shape[0] != X.shape[1] + self.fit_intercept) or - (start_params.ndim != 1)): - raise ValueError("Start values for parameters must have the" - "right length and dimension; required (length" - "={0}, ndim=1); got (length={1}, ndim={2})." 
- .format(X.shape[1] + self.fit_intercept, - start_params.shape[0], - start_params.ndim)) - l1 = self.alpha * self.l1_ratio l2 = self.alpha * (1 - self.l1_ratio) # P1 and P2 are now for sure copies @@ -1899,72 +1853,12 @@ def fit(self, X, y, sample_weight=None): self.coef_)) else: coef = self.coef_ - elif isinstance(start_params, str): - if start_params == 'guess': - # Set mu=starting_mu of the family and do one Newton step - # If solver=cd use cd, else irls - mu = family.starting_mu(y, weights=weights) - eta = link.link(mu) # linear predictor - if solver in ['cd', 'lbfgs', 'newton-cg']: - # see function _cd_solver - sigma_inv = 1/family.variance(mu, phi=1, weights=weights) - d1 = link.inverse_derivative(eta) - temp = sigma_inv * d1 * (y - mu) - if self.fit_intercept: - score = np.concatenate(([temp.sum()], temp @ X)) - else: - score = temp @ X # same as X.T @ temp - - d2_sigma_inv = d1 * d1 * sigma_inv - diag_fisher = self.diag_fisher - if diag_fisher: - fisher = d2_sigma_inv - else: - fisher = \ - _safe_sandwich_dot(X, d2_sigma_inv, - intercept=self.fit_intercept) - # set up space for search direction d for inner loop - if self.fit_intercept: - coef = np.zeros(n_features+1) - else: - coef = np.zeros(n_features) - d = np.zeros_like(coef) - # initial stopping tolerance of inner loop - # use L1-norm of minimum of norm of subgradient of F - # use less restrictive tolerance for initial guess - inner_tol = _min_norm_sugrad(coef=coef, grad=-score, P2=P2, - P1=P1) - inner_tol = 4 * linalg.norm(inner_tol, ord=1) - # just one outer loop = Newton step - n_cycles = 0 - d, coef_P2, n_cycles, inner_tol = \ - _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, - inner_tol, max_inner_iter=1000, - selection=self.selection, - random_state=random_state, - diag_fisher=self.diag_fisher) - coef += d # for simplicity no line search here - else: - # See _irls_solver - # h'(eta) - hp = link.inverse_derivative(eta) - # working weights W, in principle a diagonal matrix - # therefore here just as 1d array - W = (hp**2 / family.variance(mu, phi=1, weights=weights)) - # working observations - z = eta + (y-mu)/hp - # solve A*coef = b - # A = X' W X + l2 P2, b = X' W z - coef = _irls_step(X, W, P2, z, - fit_intercept=self.fit_intercept) - else: # start_params == 'zero' - if self.fit_intercept: - coef = np.zeros(n_features+1) - coef[0] = link.link(np.average(y, weights=weights)) - else: - coef = np.zeros(n_features) - else: # assign given array as start values - coef = start_params + else: + if self.fit_intercept: + coef = np.zeros(n_features+1) + coef[0] = link.link(np.average(y, weights=weights)) + else: + coef = np.zeros(n_features) ####################################################################### # 4. fit # @@ -2312,32 +2206,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): warm_start : boolean, optional (default=False) If set to ``True``, reuse the solution of the previous call to ``fit`` - as initialization for ``coef_`` and ``intercept_`` (supersedes option - ``start_params``). If set to ``True`` or if the attribute ``coef_`` - does not exit (first call to ``fit``), option ``start_params`` sets the - start values for ``coef_`` and ``intercept_``. - - start_params : {'guess', 'zero', array of shape (n_features*, )}, \ - optional (default='guess') - Relevant only if ``warm_start=False`` or if fit is called - the first time (``self.coef_`` does not yet exist). - - 'guess' - Start values of mu are calculated by family.starting_mu(..). Then, - one Newton step obtains start values for ``coef_``. 
If - ``solver='irls'``, it uses one irls step. This gives usually good - starting values. - - 'zero' - All coefficients are set to zero. If ``fit_intercept=True``, the - start value for the intercept is obtained by the weighted average of y. - - array - The array of size n_features* is directly used as start values - for ``coef_``. If ``fit_intercept=True``, the first element - is assumed to be the start value for the ``intercept_``. - Note that n_features* = X.shape[1] + fit_intercept, i.e. it includes - the intercept in counting. + as initialization for ``coef_`` and ``intercept_`` . random_state : {int, RandomState instance, None}, optional (default=None) If int, random_state is the seed used by the random @@ -2401,12 +2270,12 @@ class PoissonRegressor(GeneralizedLinearRegressor): """ def __init__(self, alpha=1.0, fit_intercept=True, fit_dispersion=None, solver='irls', max_iter=100, - tol=1e-4, warm_start=False, start_params='guess', + tol=1e-4, warm_start=False, random_state=None, copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="poisson", link='log', fit_dispersion=fit_dispersion, solver=solver, max_iter=max_iter, tol=tol, warm_start=warm_start, - start_params=start_params, random_state=random_state, + random_state=random_state, copy_X=copy_X, verbose=verbose) From 62f4448101c16a797d74119fe1df5b45b93136b0 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 10:31:42 -0500 Subject: [PATCH 069/269] Remove L1 penalty and CD solver --- sklearn/linear_model/_glm.py | 536 +------------------------ sklearn/linear_model/tests/test_glm.py | 185 +-------- 2 files changed, 36 insertions(+), 685 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index 18eba80080670..15211c6038007 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -93,49 +93,6 @@ def _safe_sandwich_dot(X, d, intercept=False): return res -def _min_norm_sugrad(coef, grad, P2, P1): - """Compute the gradient of all subgradients with minimal L2-norm. - - subgrad = grad + P2 * coef + P1 * subgrad(|coef|_1) - - g_i = grad_i + (P2*coef)_i - - if coef_i > 0: g_i + P1_i - if coef_i < 0: g_i - P1_i - if coef_i = 0: sign(g_i) * max(|g_i|-P1_i, 0) - - Parameters - ---------- - coef : ndarray - coef[0] may be intercept. - - grad : ndarray, shape=coef.shape - - P2 : {1d or 2d array, None} - always without intercept, ``None`` means P2 = 0 - - P1 : ndarray - always without intercept - """ - intercept = (coef.size == P1.size + 1) - idx = 1 if intercept else 0 # offset if coef[0] is intercept - # compute grad + coef @ P2 without intercept - grad_wP2 = grad[idx:].copy() - if P2 is None: - pass - elif P2.ndim == 1: - grad_wP2 += coef[idx:] * P2 - else: - grad_wP2 += coef[idx:] @ P2 - res = np.where(coef[idx:] == 0, - np.sign(grad_wP2) * np.maximum(np.abs(grad_wP2) - P1, 0), - grad_wP2 + np.sign(coef[idx:]) * P1) - if intercept: - return np.concatenate(([grad[0]], res)) - else: - return res - - class Link(metaclass=ABCMeta): """Abstract base class for Link functions.""" @@ -915,7 +872,7 @@ def _irls_solver(coef, X, y, weights, P2, fit_intercept, family, link, # D = link.inverse_derivative(eta) = diag_matrix(h'(X w)) # D2 = link.inverse_derivative(eta)^2 = D^2 # W = D2/V(mu) - # l2 = alpha * (1 - l1_ratio) + # l2 = alpha # Obj' = d(Obj)/d(w) = 1/2 Dev' + l2 P2 w # = -X' D (y-mu)/V(mu) + l2 P2 w # Obj''= d2(Obj)/d(w)d(w') = Hessian = -X'(...) 
X + l2 P2 @@ -981,345 +938,6 @@ def _irls_solver(coef, X, y, weights, P2, fit_intercept, family, link, return coef, n_iter -def _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, - max_inner_iter=1000, selection='cyclic', - random_state=None, diag_fisher=False): - """Compute inner loop of coordinate descent, i.e. cycles through features. - - Minimization of 1-d subproblems:: - - min_z q(d+z*e_j) - q(d) - = min_z A_j z + 1/2 B_jj z^2 + ||P1_j (w_j+d_j+z)||_1 - - A = f'(w) + d*H(w) + (w+d)*P2 - B = H+P2 - Note: f'=-score and H=fisher are updated at the end of outer iteration. - """ - # TODO: use sparsity (coefficient already 0 due to L1 penalty) - # => active set of features for featurelist, see paper - # of Improved GLMNET or Gap Safe Screening Rules - # https://arxiv.org/abs/1611.05780 - n_samples, n_features = X.shape - intercept = (coef.size == X.shape[1] + 1) - idx = 1 if intercept else 0 # offset if coef[0] is intercept - B = fisher - if P2.ndim == 1: - coef_P2 = coef[idx:] * P2 - if not diag_fisher: - idiag = np.arange(start=idx, stop=B.shape[0]) - # B[np.diag_indices_from(B)] += P2 - B[(idiag, idiag)] += P2 - else: - coef_P2 = coef[idx:] @ P2 - if not diag_fisher: - if sparse.issparse(P2): - B[idx:, idx:] += P2.toarray() - else: - B[idx:, idx:] += P2 - A = -score - A[idx:] += coef_P2 - # A += d @ (H+P2) but so far d=0 - # inner loop - for inner_iter in range(1, max_inner_iter+1): - inner_iter += 1 - n_cycles += 1 - # cycle through features, update intercept separately at the end - if selection == 'random': - featurelist = random_state.permutation(n_features) - else: - featurelist = np.arange(n_features) - for j in featurelist: - # minimize_z: a z + 1/2 b z^2 + c |d+z| - # a = A_j - # b = B_jj > 0 - # c = |P1_j| = P1_j > 0, see 1.3 - # d = w_j + d_j - # cf. https://arxiv.org/abs/0708.1485 Eqs. 
(3) - (4) - # with beta = z+d, beta_hat = d-a/b and gamma = c/b - # z = 1/b * S(bd-a,c) - d - # S(a,b) = sign(a) max(|a|-b, 0) soft thresholding - jdx = j+idx # index for arrays containing entries for intercept - a = A[jdx] - if diag_fisher: - # Note: fisher is ndarray of shape (n_samples,) => no idx - # Calculate Bj = B[j, :] = B[:, j] as it is needed later anyway - Bj = np.zeros_like(A) - if intercept: - Bj[0] = fisher.sum() - if sparse.issparse(X): - Bj[idx:] = _safe_toarray(X[:, j].transpose() @ - X.multiply(fisher[:, np.newaxis]) - ).ravel() - else: - Bj[idx:] = (fisher * X[:, j]) @ X - - if P2.ndim == 1: - Bj[idx:] += P2[j] - else: - if sparse.issparse(P2): - # slice columns as P2 is csc - Bj[idx:] += P2[:, j].toarray().ravel() - else: - Bj[idx:] += P2[:, j] - b = Bj[jdx] - else: - b = B[jdx, jdx] - - # those ten lines are what it is all about - if b <= 0: - z = 0 - elif P1[j] == 0: - z = -a/b - elif a + P1[j] < b * (coef[jdx] + d[jdx]): - z = -(a + P1[j])/b - elif a - P1[j] > b * (coef[jdx] + d[jdx]): - z = -(a - P1[j])/b - else: - z = -(coef[jdx] + d[jdx]) - - # update direction d - d[jdx] += z - # update A because d_j is now d_j+z - # A = f'(w) + d*H(w) + (w+d)*P2 - # => A += (H+P2)*e_j z = B_j * z - # Note: B is symmetric B = B.transpose - if diag_fisher: - # Bj = B[:, j] calculated above, still valid - A += Bj * z - else: - # B is symmetric, C- or F-contiguous, but never sparse - if B.flags['F_CONTIGUOUS']: - # slice columns like for sparse csc - A += B[:, jdx] * z - else: # B.flags['C_CONTIGUOUS'] might be true - # slice rows - A += B[jdx, :] * z - # end of cycle over features - # update intercept - if intercept: - if diag_fisher: - Bj = np.zeros_like(A) - Bj[0] = fisher.sum() - Bj[1:] = fisher @ X - b = Bj[0] - else: - b = B[0, 0] - z = 0 if b <= 0 else -A[0]/b - d[0] += z - if diag_fisher: - A += Bj * z - else: - if B.flags['F_CONTIGUOUS']: - A += B[:, 0] * z - else: - A += B[0, :] * z - # end of complete cycle - # stopping criterion for inner loop - # sum_i(|minimum of norm of subgrad of q(d)_i|) - # subgrad q(d) = A + subgrad ||P1*(w+d)||_1 - mn_subgrad = _min_norm_sugrad(coef=coef + d, grad=A, P2=None, P1=P1) - mn_subgrad = linalg.norm(mn_subgrad, ord=1) - if mn_subgrad <= inner_tol: - if inner_iter == 1: - inner_tol = inner_tol/4. - break - # end of inner loop - return d, coef_P2, n_cycles, inner_tol - - -def _cd_solver(coef, X, y, weights, P1, P2, fit_intercept, family, link, - max_iter=100, max_inner_iter=1000, tol=1e-4, - selection='cyclic ', random_state=None, - diag_fisher=False, copy_X=True): - """Solve GLM with L1 and L2 penalty by coordinate descent algorithm. - - The objective being minimized in the coefficients w=coef is:: - - F = f + g, f(w) = 1/2 deviance, g = 1/2 w*P2*w + ||P1*w||_1 - - An Improved GLMNET for L1-regularized Logistic Regression: - - 1. Find optimal descent direction d by minimizing - min_d F(w+d) = min_d F(w+d) - F(w) - 2. Quadratic approximation of F(w+d)-F(w) = q(d): - using f(w+d) = f(w) + f'(w)*d + 1/2 d*H(w)*d + O(d^3) gives: - q(d) = (f'(w) + w*P2)*d + 1/2 d*(H(w)+P2)*d - + ||P1*(w+d)||_1 - ||P1*w||_1 - Then minimize q(d): min_d q(d) - 3. Coordinate descent by updating coordinate j (d -> d+z*e_j): - min_z q(d+z*e_j) - = min_z q(d+z*e_j) - q(d) - = min_z A_j z + 1/2 B_jj z^2 - + ||P1_j (w_j+d_j+z)||_1 - ||P1_j (w_j+d_j)||_1 - A = f'(w) + d*H(w) + (w+d)*P2 - B = H + P2 - - Repeat steps 1-3 until convergence. - Note: Use Fisher matrix instead of Hessian for H. 
- Note: f' = -score, H = Fisher matrix - - Parameters - ---------- - coef : ndarray, shape (c,) - If fit_intercept=False, shape c=X.shape[1]. - If fit_intercept=True, then c=X.shape[1] + 1. - - X : {ndarray, csc sparse matrix}, shape (n_samples, n_features) - Training data (with intercept included if present). If not sparse, - pass directly as Fortran-contiguous data to avoid - unnecessary memory duplication. - - y : ndarray, shape (n_samples,) - Target values. - - weights: ndarray, shape (n_samples,) - Sample weights with which the deviance is weighted. The weights must - bee normalized and sum to 1. - - P1 : {ndarray}, shape (n_features,) - The L1-penalty vector (=diagonal matrix) - - P2 : {ndarray, csc sparse matrix}, shape (n_features, n_features) - The L2-penalty matrix or vector (=diagonal matrix). If a matrix is - passed, it must be symmetric. If X is sparse, P2 must also be sparse. - - fit_intercept : boolean, optional (default=True) - Specifies if a constant (a.k.a. bias or intercept) should be - added to the linear predictor (X*coef+intercept). - - family : ExponentialDispersionModel - - link : Link - - max_iter : int, optional (default=100) - Maximum numer of outer (Newton) iterations. - - max_inner_iter : int, optional (default=1000) - Maximum number of iterations in each inner loop, i.e. max number of - cycles over all features per inner loop. - - tol : float, optional (default=1e-4) - Convergence criterion is - sum_i(|minimum of norm of subgrad of objective_i|)<=tol. - - selection : str, optional (default='cyclic') - If 'random', randomly chose features in inner loop. - - random_state : {int, RandomState instance, None}, optional (default=None) - - diag_fisher : boolean, optional (default=False) - ``False`` calculates full fisher matrix, ``True`` only diagonal matrix - s.t. fisher = X.T @ diag @ X. This saves storage but needs more - matrix-vector multiplications. - - copy_X : boolean, optional (default=True) - If ``True``, X will be copied; else, it may be overwritten. - - Returns - ------- - coef : ndarray, shape (c,) - If fit_intercept=False, shape c=X.shape[1]. - If fit_intercept=True, then c=X.shape[1] + 1. - - n_iter : number of outer iterations = newton iterations - - n_cycles : number of cycles over features - - References - ---------- - Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin - An Improved GLMNET for L1-regularized Logistic Regression, - Journal of Machine Learning Research 13 (2012) 1999-2030 - https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf - """ - X = check_array(X, 'csc', dtype=[np.float64, np.float32], - order='F', copy=copy_X) - if P2.ndim == 2: - P2 = check_array(P2, 'csc', dtype=[np.float64, np.float32], - order='F', copy=copy_X) - if sparse.issparse(X): - if not sparse.isspmatrix_csc(P2): - raise ValueError("If X is sparse, P2 must also be sparse csc" - "format. Got P2 not sparse.") - random_state = check_random_state(random_state) - # Note: we already set P2 = l2*P2, P1 = l1*P1 - # Note: we already symmetrized P2 = 1/2 (P2 + P2') - n_iter = 0 # number of outer iterations - n_cycles = 0 # number of (complete) cycles over features - converged = False - n_samples, n_features = X.shape - idx = 1 if fit_intercept else 0 # offset if coef[0] is intercept - # line search parameters - (beta, sigma) = (0.5, 0.01) - # some precalculations - # Note: For diag_fisher=False, fisher = X.T @ fisher @ X and fisher is a - # 1d array representing a diagonal matrix. 
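
    # Illustration only (not part of the patch): with diag_fisher=True the
    # solver keeps just the 1d array d such that the full Fisher matrix equals
    # X.T @ np.diag(d) @ X, and rebuilds single columns on demand, exactly as
    # Bj[idx:] = (fisher * X[:, j]) @ X does in _cd_cycle above.
    import numpy as np
    rng = np.random.RandomState(0)
    X = rng.randn(8, 3)
    d = rng.rand(8)
    full_fisher = X.T @ np.diag(d) @ X
    j = 1
    assert np.allclose(full_fisher[:, j], (d * X[:, j]) @ X)
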
- eta, mu, score, fisher = family._eta_mu_score_fisher( - coef=coef, phi=1, X=X, y=y, weights=weights, link=link, - diag_fisher=diag_fisher) - # set up space for search direction d for inner loop - d = np.zeros_like(coef) - # initial stopping tolerance of inner loop - # use L1-norm of minimum of norm of subgradient of F - inner_tol = _min_norm_sugrad(coef=coef, grad=-score, P2=P2, P1=P1) - inner_tol = linalg.norm(inner_tol, ord=1) - # outer loop - while n_iter < max_iter: - n_iter += 1 - # initialize search direction d (to be optimized) with zero - d.fill(0) - # inner loop = _cd_cycle - d, coef_P2, n_cycles, inner_tol = \ - _cd_cycle(d, X, coef, score, fisher, P1, P2, n_cycles, inner_tol, - max_inner_iter=max_inner_iter, selection=selection, - random_state=random_state, diag_fisher=diag_fisher) - # line search by sequence beta^k, k=0, 1, .. - # F(w + lambda d) - F(w) <= lambda * bound - # bound = sigma * (f'(w)*d + w*P2*d - # +||P1 (w+d)||_1 - ||P1 w||_1) - P1w_1 = linalg.norm(P1 * coef[idx:], ord=1) - P1wd_1 = linalg.norm(P1 * (coef + d)[idx:], ord=1) - # Note: coef_P2 already calculated and still valid - bound = sigma * (-(score @ d) + coef_P2 @ d[idx:] + P1wd_1 - P1w_1) - Fw = (0.5 * family.deviance(y, mu, weights) + - 0.5 * (coef_P2 @ coef[idx:]) + P1w_1) - la = 1./beta - for k in range(20): - la *= beta # starts with la=1 - coef_wd = coef + la * d - mu_wd = link.inverse(_safe_lin_pred(X, coef_wd)) - Fwd = (0.5 * family.deviance(y, mu_wd, weights) + - linalg.norm(P1 * coef_wd[idx:], ord=1)) - if P2.ndim == 1: - Fwd += 0.5 * ((coef_wd[idx:] * P2) @ coef_wd[idx:]) - else: - Fwd += 0.5 * (coef_wd[idx:] @ (P2 @ coef_wd[idx:])) - if Fwd - Fw <= sigma * la * bound: - break - # update coefficients - coef += la * d - # calculate eta, mu, score, Fisher matrix for next iteration - eta, mu, score, fisher = family._eta_mu_score_fisher( - coef=coef, phi=1, X=X, y=y, weights=weights, link=link, - diag_fisher=diag_fisher) - # stopping criterion for outer loop - # sum_i(|minimum-norm of subgrad of F(w)_i|) - # fp_wP2 = f'(w) + w*P2 - # Note: eta, mu and score are already updated - mn_subgrad = _min_norm_sugrad(coef=coef, grad=-score, P2=P2, P1=P1) - mn_subgrad = linalg.norm(mn_subgrad, ord=1) - if mn_subgrad <= tol: - converged = True - break - # end of outer loop - if not converged: - warnings.warn("Coordinate descent failed to converge. Increase" - " the maximum number of iterations max_iter" - " (currently {0})".format(max_iter), ConvergenceWarning) - - return coef, n_iter, n_cycles - - class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a Generalized Linear Model (GLM) with penalties. @@ -1329,28 +947,10 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): priors as regularizer:: 1/(2*sum(s)) * deviance(y, h(X*w); s) - + alpha * l1_ratio * ||P1*w||_1 - + 1/2 * alpha * (1 - l1_ratio) * w*P2*w - - with inverse link function h and s=sample_weight. Note that for - ``sample_weight=None``, one has s_i=1 and sum(s)=n_samples). - For ``P1=P2='identity'``, the penalty is the elastic net:: - - alpha * l1_ratio * ||w||_1 - + 1/2 * alpha * (1 - l1_ratio) * ||w||_2^2 - - If you are interested in controlling the L1 and L2 penalties - separately, keep in mind that this is equivalent to:: + + 1/2 * alpha * w*P2*w - a * L1 + b * L2 - - where:: - - alpha = a + b and l1_ratio = a / (a + b) - - The parameter ``l1_ratio`` corresponds to alpha in the R package glmnet, - while ``alpha`` corresponds to the lambda parameter in glmnet. 
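
For illustration only (this helper is not part of the patch), the conversion from separate penalty strengths a (L1) and b (L2) to the parametrization described above is:

    def to_alpha_l1_ratio(a, b):
        # map the penalty a*L1 + b*L2 to (alpha, l1_ratio),
        # assuming a + b > 0
        return a + b, a / (a + b)

    # e.g. a=0.25, b=0.75 gives alpha=1.0 and l1_ratio=0.25;
    # the pure lasso (a=1, b=0) gives l1_ratio=1, pure ridge (a=0, b=1) gives 0.
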
- Specifically, l1_ratio = 1 is the lasso penalty. + with inverse link function h and s=sample_weight. + The parameter ``alpha`` corresponds to the lambda parameter in glmnet. Read more in the :ref:`User Guide `. @@ -1364,19 +964,6 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): case, the design matrix X must have full column rank (no collinearities). - l1_ratio : float, optional (default=0) - The elastic net mixing parameter, with ``0 <= l1_ratio <= 1``. For - ``l1_ratio = 0`` the penalty is an L2 penalty. ``For l1_ratio = 1`` it - is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a - combination of L1 and L2. - - P1 : {'identity', array-like}, shape (n_features,), optional \ - (default='identity') - With this array, you can exclude coefficients from the L1 penalty. - Set the corresponding value to 1 (include) or 0 (exclude). The - default value ``'identity'`` is the same as a 1d array of ones. - Note that n_features = X.shape[1]. - P2 : {'identity', array-like, sparse matrix}, shape \ (n_features,) or (n_features, n_features), optional \ (default='identity') @@ -1416,18 +1003,12 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): the chi squared statistic or the deviance statistic. If None, the dispersion is not estimated. - solver : {'auto', 'cd', 'irls', 'lbfgs', 'newton-cg'}, \ + solver : {'auto', 'irls', 'lbfgs', 'newton-cg'}, \ optional (default='auto') Algorithm to use in the optimization problem: 'auto' - Sets 'irls' if l1_ratio equals 0, else 'cd'. - - 'cd' - Coordinate descent algorithm. It can deal with L1 as well as L2 - penalties. Note that in order to avoid unnecessary memory - duplication of X in the ``fit`` method, X should be directly passed - as a Fortran-contiguous numpy array or sparse csc matrix. + Sets 'irls' 'irls' Iterated reweighted least squares. @@ -1450,31 +1031,17 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Stopping criterion. For the irls, newton-cg and lbfgs solvers, the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient (derivative) of - the objective function. For the cd solver, convergence is reached - when ``sum_i(|minimum-norm of g_i|)``, where ``g_i`` is the - subgradient of the objective and minimum-norm of ``g_i`` is the element - of the subgradient ``g_i`` with the smallest L2-norm. + the objective function. warm_start : boolean, optional (default=False) If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_``. - selection : str, optional (default='cyclic') - For the solver 'cd' (coordinate descent), the coordinates (features) - can be updated in either cyclic or random order. - If set to 'random', a random coefficient is updated every iteration - rather than looping over features sequentially in the same order. This - (setting to 'random') often leads to significantly faster convergence - especially when tol is higher than 1e-4. - random_state : {int, RandomState instance, None}, optional (default=None) - The seed of the pseudo random number generator that selects a random - feature to be updated for solver 'cd' (coordinate descent). If int, random_state is the seed used by the random number generator; if RandomState instance, random_state is the random number generator; if None, the random number generator is the - RandomState instance used by `np.random`. Used when ``selection`` == - 'random'. + RandomState instance used by `np.random`. 
diag_fisher : boolean, optional, (default=False) Only relevant for solver 'cd'. @@ -1547,15 +1114,13 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Journal of Machine Learning Research 13 (2012) 1999-2030 https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf """ - def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', + def __init__(self, alpha=1.0, P2='identity', fit_intercept=True, family='normal', link='auto', fit_dispersion=None, solver='auto', max_iter=100, tol=1e-4, warm_start=False, - selection='cyclic', random_state=None, diag_fisher=False, + random_state=None, diag_fisher=False, copy_X=True, check_input=True, verbose=0): self.alpha = alpha - self.l1_ratio = l1_ratio - self.P1 = P1 self.P2 = P2 self.fit_intercept = fit_intercept self.family = family @@ -1565,7 +1130,6 @@ def __init__(self, alpha=1.0, l1_ratio=0, P1='identity', P2='identity', self.max_iter = max_iter self.tol = tol self.warm_start = warm_start - self.selection = selection self.random_state = random_state self.diag_fisher = diag_fisher self.copy_X = copy_X @@ -1645,28 +1209,16 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.alpha, numbers.Number) or self.alpha < 0: raise ValueError("Penalty term must be a non-negative number;" " got (alpha={0})".format(self.alpha)) - if (not isinstance(self.l1_ratio, numbers.Number) or - self.l1_ratio < 0 or self.l1_ratio > 1): - raise ValueError("l1_ratio must be a number in interval [0, 1];" - " got (l1_ratio={0})".format(self.l1_ratio)) if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool;" " got {0}".format(self.fit_intercept)) - if self.solver not in ['auto', 'irls', 'lbfgs', 'newton-cg', 'cd']: + if self.solver not in ['auto', 'irls', 'lbfgs', 'newton-cg']: raise ValueError("GeneralizedLinearRegressor supports only solvers" - " 'auto', 'irls', 'lbfgs', 'newton-cg' and 'cd';" + " 'auto', 'irls', 'lbfgs', 'newton-cg';" " got {0}".format(self.solver)) solver = self.solver if self.solver == 'auto': - if self.l1_ratio == 0: - solver = 'irls' - else: - solver = 'cd' - if (self.alpha > 0 and self.l1_ratio > 0 and solver not in ['cd']): - raise ValueError("The chosen solver (solver={0}) can't deal " - "with L1 penalties, which are included with " - "(alpha={1}) and (l1_ratio={2})." 
- .format(solver, self.alpha, self.l1_ratio)) + solver = 'irls' if (not isinstance(self.max_iter, int) or self.max_iter <= 0): raise ValueError("Maximum number of iteration must be a positive " @@ -1678,10 +1230,6 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.warm_start, bool): raise ValueError("The argument warm_start must be bool;" " got {0}".format(self.warm_start)) - if self.selection not in ['cyclic', 'random']: - raise ValueError("The argument selection must be 'cyclic' or " - "'random'; got (selection={0})" - .format(self.selection)) random_state = check_random_state(self.random_state) if not isinstance(self.diag_fisher, bool): raise ValueError("The argument diag_fisher must be bool;" @@ -1698,16 +1246,10 @@ def fit(self, X, y, sample_weight=None): # 1.2 validate arguments of fit ####################################### _dtype = [np.float64, np.float32] - if solver == 'cd': - _stype = ['csc'] - else: - _stype = ['csc', 'csr'] + _stype = ['csc', 'csr'] X, y = check_X_y(X, y, accept_sparse=_stype, dtype=_dtype, y_numeric=True, multi_output=False, copy=self.copy_X) - # Without converting y to float, deviance might raise - # ValueError: Integers to negative integer powers are not allowed. - # Also, y must not be sparse. y = np.asarray(y, dtype=np.float64) weights = _check_weights(sample_weight, y.shape[0]) @@ -1715,23 +1257,8 @@ def fit(self, X, y, sample_weight=None): n_samples, n_features = X.shape # 1.3 arguments to take special care ################################## - # P1, P2 - if isinstance(self.P1, str) and self.P1 == 'identity': - P1 = np.ones(n_features) - else: - P1 = np.atleast_1d(self.P1) - try: - P1 = P1.astype(np.float64, casting='safe', copy=False) - except TypeError: - raise TypeError("The given P1 cannot be converted to a numeric" - "array; got (P1.dtype={0})." - .format(P1.dtype)) - if (P1.ndim != 1) or (P1.shape[0] != n_features): - raise ValueError("P1 must be either 'identity' or a 1d array " - "with the length of X.shape[1]; " - "got (P1.shape[0]={0}), " - "needed (X.shape[1]={1})." - .format(P1.shape[0], n_features)) + # P2 + # If X is sparse, make P2 sparse, too. if isinstance(self.P2, str) and self.P2 == 'identity': if sparse.issparse(X): @@ -1766,10 +1293,8 @@ def fit(self, X, y, sample_weight=None): "got (P2.shape=({0}, {1})), needed ({2}, {2})" .format(P2.shape[0], P2.shape[1], X.shape[1])) - l1 = self.alpha * self.l1_ratio - l2 = self.alpha * (1 - self.l1_ratio) - # P1 and P2 are now for sure copies - P1 = l1 * P1 + l2 = self.alpha + # P2 is now for sure a copy P2 = l2 * P2 # one only ever needs the symmetrized L2 penalty matrix 1/2 (P2 + P2') # reason: w' P2 w = (w' P2 w)', i.e. it is symmetric @@ -1792,11 +1317,6 @@ def fit(self, X, y, sample_weight=None): raise ValueError("Some value(s) of y are out of the valid " "range for family {0}" .format(family.__class__.__name__)) - # check if P1 has only non-negative values, negative values might - # indicate group lasso in the future. - if not isinstance(self.P1, str): # if self.P1 != 'identity': - if not np.all(P1 >= 0): - raise ValueError("P1 must not have negative values.") # check if P2 is positive semidefinite # np.linalg.cholesky(P2) 'only' asserts positive definite if not isinstance(self.P2, str): # self.P2 != 'identity' @@ -1845,8 +1365,6 @@ def fit(self, X, y, sample_weight=None): # Note: Since phi=self.dispersion_ does not enter the estimation # of mu_i=E[y_i], set it to 1. 
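
    # Illustration only (not part of the patch): the symmetrization
    # P2 = 1/2 (P2 + P2') performed earlier in fit is harmless because the
    # quadratic penalty w' P2 w only sees the symmetric part of P2.
    import numpy as np
    rng = np.random.RandomState(0)
    P2 = rng.randn(4, 4)                # generally non-symmetric
    w = rng.randn(4)
    P2_sym = 0.5 * (P2 + P2.T)
    assert np.isclose(w @ P2 @ w, w @ P2_sym @ w)
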
- # set start values for coef - coef = None if self.warm_start and hasattr(self, 'coef_'): if self.fit_intercept: coef = np.concatenate((np.array([self.intercept_]), @@ -1975,18 +1493,6 @@ def Hs(coef): args=args, maxiter=self.max_iter, tol=self.tol) - # 4.4 coordinate descent ############################################## - # Note: we already set P1 = l1*P1, see above - # Note: we already set P2 = l2*P2, see above - # Note: we already symmetrized P2 = 1/2 (P2 + P2') - elif solver == 'cd': - coef, self.n_iter_, self._n_cycles = \ - _cd_solver(coef=coef, X=X, y=y, weights=weights, P1=P1, - P2=P2, fit_intercept=self.fit_intercept, - family=family, link=link, - max_iter=self.max_iter, tol=self.tol, - selection=self.selection, random_state=random_state, - diag_fisher=self.diag_fisher, copy_X=self.copy_X) ####################################################################### # 5. postprocessing # @@ -2097,9 +1603,6 @@ def estimate_phi(self, X, y, sample_weight=None): dev = self._family_instance.deviance(y, mu, weights) return dev/(n_samples - n_features) - # Note: check_estimator(GeneralizedLinearRegressor) might raise - # "AssertionError: -0.28014056555724598 not greater than 0.5" - # unless GeneralizedLinearRegressor has a score which passes the test. def score(self, X, y, sample_weight=None): """Compute D^2, the percentage of deviance explained. @@ -2212,8 +1715,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): If int, random_state is the seed used by the random number generator; if RandomState instance, random_state is the random number generator; if None, the random number generator is the - RandomState instance used by `np.random`. Used when ``selection`` == - 'random'. + RandomState instance used by `np.random`. copy_X : boolean, optional, (default=True) If ``True``, X will be copied; else, it may be overwritten. 
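
Before the test changes below, a hedged usage sketch of the estimator as it stands at this point of the series; the data and settings mirror test_poisson_ridge further down, and the import path as exposed by this PR is assumed:

    import numpy as np
    from sklearn.linear_model import GeneralizedLinearRegressor

    X = np.array([[-2., -1., 1., 2.], [0., 0., 1., 1.]]).T
    y = np.array([0., 1., 1., 2.])
    glm = GeneralizedLinearRegressor(alpha=1.0, family='poisson', link='log',
                                     fit_intercept=True, solver='irls',
                                     tol=1e-7)
    glm.fit(X, y)
    print(glm.intercept_, glm.coef_)  # L2-penalized Poisson fit
    print(glm.score(X, y))            # D^2, the fraction of deviance explained
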
diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index f51f630ebae7e..8fc1241e1da7a 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -25,7 +25,7 @@ from sklearn.utils.testing import assert_array_equal -GLM_SOLVERS = ['irls', 'lbfgs', 'newton-cg', 'cd'] +GLM_SOLVERS = ['irls', 'lbfgs', 'newton-cg'] @pytest.fixture(scope="module") @@ -236,28 +236,6 @@ def test_glm_alpha_argument(alpha): glm.fit(X, y) -@pytest.mark.parametrize('l1_ratio', ['not a number', -4.2, 1.1, [1]]) -def test_glm_l1_ratio_argument(l1_ratio): - """Test GLM for invalid l1_ratio argument.""" - y = np.array([1, 2]) - X = np.array([[1], [2]]) - glm = GeneralizedLinearRegressor(family='normal', l1_ratio=l1_ratio) - with pytest.raises(ValueError, - match="l1_ratio must be a number in interval.*0, 1"): - glm.fit(X, y) - - -@pytest.mark.parametrize('P1', [['a string', 'a string'], [1, [2]], [1, 2, 3], - [-1]]) -def test_glm_P1_argument(P1): - """Test GLM for invalid P1 argument.""" - y = np.array([1, 2]) - X = np.array([[1], [2]]) - glm = GeneralizedLinearRegressor(P1=P1, l1_ratio=0.5, check_input=True) - with pytest.raises((ValueError, TypeError)): - glm.fit(X, y) - - @pytest.mark.parametrize('P2', ['a string', [1, 2, 3], [[2, 3]], sparse.csr_matrix([1, 2, 3]), [-1]]) def test_glm_P2_argument(P2): @@ -301,14 +279,13 @@ def test_glm_fit_intercept_argument(fit_intercept): glm.fit(X, y) -@pytest.mark.parametrize('solver, l1_ratio', - [('not a solver', 0), (1, 0), ([1], 0), - ('irls', 0.5), ('lbfgs', 0.5), ('newton-cg', 0.5)]) -def test_glm_solver_argument(solver, l1_ratio): +@pytest.mark.parametrize('solver', + ['not a solver', 1, [1]]) +def test_glm_solver_argument(solver): """Test GLM for invalid solver argument.""" y = np.array([1, 2]) X = np.array([[1], [2]]) - glm = GeneralizedLinearRegressor(solver=solver, l1_ratio=l1_ratio) + glm = GeneralizedLinearRegressor(solver=solver) with pytest.raises(ValueError): glm.fit(X, y) @@ -343,28 +320,6 @@ def test_glm_warm_start_argument(warm_start): glm.fit(X, y) -@pytest.mark.parametrize('start_params', - ['not a start_params', ['zero'], [0, 0, 0], - [[0, 0]], ['a', 'b']]) -def test_glm_start_params_argument(start_params): - """Test GLM for invalid start_params argument.""" - y = np.array([1, 2]) - X = np.array([[1], [1]]) - glm = GeneralizedLinearRegressor(start_params=start_params) - with pytest.raises(ValueError): - glm.fit(X, y) - - -@pytest.mark.parametrize('selection', ['not a selection', 1, 0, ['cyclic']]) -def test_glm_selection_argument(selection): - """Test GLM for invalid selection argument""" - y = np.array([1, 2]) - X = np.array([[1], [1]]) - glm = GeneralizedLinearRegressor(selection=selection) - with pytest.raises(ValueError, match="argument selection must be"): - glm.fit(X, y) - - @pytest.mark.parametrize('random_state', ['a string', 0.5, [0]]) def test_glm_random_state_argument(random_state): """Test GLM for invalid random_state argument.""" @@ -413,7 +368,7 @@ def test_glm_identity_regression(solver): y = np.dot(X, coef) glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', fit_intercept=False, solver=solver, - start_params='zero', tol=1e-7) + tol=1e-7) res = glm.fit(X, y) assert_allclose(res.coef_, coef, rtol=1e-6) @@ -427,7 +382,7 @@ def test_glm_identity_regression(solver): @pytest.mark.parametrize('solver, tol', [('irls', 1e-6), ('lbfgs', 1e-6), ('newton-cg', 1e-7), - ('cd', 1e-7)]) +]) def test_glm_log_regression(family, solver, tol): """Test GLM 
regression with log link on a simple dataset.""" coef = [0.2, -0.1] @@ -435,7 +390,7 @@ def test_glm_log_regression(family, solver, tol): y = np.exp(np.dot(X, coef)) glm = GeneralizedLinearRegressor( alpha=0, family=family, link='log', fit_intercept=False, - solver=solver, start_params='guess', tol=tol) + solver=solver, tol=tol) res = glm.fit(X, y) assert_allclose(res.coef_, coef, rtol=5e-6) @@ -472,14 +427,14 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): random_state=42, **ridge_params) ridge.fit(X, y) - glm = GeneralizedLinearRegressor(alpha=1.0, l1_ratio=0, family='normal', + glm = GeneralizedLinearRegressor(alpha=1.0, family='normal', link='identity', fit_intercept=True, max_iter=300, solver=solver, tol=1e-6, check_input=False, random_state=42) glm.fit(X, y) assert glm.coef_.shape == (X.shape[1], ) assert_allclose(glm.coef_, ridge.coef_, rtol=5e-6) - assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-6) + assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-5) assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-5) @@ -487,7 +442,7 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): [('irls', 1e-7), ('lbfgs', 1e-7), ('newton-cg', 1e-7), - ('cd', 1e-7)]) +]) def test_poisson_ridge(solver, tol): """Test ridge regression with poisson family and LogLink. @@ -506,130 +461,24 @@ def test_poisson_ridge(solver, tol): X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T y = np.array([0, 1, 1, 2]) rng = np.random.RandomState(42) - glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0, + glm = GeneralizedLinearRegressor(alpha=1, fit_intercept=True, family='poisson', link='log', tol=1e-7, solver=solver, max_iter=300, random_state=rng) glm.fit(X, y) assert_allclose(glm.intercept_, -0.12889386979, rtol=1e-5) - assert_allclose(glm.coef_, [0.29019207995, 0.03741173122], rtol=1e-6) - - -@pytest.mark.parametrize('diag_fisher', [False, True]) -def test_normal_enet(diag_fisher): - """Test elastic net regression with normal/gaussian family.""" - alpha, l1_ratio = 0.3, 0.7 - n_samples, n_features = 20, 2 - rng = np.random.RandomState(42) - X = rng.randn(n_samples, n_features).copy(order='F') - beta = rng.randn(n_features) - y = 2 + np.dot(X, beta) + rng.randn(n_samples) - - # 1. test normal enet on dense data - glm = GeneralizedLinearRegressor(alpha=alpha, l1_ratio=l1_ratio, - family='normal', link='identity', - fit_intercept=True, tol=1e-8, - max_iter=100, selection='cyclic', - solver='cd', start_params='zero', - check_input=False, - diag_fisher=diag_fisher) - glm.fit(X, y) - - enet = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, fit_intercept=True, - normalize=False, tol=1e-8, copy_X=True) - enet.fit(X, y) - - assert_allclose(glm.intercept_, enet.intercept_, rtol=2e-7) - assert_allclose(glm.coef_, enet.coef_, rtol=5e-5) - - # 2. test normal enet on sparse data - X = sparse.csc_matrix(X) - glm.fit(X, y) - assert_allclose(glm.intercept_, enet.intercept_, rtol=2e-7) - assert_allclose(glm.coef_, enet.coef_, rtol=5e-5) - - -def test_poisson_enet(): - """Test elastic net regression with poisson family and LogLink. - - Compare to R's glmnet""" - # library("glmnet") - # options(digits=10) - # df <- data.frame(a=c(-2,-1,1,2), b=c(0,0,1,1), y=c(0,1,1,2)) - # x <- data.matrix(df[,c("a", "b")]) - # y <- df$y - # fit <- glmnet(x=x, y=y, alpha=0.5, intercept=T, family="poisson", - # standardize=F, thresh=1e-10, nlambda=10000) - # coef(fit, s=1) - # (Intercept) -0.03550978409 - # a 0.16936423283 - # b . 
- glmnet_intercept = -0.03550978409 - glmnet_coef = [0.16936423283, 0.] - X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T - y = np.array([0, 1, 1, 2]) - rng = np.random.RandomState(42) - glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', - link='log', solver='cd', tol=1e-8, - selection='random', random_state=rng, - start_params='guess') - glm.fit(X, y) - assert_allclose(glm.intercept_, glmnet_intercept, rtol=2e-6) - assert_allclose(glm.coef_, glmnet_coef, rtol=2e-7) - - # test results with general optimization procedure - def obj(coef): - pd = PoissonDistribution() - link = LogLink() - N = y.shape[0] - mu = link.inverse(X @ coef[1:] + coef[0]) - alpha, l1_ratio = (1, 0.5) - return 1./(2.*N) * pd.deviance(y, mu) \ - + 0.5 * alpha * (1-l1_ratio) * (coef[1:]**2).sum() \ - + alpha * l1_ratio * np.sum(np.abs(coef[1:])) - res = optimize.minimize(obj, [0, 0, 0], method='nelder-mead', tol=1e-10, - options={'maxiter': 1000, 'disp': False}) - assert_allclose(glm.intercept_, res.x[0], rtol=5e-5) - assert_allclose(glm.coef_, res.x[1:], rtol=1e-5, atol=1e-9) - assert_allclose(obj(np.concatenate(([glm.intercept_], glm.coef_))), - res.fun, rtol=1e-8) - - # same for start_params='zero' and selection='cyclic' - # with reduced precision - glm = GeneralizedLinearRegressor(alpha=1, l1_ratio=0.5, family='poisson', - link='log', solver='cd', tol=1e-5, - selection='cyclic', start_params='zero') - glm.fit(X, y) - assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-4) - assert_allclose(glm.coef_, glmnet_coef, rtol=1e-4) - - # check warm_start, therefore start with different alpha - glm = GeneralizedLinearRegressor(alpha=0.005, l1_ratio=0.5, - family='poisson', max_iter=300, - link='log', solver='cd', tol=1e-5, - selection='cyclic', start_params='zero') - glm.fit(X, y) - # warm start with original alpha and use of sparse matrices - glm.warm_start = True - glm.alpha = 1 - X = sparse.csr_matrix(X) - glm.fit(X, y) - assert_allclose(glm.intercept_, glmnet_intercept, rtol=1e-4) - assert_allclose(glm.coef_, glmnet_coef, rtol=1e-4) + assert_allclose(glm.coef_, [0.29019207995, 0.03741173122], rtol=1e-5) @pytest.mark.parametrize( "params", [ - {"solver": "irls", "start_params": "guess"}, - {"solver": "irls", "start_params": "zero"}, - {"solver": "lbfgs", "start_params": "guess"}, - {"solver": "lbfgs", "start_params": "zero"}, + {"solver": "irls" }, + {"solver": "irls" }, + {"solver": "lbfgs" }, + {"solver": "lbfgs"}, {"solver": "newton-cg"}, - {"solver": "cd", "selection": "cyclic", "diag_fisher": False}, - {"solver": "cd", "selection": "cyclic", "diag_fisher": True}, - {"solver": "cd", "selection": "random", "diag_fisher": False}, ], ids=lambda params: ', '.join("%s=%s" % (key, val) for key, val in params.items()) From d25042e23c595b34cd5382a60bfd39613ac3a2ae Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 10:50:27 -0500 Subject: [PATCH 070/269] Remove newton CG algorithm --- sklearn/linear_model/_glm.py | 100 +++---------------------- sklearn/linear_model/tests/test_glm.py | 8 +- 2 files changed, 13 insertions(+), 95 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index 15211c6038007..e11d7c08064ce 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -17,7 +17,6 @@ from ..base import BaseEstimator, RegressorMixin from ..exceptions import ConvergenceWarning from ..utils import check_array, check_X_y -from ..utils.optimize import newton_cg from ..utils.validation import check_is_fitted, check_random_state @@ 
-1003,7 +1002,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): the chi squared statistic or the deviance statistic. If None, the dispersion is not estimated. - solver : {'auto', 'irls', 'lbfgs', 'newton-cg'}, \ + solver : {'auto', 'irls', 'lbfgs'}, \ optional (default='auto') Algorithm to use in the optimization problem: @@ -1016,10 +1015,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): L1 penalties. 'lbfgs' - Calls scipy's L-BFGS-B optimizer. It cannot deal with L1 penalties. + Calls scipy's L-BFGS-B optimizer. - 'newton-cg', 'lbfgs' - Newton conjugate gradient algorithm cannot deal with L1 penalties. Note that all solvers except lbfgs use the fisher matrix, i.e. the expected Hessian instead of the Hessian matrix. @@ -1028,7 +1025,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): The maximal number of iterations for solver algorithms. tol : float, optional (default=1e-4) - Stopping criterion. For the irls, newton-cg and lbfgs solvers, + Stopping criterion. For the irls, and lbfgs solvers, the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient (derivative) of the objective function. @@ -1212,9 +1209,9 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool;" " got {0}".format(self.fit_intercept)) - if self.solver not in ['auto', 'irls', 'lbfgs', 'newton-cg']: + if self.solver not in ['auto', 'irls', 'lbfgs']: raise ValueError("GeneralizedLinearRegressor supports only solvers" - " 'auto', 'irls', 'lbfgs', 'newton-cg';" + "'auto', 'irls', 'lbfgs';" " got {0}".format(self.solver)) solver = self.solver if self.solver == 'auto': @@ -1410,6 +1407,9 @@ def func(coef, X, y, weights, P2, family, link): return obj, objp args = (X, y, weights, P2, family, link) + # TODO: refactor this once + # https://github.com/scikit-learn/scikit-learn/pull/14250 + # is merged. coef, loss, info = fmin_l_bfgs_b( func, coef, fprime=None, args=args, iprint=(self.verbose > 0) - 1, pgtol=self.tol, @@ -1423,76 +1423,6 @@ def func(coef, X, y, weights, P2, family, link): .format(info["task"])) self.n_iter_ = info['nit'] - # 4.3 Newton-CG ####################################################### - # We use again the fisher matrix instead of the hessian. More - # precisely, expected hessian of deviance. 
- elif solver == 'newton-cg': - def func(coef, X, y, weights, P2, family, link): - intercept = (coef.size == X.shape[1] + 1) - idx = 1 if intercept else 0 # offset if coef[0] is intercept - if P2.ndim == 1: - L2 = coef[idx:] @ (P2 * coef[idx:]) - else: - L2 = coef[idx:] @ (P2 @ coef[idx:]) - mu = link.inverse(_safe_lin_pred(X, coef)) - return 0.5 * family.deviance(y, mu, weights) + 0.5 * L2 - - def grad(coef, X, y, weights, P2, family, link): - mu, devp = \ - family._mu_deviance_derivative(coef, X, y, weights, link) - intercept = (coef.size == X.shape[1] + 1) - idx = 1 if intercept else 0 # offset if coef[0] is intercept - if P2.ndim == 1: - L2 = P2 * coef[idx:] - else: - L2 = P2 @ coef[idx:] - objp = 0.5 * devp - objp[idx:] += L2 - return objp - - def grad_hess(coef, X, y, weights, P2, family, link): - intercept = (coef.size == X.shape[1] + 1) - idx = 1 if intercept else 0 # offset if coef[0] is intercept - if P2.ndim == 1: - L2 = P2 * coef[idx:] - else: - L2 = P2 @ coef[idx:] - eta = _safe_lin_pred(X, coef) - mu = link.inverse(eta) - d1 = link.inverse_derivative(eta) - temp = d1 * family.deviance_derivative(y, mu, weights) - if intercept: - grad = np.concatenate(([0.5 * temp.sum()], - 0.5 * temp @ X + L2)) - else: - grad = 0.5 * temp @ X + L2 # same as 0.5* X.T @ temp + L2 - - # expected hessian = fisher = X.T @ diag_matrix @ X - # calculate only diag_matrix - diag = d1**2 / family.variance(mu, phi=1, weights=weights) - if intercept: - h0i = np.concatenate(([diag.sum()], diag @ X)) - - def Hs(coef): - # return (0.5 * fisher + P2) @ coef - # ret = 0.5 * (X.T @ (diag * (X @ coef))) - ret = 0.5 * ((diag * (X @ coef[idx:])) @ X) - if P2.ndim == 1: - ret += P2 * coef[idx:] - else: - ret += P2 @ coef[idx:] - if intercept: - ret = np.concatenate(([0.5 * (h0i @ coef)], - ret + 0.5 * coef[0] * h0i[1:])) - return ret - - return grad, Hs - - args = (X, y, weights, P2, family, link) - coef, self.n_iter_ = newton_cg(grad_hess, func, grad, coef, - args=args, maxiter=self.max_iter, - tol=self.tol) - ####################################################################### # 5. postprocessing # @@ -1511,7 +1441,7 @@ def Hs(coef): return self - def linear_predictor(self, X): + def _linear_predictor(self, X): """Compute the linear_predictor = X*coef_ + intercept_. Parameters @@ -1552,7 +1482,7 @@ def predict(self, X, sample_weight=None): X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype='numeric', copy=True, ensure_2d=True, allow_nd=False) - eta = self.linear_predictor(X) + eta = self._linear_predictor(X) mu = self._link_instance.inverse(eta) weights = _check_weights(sample_weight, X.shape[0]) @@ -1682,7 +1612,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): the chi squared statistic or the deviance statistic. If None, the dispersion is not estimated. - solver : {'irls', 'lbfgs', 'newton-cg'}, optional (default='irls') + solver : {'irls', 'lbfgs'}, optional (default='irls') Algorithm to use in the optimization problem: 'irls' @@ -1692,17 +1622,11 @@ class PoissonRegressor(GeneralizedLinearRegressor): 'lbfgs' Calls scipy's L-BFGS-B optimizer. - 'newton-cg' - Newton conjugate gradient algorithm. - - Note that all solvers except lbfgs use the fisher matrix, i.e. the - expected Hessian instead of the Hessian matrix. - max_iter : int, optional (default=100) The maximal number of iterations for solver algorithms. tol : float, optional (default=1e-4) - Stopping criterion. For the irls, newton-cg and lbfgs solvers, + Stopping criterion. 
For the irls, and lbfgs solvers, the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient (derivative) of the objective function. diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 8fc1241e1da7a..0cee56afb3042 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -25,7 +25,7 @@ from sklearn.utils.testing import assert_array_equal -GLM_SOLVERS = ['irls', 'lbfgs', 'newton-cg'] +GLM_SOLVERS = ['irls', 'lbfgs'] @pytest.fixture(scope="module") @@ -381,7 +381,6 @@ def test_glm_identity_regression(solver): ]) @pytest.mark.parametrize('solver, tol', [('irls', 1e-6), ('lbfgs', 1e-6), - ('newton-cg', 1e-7), ]) def test_glm_log_regression(family, solver, tol): """Test GLM regression with log link on a simple dataset.""" @@ -395,9 +394,6 @@ def test_glm_log_regression(family, solver, tol): assert_allclose(res.coef_, coef, rtol=5e-6) -# newton-cg may issue a LineSearchWarning, which we filter out -@pytest.mark.filterwarnings('ignore:The line search algorithm') -@pytest.mark.filterwarnings('ignore:Line Search failed') @pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)]) @pytest.mark.parametrize('fit_intercept', [True, False]) @pytest.mark.parametrize('solver', GLM_SOLVERS) @@ -441,7 +437,6 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): @pytest.mark.parametrize('solver, tol', [('irls', 1e-7), ('lbfgs', 1e-7), - ('newton-cg', 1e-7), ]) def test_poisson_ridge(solver, tol): """Test ridge regression with poisson family and LogLink. @@ -478,7 +473,6 @@ def test_poisson_ridge(solver, tol): {"solver": "irls" }, {"solver": "lbfgs" }, {"solver": "lbfgs"}, - {"solver": "newton-cg"}, ], ids=lambda params: ', '.join("%s=%s" % (key, val) for key, val in params.items()) From 07ee4954ef118227832d9ac2ad562a5aec7af38b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 10:54:20 -0500 Subject: [PATCH 071/269] Remove fisher_matrix, _observed_information and _eta_mu_score_fisher --- sklearn/linear_model/_glm.py | 120 ------------------------- sklearn/linear_model/tests/test_glm.py | 43 --------- 2 files changed, 163 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index e11d7c08064ce..fa434821bb80e 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -253,9 +253,6 @@ class ExponentialDispersionModel(metaclass=ABCMeta): _mu_deviance_derivative _score - _fisher_matrix - _observed_information - _eta_mu_score_fisher References ---------- @@ -518,123 +515,6 @@ def _score(self, coef, phi, X, y, weights, link): score = temp @ X # sampe as X.T @ temp return score - def _fisher_matrix(self, coef, phi, X, y, weights, link): - r"""Compute the Fisher information matrix. - - The Fisher information matrix, also known as expected information - matrix is given by - - .. math: - - \mathbf{F}(\boldsymbol{w}) = - \mathrm{E}\left[-\frac{\partial\mathbf{score}}{\partial - \boldsymbol{w}} \right] - = \mathrm{E}\left[ - -\frac{\partial^2 loglike}{\partial\boldsymbol{w} - \partial\boldsymbol{w}^T}\right] - = \mathbf{X}^T W \mathbf{X} \,, - - with :math:`\mathbf{W} = \mathbf{D}^2 \boldsymbol{\Sigma}^{-1}`, - see func:`_score`. 
- """ - lin_pred = _safe_lin_pred(X, coef) - mu = link.inverse(lin_pred) - sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) - d = link.inverse_derivative(lin_pred) - d2_sigma_inv = sigma_inv * d * d - intercept = (coef.size == X.shape[1] + 1) - fisher_matrix = _safe_sandwich_dot(X, d2_sigma_inv, - intercept=intercept) - return fisher_matrix - - def _observed_information(self, coef, phi, X, y, weights, link): - r"""Compute the observed information matrix. - - The observed information matrix, also known as the negative of - the Hessian matrix of the log-likelihood, is given by - - .. math: - - \mathbf{H}(\boldsymbol{w}) = - -\frac{\partial^2 loglike}{\partial\boldsymbol{w} - \partial\boldsymbol{w}^T} - = \mathbf{X}^T \left[ - - \mathbf{D}' \mathbf{R} - + \mathbf{D}^2 \mathbf{V} \mathbf{R} - + \mathbf{D}^2 - \right] \boldsymbol{\Sigma}^{-1} \mathbf{X} \,, - - with :math:`\mathbf{R} = \mathrm{diag}(y_i - \mu_i)`, - :math:`\mathbf{V} = \mathrm{diag}\left(\frac{v'(\mu_i)}{ - v(\mu_i)} - \right)`, - see :func:`score_` function and :func:`_fisher_matrix`. - """ - lin_pred = _safe_lin_pred(X, coef) - mu = link.inverse(lin_pred) - sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) - dp = link.inverse_derivative2(lin_pred) - d2 = link.inverse_derivative(lin_pred)**2 - v = self.unit_variance_derivative(mu)/self.unit_variance(mu) - r = y - mu - temp = sigma_inv * (-dp * r + d2 * v * r + d2) - intercept = (coef.size == X.shape[1] + 1) - observed_information = _safe_sandwich_dot(X, temp, - intercept=intercept) - return observed_information - - def _eta_mu_score_fisher(self, coef, phi, X, y, weights, link, - diag_fisher=False): - """Compute linear predictor, mean, score function and fisher matrix. - - It calculates the linear predictor, the mean, score function - (derivative of log-likelihood) and Fisher information matrix - all in one go as function of `coef` (:math:`w`) and the data. - - Parameters - ---------- - diag_fisher : boolean, optional (default=False) - If ``True``, returns only an array d such that - fisher = X.T @ np.diag(d) @ X. - - Returns - ------- - (eta, mu, score, fisher) : tuple with 4 elements - The 4 elements are: - - * eta: ndarray, shape (X.shape[0],) - * mu: ndarray, shape (X.shape[0],) - * score: ndarray, shape (X.shape[0],) - * fisher: - - * If diag_fisher is ``False``, the full fisher matrix, - an array of shape (X.shape[1], X.shape[1]) - * If diag_fisher is ``True`, an array of shape (X.shape[0]) - """ - intercept = (coef.size == X.shape[1] + 1) - # eta = linear predictor - eta = _safe_lin_pred(X, coef) - mu = link.inverse(eta) - sigma_inv = 1./self.variance(mu, phi=phi, weights=weights) - d1 = link.inverse_derivative(eta) # = h'(eta) - # Alternatively: - # h'(eta) = h'(g(mu)) = 1/g'(mu), note that h is inverse of g - # d1 = 1./link.derivative(mu) - d1_sigma_inv = d1 * sigma_inv - temp = d1_sigma_inv * (y - mu) - if intercept: - score = np.concatenate(([temp.sum()], temp @ X)) - else: - score = temp @ X - - d2_sigma_inv = d1 * d1_sigma_inv - if diag_fisher: - fisher_matrix = d2_sigma_inv - else: - fisher_matrix = _safe_sandwich_dot(X, d2_sigma_inv, - intercept=intercept) - return eta, mu, score, fisher_matrix - class TweedieDistribution(ExponentialDispersionModel): r"""A class for the Tweedie distribution. 
diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 0cee56afb3042..b9716388fb36c 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -109,49 +109,6 @@ def test_deviance_zero(family, chk_values): assert_allclose(family.deviance(x, x), 0, atol=1e-9) -@pytest.mark.parametrize( - 'family, link', - [(NormalDistribution(), IdentityLink()), - (PoissonDistribution(), LogLink()), - (GammaDistribution(), LogLink()), - (InverseGaussianDistribution(), LogLink()), - (TweedieDistribution(power=1.5), LogLink()), - (TweedieDistribution(power=4.5), LogLink())], - ids=lambda args: args.__class__.__name__) -def test_fisher_matrix(family, link): - """Test the Fisher matrix numerically. - Trick: Use numerical differentiation with y = mu""" - coef = np.array([-2, 1, 0, 1, 2.5]) - phi = 0.5 - rng = np.random.RandomState(42) - X = rng.randn(10, 5) - lin_pred = np.dot(X, coef) - mu = link.inverse(lin_pred) - weights = rng.randn(10)**2 + 1 - fisher = family._fisher_matrix(coef=coef, phi=phi, X=X, y=mu, - weights=weights, link=link) - # check that the Fisher matrix is square and positive definite - assert fisher.ndim == 2 - assert fisher.shape[0] == fisher.shape[1] - assert np.all(np.linalg.eigvals(fisher) >= 0) - - approx = np.array([]).reshape(0, coef.shape[0]) - for i in range(coef.shape[0]): - def f(coef): - return -family._score(coef=coef, phi=phi, X=X, y=mu, - weights=weights, link=link)[i] - approx = np.vstack( - [approx, sp.optimize.approx_fprime(xk=coef, f=f, epsilon=1e-5)]) - assert_allclose(fisher, approx, rtol=1e-3) - - # check the observed information matrix - oim = family._observed_information(coef=coef, phi=phi, X=X, y=mu, - weights=weights, link=link) - assert oim.ndim == 2 - assert oim.shape == fisher.shape - assert_allclose(oim, fisher) - - def test_sample_weights_validation(): """Test the raised errors in the validation of sample_weight.""" # scalar value but not positive From d0eb2850b91e74d1e8591f1bbd758cb06d85a3bc Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 11:21:18 -0500 Subject: [PATCH 072/269] Remove matrix L2 penalty and IRLS solver --- sklearn/linear_model/_glm.py | 327 ++----------------------- sklearn/linear_model/tests/test_glm.py | 57 +---- 2 files changed, 26 insertions(+), 358 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index fa434821bb80e..b469b3c4edd17 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -669,164 +669,16 @@ def __init__(self): } -def _irls_step(X, W, P2, z, fit_intercept=True): - """Compute one step in iteratively reweighted least squares. - - Solve A w = b for w with - A = (X' W X + P2) - b = X' W z - z = eta + D^-1 (y-mu) - - See also fit method of :class:`GeneralizedLinearRegressor`. - - Parameters - ---------- - X : {ndarray, sparse matrix}, shape (n_samples, n_features) - Training data (with intercept included if present) - - W : ndarray, shape (n_samples,) - - P2 : {ndarray, sparse matrix}, shape (n_features, n_features) - The L2-penalty matrix or vector (=diagonal matrix) - - z : ndarray, shape (n_samples,) - Working observations - - fit_intercept : boolean, optional (default=True) - - Returns - ------- - coef : ndarray, shape (c,) - If fit_intercept=False, shape c=X.shape[1]. - If fit_intercept=True, then c=X.shapee[1] + 1. - """ - # Note: solve vs least squares, what is more appropriate? - # scipy.linalg.solve seems faster, but scipy.linalg.lstsq - # is more robust. 
- # Note: X.T @ W @ X is not sparse, even when X is sparse. - # Sparse solver would splinalg.spsolve(A, b) or splinalg.lsmr(A, b) - if fit_intercept: - Wz = W * z - if sparse.issparse(X): - b = np.concatenate(([Wz.sum()], X.transpose() @ Wz)) - else: - b = np.concatenate(([Wz.sum()], X.T @ Wz)) - A = _safe_sandwich_dot(X, W, intercept=fit_intercept) - if P2.ndim == 1: - idx = np.arange(start=1, stop=A.shape[0]) - A[(idx, idx)] += P2 # add to diag elements without intercept - elif sparse.issparse(P2): - A[1:, 1:] += P2.toarray() - else: - A[1:, 1:] += P2 - else: - if sparse.issparse(X): - XtW = X.transpose().multiply(W) - # for older versions of numpy and scipy, A may be a np.matrix - A = _safe_toarray(XtW @ X) - else: - XtW = (X.T * W) - A = XtW @ X - b = XtW @ z - if P2.ndim == 1: - A[np.diag_indices_from(A)] += P2 - elif sparse.issparse(P2): - A += P2.toarray() - else: - A += P2 - - coef, *_ = linalg.lstsq(A, b, overwrite_a=True, overwrite_b=True) - return coef - - -def _irls_solver(coef, X, y, weights, P2, fit_intercept, family, link, - max_iter, tol): - """Solve GLM with L2 penalty by IRLS algorithm. - - Note: If X is sparse, P2 must also be sparse. - """ - # Solve Newton-Raphson (1): Obj'' (w - w_old) = -Obj' - # Obj = objective function = 1/2 Dev + l2/2 w P2 w - # Dev = deviance, s = normalized weights, variance V(mu) but phi=1 - # D = link.inverse_derivative(eta) = diag_matrix(h'(X w)) - # D2 = link.inverse_derivative(eta)^2 = D^2 - # W = D2/V(mu) - # l2 = alpha - # Obj' = d(Obj)/d(w) = 1/2 Dev' + l2 P2 w - # = -X' D (y-mu)/V(mu) + l2 P2 w - # Obj''= d2(Obj)/d(w)d(w') = Hessian = -X'(...) X + l2 P2 - # Use Fisher matrix instead of full info matrix -X'(...) X, - # i.e. E[Dev''] with E[y-mu]=0: - # Obj'' ~ X' W X + l2 P2 - # (1): w = (X' W X + l2 P2)^-1 X' W z, - # with z = eta + D^-1 (y-mu) - # Note: P2 must be symmetrized - # Note: ' denotes derivative, but also transpose for matrices - - eta = _safe_lin_pred(X, coef) - mu = link.inverse(eta) - # D = h'(eta) - hp = link.inverse_derivative(eta) - V = family.variance(mu, phi=1, weights=weights) - - converged = False - n_iter = 0 - while n_iter < max_iter: - n_iter += 1 - # coef_old not used so far. - # coef_old = coef - # working weights W, in principle a diagonal matrix - # therefore here just as 1d array - W = hp**2 / V - # working observations - z = eta + (y - mu) / hp - # solve A*coef = b - # A = X' W X + P2, b = X' W z - coef = _irls_step(X, W, P2, z, fit_intercept=fit_intercept) - # updated linear predictor - # do it here for updated values for tolerance - eta = _safe_lin_pred(X, coef) - mu = link.inverse(eta) - hp = link.inverse_derivative(eta) - V = family.variance(mu, phi=1, weights=weights) - - # which tolerace? |coef - coef_old| or gradient? - # use gradient for compliance with newton-cg and lbfgs - # gradient = -X' D (y-mu)/V(mu) + l2 P2 w - temp = hp * (y - mu) / V - if sparse.issparse(X): - gradient = -(X.transpose() @ temp) - else: - gradient = -(X.T @ temp) - idx = 1 if fit_intercept else 0 # offset if coef[0] is intercept - if P2.ndim == 1: - gradient += P2 * coef[idx:] - else: - gradient += P2 @ coef[idx:] - if fit_intercept: - gradient = np.concatenate(([-temp.sum()], gradient)) - if (np.max(np.abs(gradient)) <= tol): - converged = True - break - - if not converged: - warnings.warn("irls failed to converge. 
Increase the number " - "of iterations (currently {0})" - .format(max_iter), ConvergenceWarning) - - return coef, n_iter - - class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a Generalized Linear Model (GLM) with penalties. GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and predicting the mean of the target y as mu=h(X*w). Therefore, - the fit minimizes the following objective function with combined L1 and L2 + the fit minimizes the following objective function with L2 priors as regularizer:: 1/(2*sum(s)) * deviance(y, h(X*w); s) - + 1/2 * alpha * w*P2*w + + 1/2 * alpha * |w|_2 with inverse link function h and s=sample_weight. The parameter ``alpha`` corresponds to the lambda parameter in glmnet. @@ -843,18 +695,6 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): case, the design matrix X must have full column rank (no collinearities). - P2 : {'identity', array-like, sparse matrix}, shape \ - (n_features,) or (n_features, n_features), optional \ - (default='identity') - With this option, you can set the P2 matrix in the L2 penalty `w*P2*w`. - This gives a fine control over this penalty (Tikhonov regularization). - A 2d array is directly used as the square matrix P2. A 1d array is - interpreted as diagonal (square) matrix. The default 'identity' sets - the identity matrix, which gives the usual squared L2-norm. If you just - want to exclude certain coefficients, pass a 1d array filled with 1, - and 0 for the coefficients to be excluded. - Note that P2 must be positive semi-definite. - fit_intercept : boolean, optional (default=True) Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). @@ -882,17 +722,11 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): the chi squared statistic or the deviance statistic. If None, the dispersion is not estimated. - solver : {'auto', 'irls', 'lbfgs'}, \ - optional (default='auto') + solver : {'auto', 'lbfgs'}, optional (default='auto') Algorithm to use in the optimization problem: 'auto' - Sets 'irls' - - 'irls' - Iterated reweighted least squares. - It is the standard algorithm for GLMs. It cannot deal with - L1 penalties. + Sets 'lbfgs' 'lbfgs' Calls scipy's L-BFGS-B optimizer. @@ -905,7 +739,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): The maximal number of iterations for solver algorithms. tol : float, optional (default=1e-4) - Stopping criterion. For the irls, and lbfgs solvers, + Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient (derivative) of the objective function. @@ -920,22 +754,12 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): number generator; if None, the random number generator is the RandomState instance used by `np.random`. - diag_fisher : boolean, optional, (default=False) - Only relevant for solver 'cd'. - If ``False``, the full Fisher matrix (expected Hessian) is computed in - each outer iteration (Newton iteration). If ``True``, only a diagonal - matrix (stored as 1d array) is computed, such that - fisher = X.T @ diag @ X. This saves memory and matrix-matrix - multiplications, but needs more matrix-vector multiplications. If you - use large sparse X or if you have many features, - i.e. n_features >> n_samples, you might set this option to ``True``. 
- copy_X : boolean, optional, (default=True) If ``True``, X will be copied; else, it may be overwritten. check_input : boolean, optional (default=True) Allow to bypass several checks on input: y values in range of family, - sample_weight non-negative, P2 positive semi-definite. + sample_weight non-negative. Don't use this parameter unless you know what you do. verbose : int, optional (default=0) @@ -991,14 +815,13 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Journal of Machine Learning Research 13 (2012) 1999-2030 https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf """ - def __init__(self, alpha=1.0, P2='identity', + def __init__(self, alpha=1.0, fit_intercept=True, family='normal', link='auto', fit_dispersion=None, solver='auto', max_iter=100, tol=1e-4, warm_start=False, - random_state=None, diag_fisher=False, + random_state=None, copy_X=True, check_input=True, verbose=0): self.alpha = alpha - self.P2 = P2 self.fit_intercept = fit_intercept self.family = family self.link = link @@ -1008,7 +831,6 @@ def __init__(self, alpha=1.0, P2='identity', self.tol = tol self.warm_start = warm_start self.random_state = random_state - self.diag_fisher = diag_fisher self.copy_X = copy_X self.check_input = check_input self.verbose = verbose @@ -1051,8 +873,8 @@ def fit(self, X, y, sample_weight=None): raise ValueError( "The family must be an instance of class" " ExponentialDispersionModel or an element of" - " ['normal', 'poisson', 'gamma', 'inverse.gaussian', " - "'binomial']; got (family={0})".format(self.family)) + " ['normal', 'poisson', 'gamma', 'inverse.gaussian']" + "; got (family={0})".format(self.family)) # Guarantee that self._link_instance is set to an instance of # class Link @@ -1089,13 +911,13 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool;" " got {0}".format(self.fit_intercept)) - if self.solver not in ['auto', 'irls', 'lbfgs']: + if self.solver not in ['auto', 'lbfgs']: raise ValueError("GeneralizedLinearRegressor supports only solvers" - "'auto', 'irls', 'lbfgs';" + "'auto', 'lbfgs';" " got {0}".format(self.solver)) solver = self.solver if self.solver == 'auto': - solver = 'irls' + solver = 'lbfgs' if (not isinstance(self.max_iter, int) or self.max_iter <= 0): raise ValueError("Maximum number of iteration must be a positive " @@ -1108,9 +930,6 @@ def fit(self, X, y, sample_weight=None): raise ValueError("The argument warm_start must be bool;" " got {0}".format(self.warm_start)) random_state = check_random_state(self.random_state) - if not isinstance(self.diag_fisher, bool): - raise ValueError("The argument diag_fisher must be bool;" - " got {0}".format(self.diag_fisher)) if not isinstance(self.copy_X, bool): raise ValueError("The argument copy_X must be bool;" " got {0}".format(self.copy_X)) @@ -1133,95 +952,12 @@ def fit(self, X, y, sample_weight=None): n_samples, n_features = X.shape - # 1.3 arguments to take special care ################################## - # P2 - - # If X is sparse, make P2 sparse, too. 
- if isinstance(self.P2, str) and self.P2 == 'identity': - if sparse.issparse(X): - P2 = (sparse.dia_matrix((np.ones(n_features), 0), - shape=(n_features, n_features))).tocsc() - else: - P2 = np.ones(n_features) - else: - P2 = check_array(self.P2, copy=True, - accept_sparse=_stype, - dtype=_dtype, ensure_2d=False) - if P2.ndim == 1: - P2 = np.asarray(P2) - if P2.shape[0] != n_features: - raise ValueError("P2 should be a 1d array of shape " - "(n_features,) with " - "n_features=X.shape[1]; " - "got (P2.shape=({0},)), needed ({1},)" - .format(P2.shape[0], X.shape[1])) - if sparse.issparse(X): - P2 = (sparse.dia_matrix((P2, 0), - shape=(n_features, n_features))).tocsc() - elif (P2.ndim == 2 and P2.shape[0] == P2.shape[1] and - P2.shape[0] == X.shape[1]): - if sparse.issparse(X): - P2 = (sparse.dia_matrix((P2, 0), - shape=(n_features, n_features))).tocsc() - else: - raise ValueError("P2 must be either None or an array of shape " - "(n_features, n_features) with " - "n_features=X.shape[1]; " - "got (P2.shape=({0}, {1})), needed ({2}, {2})" - .format(P2.shape[0], P2.shape[1], X.shape[1])) - - l2 = self.alpha - # P2 is now for sure a copy - P2 = l2 * P2 - # one only ever needs the symmetrized L2 penalty matrix 1/2 (P2 + P2') - # reason: w' P2 w = (w' P2 w)', i.e. it is symmetric - if P2.ndim == 2: - if sparse.issparse(P2): - if sparse.isspmatrix_csc(P2): - P2 = 0.5 * (P2 + P2.transpose()).tocsc() - else: - P2 = 0.5 * (P2 + P2.transpose()).tocsr() - else: - P2 = 0.5 * (P2 + P2.T) - - # For coordinate descent, if X is sparse, P2 must also be csc - if solver == 'cd' and sparse.issparse(X): - P2 = sparse.csc_matrix(P2) - # 1.4 additional validations ########################################## if self.check_input: if not np.all(family.in_y_range(y)): raise ValueError("Some value(s) of y are out of the valid " "range for family {0}" .format(family.__class__.__name__)) - # check if P2 is positive semidefinite - # np.linalg.cholesky(P2) 'only' asserts positive definite - if not isinstance(self.P2, str): # self.P2 != 'identity' - # due to numerical precision, we allow eigenvalues to be a - # tiny bit negative - epsneg = -10 * np.finfo(P2.dtype).epsneg - if P2.ndim == 1 or P2.shape[0] == 1: - p2 = P2 - if sparse.issparse(P2): - p2 = P2.toarray() - if not np.all(p2 >= 0): - raise ValueError("1d array P2 must not have negative " - "values.") - elif sparse.issparse(P2): - # for sparse matrices, not all eigenvals can be computed - # efficiently, use only half of n_features - # k = how many eigenvals to compute - k = np.min([10, n_features // 10 + 1]) - sigma = 0 # start searching near this value - which = 'SA' # find smallest algebraic eigenvalues first - eigenvalues = splinalg.eigsh(P2, k=k, sigma=sigma, - which=which, - return_eigenvectors=False) - if not np.all(eigenvalues >= epsneg): - raise ValueError("P2 must be positive semi-definite.") - else: - if not np.all(linalg.eigvalsh(P2) >= epsneg): - raise ValueError("P2 must be positive semi-definite.") # TODO: if alpha=0 check that X is not rank deficient # TODO: what else to check? @@ -1229,10 +965,10 @@ def fit(self, X, y, sample_weight=None): # 2. 
rescaling of weights (sample_weight) # ####################################################################### # IMPORTANT NOTE: Since we want to minimize - # 1/(2*sum(sample_weight)) * deviance + L1 + L2, + # 1/(2*sum(sample_weight)) * deviance + L2, # deviance = sum(sample_weight * unit_deviance), # we rescale weights such that sum(weights) = 1 and this becomes - # 1/2*deviance + L1 + L2 with deviance=sum(weights * unit_deviance) + # 1/2*deviance + L2 with deviance=sum(weights * unit_deviance) weights_sum = np.sum(weights) weights = weights/weights_sum @@ -1260,33 +996,21 @@ def fit(self, X, y, sample_weight=None): ####################################################################### # algorithms for optimization - # 4.1 IRLS ############################################################ - # Note: we already set P2 = l2*P2, see above - # Note: we already symmetrized P2 = 1/2 (P2 + P2') - if solver == 'irls': - coef, self.n_iter_ = \ - _irls_solver(coef=coef, X=X, y=y, weights=weights, P2=P2, - fit_intercept=self.fit_intercept, family=family, - link=link, max_iter=self.max_iter, tol=self.tol) - - # 4.2 L-BFGS ########################################################## - elif solver == 'lbfgs': - def func(coef, X, y, weights, P2, family, link): + # 4.1 L-BFGS ########################################################## + if solver == 'lbfgs': + def func(coef, X, y, weights, alpha, family, link): mu, devp = \ family._mu_deviance_derivative(coef, X, y, weights, link) dev = family.deviance(y, mu, weights) intercept = (coef.size == X.shape[1] + 1) idx = 1 if intercept else 0 # offset if coef[0] is intercept - if P2.ndim == 1: - L2 = P2 * coef[idx:] - else: - L2 = P2 @ coef[idx:] + L2 = alpha * coef[idx:] obj = 0.5 * dev + 0.5 * (coef[idx:] @ L2) objp = 0.5 * devp objp[idx:] += L2 return obj, objp - args = (X, y, weights, P2, family, link) + args = (X, y, weights, self.alpha, family, link) # TODO: refactor this once # https://github.com/scikit-learn/scikit-learn/pull/14250 # is merged. @@ -1492,13 +1216,9 @@ class PoissonRegressor(GeneralizedLinearRegressor): the chi squared statistic or the deviance statistic. If None, the dispersion is not estimated. - solver : {'irls', 'lbfgs'}, optional (default='irls') + solver : {'lbfgs'}, optional (default='lbfgs') Algorithm to use in the optimization problem: - 'irls' - Iterated reweighted least squares. It is the standard algorithm - for GLMs. - 'lbfgs' Calls scipy's L-BFGS-B optimizer. @@ -1506,7 +1226,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): The maximal number of iterations for solver algorithms. tol : float, optional (default=1e-4) - Stopping criterion. For the irls, and lbfgs solvers, + Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient (derivative) of the objective function. 
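As an aside, the L2-penalized objective handed to the lbfgs solver via ``func`` above can be sketched in isolation. The following is a minimal, hypothetical illustration specialized to the normal family with identity link, so the deviance reduces to a weighted squared error; the helper name ``objective_and_grad`` and the toy data are illustrative only and do not appear in the patch.

    import numpy as np
    import scipy.optimize

    def objective_and_grad(coef, X, y, weights, alpha):
        # coef[0] is the intercept, coef[1:] are the feature coefficients
        lin_pred = X @ coef[1:] + coef[0]
        resid = y - lin_pred
        dev = np.sum(weights * resid ** 2)            # normal-family deviance
        devp = np.concatenate(([-2 * np.sum(weights * resid)],
                               -2 * (X.T @ (weights * resid))))
        obj = 0.5 * dev + 0.5 * alpha * (coef[1:] @ coef[1:])
        grad = 0.5 * devp
        grad[1:] += alpha * coef[1:]                  # intercept is not penalized
        return obj, grad

    rng = np.random.RandomState(0)
    X = rng.rand(20, 3)
    y = X @ np.array([1.0, 2.0, 3.0]) + 0.5
    weights = np.full(20, 1 / 20)                     # rescaled so sum(weights) == 1
    res = scipy.optimize.minimize(objective_and_grad, np.zeros(4), jac=True,
                                  method="L-BFGS-B", args=(X, y, weights, 1e-3))

Leaving the intercept out of the penalty mirrors the ``idx`` offset handling in ``func`` above.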
@@ -1575,8 +1295,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf """ def __init__(self, alpha=1.0, fit_intercept=True, fit_dispersion=None, - solver='irls', max_iter=100, - tol=1e-4, warm_start=False, + solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, random_state=None, copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index b9716388fb36c..877b37993f587 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -25,7 +25,7 @@ from sklearn.utils.testing import assert_array_equal -GLM_SOLVERS = ['irls', 'lbfgs'] +GLM_SOLVERS = ['lbfgs'] @pytest.fixture(scope="module") @@ -193,39 +193,6 @@ def test_glm_alpha_argument(alpha): glm.fit(X, y) -@pytest.mark.parametrize('P2', ['a string', [1, 2, 3], [[2, 3]], - sparse.csr_matrix([1, 2, 3]), [-1]]) -def test_glm_P2_argument(P2): - """Test GLM for invalid P2 argument.""" - y = np.array([1, 2]) - X = np.array([[1], [2]]) - glm = GeneralizedLinearRegressor(P2=P2, check_input=True) - with pytest.raises(ValueError): - glm.fit(X, y) - - -def test_glm_P2_positive_semidefinite(): - """Test GLM for a positive semi-definite P2 argument.""" - n_samples, n_features = 10, 5 - y = np.arange(n_samples) - X = np.zeros((n_samples, n_features)) - P2 = np.diag([100, 10, 5, 0, -1E-5]) - rng = np.random.RandomState(42) - # construct random orthogonal matrix Q - Q, R = linalg.qr(rng.randn(n_features, n_features)) - P2 = Q.T @ P2 @ Q - glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False, - check_input=True) - with pytest.raises(ValueError, match="P2 must be positive semi-definite"): - glm.fit(X, y) - - P2 = sparse.csr_matrix(P2) - glm = GeneralizedLinearRegressor(P2=P2, fit_intercept=False, - check_input=True) - with pytest.raises(ValueError, match="P2 must be positive semi-definite"): - glm.fit(X, y) - - @pytest.mark.parametrize('fit_intercept', ['not bool', 1, 0, [True]]) def test_glm_fit_intercept_argument(fit_intercept): """Test GLM for invalid fit_intercept argument.""" @@ -287,16 +254,6 @@ def test_glm_random_state_argument(random_state): glm.fit(X, y) -@pytest.mark.parametrize('diag_fisher', ['not bool', 1, 0, [True]]) -def test_glm_diag_fisher_argument(diag_fisher): - """Test GLM for invalid diag_fisher arguments.""" - y = np.array([1, 2]) - X = np.array([[1], [1]]) - glm = GeneralizedLinearRegressor(diag_fisher=diag_fisher) - with pytest.raises(ValueError, match="diag_fisher must be bool"): - glm.fit(X, y) - - @pytest.mark.parametrize('copy_X', ['not bool', 1, 0, [True]]) def test_glm_copy_X_argument(copy_X): """Test GLM for invalid copy_X arguments.""" @@ -336,9 +293,7 @@ def test_glm_identity_regression(solver): GammaDistribution(), InverseGaussianDistribution(), TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), ]) -@pytest.mark.parametrize('solver, tol', [('irls', 1e-6), - ('lbfgs', 1e-6), -]) +@pytest.mark.parametrize('solver, tol', [('lbfgs', 1e-6)]) def test_glm_log_regression(family, solver, tol): """Test GLM regression with log link on a simple dataset.""" coef = [0.2, -0.1] @@ -391,10 +346,7 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-5) -@pytest.mark.parametrize('solver, tol', - [('irls', 1e-7), - ('lbfgs', 1e-7), -]) +@pytest.mark.parametrize('solver, tol', [('lbfgs', 
1e-7)]) def test_poisson_ridge(solver, tol): """Test ridge regression with poisson family and LogLink. @@ -426,9 +378,6 @@ def test_poisson_ridge(solver, tol): @pytest.mark.parametrize( "params", [ - {"solver": "irls" }, - {"solver": "irls" }, - {"solver": "lbfgs" }, {"solver": "lbfgs"}, ], ids=lambda params: ', '.join("%s=%s" % (key, val) From 1e4b5380b7ad699e5d059ef6d9836c37a7ba16e2 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 11:23:31 -0500 Subject: [PATCH 073/269] Remove plot_poisson_spline_regression.py example --- .../plot_poisson_spline_regression.py | 85 ------------------- 1 file changed, 85 deletions(-) delete mode 100644 examples/linear_model/plot_poisson_spline_regression.py diff --git a/examples/linear_model/plot_poisson_spline_regression.py b/examples/linear_model/plot_poisson_spline_regression.py deleted file mode 100644 index 30b5881bba1f5..0000000000000 --- a/examples/linear_model/plot_poisson_spline_regression.py +++ /dev/null @@ -1,85 +0,0 @@ -""" -================================= -Poisson Regression with B-Splines -================================= - -As in the :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` -example, a Poisson regression with penalized B-splines (P-splines) [1]_ is -fitted on slightly different sinusoidal, Poisson distributed data and -compared to an AdaBoost model with decision trees. -One can see, that this is a hard problem for both estimators. - -.. [1] Eilers, Paul H. C.; Marx, Brian D. "Flexible smoothing with B -splines - and penalties". Statist. Sci. 11 (1996), no. 2, 89--121. - `doi:10.1214/ss/1038425655 - `_ - -""" -print(__doc__) - -# Author: Christian Lorentzen -# based on the AdaBoost regression example from Noel Dawe -# License: BSD 3 clause - -# importing necessary libraries -import numpy as np -from scipy.linalg import toeplitz -# from scipy.interpolate import BSpline -from scipy.interpolate import splev -import matplotlib.pyplot as plt -from sklearn.tree import DecisionTreeRegressor -from sklearn.ensemble import AdaBoostRegressor -from sklearn.linear_model import GeneralizedLinearRegressor - - -# Create the dataset -xmin, xmax = 0, 6 -rng = np.random.RandomState(1) -X = np.linspace(xmin, xmax, 500)[:, np.newaxis] -y_true = 0.5 * (2.1 + np.sin(X).ravel() + np.sin(6 * X).ravel()) -y = rng.poisson(y_true, X.shape[0]) - -# b-spline basis -nknots, degree = 40, 3 -ns = nknots - degree - 1 # number of base spline functions -dx = (xmax - xmin) / (nknots - 1 - 2 * degree) -knots = np.linspace(xmin - degree * dx, 6 + degree * dx, nknots) -coef = np.zeros(ns) -splineBasis = np.empty((X.shape[0], ns), dtype=float) -for i in range(ns): - coef[i] = 1 -# splineBasis[:, i] = BSpline(knots, coef, degree, extrapolate=False)(X) \ -# .ravel() - splineBasis[:, i] = splev(X, (knots, coef, degree)).ravel() - coef[i] = 0 - -# second order difference matrix -P2 = toeplitz([2, -1] + [0] * (ns - 2)).astype(float) -P2[0, 0] = P2[-1, -1] = 1 - -# Fit regression model -regr_1 = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4), - n_estimators=10, random_state=rng) - -regr_2 = GeneralizedLinearRegressor(family='poisson', link='log', - fit_intercept=True, alpha=0.02, - l1_ratio=0.1, P2=P2) - -regr_1.fit(X, y) -regr_2.fit(splineBasis, y) - -# Predict -y_1 = regr_1.predict(X) -y_2 = regr_2.predict(splineBasis) - -# Plot the results -plt.figure() -plt.plot(X, y_true, c="b", label="true mean") -plt.scatter(X, y, c="k", marker='.', label="training samples") -plt.plot(X, y_1, c="g", label="AdaBoost n_estimator=10", linewidth=2) 
-plt.plot(X, y_2, c="r", label="Poisson GLM with B-splines", linewidth=2) -plt.xlabel("data") -plt.ylabel("target") -plt.title("Regression Comparison") -plt.legend() -plt.show() From 3265148aa4df59bd1a4c8ea5eb8e2102bb650006 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 11:38:48 -0500 Subject: [PATCH 074/269] Remove random_state parameter --- sklearn/linear_model/_glm.py | 30 ++++--------------- sklearn/linear_model/tests/test_glm.py | 41 ++++++++------------------ 2 files changed, 17 insertions(+), 54 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index b469b3c4edd17..4a1f7c260e649 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -10,15 +10,13 @@ from abc import ABCMeta, abstractmethod import numbers import numpy as np -from scipy import linalg, sparse, special -import scipy.sparse.linalg as splinalg +from scipy import sparse, special from scipy.optimize import fmin_l_bfgs_b import warnings from ..base import BaseEstimator, RegressorMixin from ..exceptions import ConvergenceWarning from ..utils import check_array, check_X_y -from ..utils.validation import check_is_fitted, check_random_state - +from ..utils.validation import check_is_fitted def _check_weights(sample_weight, n_samples): @@ -680,7 +678,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * |w|_2 - with inverse link function h and s=sample_weight. + with inverse link function h and s=sample_weight. The parameter ``alpha`` corresponds to the lambda parameter in glmnet. Read more in the :ref:`User Guide `. @@ -742,18 +740,12 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Stopping criterion. For the lbfgs solver, the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` where ``g_i`` is the i-th component of the gradient (derivative) of - the objective function. + the objective function. warm_start : boolean, optional (default=False) If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_``. - random_state : {int, RandomState instance, None}, optional (default=None) - If int, random_state is the seed used by the random - number generator; if RandomState instance, random_state is the random - number generator; if None, the random number generator is the - RandomState instance used by `np.random`. - copy_X : boolean, optional, (default=True) If ``True``, X will be copied; else, it may be overwritten. 
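For orientation, the estimator documented above can be used as in the following sketch, which mirrors the small doctest in the narrative documentation of this series; it assumes a build of this branch is installed.

    from sklearn.linear_model import GeneralizedLinearRegressor

    # alpha plays the role of glmnet's lambda: larger values shrink the
    # coefficients more strongly towards zero.
    reg = GeneralizedLinearRegressor(family='poisson', link='log', alpha=0.5)
    reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2])
    print(reg.coef_, reg.intercept_)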
@@ -819,7 +811,6 @@ def __init__(self, alpha=1.0, fit_intercept=True, family='normal', link='auto', fit_dispersion=None, solver='auto', max_iter=100, tol=1e-4, warm_start=False, - random_state=None, copy_X=True, check_input=True, verbose=0): self.alpha = alpha self.fit_intercept = fit_intercept @@ -830,7 +821,6 @@ def __init__(self, alpha=1.0, self.max_iter = max_iter self.tol = tol self.warm_start = warm_start - self.random_state = random_state self.copy_X = copy_X self.check_input = check_input self.verbose = verbose @@ -929,7 +919,6 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.warm_start, bool): raise ValueError("The argument warm_start must be bool;" " got {0}".format(self.warm_start)) - random_state = check_random_state(self.random_state) if not isinstance(self.copy_X, bool): raise ValueError("The argument copy_X must be bool;" " got {0}".format(self.copy_X)) @@ -959,7 +948,6 @@ def fit(self, X, y, sample_weight=None): "range for family {0}" .format(family.__class__.__name__)) # TODO: if alpha=0 check that X is not rank deficient - # TODO: what else to check? ####################################################################### # 2. rescaling of weights (sample_weight) # @@ -1027,7 +1015,6 @@ def func(coef, X, y, weights, alpha, family, link): .format(info["task"])) self.n_iter_ = info['nit'] - ####################################################################### # 5. postprocessing # ####################################################################### @@ -1235,12 +1222,6 @@ class PoissonRegressor(GeneralizedLinearRegressor): If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` . - random_state : {int, RandomState instance, None}, optional (default=None) - If int, random_state is the seed used by the random - number generator; if RandomState instance, random_state is the random - number generator; if None, the random number generator is the - RandomState instance used by `np.random`. - copy_X : boolean, optional, (default=True) If ``True``, X will be copied; else, it may be overwritten. 
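The ``warm_start`` behaviour described above can be exercised as in this hypothetical sketch; the toy data and parameter values are made up, and a build of this branch is assumed.

    import numpy as np
    from sklearn.linear_model import PoissonRegressor

    rng = np.random.RandomState(0)
    X = rng.rand(50, 2)
    y = rng.poisson(np.exp(X @ np.array([0.3, -0.2])))

    reg = PoissonRegressor(alpha=1.0, warm_start=True)
    reg.fit(X, y)
    reg.set_params(alpha=0.1)
    reg.fit(X, y)   # second fit starts from the previous coef_ and intercept_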
@@ -1296,11 +1277,10 @@ class PoissonRegressor(GeneralizedLinearRegressor): """ def __init__(self, alpha=1.0, fit_intercept=True, fit_dispersion=None, solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, - random_state=None, copy_X=True, check_input=True, verbose=0): + copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="poisson", link='log', fit_dispersion=fit_dispersion, solver=solver, max_iter=max_iter, tol=tol, warm_start=warm_start, - random_state=random_state, copy_X=copy_X, verbose=verbose) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index 877b37993f587..a921c7e065878 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -5,10 +5,8 @@ import numpy as np from numpy.testing import assert_allclose import pytest -import scipy as sp -from scipy import linalg, optimize, sparse -from sklearn.datasets import make_classification, make_regression +from sklearn.datasets import make_regression from sklearn.linear_model import GeneralizedLinearRegressor from sklearn.linear_model._glm import ( Link, @@ -19,7 +17,7 @@ NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, ) -from sklearn.linear_model import ElasticNet, LogisticRegression, Ridge +from sklearn.linear_model import Ridge from sklearn.metrics import mean_absolute_error from sklearn.exceptions import ConvergenceWarning @@ -101,8 +99,7 @@ def test_tweedie_distribution_power(): (TweedieDistribution(power=-1), [0.1, 1.5]), (TweedieDistribution(power=1.5), [0.1, 1.5]), (TweedieDistribution(power=2.5), [0.1, 1.5]), - (TweedieDistribution(power=-4), [0.1, 1.5]), -]) + (TweedieDistribution(power=-4), [0.1, 1.5])]) def test_deviance_zero(family, chk_values): """Test deviance(y,y) = 0 for different families.""" for x in chk_values: @@ -151,8 +148,7 @@ def test_sample_weights_validation(): [('normal', NormalDistribution()), ('poisson', PoissonDistribution()), ('gamma', GammaDistribution()), - ('inverse.gaussian', InverseGaussianDistribution()), -]) + ('inverse.gaussian', InverseGaussianDistribution())]) def test_glm_family_argument(f, fam): """Test GLM family argument set as string.""" y = np.array([0.1, 0.5]) # in range of all distributions @@ -244,16 +240,6 @@ def test_glm_warm_start_argument(warm_start): glm.fit(X, y) -@pytest.mark.parametrize('random_state', ['a string', 0.5, [0]]) -def test_glm_random_state_argument(random_state): - """Test GLM for invalid random_state argument.""" - y = np.array([1, 2]) - X = np.array([[1], [1]]) - glm = GeneralizedLinearRegressor(random_state=random_state) - with pytest.raises(ValueError, match="cannot be used to seed"): - glm.fit(X, y) - - @pytest.mark.parametrize('copy_X', ['not bool', 1, 0, [True]]) def test_glm_copy_X_argument(copy_X): """Test GLM for invalid copy_X arguments.""" @@ -291,8 +277,7 @@ def test_glm_identity_regression(solver): 'family', [NormalDistribution(), PoissonDistribution(), GammaDistribution(), InverseGaussianDistribution(), - TweedieDistribution(power=1.5), TweedieDistribution(power=4.5), -]) + TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)]) @pytest.mark.parametrize('solver, tol', [('lbfgs', 1e-6)]) def test_glm_log_regression(family, solver, tol): """Test GLM regression with log link on a simple dataset.""" @@ -338,7 +323,7 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): glm = GeneralizedLinearRegressor(alpha=1.0, family='normal', 
link='identity', fit_intercept=True, max_iter=300, solver=solver, tol=1e-6, - check_input=False, random_state=42) + check_input=False) glm.fit(X, y) assert glm.coef_.shape == (X.shape[1], ) assert_allclose(glm.coef_, ridge.coef_, rtol=5e-6) @@ -369,7 +354,7 @@ def test_poisson_ridge(solver, tol): fit_intercept=True, family='poisson', link='log', tol=1e-7, solver=solver, max_iter=300, - random_state=rng) + ) glm.fit(X, y) assert_allclose(glm.intercept_, -0.12889386979, rtol=1e-5) assert_allclose(glm.coef_, [0.29019207995, 0.03741173122], rtol=1e-5) @@ -385,11 +370,10 @@ def test_poisson_ridge(solver, tol): ) def test_solver_equivalence(params, regression_data): X, y = regression_data - est_ref = GeneralizedLinearRegressor(random_state=2) + est_ref = GeneralizedLinearRegressor() est_ref.fit(X, y) estimator = GeneralizedLinearRegressor(**params) - estimator.set_params(random_state=2) estimator.fit(X, y) @@ -405,16 +389,15 @@ def test_solver_equivalence(params, regression_data): def test_fit_dispersion(regression_data): X, y = regression_data - est1 = GeneralizedLinearRegressor(random_state=2) + est1 = GeneralizedLinearRegressor() est1.fit(X, y) assert not hasattr(est1, "dispersion_") - est2 = GeneralizedLinearRegressor(random_state=2, fit_dispersion="chisqr") + est2 = GeneralizedLinearRegressor(fit_dispersion="chisqr") est2.fit(X, y) assert isinstance(est2.dispersion_, float) - est3 = GeneralizedLinearRegressor( - random_state=2, fit_dispersion="deviance") + est3 = GeneralizedLinearRegressor(fit_dispersion="deviance") est3.fit(X, y) assert isinstance(est3.dispersion_, float) @@ -425,7 +408,7 @@ def test_fit_dispersion(regression_data): def test_convergence_warning(solver, regression_data): X, y = regression_data - est = GeneralizedLinearRegressor(solver=solver, random_state=2, + est = GeneralizedLinearRegressor(solver=solver, max_iter=1, tol=1e-20) with pytest.warns(ConvergenceWarning): est.fit(X, y) From 1862ab6811db3a6a4ad54719fcd048dc151d0a37 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 9 Jul 2019 11:55:13 -0500 Subject: [PATCH 075/269] Lint --- sklearn/linear_model/tests/test_glm.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/tests/test_glm.py index a921c7e065878..1712f7b5e1d3d 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/tests/test_glm.py @@ -349,7 +349,6 @@ def test_poisson_ridge(solver, tol): # b 0.03741173122 X = np.array([[-2, -1, 1, 2], [0, 0, 1, 1]]).T y = np.array([0, 1, 1, 2]) - rng = np.random.RandomState(42) glm = GeneralizedLinearRegressor(alpha=1, fit_intercept=True, family='poisson', link='log', tol=1e-7, From 4154074a7367be310976a8a1bba00ed737cf9e3d Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 10 Jul 2019 14:16:05 +0200 Subject: [PATCH 076/269] Fix docstring --- doc/modules/linear_model.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index eba51315d2ae8..681a13cdf9d42 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -940,9 +940,9 @@ follows: >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') >>> reg.coef_ - array([0.24630169, 0.43373464]) + array([0.2463..., 0.4337...]) >>> reg.intercept_ - -0.76383633... + -0.7638... .. 
topic:: Examples: From c5d77d78d78a57b242e401f0f2f3efa572e74105 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 10 Jul 2019 14:20:42 +0200 Subject: [PATCH 077/269] Remove unused core --- sklearn/linear_model/_glm.py | 68 +----------------------------------- 1 file changed, 1 insertion(+), 67 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index 4a1f7c260e649..244c781ae8f48 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -10,7 +10,7 @@ from abc import ABCMeta, abstractmethod import numbers import numpy as np -from scipy import sparse, special +from scipy import special from scipy.optimize import fmin_l_bfgs_b import warnings from ..base import BaseEstimator, RegressorMixin @@ -54,42 +54,6 @@ def _safe_lin_pred(X, coef): return X @ coef -def _safe_toarray(X): - """Returns a numpy array.""" - if sparse.issparse(X): - return X.toarray() - else: - return np.asarray(X) - - -def _safe_sandwich_dot(X, d, intercept=False): - """Compute sandwich product X.T @ diag(d) @ X. - - With ``intercept=True``, X is treated as if a column of 1 were appended as - first column of X. - X can be sparse, d must be an ndarray. Always returns a ndarray.""" - if sparse.issparse(X): - temp = (X.transpose() @ X.multiply(d[:, np.newaxis])) - # for older versions of numpy and scipy, temp may be a np.matrix - temp = _safe_toarray(temp) - else: - temp = (X.T * d) @ X - if intercept: - dim = X.shape[1] + 1 - if sparse.issparse(X): - order = 'F' if sparse.isspmatrix_csc(X) else 'C' - else: - order = 'F' if X.flags['F_CONTIGUOUS'] else 'C' - res = np.empty((dim, dim), dtype=max(X.dtype, d.dtype), order=order) - res[0, 0] = d.sum() - res[1:, 0] = d @ X - res[0, 1:] = res[1:, 0] - res[1:, 1:] = temp - else: - res = temp - return res - - class Link(metaclass=ABCMeta): """Abstract base class for Link functions.""" @@ -250,7 +214,6 @@ class ExponentialDispersionModel(metaclass=ABCMeta): starting_mu _mu_deviance_derivative - _score References ---------- @@ -484,35 +447,6 @@ def _mu_deviance_derivative(self, coef, X, y, weights, link): devp = temp @ X # sampe as X.T @ temp return mu, devp - def _score(self, coef, phi, X, y, weights, link): - r"""Compute the score function. - - The score function is the derivative of the - log-likelihood w.r.t. `coef` (:math:`w`). - It is given by - - .. math: - - \mathbf{score}(\boldsymbol{w}) - = \frac{\partial loglike}{\partial\boldsymbol{w}} - = \mathbf{X}^T \mathbf{D} - \boldsymbol{\Sigma}^-1 (\mathbf{y} - \boldsymbol{\mu})\,, - - with :math:`\mathbf{D}=\mathrm{diag}(h'(\eta_1),\ldots)` and - :math:`\boldsymbol{\Sigma}=\mathrm{diag}(\mathbf{V}[y_1],\ldots)`. - Note: The derivative of the deviance w.r.t. coef equals -2 * score. - """ - lin_pred = _safe_lin_pred(X, coef) - mu = link.inverse(lin_pred) - sigma_inv = 1/self.variance(mu, phi=phi, weights=weights) - d = link.inverse_derivative(lin_pred) - temp = sigma_inv * d * (y - mu) - if coef.size == X.shape[1] + 1: - score = np.concatenate(([temp.sum()], temp @ X)) - else: - score = temp @ X # sampe as X.T @ temp - return score - class TweedieDistribution(ExponentialDispersionModel): r"""A class for the Tweedie distribution. 
From 9ab5ac2506bd33d84c96f9f848b62cb4ee5b0853 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 13 Jul 2019 15:13:14 +0200 Subject: [PATCH 078/269] Update examples/linear_model/plot_poisson_regression_non_normal_loss.py Co-Authored-By: Alexandre Gramfort --- .../linear_model/plot_poisson_regression_non_normal_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index b06adcb787560..471c137840e82 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -1,6 +1,6 @@ """ ====================================== -Poisson regression and non normal loss +Poisson regression and non-normal loss ====================================== This example illustrate the use linear Poisson regression From e4d0be190239435e7dcd4d27cd616006909db98b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 13 Jul 2019 15:13:26 +0200 Subject: [PATCH 079/269] Update examples/linear_model/plot_poisson_regression_non_normal_loss.py Co-Authored-By: Alexandre Gramfort --- .../linear_model/plot_poisson_regression_non_normal_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 471c137840e82..0537704b2cf1f 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -3,7 +3,7 @@ Poisson regression and non-normal loss ====================================== -This example illustrate the use linear Poisson regression +This example illustrates the use of linear Poisson regression on the French Motor Third-Party Liability Claims dataset [1] and compare it with learning models with least squared error. From 6ff4d588f5adae2aed6810ddff64733c5bc595a3 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 13 Jul 2019 15:13:50 +0200 Subject: [PATCH 080/269] Update doc/modules/linear_model.rst Co-Authored-By: Alexandre Gramfort --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 681a13cdf9d42..2f750b33623aa 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -929,7 +929,7 @@ Since the linear predictor :math:`Xw` can be negative and Poisson, Gamma and Inverse Gaussian distributions don't support negative values, it is convenient to apply a link function different from the identity link :math:`h(Xw)=Xw` that guarantees the non-negativeness, e.g. the log-link with -:math:`h(Xw)=\exp(Xw)`. +:math:`h(x^\top w)=\exp(x^\top w)`. Note that the feature matrix `X` should be standardized before fitting. This ensures that the penalty treats features equally. 
The estimator can be used as From 13102d5a2b4538734f162a70979df874a31c7798 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 13 Jul 2019 15:13:59 +0200 Subject: [PATCH 081/269] Update doc/modules/linear_model.rst Co-Authored-By: Alexandre Gramfort --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 2f750b33623aa..49662064827f3 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -928,7 +928,7 @@ are the following: Since the linear predictor :math:`Xw` can be negative and Poisson, Gamma and Inverse Gaussian distributions don't support negative values, it is convenient to apply a link function different from the identity link -:math:`h(Xw)=Xw` that guarantees the non-negativeness, e.g. the log-link with +:math:`h(x^\top w)=x^\top w` that guarantees the non-negativeness, e.g. the log-link with :math:`h(x^\top w)=\exp(x^\top w)`. Note that the feature matrix `X` should be standardized before fitting. This From af89e5281b880b19346db625838e1623275b438a Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 13 Jul 2019 15:14:12 +0200 Subject: [PATCH 082/269] Update doc/modules/linear_model.rst Co-Authored-By: Alexandre Gramfort --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 49662064827f3..c8c103288c5ab 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -898,7 +898,7 @@ ways [10]_. First, the predicted values :math:`\hat{y}` are linked to a linear combination of the input variables :math:`X` via an inverse link function :math:`h` as -.. math:: \hat{y}(w, x) = h(xw) = h(w_0 + w_1 x_1 + ... + w_p x_p). +.. math:: \hat{y}(w, x) = h(x^\top w) = h(w_0 + w_1 x_1 + ... + w_p x_p). Secondly, the squared loss function is replaced by the deviance :math:`D` of an exponential dispersion model (EDM) [11]_. The objective function being minimized From ddc4b717bcece9c8d0347d46b9f4a89b681aec96 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 13 Jul 2019 15:27:22 +0200 Subject: [PATCH 083/269] Use scipy.optimize.minimize interface for LBFGS optimizer --- sklearn/linear_model/_glm.py | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm.py index 244c781ae8f48..939249e42e4f4 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm.py @@ -6,16 +6,16 @@ # some parts and tricks stolen from other sklearn files. # License: BSD 3 clause -from __future__ import division from abc import ABCMeta, abstractmethod import numbers + import numpy as np from scipy import special -from scipy.optimize import fmin_l_bfgs_b -import warnings +import scipy.optimize + from ..base import BaseEstimator, RegressorMixin -from ..exceptions import ConvergenceWarning from ..utils import check_array, check_X_y +from ..utils.optimize import _check_optimize_result from ..utils.validation import check_is_fitted @@ -933,21 +933,18 @@ def func(coef, X, y, weights, alpha, family, link): return obj, objp args = (X, y, weights, self.alpha, family, link) - # TODO: refactor this once - # https://github.com/scikit-learn/scikit-learn/pull/14250 - # is merged. 
- coef, loss, info = fmin_l_bfgs_b( - func, coef, fprime=None, args=args, - iprint=(self.verbose > 0) - 1, pgtol=self.tol, - maxiter=self.max_iter, factr=1e3) - if info["warnflag"] == 1: - warnings.warn("lbfgs failed to converge." - " Increase the number of iterations.", - ConvergenceWarning) - elif info["warnflag"] == 2: - warnings.warn("lbfgs failed for the reason: {0}" - .format(info["task"])) - self.n_iter_ = info['nit'] + + opt_res = scipy.optimize.minimize( + func, coef, method="L-BFGS-B", jac=True, + options={ + "maxiter": self.max_iter, + "iprint": (self.verbose > 0) - 1, + "gtol": self.tol, + "ftol": 1e3*np.finfo(float).eps, + }, + args=args) + self.n_iter_ = _check_optimize_result("lbfgs", opt_res) + coef = opt_res.x ####################################################################### # 5. postprocessing # From 426ae1d711c27d14a7dd7d22763fee5d9d20c1f5 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 14 Jul 2019 18:20:09 +0200 Subject: [PATCH 084/269] EXA wording and score in plot_tweedie_regression_insurance_claims.html --- ...lot_tweedie_regression_insurance_claims.py | 137 ++++++++++-------- 1 file changed, 78 insertions(+), 59 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 063d12e6e291b..00111b811f923 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -3,7 +3,7 @@ Tweedie regression on insurance claims ====================================== -This example illustrate the use Poisson, Gamma and Tweedie regression +This example illustrates the use of Poisson, Gamma and Tweedie regression on the French Motor Third-Party Liability Claims dataset, and is inspired by an R tutorial [1]. @@ -13,9 +13,10 @@ which are: 1. Model the number of claims with a Poisson distribution, the average - claim amount as a Gamma distribution and multiply the predictions, to get - the total claim amount. -2. Model total claim amount directly, typically with a Tweedie distribution. + claim amount as a Gamma distribution and multiply the predictions of both in + order to get the total claim amount. +2. Model total claim amount directly, typically with a Tweedie distribution of + Tweedie power :math:`p \\in (1, 2)`. In this example we will illustrate both approaches. We start by defining a few helper functions for loading the data and visualizing results. @@ -49,7 +50,7 @@ def load_mtpl2(n_samples=100000): - """Fetcher for French Motor Third-Party Liability Claims dataset + """Fetch the French Motor Third-Party Liability Claims dataset. Parameters ---------- @@ -81,24 +82,27 @@ def load_mtpl2(n_samples=100000): return df.iloc[:n_samples] -def plot_obs_pred(df, feature, observed, y_predicted, weight, y_label=None, +def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, title=None, kind_weight=None, ax=None): """Plot observed and predicted - aggregated per feature level. 
Parameters ---------- - df : DataFrame with at least one column named feature + df : DataFrame with at least three columns named feature, weight and + observed + feature: str + a column name of df for the feature to be plotted + weight : str + column name of df with the values of weights or exposure observed : str - a column name of the observed target + a column name of df with the observed target predicted : frame a dataframe, with the same index as df, with the predicted target - weight : str - column name with the values of weights/exposure """ # aggregate observed and predicted variables by feature level df_ = df.loc[:, [feature, weight]].copy() df_["observed"] = df[observed] * df[weight] - df_["predicted"] = y_predicted * df[weight] + df_["predicted"] = predicted * df[weight] df_ = ( df_.groupby([feature])[weight, "observed", "predicted"] .sum() @@ -126,9 +130,10 @@ def plot_obs_pred(df, feature, observed, y_predicted, weight, y_label=None, # 1. Loading datasets and pre-processing # -------------------------------------- # -# We construct the freMTPL2 dataset by joining the freMTPL2freq table, -# containing the number of claims (``ClaimNb``) with the freMTPL2sev table -# containing the claim amount (``ClaimAmount``) for the same user ids. +# We construct the freMTPL2 dataset by joining the freMTPL2freq table, +# containing the number of claims (``ClaimNb``), with the freMTPL2sev table, +# containing the claim amount (``ClaimAmount``) for the same policy ids +# (``IDpol``). df = load_mtpl2(n_samples=100000) @@ -206,14 +211,14 @@ def score_estimator( y, _weights = df[target], df[weights] for score_label, metric in [ - ("D² explaned", None), + ("D² explained", None), ("mean deviance", partial(mean_deviance, estimator)), ("mean abs. error", mean_absolute_error), ]: if estimator.__class__.__name__ == "ClaimProdEstimator": - # ClaimProdEstimator is the product of the frequency and - # severity models, together with a denormalized by the exposure - # values. It does not fully follow the scikit-learn API and we + # ClaimProdEstimator is the product of frequency and severity + # models, denormalized by the exposure values. + # It does not fully follow the scikit-learn API and we # must handle it separately. y_pred = estimator.predict(X, exposure=df.Exposure.values) else: @@ -253,50 +258,50 @@ def score_estimator( # # We can visually compare observed and predicted values, aggregated by # the drivers age (``DrivAge``), vehicle age (``VehAge``) and the insurance -# bonus/penalty (``BonusMalus``), +# bonus/malus (``BonusMalus``). 
fig, ax = plt.subplots(2, 2, figsize=(16, 8)) fig.subplots_adjust(hspace=0.3, wspace=0.2) plot_obs_pred( - df_train, - "DrivAge", - "Frequency", - glm_freq.predict(X_train), + df=df_train, + feature="DrivAge", weight="Exposure", + observed="Frequency", + predicted=glm_freq.predict(X_train), y_label="Claim Frequency", title="train data", ax=ax[0, 0], ) plot_obs_pred( - df_test, - "DrivAge", - "Frequency", - glm_freq.predict(X_test), + df=df_test, + feature="DrivAge", weight="Exposure", + observed="Frequency", + predicted=glm_freq.predict(X_test), y_label="Claim Frequency", title="test data", ax=ax[0, 1], ) plot_obs_pred( - df_test, - "VehAge", - "Frequency", - glm_freq.predict(X_test), + df=df_test, + feature="VehAge", weight="Exposure", + observed="Frequency", + predicted=glm_freq.predict(X_test), y_label="Claim Frequency", title="test data", ax=ax[1, 0], ) plot_obs_pred( - df_test, - "BonusMalus", - "Frequency", - glm_freq.predict(X_test), + df=df_test, + feature="BonusMalus", weight="Exposure", + observed="Frequency", + predicted=glm_freq.predict(X_test), y_label="Claim Frequency", title="test data", ax=ax[1, 1], @@ -308,12 +313,13 @@ def score_estimator( # 3. Severity model - Gamma Distribution # --------------------------------------- # The mean claim amount or severity (`AvgClaimAmount`) can be empirically -# shown to follow a Gamma distribution. We fit a GLM model for the severity -# with the same features as the frequency model. +# shown to follow approximately a Gamma distribution. We fit a GLM model for +# the severity with the same features as the frequency model. # # Note: -# - We filter out ``ClaimAmount == 0``` as the Gamma distribution as support -# on :math:`(0, \infty)` not :math:`[0, \infty)`. +# +# - We filter out ``ClaimAmount == 0`` as the Gamma distribution has support +# on :math:`(0, \infty)`, not :math:`[0, \infty)`. # - We use ``ClaimNb`` as sample weights. mask_train = df_train["ClaimAmount"] > 0 @@ -341,19 +347,20 @@ def score_estimator( ############################################################################## # -# Note that the resulting model is conditional on having at least one claim, -# and cannot be used to predict the average claim amount in general, +# Note that the resulting model is the average claim amount per claim. As such, +# it is conditional on having at least one claim, and cannot be used to predict +# the average claim amount per policy in general. print( - "Mean AvgClaim Amount: %.2f " + "Mean AvgClaim Amount per policy: %.2f " % df_train.AvgClaimAmount.mean() ) print( - "Mean AvgClaim Amount | NbClaim > 0: %.2f" + "Mean AvgClaim Amount | NbClaim > 0: %.2f" % df_train.AvgClaimAmount[df_train.AvgClaimAmount > 0].mean() ) print( - "Predicted Mean AvgClaim Amount: %.2f" + "Predicted Mean AvgClaim Amount | NbClaim > 0: %.2f" % glm_sev.predict(X_train).mean() ) @@ -361,28 +368,28 @@ def score_estimator( ############################################################################## # # We can visually compare observed and predicted values, aggregated for -# the drivers age (``Driv Age``), +# the drivers age (``DrivAge``). 
fig, ax = plt.subplots(1, 2, figsize=(16, 4)) # plot DivAge plot_obs_pred( - df_train.loc[mask_train], - "DrivAge", - "AvgClaimAmount", - glm_sev.predict(X_train[mask_train.values]), + df=df_train.loc[mask_train], + feature="DrivAge", weight="Exposure", + observed="AvgClaimAmount", + predicted=glm_sev.predict(X_train[mask_train.values]), y_label="Average Claim Severity", title="train data", ax=ax[0], ) plot_obs_pred( - df_test.loc[mask_test], - "DrivAge", - "AvgClaimAmount", - glm_sev.predict(X_test[mask_test.values]), + df=df_test.loc[mask_test], + feature="DrivAge", weight="Exposure", + observed="AvgClaimAmount", + predicted=glm_sev.predict(X_test[mask_test.values]), y_label="Average Claim Severity", title="test data", ax=ax[1], @@ -391,31 +398,41 @@ def score_estimator( ############################################################################## # -# 3. Total Claims Amount -- Compound Poisson distribution +# 4. Total Claims Amount -- Compound Poisson distribution # ------------------------------------------------------- # # As mentionned in the introduction, the total claim amount can be modeled -# either as the product of the frequency model by the severity model. +# either as the product of the frequency model by the severity model, class ClaimProdEstimator: - """Total claim amount estimator + """Total claim amount estimator. Computed as the product of the frequency model by the serverity model, - denormalized by exposure. + denormalized by exposure. Use Tweedie deviance with `p=1.5`. """ def __init__(self, est_freq, est_sev): self.est_freq = est_freq self.est_sev = est_sev + self._family_instance = TweedieDistribution(power=1.5) def predict(self, X, exposure): - """Predict the total claim amount + """Predict the total claim amount. The predict method is not compatible with the scikit-learn API. """ return exposure * self.est_freq.predict(X) * self.est_sev.predict(X) + def score(self, X, y, sample_weight=None): + """Compute D², the percentage of deviance explained.""" + mu = self.predict(X, exposure=sample_weight) + dev = self._family_instance.deviance(y, mu, weights=sample_weight) + y_mean = np.average(y, weights=sample_weight) + dev_null = self._family_instance.deviance(y, y_mean, + weights=sample_weight) + return 1. - dev / dev_null + est_prod = ClaimProdEstimator(glm_freq, glm_sev) @@ -476,7 +493,9 @@ def predict(self, X, exposure): # model than when using separate models for frequency and severity. # # We can additionally validate these models by comparing observed and predicted -# total claim amount over the test and train subsets. +# total claim amount over the test and train subsets. We see that in our case +# the frequency-severity model underestimates the total claim amount, whereas +# the Tweedie model overestimates. 
res = [] for subset_label, X, df in [ From a4043847d0dfde68bb09dc0d9de3ab10fc07d41b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 15 Jul 2019 14:15:26 +0200 Subject: [PATCH 085/269] Address review comments --- .../plot_tweedie_regression_insurance_claims.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 00111b811f923..1c8dd42df336d 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -46,7 +46,7 @@ from sklearn.preprocessing import FunctionTransformer, OneHotEncoder from sklearn.preprocessing import StandardScaler, KBinsDiscretizer -from sklearn.metrics import mean_absolute_error +from sklearn.metrics import mean_absolute_error, mean_squared_error def load_mtpl2(n_samples=100000): @@ -83,7 +83,7 @@ def load_mtpl2(n_samples=100000): def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, - title=None, kind_weight=None, ax=None): + title=None, ax=None): """Plot observed and predicted - aggregated per feature level. Parameters @@ -141,9 +141,11 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, # requires a strictly positive target values. df.loc[(df.ClaimAmount == 0) & (df.ClaimNb >= 1), "ClaimNb"] = 0 -# correct for unreasonable observations (that might be data error) +# Correct for unreasonable observations (that might be data error) +# and a few exceptionally large claim amounts df["ClaimNb"] = df["ClaimNb"].clip(upper=4) df["Exposure"] = df["Exposure"].clip(upper=1) +df["ClaimAmount"] = df["ClaimAmount"].clip(upper=200000) column_trans = ColumnTransformer( [ @@ -188,7 +190,9 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=2) -glm_freq = GeneralizedLinearRegressor(family="poisson", alpha=0) +# Some of the features are colinear, we use a weak penalization to avoid +# numerical issues. +glm_freq = GeneralizedLinearRegressor(family="poisson", alpha=1e-2) glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) @@ -214,6 +218,7 @@ def score_estimator( ("D² explained", None), ("mean deviance", partial(mean_deviance, estimator)), ("mean abs. error", mean_absolute_error), + ("mean squared error", mean_squared_error), ]: if estimator.__class__.__name__ == "ClaimProdEstimator": # ClaimProdEstimator is the product of frequency and severity @@ -325,7 +330,7 @@ def score_estimator( mask_train = df_train["ClaimAmount"] > 0 mask_test = df_test["ClaimAmount"] > 0 -glm_sev = GeneralizedLinearRegressor(family="gamma", alpha=1) +glm_sev = GeneralizedLinearRegressor(family="gamma") glm_sev.fit( X_train[mask_train.values], From 65796a3d3e74aff111c187b6be20e0e9287e0797 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 16 Jul 2019 16:41:02 +0200 Subject: [PATCH 086/269] Review comments on the documentation --- doc/modules/linear_model.rst | 50 +++++++++++++++++------------------- 1 file changed, 24 insertions(+), 26 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index c8c103288c5ab..b6f7c2b82c1f5 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -893,27 +893,23 @@ to warm-starting (see :term:`Glossary `). 
Generalized Linear Regression ============================= -:class:`GeneralizedLinearRegressor` generalizes the :ref:`elastic_net` in two -ways [10]_. First, the predicted values :math:`\hat{y}` are linked to a linear +:class:`GeneralizedLinearRegressor` generalizes linear models in two ways +[10]_. First, the predicted values :math:`\hat{y}` are linked to a linear combination of the input variables :math:`X` via an inverse link function :math:`h` as .. math:: \hat{y}(w, x) = h(x^\top w) = h(w_0 + w_1 x_1 + ... + w_p x_p). Secondly, the squared loss function is replaced by the deviance :math:`D` of an -exponential dispersion model (EDM) [11]_. The objective function being minimized -becomes +exponential dispersion model (EDM) [11]_. The objective function being +minimized becomes -.. math:: \frac{1}{2\mathrm{sum}(s)}D(y, \hat{y}; s) + \alpha \rho ||P_1w||_1 - +\frac{\alpha(1-\rho)}{2} w^T P_2 w +.. math:: \frac{1}{2 \sum s_i}D(y, \hat{y}; s) +\frac{\alpha}{2} ||w||_2 -with sample weights :math:`s`. -:math:`P_1` (diagonal matrix) can be used to exclude some of the coefficients in -the L1 penalty, the matrix :math:`P_2` (must be positive semi-definite) allows -for a more versatile L2 penalty. +with sample weights :math:`s`, and L2 regularization penalty :math:`\alpha`. -Use cases, where a loss different from the squared loss might be appropriate, -are the following: +In the following use cases, a loss different from the squared loss might be +appropriate, * If the target values :math:`y` are counts (non-negative integer valued) or frequencies (non-negative), you might use a Poisson deviance with log-link. @@ -928,10 +924,10 @@ are the following: Since the linear predictor :math:`Xw` can be negative and Poisson, Gamma and Inverse Gaussian distributions don't support negative values, it is convenient to apply a link function different from the identity link -:math:`h(x^\top w)=x^\top w` that guarantees the non-negativeness, e.g. the log-link with -:math:`h(x^\top w)=\exp(x^\top w)`. +:math:`h(x^\top w)=x^\top w` that guarantees the non-negativeness, e.g. the +log-link with :math:`h(x^\top w)=\exp(x^\top w)`. -Note that the feature matrix `X` should be standardized before fitting. This +Note that the feature matrix ``X`` should be standardized before fitting. This ensures that the penalty treats features equally. The estimator can be used as follows: @@ -947,7 +943,8 @@ follows: .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_spline_regression.py` + * :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py` + * :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_regression_non_normal_loss.py` Mathematical formulation ------------------------ @@ -967,12 +964,9 @@ Note that the first assumption implies function :math:`v(\mu)`. Specifying a particular distribution of an EDM is the same as specifying a unit variance function (they are one-to-one). -Including penalties helps to avoid overfitting or, in case of L1 penalty, to -obtain sparse solutions. But there are also other motivations to include them, -e.g. accounting for the dependence structure of :math:`y`. - -The objective function, which is independent of :math:`\phi`, is minimized with -respect to the coefficients :math:`w`. +The objective function (the penalized negative log likelihood) is +independent of :math:`\phi` and is minimized with respect to the +coefficients :math:`w`. 
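For concreteness, consider the normal distribution, whose unit deviance is :math:`d(y, \mu)=(y-\mu)^2`: together with the identity link, the objective above becomes the familiar ridge regression objective (an illustrative special case, not spelled out in the patch)

.. math:: \frac{1}{2\sum_i s_i}\sum_i s_i (y_i - x_i^\top w)^2
          + \frac{\alpha}{2}\|w\|_2^2.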
The deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` likelihood as @@ -1005,12 +999,16 @@ Two remarks: .. topic:: References: - .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, + Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - .. [11] Jørgensen, B. (1992). The theory of exponential dispersion models and analysis of deviance. Monografias de matemática, no. 51. - See also `Exponential dispersion model. `_ + .. [11] Jørgensen, B. (1992). The theory of exponential dispersion models + and analysis of deviance. Monografias de matemática, no. 51. See also + `Exponential dispersion model. + `_ - .. [12] Gneiting, T. (2010). `Making and Evaluating Point Forecasts. `_ + .. [12] Gneiting, T. (2010). `Making and Evaluating Point Forecasts. + `_ Stochastic Gradient Descent - SGD ================================= From e44afe7b6c9d2c7169ea2d61ecc58c389e67c6de Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 16 Jul 2019 18:08:30 +0200 Subject: [PATCH 087/269] Split the implementation into several files --- sklearn/linear_model/__init__.py | 3 +- sklearn/linear_model/_glm/__init__.py | 5 + sklearn/linear_model/_glm/distribution.py | 443 ++++++++++++++ sklearn/linear_model/{_glm.py => _glm/glm.py} | 576 +----------------- sklearn/linear_model/_glm/link.py | 135 ++++ sklearn/linear_model/_glm/tests/__init__.py | 1 + .../_glm/tests/test_distribution.py | 61 ++ .../linear_model/{ => _glm}/tests/test_glm.py | 78 +-- sklearn/linear_model/_glm/tests/test_link.py | 38 ++ 9 files changed, 702 insertions(+), 638 deletions(-) create mode 100644 sklearn/linear_model/_glm/__init__.py create mode 100644 sklearn/linear_model/_glm/distribution.py rename sklearn/linear_model/{_glm.py => _glm/glm.py} (62%) create mode 100644 sklearn/linear_model/_glm/link.py create mode 100644 sklearn/linear_model/_glm/tests/__init__.py create mode 100644 sklearn/linear_model/_glm/tests/test_distribution.py rename sklearn/linear_model/{ => _glm}/tests/test_glm.py (81%) create mode 100644 sklearn/linear_model/_glm/tests/test_link.py diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index 121418f901a1a..1c0df55d27c90 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -18,8 +18,7 @@ lasso_path, enet_path, MultiTaskLasso, MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLassoCV) -from ._glm import (TweedieDistribution, - GeneralizedLinearRegressor, PoissonRegressor) +from ._glm.glm import (GeneralizedLinearRegressor, PoissonRegressor) from .huber import HuberRegressor from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from .stochastic_gradient import SGDClassifier, SGDRegressor diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py new file mode 100644 index 0000000000000..9a88e5604de8a --- /dev/null +++ b/sklearn/linear_model/_glm/__init__.py @@ -0,0 +1,5 @@ +# License: BSD 3 clause + +from . 
import distribution + +__all__ = ['distribution'] diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py new file mode 100644 index 0000000000000..2dc720124b48b --- /dev/null +++ b/sklearn/linear_model/_glm/distribution.py @@ -0,0 +1,443 @@ +""" +Distribution functions used in GLM +""" + +# Author: Christian Lorentzen +# License: BSD 3 clause + +from abc import ABCMeta, abstractmethod +import numbers + +import numpy as np +from scipy.special import xlogy + + +def _safe_lin_pred(X, coef): + """Compute the linear predictor taking care if intercept is present.""" + if coef.size == X.shape[1] + 1: + return X @ coef[1:] + coef[0] + else: + return X @ coef + + +class ExponentialDispersionModel(metaclass=ABCMeta): + r"""Base class for reproductive Exponential Dispersion Models (EDM). + + The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by + + .. math:: p(y| \theta, \phi) = c(y, \phi) + \exp\left(\frac{\theta y-A(\theta)}{\phi}\right) + = \tilde{c}(y, \phi) + \exp\left(-\frac{d(y, \mu)}{2\phi}\right) + + with mean :math:`\mathrm{E}[Y] = A'(\theta) = \mu`, + variance :math:`\mathrm{Var}[Y] = \phi \cdot v(\mu)`, + unit variance :math:`v(\mu)` and + unit deviance :math:`d(y,\mu)`. + + Attributes + ---------- + lower_bound + upper_bound + include_lower_bound + include_upper_bound + + Methods + ------- + in_y_range + unit_variance + unit_variance_derivative + variance + variance_derivative + unit_deviance + unit_deviance_derivative + deviance + deviance_derivative + starting_mu + + _mu_deviance_derivative + + References + ---------- + + https://en.wikipedia.org/wiki/Exponential_dispersion_model. + """ + @property + def lower_bound(self): + """Get the lower bound of values for Y~EDM.""" + return self._lower_bound + + @property + def upper_bound(self): + """Get the upper bound of values for Y~EDM.""" + return self._upper_bound + + @property + def include_lower_bound(self): + """Get True if lower bound for y is included: y >= lower_bound.""" + return self._include_lower_bound + + @property + def include_upper_bound(self): + """Get True if upper bound for y is included: y <= upper_bound.""" + return self._include_upper_bound + + def in_y_range(self, x): + """Returns ``True`` if x is in the valid range of Y~EDM. + + Parameters + ---------- + x : array, shape (n_samples,) + Target values. + """ + if self.include_lower_bound: + if self.include_upper_bound: + return np.logical_and(np.greater_equal(x, self.lower_bound), + np.less_equal(x, self.upper_bound)) + else: + return np.logical_and(np.greater_equal(x, self.lower_bound), + np.less(x, self.upper_bound)) + else: + if self.include_upper_bound: + return np.logical_and(np.greater(x, self.lower_bound), + np.less_equal(x, self.upper_bound)) + else: + return np.logical_and(np.greater(x, self.lower_bound), + np.less(x, self.upper_bound)) + + @abstractmethod + def unit_variance(self, mu): + r"""Compute the unit variance function. + + The unit variance :math:`v(\mu)` determines the variance as + a function of the mean :math:`\mu` by + :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(\mu_i)`. + It can also be derived from the unit deviance :math:`d(y,\mu)` as + + .. math:: v(\mu) = \frac{2}{\frac{\partial^2 d(y,\mu)}{ + \partial\mu^2}}\big|_{y=\mu} + + See also :func:`variance`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + """ + pass + + @abstractmethod + def unit_variance_derivative(self, mu): + r"""Compute the derivative of the unit variance w.r.t. mu. + + Return :math:`v'(\mu)`. 
+ + Parameters + ---------- + mu : array, shape (n_samples,) + Target values. + """ + pass + + def variance(self, mu, phi=1, weights=1): + r"""Compute the variance function. + + The variance of :math:`Y_i \sim \mathrm{EDM}(\mu_i,\phi/s_i)` is + :math:`\mathrm{Var}[Y_i]=\phi/s_i*v(\mu_i)`, + with unit variance :math:`v(\mu)` and weights :math:`s_i`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + + phi : float (default=1) + Dispersion parameter. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + """ + return phi/weights * self.unit_variance(mu) + + def variance_derivative(self, mu, phi=1, weights=1): + r"""Compute the derivative of the variance w.r.t. mu. + + Returns + :math:`\frac{\partial}{\partial\mu}\mathrm{Var}[Y_i] + =phi/s_i*v'(\mu_i)`, with unit variance :math:`v(\mu)` + and weights :math:`s_i`. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + + phi : float (default=1) + Dispersion parameter. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + """ + return phi/weights * self.unit_variance_derivative(mu) + + @abstractmethod + def unit_deviance(self, y, mu): + r"""Compute the unit deviance. + + The unit_deviance :math:`d(y,\mu)` can be defined by the + log-likelihood as + :math:`d(y,\mu) = -2\phi\cdot + \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right).` + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + """ + pass + + def unit_deviance_derivative(self, y, mu): + r"""Compute the derivative of the unit deviance w.r.t. mu. + + The derivative of the unit deviance is given by + :math:`\frac{\partial}{\partial\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` + with unit variance :math:`v(\mu)`. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + """ + return -2 * (y - mu) / self.unit_variance(mu) + + def deviance(self, y, mu, weights=1): + r"""Compute the deviance. + + The deviance is a weighted sum of the per sample unit deviances, + :math:`D = \sum_i s_i \cdot d(y_i, \mu_i)` + with weights :math:`s_i` and unit deviance :math:`d(y,\mu)`. + In terms of the log-likelihood it is :math:`D = -2\phi\cdot + \left(loglike(y,\mu,\frac{phi}{s}) + - loglike(y,y,\frac{phi}{s})\right)`. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + """ + return np.sum(weights * self.unit_deviance(y, mu)) + + def deviance_derivative(self, y, mu, weights=1): + """Compute the derivative of the deviance w.r.t. mu. + + It gives :math:`\\frac{\\partial}{\\partial\\mu} D(y, \\mu; weights)`. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + """ + return weights * self.unit_deviance_derivative(y, mu) + + def starting_mu(self, y, weights=1, ind_weight=0.5): + """Set starting values for the mean mu. + + These may be good starting points for the (unpenalized) IRLS solver. + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. 
+ + weights : array, shape (n_samples,) (default=1) + Weights or exposure to which variance is inverse proportional. + + ind_weight : float (default=0.5) + Must be between 0 and 1. Specifies how much weight is given to the + individual observations instead of the mean of y. + """ + return (ind_weight * y + + (1. - ind_weight) * np.average(y, weights=weights)) + + def _mu_deviance_derivative(self, coef, X, y, weights, link): + """Compute mu and the derivative of the deviance w.r.t coef.""" + lin_pred = _safe_lin_pred(X, coef) + mu = link.inverse(lin_pred) + d1 = link.inverse_derivative(lin_pred) + temp = d1 * self.deviance_derivative(y, mu, weights) + if coef.size == X.shape[1] + 1: + devp = np.concatenate(([temp.sum()], temp @ X)) + else: + devp = temp @ X # same as X.T @ temp + return mu, devp + + +class TweedieDistribution(ExponentialDispersionModel): + r"""A class for the Tweedie distribution. + + A Tweedie distribution with mean :math:`\mu=\mathrm{E}[Y]` is uniquely + defined by its mean-variance relationship + :math:`\mathrm{Var}[Y] \propto \mu^{power}`. + + Special cases are: + + ===== ================ + Power Distribution + ===== ================ + 0 Normal + 1 Poisson + (1,2) Compound Poisson + 2 Gamma + 3 Inverse Gaussian + ===== ================ + + Parameters + ---------- + power : float (default=0) + The variance power of the `unit_variance` + :math:`v(\mu) = \mu^{power}`. + For ``0 < power < 1``, no distribution exists. + """ + def __init__(self, power=0): + self.power = power + + @property + def power(self): + return self._power + + @power.setter + def power(self, power): + if not isinstance(power, numbers.Real): + raise TypeError('power must be a real number, input was {0}' + .format(power)) + + self._upper_bound = np.Inf + self._include_upper_bound = False + if power < 0: + # Extreme Stable + self._lower_bound = -np.Inf + self._include_lower_bound = False + elif power == 0: + # NormalDistribution + self._lower_bound = -np.Inf + self._include_lower_bound = False + elif (power > 0) and (power < 1): + raise ValueError('For 0 < power < 1, no distribution exists.') + elif power == 1: + # PoissonDistribution + self._lower_bound = 0 + self._include_lower_bound = True + elif (power > 1) and (power < 2): + # Compound Poisson + self._lower_bound = 0 + self._include_lower_bound = True + elif power == 2: + # GammaDistribution + self._lower_bound = 0 + self._include_lower_bound = False + elif (power > 2) and (power < 3): + # Positive Stable + self._lower_bound = 0 + self._include_lower_bound = False + elif power == 3: + # InverseGaussianDistribution + self._lower_bound = 0 + self._include_lower_bound = False + elif power > 3: + # Positive Stable + self._lower_bound = 0 + self._include_lower_bound = False + else: # pragma: no cover + # this branch should be unreachable. + raise ValueError + + self._power = power + + def unit_variance(self, mu): + """Compute the unit variance of a Tweedie distribution v(mu)=mu**power. + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean. + """ + return np.power(mu, self.power) + + def unit_variance_derivative(self, mu): + """Compute the derivative of the unit variance of a Tweedie + distribution v(mu)=power*mu**(power-1). + + Parameters + ---------- + mu : array, shape (n_samples,) + Predicted mean.
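A hedged usage sketch (assumes this branch is installed; it mirrors the imports used by the new test files further below) showing how the power setter above fixes the admissible target range that in_y_range checks.

from sklearn.linear_model._glm.distribution import TweedieDistribution

poisson = TweedieDistribution(power=1)    # y >= 0 allowed (lower bound included)
gamma = TweedieDistribution(power=2)      # y > 0 required (lower bound excluded)
print(poisson.in_y_range([-1., 0., 1.]))  # [False  True  True]
print(gamma.in_y_range([-1., 0., 1.]))    # [False False  True]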
+ """ + return self.power * np.power(mu, self.power - 1) + + def unit_deviance(self, y, mu): + p = self.power + if p == 0: + # NormalDistribution + return (y - mu)**2 + if p == 1: + # PoissonDistribution + # 2 * (y*log(y/mu) - y + mu), with y*log(y/mu)=0 if y=0 + return 2 * (xlogy(y, y/mu) - y + mu) + elif p == 2: + # GammaDistribution + return 2 * (np.log(mu/y) + y/mu - 1) + else: + # return 2 * (np.maximum(y,0)**(2-p)/((1-p)*(2-p)) + # - y*mu**(1-p)/(1-p) + mu**(2-p)/(2-p)) + return 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p)*(2-p)) - + y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) + + +class NormalDistribution(TweedieDistribution): + """Class for the Normal (aka Gaussian) distribution""" + def __init__(self): + super(NormalDistribution, self).__init__(power=0) + + +class PoissonDistribution(TweedieDistribution): + """Class for the scaled Poisson distribution""" + def __init__(self): + super(PoissonDistribution, self).__init__(power=1) + + +class GammaDistribution(TweedieDistribution): + """Class for the Gamma distribution""" + def __init__(self): + super(GammaDistribution, self).__init__(power=2) + + +class InverseGaussianDistribution(TweedieDistribution): + """Class for the scaled InverseGaussianDistribution distribution""" + def __init__(self): + super(InverseGaussianDistribution, self).__init__(power=3) + + +EDM_DISTRIBUTIONS = { + 'normal': NormalDistribution, + 'poisson': PoissonDistribution, + 'gamma': GammaDistribution, + 'inverse.gaussian': InverseGaussianDistribution, +} diff --git a/sklearn/linear_model/_glm.py b/sklearn/linear_model/_glm/glm.py similarity index 62% rename from sklearn/linear_model/_glm.py rename to sklearn/linear_model/_glm/glm.py index 939249e42e4f4..bff8ea43fd550 100644 --- a/sklearn/linear_model/_glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -6,17 +6,26 @@ # some parts and tricks stolen from other sklearn files. # License: BSD 3 clause -from abc import ABCMeta, abstractmethod import numbers import numpy as np -from scipy import special import scipy.optimize -from ..base import BaseEstimator, RegressorMixin -from ..utils import check_array, check_X_y -from ..utils.optimize import _check_optimize_result -from ..utils.validation import check_is_fitted +from ...base import BaseEstimator, RegressorMixin +from ...utils import check_array, check_X_y +from ...utils.optimize import _check_optimize_result +from ...utils.validation import check_is_fitted +from .distribution import ( + ExponentialDispersionModel, + TweedieDistribution, + EDM_DISTRIBUTIONS +) +from .link import ( + Link, + IdentityLink, + LogLink, + LogitLink, +) def _check_weights(sample_weight, n_samples): @@ -46,561 +55,6 @@ def _check_weights(sample_weight, n_samples): return weights -def _safe_lin_pred(X, coef): - """Compute the linear predictor taking care if intercept is present.""" - if coef.size == X.shape[1] + 1: - return X @ coef[1:] + coef[0] - else: - return X @ coef - - -class Link(metaclass=ABCMeta): - """Abstract base class for Link functions.""" - - @abstractmethod - def link(self, mu): - """Compute the link function g(mu). - - The link function links the mean mu=E[Y] to the so called linear - predictor (X*w), i.e. g(mu) = linear predictor. - - Parameters - ---------- - mu : array, shape (n_samples,) - Usually the (predicted) mean. - """ - pass - - @abstractmethod - def derivative(self, mu): - """Compute the derivative of the link g'(mu). - - Parameters - ---------- - mu : array, shape (n_samples,) - Usually the (predicted) mean. 
- """ - pass - - @abstractmethod - def inverse(self, lin_pred): - """Compute the inverse link function h(lin_pred). - - Gives the inverse relationship between linear predictor and the mean - mu=E[Y], i.e. h(linear predictor) = mu. - - Parameters - ---------- - lin_pred : array, shape (n_samples,) - Usually the (fitted) linear predictor. - """ - pass - - @abstractmethod - def inverse_derivative(self, lin_pred): - """Compute the derivative of the inverse link function h'(lin_pred). - - Parameters - ---------- - lin_pred : array, shape (n_samples,) - Usually the (fitted) linear predictor. - """ - pass - - @abstractmethod - def inverse_derivative2(self, lin_pred): - """Compute 2nd derivative of the inverse link function h''(lin_pred). - - Parameters - ---------- - lin_pred : array, shape (n_samples,) - Usually the (fitted) linear predictor. - """ - pass - - -class IdentityLink(Link): - """The identity link function g(x)=x.""" - - def link(self, mu): - return mu - - def derivative(self, mu): - return np.ones_like(mu) - - def inverse(self, lin_pred): - return lin_pred - - def inverse_derivative(self, lin_pred): - return np.ones_like(lin_pred) - - def inverse_derivative2(self, lin_pred): - return np.zeros_like(lin_pred) - - -class LogLink(Link): - """The log link function g(x)=log(x).""" - - def link(self, mu): - return np.log(mu) - - def derivative(self, mu): - return 1./mu - - def inverse(self, lin_pred): - return np.exp(lin_pred) - - def inverse_derivative(self, lin_pred): - return np.exp(lin_pred) - - def inverse_derivative2(self, lin_pred): - return np.exp(lin_pred) - - -class LogitLink(Link): - """The logit link function g(x)=logit(x).""" - - def link(self, mu): - return special.logit(mu) - - def derivative(self, mu): - return 1. / (mu * (1 - mu)) - - def inverse(self, lin_pred): - return special.expit(lin_pred) - - def inverse_derivative(self, lin_pred): - ep = special.expit(lin_pred) - return ep * (1. - ep) - - def inverse_derivative2(self, lin_pred): - ep = special.expit(lin_pred) - return ep * (1. - ep) * (1. - 2 * ep) - - -class ExponentialDispersionModel(metaclass=ABCMeta): - r"""Base class for reproductive Exponential Dispersion Models (EDM). - - The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by - - .. math:: p(y| \theta, \phi) = c(y, \phi) - \exp\left(\frac{\theta y-A(\theta)}{\phi}\right) - = \tilde{c}(y, \phi) - \exp\left(-\frac{d(y, \mu)}{2\phi}\right) - - with mean :math:`\mathrm{E}[Y] = A'(\theta) = \mu`, - variance :math:`\mathrm{Var}[Y] = \phi \cdot v(\mu)`, - unit variance :math:`v(\mu)` and - unit deviance :math:`d(y,\mu)`. - - Attributes - ---------- - lower_bound - upper_bound - include_lower_bound - include_upper_bound - - Methods - ------- - in_y_range - unit_variance - unit_variance_derivative - variance - variance_derivative - unit_deviance - unit_deviance_derivative - deviance - deviance_derivative - starting_mu - - _mu_deviance_derivative - - References - ---------- - - https://en.wikipedia.org/wiki/Exponential_dispersion_model. 
- """ - @property - def lower_bound(self): - """Get the lower bound of values for Y~EDM.""" - return self._lower_bound - - @property - def upper_bound(self): - """Get the upper bound of values for Y~EDM.""" - return self._upper_bound - - @property - def include_lower_bound(self): - """Get True if lower bound for y is included: y >= lower_bound.""" - return self._include_lower_bound - - @property - def include_upper_bound(self): - """Get True if upper bound for y is included: y <= upper_bound.""" - return self._include_upper_bound - - def in_y_range(self, x): - """Returns ``True`` if x is in the valid range of Y~EDM. - - Parameters - ---------- - x : array, shape (n_samples,) - Target values. - """ - if self.include_lower_bound: - if self.include_upper_bound: - return np.logical_and(np.greater_equal(x, self.lower_bound), - np.less_equal(x, self.upper_bound)) - else: - return np.logical_and(np.greater_equal(x, self.lower_bound), - np.less(x, self.upper_bound)) - else: - if self.include_upper_bound: - return np.logical_and(np.greater(x, self.lower_bound), - np.less_equal(x, self.upper_bound)) - else: - return np.logical_and(np.greater(x, self.lower_bound), - np.less(x, self.upper_bound)) - - @abstractmethod - def unit_variance(self, mu): - r"""Compute the unit variance function. - - The unit variance :math:`v(\mu)` determines the variance as - a function of the mean :math:`\mu` by - :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(\mu_i)`. - It can also be derived from the unit deviance :math:`d(y,\mu)` as - - .. math:: v(\mu) = \frac{2}{\frac{\partial^2 d(y,\mu)}{ - \partial\mu^2}}\big|_{y=\mu} - - See also :func:`variance`. - - Parameters - ---------- - mu : array, shape (n_samples,) - Predicted mean. - """ - pass - - @abstractmethod - def unit_variance_derivative(self, mu): - r"""Compute the derivative of the unit variance w.r.t. mu. - - Return :math:`v'(\mu)`. - - Parameters - ---------- - mu : array, shape (n_samples,) - Target values. - """ - pass - - def variance(self, mu, phi=1, weights=1): - r"""Compute the variance function. - - The variance of :math:`Y_i \sim \mathrm{EDM}(\mu_i,\phi/s_i)` is - :math:`\mathrm{Var}[Y_i]=\phi/s_i*v(\mu_i)`, - with unit variance :math:`v(\mu)` and weights :math:`s_i`. - - Parameters - ---------- - mu : array, shape (n_samples,) - Predicted mean. - - phi : float (default=1) - Dispersion parameter. - - weights : array, shape (n_samples,) (default=1) - Weights or exposure to which variance is inverse proportional. - """ - return phi/weights * self.unit_variance(mu) - - def variance_derivative(self, mu, phi=1, weights=1): - r"""Compute the derivative of the variance w.r.t. mu. - - Returns - :math:`\frac{\partial}{\partial\mu}\mathrm{Var}[Y_i] - =phi/s_i*v'(\mu_i)`, with unit variance :math:`v(\mu)` - and weights :math:`s_i`. - - Parameters - ---------- - mu : array, shape (n_samples,) - Predicted mean. - - phi : float (default=1) - Dispersion parameter. - - weights : array, shape (n_samples,) (default=1) - Weights or exposure to which variance is inverse proportional. - """ - return phi/weights * self.unit_variance_derivative(mu) - - @abstractmethod - def unit_deviance(self, y, mu): - r"""Compute the unit deviance. - - The unit_deviance :math:`d(y,\mu)` can be defined by the - log-likelihood as - :math:`d(y,\mu) = -2\phi\cdot - \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right).` - - Parameters - ---------- - y : array, shape (n_samples,) - Target values. - - mu : array, shape (n_samples,) - Predicted mean. 
- """ - pass - - def unit_deviance_derivative(self, y, mu): - r"""Compute the derivative of the unit deviance w.r.t. mu. - - The derivative of the unit deviance is given by - :math:`\frac{\partial}{\partial\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` - with unit variance :math:`v(\mu)`. - - Parameters - ---------- - y : array, shape (n_samples,) - Target values. - - mu : array, shape (n_samples,) - Predicted mean. - """ - return -2 * (y - mu) / self.unit_variance(mu) - - def deviance(self, y, mu, weights=1): - r"""Compute the deviance. - - The deviance is a weighted sum of the per sample unit deviances, - :math:`D = \sum_i s_i \cdot d(y_i, \mu_i)` - with weights :math:`s_i` and unit deviance :math:`d(y,\mu)`. - In terms of the log-likelihood it is :math:`D = -2\phi\cdot - \left(loglike(y,\mu,\frac{phi}{s}) - - loglike(y,y,\frac{phi}{s})\right)`. - - Parameters - ---------- - y : array, shape (n_samples,) - Target values. - - mu : array, shape (n_samples,) - Predicted mean. - - weights : array, shape (n_samples,) (default=1) - Weights or exposure to which variance is inverse proportional. - """ - return np.sum(weights * self.unit_deviance(y, mu)) - - def deviance_derivative(self, y, mu, weights=1): - """Compute the derivative of the deviance w.r.t. mu. - - It gives :math:`\\frac{\\partial}{\\partial\\mu} D(y, \\mu; weights)`. - - Parameters - ---------- - y : array, shape (n_samples,) - Target values. - - mu : array, shape (n_samples,) - Predicted mean. - - weights : array, shape (n_samples,) (default=1) - Weights or exposure to which variance is inverse proportional. - """ - return weights * self.unit_deviance_derivative(y, mu) - - def starting_mu(self, y, weights=1, ind_weight=0.5): - """Set starting values for the mean mu. - - These may be good starting points for the (unpenalized) IRLS solver. - - Parameters - ---------- - y : array, shape (n_samples,) - Target values. - - weights : array, shape (n_samples,) (default=1) - Weights or exposure to which variance is inverse proportional. - - ind_weight : float (default=0.5) - Must be between 0 and 1. Specifies how much weight is given to the - individual observations instead of the mean of y. - """ - return (ind_weight * y + - (1. - ind_weight) * np.average(y, weights=weights)) - - def _mu_deviance_derivative(self, coef, X, y, weights, link): - """Compute mu and the derivative of the deviance w.r.t coef.""" - lin_pred = _safe_lin_pred(X, coef) - mu = link.inverse(lin_pred) - d1 = link.inverse_derivative(lin_pred) - temp = d1 * self.deviance_derivative(y, mu, weights) - if coef.size == X.shape[1] + 1: - devp = np.concatenate(([temp.sum()], temp @ X)) - else: - devp = temp @ X # sampe as X.T @ temp - return mu, devp - - -class TweedieDistribution(ExponentialDispersionModel): - r"""A class for the Tweedie distribution. - - A Tweedie distribution with mean :math:`\mu=\mathrm{E}[Y]` is uniquely - defined by it's mean-variance relationship - :math:`\mathrm{Var}[Y] \propto \mu^power`. - - Special cases are: - - ===== ================ - Power Distribution - ===== ================ - 0 Normal - 1 Poisson - (0,1) Compound Poisson - 2 Gamma - 3 Inverse Gaussian - - Parameters - ---------- - power : float (default=0) - The variance power of the `unit_variance` - :math:`v(\mu) = \mu^{power}`. 
- For ``0 0) and (power < 1): - raise ValueError('For 0 1) and (power < 2): - # Compound Poisson - self._lower_bound = 0 - self._include_lower_bound = True - elif power == 2: - # GammaDistribution - self._lower_bound = 0 - self._include_lower_bound = False - elif (power > 2) and (power < 3): - # Positive Stable - self._lower_bound = 0 - self._include_lower_bound = False - elif power == 3: - # InverseGaussianDistribution - self._lower_bound = 0 - self._include_lower_bound = False - elif power > 3: - # Positive Stable - self._lower_bound = 0 - self._include_lower_bound = False - else: # pragma: no cover - # this branch should be unreachable. - raise ValueError - - self._power = power - - def unit_variance(self, mu): - """Compute the unit variance of a Tweedie distribution v(mu)=mu**power. - - Parameters - ---------- - mu : array, shape (n_samples,) - Predicted mean. - """ - return np.power(mu, self.power) - - def unit_variance_derivative(self, mu): - """Compute the derivative of the unit variance of a Tweedie - distribution v(mu)=power*mu**(power-1). - - Parameters - ---------- - mu : array, shape (n_samples,) - Predicted mean. - """ - return self.power * np.power(mu, self.power - 1) - - def unit_deviance(self, y, mu): - p = self.power - if p == 0: - # NormalDistribution - return (y - mu)**2 - if p == 1: - # PoissonDistribution - # 2 * (y*log(y/mu) - y + mu), with y*log(y/mu)=0 if y=0 - return 2 * (special.xlogy(y, y/mu) - y + mu) - elif p == 2: - # GammaDistribution - return 2 * (np.log(mu/y) + y/mu - 1) - else: - # return 2 * (np.maximum(y,0)**(2-p)/((1-p)*(2-p)) - # - y*mu**(1-p)/(1-p) + mu**(2-p)/(2-p)) - return 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p)*(2-p)) - - y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) - - -class NormalDistribution(TweedieDistribution): - """Class for the Normal (aka Gaussian) distribution""" - def __init__(self): - super(NormalDistribution, self).__init__(power=0) - - -class PoissonDistribution(TweedieDistribution): - """Class for the scaled Poisson distribution""" - def __init__(self): - super(PoissonDistribution, self).__init__(power=1) - - -class GammaDistribution(TweedieDistribution): - """Class for the Gamma distribution""" - def __init__(self): - super(GammaDistribution, self).__init__(power=2) - - -class InverseGaussianDistribution(TweedieDistribution): - """Class for the scaled InverseGaussianDistribution distribution""" - def __init__(self): - super(InverseGaussianDistribution, self).__init__(power=3) - - -EDM_DISTRIBUTIONS = { - 'normal': NormalDistribution, - 'poisson': PoissonDistribution, - 'gamma': GammaDistribution, - 'inverse.gaussian': InverseGaussianDistribution, -} - - class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a Generalized Linear Model (GLM) with penalties. diff --git a/sklearn/linear_model/_glm/link.py b/sklearn/linear_model/_glm/link.py new file mode 100644 index 0000000000000..f79f6163ada48 --- /dev/null +++ b/sklearn/linear_model/_glm/link.py @@ -0,0 +1,135 @@ +""" +Link functions used in GLM +""" + +# Author: Christian Lorentzen +# License: BSD 3 clause + +from abc import ABCMeta, abstractmethod + +import numpy as np +from scipy.special import expit, logit + + +class Link(metaclass=ABCMeta): + """Abstract base class for Link functions.""" + + @abstractmethod + def link(self, mu): + """Compute the link function g(mu). + + The link function links the mean mu=E[Y] to the so called linear + predictor (X*w), i.e. g(mu) = linear predictor. 
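A hedged numeric check (numpy only) of the link/inverse-link relationship documented just above, shown for the log link because it is the default for the Poisson, Gamma and Inverse Gaussian families; the same identities are asserted by the new test_link_properties further below.

import numpy as np

eta = np.linspace(-2., 2., 5)              # linear predictor values
mu = np.exp(eta)                           # h(eta) for the log link
g_prime = 1. / mu                          # g'(mu), as in LogLink.derivative
h_prime = np.exp(eta)                      # h'(eta), as in LogLink.inverse_derivative
assert np.allclose(np.log(mu), eta)        # g(h(eta)) == eta
assert np.allclose(g_prime * h_prime, 1.)  # chain rule: h'(eta) = 1 / g'(h(eta))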
+ + Parameters + ---------- + mu : array, shape (n_samples,) + Usually the (predicted) mean. + """ + pass + + @abstractmethod + def derivative(self, mu): + """Compute the derivative of the link g'(mu). + + Parameters + ---------- + mu : array, shape (n_samples,) + Usually the (predicted) mean. + """ + pass + + @abstractmethod + def inverse(self, lin_pred): + """Compute the inverse link function h(lin_pred). + + Gives the inverse relationship between linear predictor and the mean + mu=E[Y], i.e. h(linear predictor) = mu. + + Parameters + ---------- + lin_pred : array, shape (n_samples,) + Usually the (fitted) linear predictor. + """ + pass + + @abstractmethod + def inverse_derivative(self, lin_pred): + """Compute the derivative of the inverse link function h'(lin_pred). + + Parameters + ---------- + lin_pred : array, shape (n_samples,) + Usually the (fitted) linear predictor. + """ + pass + + @abstractmethod + def inverse_derivative2(self, lin_pred): + """Compute 2nd derivative of the inverse link function h''(lin_pred). + + Parameters + ---------- + lin_pred : array, shape (n_samples,) + Usually the (fitted) linear predictor. + """ + pass + + +class IdentityLink(Link): + """The identity link function g(x)=x.""" + + def link(self, mu): + return mu + + def derivative(self, mu): + return np.ones_like(mu) + + def inverse(self, lin_pred): + return lin_pred + + def inverse_derivative(self, lin_pred): + return np.ones_like(lin_pred) + + def inverse_derivative2(self, lin_pred): + return np.zeros_like(lin_pred) + + +class LogLink(Link): + """The log link function g(x)=log(x).""" + + def link(self, mu): + return np.log(mu) + + def derivative(self, mu): + return 1./mu + + def inverse(self, lin_pred): + return np.exp(lin_pred) + + def inverse_derivative(self, lin_pred): + return np.exp(lin_pred) + + def inverse_derivative2(self, lin_pred): + return np.exp(lin_pred) + + +class LogitLink(Link): + """The logit link function g(x)=logit(x).""" + + def link(self, mu): + return logit(mu) + + def derivative(self, mu): + return 1. / (mu * (1 - mu)) + + def inverse(self, lin_pred): + return expit(lin_pred) + + def inverse_derivative(self, lin_pred): + ep = expit(lin_pred) + return ep * (1. - ep) + + def inverse_derivative2(self, lin_pred): + ep = expit(lin_pred) + return ep * (1. - ep) * (1. 
- 2 * ep) diff --git a/sklearn/linear_model/_glm/tests/__init__.py b/sklearn/linear_model/_glm/tests/__init__.py new file mode 100644 index 0000000000000..588cf7e93eef0 --- /dev/null +++ b/sklearn/linear_model/_glm/tests/__init__.py @@ -0,0 +1 @@ +# License: BSD 3 clause diff --git a/sklearn/linear_model/_glm/tests/test_distribution.py b/sklearn/linear_model/_glm/tests/test_distribution.py new file mode 100644 index 0000000000000..1a2efdba0e563 --- /dev/null +++ b/sklearn/linear_model/_glm/tests/test_distribution.py @@ -0,0 +1,61 @@ +# Authors: Christian Lorentzen +# +# License: BSD 3 clause + +from numpy.testing import assert_allclose +from numpy.testing import assert_array_equal +import pytest + +from sklearn.linear_model._glm.distribution import ( + TweedieDistribution, + NormalDistribution, PoissonDistribution, + GammaDistribution, InverseGaussianDistribution, +) + + +@pytest.mark.parametrize( + 'family, expected', + [(NormalDistribution(), [True, True, True]), + (PoissonDistribution(), [False, True, True]), + (TweedieDistribution(power=1.5), [False, True, True]), + (GammaDistribution(), [False, False, True]), + (InverseGaussianDistribution(), [False, False, True]), + (TweedieDistribution(power=4.5), [False, False, True])]) +def test_family_bounds(family, expected): + """Test the valid range of distributions at -1, 0, 1.""" + result = family.in_y_range([-1, 0, 1]) + assert_array_equal(result, expected) + + +def test_tweedie_distribution_power(): + with pytest.raises(ValueError, match="no distribution exists"): + TweedieDistribution(power=0.5) + + with pytest.raises(TypeError, match="must be a real number"): + TweedieDistribution(power=1j) + + with pytest.raises(TypeError, match="must be a real number"): + dist = TweedieDistribution() + dist.power = 1j + + dist = TweedieDistribution() + assert dist._include_lower_bound is False + dist.power = 1 + assert dist._include_lower_bound is True + + +@pytest.mark.parametrize( + 'family, chk_values', + [(NormalDistribution(), [-1.5, -0.1, 0.1, 2.5]), + (PoissonDistribution(), [0.1, 1.5]), + (GammaDistribution(), [0.1, 1.5]), + (InverseGaussianDistribution(), [0.1, 1.5]), + (TweedieDistribution(power=-2.5), [0.1, 1.5]), + (TweedieDistribution(power=-1), [0.1, 1.5]), + (TweedieDistribution(power=1.5), [0.1, 1.5]), + (TweedieDistribution(power=2.5), [0.1, 1.5]), + (TweedieDistribution(power=-4), [0.1, 1.5])]) +def test_deviance_zero(family, chk_values): + """Test deviance(y,y) = 0 for different families.""" + for x in chk_values: + assert_allclose(family.deviance(x, x), 0, atol=1e-9) diff --git a/sklearn/linear_model/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py similarity index 81% rename from sklearn/linear_model/tests/test_glm.py rename to sklearn/linear_model/_glm/tests/test_glm.py index 1712f7b5e1d3d..ffac6dd11c243 100644 --- a/sklearn/linear_model/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -8,11 +8,12 @@ from sklearn.datasets import make_regression from sklearn.linear_model import GeneralizedLinearRegressor -from sklearn.linear_model._glm import ( - Link, +from sklearn.linear_model._glm.link import ( IdentityLink, LogLink, LogitLink, +) +from sklearn.linear_model._glm.distribution import ( TweedieDistribution, NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, @@ -21,8 +22,6 @@ from sklearn.metrics import mean_absolute_error from sklearn.exceptions import ConvergenceWarning -from sklearn.utils.testing import assert_array_equal - GLM_SOLVERS = ['lbfgs'] @@ 
-35,77 +34,6 @@ def regression_data(): return X, y -@pytest.mark.parametrize('link', Link.__subclasses__()) -def test_link_properties(link): - """Test link inverse and derivative.""" - rng = np.random.RandomState(42) - x = rng.rand(100)*100 - link = link() # instantiate object - if isinstance(link, LogitLink): - # careful for large x, note expit(36) = 1 - # limit max eta to 15 - x = x / 100 * 15 - assert_allclose(link.link(link.inverse(x)), x) - # if f(g(x)) = x, then f'(g(x)) = 1/g'(x) - assert_allclose(link.derivative(link.inverse(x)), - 1./link.inverse_derivative(x)) - - assert ( - link.inverse_derivative2(x).shape == link.inverse_derivative(x).shape) - - # for LogitLink, in the following x should be between 0 and 1. - # assert_almost_equal(link.inverse_derivative(link.link(x)), - # 1./link.derivative(x), decimal=decimal) - - -@pytest.mark.parametrize( - 'family, expected', - [(NormalDistribution(), [True, True, True]), - (PoissonDistribution(), [False, True, True]), - (TweedieDistribution(power=1.5), [False, True, True]), - (GammaDistribution(), [False, False, True]), - (InverseGaussianDistribution(), [False, False, True]), - (TweedieDistribution(power=4.5), [False, False, True])]) -def test_family_bounds(family, expected): - """Test the valid range of distributions at -1, 0, 1.""" - result = family.in_y_range([-1, 0, 1]) - assert_array_equal(result, expected) - - -def test_tweedie_distribution_power(): - with pytest.raises(ValueError, match="no distribution exists"): - TweedieDistribution(power=0.5) - - with pytest.raises(TypeError, match="must be a real number"): - TweedieDistribution(power=1j) - - with pytest.raises(TypeError, match="must be a real number"): - dist = TweedieDistribution() - dist.power = 1j - - dist = TweedieDistribution() - assert dist._include_lower_bound is False - dist.power = 1 - assert dist._include_lower_bound is True - - -@pytest.mark.parametrize( - 'family, chk_values', - [(NormalDistribution(), [-1.5, -0.1, 0.1, 2.5]), - (PoissonDistribution(), [0.1, 1.5]), - (GammaDistribution(), [0.1, 1.5]), - (InverseGaussianDistribution(), [0.1, 1.5]), - (TweedieDistribution(power=-2.5), [0.1, 1.5]), - (TweedieDistribution(power=-1), [0.1, 1.5]), - (TweedieDistribution(power=1.5), [0.1, 1.5]), - (TweedieDistribution(power=2.5), [0.1, 1.5]), - (TweedieDistribution(power=-4), [0.1, 1.5])]) -def test_deviance_zero(family, chk_values): - """Test deviance(y,y) = 0 for different families.""" - for x in chk_values: - assert_allclose(family.deviance(x, x), 0, atol=1e-9) - - def test_sample_weights_validation(): """Test the raised errors in the validation of sample_weight.""" # scalar value but not positive diff --git a/sklearn/linear_model/_glm/tests/test_link.py b/sklearn/linear_model/_glm/tests/test_link.py new file mode 100644 index 0000000000000..3a2a21c4c04e0 --- /dev/null +++ b/sklearn/linear_model/_glm/tests/test_link.py @@ -0,0 +1,38 @@ +# Authors: Christian Lorentzen +# +# License: BSD 3 clause +import numpy as np +from numpy.testing import assert_allclose +import pytest + +from sklearn.linear_model._glm.link import ( + IdentityLink, + LogLink, + LogitLink, +) + + +LINK_FUNCTIONS = [IdentityLink, LogLink, LogitLink] + + +@pytest.mark.parametrize('link', LINK_FUNCTIONS) +def test_link_properties(link): + """Test link inverse and derivative.""" + rng = np.random.RandomState(42) + x = rng.rand(100)*100 + link = link() # instantiate object + if isinstance(link, LogitLink): + # careful for large x, note expit(36) = 1 + # limit max eta to 15 + x = x / 100 * 15 + 
assert_allclose(link.link(link.inverse(x)), x) + # if f(g(x)) = x, then f'(g(x)) = 1/g'(x) + assert_allclose(link.derivative(link.inverse(x)), + 1./link.inverse_derivative(x)) + + assert ( + link.inverse_derivative2(x).shape == link.inverse_derivative(x).shape) + + # for LogitLink, in the following x should be between 0 and 1. + # assert_almost_equal(link.inverse_derivative(link.link(x)), + # 1./link.derivative(x), decimal=decimal) From 5927379debd8e591d72c5915ad434fc0592f5665 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 16 Jul 2019 18:27:36 +0200 Subject: [PATCH 088/269] Fix CI --- sklearn/linear_model/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index 1c0df55d27c90..8fc662a48a6ae 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -81,5 +81,4 @@ 'ridge_regression', 'RANSACRegressor', 'GeneralizedLinearRegressor', - 'TweedieDistribution', 'PoissonRegressor'] From a6df2a788b5a4907b6c3a9b51a758f5b05e95257 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 16 Jul 2019 18:57:53 +0200 Subject: [PATCH 089/269] Add test_deviance_derivative --- sklearn/linear_model/__init__.py | 2 +- sklearn/linear_model/_glm/__init__.py | 4 +- .../_glm/tests/test_distribution.py | 44 +++++++++++++++++-- 3 files changed, 44 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index 8fc662a48a6ae..e8f63e95cdfa8 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -18,7 +18,7 @@ lasso_path, enet_path, MultiTaskLasso, MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLassoCV) -from ._glm.glm import (GeneralizedLinearRegressor, PoissonRegressor) +from ._glm import (GeneralizedLinearRegressor, PoissonRegressor) from .huber import HuberRegressor from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from .stochastic_gradient import SGDClassifier, SGDRegressor diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py index 9a88e5604de8a..ac0f2c206ffe0 100644 --- a/sklearn/linear_model/_glm/__init__.py +++ b/sklearn/linear_model/_glm/__init__.py @@ -1,5 +1,5 @@ # License: BSD 3 clause -from . 
import distribution +from .glm import (GeneralizedLinearRegressor, PoissonRegressor) -__all__ = ['distribution'] +__all__ = ['GeneralizedLinearRegressor', 'PoissonRegressor'] diff --git a/sklearn/linear_model/_glm/tests/test_distribution.py b/sklearn/linear_model/_glm/tests/test_distribution.py index 1a2efdba0e563..f457fc882a809 100644 --- a/sklearn/linear_model/_glm/tests/test_distribution.py +++ b/sklearn/linear_model/_glm/tests/test_distribution.py @@ -1,9 +1,12 @@ # Authors: Christian Lorentzen # # License: BSD 3 clause - -from numpy.testing import assert_allclose -from numpy.testing import assert_array_equal +import numpy as np +from numpy.testing import ( + assert_allclose, + assert_array_equal, +) +from scipy.optimize import check_grad import pytest from sklearn.linear_model._glm.distribution import ( @@ -59,3 +62,38 @@ def test_deviance_zero(family, chk_values): """Test deviance(y,y) = 0 for different families.""" for x in chk_values: assert_allclose(family.deviance(x, x), 0, atol=1e-9) + + +@pytest.mark.parametrize( + 'family', + [NormalDistribution(), + PoissonDistribution(), + GammaDistribution(), + InverseGaussianDistribution(), + TweedieDistribution(power=-2.5), + TweedieDistribution(power=-1), + TweedieDistribution(power=1.5), + TweedieDistribution(power=2.5), + TweedieDistribution(power=-4)], + ids=lambda x: x.__class__.__name__ +) +def test_deviance_derivative(family): + """Test deviance derivative for different families.""" + rng = np.random.RandomState(0) + y_true = rng.rand(10) + # make data positive + y_true += np.abs(y_true.min()) + 1e-2 + + y_pred = y_true + np.fmax(rng.rand(10), 0.) + + dev = family.deviance(y_true, y_pred) + assert isinstance(dev, float) + dev_derivative = family.deviance_derivative(y_true, y_pred) + assert dev_derivative.shape == y_pred.shape + + err = check_grad( + lambda mu: family.deviance(y_true, mu), + lambda mu: family.deviance_derivative(y_true, mu), + y_pred, + ) / np.linalg.norm(dev_derivative) + assert err < 1e-6 From 5af89a70fd4a168f3ab1d0d1966d6357e6bc4e68 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Tue, 16 Jul 2019 19:58:54 +0200 Subject: [PATCH 090/269] Fix sklearn/linear_model/setup.py --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 2 +- sklearn/linear_model/_glm/tests/test_distribution.py | 2 +- sklearn/linear_model/setup.py | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 1c8dd42df336d..fba1cc42e20a7 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -40,7 +40,7 @@ from sklearn.compose import ColumnTransformer from sklearn.linear_model import GeneralizedLinearRegressor -from sklearn.linear_model._glm import TweedieDistribution +from sklearn.linear_model._glm.distribution import TweedieDistribution from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import FunctionTransformer, OneHotEncoder diff --git a/sklearn/linear_model/_glm/tests/test_distribution.py b/sklearn/linear_model/_glm/tests/test_distribution.py index f457fc882a809..f984077bed5cb 100644 --- a/sklearn/linear_model/_glm/tests/test_distribution.py +++ b/sklearn/linear_model/_glm/tests/test_distribution.py @@ -96,4 +96,4 @@ def test_deviance_derivative(family): lambda mu: family.deviance_derivative(y_true, mu), y_pred, ) 
/ np.linalg.norm(dev_derivative) - assert err < 1e-6 + assert abs(err) < 1e-6 diff --git a/sklearn/linear_model/setup.py b/sklearn/linear_model/setup.py index 8226412fdecbd..5cf7040d4c9d4 100644 --- a/sklearn/linear_model/setup.py +++ b/sklearn/linear_model/setup.py @@ -42,6 +42,7 @@ def configuration(parent_package='', top_path=None): # add other directories config.add_subpackage('tests') + config.add_subpackage('_glm') return config From cd347d4610d00dca9e7a01de2067595a4e4b8a59 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 17 Jul 2019 14:01:44 +0200 Subject: [PATCH 091/269] Remove variance and variance_derivative methods from distributions --- sklearn/linear_model/_glm/distribution.py | 72 ++--------------------- 1 file changed, 4 insertions(+), 68 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index 2dc720124b48b..18e7cf0c0a227 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -44,16 +44,13 @@ class ExponentialDispersionModel(metaclass=ABCMeta): Methods ------- + deviance + deviance_derivative in_y_range - unit_variance - unit_variance_derivative - variance - variance_derivative unit_deviance unit_deviance_derivative - deviance - deviance_derivative - starting_mu + unit_variance + unit_variance_derivative _mu_deviance_derivative @@ -139,47 +136,6 @@ def unit_variance_derivative(self, mu): """ pass - def variance(self, mu, phi=1, weights=1): - r"""Compute the variance function. - - The variance of :math:`Y_i \sim \mathrm{EDM}(\mu_i,\phi/s_i)` is - :math:`\mathrm{Var}[Y_i]=\phi/s_i*v(\mu_i)`, - with unit variance :math:`v(\mu)` and weights :math:`s_i`. - - Parameters - ---------- - mu : array, shape (n_samples,) - Predicted mean. - - phi : float (default=1) - Dispersion parameter. - - weights : array, shape (n_samples,) (default=1) - Weights or exposure to which variance is inverse proportional. - """ - return phi/weights * self.unit_variance(mu) - - def variance_derivative(self, mu, phi=1, weights=1): - r"""Compute the derivative of the variance w.r.t. mu. - - Returns - :math:`\frac{\partial}{\partial\mu}\mathrm{Var}[Y_i] - =phi/s_i*v'(\mu_i)`, with unit variance :math:`v(\mu)` - and weights :math:`s_i`. - - Parameters - ---------- - mu : array, shape (n_samples,) - Predicted mean. - - phi : float (default=1) - Dispersion parameter. - - weights : array, shape (n_samples,) (default=1) - Weights or exposure to which variance is inverse proportional. - """ - return phi/weights * self.unit_variance_derivative(mu) - @abstractmethod def unit_deviance(self, y, mu): r"""Compute the unit deviance. @@ -257,26 +213,6 @@ def deviance_derivative(self, y, mu, weights=1): """ return weights * self.unit_deviance_derivative(y, mu) - def starting_mu(self, y, weights=1, ind_weight=0.5): - """Set starting values for the mean mu. - - These may be good starting points for the (unpenalized) IRLS solver. - - Parameters - ---------- - y : array, shape (n_samples,) - Target values. - - weights : array, shape (n_samples,) (default=1) - Weights or exposure to which variance is inverse proportional. - - ind_weight : float (default=0.5) - Must be between 0 and 1. Specifies how much weight is given to the - individual observations instead of the mean of y. - """ - return (ind_weight * y + - (1. 
- ind_weight) * np.average(y, weights=weights)) - def _mu_deviance_derivative(self, coef, X, y, weights, link): """Compute mu and the derivative of the deviance w.r.t coef.""" lin_pred = _safe_lin_pred(X, coef) From 0d7f9cd9babe928a01266f43f1f5e4e41829075b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 17 Jul 2019 15:05:49 +0200 Subject: [PATCH 092/269] Improve coverage --- sklearn/linear_model/_glm/distribution.py | 6 ++-- sklearn/linear_model/_glm/link.py | 10 +++---- sklearn/linear_model/_glm/tests/test_glm.py | 31 +++++++++++++++++++++ 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index 18e7cf0c0a227..6fffd816ff8f9 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -121,7 +121,7 @@ def unit_variance(self, mu): mu : array, shape (n_samples,) Predicted mean. """ - pass + pass # pragma: no cover @abstractmethod def unit_variance_derivative(self, mu): @@ -134,7 +134,7 @@ def unit_variance_derivative(self, mu): mu : array, shape (n_samples,) Target values. """ - pass + pass # pragma: no cover @abstractmethod def unit_deviance(self, y, mu): @@ -153,7 +153,7 @@ def unit_deviance(self, y, mu): mu : array, shape (n_samples,) Predicted mean. """ - pass + pass # pragma: no cover def unit_deviance_derivative(self, y, mu): r"""Compute the derivative of the unit deviance w.r.t. mu. diff --git a/sklearn/linear_model/_glm/link.py b/sklearn/linear_model/_glm/link.py index f79f6163ada48..ec9a7b7736eb1 100644 --- a/sklearn/linear_model/_glm/link.py +++ b/sklearn/linear_model/_glm/link.py @@ -26,7 +26,7 @@ def link(self, mu): mu : array, shape (n_samples,) Usually the (predicted) mean. """ - pass + pass # pragma: no cover @abstractmethod def derivative(self, mu): @@ -37,7 +37,7 @@ def derivative(self, mu): mu : array, shape (n_samples,) Usually the (predicted) mean. """ - pass + pass # pragma: no cover @abstractmethod def inverse(self, lin_pred): @@ -51,7 +51,7 @@ def inverse(self, lin_pred): lin_pred : array, shape (n_samples,) Usually the (fitted) linear predictor. """ - pass + pass # pragma: no cover @abstractmethod def inverse_derivative(self, lin_pred): @@ -62,7 +62,7 @@ def inverse_derivative(self, lin_pred): lin_pred : array, shape (n_samples,) Usually the (fitted) linear predictor. """ - pass + pass # pragma: no cover @abstractmethod def inverse_derivative2(self, lin_pred): @@ -73,7 +73,7 @@ def inverse_derivative2(self, lin_pred): lin_pred : array, shape (n_samples,) Usually the (fitted) linear predictor. 
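A note on the removal above: variance and variance_derivative were thin wrappers around the retained unit_variance API, so callers can still compute them directly. A hedged equivalent (assumes this branch is installed; the numbers are illustrative):

import numpy as np
from sklearn.linear_model._glm.distribution import GammaDistribution

dist = GammaDistribution()
mu = np.array([1.0, 2.0])
phi, weights = 1.5, np.array([1.0, 2.0])
variance = phi / weights * dist.unit_variance(mu)                        # Var[Y_i] = phi/s_i * v(mu_i)
variance_derivative = phi / weights * dist.unit_variance_derivative(mu)  # phi/s_i * v'(mu_i)
print(variance, variance_derivative)   # [1.5 3. ] [3. 3.]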
""" - pass + pass # pragma: no cover class IdentityLink(Link): diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index ffac6dd11c243..a0ac77b65aa9f 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -219,6 +219,37 @@ def test_glm_log_regression(family, solver, tol): assert_allclose(res.coef_, coef, rtol=5e-6) +@pytest.mark.parametrize('fit_intercept', [True, False]) +def test_warm_start(fit_intercept): + n_samples, n_features = 100, 10 + n_predict = 10 + X, y, coef = make_regression(n_samples=n_samples+n_predict, + n_features=n_features, + n_informative=n_features-2, noise=0.5, + coef=True, random_state=42) + + glm1 = GeneralizedLinearRegressor( + warm_start=False, + fit_intercept=fit_intercept, + max_iter=1000 + ) + glm1.fit(X, y) + + glm2 = GeneralizedLinearRegressor( + warm_start=True, + fit_intercept=fit_intercept, + max_iter=1 + ) + glm2.fit(X, y) + assert glm1.score(X, y) > glm2.score(X, y) + glm2.set_params(max_iter=1000) + glm2.fit(X, y) + assert_allclose(glm1.coef_, glm2.coef_, rtol=1e-4, atol=1e-5) + assert_allclose(glm1.score(X, y), glm2.score(X, y), rtol=1e-4) + # TODO: investigate why this doesn't match + # assert glm1.n_iter_ == glm2.n_iter_ + 2 + + @pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)]) @pytest.mark.parametrize('fit_intercept', [True, False]) @pytest.mark.parametrize('solver', GLM_SOLVERS) From dbffad84fcab9521265e85d5e7cbf9da0673b380 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 17 Jul 2019 15:44:44 +0200 Subject: [PATCH 093/269] Remove mentions of the binomial distribution --- sklearn/linear_model/_glm/glm.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index bff8ea43fd550..0115f9d160337 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -85,7 +85,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). - family : {'normal', 'poisson', 'gamma', 'inverse.gaussian', 'binomial'} \ + family : {'normal', 'poisson', 'gamma', 'inverse.gaussian'} \ or an instance of class ExponentialDispersionModel, \ optional(default='normal') The distributional assumption of the GLM, i.e. which distribution from @@ -101,8 +101,6 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - 'log' for families 'poisson', 'gamma', 'inverse.gaussian' - - 'logit' for family 'binomial' - fit_dispersion : {None, 'chisqr', 'deviance'}, optional (default=None) Method for estimation of the dispersion parameter phi. Whether to use the chi squared statistic or the deviance statistic. 
If None, the From 3187204dc4c1bb41a6131c49262c8380a34b9ec1 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 19 Jul 2019 17:37:11 +0200 Subject: [PATCH 094/269] Use common simple weight validation --- sklearn/linear_model/_glm/glm.py | 37 +++------------------ sklearn/linear_model/_glm/tests/test_glm.py | 19 ++--------- 2 files changed, 7 insertions(+), 49 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 0115f9d160337..546e71c583fa8 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -14,7 +14,7 @@ from ...base import BaseEstimator, RegressorMixin from ...utils import check_array, check_X_y from ...utils.optimize import _check_optimize_result -from ...utils.validation import check_is_fitted +from ...utils.validation import check_is_fitted, _check_sample_weight from .distribution import ( ExponentialDispersionModel, TweedieDistribution, @@ -28,33 +28,6 @@ ) -def _check_weights(sample_weight, n_samples): - """Check that sample weights are non-negative and have the right shape.""" - if sample_weight is None: - weights = np.ones(n_samples) - elif np.isscalar(sample_weight): - if sample_weight <= 0: - raise ValueError("Sample weights must be non-negative.") - weights = sample_weight * np.ones(n_samples) - else: - _dtype = [np.float64, np.float32] - weights = check_array(sample_weight, accept_sparse=False, - force_all_finite=True, ensure_2d=False, - dtype=_dtype) - if weights.ndim > 1: - raise ValueError("Sample weight must be 1D array or scalar") - elif weights.shape[0] != n_samples: - raise ValueError("Sample weights must have the same length as " - "y") - if not np.all(weights >= 0): - raise ValueError("Sample weights must be non-negative.") - elif not np.sum(weights) > 0: - raise ValueError("Sample weights must have at least one positive " - "element.") - - return weights - - class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a Generalized Linear Model (GLM) with penalties. @@ -323,7 +296,7 @@ def fit(self, X, y, sample_weight=None): copy=self.copy_X) y = np.asarray(y, dtype=np.float64) - weights = _check_weights(sample_weight, y.shape[0]) + weights = _check_sample_weight(sample_weight, X) n_samples, n_features = X.shape @@ -458,7 +431,7 @@ def predict(self, X, sample_weight=None): allow_nd=False) eta = self._linear_predictor(X) mu = self._link_instance.inverse(eta) - weights = _check_weights(sample_weight, X.shape[0]) + weights = _check_sample_weight(sample_weight, X) return mu*weights @@ -487,7 +460,7 @@ def estimate_phi(self, X, y, sample_weight=None): X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], dtype=_dtype, y_numeric=True, multi_output=False) n_samples, n_features = X.shape - weights = _check_weights(sample_weight, n_samples) + weights = _check_sample_weight(sample_weight, X) eta = X @ self.coef_ if self.fit_intercept is True: eta += self.intercept_ @@ -542,7 +515,7 @@ def score(self, X, y, sample_weight=None): # Note, default score defined in RegressorMixin is R^2 score. 
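The D^2 score referred to in this comment generalizes R^2 by replacing squared error with the family deviance, D^2 = 1 - dev(y, mu) / dev(y, y_mean). A hedged by-hand sketch for the Poisson case (numpy/scipy only, not part of the diff):

import numpy as np
from scipy.special import xlogy

def poisson_dev(y, mu, w):
    # weighted Poisson deviance, as in ExponentialDispersionModel.deviance
    return np.sum(w * 2 * (xlogy(y, y / mu) - y + mu))

y = np.array([0., 2., 4.])
mu = np.array([0.5, 1.5, 3.5])          # model predictions
w = np.ones_like(y)
mu_null = np.average(y, weights=w)      # intercept-only ("null") model
d2 = 1 - poisson_dev(y, mu, w) / poisson_dev(y, np.full_like(y, mu_null), w)
print(d2)                               # ~0.78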
# TODO: make D^2 a score function in module metrics (and thereby get # input validation and so on) - weights = _check_weights(sample_weight, y.shape[0]) + weights = _check_sample_weight(sample_weight, X) mu = self.predict(X) dev = self._family_instance.deviance(y, mu, weights=weights) y_mean = np.average(y, weights=weights) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index a0ac77b65aa9f..2909c85ba8c40 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -41,8 +41,6 @@ def test_sample_weights_validation(): y = [1] weights = 0 glm = GeneralizedLinearRegressor(fit_intercept=False) - with pytest.raises(ValueError, match="weights must be non-negative"): - glm.fit(X, y, weights) # Positive weights are accepted glm.fit(X, y, sample_weight=1) @@ -54,21 +52,8 @@ def test_sample_weights_validation(): # 1d but wrong length weights = [1, 0] - with pytest.raises(ValueError, - match="weights must have the same length as y"): - glm.fit(X, y, weights) - - # 1d but only zeros (sum not greater than 0) - weights = [0, 0] - X = [[0], [1]] - y = [1, 2] - with pytest.raises(ValueError, - match="must have at least one positive element"): - glm.fit(X, y, weights) - - # 5. 1d but with a negative value - weights = [2, -1] - with pytest.raises(ValueError, match="weights must be non-negative"): + msg = r"sample_weight.shape == \(2,\), expected \(1,\)!" + with pytest.raises(ValueError, match=msg): glm.fit(X, y, weights) From cc03c1ad58db4108ea1bfc3d2d47225be8822bd8 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 19 Jul 2019 17:41:01 +0200 Subject: [PATCH 095/269] Simplify comments formatting --- sklearn/linear_model/_glm/glm.py | 27 ++++----------------------- 1 file changed, 4 insertions(+), 23 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 546e71c583fa8..ea3e4964529bf 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -208,12 +208,6 @@ def fit(self, X, y, sample_weight=None): ------- self : returns an instance of self. """ - ####################################################################### - # 1. input validation # - ####################################################################### - # 1.1 validate arguments of __init__ - # Guarantee that self._family_instance is an instance of class - # ExponentialDispersionModel if isinstance(self.family, ExponentialDispersionModel): self._family_instance = self.family elif self.family in EDM_DISTRIBUTIONS: @@ -288,7 +282,6 @@ def fit(self, X, y, sample_weight=None): family = self._family_instance link = self._link_instance - # 1.2 validate arguments of fit ####################################### _dtype = [np.float64, np.float32] _stype = ['csc', 'csr'] X, y = check_X_y(X, y, accept_sparse=_stype, @@ -300,7 +293,6 @@ def fit(self, X, y, sample_weight=None): n_samples, n_features = X.shape - # 1.4 additional validations ########################################## if self.check_input: if not np.all(family.in_y_range(y)): raise ValueError("Some value(s) of y are out of the valid " @@ -308,9 +300,8 @@ def fit(self, X, y, sample_weight=None): .format(family.__class__.__name__)) # TODO: if alpha=0 check that X is not rank deficient - ####################################################################### - # 2. 
rescaling of weights (sample_weight) # - ####################################################################### + # rescaling of sample_weight + # # IMPORTANT NOTE: Since we want to minimize # 1/(2*sum(sample_weight)) * deviance + L2, # deviance = sum(sample_weight * unit_deviance), @@ -319,9 +310,7 @@ def fit(self, X, y, sample_weight=None): weights_sum = np.sum(weights) weights = weights/weights_sum - ####################################################################### - # 3. initialization of coef = (intercept_, coef_) # - ####################################################################### + # initialization of coef = (intercept_, coef) # Note: Since phi=self.dispersion_ does not enter the estimation # of mu_i=E[y_i], set it to 1. @@ -338,12 +327,8 @@ def fit(self, X, y, sample_weight=None): else: coef = np.zeros(n_features) - ####################################################################### - # 4. fit # - ####################################################################### # algorithms for optimization - # 4.1 L-BFGS ########################################################## if solver == 'lbfgs': def func(coef, X, y, weights, alpha, family, link): mu, devp = \ @@ -371,9 +356,6 @@ def func(coef, X, y, weights, alpha, family, link): self.n_iter_ = _check_optimize_result("lbfgs", opt_res) coef = opt_res.x - ####################################################################### - # 5. postprocessing # - ####################################################################### if self.fit_intercept: self.intercept_ = coef[0] self.coef_ = coef[1:] @@ -425,9 +407,8 @@ def predict(self, X, sample_weight=None): C : array, shape (n_samples,) Returns predicted values times sample_weight. """ - # TODO: Is copy=True necessary? X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], - dtype='numeric', copy=True, ensure_2d=True, + dtype='numeric', ensure_2d=True, allow_nd=False) eta = self._linear_predictor(X) mu = self._link_instance.inverse(eta) From aa52b4a32e216bfdd516776ac6796f8c0b4e5137 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 22 Jul 2019 15:50:47 +0200 Subject: [PATCH 096/269] Refactor to use TweedieDistribition in metrics --- sklearn/linear_model/_glm/distribution.py | 33 ++++++++++++++--------- sklearn/metrics/regression.py | 14 +++------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index 6fffd816ff8f9..b99e1b40b2871 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -330,21 +330,28 @@ def unit_variance_derivative(self, mu): def unit_deviance(self, y, mu): p = self.power - if p == 0: - # NormalDistribution - return (y - mu)**2 - if p == 1: - # PoissonDistribution - # 2 * (y*log(y/mu) - y + mu), with y*log(y/mu)=0 if y=0 - return 2 * (xlogy(y, y/mu) - y + mu) + if p < 0: + # 'Extreme stable', y_true any realy number, y_pred > 0 + dev = 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p) * (2-p)) - + y * np.power(mu, 1-p)/(1-p) + + np.power(mu, 2-p)/(2-p)) + elif p == 0: + # Normal distribution, y_true and y_pred any real number + dev = (y - mu)**2 + elif p < 1: + raise ValueError("Tweedie deviance is only defined for p<=0 and " + "p>=1.") + elif p == 1: + # Poisson distribution + dev = 2 * (xlogy(y, y/mu) - y + mu) elif p == 2: - # GammaDistribution - return 2 * (np.log(mu/y) + y/mu - 1) + # Gamma distribution + dev = 2 * (np.log(mu/y) + y/mu - 1) else: - # return 2 * (np.maximum(y,0)**(2-p)/((1-p)*(2-p)) - # - 
y*mu**(1-p)/(1-p) + mu**(2-p)/(2-p)) - return 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p)*(2-p)) - - y*np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) + dev = 2 * (np.power(y, 2-p)/((1-p) * (2-p)) - + y * np.power(mu, 1-p)/(1-p) + + np.power(mu, 2-p)/(2-p)) + return dev class NormalDistribution(TweedieDistribution): diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index 2cba3d31ec84a..2f6c442c1f824 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -665,6 +665,7 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0): >>> mean_tweedie_deviance(y_true, y_pred, p=1) 1.4260... """ + from ..linear_model._glm.distribution import TweedieDistribution y_type, y_true, y_pred, _ = _check_reg_targets( y_true, y_pred, None, dtype=[np.float64, np.float32]) if y_type == 'continuous-multioutput': @@ -681,12 +682,8 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0): # 'Extreme stable', y_true any realy number, y_pred > 0 if (y_pred <= 0).any(): raise ValueError(message + "strictly positive y_pred.") - dev = 2 * (np.power(np.maximum(y_true, 0), 2-p)/((1-p) * (2-p)) - - y_true * np.power(y_pred, 1-p)/(1-p) + - np.power(y_pred, 2-p)/(2-p)) elif p == 0: - # Normal distribution, y_true and y_pred any real number - dev = (y_true - y_pred)**2 + pass elif p < 1: raise ValueError("Tweedie deviance is only defined for p<=0 and " "p>=1.") @@ -695,12 +692,10 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0): if (y_true < 0).any() or (y_pred <= 0).any(): raise ValueError(message + "non-negative y_true and strictly " "positive y_pred.") - dev = 2 * (xlogy(y_true, y_true/y_pred) - y_true + y_pred) elif p == 2: # Gamma distribution, y_true and y_pred > 0 if (y_true <= 0).any() or (y_pred <= 0).any(): raise ValueError(message + "strictly positive y_true and y_pred.") - dev = 2 * (np.log(y_pred/y_true) + y_true/y_pred - 1) else: if p < 2: # 1 < p < 2 is Compound Poisson, y_true >= 0, y_pred > 0 @@ -712,9 +707,8 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0): raise ValueError(message + "strictly positive y_true and " "y_pred.") - dev = 2 * (np.power(y_true, 2-p)/((1-p) * (2-p)) - - y_true * np.power(y_pred, 1-p)/(1-p) + - np.power(y_pred, 2-p)/(2-p)) + dist = TweedieDistribution(power=p) + dev = dist.unit_deviance(y_true, y_pred) return np.average(dev, weights=sample_weight) From 816aa8f36f3f96a6616a7aac0b4d2583b71f0fc7 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 25 Jul 2019 07:54:28 +0200 Subject: [PATCH 097/269] WIP --- .../plot_poisson_regression_non_normal_loss.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 0537704b2cf1f..6b12370a9433c 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -25,7 +25,6 @@ import numpy as np import matplotlib.pyplot as plt import pandas as pd -from scipy.special import xlogy from sklearn.compose import ColumnTransformer from sklearn.linear_model import GeneralizedLinearRegressor, LinearRegression @@ -36,6 +35,7 @@ from sklearn.ensemble import GradientBoostingRegressor from sklearn.metrics import mean_squared_error, mean_absolute_error +from sklearn.metrics import mean_poisson_deviance def load_mtpl2(n_samples=100000): @@ -140,13 +140,6 @@ def load_mtpl2(n_samples=100000): 
df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=2) -def mean_poisson_deviance_score(y_true, y_pred, sample_weights=None): - y_true = np.atleast_1d(y_true) - y_pred = np.atleast_1d(y_pred) - dev = 2 * (xlogy(y_true, y_true/y_pred) - y_true + y_pred) - return np.average(dev, weights=sample_weights) - - eps = 1e-5 print("MSE: %.3f" % mean_squared_error( df_test.Frequency.values, np.zeros(len(df_test)), @@ -154,7 +147,7 @@ def mean_poisson_deviance_score(y_true, y_pred, sample_weights=None): print("MAE: %.3f" % mean_absolute_error( df_test.Frequency.values, np.zeros(len(df_test)), df_test.Exposure.values)) -print("mean Poisson deviance: %.3f" % mean_poisson_deviance_score( +print("mean Poisson deviance: %.3f" % mean_poisson_deviance( df_test.Frequency.values, eps + np.zeros(len(df_test)), df_test.Exposure.values)) @@ -175,7 +168,7 @@ def mean_poisson_deviance_score(y_true, y_pred, sample_weights=None): print("MSE: %.3f" % mean_absolute_error( df_test.Frequency.values, linregr.predict(X_test), df_test.Exposure.values)) -print("mean Poisson deviance: %.3f" % mean_poisson_deviance_score( +print("mean Poisson deviance: %.3f" % mean_poisson_deviance( df_test.Frequency.values, np.fmax(linregr.predict(X_test), eps), df_test.Exposure.values)) @@ -201,7 +194,7 @@ def mean_poisson_deviance_score(y_true, y_pred, sample_weights=None): print("MAE: %.3f" % mean_absolute_error( df_test.Frequency.values, glm_freq.predict(X_test), df_test.Exposure.values)) -print("mean Poisson deviance: %.3f" % mean_poisson_deviance_score( +print("mean Poisson deviance: %.3f" % mean_poisson_deviance( df_test.Frequency.values, glm_freq.predict(X_test), df_test.Exposure.values)) @@ -221,7 +214,7 @@ def mean_poisson_deviance_score(y_true, y_pred, sample_weights=None): df_test.Frequency.values, gbr.predict(X_test), df_test.Exposure.values)) print("MAE: %.3f" % mean_absolute_error( df_test.Frequency.values, gbr.predict(X_test), df_test.Exposure.values)) -print("mean Poisson deviance: %.3f" % mean_poisson_deviance_score( +print("mean Poisson deviance: %.3f" % mean_poisson_deviance( df_test.Frequency.values, gbr.predict(X_test), df_test.Exposure.values)) ############################################################################## From 6500c81a354c13de43ce0956240893b21c3979fb Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 25 Jul 2019 16:47:13 +0200 Subject: [PATCH 098/269] Use Poisson deviance in examples --- .../linear_model/plot_poisson_regression_non_normal_loss.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 6b12370a9433c..ba9c0c8898fcd 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -184,7 +184,7 @@ def load_mtpl2(n_samples=100000): # # Next we fit the Poisson regressor on the target variable, -glm_freq = GeneralizedLinearRegressor(family="poisson", alpha=0) +glm_freq = GeneralizedLinearRegressor(family="poisson", alpha=0, max_iter=1000) glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) print("PoissonRegressor") @@ -215,7 +215,8 @@ def load_mtpl2(n_samples=100000): print("MAE: %.3f" % mean_absolute_error( df_test.Frequency.values, gbr.predict(X_test), df_test.Exposure.values)) print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test.Frequency.values, gbr.predict(X_test), 
df_test.Exposure.values)) + df_test.Frequency.values, np.fmax(gbr.predict(X_test), eps), + df_test.Exposure.values)) ############################################################################## # From 59a6d9dd2aeae4901f138f6d1c892c48c427f9a8 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 25 Jul 2019 18:22:26 +0200 Subject: [PATCH 099/269] Use PoissonRegressor and GammaRegressor in examples --- doc/modules/classes.rst | 2 + ...plot_poisson_regression_non_normal_loss.py | 4 +- ...lot_tweedie_regression_insurance_claims.py | 15 +- sklearn/linear_model/__init__.py | 2 +- sklearn/linear_model/_glm/__init__.py | 4 +- sklearn/linear_model/_glm/glm.py | 146 +++++++++++++++--- sklearn/linear_model/_glm/tests/test_glm.py | 3 +- 7 files changed, 145 insertions(+), 31 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 6346c0c65fadc..177cd0780f9be 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -750,6 +750,7 @@ Kernels: linear_model.BayesianRidge linear_model.ElasticNet linear_model.ElasticNetCV + linear_model.GammaRegressor linear_model.GeneralizedLinearRegressor linear_model.HuberRegressor linear_model.Lars @@ -771,6 +772,7 @@ Kernels: linear_model.PassiveAggressiveClassifier linear_model.PassiveAggressiveRegressor linear_model.Perceptron + linear_model.PoissonRegressor linear_model.RANSACRegressor linear_model.Ridge linear_model.RidgeClassifier diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index ba9c0c8898fcd..713866a712aea 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -27,7 +27,7 @@ import pandas as pd from sklearn.compose import ColumnTransformer -from sklearn.linear_model import GeneralizedLinearRegressor, LinearRegression +from sklearn.linear_model import PoissonRegressor, LinearRegression from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import FunctionTransformer, OneHotEncoder @@ -184,7 +184,7 @@ def load_mtpl2(n_samples=100000): # # Next we fit the Poisson regressor on the target variable, -glm_freq = GeneralizedLinearRegressor(family="poisson", alpha=0, max_iter=1000) +glm_freq = PoissonRegressor(alpha=0, max_iter=1000) glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) print("PoissonRegressor") diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index fba1cc42e20a7..55d0ca24ce477 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -39,7 +39,8 @@ import pandas as pd from sklearn.compose import ColumnTransformer -from sklearn.linear_model import GeneralizedLinearRegressor +from sklearn.linear_model import PoissonRegressor, GammaRegressor +from sklearn.linear_model._glm import GeneralizedLinearRegressor from sklearn.linear_model._glm.distribution import TweedieDistribution from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline @@ -192,7 +193,7 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, # Some of the features are colinear, we use a weak penalization to avoid # numerical issues. 
-glm_freq = GeneralizedLinearRegressor(family="poisson", alpha=1e-2) +glm_freq = PoissonRegressor(alpha=1e-2) glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) @@ -330,7 +331,7 @@ def score_estimator( mask_train = df_train["ClaimAmount"] > 0 mask_test = df_test["ClaimAmount"] > 0 -glm_sev = GeneralizedLinearRegressor(family="gamma") +glm_sev = GammaRegressor() glm_sev.fit( X_train[mask_train.values], @@ -464,12 +465,16 @@ def score(self, X, y, sample_weight=None): # this takes a while params = { "family": [ - TweedieDistribution(power=power) for power in np.linspace(1, 2, 8) + TweedieDistribution(power=power) + # exclude upper bound as power=2 does not support null y samples. + for power in np.linspace(1 + 1e-4, 2 - 1e-4, 8) ] } + glm_total = GridSearchCV( - GeneralizedLinearRegressor(), cv=3, param_grid=params, n_jobs=-1 + GeneralizedLinearRegressor(tol=1e-3, max_iter=500), cv=3, + param_grid=params, n_jobs=-1 ) glm_total.fit( X_train, df_train["ClaimAmount"], sample_weight=df_train["Exposure"] diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index e8f63e95cdfa8..d4227a126e5ec 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -18,7 +18,7 @@ lasso_path, enet_path, MultiTaskLasso, MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLassoCV) -from ._glm import (GeneralizedLinearRegressor, PoissonRegressor) +from ._glm import (GeneralizedLinearRegressor, PoissonRegressor, GammaRegressor) from .huber import HuberRegressor from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from .stochastic_gradient import SGDClassifier, SGDRegressor diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py index ac0f2c206ffe0..a7d4b67817730 100644 --- a/sklearn/linear_model/_glm/__init__.py +++ b/sklearn/linear_model/_glm/__init__.py @@ -1,5 +1,5 @@ # License: BSD 3 clause -from .glm import (GeneralizedLinearRegressor, PoissonRegressor) +from .glm import GeneralizedLinearRegressor, PoissonRegressor, GammaRegressor -__all__ = ['GeneralizedLinearRegressor', 'PoissonRegressor'] +__all__ = ["GeneralizedLinearRegressor", "PoissonRegressor", "GammaRegressor"] diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index ea3e4964529bf..12486e188951a 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -24,7 +24,6 @@ Link, IdentityLink, LogLink, - LogitLink, ) @@ -64,7 +63,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): The distributional assumption of the GLM, i.e. which distribution from the EDM, specifies the loss function to be minimized. - link : {'auto', 'identity', 'log', 'logit'} or an instance of class Link, \ + link : {'auto', 'identity', 'log'} or an instance of class Link, \ optional (default='auto') The link function of the GLM, i.e. mapping from linear predictor (X*coef) to expectation (mu). Option 'auto' sets the link depending on @@ -160,11 +159,13 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): References ---------- - For the coordinate descent implementation: - * Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin - An Improved GLMNET for L1-regularized Logistic Regression, - Journal of Machine Learning Research 13 (2012) 1999-2030 - https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf + .. McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, + Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + + .. Jørgensen, B. (1992). 
The theory of exponential dispersion models + and analysis of deviance. Monografias de matemática, no. 51. See also + `Exponential dispersion model. + `_ """ def __init__(self, alpha=1.0, fit_intercept=True, family='normal', link='auto', @@ -240,12 +241,10 @@ def fit(self, X, y, sample_weight=None): self._link_instance = IdentityLink() elif self.link == 'log': self._link_instance = LogLink() - elif self.link == 'logit': - self._link_instance = LogitLink() else: raise ValueError( "The link must be an instance of class Link or " - "an element of ['auto', 'identity', 'log', 'logit']; " + "an element of ['auto', 'identity', 'log']; " "got (link={0})".format(self.link)) if not isinstance(self.alpha, numbers.Number) or self.alpha < 0: @@ -606,18 +605,127 @@ class PoissonRegressor(GeneralizedLinearRegressor): References ---------- - For the coordinate descent implementation: - * Guo-Xun Yuan, Chia-Hua Ho, Chih-Jen Lin - An Improved GLMNET for L1-regularized Logistic Regression, - Journal of Machine Learning Research 13 (2012) 1999-2030 - https://www.csie.ntu.edu.tw/~cjlin/papers/l1_glmnet/long-glmnet.pdf + .. McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, + Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + + .. Jørgensen, B. (1992). The theory of exponential dispersion models + and analysis of deviance. Monografias de matemática, no. 51. See also + `Exponential dispersion model. + `_ """ - def __init__(self, alpha=1.0, fit_intercept=True, fit_dispersion=None, - solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, - copy_X=True, check_input=True, verbose=0): + def __init__(self, alpha=1.0, fit_intercept=True, link='log', + fit_dispersion=None, solver='lbfgs', max_iter=100, tol=1e-4, + warm_start=False, copy_X=True, check_input=True, verbose=0): + + super().__init__(alpha=alpha, fit_intercept=fit_intercept, + family="poisson", link=link, + fit_dispersion=fit_dispersion, solver=solver, + max_iter=max_iter, tol=tol, warm_start=warm_start, + copy_X=copy_X, verbose=verbose) + + +class GammaRegressor(GeneralizedLinearRegressor): + """Regression with the response variable y following a Gamma distribution + + GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at + fitting and predicting the mean of the target y as mu=h(X*w). + The fit minimizes the following objective function with L2 regularization:: + + 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * ||w||_2^2 + + with inverse link function h and s=sample_weight. Note that for + ``sample_weight=None``, one has s_i=1 and sum(s)=n_samples). + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + alpha : float, optional (default=1) + Constant that multiplies the penalty terms and thus determines the + regularization strength. + See the notes for the exact mathematical meaning of this + parameter.``alpha = 0`` is equivalent to unpenalized GLMs. In this + case, the design matrix X must have full column rank + (no collinearities). + + fit_intercept : boolean, optional (default=True) + Specifies if a constant (a.k.a. bias or intercept) should be + added to the linear predictor (X*coef+intercept). + + fit_dispersion : {None, 'chisqr', 'deviance'}, optional (default=None) + Method for estimation of the dispersion parameter phi. Whether to use + the chi squared statistic or the deviance statistic. If None, the + dispersion is not estimated. 
+ + solver : {'lbfgs'}, optional (default='lbfgs') + Algorithm to use in the optimization problem: + + 'lbfgs' + Calls scipy's L-BFGS-B optimizer. + + max_iter : int, optional (default=100) + The maximal number of iterations for solver algorithms. + + tol : float, optional (default=1e-4) + Stopping criterion. For the lbfgs solver, + the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` + where ``g_i`` is the i-th component of the gradient (derivative) of + the objective function. + + warm_start : boolean, optional (default=False) + If set to ``True``, reuse the solution of the previous call to ``fit`` + as initialization for ``coef_`` and ``intercept_`` . + + copy_X : boolean, optional, (default=True) + If ``True``, X will be copied; else, it may be overwritten. + + verbose : int, optional (default=0) + For the lbfgs solver set verbose to any positive number for verbosity. + + Attributes + ---------- + coef_ : array, shape (n_features,) + Estimated coefficients for the linear predictor (X*coef_+intercept_) in + the GLM. + + intercept_ : float + Intercept (a.k.a. bias) added to linear predictor. + + dispersion_ : float + The dispersion parameter :math:`\\phi` if ``fit_dispersion`` was set. + + n_iter_ : int + Actual number of iterations used in solver. + + Notes + ----- + The fit itself does not need Y to be from an EDM, but only assumes + the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function + :math:`v(\\mu_i)` is a property of and given by the specific EDM, see + :ref:`User Guide `. + + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + minimizing the deviance plus penalty term, which is equivalent to + (penalized) maximum likelihood estimation. + + + References + ---------- + .. McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, + Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + + .. Jørgensen, B. (1992). The theory of exponential dispersion models + and analysis of deviance. Monografias de matemática, no. 51. See also + `Exponential dispersion model. 
+ `_ + """ + def __init__(self, alpha=1.0, fit_intercept=True, link='log', + fit_dispersion=None, solver='lbfgs', max_iter=100, tol=1e-4, + warm_start=False, copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, - family="poisson", link='log', + family="gamma", link=link, fit_dispersion=fit_dispersion, solver=solver, max_iter=max_iter, tol=tol, warm_start=warm_start, copy_X=copy_X, verbose=verbose) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 2909c85ba8c40..089f251d77049 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -77,8 +77,7 @@ def test_glm_family_argument(f, fam): @pytest.mark.parametrize('l, link', [('identity', IdentityLink()), - ('log', LogLink()), - ('logit', LogitLink())]) + ('log', LogLink())]) def test_glm_link_argument(l, link): """Test GLM link argument set as string.""" y = np.array([0.1, 0.5]) # in range of all distributions From 03a8a2d06acaf1b2d57bdef51991fecb7482cf75 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 26 Jul 2019 11:12:48 +0200 Subject: [PATCH 100/269] Improve documentation wording --- doc/modules/linear_model.rst | 41 ++++++++++--------- ...plot_poisson_regression_non_normal_loss.py | 5 +-- sklearn/linear_model/__init__.py | 6 ++- sklearn/linear_model/_glm/tests/test_glm.py | 1 - 4 files changed, 27 insertions(+), 26 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index b6f7c2b82c1f5..0eaaab2ecded5 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -893,7 +893,7 @@ to warm-starting (see :term:`Glossary `). Generalized Linear Regression ============================= -:class:`GeneralizedLinearRegressor` generalizes linear models in two ways +Generalized Linear Models (GLM) extend linear models in two ways [10]_. First, the predicted values :math:`\hat{y}` are linked to a linear combination of the input variables :math:`X` via an inverse link function :math:`h` as @@ -901,13 +901,31 @@ combination of the input variables :math:`X` via an inverse link function .. math:: \hat{y}(w, x) = h(x^\top w) = h(w_0 + w_1 x_1 + ... + w_p x_p). Secondly, the squared loss function is replaced by the deviance :math:`D` of an -exponential dispersion model (EDM) [11]_. The objective function being -minimized becomes +exponential dispersion model (EDM) [11]_. The minimized objective function is +the penalized negative log likelihood, .. math:: \frac{1}{2 \sum s_i}D(y, \hat{y}; s) +\frac{\alpha}{2} ||w||_2 with sample weights :math:`s`, and L2 regularization penalty :math:`\alpha`. +The deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` +likelihood as + +.. 
math:: d(y, \mu) = -2\phi\cdot + \left(loglike(y,\mu,\phi) + - loglike(y,y,\phi)\right) \\ + D(y, \mu; s) = \sum_i s_i \cdot d(y_i, \mu_i) + +===================================== =============================== ================================= ============================================ +Distribution Target Domain Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` +===================================== =============================== ================================= ============================================ +Normal ("normal") :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` +Poisson ("poisson") :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{\mu}-y+\mu)` +Gamma ("gamma") :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` +Inverse Gaussian ("inverse.gaussian") :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` +===================================== =============================== ================================= ============================================ + + In the following use cases, a loss different from the squared loss might be appropriate, @@ -968,23 +986,6 @@ The objective function (the penalized negative log likelihood) is independent of :math:`\phi` and is minimized with respect to the coefficients :math:`w`. -The deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` -likelihood as - -.. math:: d(y, \mu) = -2\phi\cdot - \left(loglike(y,\mu,\phi) - - loglike(y,y,\phi)\right) \\ - D(y, \mu; s) = \sum_i s_i \cdot d(y_i, \mu_i) - -===================================== =============================== ================================= ============================================ -Distribution Target Domain Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` -===================================== =============================== ================================= ============================================ -Normal ("normal") :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` -Poisson ("poisson") :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{\mu}-y+\mu)` -Gamma ("gamma") :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` -Inverse Gaussian ("inverse.gaussian") :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` -===================================== =============================== ================================= ============================================ - Two remarks: * The deviances for at least Normal, Poisson and Gamma distributions are diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 713866a712aea..695a7c7594472 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -4,9 +4,8 @@ ====================================== This example illustrates the use of linear Poisson regression -on the French Motor Third-Party Liability Claims dataset [1] and compare -it with learning models with least squared error. - +on the French Motor Third-Party Liability Claims dataset [1] and compares +it with models learned with least squared error. We start by defining a few helper functions for loading the data and visualizing results. 
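For a quick numerical check of the unit deviance formulas documented above, the Poisson case can be evaluated directly with NumPy (a minimal sketch using made-up values, not part of the patch; scipy's xlogy handles y == 0 the same way the distribution code does):

import numpy as np
from scipy.special import xlogy

y = np.array([0.0, 1.0, 3.0])    # toy observed counts (assumed values)
mu = np.array([0.5, 1.0, 2.0])   # toy predicted means, must be strictly positive

# Poisson unit deviance from the table: d(y, mu) = 2*(y*log(y/mu) - y + mu);
# xlogy(0, .) returns 0, which matches the y -> 0 limit of y*log(y/mu).
unit_dev = 2 * (xlogy(y, y / mu) - y + mu)
print(unit_dev)          # per-sample deviances
print(unit_dev.mean())   # unweighted mean Poisson deviance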
diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index d4227a126e5ec..63a52a9c9898e 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -18,7 +18,8 @@ lasso_path, enet_path, MultiTaskLasso, MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLassoCV) -from ._glm import (GeneralizedLinearRegressor, PoissonRegressor, GammaRegressor) +from ._glm import (GeneralizedLinearRegressor, PoissonRegressor, + GammaRegressor) from .huber import HuberRegressor from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from .stochastic_gradient import SGDClassifier, SGDRegressor @@ -81,4 +82,5 @@ 'ridge_regression', 'RANSACRegressor', 'GeneralizedLinearRegressor', - 'PoissonRegressor'] + 'PoissonRegressor', + 'GammaRegressor'] diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 089f251d77049..542c18b65cad2 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -11,7 +11,6 @@ from sklearn.linear_model._glm.link import ( IdentityLink, LogLink, - LogitLink, ) from sklearn.linear_model._glm.distribution import ( TweedieDistribution, From bbf7f38fa69a9335397fa1cdfd73d153cbc3670d Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 26 Jul 2019 11:29:54 +0200 Subject: [PATCH 101/269] Use dataframe OpenML fetcher --- ...plot_poisson_regression_non_normal_loss.py | 19 ++++++++----------- ...lot_tweedie_regression_insurance_claims.py | 19 ++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 695a7c7594472..347a424d8f4fe 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -25,6 +25,7 @@ import matplotlib.pyplot as plt import pandas as pd +from sklearn.datasets import fetch_openml from sklearn.compose import ColumnTransformer from sklearn.linear_model import PoissonRegressor, LinearRegression from sklearn.model_selection import train_test_split @@ -46,20 +47,16 @@ def load_mtpl2(n_samples=100000): number of samples to select (for faster run time). 
""" - # Note: this should use the OpenML DataFrame fetcher in the future - df_freq = pd.read_csv( - "https://www.openml.org/data/get_csv/20649148/freMTPL2freq.csv", - dtype={"IDpol": np.int}, - index_col=0, - ) + # freMTPL2freq dataset from https://www.openml.org/d/41214 + df_freq = fetch_openml(data_id=41214, as_frame=True)['data'] + df_freq['IDpol'] = df_freq['IDpol'].astype(np.int) + df_freq.set_index('IDpol', inplace=True) - df_sev = pd.read_csv( - "https://www.openml.org/data/get_csv/20649149/freMTPL2sev.arff", - index_col=0, - ) + # freMTPL2sev dataset from https://www.openml.org/d/41215 + df_sev = fetch_openml(data_id=41215, as_frame=True)['data'] # sum ClaimAmount over identical IDs - df_sev = df_sev.groupby(level=0).sum() + df_sev = df_sev.groupby('IDpol').sum() df = df_freq.join(df_sev, how="left") df["ClaimAmount"].fillna(0, inplace=True) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 55d0ca24ce477..418127699dc1a 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -38,6 +38,7 @@ import matplotlib.pyplot as plt import pandas as pd +from sklearn.datasets import fetch_openml from sklearn.compose import ColumnTransformer from sklearn.linear_model import PoissonRegressor, GammaRegressor from sklearn.linear_model._glm import GeneralizedLinearRegressor @@ -59,20 +60,16 @@ def load_mtpl2(n_samples=100000): number of samples to select (for faster run time). """ - # Note: this should use the OpenML DataFrame fetcher in the future - df_freq = pd.read_csv( - "https://www.openml.org/data/get_csv/20649148/freMTPL2freq.csv", - dtype={"IDpol": np.int}, - index_col=0, - ) + # freMTPL2freq dataset from https://www.openml.org/d/41214 + df_freq = fetch_openml(data_id=41214, as_frame=True)['data'] + df_freq['IDpol'] = df_freq['IDpol'].astype(np.int) + df_freq.set_index('IDpol', inplace=True) - df_sev = pd.read_csv( - "https://www.openml.org/data/get_csv/20649149/freMTPL2sev.arff", - index_col=0, - ) + # freMTPL2sev dataset from https://www.openml.org/d/41215 + df_sev = fetch_openml(data_id=41215, as_frame=True)['data'] # sum ClaimAmount over identical IDs - df_sev = df_sev.groupby(level=0).sum() + df_sev = df_sev.groupby('IDpol').sum() df = df_freq.join(df_sev, how="left") df["ClaimAmount"].fillna(0, inplace=True) From 49a3a8e116322c59f56b9d0b3fdb77028b144dd3 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 26 Jul 2019 12:00:26 +0200 Subject: [PATCH 102/269] Refactor distibution bounds --- sklearn/linear_model/_glm/distribution.py | 111 +++++------------- .../_glm/tests/test_distribution.py | 21 +++- 2 files changed, 50 insertions(+), 82 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index b99e1b40b2871..e6635d2003850 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -6,6 +6,7 @@ # License: BSD 3 clause from abc import ABCMeta, abstractmethod +from collections import namedtuple import numbers import numpy as np @@ -20,6 +21,10 @@ def _safe_lin_pred(X, coef): return X @ coef +DistributionBoundary = namedtuple("DistributionBoundary", + ("value", "inclusive")) + + class ExponentialDispersionModel(metaclass=ABCMeta): r"""Base class for reproductive Exponential Dispersion Models (EDM). 
@@ -35,13 +40,6 @@ class ExponentialDispersionModel(metaclass=ABCMeta): unit variance :math:`v(\mu)` and unit deviance :math:`d(y,\mu)`. - Attributes - ---------- - lower_bound - upper_bound - include_lower_bound - include_upper_bound - Methods ------- deviance @@ -52,55 +50,33 @@ class ExponentialDispersionModel(metaclass=ABCMeta): unit_variance unit_variance_derivative - _mu_deviance_derivative - References ---------- - https://en.wikipedia.org/wiki/Exponential_dispersion_model. """ - @property - def lower_bound(self): - """Get the lower bound of values for Y~EDM.""" - return self._lower_bound - @property - def upper_bound(self): - """Get the upper bound of values for Y~EDM.""" - return self._upper_bound - - @property - def include_lower_bound(self): - """Get True if lower bound for y is included: y >= lower_bound.""" - return self._include_lower_bound - - @property - def include_upper_bound(self): - """Get True if upper bound for y is included: y <= upper_bound.""" - return self._include_upper_bound - - def in_y_range(self, x): - """Returns ``True`` if x is in the valid range of Y~EDM. + def in_y_range(self, y): + """Returns ``True`` if y is in the valid range of Y~EDM. Parameters ---------- - x : array, shape (n_samples,) + y : array, shape (n_samples,) Target values. """ - if self.include_lower_bound: - if self.include_upper_bound: - return np.logical_and(np.greater_equal(x, self.lower_bound), - np.less_equal(x, self.upper_bound)) - else: - return np.logical_and(np.greater_equal(x, self.lower_bound), - np.less(x, self.upper_bound)) + if hasattr(self, '_upper_bound'): + # All currently supported distributions have an upper bound at + # +inf, however this may need to be implemented for other + # distributions + raise NotImplementedError + + if not isinstance(self._lower_bound, DistributionBoundary): + raise TypeError('_lower_bound attribute must be of type ' + 'DistributionBoundary') + + if self._lower_bound.inclusive: + return np.greater_equal(y, self._lower_bound.value) else: - if self.include_upper_bound: - return np.logical_and(np.greater(x, self.lower_bound), - np.less_equal(x, self.upper_bound)) - else: - return np.logical_and(np.greater(x, self.lower_bound), - np.less(x, self.upper_bound)) + return np.greater(y, self._lower_bound.value) @abstractmethod def unit_variance(self, mu): @@ -265,42 +241,17 @@ def power(self, power): raise TypeError('power must be a real number, input was {0}' .format(power)) - self._upper_bound = np.Inf - self._include_upper_bound = False - if power < 0: - # Extreme Stable - self._lower_bound = -np.Inf - self._include_lower_bound = False - elif power == 0: - # NormalDistribution - self._lower_bound = -np.Inf - self._include_lower_bound = False - elif (power > 0) and (power < 1): + if power <= 0: + # Extreme Stable or Normal distribution + self._lower_bound = DistributionBoundary(-np.Inf, inclusive=False) + elif 0 < power < 1: raise ValueError('For 0 1) and (power < 2): - # Compound Poisson - self._lower_bound = 0 - self._include_lower_bound = True - elif power == 2: - # GammaDistribution - self._lower_bound = 0 - self._include_lower_bound = False - elif (power > 2) and (power < 3): - # Positive Stable - self._lower_bound = 0 - self._include_lower_bound = False - elif power == 3: - # InverseGaussianDistribution - self._lower_bound = 0 - self._include_lower_bound = False - elif power > 3: - # Positive Stable - self._lower_bound = 0 - self._include_lower_bound = False + elif 1 <= power < 2: + # Poisson or Compound Poisson distribution + self._lower_bound = 
DistributionBoundary(0, inclusive=True) + elif power >= 2: + # Gamma, Positive Stable, Inverse Gaussian distributions + self._lower_bound = DistributionBoundary(0, inclusive=False) else: # pragma: no cover # this branch should be unreachable. raise ValueError diff --git a/sklearn/linear_model/_glm/tests/test_distribution.py b/sklearn/linear_model/_glm/tests/test_distribution.py index f984077bed5cb..f9c329f35caa4 100644 --- a/sklearn/linear_model/_glm/tests/test_distribution.py +++ b/sklearn/linear_model/_glm/tests/test_distribution.py @@ -13,6 +13,7 @@ TweedieDistribution, NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, + DistributionBoundary ) @@ -30,6 +31,19 @@ def test_family_bounds(family, expected): assert_array_equal(result, expected) +def test_invalid_distribution_bound(): + dist = TweedieDistribution() + dist._lower_bound = 0 + with pytest.raises(TypeError, + match="must be of type DistributionBoundary"): + dist.in_y_range([-1, 0, 1]) + + dist = TweedieDistribution() + dist._upper_bound = None + with pytest.raises(NotImplementedError): + dist.in_y_range([-1, 0, 1]) + + def test_tweedie_distribution_power(): with pytest.raises(ValueError, match="no distribution exists"): TweedieDistribution(power=0.5) @@ -42,9 +56,12 @@ def test_tweedie_distribution_power(): dist.power = 1j dist = TweedieDistribution() - assert dist._include_lower_bound is False + assert isinstance(dist._lower_bound, DistributionBoundary) + + assert dist._lower_bound.inclusive is False dist.power = 1 - assert dist._include_lower_bound is True + assert dist._lower_bound.value == 0.0 + assert dist._lower_bound.inclusive is True @pytest.mark.parametrize( From 228e8c8633d3bae7bbe9a36fa3fccb97567fd653 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 26 Jul 2019 15:13:58 +0200 Subject: [PATCH 103/269] Move deviance checks under destribution --- sklearn/linear_model/_glm/distribution.py | 70 +++++++++++++++++-- .../_glm/tests/test_distribution.py | 3 +- sklearn/metrics/regression.py | 35 +--------- sklearn/metrics/tests/test_regression.py | 12 ++-- 4 files changed, 74 insertions(+), 46 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index e6635d2003850..c30996662114e 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -113,7 +113,7 @@ def unit_variance_derivative(self, mu): pass # pragma: no cover @abstractmethod - def unit_deviance(self, y, mu): + def unit_deviance(self, y, mu, check_input=False): r"""Compute the unit deviance. The unit_deviance :math:`d(y,\mu)` can be defined by the @@ -128,6 +128,14 @@ def unit_deviance(self, y, mu): mu : array, shape (n_samples,) Predicted mean. + + check_input : bool, default=False + If True raise an exception on invalid y or mu values, otherwise + they will be propagated as NaN. 
+ Returns + ------- + deviance: array, shape (n_samples,) + Computed deviance """ pass # pragma: no cover @@ -245,7 +253,8 @@ def power(self, power): # Extreme Stable or Normal distribution self._lower_bound = DistributionBoundary(-np.Inf, inclusive=False) elif 0 < power < 1: - raise ValueError('For 0=1.') elif 1 <= power < 2: # Poisson or Compound Poisson distribution self._lower_bound = DistributionBoundary(0, inclusive=True) @@ -279,15 +288,66 @@ def unit_variance_derivative(self, mu): """ return self.power * np.power(mu, self.power - 1) - def unit_deviance(self, y, mu): + def unit_deviance(self, y, mu, check_input=False): + r"""Compute the unit deviance. + + The unit_deviance :math:`d(y,\mu)` can be defined by the + log-likelihood as + :math:`d(y,\mu) = -2\phi\cdot + \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right).` + + Parameters + ---------- + y : array, shape (n_samples,) + Target values. + + mu : array, shape (n_samples,) + Predicted mean. + + check_input : bool, default=False + If True raise an exception on invalid y or mu values, otherwise + they will be propagated as NaN. + Returns + ------- + deviance: array, shape (n_samples,) + Computed deviance + """ p = self.power + + if check_input: + message = ("Mean Tweedie deviance error with p={} can only be " + "used on ".format(p)) + if p < 0: + # 'Extreme stable', y any realy number, mu > 0 + if (mu <= 0).any(): + raise ValueError(message + "strictly positive mu.") + elif p == 0: + # Normal, y and mu can be any real number + pass + elif 0 < p < 1: + raise ValueError("Tweedie deviance is only defined for p<=0 " + "and p>=1.") + elif 1 <= p < 2: + # Poisson and Compount poisson distribution, y >= 0, mu > 0 + if (y < 0).any() or (mu <= 0).any(): + raise ValueError(message + "non-negative y and strictly " + "positive mu.") + elif p >= 2: + # Gamma and Extreme stable distribution, y and mu > 0 + if (y <= 0).any() or (mu <= 0).any(): + raise ValueError(message + "strictly positive y and mu.") + else: # pragma: nocover + # Unreachable statement + raise ValueError + if p < 0: - # 'Extreme stable', y_true any realy number, y_pred > 0 + # 'Extreme stable', y any realy number, mu > 0 dev = 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p) * (2-p)) - y * np.power(mu, 1-p)/(1-p) + np.power(mu, 2-p)/(2-p)) + elif p == 0: - # Normal distribution, y_true and y_pred any real number + # Normal distribution, y and mu any real number dev = (y - mu)**2 elif p < 1: raise ValueError("Tweedie deviance is only defined for p<=0 and " diff --git a/sklearn/linear_model/_glm/tests/test_distribution.py b/sklearn/linear_model/_glm/tests/test_distribution.py index f9c329f35caa4..82e493b7a2149 100644 --- a/sklearn/linear_model/_glm/tests/test_distribution.py +++ b/sklearn/linear_model/_glm/tests/test_distribution.py @@ -45,7 +45,8 @@ def test_invalid_distribution_bound(): def test_tweedie_distribution_power(): - with pytest.raises(ValueError, match="no distribution exists"): + msg = "distribution is only defined for p<=0 and p>=1" + with pytest.raises(ValueError, match=msg): TweedieDistribution(power=0.5) with pytest.raises(TypeError, match="must be a real number"): diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index 2f6c442c1f824..538fd7eec4631 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -22,9 +22,7 @@ # Christian Lorentzen # License: BSD 3 clause - import numpy as np -from scipy.special import xlogy import warnings from ..utils.validation import (check_array, check_consistent_length, @@ -676,39 
+674,8 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0): sample_weight = column_or_1d(sample_weight) sample_weight = sample_weight[:, np.newaxis] - message = ("Mean Tweedie deviance error with p={} can only be used on " - .format(p)) - if p < 0: - # 'Extreme stable', y_true any realy number, y_pred > 0 - if (y_pred <= 0).any(): - raise ValueError(message + "strictly positive y_pred.") - elif p == 0: - pass - elif p < 1: - raise ValueError("Tweedie deviance is only defined for p<=0 and " - "p>=1.") - elif p == 1: - # Poisson distribution, y_true >= 0, y_pred > 0 - if (y_true < 0).any() or (y_pred <= 0).any(): - raise ValueError(message + "non-negative y_true and strictly " - "positive y_pred.") - elif p == 2: - # Gamma distribution, y_true and y_pred > 0 - if (y_true <= 0).any() or (y_pred <= 0).any(): - raise ValueError(message + "strictly positive y_true and y_pred.") - else: - if p < 2: - # 1 < p < 2 is Compound Poisson, y_true >= 0, y_pred > 0 - if (y_true < 0).any() or (y_pred <= 0).any(): - raise ValueError(message + "non-negative y_true and strictly " - "positive y_pred.") - else: - if (y_true <= 0).any() or (y_pred <= 0).any(): - raise ValueError(message + "strictly positive y_true and " - "y_pred.") - dist = TweedieDistribution(power=p) - dev = dist.unit_deviance(y_true, y_pred) + dev = dist.unit_deviance(y_true, y_pred, check_input=True) return np.average(dev, weights=sample_weight) diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 526c27f0a036c..d46bca1301b1e 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -101,35 +101,35 @@ def test_regression_metrics_at_limits(): assert_allclose(mean_tweedie_deviance([0], [1.], p=p), 2./(2.-p), rtol=1e-3) with pytest.raises(ValueError, - match="can only be used on strictly positive y_pred."): + match="can only be used on strictly positive mu."): mean_tweedie_deviance([0.], [0.], p=p) assert_almost_equal(mean_tweedie_deviance([0.], [0.], p=0), 0.00, 2) - msg = "only be used on non-negative y_true and strictly positive y_pred." + msg = "only be used on non-negative y and strictly positive mu." with pytest.raises(ValueError, match=msg): mean_tweedie_deviance([0.], [0.], p=1.0) p = 1.5 assert_allclose(mean_tweedie_deviance([0.], [1.], p=p), 2./(2.-p)) - msg = "only be used on non-negative y_true and strictly positive y_pred." + msg = "only be used on non-negative y and strictly positive mu." with pytest.raises(ValueError, match=msg): mean_tweedie_deviance([0.], [0.], p=p) p = 2. assert_allclose(mean_tweedie_deviance([1.], [1.], p=p), 0.00, atol=1e-8) - msg = "can only be used on strictly positive y_true and y_pred." + msg = "can only be used on strictly positive y and mu." with pytest.raises(ValueError, match=msg): mean_tweedie_deviance([0.], [0.], p=p) p = 3. assert_allclose(mean_tweedie_deviance([1.], [1.], p=p), 0.00, atol=1e-8) - msg = "can only be used on strictly positive y_true and y_pred." + msg = "can only be used on strictly positive y and mu." 
with pytest.raises(ValueError, match=msg): mean_tweedie_deviance([0.], [0.], p=p) with pytest.raises(ValueError, - match="deviance is only defined for p<=0 and p>=1."): + match="is only defined for p<=0 and p>=1"): mean_tweedie_deviance([0.], [0.], p=0.5) From 09a57c9cfedfebb2452c50921260c310987a0057 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 26 Jul 2019 18:12:55 +0200 Subject: [PATCH 104/269] Expose TweedieRegressor --- doc/modules/linear_model.rst | 6 +- ...lot_tweedie_regression_insurance_claims.py | 26 ++- sklearn/linear_model/__init__.py | 7 +- sklearn/linear_model/_glm/__init__.py | 14 +- sklearn/linear_model/_glm/glm.py | 154 ++++++++++++++++++ sklearn/linear_model/_glm/tests/test_glm.py | 38 ++++- 6 files changed, 221 insertions(+), 24 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 0eaaab2ecded5..cab918b06f3b0 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -949,10 +949,10 @@ Note that the feature matrix ``X`` should be standardized before fitting. This ensures that the penalty treats features equally. The estimator can be used as follows: - >>> from sklearn.linear_model import GeneralizedLinearRegressor - >>> reg = GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') + >>> from sklearn.linear_model import TweedieRegressor + >>> reg = TweedieRegressor(alpha=0.5, family='poisson', link='log') >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) - GeneralizedLinearRegressor(alpha=0.5, family='poisson', link='log') + TweedieRegressor(alpha=0.5, family='poisson', link='log') >>> reg.coef_ array([0.2463..., 0.4337...]) >>> reg.intercept_ diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 418127699dc1a..55a21c8d8723d 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -41,8 +41,7 @@ from sklearn.datasets import fetch_openml from sklearn.compose import ColumnTransformer from sklearn.linear_model import PoissonRegressor, GammaRegressor -from sklearn.linear_model._glm import GeneralizedLinearRegressor -from sklearn.linear_model._glm.distribution import TweedieDistribution +from sklearn.linear_model import TweedieRegressor from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import FunctionTransformer, OneHotEncoder @@ -418,7 +417,6 @@ class ClaimProdEstimator: def __init__(self, est_freq, est_sev): self.est_freq = est_freq self.est_sev = est_sev - self._family_instance = TweedieDistribution(power=1.5) def predict(self, X, exposure): """Predict the total claim amount. @@ -429,11 +427,14 @@ def predict(self, X, exposure): def score(self, X, y, sample_weight=None): """Compute D², the percentage of deviance explained.""" + # TODO: remove this private import once d2_score is available + from sklearn.linear_model._glm.distribution import TweedieDistribution + mu = self.predict(X, exposure=sample_weight) - dev = self._family_instance.deviance(y, mu, weights=sample_weight) + family = TweedieDistribution(power=1.5) + dev = family.deviance(y, mu, weights=sample_weight) y_mean = np.average(y, weights=sample_weight) - dev_null = self._family_instance.deviance(y, y_mean, - weights=sample_weight) + dev_null = family.deviance(y, y_mean, weights=sample_weight) return 1. 
- dev / dev_null @@ -459,18 +460,13 @@ def score(self, X, y, sample_weight=None): from sklearn.model_selection import GridSearchCV -# this takes a while -params = { - "family": [ - TweedieDistribution(power=power) - # exclude upper bound as power=2 does not support null y samples. - for power in np.linspace(1 + 1e-4, 2 - 1e-4, 8) - ] -} +# exclude upper bound as power=2 does not support null y values. +params = {"power": np.linspace(1 + 1e-4, 2 - 1e-4, 8)} +# this takes a while glm_total = GridSearchCV( - GeneralizedLinearRegressor(tol=1e-3, max_iter=500), cv=3, + TweedieRegressor(tol=1e-3, max_iter=500), cv=3, param_grid=params, n_jobs=-1 ) glm_total.fit( diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index 63a52a9c9898e..46d1efe63de2e 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -18,8 +18,8 @@ lasso_path, enet_path, MultiTaskLasso, MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLassoCV) -from ._glm import (GeneralizedLinearRegressor, PoissonRegressor, - GammaRegressor) +from ._glm import (PoissonRegressor, + GammaRegressor, TweedieRegressor) from .huber import HuberRegressor from .sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber from .stochastic_gradient import SGDClassifier, SGDRegressor @@ -83,4 +83,5 @@ 'RANSACRegressor', 'GeneralizedLinearRegressor', 'PoissonRegressor', - 'GammaRegressor'] + 'GammaRegressor', + 'TweedieRegressor'] diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py index a7d4b67817730..3b5c0d95d6124 100644 --- a/sklearn/linear_model/_glm/__init__.py +++ b/sklearn/linear_model/_glm/__init__.py @@ -1,5 +1,15 @@ # License: BSD 3 clause -from .glm import GeneralizedLinearRegressor, PoissonRegressor, GammaRegressor +from .glm import ( + GeneralizedLinearRegressor, + PoissonRegressor, + GammaRegressor, + TweedieRegressor +) -__all__ = ["GeneralizedLinearRegressor", "PoissonRegressor", "GammaRegressor"] +__all__ = [ + "GeneralizedLinearRegressor", + "PoissonRegressor", + "GammaRegressor", + "TweedieRegressor" +] diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 12486e188951a..ca33c45c76292 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -623,6 +623,15 @@ def __init__(self, alpha=1.0, fit_intercept=True, link='log', max_iter=max_iter, tol=tol, warm_start=warm_start, copy_X=copy_X, verbose=verbose) + @property + def family(self): + return "poisson" + + @family.setter + def family(self, value): + if value != "poisson": + raise ValueError("PoissonRegressor.family must be 'poisson'!") + class GammaRegressor(GeneralizedLinearRegressor): """Regression with the response variable y following a Gamma distribution @@ -729,3 +738,148 @@ def __init__(self, alpha=1.0, fit_intercept=True, link='log', fit_dispersion=fit_dispersion, solver=solver, max_iter=max_iter, tol=tol, warm_start=warm_start, copy_X=copy_X, verbose=verbose) + + @property + def family(self): + return "gamma" + + @family.setter + def family(self, value): + if value != "gamma": + raise ValueError("GammaRegressor.family must be 'gamma'!") + + +class TweedieRegressor(GeneralizedLinearRegressor): + """Regression with the response variable y following a Tweedie distribution + + GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at + fitting and predicting the mean of the target y as mu=h(X*w). 
+ The fit minimizes the following objective function with L2 regularization:: + + 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * ||w||_2^2 + + with inverse link function h and s=sample_weight. Note that for + ``sample_weight=None``, one has s_i=1 and sum(s)=n_samples). + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + power : float (default=0) + The variance power: :math:`v(\mu) = \mu^{power}`. + For ``0`. + + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + minimizing the deviance plus penalty term, which is equivalent to + (penalized) maximum likelihood estimation. + + + References + ---------- + .. McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, + Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + + .. Jørgensen, B. (1992). The theory of exponential dispersion models + and analysis of deviance. Monografias de matemática, no. 51. See also + `Exponential dispersion model. + `_ + """ + def __init__(self, power=0.0, alpha=1.0, fit_intercept=True, link='log', + fit_dispersion=None, solver='lbfgs', max_iter=100, tol=1e-4, + warm_start=False, copy_X=True, check_input=True, verbose=0): + + super().__init__(alpha=alpha, fit_intercept=fit_intercept, + family=TweedieDistribution(power=power), link=link, + fit_dispersion=fit_dispersion, solver=solver, + max_iter=max_iter, tol=tol, warm_start=warm_start, + copy_X=copy_X, verbose=verbose) + + @property + def family(self): + dist = TweedieDistribution(power=self.power) + # TODO: make the returned object immutable + return dist + + @family.setter + def family(self, value): + if isinstance(value, TweedieDistribution): + self.power = value.power + else: + raise TypeError("TweedieRegressor.family must be of type " + "TweedieDistribution!") diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 542c18b65cad2..a56155fe03f22 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -7,7 +7,12 @@ import pytest from sklearn.datasets import make_regression -from sklearn.linear_model import GeneralizedLinearRegressor +from sklearn.linear_model._glm import GeneralizedLinearRegressor +from sklearn.linear_model import ( + TweedieRegressor, + PoissonRegressor, + GammaRegressor +) from sklearn.linear_model._glm.link import ( IdentityLink, LogLink, @@ -353,3 +358,34 @@ def test_convergence_warning(solver, regression_data): max_iter=1, tol=1e-20) with pytest.warns(ConvergenceWarning): est.fit(X, y) + + +def test_poisson_regression_family(regression_data): + est = PoissonRegressor() + est.family == "poisson" + + msg = "PoissonRegressor.family must be 'poisson'!" + with pytest.raises(ValueError, match=msg): + est.family = 0 + + +def test_gamma_regression_family(regression_data): + est = GammaRegressor() + est.family == "gamma" + + msg = "GammaRegressor.family must be 'gamma'!" + with pytest.raises(ValueError, match=msg): + est.family = 0 + + +def test_tweedie_regression_family(regression_data): + power = 2.0 + est = TweedieRegressor(power=power) + assert isinstance(est.family, TweedieDistribution) + assert est.family.power == power + msg = "TweedieRegressor.family must be of type TweedieDistribution!" 
+ with pytest.raises(TypeError, match=msg): + est.family = None + + # TODO: the following should not be allowed + # est.family.power = 2 From 4b485cac821d30ece504a43d0c3316140cec5d33 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 26 Jul 2019 18:32:07 +0200 Subject: [PATCH 105/269] Improve documentation --- doc/modules/classes.rst | 2 +- doc/modules/linear_model.rst | 38 ++++++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 177cd0780f9be..e5b61faf352e4 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -751,7 +751,6 @@ Kernels: linear_model.ElasticNet linear_model.ElasticNetCV linear_model.GammaRegressor - linear_model.GeneralizedLinearRegressor linear_model.HuberRegressor linear_model.Lars linear_model.LarsCV @@ -781,6 +780,7 @@ Kernels: linear_model.SGDClassifier linear_model.SGDRegressor linear_model.TheilSenRegressor + linear_model.TweedieRegressor .. autosummary:: :toctree: generated/ diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index cab918b06f3b0..f4a48fcaf3acd 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -916,14 +916,14 @@ likelihood as - loglike(y,y,\phi)\right) \\ D(y, \mu; s) = \sum_i s_i \cdot d(y_i, \mu_i) -===================================== =============================== ================================= ============================================ -Distribution Target Domain Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` -===================================== =============================== ================================= ============================================ -Normal ("normal") :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` -Poisson ("poisson") :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{\mu}-y+\mu)` -Gamma ("gamma") :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` -Inverse Gaussian ("inverse.gaussian") :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` -===================================== =============================== ================================= ============================================ +================= =============================== ================================= ============================================ +Distribution Target Domain Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` +================= =============================== ================================= ============================================ +Normal :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` +Poisson :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{\mu}-y+\mu)` +Gamma :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` +Inverse Gaussian :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` +================= =============================== ================================= ============================================ In the following use cases, a loss different from the squared loss might be @@ -945,14 +945,32 @@ it is convenient to apply a link function different from the identity link :math:`h(x^\top w)=x^\top w` that guarantees the non-negativeness, e.g. the log-link with :math:`h(x^\top w)=\exp(x^\top w)`. 
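The point about the log link guaranteeing positive predictions is easy to see numerically (a minimal NumPy sketch with arbitrary toy coefficients, not tied to any estimator in this series):

import numpy as np

rng = np.random.RandomState(0)
X = rng.normal(size=(5, 2))
w = np.array([0.5, -2.0])   # toy coefficients (assumed values)

lin_pred = X @ w            # identity link: predictions can take either sign
mu = np.exp(lin_pred)       # inverse log link h(z) = exp(z)
print(lin_pred)             # mixes signs in general
print(np.all(mu > 0))       # True: exp keeps every prediction strictly positive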
+:class:`linear_model.TweedieRegressor` implements a generalized linear model +for the Tweedie distribution, that allows to model any of the above mentionned +distribution using the appropriate power parameter `p`, + + - `p = 0`: Normal distribution. Specialized solvers such as + :class:`linear_model.Ridge`, :class:`linear_model.ElasticNet` are generally + more appropriate in this case. + + - `p = 1`: Poisson distribution. :class:`PoissonRegressor` is exposed for + convinience however it is strictly equivalent to `TweedieRegressor(power=1)`. + + - `p = 2`: Gamma distribution. :class:`GammaRegressor` is exposed for + convinience however it is also strictly equivalent to + `TweedieRegressor(power=2)`. + + - `p = 3`: Inverse Gamma distribution. + + Note that the feature matrix ``X`` should be standardized before fitting. This ensures that the penalty treats features equally. The estimator can be used as follows: >>> from sklearn.linear_model import TweedieRegressor - >>> reg = TweedieRegressor(alpha=0.5, family='poisson', link='log') + >>> reg = TweedieRegressor(power=1, alpha=0.5, link='log') >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) - TweedieRegressor(alpha=0.5, family='poisson', link='log') + TweedieRegressor(alpha=0.5, power=1) >>> reg.coef_ array([0.2463..., 0.4337...]) >>> reg.intercept_ From aa0adf1e6304d9f8aa3497e2fa406244e8d3405b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 26 Jul 2019 18:34:13 +0200 Subject: [PATCH 106/269] Lint --- sklearn/linear_model/_glm/glm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index ca33c45c76292..7d7ef099cd04a 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -766,7 +766,7 @@ class TweedieRegressor(GeneralizedLinearRegressor): Parameters ---------- power : float (default=0) - The variance power: :math:`v(\mu) = \mu^{power}`. + The variance power: :math:`v(\\mu) = \\mu^{power}`. For ``0 Date: Tue, 30 Jul 2019 14:13:46 +0200 Subject: [PATCH 107/269] Fix __init__ --- sklearn/linear_model/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index 46d1efe63de2e..9f696a14985c3 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -81,7 +81,6 @@ 'orthogonal_mp_gram', 'ridge_regression', 'RANSACRegressor', - 'GeneralizedLinearRegressor', 'PoissonRegressor', 'GammaRegressor', 'TweedieRegressor'] From 7a9d0674174af423b5bfe8290631db10c53a946b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 2 Aug 2019 15:39:19 +0200 Subject: [PATCH 108/269] Update doc/modules/linear_model.rst Co-Authored-By: Joel Nothman --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index f4a48fcaf3acd..8123eac5fc4a2 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -954,7 +954,7 @@ distribution using the appropriate power parameter `p`, more appropriate in this case. - `p = 1`: Poisson distribution. :class:`PoissonRegressor` is exposed for - convinience however it is strictly equivalent to `TweedieRegressor(power=1)`. + convenience however it is strictly equivalent to `TweedieRegressor(power=1)`. - `p = 2`: Gamma distribution. 
:class:`GammaRegressor` is exposed for convinience however it is also strictly equivalent to From 18b45037fa6cb4d172c4e1b3cb92c1c33c0700c4 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 2 Aug 2019 15:39:29 +0200 Subject: [PATCH 109/269] Update doc/modules/linear_model.rst Co-Authored-By: Joel Nothman --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 8123eac5fc4a2..d159a8ad77039 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -957,7 +957,7 @@ distribution using the appropriate power parameter `p`, convenience however it is strictly equivalent to `TweedieRegressor(power=1)`. - `p = 2`: Gamma distribution. :class:`GammaRegressor` is exposed for - convinience however it is also strictly equivalent to + convenience however it is strictly equivalent to `TweedieRegressor(power=2)`. - `p = 3`: Inverse Gamma distribution. From 29658d66ff34eb633f2728d69a956cde760c271e Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 2 Aug 2019 15:39:40 +0200 Subject: [PATCH 110/269] Update doc/modules/linear_model.rst Co-Authored-By: Joel Nothman --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index d159a8ad77039..02c01b674f467 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -965,7 +965,7 @@ distribution using the appropriate power parameter `p`, Note that the feature matrix ``X`` should be standardized before fitting. This ensures that the penalty treats features equally. The estimator can be used as -follows: +follows:: >>> from sklearn.linear_model import TweedieRegressor >>> reg = TweedieRegressor(power=1, alpha=0.5, link='log') From 1ea70d3a0c2da89ff164aef9975080de66a1cebd Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 7 Aug 2019 08:22:56 +0200 Subject: [PATCH 111/269] Fix typos in documentation --- doc/modules/linear_model.rst | 13 +++++++------ sklearn/linear_model/_glm/distribution.py | 2 +- sklearn/linear_model/_glm/glm.py | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 02c01b674f467..ce160d341af5f 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -733,8 +733,8 @@ of a single trial are modeled using a `logistic function `_. Logistic regression is implemented in :class:`LogisticRegression`. -This implementation can fit binary, One-vs-Rest, or multinomial logistic -regression with optional :math:`\ell_1`, :math:`\ell_2` or Elastic-Net +This implementation can fit binary, One-vs-Rest, or multinomial logistic +regression with optional :math:`\ell_1`, :math:`\ell_2` or Elastic-Net regularization. .. note:: @@ -946,18 +946,19 @@ it is convenient to apply a link function different from the identity link log-link with :math:`h(x^\top w)=\exp(x^\top w)`. :class:`linear_model.TweedieRegressor` implements a generalized linear model -for the Tweedie distribution, that allows to model any of the above mentionned -distribution using the appropriate power parameter `p`, +for the Tweedie distribution, that allows to model any of the above mentioned +distributions using the appropriate power parameter `p`, - `p = 0`: Normal distribution. Specialized solvers such as :class:`linear_model.Ridge`, :class:`linear_model.ElasticNet` are generally more appropriate in this case. 
- `p = 1`: Poisson distribution. :class:`PoissonRegressor` is exposed for - convenience however it is strictly equivalent to `TweedieRegressor(power=1)`. + convenience. However, it is strictly equivalent to + `TweedieRegressor(power=1)`. - `p = 2`: Gamma distribution. :class:`GammaRegressor` is exposed for - convenience however it is strictly equivalent to + convenience. However, it is strictly equivalent to `TweedieRegressor(power=2)`. - `p = 3`: Inverse Gamma distribution. diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index c30996662114e..c7b4ec6c7836d 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -224,7 +224,7 @@ class TweedieDistribution(ExponentialDispersionModel): ===== ================ 0 Normal 1 Poisson - (0,1) Compound Poisson + (1,2) Compound Poisson 2 Gamma 3 Inverse Gaussian diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 7d7ef099cd04a..af411c38f87ca 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -776,7 +776,7 @@ class TweedieRegressor(GeneralizedLinearRegressor): ===== ================ 0 Normal 1 Poisson - (0,1) Compound Poisson + (1,2) Compound Poisson 2 Gamma 3 Inverse Gaussian From efdcb5be1ed18d681c9b7c358b6f23adb6d0f795 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 9 Aug 2019 16:58:46 +0300 Subject: [PATCH 112/269] Update doc/modules/linear_model.rst Co-Authored-By: Nicolas Hug --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index ce160d341af5f..67e4fbbde182c 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -964,7 +964,7 @@ distributions using the appropriate power parameter `p`, - `p = 3`: Inverse Gamma distribution. -Note that the feature matrix ``X`` should be standardized before fitting. This +Note that the feature matrix `X` should be standardized before fitting. This ensures that the penalty treats features equally. The estimator can be used as follows:: From ef0d063ec406c73aaee715af4cf06e5ca1f3b78c Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 9 Aug 2019 16:59:26 +0300 Subject: [PATCH 113/269] Update doc/modules/linear_model.rst Co-Authored-By: Nicolas Hug --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 67e4fbbde182c..9645690d594f6 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -945,7 +945,7 @@ it is convenient to apply a link function different from the identity link :math:`h(x^\top w)=x^\top w` that guarantees the non-negativeness, e.g. the log-link with :math:`h(x^\top w)=\exp(x^\top w)`. 
-:class:`linear_model.TweedieRegressor` implements a generalized linear model +:class:`TweedieRegressor` implements a generalized linear model for the Tweedie distribution, that allows to model any of the above mentioned distributions using the appropriate power parameter `p`, From 0125e1cbe9105f8d4f4cf08ee652add64deee384 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 9 Aug 2019 16:59:59 +0300 Subject: [PATCH 114/269] Update doc/modules/linear_model.rst Co-Authored-By: Nicolas Hug --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 9645690d594f6..fe52cad236392 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -950,7 +950,7 @@ for the Tweedie distribution, that allows to model any of the above mentioned distributions using the appropriate power parameter `p`, - `p = 0`: Normal distribution. Specialized solvers such as - :class:`linear_model.Ridge`, :class:`linear_model.ElasticNet` are generally + :class:`Ridge`, :class:`ElasticNet` are generally more appropriate in this case. - `p = 1`: Poisson distribution. :class:`PoissonRegressor` is exposed for From 6a8a600258c8be783f02dad7519fc2aa012418eb Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 9 Aug 2019 17:02:46 +0300 Subject: [PATCH 115/269] Update examples/linear_model/plot_poisson_regression_non_normal_loss.py Co-Authored-By: Nicolas Hug --- .../linear_model/plot_poisson_regression_non_normal_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 347a424d8f4fe..6cee852866018 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -238,7 +238,7 @@ def load_mtpl2(n_samples=100000): y_pred = model.predict(X_train) pd.Series(y_pred).hist(bins=np.linspace(-1, 8, 50), ax=ax[idx+1]) - ax[idx+1].set_title(model.__class__.__name__) + ax[idx + 1].set_title(model.__class__.__name__) for axi in ax: axi.set( From 73f3bd1f646ec1dc1b9a3808149cc185d3fdea0d Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 9 Aug 2019 17:13:00 +0300 Subject: [PATCH 116/269] Rename inverse.gaussian to inverse-gaussian --- sklearn/linear_model/_glm/distribution.py | 2 +- sklearn/linear_model/_glm/glm.py | 6 +++--- sklearn/linear_model/_glm/tests/test_glm.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index c7b4ec6c7836d..950fa3fbb03e7 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -393,5 +393,5 @@ def __init__(self): 'normal': NormalDistribution, 'poisson': PoissonDistribution, 'gamma': GammaDistribution, - 'inverse.gaussian': InverseGaussianDistribution, + 'inverse-gaussian': InverseGaussianDistribution, } diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index af411c38f87ca..86f4c544d8f84 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -57,7 +57,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). 
- family : {'normal', 'poisson', 'gamma', 'inverse.gaussian'} \ + family : {'normal', 'poisson', 'gamma', 'inverse-gaussian'} \ or an instance of class ExponentialDispersionModel, \ optional(default='normal') The distributional assumption of the GLM, i.e. which distribution from @@ -71,7 +71,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - 'identity' for family 'normal' - - 'log' for families 'poisson', 'gamma', 'inverse.gaussian' + - 'log' for families 'poisson', 'gamma', 'inverse-gaussian' fit_dispersion : {None, 'chisqr', 'deviance'}, optional (default=None) Method for estimation of the dispersion parameter phi. Whether to use @@ -217,7 +217,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError( "The family must be an instance of class" " ExponentialDispersionModel or an element of" - " ['normal', 'poisson', 'gamma', 'inverse.gaussian']" + " ['normal', 'poisson', 'gamma', 'inverse-gaussian']" "; got (family={0})".format(self.family)) # Guarantee that self._link_instance is set to an instance of diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index a56155fe03f22..12edbefbf2833 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -65,7 +65,7 @@ def test_sample_weights_validation(): [('normal', NormalDistribution()), ('poisson', PoissonDistribution()), ('gamma', GammaDistribution()), - ('inverse.gaussian', InverseGaussianDistribution())]) + ('inverse-gaussian', InverseGaussianDistribution())]) def test_glm_family_argument(f, fam): """Test GLM family argument set as string.""" y = np.array([0.1, 0.5]) # in range of all distributions From 11b178fffc7d279596acd122c7b6c33ba4b84a2f Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 9 Aug 2019 17:17:42 +0300 Subject: [PATCH 117/269] Remove sample_weight parameter from predict --- sklearn/linear_model/_glm/glm.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 86f4c544d8f84..921317cd0965a 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -388,32 +388,25 @@ def _linear_predictor(self, X): allow_nd=False) return X @ self.coef_ + self.intercept_ - def predict(self, X, sample_weight=None): + def predict(self, X): """Predict using GLM with feature matrix X. - If sample_weight is given, returns prediction*sample_weight. - Parameters ---------- X : {array-like, sparse matrix}, shape (n_samples, n_features) Samples. - sample_weight : {None, array-like}, shape (n_samples,), optional \ - (default=None) - Returns ------- C : array, shape (n_samples,) - Returns predicted values times sample_weight. + Returns predicted values. """ X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype='numeric', ensure_2d=True, allow_nd=False) eta = self._linear_predictor(X) mu = self._link_instance.inverse(eta) - weights = _check_sample_weight(sample_weight, X) - - return mu*weights + return mu def estimate_phi(self, X, y, sample_weight=None): """Estimate/fit the dispersion parameter phi. 
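A minimal sketch of the resulting ``predict`` behaviour, reusing the estimator from the documentation example above; the ``exposure`` array is made up for illustration only.

    import numpy as np
    from sklearn.linear_model import TweedieRegressor

    reg = TweedieRegressor(power=1, alpha=0.5, link='log')
    reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2])

    # predict now returns only the estimated mean mu = h(X w + intercept);
    # it no longer accepts or applies sample_weight.
    mu = reg.predict([[0, 0], [0, 1], [2, 2]])

    # If weighted predictions are needed (e.g. expected counts per row),
    # scale the output explicitly with an exposure vector.
    exposure = np.array([1.0, 2.0, 0.5])
    expected_counts = mu * exposure
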
From 3806fbe05c164d3fd74aada0b8a068c43d3998cd Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 9 Aug 2019 17:22:20 +0300 Subject: [PATCH 118/269] Remove redundant check_array in predict --- sklearn/linear_model/_glm/glm.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 921317cd0965a..0ab9e8c1db777 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -384,7 +384,7 @@ def _linear_predictor(self, X): """ check_is_fitted(self, "coef_") X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], - dtype='numeric', copy=True, ensure_2d=True, + dtype='numeric', ensure_2d=True, allow_nd=False) return X @ self.coef_ + self.intercept_ @@ -401,9 +401,7 @@ def predict(self, X): C : array, shape (n_samples,) Returns predicted values. """ - X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], - dtype='numeric', ensure_2d=True, - allow_nd=False) + # check_array is done in _linear_predictor eta = self._linear_predictor(X) mu = self._link_instance.inverse(eta) return mu From ae1c6721876fe5e9f1f14cd092fb198067899457 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 11 Aug 2019 23:11:52 +0200 Subject: [PATCH 119/269] Update doc/modules/linear_model.rst --- doc/modules/linear_model.rst | 66 +++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index fe52cad236392..09b657c26b915 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -900,30 +900,31 @@ combination of the input variables :math:`X` via an inverse link function .. math:: \hat{y}(w, x) = h(x^\top w) = h(w_0 + w_1 x_1 + ... + w_p x_p). -Secondly, the squared loss function is replaced by the deviance :math:`D` of an -exponential dispersion model (EDM) [11]_. The minimized objective function is -the penalized negative log likelihood, +Secondly, the squared loss function is replaced by the unit deviance :math:`d` +of a reproductive exponential dispersion model (EDM) [11]_. The minimization +problem becomes -.. math:: \frac{1}{2 \sum s_i}D(y, \hat{y}; s) +\frac{\alpha}{2} ||w||_2 +.. math:: \min_{w} \frac{1}{2 \sum_i s_i} \sum_i s_i \cdot d(y_i, \hat{y}(w, x_i)) + \frac{\alpha}{2} ||w||_2 with sample weights :math:`s`, and L2 regularization penalty :math:`\alpha`. - -The deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` +The unit deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` likelihood as .. math:: d(y, \mu) = -2\phi\cdot \left(loglike(y,\mu,\phi) - - loglike(y,y,\phi)\right) \\ - D(y, \mu; s) = \sum_i s_i \cdot d(y_i, \mu_i) + - loglike(y,y,\phi)\right) + +The following table lists some specific EDM distributions—all are Tweedie +distributions—and some properties. 
-================= =============================== ================================= ============================================ -Distribution Target Domain Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` -================= =============================== ================================= ============================================ -Normal :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` -Poisson :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{\mu}-y+\mu)` -Gamma :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` -Inverse Gaussian :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` -================= =============================== ================================= ============================================ +================= =============================== ====================================== ============================================ +Distribution Target Domain Unit Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` +================= =============================== ====================================== ============================================ +Normal :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` +Poisson :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{\mu}-y+\mu)` +Gamma :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` +Inverse Gaussian :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` +================= =============================== ====================================== ============================================ In the following use cases, a loss different from the squared loss might be @@ -947,7 +948,8 @@ log-link with :math:`h(x^\top w)=\exp(x^\top w)`. :class:`TweedieRegressor` implements a generalized linear model for the Tweedie distribution, that allows to model any of the above mentioned -distributions using the appropriate power parameter `p`, +distributions using the appropriate power parameter `p`, i.e. the exponent of +the unit variance function, - `p = 0`: Normal distribution. Specialized solvers such as :class:`Ridge`, :class:`ElasticNet` are generally @@ -964,9 +966,16 @@ distributions using the appropriate power parameter `p`, - `p = 3`: Inverse Gamma distribution. -Note that the feature matrix `X` should be standardized before fitting. This -ensures that the penalty treats features equally. The estimator can be used as -follows:: +Note: +* The feature matrix `X` should be standardized before fitting. This + ensures that the penalty treats features equally. +* If you want to model a relative frequency, i.e. counts per exposure (time, + volume, ...) you can do so by a Poisson distribution and passing + :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values together + with :math:`s=\mathrm{exposure}` as sample weights. This is done in both + examples linked below. + +The estimator can be used as follows:: >>> from sklearn.linear_model import TweedieRegressor >>> reg = TweedieRegressor(power=1, alpha=0.5, link='log') @@ -993,7 +1002,7 @@ In the unpenalized case, the assumptions are the following: with expectation :math:`\mu_i=\mathrm{E}[Y]`, dispersion parameter :math:`\phi` and sample weights :math:`s_i`. * The aim is to predict the expectation :math:`\mu_i` with - :math:`\hat{y_i} = h(\eta_i)`, linear predictor + :math:`\hat{y}_i = h(\eta_i)`, linear predictor :math:`\eta_i=(Xw)_i` and inverse link function :math:`h(\eta)`. 
Note that the first assumption implies @@ -1001,21 +1010,16 @@ Note that the first assumption implies function :math:`v(\mu)`. Specifying a particular distribution of an EDM is the same as specifying a unit variance function (they are one-to-one). -The objective function (the penalized negative log likelihood) is -independent of :math:`\phi` and is minimized with respect to the -coefficients :math:`w`. - -Two remarks: +A few remarks: +* The deviance is independent of :math:`\phi`. Therefore, also the estimation + of the coefficients :math:`w` is independent of the dispersion parameter of + the EDM. +* The minimization is equivalent to (penalized) maximum likelihood estimation. * The deviances for at least Normal, Poisson and Gamma distributions are strictly consistent scoring functions for the mean :math:`\mu`, see Eq. (19)-(20) in [12]_. -* If you want to model a frequency, i.e. counts per exposure (time, volume, ...) - you can do so by a Poisson distribution and passing - :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values together - with :math:`s=\mathrm{exposure}` as sample weights. - .. topic:: References: From f07c831c29bc37ab0b922e1482d9f027f84049c1 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 11 Aug 2019 23:32:57 +0200 Subject: [PATCH 120/269] Remove dispersion --- sklearn/linear_model/_glm/glm.py | 116 +++----------------- sklearn/linear_model/_glm/tests/test_glm.py | 18 --- 2 files changed, 15 insertions(+), 119 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 0ab9e8c1db777..cf9b00527c8cf 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -73,11 +73,6 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - 'log' for families 'poisson', 'gamma', 'inverse-gaussian' - fit_dispersion : {None, 'chisqr', 'deviance'}, optional (default=None) - Method for estimation of the dispersion parameter phi. Whether to use - the chi squared statistic or the deviance statistic. If None, the - dispersion is not estimated. - solver : {'auto', 'lbfgs'}, optional (default='auto') Algorithm to use in the optimization problem: @@ -124,9 +119,6 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): intercept_ : float Intercept (a.k.a. bias) added to linear predictor. - dispersion_ : float - The dispersion parameter :math:`\\phi` if ``fit_dispersion`` was set. - n_iter_ : int Actual number of iterations used in solver. @@ -169,14 +161,12 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """ def __init__(self, alpha=1.0, fit_intercept=True, family='normal', link='auto', - fit_dispersion=None, solver='auto', max_iter=100, - tol=1e-4, warm_start=False, + solver='auto', max_iter=100, tol=1e-4, warm_start=False, copy_X=True, check_input=True, verbose=0): self.alpha = alpha self.fit_intercept = fit_intercept self.family = family self.link = link - self.fit_dispersion = fit_dispersion self.solver = solver self.max_iter = max_iter self.tol = tol @@ -310,8 +300,8 @@ def fit(self, X, y, sample_weight=None): weights = weights/weights_sum # initialization of coef = (intercept_, coef) - # Note: Since phi=self.dispersion_ does not enter the estimation - # of mu_i=E[y_i], set it to 1. + # Note: The dispersion parameter phi does not enter the estimation + # of mu_i=E[y_i]. if self.warm_start and hasattr(self, 'coef_'): if self.fit_intercept: @@ -363,10 +353,6 @@ def func(coef, X, y, weights, alpha, family, link): self.intercept_ = 0. 
self.coef_ = coef - if self.fit_dispersion in ['chisqr', 'deviance']: - # attention because of rescaling of weights - self.dispersion_ = self.estimate_phi(X, y, weights)*weights_sum - return self def _linear_predictor(self, X): @@ -406,51 +392,6 @@ def predict(self, X): mu = self._link_instance.inverse(eta) return mu - def estimate_phi(self, X, y, sample_weight=None): - """Estimate/fit the dispersion parameter phi. - - Parameters - ---------- - X : {array-like, sparse matrix}, shape (n_samples, n_features) - Training data. - - y : array-like, shape (n_samples,) - Target values. - - sample_weight : {None, array-like}, shape (n_samples,), optional \ - (default=None) - Sample weights. - - Returns - ------- - phi : float - Dispersion parameter. - """ - check_is_fitted(self, "coef_") - _dtype = [np.float64, np.float32] - X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], - dtype=_dtype, y_numeric=True, multi_output=False) - n_samples, n_features = X.shape - weights = _check_sample_weight(sample_weight, X) - eta = X @ self.coef_ - if self.fit_intercept is True: - eta += self.intercept_ - n_features += 1 - if n_samples <= n_features: - raise ValueError("Estimation of dispersion parameter phi requires" - " more samples than features, got" - " samples=X.shape[0]={0} and" - " n_features=X.shape[1]+fit_intercept={1}." - .format(n_samples, n_features)) - mu = self._link_instance.inverse(eta) - if self.fit_dispersion == 'chisqr': - chisq = np.sum(weights*(y-mu)**2 / - self._family_instance.unit_variance(mu)) - return chisq/(n_samples - n_features) - elif self.fit_dispersion == 'deviance': - dev = self._family_instance.deviance(y, mu, weights) - return dev/(n_samples - n_features) - def score(self, X, y, sample_weight=None): """Compute D^2, the percentage of deviance explained. @@ -525,11 +466,6 @@ class PoissonRegressor(GeneralizedLinearRegressor): Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). - fit_dispersion : {None, 'chisqr', 'deviance'}, optional (default=None) - Method for estimation of the dispersion parameter phi. Whether to use - the chi squared statistic or the deviance statistic. If None, the - dispersion is not estimated. - solver : {'lbfgs'}, optional (default='lbfgs') Algorithm to use in the optimization problem: @@ -564,9 +500,6 @@ class PoissonRegressor(GeneralizedLinearRegressor): intercept_ : float Intercept (a.k.a. bias) added to linear predictor. - dispersion_ : float - The dispersion parameter :math:`\\phi` if ``fit_dispersion`` was set. - n_iter_ : int Actual number of iterations used in solver. @@ -605,14 +538,13 @@ class PoissonRegressor(GeneralizedLinearRegressor): `_ """ def __init__(self, alpha=1.0, fit_intercept=True, link='log', - fit_dispersion=None, solver='lbfgs', max_iter=100, tol=1e-4, - warm_start=False, copy_X=True, check_input=True, verbose=0): + solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, + copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="poisson", link=link, - fit_dispersion=fit_dispersion, solver=solver, - max_iter=max_iter, tol=tol, warm_start=warm_start, - copy_X=copy_X, verbose=verbose) + solver=solver, max_iter=max_iter, tol=tol, + warm_start=warm_start, copy_X=copy_X, verbose=verbose) @property def family(self): @@ -652,11 +584,6 @@ class GammaRegressor(GeneralizedLinearRegressor): Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). 
- fit_dispersion : {None, 'chisqr', 'deviance'}, optional (default=None) - Method for estimation of the dispersion parameter phi. Whether to use - the chi squared statistic or the deviance statistic. If None, the - dispersion is not estimated. - solver : {'lbfgs'}, optional (default='lbfgs') Algorithm to use in the optimization problem: @@ -691,9 +618,6 @@ class GammaRegressor(GeneralizedLinearRegressor): intercept_ : float Intercept (a.k.a. bias) added to linear predictor. - dispersion_ : float - The dispersion parameter :math:`\\phi` if ``fit_dispersion`` was set. - n_iter_ : int Actual number of iterations used in solver. @@ -721,14 +645,13 @@ class GammaRegressor(GeneralizedLinearRegressor): `_ """ def __init__(self, alpha=1.0, fit_intercept=True, link='log', - fit_dispersion=None, solver='lbfgs', max_iter=100, tol=1e-4, - warm_start=False, copy_X=True, check_input=True, verbose=0): + solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, + copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="gamma", link=link, - fit_dispersion=fit_dispersion, solver=solver, - max_iter=max_iter, tol=tol, warm_start=warm_start, - copy_X=copy_X, verbose=verbose) + solver=solver, max_iter=max_iter, tol=tol, + warm_start=warm_start, copy_X=copy_X, verbose=verbose) @property def family(self): @@ -783,11 +706,6 @@ class TweedieRegressor(GeneralizedLinearRegressor): Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). - fit_dispersion : {None, 'chisqr', 'deviance'}, optional (default=None) - Method for estimation of the dispersion parameter phi. Whether to use - the chi squared statistic or the deviance statistic. If None, the - dispersion is not estimated. - solver : {'lbfgs'}, optional (default='lbfgs') Algorithm to use in the optimization problem: @@ -822,9 +740,6 @@ class TweedieRegressor(GeneralizedLinearRegressor): intercept_ : float Intercept (a.k.a. bias) added to linear predictor. - dispersion_ : float - The dispersion parameter :math:`\\phi` if ``fit_dispersion`` was set. - n_iter_ : int Actual number of iterations used in solver. 
@@ -852,14 +767,13 @@ class TweedieRegressor(GeneralizedLinearRegressor): `_ """ def __init__(self, power=0.0, alpha=1.0, fit_intercept=True, link='log', - fit_dispersion=None, solver='lbfgs', max_iter=100, tol=1e-4, - warm_start=False, copy_X=True, check_input=True, verbose=0): + solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, + copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family=TweedieDistribution(power=power), link=link, - fit_dispersion=fit_dispersion, solver=solver, - max_iter=max_iter, tol=tol, warm_start=warm_start, - copy_X=copy_X, verbose=verbose) + solver=solver, max_iter=max_iter, tol=tol, + warm_start=warm_start, copy_X=copy_X, verbose=verbose) @property def family(self): diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 12edbefbf2833..5bd80cbf76fcf 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -332,24 +332,6 @@ def test_solver_equivalence(params, regression_data): ) -def test_fit_dispersion(regression_data): - X, y = regression_data - - est1 = GeneralizedLinearRegressor() - est1.fit(X, y) - assert not hasattr(est1, "dispersion_") - - est2 = GeneralizedLinearRegressor(fit_dispersion="chisqr") - est2.fit(X, y) - assert isinstance(est2.dispersion_, float) - - est3 = GeneralizedLinearRegressor(fit_dispersion="deviance") - est3.fit(X, y) - assert isinstance(est3.dispersion_, float) - - assert_allclose(est2.dispersion_, est3.dispersion_) - - @pytest.mark.parametrize("solver", GLM_SOLVERS) def test_convergence_warning(solver, regression_data): X, y = regression_data From ebbbe9cd1c670e15aca835b92730f570c49e6e4c Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 13 Aug 2019 08:44:50 +0200 Subject: [PATCH 121/269] Update doc/modules/linear_model.rst Co-Authored-By: Nicolas Hug --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 09b657c26b915..f352860b6826f 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -1003,7 +1003,7 @@ In the unpenalized case, the assumptions are the following: :math:`\phi` and sample weights :math:`s_i`. * The aim is to predict the expectation :math:`\mu_i` with :math:`\hat{y}_i = h(\eta_i)`, linear predictor - :math:`\eta_i=(Xw)_i` and inverse link function :math:`h(\eta)`. + :math:`\eta_i=(Xw)_i` and inverse link function :math:`h`. 
Note that the first assumption implies :math:`\mathrm{Var}[Y_i]=\frac{\phi}{s_i} v(\mu_i)` with unit variance From 918e2574ad99724d4d4a437af5283f32442d5752 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 13 Aug 2019 08:53:57 +0200 Subject: [PATCH 122/269] Update doc/modules/linear_model.rst --- doc/modules/linear_model.rst | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 09b657c26b915..32b7494ba7c00 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -927,6 +927,9 @@ Inverse Gaussian :math:`y \in (0, \infty)` :math:`\mu^3` ================= =============================== ====================================== ============================================ +Usage +----- + In the following use cases, a loss different from the squared loss might be appropriate, @@ -944,7 +947,7 @@ Since the linear predictor :math:`Xw` can be negative and Poisson, Gamma and Inverse Gaussian distributions don't support negative values, it is convenient to apply a link function different from the identity link :math:`h(x^\top w)=x^\top w` that guarantees the non-negativeness, e.g. the -log-link with :math:`h(x^\top w)=\exp(x^\top w)`. +log-link `link='log'` with :math:`h(x^\top w)=\exp(x^\top w)`. :class:`TweedieRegressor` implements a generalized linear model for the Tweedie distribution, that allows to model any of the above mentioned @@ -1018,7 +1021,9 @@ A few remarks: * The minimization is equivalent to (penalized) maximum likelihood estimation. * The deviances for at least Normal, Poisson and Gamma distributions are strictly consistent scoring functions for the mean :math:`\mu`, see Eq. - (19)-(20) in [12]_. + (19)-(20) in [12]_. This means that, given an appropriate feature matrix `X`, + you get good (asymptotic) estimators for the expectation when using these + deviances. .. topic:: References: From 37d0f47bac45c27d01f949a835ecddad471b8d42 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 17 Aug 2019 00:14:20 +0300 Subject: [PATCH 123/269] Use double `` when necessary --- doc/modules/linear_model.rst | 10 +++++----- sklearn/linear_model/_glm/glm.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index d0e3f9542a641..7bb684e8bcf87 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -951,22 +951,22 @@ log-link `link='log'` with :math:`h(x^\top w)=\exp(x^\top w)`. :class:`TweedieRegressor` implements a generalized linear model for the Tweedie distribution, that allows to model any of the above mentioned -distributions using the appropriate power parameter `p`, i.e. the exponent of +distributions using the appropriate power parameter ``p``, i.e. the exponent of the unit variance function, - - `p = 0`: Normal distribution. Specialized solvers such as + - ``p = 0``: Normal distribution. Specialized solvers such as :class:`Ridge`, :class:`ElasticNet` are generally more appropriate in this case. - - `p = 1`: Poisson distribution. :class:`PoissonRegressor` is exposed for + - ``p = 1``: Poisson distribution. :class:`PoissonRegressor` is exposed for convenience. However, it is strictly equivalent to `TweedieRegressor(power=1)`. - - `p = 2`: Gamma distribution. :class:`GammaRegressor` is exposed for + - ``p = 2``: Gamma distribution. :class:`GammaRegressor` is exposed for convenience. However, it is strictly equivalent to `TweedieRegressor(power=2)`. 
- - `p = 3`: Inverse Gamma distribution. + - ``p = 3``: Inverse Gamma distribution. Note: diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index cf9b00527c8cf..baed5fecb2b97 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -397,7 +397,7 @@ def score(self, X, y, sample_weight=None): D^2 is a generalization of the coefficient of determination R^2. R^2 uses squared error and D^2 deviance. Note that those two are equal - for family='normal'. + for ``family='normal'``. D^2 is defined as :math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`, From 9c337f25ea389c15e32ce943292fbe7b953a95b3 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 17 Aug 2019 00:16:59 +0300 Subject: [PATCH 124/269] ax -> axes in plot_poisson_regression_non_normal_loss.py --- .../plot_poisson_regression_non_normal_loss.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 6cee852866018..5c0b64faea255 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -228,19 +228,19 @@ def load_mtpl2(n_samples=100000): # histogram of observed target values with that of predicted values, -fig, ax = plt.subplots(1, 4, figsize=(16, 3)) +fig, axes = plt.subplots(1, 4, figsize=(16, 3)) -df_train.Frequency.hist(bins=np.linspace(-1, 10, 50), ax=ax[0]) +df_train.Frequency.hist(bins=np.linspace(-1, 10, 50), ax=axes[0]) -ax[0].set_title('Experimental data') +axes[0].set_title('Experimental data') for idx, model in enumerate([linregr, glm_freq, gbr]): y_pred = model.predict(X_train) - pd.Series(y_pred).hist(bins=np.linspace(-1, 8, 50), ax=ax[idx+1]) - ax[idx + 1].set_title(model.__class__.__name__) + pd.Series(y_pred).hist(bins=np.linspace(-1, 8, 50), ax=axes[idx+1]) + axes[idx + 1].set_title(model.__class__.__name__) -for axi in ax: +for axi in axes: axi.set( yscale='log', xlabel="y (Frequency)" From 5e05935caf29ff6bffb25ab5c83a1e4c855b83b0 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 17 Aug 2019 00:21:13 +0300 Subject: [PATCH 125/269] Update sklearn/linear_model/_glm/distribution.py Co-Authored-By: Nicolas Hug --- sklearn/linear_model/_glm/distribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index 950fa3fbb03e7..b1845eb13e921 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -206,7 +206,7 @@ def _mu_deviance_derivative(self, coef, X, y, weights, link): if coef.size == X.shape[1] + 1: devp = np.concatenate(([temp.sum()], temp @ X)) else: - devp = temp @ X # sampe as X.T @ temp + devp = temp @ X # same as X.T @ temp return mu, devp From 4a6821393715e42150fb8673f3d45840002d0114 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 17 Aug 2019 00:26:47 +0300 Subject: [PATCH 126/269] Remove solver=auto --- sklearn/linear_model/_glm/glm.py | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index baed5fecb2b97..4786d231336b6 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -73,19 +73,12 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - 'log' for families 'poisson', 'gamma', 
'inverse-gaussian' - solver : {'auto', 'lbfgs'}, optional (default='auto') + solver : 'lbfgs', optional (default='lbfgs') Algorithm to use in the optimization problem: - 'auto' - Sets 'lbfgs' - 'lbfgs' Calls scipy's L-BFGS-B optimizer. - - Note that all solvers except lbfgs use the fisher matrix, i.e. the - expected Hessian instead of the Hessian matrix. - max_iter : int, optional (default=100) The maximal number of iterations for solver algorithms. @@ -161,7 +154,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """ def __init__(self, alpha=1.0, fit_intercept=True, family='normal', link='auto', - solver='auto', max_iter=100, tol=1e-4, warm_start=False, + solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, copy_X=True, check_input=True, verbose=0): self.alpha = alpha self.fit_intercept = fit_intercept @@ -243,14 +236,11 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.fit_intercept, bool): raise ValueError("The argument fit_intercept must be bool;" " got {0}".format(self.fit_intercept)) - if self.solver not in ['auto', 'lbfgs']: + if self.solver not in ['lbfgs']: raise ValueError("GeneralizedLinearRegressor supports only solvers" - "'auto', 'lbfgs';" - " got {0}".format(self.solver)) + "'lbfgs'; got {0}".format(self.solver)) solver = self.solver - if self.solver == 'auto': - solver = 'lbfgs' - if (not isinstance(self.max_iter, int) + if (not isinstance(self.max_iter, numbers.Integral) or self.max_iter <= 0): raise ValueError("Maximum number of iteration must be a positive " "integer;" From 8ee5c85a0140175e354ca9f8dd4db08f23606b9f Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 17 Aug 2019 00:38:56 +0300 Subject: [PATCH 127/269] Update sklearn/linear_model/_glm/glm.py Co-Authored-By: Nicolas Hug --- sklearn/linear_model/_glm/glm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index baed5fecb2b97..f770337e40e62 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -384,7 +384,7 @@ def predict(self, X): Returns ------- - C : array, shape (n_samples,) + y_pred : array, shape (n_samples,) Returns predicted values. 
""" # check_array is done in _linear_predictor From a1f8aabb6042aac39929a65faaac78e3b6b68e32 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 17 Aug 2019 00:39:53 +0300 Subject: [PATCH 128/269] More review comments --- sklearn/linear_model/_glm/glm.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index f81408a83a5cf..25f018c3e0eb4 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -261,16 +261,14 @@ def fit(self, X, y, sample_weight=None): family = self._family_instance link = self._link_instance - _dtype = [np.float64, np.float32] - _stype = ['csc', 'csr'] - X, y = check_X_y(X, y, accept_sparse=_stype, - dtype=_dtype, y_numeric=True, multi_output=False, - copy=self.copy_X) + X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'], + dtype=[np.float64, np.float32], + y_numeric=True, multi_output=False, copy=self.copy_X) y = np.asarray(y, dtype=np.float64) weights = _check_sample_weight(sample_weight, X) - n_samples, n_features = X.shape + _, n_features = X.shape if self.check_input: if not np.all(family.in_y_range(y)): @@ -287,7 +285,7 @@ def fit(self, X, y, sample_weight=None): # we rescale weights such that sum(weights) = 1 and this becomes # 1/2*deviance + L2 with deviance=sum(weights * unit_deviance) weights_sum = np.sum(weights) - weights = weights/weights_sum + weights = weights / weights_sum # initialization of coef = (intercept_, coef) # Note: The dispersion parameter phi does not enter the estimation @@ -355,7 +353,7 @@ def _linear_predictor(self, X): Returns ------- - C : array, shape (n_samples,) + y_pred : array, shape (n_samples,) Returns predicted values of linear predictor. """ check_is_fitted(self, "coef_") From c0999ead8e8e834456f4a0f61ec77cec94790f4d Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 17 Aug 2019 00:44:36 +0300 Subject: [PATCH 129/269] Addressing reviews in tests --- sklearn/linear_model/_glm/glm.py | 4 ---- sklearn/linear_model/_glm/tests/test_glm.py | 16 ++++++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 25f018c3e0eb4..c1f7aecd32e4f 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -287,10 +287,6 @@ def fit(self, X, y, sample_weight=None): weights_sum = np.sum(weights) weights = weights / weights_sum - # initialization of coef = (intercept_, coef) - # Note: The dispersion parameter phi does not enter the estimation - # of mu_i=E[y_i]. 
- if self.warm_start and hasattr(self, 'coef_'): if self.fit_intercept: coef = np.concatenate((np.array([self.intercept_]), diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 5bd80cbf76fcf..c4e8c883ff379 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -61,17 +61,17 @@ def test_sample_weights_validation(): glm.fit(X, y, weights) -@pytest.mark.parametrize('f, fam', +@pytest.mark.parametrize('name, instance', [('normal', NormalDistribution()), ('poisson', PoissonDistribution()), ('gamma', GammaDistribution()), ('inverse-gaussian', InverseGaussianDistribution())]) -def test_glm_family_argument(f, fam): +def test_glm_family_argument(name, instance): """Test GLM family argument set as string.""" y = np.array([0.1, 0.5]) # in range of all distributions X = np.array([[1], [2]]) - glm = GeneralizedLinearRegressor(family=f, alpha=0).fit(X, y) - assert isinstance(glm._family_instance, fam.__class__) + glm = GeneralizedLinearRegressor(family=name, alpha=0).fit(X, y) + assert isinstance(glm._family_instance, instance.__class__) glm = GeneralizedLinearRegressor(family='not a family', fit_intercept=False) @@ -79,15 +79,15 @@ def test_glm_family_argument(f, fam): glm.fit(X, y) -@pytest.mark.parametrize('l, link', +@pytest.mark.parametrize('name, instance', [('identity', IdentityLink()), ('log', LogLink())]) -def test_glm_link_argument(l, link): +def test_glm_link_argument(name, instance): """Test GLM link argument set as string.""" y = np.array([0.1, 0.5]) # in range of all distributions X = np.array([[1], [2]]) - glm = GeneralizedLinearRegressor(family='normal', link=l).fit(X, y) - assert isinstance(glm._link_instance, link.__class__) + glm = GeneralizedLinearRegressor(family='normal', link=name).fit(X, y) + assert isinstance(glm._link_instance, instance.__class__) glm = GeneralizedLinearRegressor(family='normal', link='not a link') with pytest.raises(ValueError, match="link must be"): From e09e3368d5e3c173569dba1669cbf97334d20db0 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 17 Aug 2019 00:50:37 +0300 Subject: [PATCH 130/269] More comments in tests --- sklearn/linear_model/_glm/tests/test_glm.py | 52 ++++----------------- 1 file changed, 9 insertions(+), 43 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index c4e8c883ff379..ebcab6395e5b4 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -23,7 +23,6 @@ GammaDistribution, InverseGaussianDistribution, ) from sklearn.linear_model import Ridge -from sklearn.metrics import mean_absolute_error from sklearn.exceptions import ConvergenceWarning GLM_SOLVERS = ['lbfgs'] @@ -183,8 +182,7 @@ def test_glm_identity_regression(solver): X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.dot(X, coef) glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', - fit_intercept=False, solver=solver, - tol=1e-7) + fit_intercept=False, solver=solver) res = glm.fit(X, y) assert_allclose(res.coef_, coef, rtol=1e-6) @@ -242,18 +240,13 @@ def test_warm_start(fit_intercept): @pytest.mark.parametrize('fit_intercept', [True, False]) @pytest.mark.parametrize('solver', GLM_SOLVERS) def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): - """Test ridge regression for Normal distributions. - - Case n_samples >> n_features - - Compare to test_ridge in test_ridge.py. 
- """ + """Compare with Ridge regression for Normal distributions.""" alpha = 1.0 n_predict = 10 - X, y, coef = make_regression(n_samples=n_samples+n_predict, - n_features=n_features, - n_informative=n_features-2, noise=0.5, - coef=True, random_state=42) + X, y, _ = make_regression(n_samples=n_samples+n_predict, + n_features=n_features, + n_informative=n_features-2, noise=0.5, + coef=True, random_state=42) y = y[0:n_samples] X, T = X[0:n_samples], X[n_samples:] @@ -279,10 +272,9 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): @pytest.mark.parametrize('solver, tol', [('lbfgs', 1e-7)]) -def test_poisson_ridge(solver, tol): - """Test ridge regression with poisson family and LogLink. - - Compare to R's glmnet""" +def test_poisson_glmnet(solver, tol): + """Compare Poisson regression with L2 regularization and LogLink to glmnet + """ # library("glmnet") # options(digits=10) # df <- data.frame(a=c(-2,-1,1,2), b=c(0,0,1,1), y=c(0,1,1,2)) @@ -306,32 +298,6 @@ def test_poisson_ridge(solver, tol): assert_allclose(glm.coef_, [0.29019207995, 0.03741173122], rtol=1e-5) -@pytest.mark.parametrize( - "params", - [ - {"solver": "lbfgs"}, - ], - ids=lambda params: ', '.join("%s=%s" % (key, val) - for key, val in params.items()) -) -def test_solver_equivalence(params, regression_data): - X, y = regression_data - est_ref = GeneralizedLinearRegressor() - est_ref.fit(X, y) - - estimator = GeneralizedLinearRegressor(**params) - - estimator.fit(X, y) - - assert_allclose(estimator.intercept_, est_ref.intercept_, rtol=1e-4) - assert_allclose(estimator.coef_, est_ref.coef_, rtol=1e-4) - assert_allclose( - mean_absolute_error(estimator.predict(X), y), - mean_absolute_error(est_ref.predict(X), y), - rtol=1e-4 - ) - - @pytest.mark.parametrize("solver", GLM_SOLVERS) def test_convergence_warning(solver, regression_data): X, y = regression_data From 6601d3049d3637c1823ff709d5416b306386d46a Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 17 Aug 2019 15:49:25 +0200 Subject: [PATCH 131/269] Update linear_model.rst --- doc/modules/linear_model.rst | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 7bb684e8bcf87..8b3764e7cefd0 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -914,8 +914,8 @@ likelihood as \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right) -The following table lists some specific EDM distributions—all are Tweedie -distributions—and some properties. +The following table lists some specific EDM distributions—all are Tweedie +distributions—and some of their properties. ================= =============================== ====================================== ============================================ Distribution Target Domain Unit Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` @@ -943,7 +943,7 @@ appropriate, you might try an Inverse Gaussian deviance (or even higher variance powers of the Tweedie family). -Since the linear predictor :math:`Xw` can be negative and +Since the linear predictor :math:`x^\top w` can be negative and Poisson, Gamma and Inverse Gaussian distributions don't support negative values, it is convenient to apply a link function different from the identity link :math:`h(x^\top w)=x^\top w` that guarantees the non-negativeness, e.g. the @@ -951,32 +951,33 @@ log-link `link='log'` with :math:`h(x^\top w)=\exp(x^\top w)`. 
:class:`TweedieRegressor` implements a generalized linear model for the Tweedie distribution, that allows to model any of the above mentioned -distributions using the appropriate power parameter ``p``, i.e. the exponent of -the unit variance function, +distributions using the appropriate ``power`` parameter, i.e. the exponent +of the unit variance function, - - ``p = 0``: Normal distribution. Specialized solvers such as + - ``power = 0``: Normal distribution. Specialized solvers such as :class:`Ridge`, :class:`ElasticNet` are generally more appropriate in this case. - - ``p = 1``: Poisson distribution. :class:`PoissonRegressor` is exposed for + - ``power = 1``: Poisson distribution. :class:`PoissonRegressor` is exposed for convenience. However, it is strictly equivalent to `TweedieRegressor(power=1)`. - - ``p = 2``: Gamma distribution. :class:`GammaRegressor` is exposed for + - ``power = 2``: Gamma distribution. :class:`GammaRegressor` is exposed for convenience. However, it is strictly equivalent to `TweedieRegressor(power=2)`. - - ``p = 3``: Inverse Gamma distribution. + - ``power = 3``: Inverse Gamma distribution. -Note: -* The feature matrix `X` should be standardized before fitting. This - ensures that the penalty treats features equally. -* If you want to model a relative frequency, i.e. counts per exposure (time, - volume, ...) you can do so by a Poisson distribution and passing - :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values together - with :math:`s=\mathrm{exposure}` as sample weights. This is done in both - examples linked below. +.. note:: + + * The feature matrix `X` should be standardized before fitting. This + ensures that the penalty treats features equally. + * If you want to model a relative frequency, i.e. counts per exposure (time, + volume, ...) you can do so by a Poisson distribution and passing + :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values + together with :math:`s=\mathrm{exposure}` as sample weights. This is done + in both examples linked below. The estimator can be used as follows:: From 5174dae1cbc7ea9442243f15f3887f8252520f46 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 17 Aug 2019 18:55:24 +0200 Subject: [PATCH 132/269] Address check_is_fitted deprication of attributes --- sklearn/linear_model/_glm/glm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index c1f7aecd32e4f..13ca3673c8b3d 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -352,7 +352,7 @@ def _linear_predictor(self, X): y_pred : array, shape (n_samples,) Returns predicted values of linear predictor. 
""" - check_is_fitted(self, "coef_") + check_is_fitted(self) X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], dtype='numeric', ensure_2d=True, allow_nd=False) From 61dc13fdd5b71fe0e1d30e5a9b58934441a9dcb1 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 17 Aug 2019 20:18:21 +0200 Subject: [PATCH 133/269] No LaTeX in docstrings --- sklearn/linear_model/_glm/distribution.py | 89 ++++++++++------------- sklearn/linear_model/_glm/glm.py | 66 ++++++++--------- 2 files changed, 70 insertions(+), 85 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index b1845eb13e921..1baf99a465326 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -26,19 +26,15 @@ def _safe_lin_pred(X, coef): class ExponentialDispersionModel(metaclass=ABCMeta): - r"""Base class for reproductive Exponential Dispersion Models (EDM). + """Base class for reproductive Exponential Dispersion Models (EDM). - The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by + The pdf of Y∼EDM(μ, φ) is given by:: - .. math:: p(y| \theta, \phi) = c(y, \phi) - \exp\left(\frac{\theta y-A(\theta)}{\phi}\right) - = \tilde{c}(y, \phi) - \exp\left(-\frac{d(y, \mu)}{2\phi}\right) + p(y| θ, φ) = c1(y, φ) * exp((θy-A(θ))/φ) + = c2(y, φ) * exp(-d(y, μ)/(2φ)) - with mean :math:`\mathrm{E}[Y] = A'(\theta) = \mu`, - variance :math:`\mathrm{Var}[Y] = \phi \cdot v(\mu)`, - unit variance :math:`v(\mu)` and - unit deviance :math:`d(y,\mu)`. + with mean E[Y] = A'(θ) = μ, variance Var[Y] = φ * v(μ), + unit variance v(μ), unit deviance d(y,μ) and dispersion parameter φ. Methods ------- @@ -56,7 +52,7 @@ class ExponentialDispersionModel(metaclass=ABCMeta): """ def in_y_range(self, y): - """Returns ``True`` if y is in the valid range of Y~EDM. + """Returns ``True`` if y is in the valid range of Y∼EDM. Parameters ---------- @@ -80,17 +76,13 @@ def in_y_range(self, y): @abstractmethod def unit_variance(self, mu): - r"""Compute the unit variance function. + """Compute the unit variance function. - The unit variance :math:`v(\mu)` determines the variance as - a function of the mean :math:`\mu` by - :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(\mu_i)`. - It can also be derived from the unit deviance :math:`d(y,\mu)` as + The unit variance v(μ) determines the variance as a function of the + mean μ by Var[Y_i] = φ/s_i * v(μ_i). + It can also be derived from the unit deviance d(y,μ) as:: - .. math:: v(\mu) = \frac{2}{\frac{\partial^2 d(y,\mu)}{ - \partial\mu^2}}\big|_{y=\mu} - - See also :func:`variance`. + v(μ) = 2/(∂^2 d(y,μ)/(∂ μ^2))|_{y=μ} Parameters ---------- @@ -101,9 +93,9 @@ def unit_variance(self, mu): @abstractmethod def unit_variance_derivative(self, mu): - r"""Compute the derivative of the unit variance w.r.t. mu. + """Compute the derivative of the unit variance w.r.t. mu. - Return :math:`v'(\mu)`. + Return v'(μ). Parameters ---------- @@ -114,12 +106,11 @@ def unit_variance_derivative(self, mu): @abstractmethod def unit_deviance(self, y, mu, check_input=False): - r"""Compute the unit deviance. + """Compute the unit deviance. 
+ + The unit_deviance d(y,μ) can be defined by the log-likelihood as:: - The unit_deviance :math:`d(y,\mu)` can be defined by the - log-likelihood as - :math:`d(y,\mu) = -2\phi\cdot - \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right).` + d(y,μ) = -2φ * (loglike(y,μ,φ) - loglike(y,y,φ)) Parameters ---------- @@ -140,11 +131,10 @@ def unit_deviance(self, y, mu, check_input=False): pass # pragma: no cover def unit_deviance_derivative(self, y, mu): - r"""Compute the derivative of the unit deviance w.r.t. mu. + """Compute the derivative of the unit deviance w.r.t. mu. The derivative of the unit deviance is given by - :math:`\frac{\partial}{\partial\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` - with unit variance :math:`v(\mu)`. + ∂ d(y,μ)/(∂ μ) = -2(y-μ)/v(μ) with unit variance v(μ). Parameters ---------- @@ -157,14 +147,13 @@ def unit_deviance_derivative(self, y, mu): return -2 * (y - mu) / self.unit_variance(mu) def deviance(self, y, mu, weights=1): - r"""Compute the deviance. + """Compute the deviance. The deviance is a weighted sum of the per sample unit deviances, - :math:`D = \sum_i s_i \cdot d(y_i, \mu_i)` - with weights :math:`s_i` and unit deviance :math:`d(y,\mu)`. - In terms of the log-likelihood it is :math:`D = -2\phi\cdot - \left(loglike(y,\mu,\frac{phi}{s}) - - loglike(y,y,\frac{phi}{s})\right)`. + D = sum_i s_i * d(y_i,μ_i) + with weights s_i and unit deviance d(y,μ). + In terms of the log-likelihood it is + D = -2φ * (loglike(y,μ,φ/s) - loglike(y,y,φ/s)). Parameters ---------- @@ -182,7 +171,7 @@ def deviance(self, y, mu, weights=1): def deviance_derivative(self, y, mu, weights=1): """Compute the derivative of the deviance w.r.t. mu. - It gives :math:`\\frac{\\partial}{\\partial\\mu} D(y, \\mu; weights)`. + It gives ∂ D(y, μ; weights)/(∂ μ). Parameters ---------- @@ -211,11 +200,10 @@ def _mu_deviance_derivative(self, coef, X, y, weights, link): class TweedieDistribution(ExponentialDispersionModel): - r"""A class for the Tweedie distribution. + """A class for the Tweedie distribution. - A Tweedie distribution with mean :math:`\mu=\mathrm{E}[Y]` is uniquely - defined by it's mean-variance relationship - :math:`\mathrm{Var}[Y] \propto \mu^power`. + A Tweedie distribution with mean μ=E[Y] is uniquely defined by it's + mean-variance relationship Var[Y] ∝ μ^power. Special cases are: @@ -231,8 +219,7 @@ class TweedieDistribution(ExponentialDispersionModel): Parameters ---------- power : float (default=0) - The variance power of the `unit_variance` - :math:`v(\mu) = \mu^{power}`. + The variance power of the unit variance v(μ) = μ^power. For ``0=1.') + raise ValueError('Tweedie distribution is only defined for ' + 'power<=0 and p>=1.') elif 1 <= power < 2: # Poisson or Compound Poisson distribution self._lower_bound = DistributionBoundary(0, inclusive=True) @@ -279,7 +266,7 @@ def unit_variance(self, mu): def unit_variance_derivative(self, mu): """Compute the derivative of the unit variance of a Tweedie - distribution v(mu)=power*mu**(power-1). + distribution v(mu)=power * mu**(power-1). Parameters ---------- @@ -289,12 +276,10 @@ def unit_variance_derivative(self, mu): return self.power * np.power(mu, self.power - 1) def unit_deviance(self, y, mu, check_input=False): - r"""Compute the unit deviance. + """Compute the unit deviance. 
- The unit_deviance :math:`d(y,\mu)` can be defined by the - log-likelihood as - :math:`d(y,\mu) = -2\phi\cdot - \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right).` + The unit deviance d(y,μ) can be defined by the log-likelihood as + d(y,μ) = -2φ * (loglike(y,μ,φ) - loglike(y,y,φ)). Parameters ---------- @@ -328,7 +313,7 @@ def unit_deviance(self, y, mu, check_input=False): raise ValueError("Tweedie deviance is only defined for p<=0 " "and p>=1.") elif 1 <= p < 2: - # Poisson and Compount poisson distribution, y >= 0, mu > 0 + # Poisson and Compound poisson distribution, y >= 0, mu > 0 if (y < 0).any() or (mu <= 0).any(): raise ValueError(message + "non-negative y and strictly " "positive mu.") diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 13ca3673c8b3d..fb653b5e4361f 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -31,7 +31,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a Generalized Linear Model (GLM) with penalties. GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean of the target y as mu=h(X*w). Therefore, + fitting and predicting the mean of the target y as μ=h(X*w). Therefore, the fit minimizes the following objective function with L2 priors as regularizer:: @@ -118,16 +118,16 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Notes ----- The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function - :math:`v(\\mu_i)` is a property of and given by the specific EDM, see + the first two moments to be E[Y_i]=μ_i=h((Xw)_i) and + Var[Y_i]=φ/s_i * v(μ_i). The unit variance function v(μ_i) is a property of + and given by the specific EDM, see :ref:`User Guide `. - The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + The parameters w (``coef_`` and ``intercept_``) are estimated by minimizing the deviance plus penalty term, which is equivalent to (penalized) maximum likelihood estimation. - For alpha > 0, the feature matrix X should be standardized in order to + For ``alpha > 0``, the feature matrix X should be standardized in order to penalize features equally strong. Call :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. @@ -138,7 +138,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): y = z/s, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, sample_weight=s)``. The weights are necessary for the right (finite sample) mean. - Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, + Consider ȳ = (sum_i s_i y_i)(sum_i s_i), in this case one might say that y has a 'scaled' Poisson distributions. The same holds for other distributions. @@ -183,9 +183,9 @@ def fit(self, X, y, sample_weight=None): optional (default=None) Individual weights w_i for each sample. Note that for an Exponential Dispersion Model (EDM), one has - Var[Y_i]=phi/w_i * v(mu). - If Y_i ~ EDM(mu, phi/w_i), then - sum(w*Y)/sum(w) ~ EDM(mu, phi/sum(w)), i.e. the mean of y is a + Var[Y_i]=φ/w_i * v(mu). + If Y_i ~ EDM(mu, φ/w_i), then + sum(w*Y)/sum(w) ~ EDM(mu, φ/sum(w)), i.e. the mean of y is a weighted average with weights=sample_weight. Returns @@ -218,7 +218,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("No default link known for the " "specified distribution family. 
Please " "set link manually, i.e. not to 'auto'; " - "got (link='auto', family={}" + "got (link='auto', family={})" .format(self.family)) elif self.link == 'identity': self._link_instance = IdentityLink() @@ -383,11 +383,10 @@ def score(self, X, y, sample_weight=None): R^2 uses squared error and D^2 deviance. Note that those two are equal for ``family='normal'``. - D^2 is defined as - :math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`, - :math:`D_{null}` is the null deviance, i.e. the deviance of a model - with intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`. - The mean :math:`\\bar{y}` is averaged by sample_weight. + D^2 is defined as D^2 = 1 - D(y_true,y_pred) / D_null, + D_null is the null deviance, i.e. the deviance of a model + with intercept alone, which corresponds to y_pred = ȳ. + The mean ȳ is averaged by sample_weight. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). @@ -490,17 +489,18 @@ class PoissonRegressor(GeneralizedLinearRegressor): Notes ----- The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function - :math:`v(\\mu_i)` is a property of and given by the specific EDM, see + the first two moments to be E[Y_i]=μ_i=h((Xw)_i) and + Var[Y_i]=φ/s_i * v(μ_i). The unit variance function v(μ_i) is a property of + and given by the specific EDM, see :ref:`User Guide `. - The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + The parameters w (``coef_`` and ``intercept_``) are estimated by minimizing the deviance plus penalty term, which is equivalent to (penalized) maximum likelihood estimation. - For alpha > 0, the feature matrix X should be standardized in order to - penalize features equally strong. + For ``alpha > 0``, the feature matrix X should be standardized in order to + penalize features equally strong. Call + :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. If the target y is a ratio, appropriate sample weights s should be provided. @@ -508,7 +508,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): weights s=exposure (time, money, persons years, ...). Then you fit y = z/s, i.e. ``PoissonRegressor().fit(X, y, sample_weight=s)``. The weights are necessary for the right (finite sample) mean. - Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, + Consider ȳ = (sum_i s_i y_i)(sum_i s_i), in this case one might say that y has a 'scaled' Poisson distributions. References @@ -608,12 +608,12 @@ class GammaRegressor(GeneralizedLinearRegressor): Notes ----- The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function - :math:`v(\\mu_i)` is a property of and given by the specific EDM, see + the first two moments to be E[Y_i]=μ_i=h((Xw)_i) and + Var[Y_i]=φ/s_i * v(μ_i). The unit variance function v(μ_i) is a property of + and given by the specific EDM, see :ref:`User Guide `. - The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + The parameters w (``coef_`` and ``intercept_``) are estimated by minimizing the deviance plus penalty term, which is equivalent to (penalized) maximum likelihood estimation. 
@@ -664,7 +664,7 @@ class TweedieRegressor(GeneralizedLinearRegressor): Parameters ---------- power : float (default=0) - The variance power: :math:`v(\\mu) = \\mu^{power}`. + The variance power: v(μ) = μ^{power}. For ``0`. - The parameters :math:`w` (`coef_` and `intercept_`) are estimated by + The parameters w (``coef_`` and ``intercept_``) are estimated by minimizing the deviance plus penalty term, which is equivalent to (penalized) maximum likelihood estimation. From 44524cafc0aac0289267f1099b2ec2f8d53fe6c7 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 17 Aug 2019 20:28:54 +0200 Subject: [PATCH 134/269] Replace Tweedie p->power --- sklearn/linear_model/_glm/distribution.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index 1baf99a465326..e82f787d7e710 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -241,7 +241,7 @@ def power(self, power): self._lower_bound = DistributionBoundary(-np.Inf, inclusive=False) elif 0 < power < 1: raise ValueError('Tweedie distribution is only defined for ' - 'power<=0 and p>=1.') + 'power<=0 and power>=1.') elif 1 <= power < 2: # Poisson or Compound Poisson distribution self._lower_bound = DistributionBoundary(0, inclusive=True) From 58d240973a175ee705d4e8cfbd4e9b6495eca609 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 17 Aug 2019 20:34:11 +0200 Subject: [PATCH 135/269] Replace Tweedie p->power --- sklearn/linear_model/_glm/distribution.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index e82f787d7e710..e0b3511734cee 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -300,7 +300,7 @@ def unit_deviance(self, y, mu, check_input=False): p = self.power if check_input: - message = ("Mean Tweedie deviance error with p={} can only be " + message = ("Mean Tweedie deviance error with power={} can only be " "used on ".format(p)) if p < 0: # 'Extreme stable', y any realy number, mu > 0 @@ -310,8 +310,8 @@ def unit_deviance(self, y, mu, check_input=False): # Normal, y and mu can be any real number pass elif 0 < p < 1: - raise ValueError("Tweedie deviance is only defined for p<=0 " - "and p>=1.") + raise ValueError("Tweedie deviance is only defined for " + "power<=0 and power>=1.") elif 1 <= p < 2: # Poisson and Compound poisson distribution, y >= 0, mu > 0 if (y < 0).any() or (mu <= 0).any(): @@ -335,8 +335,8 @@ def unit_deviance(self, y, mu, check_input=False): # Normal distribution, y and mu any real number dev = (y - mu)**2 elif p < 1: - raise ValueError("Tweedie deviance is only defined for p<=0 and " - "p>=1.") + raise ValueError("Tweedie deviance is only defined for power<=0 " + "and power>=1.") elif p == 1: # Poisson distribution dev = 2 * (xlogy(y, y/mu) - y + mu) From ee351e1f7137bc225d8f964558009612303b37db Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 18 Aug 2019 00:11:57 +0200 Subject: [PATCH 136/269] Fix tests due to Tweedie p->power --- sklearn/linear_model/_glm/tests/test_distribution.py | 2 +- sklearn/metrics/tests/test_regression.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_distribution.py b/sklearn/linear_model/_glm/tests/test_distribution.py index 82e493b7a2149..ed81c8328c87a 100644 --- 
a/sklearn/linear_model/_glm/tests/test_distribution.py +++ b/sklearn/linear_model/_glm/tests/test_distribution.py @@ -45,7 +45,7 @@ def test_invalid_distribution_bound(): def test_tweedie_distribution_power(): - msg = "distribution is only defined for p<=0 and p>=1" + msg = "distribution is only defined for power<=0 and power>=1" with pytest.raises(ValueError, match=msg): TweedieDistribution(power=0.5) diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 8e18321f80303..8febf9064e58e 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -133,7 +133,7 @@ def test_regression_metrics_at_limits(): mean_tweedie_deviance([0.], [0.], p=p) with pytest.raises(ValueError, - match="is only defined for p<=0 and p>=1"): + match="is only defined for power<=0 and power>=1"): mean_tweedie_deviance([0.], [0.], p=0.5) From 33fe9be737001045eb9fad5616f678fb27fb8d79 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 18 Aug 2019 12:09:24 +0200 Subject: [PATCH 137/269] Simplify super(...) --- sklearn/linear_model/_glm/distribution.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index e0b3511734cee..7ce51deb0921d 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -353,25 +353,25 @@ def unit_deviance(self, y, mu, check_input=False): class NormalDistribution(TweedieDistribution): """Class for the Normal (aka Gaussian) distribution""" def __init__(self): - super(NormalDistribution, self).__init__(power=0) + super().__init__(power=0) class PoissonDistribution(TweedieDistribution): """Class for the scaled Poisson distribution""" def __init__(self): - super(PoissonDistribution, self).__init__(power=1) + super().__init__(power=1) class GammaDistribution(TweedieDistribution): """Class for the Gamma distribution""" def __init__(self): - super(GammaDistribution, self).__init__(power=2) + super().__init__(power=2) class InverseGaussianDistribution(TweedieDistribution): """Class for the scaled InverseGaussianDistribution distribution""" def __init__(self): - super(InverseGaussianDistribution, self).__init__(power=3) + super().__init__(power=3) EDM_DISTRIBUTIONS = { From 94272e79d565672416fbc6772aa8c0eb9e7ce519 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 18 Aug 2019 12:26:12 +0200 Subject: [PATCH 138/269] Replace Link.link(..) by __call__(..) --- sklearn/linear_model/_glm/glm.py | 8 ++++---- sklearn/linear_model/_glm/link.py | 8 ++++---- sklearn/linear_model/_glm/tests/test_link.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index fb653b5e4361f..4d7a05095cb27 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -49,7 +49,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Constant that multiplies the penalty terms and thus determines the regularization strength. See the notes for the exact mathematical meaning of this - parameter.``alpha = 0`` is equivalent to unpenalized GLMs. In this + parameter. ``alpha = 0`` is equivalent to unpenalized GLMs. In this case, the design matrix X must have full column rank (no collinearities). 
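Since ``alpha = 0`` makes the penalty vanish, a GLM with ``family='normal'``
and ``link='identity'`` reduces to ordinary least squares, so its coefficients
should closely match those of ``LinearRegression`` on a full-rank design. A
rough sketch, importing the estimator from the private module of this branch::

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import LinearRegression
    from sklearn.linear_model._glm.glm import GeneralizedLinearRegressor

    X, y = make_regression(n_samples=100, n_features=3, noise=0.5,
                           random_state=0)

    glm = GeneralizedLinearRegressor(alpha=0, family='normal',
                                     link='identity').fit(X, y)
    ols = LinearRegression().fit(X, y)

    # expected to be small (limited only by the lbfgs stopping tolerance)
    print(np.abs(glm.coef_ - ols.coef_).max())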
@@ -296,7 +296,7 @@ def fit(self, X, y, sample_weight=None): else: if self.fit_intercept: coef = np.zeros(n_features+1) - coef[0] = link.link(np.average(y, weights=weights)) + coef[0] = link(np.average(y, weights=weights)) else: coef = np.zeros(n_features) @@ -441,7 +441,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): Constant that multiplies the penalty terms and thus determines the regularization strength. See the notes for the exact mathematical meaning of this - parameter.``alpha = 0`` is equivalent to unpenalized GLMs. In this + parameter. ``alpha = 0`` is equivalent to unpenalized GLMs. In this case, the design matrix X must have full column rank (no collinearities). @@ -560,7 +560,7 @@ class GammaRegressor(GeneralizedLinearRegressor): Constant that multiplies the penalty terms and thus determines the regularization strength. See the notes for the exact mathematical meaning of this - parameter.``alpha = 0`` is equivalent to unpenalized GLMs. In this + parameter. ``alpha = 0`` is equivalent to unpenalized GLMs. In this case, the design matrix X must have full column rank (no collinearities). diff --git a/sklearn/linear_model/_glm/link.py b/sklearn/linear_model/_glm/link.py index ec9a7b7736eb1..1ef485cc85a55 100644 --- a/sklearn/linear_model/_glm/link.py +++ b/sklearn/linear_model/_glm/link.py @@ -15,7 +15,7 @@ class Link(metaclass=ABCMeta): """Abstract base class for Link functions.""" @abstractmethod - def link(self, mu): + def __call__(self, mu): """Compute the link function g(mu). The link function links the mean mu=E[Y] to the so called linear @@ -79,7 +79,7 @@ def inverse_derivative2(self, lin_pred): class IdentityLink(Link): """The identity link function g(x)=x.""" - def link(self, mu): + def __call__(self, mu): return mu def derivative(self, mu): @@ -98,7 +98,7 @@ def inverse_derivative2(self, lin_pred): class LogLink(Link): """The log link function g(x)=log(x).""" - def link(self, mu): + def __call__(self, mu): return np.log(mu) def derivative(self, mu): @@ -117,7 +117,7 @@ def inverse_derivative2(self, lin_pred): class LogitLink(Link): """The logit link function g(x)=logit(x).""" - def link(self, mu): + def __call__(self, mu): return logit(mu) def derivative(self, mu): diff --git a/sklearn/linear_model/_glm/tests/test_link.py b/sklearn/linear_model/_glm/tests/test_link.py index 3a2a21c4c04e0..feafb151e14a2 100644 --- a/sklearn/linear_model/_glm/tests/test_link.py +++ b/sklearn/linear_model/_glm/tests/test_link.py @@ -25,7 +25,7 @@ def test_link_properties(link): # careful for large x, note expit(36) = 1 # limit max eta to 15 x = x / 100 * 15 - assert_allclose(link.link(link.inverse(x)), x) + assert_allclose(link(link.inverse(x)), x) # if f(g(x)) = x, then f'(g(x)) = 1/g'(x) assert_allclose(link.derivative(link.inverse(x)), 1./link.inverse_derivative(x)) @@ -34,5 +34,5 @@ def test_link_properties(link): link.inverse_derivative2(x).shape == link.inverse_derivative(x).shape) # for LogitLink, in the following x should be between 0 and 1. - # assert_almost_equal(link.inverse_derivative(link.link(x)), + # assert_almost_equal(link.inverse_derivative(link(x)), # 1./link.derivative(x), decimal=decimal) From 2457039f82ea308e634e4853484df17029f311ac Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 18 Aug 2019 12:45:35 +0200 Subject: [PATCH 139/269] Replace 1. 
-> 1 --- sklearn/linear_model/_glm/glm.py | 2 +- sklearn/linear_model/_glm/link.py | 8 ++++---- sklearn/linear_model/_glm/tests/test_link.py | 11 ++++++----- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 4d7a05095cb27..b8d87799f8a93 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -415,7 +415,7 @@ def score(self, X, y, sample_weight=None): dev = self._family_instance.deviance(y, mu, weights=weights) y_mean = np.average(y, weights=weights) dev_null = self._family_instance.deviance(y, y_mean, weights=weights) - return 1. - dev / dev_null + return 1 - dev / dev_null def _more_tags(self): return {"requires_positive_y": True} diff --git a/sklearn/linear_model/_glm/link.py b/sklearn/linear_model/_glm/link.py index 1ef485cc85a55..04b485c067cca 100644 --- a/sklearn/linear_model/_glm/link.py +++ b/sklearn/linear_model/_glm/link.py @@ -102,7 +102,7 @@ def __call__(self, mu): return np.log(mu) def derivative(self, mu): - return 1./mu + return 1 / mu def inverse(self, lin_pred): return np.exp(lin_pred) @@ -121,15 +121,15 @@ def __call__(self, mu): return logit(mu) def derivative(self, mu): - return 1. / (mu * (1 - mu)) + return 1 / (mu * (1 - mu)) def inverse(self, lin_pred): return expit(lin_pred) def inverse_derivative(self, lin_pred): ep = expit(lin_pred) - return ep * (1. - ep) + return ep * (1 - ep) def inverse_derivative2(self, lin_pred): ep = expit(lin_pred) - return ep * (1. - ep) * (1. - 2 * ep) + return ep * (1 - ep) * (1 - 2 * ep) diff --git a/sklearn/linear_model/_glm/tests/test_link.py b/sklearn/linear_model/_glm/tests/test_link.py index feafb151e14a2..a631509baca79 100644 --- a/sklearn/linear_model/_glm/tests/test_link.py +++ b/sklearn/linear_model/_glm/tests/test_link.py @@ -19,20 +19,21 @@ def test_link_properties(link): """Test link inverse and derivative.""" rng = np.random.RandomState(42) - x = rng.rand(100)*100 + x = rng.rand(100) * 100 link = link() # instantiate object if isinstance(link, LogitLink): # careful for large x, note expit(36) = 1 # limit max eta to 15 x = x / 100 * 15 assert_allclose(link(link.inverse(x)), x) - # if f(g(x)) = x, then f'(g(x)) = 1/g'(x) + # if g(h(x)) = x, then g'(h(x)) = 1/h'(x) + # g = link, h = link.inverse assert_allclose(link.derivative(link.inverse(x)), - 1./link.inverse_derivative(x)) + 1 / link.inverse_derivative(x)) assert ( link.inverse_derivative2(x).shape == link.inverse_derivative(x).shape) - # for LogitLink, in the following x should be between 0 and 1. + # for LogitLink, in the following x should be between 0 and 1 # assert_almost_equal(link.inverse_derivative(link(x)), - # 1./link.derivative(x), decimal=decimal) + # 1 / link.derivative(x), decimal=decimal) From 6396d2c4495681d7d774f7c7de7054b336ba2709 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 18 Aug 2019 13:46:45 +0200 Subject: [PATCH 140/269] Fix table in TweedieRegressor --- sklearn/linear_model/_glm/glm.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index b8d87799f8a93..f538aa6121747 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -664,19 +664,24 @@ class TweedieRegressor(GeneralizedLinearRegressor): Parameters ---------- power : float (default=0) - The variance power: v(μ) = μ^{power}. + The variance power: v(μ) = μ^power. 
For ``0 Date: Thu, 22 Aug 2019 12:02:28 +0300 Subject: [PATCH 141/269] Improve docstring in plot_tweedie_regression_insurance_claims.py --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 55a21c8d8723d..24b3afcadf120 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -85,8 +85,8 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, Parameters ---------- - df : DataFrame with at least three columns named feature, weight and - observed + df : DataFrame + input data feature: str a column name of df for the feature to be plotted weight : str From da66fd5dc0fb8fc89825e5cf75057eed70c09aed Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 22 Aug 2019 12:23:40 +0300 Subject: [PATCH 142/269] Use train_test_split in tests --- sklearn/linear_model/_glm/tests/test_glm.py | 36 +++++++++++---------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index ebcab6395e5b4..d6c7c7c3f767e 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -24,6 +24,7 @@ ) from sklearn.linear_model import Ridge from sklearn.exceptions import ConvergenceWarning +from sklearn.model_selection import train_test_split GLM_SOLVERS = ['lbfgs'] @@ -207,9 +208,8 @@ def test_glm_log_regression(family, solver, tol): @pytest.mark.parametrize('fit_intercept', [True, False]) def test_warm_start(fit_intercept): - n_samples, n_features = 100, 10 - n_predict = 10 - X, y, coef = make_regression(n_samples=n_samples+n_predict, + n_samples, n_features = 110, 10 + X, y, coef = make_regression(n_samples=n_samples, n_features=n_features, n_informative=n_features-2, noise=0.5, coef=True, random_state=42) @@ -230,7 +230,7 @@ def test_warm_start(fit_intercept): assert glm1.score(X, y) > glm2.score(X, y) glm2.set_params(max_iter=1000) glm2.fit(X, y) - assert_allclose(glm1.coef_, glm2.coef_, rtol=1e-4, atol=1e-5) + assert_allclose(glm1.coef_, glm2.coef_, rtol=1e-5) assert_allclose(glm1.score(X, y), glm2.score(X, y), rtol=1e-4) # TODO: investigate why this doesn't match # assert glm1.n_iter_ == glm2.n_iter_ + 2 @@ -242,13 +242,14 @@ def test_warm_start(fit_intercept): def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): """Compare with Ridge regression for Normal distributions.""" alpha = 1.0 - n_predict = 10 - X, y, _ = make_regression(n_samples=n_samples+n_predict, - n_features=n_features, - n_informative=n_features-2, noise=0.5, - coef=True, random_state=42) - y = y[0:n_samples] - X, T = X[0:n_samples], X[n_samples:] + test_size = 10 + X, y = make_regression(n_samples=n_samples + test_size, + n_features=n_features, + n_informative=n_features-2, noise=0.5, + random_state=42) + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=test_size, random_state=0 + ) if n_samples > n_features: ridge_params = {"solver": "svd"} @@ -258,17 +259,18 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 ridge = Ridge(alpha=alpha*n_samples, normalize=False, random_state=42, **ridge_params) - ridge.fit(X, y) + ridge.fit(X_train, y_train) 
glm = GeneralizedLinearRegressor(alpha=1.0, family='normal', link='identity', fit_intercept=True, - max_iter=300, solver=solver, tol=1e-6, - check_input=False) - glm.fit(X, y) + solver=solver, check_input=False, + max_iter=300) + glm.fit(X_train, y_train) assert glm.coef_.shape == (X.shape[1], ) - assert_allclose(glm.coef_, ridge.coef_, rtol=5e-6) + assert_allclose(glm.coef_, ridge.coef_, atol=5e-5) assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-5) - assert_allclose(glm.predict(T), ridge.predict(T), rtol=1e-5) + assert_allclose(glm.predict(X_train), ridge.predict(X_train), rtol=5e-5) + assert_allclose(glm.predict(X_test), ridge.predict(X_test), rtol=5e-5) @pytest.mark.parametrize('solver, tol', [('lbfgs', 1e-7)]) From b9bc170b2ef7673d6b40103a6583cd9a56d7c517 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 22 Aug 2019 14:07:32 +0300 Subject: [PATCH 143/269] Fix TODO in test_warm_start --- sklearn/linear_model/_glm/tests/test_glm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index d6c7c7c3f767e..4975d1454b922 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -232,8 +232,7 @@ def test_warm_start(fit_intercept): glm2.fit(X, y) assert_allclose(glm1.coef_, glm2.coef_, rtol=1e-5) assert_allclose(glm1.score(X, y), glm2.score(X, y), rtol=1e-4) - # TODO: investigate why this doesn't match - # assert glm1.n_iter_ == glm2.n_iter_ + 2 + assert glm1.n_iter_ == glm2.n_iter_ @pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)]) From ab6c5d82ea6149f8acc20834abcc735c630a516c Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 22 Aug 2019 14:31:06 +0300 Subject: [PATCH 144/269] Revert "No LaTeX in docstrings" This reverts commit 61dc13fdd5b71fe0e1d30e5a9b58934441a9dcb1. --- sklearn/linear_model/_glm/distribution.py | 85 +++++++++++++---------- sklearn/linear_model/_glm/glm.py | 66 +++++++++--------- 2 files changed, 83 insertions(+), 68 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index 7ce51deb0921d..5754cb391ec61 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -26,15 +26,19 @@ def _safe_lin_pred(X, coef): class ExponentialDispersionModel(metaclass=ABCMeta): - """Base class for reproductive Exponential Dispersion Models (EDM). + r"""Base class for reproductive Exponential Dispersion Models (EDM). - The pdf of Y∼EDM(μ, φ) is given by:: + The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by - p(y| θ, φ) = c1(y, φ) * exp((θy-A(θ))/φ) - = c2(y, φ) * exp(-d(y, μ)/(2φ)) + .. math:: p(y| \theta, \phi) = c(y, \phi) + \exp\left(\frac{\theta y-A(\theta)}{\phi}\right) + = \tilde{c}(y, \phi) + \exp\left(-\frac{d(y, \mu)}{2\phi}\right) - with mean E[Y] = A'(θ) = μ, variance Var[Y] = φ * v(μ), - unit variance v(μ), unit deviance d(y,μ) and dispersion parameter φ. + with mean :math:`\mathrm{E}[Y] = A'(\theta) = \mu`, + variance :math:`\mathrm{Var}[Y] = \phi \cdot v(\mu)`, + unit variance :math:`v(\mu)` and + unit deviance :math:`d(y,\mu)`. Methods ------- @@ -52,7 +56,7 @@ class ExponentialDispersionModel(metaclass=ABCMeta): """ def in_y_range(self, y): - """Returns ``True`` if y is in the valid range of Y∼EDM. + """Returns ``True`` if y is in the valid range of Y~EDM. 
Parameters ---------- @@ -76,13 +80,17 @@ def in_y_range(self, y): @abstractmethod def unit_variance(self, mu): - """Compute the unit variance function. + r"""Compute the unit variance function. - The unit variance v(μ) determines the variance as a function of the - mean μ by Var[Y_i] = φ/s_i * v(μ_i). - It can also be derived from the unit deviance d(y,μ) as:: + The unit variance :math:`v(\mu)` determines the variance as + a function of the mean :math:`\mu` by + :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(\mu_i)`. + It can also be derived from the unit deviance :math:`d(y,\mu)` as - v(μ) = 2/(∂^2 d(y,μ)/(∂ μ^2))|_{y=μ} + .. math:: v(\mu) = \frac{2}{\frac{\partial^2 d(y,\mu)}{ + \partial\mu^2}}\big|_{y=\mu} + + See also :func:`variance`. Parameters ---------- @@ -93,9 +101,9 @@ def unit_variance(self, mu): @abstractmethod def unit_variance_derivative(self, mu): - """Compute the derivative of the unit variance w.r.t. mu. + r"""Compute the derivative of the unit variance w.r.t. mu. - Return v'(μ). + Return :math:`v'(\mu)`. Parameters ---------- @@ -106,11 +114,12 @@ def unit_variance_derivative(self, mu): @abstractmethod def unit_deviance(self, y, mu, check_input=False): - """Compute the unit deviance. - - The unit_deviance d(y,μ) can be defined by the log-likelihood as:: + r"""Compute the unit deviance. - d(y,μ) = -2φ * (loglike(y,μ,φ) - loglike(y,y,φ)) + The unit_deviance :math:`d(y,\mu)` can be defined by the + log-likelihood as + :math:`d(y,\mu) = -2\phi\cdot + \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right).` Parameters ---------- @@ -131,10 +140,11 @@ def unit_deviance(self, y, mu, check_input=False): pass # pragma: no cover def unit_deviance_derivative(self, y, mu): - """Compute the derivative of the unit deviance w.r.t. mu. + r"""Compute the derivative of the unit deviance w.r.t. mu. The derivative of the unit deviance is given by - ∂ d(y,μ)/(∂ μ) = -2(y-μ)/v(μ) with unit variance v(μ). + :math:`\frac{\partial}{\partial\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` + with unit variance :math:`v(\mu)`. Parameters ---------- @@ -147,13 +157,14 @@ def unit_deviance_derivative(self, y, mu): return -2 * (y - mu) / self.unit_variance(mu) def deviance(self, y, mu, weights=1): - """Compute the deviance. + r"""Compute the deviance. The deviance is a weighted sum of the per sample unit deviances, - D = sum_i s_i * d(y_i,μ_i) - with weights s_i and unit deviance d(y,μ). - In terms of the log-likelihood it is - D = -2φ * (loglike(y,μ,φ/s) - loglike(y,y,φ/s)). + :math:`D = \sum_i s_i \cdot d(y_i, \mu_i)` + with weights :math:`s_i` and unit deviance :math:`d(y,\mu)`. + In terms of the log-likelihood it is :math:`D = -2\phi\cdot + \left(loglike(y,\mu,\frac{phi}{s}) + - loglike(y,y,\frac{phi}{s})\right)`. Parameters ---------- @@ -171,7 +182,7 @@ def deviance(self, y, mu, weights=1): def deviance_derivative(self, y, mu, weights=1): """Compute the derivative of the deviance w.r.t. mu. - It gives ∂ D(y, μ; weights)/(∂ μ). + It gives :math:`\\frac{\\partial}{\\partial\\mu} D(y, \\mu; weights)`. Parameters ---------- @@ -200,10 +211,11 @@ def _mu_deviance_derivative(self, coef, X, y, weights, link): class TweedieDistribution(ExponentialDispersionModel): - """A class for the Tweedie distribution. + r"""A class for the Tweedie distribution. - A Tweedie distribution with mean μ=E[Y] is uniquely defined by it's - mean-variance relationship Var[Y] ∝ μ^power. 
+ A Tweedie distribution with mean :math:`\mu=\mathrm{E}[Y]` is uniquely + defined by it's mean-variance relationship + :math:`\mathrm{Var}[Y] \propto \mu^power`. Special cases are: @@ -219,7 +231,8 @@ class TweedieDistribution(ExponentialDispersionModel): Parameters ---------- power : float (default=0) - The variance power of the unit variance v(μ) = μ^power. + The variance power of the `unit_variance` + :math:`v(\mu) = \mu^{power}`. For ``0=1.") elif 1 <= p < 2: - # Poisson and Compound poisson distribution, y >= 0, mu > 0 + # Poisson and Compount poisson distribution, y >= 0, mu > 0 if (y < 0).any() or (mu <= 0).any(): raise ValueError(message + "non-negative y and strictly " "positive mu.") diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index f538aa6121747..69e68c1eda0f8 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -31,7 +31,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a Generalized Linear Model (GLM) with penalties. GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean of the target y as μ=h(X*w). Therefore, + fitting and predicting the mean of the target y as mu=h(X*w). Therefore, the fit minimizes the following objective function with L2 priors as regularizer:: @@ -118,16 +118,16 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Notes ----- The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be E[Y_i]=μ_i=h((Xw)_i) and - Var[Y_i]=φ/s_i * v(μ_i). The unit variance function v(μ_i) is a property of - and given by the specific EDM, see + the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function + :math:`v(\\mu_i)` is a property of and given by the specific EDM, see :ref:`User Guide `. - The parameters w (``coef_`` and ``intercept_``) are estimated by + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by minimizing the deviance plus penalty term, which is equivalent to (penalized) maximum likelihood estimation. - For ``alpha > 0``, the feature matrix X should be standardized in order to + For alpha > 0, the feature matrix X should be standardized in order to penalize features equally strong. Call :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. @@ -138,7 +138,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): y = z/s, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, sample_weight=s)``. The weights are necessary for the right (finite sample) mean. - Consider ȳ = (sum_i s_i y_i)(sum_i s_i), + Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, in this case one might say that y has a 'scaled' Poisson distributions. The same holds for other distributions. @@ -183,9 +183,9 @@ def fit(self, X, y, sample_weight=None): optional (default=None) Individual weights w_i for each sample. Note that for an Exponential Dispersion Model (EDM), one has - Var[Y_i]=φ/w_i * v(mu). - If Y_i ~ EDM(mu, φ/w_i), then - sum(w*Y)/sum(w) ~ EDM(mu, φ/sum(w)), i.e. the mean of y is a + Var[Y_i]=phi/w_i * v(mu). + If Y_i ~ EDM(mu, phi/w_i), then + sum(w*Y)/sum(w) ~ EDM(mu, phi/sum(w)), i.e. the mean of y is a weighted average with weights=sample_weight. Returns @@ -218,7 +218,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("No default link known for the " "specified distribution family. Please " "set link manually, i.e. 
not to 'auto'; " - "got (link='auto', family={})" + "got (link='auto', family={}" .format(self.family)) elif self.link == 'identity': self._link_instance = IdentityLink() @@ -383,10 +383,11 @@ def score(self, X, y, sample_weight=None): R^2 uses squared error and D^2 deviance. Note that those two are equal for ``family='normal'``. - D^2 is defined as D^2 = 1 - D(y_true,y_pred) / D_null, - D_null is the null deviance, i.e. the deviance of a model - with intercept alone, which corresponds to y_pred = ȳ. - The mean ȳ is averaged by sample_weight. + D^2 is defined as + :math:`D^2 = 1-\\frac{D(y_{true},y_{pred})}{D_{null}}`, + :math:`D_{null}` is the null deviance, i.e. the deviance of a model + with intercept alone, which corresponds to :math:`y_{pred} = \\bar{y}`. + The mean :math:`\\bar{y}` is averaged by sample_weight. Best possible score is 1.0 and it can be negative (because the model can be arbitrarily worse). @@ -489,18 +490,17 @@ class PoissonRegressor(GeneralizedLinearRegressor): Notes ----- The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be E[Y_i]=μ_i=h((Xw)_i) and - Var[Y_i]=φ/s_i * v(μ_i). The unit variance function v(μ_i) is a property of - and given by the specific EDM, see + the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function + :math:`v(\\mu_i)` is a property of and given by the specific EDM, see :ref:`User Guide `. - The parameters w (``coef_`` and ``intercept_``) are estimated by + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by minimizing the deviance plus penalty term, which is equivalent to (penalized) maximum likelihood estimation. - For ``alpha > 0``, the feature matrix X should be standardized in order to - penalize features equally strong. Call - :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. + For alpha > 0, the feature matrix X should be standardized in order to + penalize features equally strong. If the target y is a ratio, appropriate sample weights s should be provided. @@ -508,7 +508,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): weights s=exposure (time, money, persons years, ...). Then you fit y = z/s, i.e. ``PoissonRegressor().fit(X, y, sample_weight=s)``. The weights are necessary for the right (finite sample) mean. - Consider ȳ = (sum_i s_i y_i)(sum_i s_i), + Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, in this case one might say that y has a 'scaled' Poisson distributions. References @@ -608,12 +608,12 @@ class GammaRegressor(GeneralizedLinearRegressor): Notes ----- The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be E[Y_i]=μ_i=h((Xw)_i) and - Var[Y_i]=φ/s_i * v(μ_i). The unit variance function v(μ_i) is a property of - and given by the specific EDM, see + the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function + :math:`v(\\mu_i)` is a property of and given by the specific EDM, see :ref:`User Guide `. - The parameters w (``coef_`` and ``intercept_``) are estimated by + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by minimizing the deviance plus penalty term, which is equivalent to (penalized) maximum likelihood estimation. @@ -664,7 +664,7 @@ class TweedieRegressor(GeneralizedLinearRegressor): Parameters ---------- power : float (default=0) - The variance power: v(μ) = μ^power. 
+ The variance power: :math:`v(\\mu) = \\mu^{power}`. For ``0`. - The parameters w (``coef_`` and ``intercept_``) are estimated by + The parameters :math:`w` (`coef_` and `intercept_`) are estimated by minimizing the deviance plus penalty term, which is equivalent to (penalized) maximum likelihood estimation. From b424a070f696262174959c1927c921d301fd8ba5 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 22 Aug 2019 14:42:39 +0300 Subject: [PATCH 145/269] Remove n_iter_ check when warm start. --- sklearn/linear_model/_glm/glm.py | 2 +- sklearn/linear_model/_glm/tests/test_glm.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 69e68c1eda0f8..38f27eec397e8 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -218,7 +218,7 @@ def fit(self, X, y, sample_weight=None): raise ValueError("No default link known for the " "specified distribution family. Please " "set link manually, i.e. not to 'auto'; " - "got (link='auto', family={}" + "got (link='auto', family={})" .format(self.family)) elif self.link == 'identity': self._link_instance = IdentityLink() diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 4975d1454b922..879d382ce28dc 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -232,7 +232,6 @@ def test_warm_start(fit_intercept): glm2.fit(X, y) assert_allclose(glm1.coef_, glm2.coef_, rtol=1e-5) assert_allclose(glm1.score(X, y), glm2.score(X, y), rtol=1e-4) - assert glm1.n_iter_ == glm2.n_iter_ @pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)]) From 95a90580c9d034e7bcb9c488cc19d09ac692deca Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 22 Aug 2019 15:02:00 +0300 Subject: [PATCH 146/269] Rename variable L2 -> coef_scaled --- sklearn/linear_model/_glm/glm.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 38f27eec397e8..a7128eb78df07 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -304,15 +304,16 @@ def fit(self, X, y, sample_weight=None): if solver == 'lbfgs': def func(coef, X, y, weights, alpha, family, link): - mu, devp = \ - family._mu_deviance_derivative(coef, X, y, weights, link) + mu, devp = family._mu_deviance_derivative( + coef, X, y, weights, link + ) dev = family.deviance(y, mu, weights) intercept = (coef.size == X.shape[1] + 1) idx = 1 if intercept else 0 # offset if coef[0] is intercept - L2 = alpha * coef[idx:] - obj = 0.5 * dev + 0.5 * (coef[idx:] @ L2) + coef_scaled = alpha * coef[idx:] + obj = 0.5 * dev + 0.5 * (coef[idx:] @ coef_scaled) objp = 0.5 * devp - objp[idx:] += L2 + objp[idx:] += coef_scaled return obj, objp args = (X, y, weights, self.alpha, family, link) From 59eceb42d874d94d0bb7fdced4f3e05fd030dc62 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 22 Aug 2019 15:59:54 +0300 Subject: [PATCH 147/269] Minor fixes --- .../plot_tweedie_regression_insurance_claims.py | 3 --- sklearn/linear_model/_glm/distribution.py | 12 ++++++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 24b3afcadf120..24451069eeb68 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ 
b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -181,9 +181,6 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, # events occuring with a constant rate in a given time interval (``Exposure``). # Here we model the frequency ``y = ClaimNb / Exposure``, # which is still a (scaled) Poisson distribution. -# -# A very important property of the Poisson distribution is its mean-variance -# relation: The variance is proportional to the mean. df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=2) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index 5754cb391ec61..8779cd616cc5a 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -342,9 +342,9 @@ def unit_deviance(self, y, mu, check_input=False): if p < 0: # 'Extreme stable', y any realy number, mu > 0 - dev = 2 * (np.power(np.maximum(y, 0), 2-p)/((1-p) * (2-p)) - - y * np.power(mu, 1-p)/(1-p) + - np.power(mu, 2-p)/(2-p)) + dev = 2 * (np.power(np.maximum(y, 0), 2-p) / ((1-p) * (2-p)) + - y * np.power(mu, 1-p) / (1-p) + + np.power(mu, 2-p) / (2-p)) elif p == 0: # Normal distribution, y and mu any real number @@ -359,9 +359,9 @@ def unit_deviance(self, y, mu, check_input=False): # Gamma distribution dev = 2 * (np.log(mu/y) + y/mu - 1) else: - dev = 2 * (np.power(y, 2-p)/((1-p) * (2-p)) - - y * np.power(mu, 1-p)/(1-p) + - np.power(mu, 2-p)/(2-p)) + dev = 2 * (np.power(y, 2-p) / ((1-p) * (2-p)) + - y * np.power(mu, 1-p) / (1-p) + + np.power(mu, 2-p) / (2-p)) return dev From 04f30f40e572d6e919fef8a4c2d4bcb27d562b6b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 28 Aug 2019 10:13:27 +0300 Subject: [PATCH 148/269] Better wording in example --- .../linear_model/plot_poisson_regression_non_normal_loss.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 5c0b64faea255..d7f9b65660453 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -5,7 +5,9 @@ This example illustrates the use of linear Poisson regression on the French Motor Third-Party Liability Claims dataset [1] and compares -it with models learned with least squared error. +it with models learned with least squared error. The goal is to predict the +number of insurance claims (or frequency) following car accidents for a user +given historical data over a population of users. We start by defining a few helper functions for loading the data and visualizing results. 
From 3630b5277f68cd9805ec5f3af21b7fe9336cd46d Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 28 Aug 2019 11:25:19 +0300 Subject: [PATCH 149/269] Improvements in plot_poisson_regression_non_normal_loss.py --- ...plot_poisson_regression_non_normal_loss.py | 95 ++++++++++--------- 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index d7f9b65660453..6a98030817245 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -28,6 +28,7 @@ import pandas as pd from sklearn.datasets import fetch_openml +from sklearn.dummy import DummyRegressor from sklearn.compose import ColumnTransformer from sklearn.linear_model import PoissonRegressor, LinearRegression from sklearn.model_selection import train_test_split @@ -78,7 +79,7 @@ def load_mtpl2(n_samples=100000): # containing the number of claims (``ClaimNb``) with the freMTPL2sev table # containing the claim amount (``ClaimAmount``) for the same user ids. -df = load_mtpl2(n_samples=100000) +df = load_mtpl2(n_samples=50000) # Note: filter out claims with zero amount, as the severity model # requires a strictly positive target values. @@ -117,8 +118,6 @@ def load_mtpl2(n_samples=100000): # (``Exposure``). Here we model the frequency ``y = ClaimNb / Exposure``, # which is still a (scaled) Poisson distribution. # -# A very important property of the Poisson distribution is its mean-variance -# relation: The variance is proportional to the mean. df["Frequency"] = df.ClaimNb / df.Exposure @@ -135,49 +134,50 @@ def load_mtpl2(n_samples=100000): # To evaluate the pertinence of the used metrics, we will consider as a # baseline an estimator that returns 0 for any input. 
-df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=2) +df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0) +dummy = DummyRegressor(strategy='constant', constant=0) +dummy.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) -eps = 1e-5 -print("MSE: %.3f" % mean_squared_error( - df_test.Frequency.values, np.zeros(len(df_test)), - df_test.Exposure.values)) -print("MAE: %.3f" % mean_absolute_error( - df_test.Frequency.values, np.zeros(len(df_test)), - df_test.Exposure.values)) -print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test.Frequency.values, eps + np.zeros(len(df_test)), - df_test.Exposure.values)) +############################################################################## +# +# The Poisson deviance cannot be computed on negative values predicted by the +# model, so we set the minimum predicted value to eps, + + +def score_estimator(estimator, df_test, eps=1e-5): + """Score an estimatr on the test set""" + + print("MSE: %.3f" % mean_squared_error( + df_test.Frequency.values, estimator.predict(X_test), + df_test.Exposure.values)) + print("MAE: %.3f" % mean_absolute_error( + df_test.Frequency.values, estimator.predict(X_test), + df_test.Exposure.values)) + print("mean Poisson deviance: %.3f" % mean_poisson_deviance( + df_test.Frequency.values, np.fmax(estimator.predict(X_test), eps), + df_test.Exposure.values)) + + +print("DummyRegressor") +score_estimator(dummy, df_test) ############################################################################## # # We start by modeling the target variable with the least squares linear # regression model, - linregr = LinearRegression() linregr.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) -print("LinearRegression") -print("MSE: %.3f" % mean_squared_error( - df_test.Frequency.values, linregr.predict(X_test), - df_test.Exposure.values)) -print("MSE: %.3f" % mean_absolute_error( - df_test.Frequency.values, linregr.predict(X_test), - df_test.Exposure.values)) -print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test.Frequency.values, np.fmax(linregr.predict(X_test), eps), - df_test.Exposure.values)) - -############################################################################## -# -# The Poisson deviance cannot be computed because negative values are -# predicted by the model, print('Number Negatives: %s / total: %s' % ( (linregr.predict(X_test) < 0).sum(), X_test.shape[0])) +print("LinearRegression") +score_estimator(linregr, df_test) + ############################################################################## # # Next we fit the Poisson regressor on the target variable, @@ -186,15 +186,7 @@ def load_mtpl2(n_samples=100000): glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) print("PoissonRegressor") -print("MSE: %.3f" % mean_squared_error( - df_test.Frequency.values, glm_freq.predict(X_test), - df_test.Exposure.values)) -print("MAE: %.3f" % mean_absolute_error( - df_test.Frequency.values, glm_freq.predict(X_test), - df_test.Exposure.values)) -print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test.Frequency.values, glm_freq.predict(X_test), - df_test.Exposure.values)) +score_estimator(glm_freq, df_test) ############################################################################## # @@ -202,19 +194,13 @@ def load_mtpl2(n_samples=100000): # still minimizes the least square error. 
-gbr = GradientBoostingRegressor(max_depth=3) +gbr = GradientBoostingRegressor() gbr.fit(X_train, df_train.Frequency.values, sample_weight=df_train.Exposure.values) print("GradientBoostingRegressor") -print("MSE: %.3f" % mean_squared_error( - df_test.Frequency.values, gbr.predict(X_test), df_test.Exposure.values)) -print("MAE: %.3f" % mean_absolute_error( - df_test.Frequency.values, gbr.predict(X_test), df_test.Exposure.values)) -print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test.Frequency.values, np.fmax(gbr.predict(X_test), eps), - df_test.Exposure.values)) +score_estimator(gbr, df_test) ############################################################################## # @@ -231,6 +217,7 @@ def load_mtpl2(n_samples=100000): fig, axes = plt.subplots(1, 4, figsize=(16, 3)) +fig.subplots_adjust(bottom=0.2) df_train.Frequency.hist(bins=np.linspace(-1, 10, 50), ax=axes[0]) @@ -247,3 +234,17 @@ def load_mtpl2(n_samples=100000): yscale='log', xlabel="y (Frequency)" ) + +############################################################################## +# +# The experimental data presents a long tail distribution for ``y``. In all +# models we predict the mean expected value, so we will have necessairily fewer +# extreme values. Additionally normal distribution used in ``Ridge`` and +# ``GradientBoostingRegressor`` has a constant variance, while for the Poisson +# distribution used in ``PoissonRegressor``, the variance is proportional to +# the mean predicted value. +# +# Thus, among the considered estimators, +# ``PoissonRegressor`` and ``GradientBoostingRegressor`` are better suited for +# modeling the long tail distribution of the data as compared to the ``Ridge`` +# estimator. From 516eadba0cbddd9c016f66fc6dce41773f0350e7 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 28 Aug 2019 12:28:06 +0300 Subject: [PATCH 150/269] Improvements in plot_tweedie_regression_insurance_claims.py --- ...plot_poisson_regression_non_normal_loss.py | 4 +-- ...lot_tweedie_regression_insurance_claims.py | 30 +++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 6a98030817245..9deca6c25032a 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -116,8 +116,8 @@ def load_mtpl2(n_samples=100000): # as a Poisson distribution. It is then assumed to be the number of discrete # events occurring with a constant rate in a given time interval # (``Exposure``). Here we model the frequency ``y = ClaimNb / Exposure``, -# which is still a (scaled) Poisson distribution. -# +# which is still a (scaled) Poisson distribution, and use ``Exposure`` as +# `sample_weight`. df["Frequency"] = df.ClaimNb / df.Exposure diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 24451069eeb68..f866518b69db8 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -80,7 +80,7 @@ def load_mtpl2(n_samples=100000): def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, - title=None, ax=None): + title=None, ax=None, fill_legend=False): """Plot observed and predicted - aggregated per feature level. 
Parameters @@ -95,6 +95,8 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, a column name of df with the observed target predicted : frame a dataframe, with the same index as df, with the predicted target + fill_legend : bool, default=False + wgether to show fill_between legend """ # aggregate observed and predicted variables by feature level df_ = df.loc[:, [feature, weight]].copy() @@ -109,13 +111,15 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, ax = df_.loc[:, ["observed", "predicted"]].plot(style=".", ax=ax) y_max = df_.loc[:, ["observed", "predicted"]].values.max() * 0.8 - ax.fill_between( + p2 = ax.fill_between( df_.index, 0, y_max * df_[weight] / df_[weight].values.max(), color="g", alpha=0.1, ) + if fill_legend: + ax.legend([p2], ["{} distribution".format(feature)]) ax.set( ylabel=y_label if y_label is not None else None, title=title if title is not None else "Train: Observed vs Predicted", @@ -132,7 +136,7 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, # containing the claim amount (``ClaimAmount``) for the same policy ids # (``IDpol``). -df = load_mtpl2(n_samples=100000) +df = load_mtpl2(n_samples=60000) # Note: filter out claims with zero amount, as the severity model # requires a strictly positive target values. @@ -180,9 +184,10 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, # as a Poisson distribution. It is then assumed to be the number of discrete # events occuring with a constant rate in a given time interval (``Exposure``). # Here we model the frequency ``y = ClaimNb / Exposure``, -# which is still a (scaled) Poisson distribution. +# which is still a (scaled) Poisson distribution, and use ``Exposure`` as +# `sample_weight`. -df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=2) +df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0) # Some of the features are colinear, we use a weak penalization to avoid # numerical issues. @@ -200,6 +205,7 @@ def mean_deviance(estimator, y, y_pred, weights): def score_estimator( estimator, X_train, X_test, df_train, df_test, target, weights ): + """Evaluate an estimator on train and test sets with different metrics""" res = [] for subset_label, X, df in [ @@ -282,6 +288,7 @@ def score_estimator( y_label="Claim Frequency", title="test data", ax=ax[0, 1], + fill_legend=True ) plot_obs_pred( @@ -293,6 +300,7 @@ def score_estimator( y_label="Claim Frequency", title="test data", ax=ax[1, 0], + fill_legend=True ) plot_obs_pred( @@ -304,11 +312,17 @@ def score_estimator( y_label="Claim Frequency", title="test data", ax=ax[1, 1], + fill_legend=True ) ############################################################################## # +# According to the observed data, the frequency of accidents is higher for +# drivers younger than 30 years old, and it positively correlated with the +# `BonusMalus` variable. Out model is able to mostly correctly model +# this behaviour. +# # 3. Severity model - Gamma Distribution # --------------------------------------- # The mean claim amount or severity (`AvgClaimAmount`) can be empirically @@ -392,11 +406,15 @@ def score_estimator( y_label="Average Claim Severity", title="test data", ax=ax[1], + fill_legend=True ) ############################################################################## # +# Overall the drivers age (``DrivAge``) has a weak impact on the claim +# severity, both in observed and predicted data. +# # 4. 
Total Claims Amount -- Compound Poisson distribution # ------------------------------------------------------- # @@ -517,3 +535,5 @@ def score(self, X, y, sample_weight=None): ) print(pd.DataFrame(res).set_index("subset").T) + +plt.show() From 5e14928507f04893d131942c57e566c5a5789517 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 28 Aug 2019 12:35:28 +0300 Subject: [PATCH 151/269] Drop unused ExponentialDispersionModel._upper_bound --- sklearn/linear_model/_glm/distribution.py | 7 +------ sklearn/linear_model/_glm/tests/test_distribution.py | 5 ----- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index 8779cd616cc5a..e3dfd1fcd3cf5 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -63,11 +63,7 @@ def in_y_range(self, y): y : array, shape (n_samples,) Target values. """ - if hasattr(self, '_upper_bound'): - # All currently supported distributions have an upper bound at - # +inf, however this may need to be implemented for other - # distributions - raise NotImplementedError + # Note that currently supported distributions have +inf upper bound if not isinstance(self._lower_bound, DistributionBoundary): raise TypeError('_lower_bound attribute must be of type ' @@ -236,7 +232,6 @@ class TweedieDistribution(ExponentialDispersionModel): For ``0 Date: Wed, 28 Aug 2019 13:10:55 +0300 Subject: [PATCH 152/269] Move notes and references from docstrings to user manual --- doc/modules/linear_model.rst | 12 ++++ sklearn/linear_model/_glm/glm.py | 117 ------------------------------- 2 files changed, 12 insertions(+), 117 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index fc92cd2002948..b127a2c345a36 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -986,6 +986,18 @@ of the unit variance function, :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values together with :math:`s=\mathrm{exposure}` as sample weights. This is done in both examples linked below. + * The fit itself does not need Y to be from an EDM, but only assumes + the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. + * If the target y is a ratio, appropriate sample weights s should be + provided. + As an example, consider Poisson distributed counts z (integers) and + weights s=exposure (time, money, persons years, ...). Then you fit + y = z/s, i.e. ``PoissonRegressor.fit(X, y, sample_weight=s)``. + The weights are necessary for the right (finite sample) mean. + Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, + in this case one might say that y has a 'scaled' Poisson distributions. + The same holds for other distributions. The estimator can be used as follows:: diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index a7128eb78df07..826f5924776b3 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -114,43 +114,6 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): n_iter_ : int Actual number of iterations used in solver. - - Notes - ----- - The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function - :math:`v(\\mu_i)` is a property of and given by the specific EDM, see - :ref:`User Guide `. 
- - The parameters :math:`w` (`coef_` and `intercept_`) are estimated by - minimizing the deviance plus penalty term, which is equivalent to - (penalized) maximum likelihood estimation. - - For alpha > 0, the feature matrix X should be standardized in order to - penalize features equally strong. Call - :class:`sklearn.preprocessing.StandardScaler` before calling ``fit``. - - If the target y is a ratio, appropriate sample weights s should be - provided. - As an example, consider Poisson distributed counts z (integers) and - weights s=exposure (time, money, persons years, ...). Then you fit - y = z/s, i.e. ``GeneralizedLinearModel(family='poisson').fit(X, y, - sample_weight=s)``. The weights are necessary for the right (finite - sample) mean. - Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, - in this case one might say that y has a 'scaled' Poisson distributions. - The same holds for other distributions. - - References - ---------- - .. McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, - Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - - .. Jørgensen, B. (1992). The theory of exponential dispersion models - and analysis of deviance. Monografias de matemática, no. 51. See also - `Exponential dispersion model. - `_ """ def __init__(self, alpha=1.0, fit_intercept=True, family='normal', link='auto', @@ -487,40 +450,6 @@ class PoissonRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in solver. - - Notes - ----- - The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function - :math:`v(\\mu_i)` is a property of and given by the specific EDM, see - :ref:`User Guide `. - - The parameters :math:`w` (`coef_` and `intercept_`) are estimated by - minimizing the deviance plus penalty term, which is equivalent to - (penalized) maximum likelihood estimation. - - For alpha > 0, the feature matrix X should be standardized in order to - penalize features equally strong. - - If the target y is a ratio, appropriate sample weights s should be - provided. - As an example, consider Poisson distributed counts z (integers) and - weights s=exposure (time, money, persons years, ...). Then you fit - y = z/s, i.e. ``PoissonRegressor().fit(X, y, sample_weight=s)``. - The weights are necessary for the right (finite sample) mean. - Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, - in this case one might say that y has a 'scaled' Poisson distributions. - - References - ---------- - .. McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, - Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - - .. Jørgensen, B. (1992). The theory of exponential dispersion models - and analysis of deviance. Monografias de matemática, no. 51. See also - `Exponential dispersion model. - `_ """ def __init__(self, alpha=1.0, fit_intercept=True, link='log', solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, @@ -605,29 +534,6 @@ class GammaRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in solver. - - Notes - ----- - The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function - :math:`v(\\mu_i)` is a property of and given by the specific EDM, see - :ref:`User Guide `. 
- - The parameters :math:`w` (`coef_` and `intercept_`) are estimated by - minimizing the deviance plus penalty term, which is equivalent to - (penalized) maximum likelihood estimation. - - - References - ---------- - .. McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, - Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - - .. Jørgensen, B. (1992). The theory of exponential dispersion models - and analysis of deviance. Monografias de matemática, no. 51. See also - `Exponential dispersion model. - `_ """ def __init__(self, alpha=1.0, fit_intercept=True, link='log', solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, @@ -732,29 +638,6 @@ class TweedieRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in solver. - - Notes - ----- - The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. The unit variance function - :math:`v(\\mu_i)` is a property of and given by the specific EDM, see - :ref:`User Guide `. - - The parameters :math:`w` (`coef_` and `intercept_`) are estimated by - minimizing the deviance plus penalty term, which is equivalent to - (penalized) maximum likelihood estimation. - - - References - ---------- - .. McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, - Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. - - .. Jørgensen, B. (1992). The theory of exponential dispersion models - and analysis of deviance. Monografias de matemática, no. 51. See also - `Exponential dispersion model. - `_ """ def __init__(self, power=0.0, alpha=1.0, fit_intercept=True, link='log', solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, From 752d6aa90bc034426b0348a4fd12f0fc3421027d Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 28 Aug 2019 14:01:04 +0300 Subject: [PATCH 153/269] More explanatory comments in the code --- sklearn/linear_model/_glm/glm.py | 22 +++++++++++++++++---- sklearn/linear_model/_glm/tests/test_glm.py | 3 +++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 826f5924776b3..54560dbae0867 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -462,6 +462,10 @@ def __init__(self, alpha=1.0, fit_intercept=True, link='log', @property def family(self): + # We use a property with a setter, since the GLM solver relies + # on self.family attribute, but we can't set it in __init__ according + # to scikit-learn API constraints. This attribute is made read-only + # to disallow changing distribution to other than Poisson. return "poisson" @family.setter @@ -546,6 +550,10 @@ def __init__(self, alpha=1.0, fit_intercept=True, link='log', @property def family(self): + # We use a property with a setter, since the GLM solver relies + # on self.family attribute, but we can't set it in __init__ according + # to scikit-learn API constraints. This attribute is made read-only + # to disallow changing distribution to other than Gamma. return "gamma" @family.setter @@ -571,7 +579,10 @@ class TweedieRegressor(GeneralizedLinearRegressor): Parameters ---------- power : float (default=0) - The variance power: :math:`v(\\mu) = \\mu^{power}`. + The power determines the underlying target distribution. By + definition it links distribution variance (:math:`v`) and + mean (:math:`\\mu`): :math:`v(\\mu) = \\mu^{power}`. 
+ For ``0 glm2.score(X, y) glm2.set_params(max_iter=1000) glm2.fit(X, y) + # The two model are not exactly identical since the lbfgs solver + # computes the approximate hessian from previous iterations, which + # will not be strictly identical in the case of a warm start. assert_allclose(glm1.coef_, glm2.coef_, rtol=1e-5) assert_allclose(glm1.score(X, y), glm2.score(X, y), rtol=1e-4) From 38a4ad4e44ed4c4b27c48306afc74b85710dd889 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 28 Aug 2019 14:18:43 +0300 Subject: [PATCH 154/269] Fix requires_positive_y tag --- sklearn/linear_model/_glm/glm.py | 11 ++++++++++- sklearn/linear_model/_glm/tests/test_glm.py | 13 +++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 54560dbae0867..7c518bb3f8149 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -383,7 +383,16 @@ def score(self, X, y, sample_weight=None): return 1 - dev / dev_null def _more_tags(self): - return {"requires_positive_y": True} + # create the _family_instance if fit wasn't called yet. + if hasattr(self, '_family_instance'): + _family_instance = self._family_instance + elif isinstance(self.family, ExponentialDispersionModel): + _family_instance = self.family + elif self.family in EDM_DISTRIBUTIONS: + _family_instance = EDM_DISTRIBUTIONS[self.family]() + else: + raise ValueError + return {"requires_positive_y": not _family_instance.in_y_range(-1.0)} class PoissonRegressor(GeneralizedLinearRegressor): diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 93d7ee1f08324..898d3c4edf9c0 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -340,3 +340,16 @@ def test_tweedie_regression_family(regression_data): # TODO: the following should not be allowed # est.family.power = 2 + + +@pytest.mark.parametrize( + 'estimator, value', + [ + (PoissonRegressor(), True), + (GammaRegressor(), True), + (TweedieRegressor(power=1.5), True), + (TweedieRegressor(power=0), False) + ], +) +def test_tags(estimator, value): + assert estimator._get_tags()['requires_positive_y'] is value From c15a1cc573a555cc1a2e35c0063f6795cc2f7d84 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 28 Aug 2019 14:56:06 +0300 Subject: [PATCH 155/269] Remove Link.inverse_derivative2 --- doc/modules/linear_model.rst | 4 ++-- sklearn/linear_model/_glm/link.py | 21 -------------------- sklearn/linear_model/_glm/tests/test_link.py | 7 ------- 3 files changed, 2 insertions(+), 30 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index b127a2c345a36..622ab335059ab 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -919,8 +919,8 @@ The unit deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` likelihood as .. math:: d(y, \mu) = -2\phi\cdot - \left(loglike(y,\mu,\phi) - - loglike(y,y,\phi)\right) + \left( log P(y|\mu,\phi) + - log P(y|y,\phi)\right) The following table lists some specific EDM distributions—all are Tweedie distributions—and some of their properties. 
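As a worked special case of this definition (illustrative only, not part of the diff above): for the Poisson family with :math:`\phi=1` the log-likelihood is :math:`\log p(y|\mu) = y\log\mu - \mu - \log(y!)`, so

.. math:: d(y, \mu) = -2\left((y\log\mu - \mu) - (y\log y - y)\right)
                    = 2\left(y\log\frac{y}{\mu} - y + \mu\right),

which is the familiar Poisson unit deviance (the quantity averaged by :func:`mean_poisson_deviance`).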
diff --git a/sklearn/linear_model/_glm/link.py b/sklearn/linear_model/_glm/link.py index 04b485c067cca..b257036ac4727 100644 --- a/sklearn/linear_model/_glm/link.py +++ b/sklearn/linear_model/_glm/link.py @@ -64,17 +64,6 @@ def inverse_derivative(self, lin_pred): """ pass # pragma: no cover - @abstractmethod - def inverse_derivative2(self, lin_pred): - """Compute 2nd derivative of the inverse link function h''(lin_pred). - - Parameters - ---------- - lin_pred : array, shape (n_samples,) - Usually the (fitted) linear predictor. - """ - pass # pragma: no cover - class IdentityLink(Link): """The identity link function g(x)=x.""" @@ -91,9 +80,6 @@ def inverse(self, lin_pred): def inverse_derivative(self, lin_pred): return np.ones_like(lin_pred) - def inverse_derivative2(self, lin_pred): - return np.zeros_like(lin_pred) - class LogLink(Link): """The log link function g(x)=log(x).""" @@ -110,9 +96,6 @@ def inverse(self, lin_pred): def inverse_derivative(self, lin_pred): return np.exp(lin_pred) - def inverse_derivative2(self, lin_pred): - return np.exp(lin_pred) - class LogitLink(Link): """The logit link function g(x)=logit(x).""" @@ -129,7 +112,3 @@ def inverse(self, lin_pred): def inverse_derivative(self, lin_pred): ep = expit(lin_pred) return ep * (1 - ep) - - def inverse_derivative2(self, lin_pred): - ep = expit(lin_pred) - return ep * (1 - ep) * (1 - 2 * ep) diff --git a/sklearn/linear_model/_glm/tests/test_link.py b/sklearn/linear_model/_glm/tests/test_link.py index a631509baca79..36219e09b58e3 100644 --- a/sklearn/linear_model/_glm/tests/test_link.py +++ b/sklearn/linear_model/_glm/tests/test_link.py @@ -30,10 +30,3 @@ def test_link_properties(link): # g = link, h = link.inverse assert_allclose(link.derivative(link.inverse(x)), 1 / link.inverse_derivative(x)) - - assert ( - link.inverse_derivative2(x).shape == link.inverse_derivative(x).shape) - - # for LogitLink, in the following x should be between 0 and 1 - # assert_almost_equal(link.inverse_derivative(link(x)), - # 1 / link.derivative(x), decimal=decimal) From 37de07b7b78fe7ced78da6ed068fde3e4a08425b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 30 Aug 2019 12:41:44 +0300 Subject: [PATCH 156/269] Rename p to power parameter in mean_tweedie_deviance --- doc/modules/model_evaluation.rst | 45 ++++++++--------- sklearn/metrics/regression.py | 25 +++++----- sklearn/metrics/tests/test_regression.py | 61 ++++++++++++------------ 3 files changed, 67 insertions(+), 64 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 2db5053e08cce..1fe8ece51df67 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -2028,14 +2028,14 @@ Mean Poisson, Gamma, and Tweedie deviances The :func:`mean_tweedie_deviance` function computes the `mean Tweedie deviance error `_ -with power parameter `p`. This is a metric that elicits predicted expectation +with a ``power`` parameter. This is a metric that elicits predicted expectation values of regression targets. Following special cases exist, -- when `p=0` it is equivalent to :func:`mean_squared_error`. -- when `p=1` it is equivalent to :func:`mean_poisson_deviance`. -- when `p=2` it is equivalent to :func:`mean_gamma_deviance`. +- when ``power=0`` it is equivalent to :func:`mean_squared_error`. +- when ``power=1`` it is equivalent to :func:`mean_poisson_deviance`. +- when ``power=2`` it is equivalent to :func:`mean_gamma_deviance`. 
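These special cases can be checked numerically; the following is a small sketch (illustrative toy values, and it assumes the renamed ``power`` keyword introduced in this patch)::

    import numpy as np
    from sklearn.metrics import (mean_tweedie_deviance, mean_squared_error,
                                 mean_poisson_deviance, mean_gamma_deviance)

    y_true = np.array([2.0, 0.5, 1.0, 4.0])
    y_pred = np.array([1.5, 0.5, 2.0, 2.0])

    # power=0 reduces to the mean squared error
    assert np.isclose(mean_tweedie_deviance(y_true, y_pred, power=0),
                      mean_squared_error(y_true, y_pred))
    # power=1 is the mean Poisson deviance
    assert np.isclose(mean_tweedie_deviance(y_true, y_pred, power=1),
                      mean_poisson_deviance(y_true, y_pred))
    # power=2 is the mean Gamma deviance
    assert np.isclose(mean_tweedie_deviance(y_true, y_pred, power=2),
                      mean_gamma_deviance(y_true, y_pred))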
If :math:`\hat{y}_i` is the predicted value of the :math:`i`-th sample, and :math:`y_i` is the corresponding true value, then the mean Tweedie @@ -2046,48 +2046,49 @@ deviance error (D) estimated over :math:`n_{\text{samples}}` is defined as \text{D}(y, \hat{y}) = \frac{1}{n_\text{samples}} \sum_{i=0}^{n_\text{samples} - 1} \begin{cases} - (y_i-\hat{y}_i)^2, & \text{for }p=0\text{ (Normal)}\\ - 2(y_i \log(y/\hat{y}_i) + \hat{y}_i - y_i), & \text{for }p=1\text{ (Poisson)}\\ - 2(\log(\hat{y}_i/y_i) + y_i/\hat{y}_i - 1), & \text{for }p=2\text{ (Gamma)}\\ + (y_i-\hat{y}_i)^2, & \text{for }\text{power}=0\text{ (Normal)}\\ + 2(y_i \log(y/\hat{y}_i) + \hat{y}_i - y_i), & \text{for power}=1\text{ (Poisson)}\\ + 2(\log(\hat{y}_i/y_i) + y_i/\hat{y}_i - 1), & \text{for power}=2\text{ (Gamma)}\\ 2\left(\frac{\max(y_i,0)^{2-p}}{(1-p)(2-p)}- \frac{y\,\hat{y}^{1-p}_i}{1-p}+\frac{\hat{y}^{2-p}_i}{2-p}\right), & \text{otherwise} \end{cases} -Tweedie deviance is a homogeneous function of degree ``2-p``. -Thus, Gamma distribution with `p=2` means that simultaneously scaling `y_true` -and `y_pred` has no effect on the deviance. For Poisson distribution `p=1` -the deviance scales linearly, and for Normal distribution (`p=0`), -quadratically. In general, the higher `p` the less weight is given to extreme -deviations between true and predicted targets. +Tweedie deviance is a homogeneous function of degree ``2-power``. +Thus, Gamma distribution with ``power=2`` means that simultaneously scaling +``y_true`` and ``y_pred`` has no effect on the deviance. For Poisson +distribution ``power=1`` the deviance scales linearly, and for Normal +distribution (``power=0``), quadratically. In general, the higher +``power`` the less weight is given to extreme deviations between true +and predicted targets. For instance, let's compare the two predictions 1.0 and 100 that are both 50% of their corresponding true value. -The mean squared error (``p=0``) is very sensitive to the +The mean squared error (``power=0``) is very sensitive to the prediction difference of the second point,:: >>> from sklearn.metrics import mean_tweedie_deviance - >>> mean_tweedie_deviance([1.0], [1.5], p=0) + >>> mean_tweedie_deviance([1.0], [1.5], power=0) 0.25 - >>> mean_tweedie_deviance([100.], [150.], p=0) + >>> mean_tweedie_deviance([100.], [150.], power=0) 2500.0 If we increase ``p`` to 1,:: - >>> mean_tweedie_deviance([1.0], [1.5], p=1) + >>> mean_tweedie_deviance([1.0], [1.5], power=1) 0.18... - >>> mean_tweedie_deviance([100.], [150.], p=1) + >>> mean_tweedie_deviance([100.], [150.], power=1) 18.9... -the difference in errors decreases. Finally, by setting, ``p=2``:: +the difference in errors decreases. Finally, by setting, ``power=2``:: - >>> mean_tweedie_deviance([1.0], [1.5], p=2) + >>> mean_tweedie_deviance([1.0], [1.5], power=2) 0.14... - >>> mean_tweedie_deviance([100.], [150.], p=2) + >>> mean_tweedie_deviance([100.], [150.], power=2) 0.14... -we would get identical errors. The deviance when `p=2` is thus only +we would get identical errors. The deviance when ``power=2`` is thus only sensitive to relative errors. .. 
_clustering_metrics: diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index a1ae25fec278c..73db0acc945e6 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -624,7 +624,7 @@ def max_error(y_true, y_pred): return np.max(np.abs(y_true - y_pred)) -def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0): +def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, power=0): """Mean Tweedie deviance regression loss. Read more in the :ref:`User Guide `. @@ -640,20 +640,21 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0): sample_weight : array-like, shape (n_samples,), optional Sample weights. - p : float, optional - Tweedie power parameter. Either p <= 0 or p >= 1. + power : float, default=0 + Tweedie power parameter. Either power <= 0 or power >= 1. The higher `p` the less weight is given to extreme deviations between true and predicted targets. - - p < 0: Extreme stable distribution. Requires: y_pred > 0. - - p = 0 : Normal distribution, output corresponds to + - power < 0: Extreme stable distribution. Requires: y_pred > 0. + - power = 0 : Normal distribution, output corresponds to mean_squared_error. y_true and y_pred can be any real numbers. - - p = 1 : Poisson distribution. Requires: y_true >= 0 and y_pred > 0. + - power = 1 : Poisson distribution. Requires: y_true >= 0 and + y_pred > 0. - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0 and y_pred > 0. - - p = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. - - p = 3 : Inverse Gaussian distribution. Requires: y_true > 0 + - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. + - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 and y_pred > 0. - otherwise : Positive stable distribution. Requires: y_true > 0 and y_pred > 0. @@ -668,7 +669,7 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0): >>> from sklearn.metrics import mean_tweedie_deviance >>> y_true = [2, 0, 1, 4] >>> y_pred = [0.5, 0.5, 2., 2.] - >>> mean_tweedie_deviance(y_true, y_pred, p=1) + >>> mean_tweedie_deviance(y_true, y_pred, power=1) 1.4260... """ from ..linear_model._glm.distribution import TweedieDistribution @@ -682,7 +683,7 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, p=0): sample_weight = column_or_1d(sample_weight) sample_weight = sample_weight[:, np.newaxis] - dist = TweedieDistribution(power=p) + dist = TweedieDistribution(power=power) dev = dist.unit_deviance(y_true, y_pred, check_input=True) return np.average(dev, weights=sample_weight) @@ -721,7 +722,7 @@ def mean_poisson_deviance(y_true, y_pred, sample_weight=None): 1.4260... """ return mean_tweedie_deviance( - y_true, y_pred, sample_weight=sample_weight, p=1 + y_true, y_pred, sample_weight=sample_weight, power=1 ) @@ -759,5 +760,5 @@ def mean_gamma_deviance(y_true, y_pred, sample_weight=None): 1.0568... """ return mean_tweedie_deviance( - y_true, y_pred, sample_weight=sample_weight, p=2 + y_true, y_pred, sample_weight=sample_weight, power=2 ) diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 6dc31676357f5..0f987a088bb84 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -36,7 +36,7 @@ def test_regression_metrics(n_samples=50): assert_almost_equal(max_error(y_true, y_pred), 1.) assert_almost_equal(r2_score(y_true, y_pred), 0.995, 2) assert_almost_equal(explained_variance_score(y_true, y_pred), 1.) 
- assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=0), + assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=0), mean_squared_error(y_true, y_pred)) # Tweedie deviance needs positive y_pred, except for p=0, @@ -45,15 +45,15 @@ def test_regression_metrics(n_samples=50): y_true = np.arange(1, 1 + n_samples) y_pred = 2 * y_true n = n_samples - assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=-1), + assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=-1), 5/12 * n * (n**2 + 2 * n + 1)) - assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=1), + assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=1), (n + 1) * (1 - np.log(2))) - assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=2), + assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=2), 2 * np.log(2) - 1) - assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=3/2), + assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3/2), ((6 * np.sqrt(2) - 8) / n) * np.sqrt(y_true).sum()) - assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, p=3), + assert_almost_equal(mean_tweedie_deviance(y_true, y_pred, power=3), np.sum(1 / y_true) / (4 * n)) @@ -101,40 +101,41 @@ def test_regression_metrics_at_limits(): mean_squared_log_error, [1., -2., 3.], [1., 2., 3.]) # Tweedie deviance error - p = -1.2 - assert_allclose(mean_tweedie_deviance([0], [1.], p=p), - 2./(2.-p), rtol=1e-3) + power = -1.2 + assert_allclose(mean_tweedie_deviance([0], [1.], power=power), + 2 / (2 - power), rtol=1e-3) with pytest.raises(ValueError, match="can only be used on strictly positive mu."): - mean_tweedie_deviance([0.], [0.], p=p) - assert_almost_equal(mean_tweedie_deviance([0.], [0.], p=0), 0.00, 2) + mean_tweedie_deviance([0.], [0.], power=power) + assert_almost_equal(mean_tweedie_deviance([0.], [0.], power=0), 0.00, 2) msg = "only be used on non-negative y and strictly positive mu." with pytest.raises(ValueError, match=msg): - mean_tweedie_deviance([0.], [0.], p=1.0) + mean_tweedie_deviance([0.], [0.], power=1.0) - p = 1.5 - assert_allclose(mean_tweedie_deviance([0.], [1.], p=p), 2./(2.-p)) + power = 1.5 + assert_allclose(mean_tweedie_deviance([0.], [1.], power=power), + 2 / (2 - power)) msg = "only be used on non-negative y and strictly positive mu." with pytest.raises(ValueError, match=msg): - mean_tweedie_deviance([0.], [0.], p=p) - p = 2. - assert_allclose(mean_tweedie_deviance([1.], [1.], p=p), 0.00, + mean_tweedie_deviance([0.], [0.], power=power) + power = 2. + assert_allclose(mean_tweedie_deviance([1.], [1.], power=power), 0.00, atol=1e-8) msg = "can only be used on strictly positive y and mu." with pytest.raises(ValueError, match=msg): - mean_tweedie_deviance([0.], [0.], p=p) - p = 3. - assert_allclose(mean_tweedie_deviance([1.], [1.], p=p), + mean_tweedie_deviance([0.], [0.], power=power) + power = 3. + assert_allclose(mean_tweedie_deviance([1.], [1.], power=power), 0.00, atol=1e-8) msg = "can only be used on strictly positive y and mu." 
with pytest.raises(ValueError, match=msg): - mean_tweedie_deviance([0.], [0.], p=p) + mean_tweedie_deviance([0.], [0.], power=power) with pytest.raises(ValueError, match="is only defined for power<=0 and power>=1"): - mean_tweedie_deviance([0.], [0.], p=0.5) + mean_tweedie_deviance([0.], [0.], power=0.5) def test__check_reg_targets(): @@ -274,21 +275,21 @@ def test_tweedie_deviance_continuity(): y_true = np.random.RandomState(0).rand(n_samples) + 0.1 y_pred = np.random.RandomState(1).rand(n_samples) + 0.1 - assert_allclose(mean_tweedie_deviance(y_true, y_pred, p=0 - 1e-10), - mean_tweedie_deviance(y_true, y_pred, p=0)) + assert_allclose(mean_tweedie_deviance(y_true, y_pred, power=0 - 1e-10), + mean_tweedie_deviance(y_true, y_pred, power=0)) # Ws we get closer to the limit, with 1e-12 difference the absolute # tolerance to pass the below check increases. There are likely # numerical precision issues on the edges of different definition # regions. - assert_allclose(mean_tweedie_deviance(y_true, y_pred, p=1 + 1e-10), - mean_tweedie_deviance(y_true, y_pred, p=1), + assert_allclose(mean_tweedie_deviance(y_true, y_pred, power=1 + 1e-10), + mean_tweedie_deviance(y_true, y_pred, power=1), atol=1e-6) - assert_allclose(mean_tweedie_deviance(y_true, y_pred, p=2 - 1e-10), - mean_tweedie_deviance(y_true, y_pred, p=2), + assert_allclose(mean_tweedie_deviance(y_true, y_pred, power=2 - 1e-10), + mean_tweedie_deviance(y_true, y_pred, power=2), atol=1e-6) - assert_allclose(mean_tweedie_deviance(y_true, y_pred, p=2 + 1e-10), - mean_tweedie_deviance(y_true, y_pred, p=2), + assert_allclose(mean_tweedie_deviance(y_true, y_pred, power=2 + 1e-10), + mean_tweedie_deviance(y_true, y_pred, power=2), atol=1e-6) From adbf997ba53d68e53a8d5997ffecd98ce4e13863 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 30 Aug 2019 12:07:41 +0200 Subject: [PATCH 157/269] Rename predicted mean mu to y_pred --- sklearn/linear_model/_glm/distribution.py | 173 +++++++++--------- sklearn/linear_model/_glm/glm.py | 37 ++-- sklearn/linear_model/_glm/link.py | 42 ++--- .../_glm/tests/test_distribution.py | 4 +- 4 files changed, 132 insertions(+), 124 deletions(-) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index e3dfd1fcd3cf5..a5e42bcee5d1c 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -28,17 +28,17 @@ def _safe_lin_pred(X, coef): class ExponentialDispersionModel(metaclass=ABCMeta): r"""Base class for reproductive Exponential Dispersion Models (EDM). - The pdf of :math:`Y\sim \mathrm{EDM}(\mu, \phi)` is given by + The pdf of :math:`Y\sim \mathrm{EDM}(y_\textrm{pred}, \phi)` is given by .. math:: p(y| \theta, \phi) = c(y, \phi) \exp\left(\frac{\theta y-A(\theta)}{\phi}\right) = \tilde{c}(y, \phi) - \exp\left(-\frac{d(y, \mu)}{2\phi}\right) + \exp\left(-\frac{d(y, y_\textrm{pred})}{2\phi}\right) - with mean :math:`\mathrm{E}[Y] = A'(\theta) = \mu`, - variance :math:`\mathrm{Var}[Y] = \phi \cdot v(\mu)`, - unit variance :math:`v(\mu)` and - unit deviance :math:`d(y,\mu)`. + with mean :math:`\mathrm{E}[Y] = A'(\theta) = y_\textrm{pred}`, + variance :math:`\mathrm{Var}[Y] = \phi \cdot v(y_\textrm{pred})`, + unit variance :math:`v(y_\textrm{pred})` and + unit deviance :math:`d(y,y_\textrm{pred})`. Methods ------- @@ -75,58 +75,60 @@ def in_y_range(self, y): return np.greater(y, self._lower_bound.value) @abstractmethod - def unit_variance(self, mu): + def unit_variance(self, y_pred): r"""Compute the unit variance function. 
- The unit variance :math:`v(\mu)` determines the variance as - a function of the mean :math:`\mu` by - :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(\mu_i)`. - It can also be derived from the unit deviance :math:`d(y,\mu)` as + The unit variance :math:`v(y_\textrm{pred})` determines the variance as + a function of the mean :math:`y_\textrm{pred}` by + :math:`\mathrm{Var}[Y_i] = \phi/s_i*v(y_\textrm{pred}_i)`. + It can also be derived from the unit deviance + :math:`d(y,y_\textrm{pred})` as - .. math:: v(\mu) = \frac{2}{\frac{\partial^2 d(y,\mu)}{ - \partial\mu^2}}\big|_{y=\mu} + .. math:: v(y_\textrm{pred}) = \frac{2}{ + \frac{\partial^2 d(y,y_\textrm{pred})}{ + \partialy_\textrm{pred}^2}}\big|_{y=y_\textrm{pred}} See also :func:`variance`. Parameters ---------- - mu : array, shape (n_samples,) + y_pred : array, shape (n_samples,) Predicted mean. """ pass # pragma: no cover @abstractmethod - def unit_variance_derivative(self, mu): - r"""Compute the derivative of the unit variance w.r.t. mu. + def unit_variance_derivative(self, y_pred): + r"""Compute the derivative of the unit variance w.r.t. y_pred. - Return :math:`v'(\mu)`. + Return :math:`v'(y_\textrm{pred})`. Parameters ---------- - mu : array, shape (n_samples,) + y_pred : array, shape (n_samples,) Target values. """ pass # pragma: no cover @abstractmethod - def unit_deviance(self, y, mu, check_input=False): + def unit_deviance(self, y, y_pred, check_input=False): r"""Compute the unit deviance. - The unit_deviance :math:`d(y,\mu)` can be defined by the + The unit_deviance :math:`d(y,y_\textrm{pred})` can be defined by the log-likelihood as - :math:`d(y,\mu) = -2\phi\cdot - \left(loglike(y,\mu,\phi) - loglike(y,y,\phi)\right).` + :math:`d(y,y_\textrm{pred}) = -2\phi\cdot + \left(loglike(y,y_\textrm{pred},\phi) - loglike(y,y,\phi)\right).` Parameters ---------- y : array, shape (n_samples,) Target values. - mu : array, shape (n_samples,) + y_pred : array, shape (n_samples,) Predicted mean. check_input : bool, default=False - If True raise an exception on invalid y or mu values, otherwise + If True raise an exception on invalid y or y_pred values, otherwise they will be propagated as NaN. Returns ------- @@ -135,31 +137,33 @@ def unit_deviance(self, y, mu, check_input=False): """ pass # pragma: no cover - def unit_deviance_derivative(self, y, mu): - r"""Compute the derivative of the unit deviance w.r.t. mu. + def unit_deviance_derivative(self, y, y_pred): + r"""Compute the derivative of the unit deviance w.r.t. y_pred. The derivative of the unit deviance is given by - :math:`\frac{\partial}{\partial\mu}d(y,\mu) = -2\frac{y-\mu}{v(\mu)}` - with unit variance :math:`v(\mu)`. + :math:`\frac{\partial}{\partialy_\textrm{pred}}d(y,y_\textrm{pred}) + = -2\frac{y-y_\textrm{pred}}{v(y_\textrm{pred})}` + with unit variance :math:`v(y_\textrm{pred})`. Parameters ---------- y : array, shape (n_samples,) Target values. - mu : array, shape (n_samples,) + y_pred : array, shape (n_samples,) Predicted mean. """ - return -2 * (y - mu) / self.unit_variance(mu) + return -2 * (y - y_pred) / self.unit_variance(y_pred) - def deviance(self, y, mu, weights=1): + def deviance(self, y, y_pred, weights=1): r"""Compute the deviance. The deviance is a weighted sum of the per sample unit deviances, - :math:`D = \sum_i s_i \cdot d(y_i, \mu_i)` - with weights :math:`s_i` and unit deviance :math:`d(y,\mu)`. + :math:`D = \sum_i s_i \cdot d(y_i, y_\textrm{pred}_i)` + with weights :math:`s_i` and unit deviance + :math:`d(y,y_\textrm{pred})`. 
In terms of the log-likelihood it is :math:`D = -2\phi\cdot - \left(loglike(y,\mu,\frac{phi}{s}) + \left(loglike(y,y_\textrm{pred},\frac{phi}{s}) - loglike(y,y,\frac{phi}{s})\right)`. Parameters @@ -167,51 +171,52 @@ def deviance(self, y, mu, weights=1): y : array, shape (n_samples,) Target values. - mu : array, shape (n_samples,) + y_pred : array, shape (n_samples,) Predicted mean. weights : array, shape (n_samples,) (default=1) Weights or exposure to which variance is inverse proportional. """ - return np.sum(weights * self.unit_deviance(y, mu)) + return np.sum(weights * self.unit_deviance(y, y_pred)) - def deviance_derivative(self, y, mu, weights=1): - """Compute the derivative of the deviance w.r.t. mu. + def deviance_derivative(self, y, y_pred, weights=1): + r"""Compute the derivative of the deviance w.r.t. y_pred. - It gives :math:`\\frac{\\partial}{\\partial\\mu} D(y, \\mu; weights)`. + It gives :math:`\frac{\partial}{\partial y_\textrm{pred}} + D(y, \y_\textrm{pred}; weights)`. Parameters ---------- y : array, shape (n_samples,) Target values. - mu : array, shape (n_samples,) + y_pred : array, shape (n_samples,) Predicted mean. weights : array, shape (n_samples,) (default=1) Weights or exposure to which variance is inverse proportional. """ - return weights * self.unit_deviance_derivative(y, mu) + return weights * self.unit_deviance_derivative(y, y_pred) - def _mu_deviance_derivative(self, coef, X, y, weights, link): - """Compute mu and the derivative of the deviance w.r.t coef.""" + def _y_pred_deviance_derivative(self, coef, X, y, weights, link): + """Compute y_pred and the derivative of the deviance w.r.t coef.""" lin_pred = _safe_lin_pred(X, coef) - mu = link.inverse(lin_pred) + y_pred = link.inverse(lin_pred) d1 = link.inverse_derivative(lin_pred) - temp = d1 * self.deviance_derivative(y, mu, weights) + temp = d1 * self.deviance_derivative(y, y_pred, weights) if coef.size == X.shape[1] + 1: devp = np.concatenate(([temp.sum()], temp @ X)) else: devp = temp @ X # same as X.T @ temp - return mu, devp + return y_pred, devp class TweedieDistribution(ExponentialDispersionModel): r"""A class for the Tweedie distribution. - A Tweedie distribution with mean :math:`\mu=\mathrm{E}[Y]` is uniquely - defined by it's mean-variance relationship - :math:`\mathrm{Var}[Y] \propto \mu^power`. + A Tweedie distribution with mean :math:`y_\textrm{pred}=\mathrm{E}[Y]` + is uniquely defined by it's mean-variance relationship + :math:`\mathrm{Var}[Y] \propto y_\textrm{pred}^power`. Special cases are: @@ -228,7 +233,7 @@ class TweedieDistribution(ExponentialDispersionModel): ---------- power : float (default=0) The variance power of the `unit_variance` - :math:`v(\mu) = \mu^{power}`. + :math:`v(y_\textrm{pred}) = y_\textrm{pred}^{power}`. 
For ``0 0 - if (mu <= 0).any(): - raise ValueError(message + "strictly positive mu.") + # 'Extreme stable', y any realy number, y_pred > 0 + if (y_pred <= 0).any(): + raise ValueError(message + "strictly positive y_pred.") elif p == 0: - # Normal, y and mu can be any real number + # Normal, y and y_pred can be any real number pass elif 0 < p < 1: raise ValueError("Tweedie deviance is only defined for " "power<=0 and power>=1.") elif 1 <= p < 2: - # Poisson and Compount poisson distribution, y >= 0, mu > 0 - if (y < 0).any() or (mu <= 0).any(): + # Poisson and Compount poisson distribution, y >= 0, y_pred > 0 + if (y < 0).any() or (y_pred <= 0).any(): raise ValueError(message + "non-negative y and strictly " - "positive mu.") + "positive y_pred.") elif p >= 2: - # Gamma and Extreme stable distribution, y and mu > 0 - if (y <= 0).any() or (mu <= 0).any(): - raise ValueError(message + "strictly positive y and mu.") + # Gamma and Extreme stable distribution, y and y_pred > 0 + if (y <= 0).any() or (y_pred <= 0).any(): + raise ValueError(message + + "strictly positive y and y_pred.") else: # pragma: nocover # Unreachable statement raise ValueError if p < 0: - # 'Extreme stable', y any realy number, mu > 0 + # 'Extreme stable', y any realy number, y_pred > 0 dev = 2 * (np.power(np.maximum(y, 0), 2-p) / ((1-p) * (2-p)) - - y * np.power(mu, 1-p) / (1-p) - + np.power(mu, 2-p) / (2-p)) + - y * np.power(y_pred, 1-p) / (1-p) + + np.power(y_pred, 2-p) / (2-p)) elif p == 0: - # Normal distribution, y and mu any real number - dev = (y - mu)**2 + # Normal distribution, y and y_pred any real number + dev = (y - y_pred)**2 elif p < 1: raise ValueError("Tweedie deviance is only defined for power<=0 " "and power>=1.") elif p == 1: # Poisson distribution - dev = 2 * (xlogy(y, y/mu) - y + mu) + dev = 2 * (xlogy(y, y/y_pred) - y + y_pred) elif p == 2: # Gamma distribution - dev = 2 * (np.log(mu/y) + y/mu - 1) + dev = 2 * (np.log(y_pred/y) + y/y_pred - 1) else: dev = 2 * (np.power(y, 2-p) / ((1-p) * (2-p)) - - y * np.power(mu, 1-p) / (1-p) - + np.power(mu, 2-p) / (2-p)) + - y * np.power(y_pred, 1-p) / (1-p) + + np.power(y_pred, 2-p) / (2-p)) return dev diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 7c518bb3f8149..cab89766c0b92 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -31,8 +31,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a Generalized Linear Model (GLM) with penalties. GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean of the target y as mu=h(X*w). Therefore, - the fit minimizes the following objective function with L2 + fitting and predicting the mean of the target y as y_pred=h(X*w). + Therefore, the fit minimizes the following objective function with L2 priors as regularizer:: 1/(2*sum(s)) * deviance(y, h(X*w); s) @@ -66,8 +66,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): link : {'auto', 'identity', 'log'} or an instance of class Link, \ optional (default='auto') The link function of the GLM, i.e. mapping from linear predictor - (X*coef) to expectation (mu). Option 'auto' sets the link depending on - the chosen family as follows: + (X*coef) to expectation (y_pred). Option 'auto' sets the link + depending on the chosen family as follows: - 'identity' for family 'normal' @@ -146,9 +146,9 @@ def fit(self, X, y, sample_weight=None): optional (default=None) Individual weights w_i for each sample. 
Note that for an Exponential Dispersion Model (EDM), one has - Var[Y_i]=phi/w_i * v(mu). - If Y_i ~ EDM(mu, phi/w_i), then - sum(w*Y)/sum(w) ~ EDM(mu, phi/sum(w)), i.e. the mean of y is a + Var[Y_i]=phi/w_i * v(y_pred). + If Y_i ~ EDM(y_pred, phi/w_i), then + sum(w*Y)/sum(w) ~ EDM(y_pred, phi/sum(w)), i.e. the mean of y is a weighted average with weights=sample_weight. Returns @@ -267,10 +267,10 @@ def fit(self, X, y, sample_weight=None): if solver == 'lbfgs': def func(coef, X, y, weights, alpha, family, link): - mu, devp = family._mu_deviance_derivative( + y_pred, devp = family._y_pred_deviance_derivative( coef, X, y, weights, link ) - dev = family.deviance(y, mu, weights) + dev = family.deviance(y, y_pred, weights) intercept = (coef.size == X.shape[1] + 1) idx = 1 if intercept else 0 # offset if coef[0] is intercept coef_scaled = alpha * coef[idx:] @@ -337,8 +337,8 @@ def predict(self, X): """ # check_array is done in _linear_predictor eta = self._linear_predictor(X) - mu = self._link_instance.inverse(eta) - return mu + y_pred = self._link_instance.inverse(eta) + return y_pred def score(self, X, y, sample_weight=None): """Compute D^2, the percentage of deviance explained. @@ -376,8 +376,8 @@ def score(self, X, y, sample_weight=None): # TODO: make D^2 a score function in module metrics (and thereby get # input validation and so on) weights = _check_sample_weight(sample_weight, X) - mu = self.predict(X) - dev = self._family_instance.deviance(y, mu, weights=weights) + y_pred = self.predict(X) + dev = self._family_instance.deviance(y, y_pred, weights=weights) y_mean = np.average(y, weights=weights) dev_null = self._family_instance.deviance(y, y_mean, weights=weights) return 1 - dev / dev_null @@ -399,7 +399,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): """Regression with the response variable y following a Poisson distribution GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean of the target y as mu=h(X*w). + fitting and predicting the mean of the target y as y_pred=h(X*w). The fit minimizes the following objective function with L2 regularization:: 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * ||w||_2^2 @@ -487,7 +487,7 @@ class GammaRegressor(GeneralizedLinearRegressor): """Regression with the response variable y following a Gamma distribution GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean of the target y as mu=h(X*w). + fitting and predicting the mean of the target y as y_pred=h(X*w). The fit minimizes the following objective function with L2 regularization:: 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * ||w||_2^2 @@ -572,10 +572,10 @@ def family(self, value): class TweedieRegressor(GeneralizedLinearRegressor): - """Regression with the response variable y following a Tweedie distribution + r"""Regression with the response variable y following a Tweedie distribution GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean of the target y as mu=h(X*w). + fitting and predicting the mean of the target y as y_pred=h(X*w). The fit minimizes the following objective function with L2 regularization:: 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * ||w||_2^2 @@ -590,7 +590,8 @@ class TweedieRegressor(GeneralizedLinearRegressor): power : float (default=0) The power determines the underlying target distribution. 
By definition it links distribution variance (:math:`v`) and - mean (:math:`\\mu`): :math:`v(\\mu) = \\mu^{power}`. + mean (:math:`\y_\textrm{pred}`): + :math:`v(\y_\textrm{pred}) = \y_\textrm{pred}^{power}`. For ``0 Date: Fri, 30 Aug 2019 12:13:43 +0200 Subject: [PATCH 158/269] Fix link parameter documentation in TweedieRegression --- sklearn/linear_model/_glm/glm.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index cab89766c0b92..d6da8b8b80949 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -460,12 +460,12 @@ class PoissonRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in solver. """ - def __init__(self, alpha=1.0, fit_intercept=True, link='log', + def __init__(self, alpha=1.0, fit_intercept=True, solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, - family="poisson", link=link, + family="poisson", link='log', solver=solver, max_iter=max_iter, tol=tol, warm_start=warm_start, copy_X=copy_X, verbose=verbose) @@ -548,12 +548,12 @@ class GammaRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in solver. """ - def __init__(self, alpha=1.0, fit_intercept=True, link='log', - solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, + def __init__(self, alpha=1.0, fit_intercept=True, solver='lbfgs', + max_iter=100, tol=1e-4, warm_start=False, copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, - family="gamma", link=link, + family="gamma", link='log', solver=solver, max_iter=max_iter, tol=tol, warm_start=warm_start, copy_X=copy_X, verbose=verbose) @@ -619,6 +619,15 @@ class TweedieRegressor(GeneralizedLinearRegressor): case, the design matrix X must have full column rank (no collinearities). + link : {'auto', 'identity', 'log'}, default='auto' + The link function of the GLM, i.e. mapping from linear predictor + (X*coef) to expectation (y_pred). Option 'auto' sets the link + depending on the chosen family as follows: + + - 'identity' for Normal distribution + + - 'log' for Poisson, Gamma or Inverse Gaussian distributions + fit_intercept : boolean, optional (default=True) Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). @@ -659,9 +668,9 @@ class TweedieRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in solver. 
""" - def __init__(self, power=0.0, alpha=1.0, fit_intercept=True, link='log', - solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, - copy_X=True, check_input=True, verbose=0): + def __init__(self, power=0.0, alpha=1.0, fit_intercept=True, + link='auto', solver='lbfgs', max_iter=100, tol=1e-4, + warm_start=False, copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family=TweedieDistribution(power=power), link=link, From 3b526e98ce11514090cbdad890f3c2d13664cc83 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Fri, 30 Aug 2019 12:44:05 +0200 Subject: [PATCH 159/269] EXA Use a simpler pipeline for GBDT in poisson regression example --- ...plot_poisson_regression_non_normal_loss.py | 66 ++++++++++++++----- 1 file changed, 48 insertions(+), 18 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 9deca6c25032a..7ab772fb59920 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -34,6 +34,7 @@ from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import FunctionTransformer, OneHotEncoder +from sklearn.preprocessing import OrdinalEncoder from sklearn.preprocessing import StandardScaler, KBinsDiscretizer from sklearn.ensemble import GradientBoostingRegressor @@ -134,10 +135,14 @@ def load_mtpl2(n_samples=100000): # To evaluate the pertinence of the used metrics, we will consider as a # baseline an estimator that returns 0 for any input. -df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0) +df_train, df_test = train_test_split(df, random_state=0) -dummy = DummyRegressor(strategy='constant', constant=0) -dummy.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) +dummy = make_pipeline( + column_trans, + DummyRegressor(strategy='constant', constant=0) +) +dummy.fit(df_train, df_train.Frequency, + dummyregressor__sample_weight=df_train.Exposure) ############################################################################## # @@ -149,14 +154,14 @@ def score_estimator(estimator, df_test, eps=1e-5): """Score an estimatr on the test set""" print("MSE: %.3f" % mean_squared_error( - df_test.Frequency.values, estimator.predict(X_test), + df_test.Frequency.values, estimator.predict(df_test), df_test.Exposure.values)) print("MAE: %.3f" % mean_absolute_error( - df_test.Frequency.values, estimator.predict(X_test), + df_test.Frequency.values, estimator.predict(df_test), df_test.Exposure.values)) print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test.Frequency.values, np.fmax(estimator.predict(X_test), eps), + df_test.Frequency.values, np.fmax(estimator.predict(df_test), eps), df_test.Exposure.values)) @@ -168,12 +173,14 @@ def score_estimator(estimator, df_test, eps=1e-5): # We start by modeling the target variable with the least squares linear # regression model, -linregr = LinearRegression() -linregr.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) +linregr = make_pipeline(column_trans, LinearRegression()) +linregr.fit(df_train, df_train.Frequency, + linearregression__sample_weight=df_train.Exposure) print('Number Negatives: %s / total: %s' % ( - (linregr.predict(X_test) < 0).sum(), X_test.shape[0])) + (linregr.predict(df_train) < 0).sum(), + df_train.shape[0])) print("LinearRegression") score_estimator(linregr, 
df_test) @@ -182,8 +189,12 @@ def score_estimator(estimator, df_test, eps=1e-5): # # Next we fit the Poisson regressor on the target variable, -glm_freq = PoissonRegressor(alpha=0, max_iter=1000) -glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) +glm_freq = make_pipeline( + column_trans, + PoissonRegressor(alpha=0, max_iter=1000) +) +glm_freq.fit(df_train, df_train.Frequency, + poissonregressor__sample_weight=df_train.Exposure) print("PoissonRegressor") score_estimator(glm_freq, df_test) @@ -191,12 +202,31 @@ def score_estimator(estimator, df_test, eps=1e-5): ############################################################################## # # Finally we will consider a non linear model with Gradient boosting that -# still minimizes the least square error. - - -gbr = GradientBoostingRegressor() -gbr.fit(X_train, df_train.Frequency.values, - sample_weight=df_train.Exposure.values) +# still minimizes the least square error. Gradient Boostring Decision Trees do +# not require for categorical data to be one hot encoded, therefore here we use +# a simpler pre-processing pipeline without ``KBinsDiscretizer`` and with +# ``OrdinalEncoder`` instead of ``OneHotEncoder``. + + +gbr = make_pipeline( + ColumnTransformer( + [ + ( + "Veh_Brand_Gas_Region", + OrdinalEncoder(), + ["VehBrand", "VehPower", "VehGas", "Region", "Area"], + ), + ("Continious", "passthrough", ["VehAge", "DrivAge", "BonusMalus"]), + ("Density_log", make_pipeline( + FunctionTransformer(np.log, validate=False), StandardScaler()), + ["Density"]), + ], + remainder="drop", + ), + GradientBoostingRegressor() +) +gbr.fit(df_train, df_train.Frequency.values, + gradientboostingregressor__sample_weight=df_train.Exposure.values) print("GradientBoostingRegressor") @@ -224,7 +254,7 @@ def score_estimator(estimator, df_test, eps=1e-5): axes[0].set_title('Experimental data') for idx, model in enumerate([linregr, glm_freq, gbr]): - y_pred = model.predict(X_train) + y_pred = model.predict(df_train) pd.Series(y_pred).hist(bins=np.linspace(-1, 8, 50), ax=axes[idx+1]) axes[idx + 1].set_title(model.__class__.__name__) From b1eb611f1d7364af25f1d8f4222ff0a9cb494efa Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 1 Sep 2019 16:49:52 +0200 Subject: [PATCH 160/269] Minor fixes for user guide --- doc/modules/linear_model.rst | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 622ab335059ab..29c329f6f333a 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -874,7 +874,7 @@ with 'log' loss, which might be even faster but requires more tuning. It is possible to obtain the p-values and confidence intervals for coefficients in cases of regression without penalization. The `statsmodels package ` natively supports this. - Within sklearn, one could use bootstrapping instead as well. + Within sklearn, one could use bootstrapping instead as well. :class:`LogisticRegressionCV` implements Logistic Regression with built-in @@ -919,8 +919,8 @@ The unit deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` likelihood as .. math:: d(y, \mu) = -2\phi\cdot - \left( log P(y|\mu,\phi) - - log P(y|y,\phi)\right) + \left( \log p(y|\mu,\phi) + - \log p(y|y,\phi)\right). The following table lists some specific EDM distributions—all are Tweedie distributions—and some of their properties. 
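To make the usage guidance in the hunks that follow concrete, here is a minimal sketch (illustrative only, not part of the patch; it assumes the ``TweedieRegressor`` API added earlier in this series, where ``power`` selects the assumed target distribution)::

    import numpy as np
    from sklearn.linear_model import TweedieRegressor

    X = np.array([[1.0], [2.0], [3.0], [4.0]])
    y = np.array([0.5, 1.0, 2.0, 4.5])  # strictly positive target

    # power=0 ~ Normal, power=1 ~ Poisson, power=2 ~ Gamma,
    # 1 < power < 2 ~ compound Poisson-Gamma (e.g. total claim amounts).
    for power in (0, 1, 1.5, 2):
        glm = TweedieRegressor(power=power, alpha=0.1, link='log')
        glm.fit(X, y)
        print(power, glm.predict([[2.5]]))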
@@ -939,7 +939,7 @@ Usage ----- In the following use cases, a loss different from the squared loss might be -appropriate, +appropriate: * If the target values :math:`y` are counts (non-negative integer valued) or frequencies (non-negative), you might use a Poisson deviance with log-link. @@ -960,7 +960,7 @@ log-link `link='log'` with :math:`h(x^\top w)=\exp(x^\top w)`. :class:`TweedieRegressor` implements a generalized linear model for the Tweedie distribution, that allows to model any of the above mentioned distributions using the appropriate ``power`` parameter, i.e. the exponent -of the unit variance function, +of the unit variance function: - ``power = 0``: Normal distribution. Specialized solvers such as :class:`Ridge`, :class:`ElasticNet` are generally @@ -987,15 +987,15 @@ of the unit variance function, together with :math:`s=\mathrm{exposure}` as sample weights. This is done in both examples linked below. * The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\\frac{\\phi}{s_i} v(\\mu_i)`. - * If the target y is a ratio, appropriate sample weights s should be + the first two moments to be :math:`E[Y_i]=\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\frac{\phi}{s_i} v(\mu_i)`. + * If the target `y` is a ratio, appropriate sample weights ``s`` should be provided. As an example, consider Poisson distributed counts z (integers) and weights s=exposure (time, money, persons years, ...). Then you fit y = z/s, i.e. ``PoissonRegressor.fit(X, y, sample_weight=s)``. The weights are necessary for the right (finite sample) mean. - Consider :math:`\\bar{y} = \\frac{\\sum_i s_i y_i}{\\sum_i s_i}`, + Consider :math:`\bar{y} = \frac{\\sum_i s_i y_i}{\sum_i s_i}`, in this case one might say that y has a 'scaled' Poisson distributions. The same holds for other distributions. From d964c01324a9945253549fbfb2ec1d3b3b18b79f Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 1 Sep 2019 19:01:08 +0200 Subject: [PATCH 161/269] EXA Poisson: minor changes --- ...plot_poisson_regression_non_normal_loss.py | 37 +++++++++++-------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 7ab772fb59920..b71fba9236ef5 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -3,11 +3,11 @@ Poisson regression and non-normal loss ====================================== -This example illustrates the use of linear Poisson regression +This example illustrates the use of log-linear Poisson regression on the French Motor Third-Party Liability Claims dataset [1] and compares it with models learned with least squared error. The goal is to predict the -number of insurance claims (or frequency) following car accidents for a user -given historical data over a population of users. +number of insurance claims (or frequency) following car accidents for a +policyholder given historical data over a population of policyholders. We start by defining a few helper functions for loading the data and visualizing results. @@ -48,7 +48,8 @@ def load_mtpl2(n_samples=100000): Parameters ---------- n_samples: int, default=100000 - number of samples to select (for faster run time). + number of samples to select (for faster run time). Full dataset has + 678013 samples. 
""" # freMTPL2freq dataset from https://www.openml.org/d/41214 @@ -76,14 +77,15 @@ def load_mtpl2(n_samples=100000): # 1. Loading datasets and pre-processing # -------------------------------------- # -# We construct the freMTPL2 dataset by joining the freMTPL2freq table, +# We construct the freMTPL2 dataset by joining the freMTPL2freq table, # containing the number of claims (``ClaimNb``) with the freMTPL2sev table -# containing the claim amount (``ClaimAmount``) for the same user ids. +# containing the claim amount (``ClaimAmount``) for the same policy ids +# (``IDpol``). df = load_mtpl2(n_samples=50000) # Note: filter out claims with zero amount, as the severity model -# requires a strictly positive target values. +# requires strictly positive target values. df.loc[(df.ClaimAmount == 0) & (df.ClaimNb >= 1), "ClaimNb"] = 0 # correct for unreasonable observations (that might be data error) @@ -116,9 +118,9 @@ def load_mtpl2(n_samples=100000): # The number of claims (``ClaimNb``) is a positive integer that can be modeled # as a Poisson distribution. It is then assumed to be the number of discrete # events occurring with a constant rate in a given time interval -# (``Exposure``). Here we model the frequency ``y = ClaimNb / Exposure``, -# which is still a (scaled) Poisson distribution, and use ``Exposure`` as -# `sample_weight`. +# (``Exposure``, in units of years). Here we model the frequency +# ``y = ClaimNb / Exposure``, which is still a (scaled) Poisson distribution, +# and use ``Exposure`` as `sample_weight`. df["Frequency"] = df.ClaimNb / df.Exposure @@ -126,20 +128,23 @@ def load_mtpl2(n_samples=100000): pd.cut(df.Frequency, [-1e-6, 1e-6, 1, 2, 3, 4, 5]).value_counts() ) +print("Average Frequency = {}" + .format(np.average(df.Frequency, weights=df.Exposure))) + ############################################################################## # -# It worth noting that 96 % of users have 0 claims, and if we were to convert -# this problem into a binary classification task, it would be significantly -# imbalanced. +# It worth noting that 96 % of policyholders have zero claims, and if we were +# to convert this problem into a binary classification task, it would be +# significantly imbalanced. # # To evaluate the pertinence of the used metrics, we will consider as a -# baseline an estimator that returns 0 for any input. +# baseline an estimator that returns the mean of the training sample. 
df_train, df_test = train_test_split(df, random_state=0) dummy = make_pipeline( column_trans, - DummyRegressor(strategy='constant', constant=0) + DummyRegressor(strategy='mean') ) dummy.fit(df_train, df_train.Frequency, dummyregressor__sample_weight=df_train.Exposure) @@ -257,7 +262,7 @@ def score_estimator(estimator, df_test, eps=1e-5): y_pred = model.predict(df_train) pd.Series(y_pred).hist(bins=np.linspace(-1, 8, 50), ax=axes[idx+1]) - axes[idx + 1].set_title(model.__class__.__name__) + axes[idx + 1].set_title(model[-1].__class__.__name__) for axi in axes: axi.set( From a1844b8543a9a43fadfc0523e48cd0135e58ba37 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Mon, 2 Sep 2019 20:29:56 +0200 Subject: [PATCH 162/269] Fix mu->y_pred and p->power --- doc/whats_new/v0.22.rst | 16 ++++++++-------- .../plot_tweedie_regression_insurance_claims.py | 4 ++-- sklearn/metrics/regression.py | 4 ++-- sklearn/metrics/scorer.py | 4 ++-- sklearn/metrics/tests/test_common.py | 4 ++-- sklearn/metrics/tests/test_regression.py | 10 +++++----- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 6e3d4822b261d..d6d52732ba714 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -241,10 +241,10 @@ Changelog :user:`Mohamed Maskani `, and :user:`Thomas Fan `. - |Feature| Add :class:`metrics.mean_tweedie_deviance` measuring the - Tweedie deviance for a power parameter ``p``. Also add mean Poisson deviance - :class:`metrics.mean_poisson_deviance` and mean Gamma deviance + Tweedie deviance for a power parameter ``power``. Also add mean Poisson + deviance :class:`metrics.mean_poisson_deviance` and mean Gamma deviance :class:`metrics.mean_gamma_deviance` that are special cases of the Tweedie - deviance for `p=1` and `p=2` respectively. + deviance for `power=1` and `power=2` respectively. :pr:`13938` by :user:`Christian Lorentzen ` and `Roman Yurchak`_. @@ -306,19 +306,19 @@ Changelog - |Enhancement| SVM now throws more specific error when fit on non-square data and kernel = precomputed. :class:`svm.BaseLibSVM` :pr:`14336` by :user:`Gregory Dexter `. - + :mod:`sklearn.tree` ................... - |Feature| Adds minimal cost complexity pruning, controlled by ``ccp_alpha``, to :class:`tree.DecisionTreeClassifier`, :class:`tree.DecisionTreeRegressor`, :class:`tree.ExtraTreeClassifier`, :class:`tree.ExtraTreeRegressor`, - :class:`ensemble.RandomForestClassifier`, + :class:`ensemble.RandomForestClassifier`, :class:`ensemble.RandomForestRegressor`, - :class:`ensemble.ExtraTreesClassifier`, + :class:`ensemble.ExtraTreesClassifier`, :class:`ensemble.ExtraTreesRegressor`, - :class:`ensemble.RandomTreesEmbedding`, - :class:`ensemble.GradientBoostingClassifier`, + :class:`ensemble.RandomTreesEmbedding`, + :class:`ensemble.GradientBoostingClassifier`, and :class:`ensemble.GradientBoostingRegressor`. :pr:`12887` by `Thomas Fan`_. diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index f866518b69db8..eb9769814ade5 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -426,7 +426,7 @@ class ClaimProdEstimator: """Total claim amount estimator. Computed as the product of the frequency model by the serverity model, - denormalized by exposure. Use Tweedie deviance with `p=1.5`. + denormalized by exposure. Use Tweedie deviance with `power=1.5`. 
""" def __init__(self, est_freq, est_sev): @@ -527,7 +527,7 @@ def score(self, X, y, sample_weight=None): "predicted, frequency*severity model": np.sum( est_prod.predict(X, exposure=df.Exposure.values) ), - "predicted, tweedie, p=%.2f" + "predicted, tweedie, power=%.2f" % glm_total.best_estimator_.family.power: np.sum( glm_total.best_estimator_.predict(X) ), diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index 73db0acc945e6..706c484334d21 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -693,7 +693,7 @@ def mean_poisson_deviance(y_true, y_pred, sample_weight=None): """Mean Poisson deviance regression loss. Poisson deviance is equivalent to the Tweedie deviance with - the power parameter `p=1`. + the power parameter `power=1`. Read more in the :ref:`User Guide `. @@ -730,7 +730,7 @@ def mean_gamma_deviance(y_true, y_pred, sample_weight=None): """Mean Gamma deviance regression loss. Gamma deviance is equivalent to the Tweedie deviance with - the power parameter `p=2`. It is invariant to scaling of + the power parameter `power=2`. It is invariant to scaling of the target variable, and mesures relative errors. Read more in the :ref:`User Guide `. diff --git a/sklearn/metrics/scorer.py b/sklearn/metrics/scorer.py index bf2892bdf83a2..e2496c83b666d 100644 --- a/sklearn/metrics/scorer.py +++ b/sklearn/metrics/scorer.py @@ -499,11 +499,11 @@ def make_scorer(score_func, greater_is_better=True, needs_proba=False, greater_is_better=False, squared=False) neg_mean_poisson_deviance_scorer = make_scorer( - mean_tweedie_deviance, p=1., greater_is_better=False + mean_tweedie_deviance, power=1., greater_is_better=False ) neg_mean_gamma_deviance_scorer = make_scorer( - mean_tweedie_deviance, p=2., greater_is_better=False + mean_tweedie_deviance, power=2., greater_is_better=False ) # Standard Classification Scores diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 6459f93c68449..a8cabe984e563 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -102,11 +102,11 @@ "median_absolute_error": median_absolute_error, "explained_variance_score": explained_variance_score, "r2_score": partial(r2_score, multioutput='variance_weighted'), - "mean_normal_deviance": partial(mean_tweedie_deviance, p=0), + "mean_normal_deviance": partial(mean_tweedie_deviance, power=0), "mean_poisson_deviance": mean_poisson_deviance, "mean_gamma_deviance": mean_gamma_deviance, "mean_compound_poisson_deviance": - partial(mean_tweedie_deviance, p=1.4), + partial(mean_tweedie_deviance, power=1.4), } CLASSIFICATION_METRICS = { diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 0f987a088bb84..c3947db5ed857 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -105,31 +105,31 @@ def test_regression_metrics_at_limits(): assert_allclose(mean_tweedie_deviance([0], [1.], power=power), 2 / (2 - power), rtol=1e-3) with pytest.raises(ValueError, - match="can only be used on strictly positive mu."): + match="can only be used on strictly positive y_pred."): mean_tweedie_deviance([0.], [0.], power=power) assert_almost_equal(mean_tweedie_deviance([0.], [0.], power=0), 0.00, 2) - msg = "only be used on non-negative y and strictly positive mu." + msg = "only be used on non-negative y and strictly positive y_pred." 
with pytest.raises(ValueError, match=msg): mean_tweedie_deviance([0.], [0.], power=1.0) power = 1.5 assert_allclose(mean_tweedie_deviance([0.], [1.], power=power), 2 / (2 - power)) - msg = "only be used on non-negative y and strictly positive mu." + msg = "only be used on non-negative y and strictly positive y_pred." with pytest.raises(ValueError, match=msg): mean_tweedie_deviance([0.], [0.], power=power) power = 2. assert_allclose(mean_tweedie_deviance([1.], [1.], power=power), 0.00, atol=1e-8) - msg = "can only be used on strictly positive y and mu." + msg = "can only be used on strictly positive y and y_pred." with pytest.raises(ValueError, match=msg): mean_tweedie_deviance([0.], [0.], power=power) power = 3. assert_allclose(mean_tweedie_deviance([1.], [1.], power=power), 0.00, atol=1e-8) - msg = "can only be used on strictly positive y and mu." + msg = "can only be used on strictly positive y and y_pred." with pytest.raises(ValueError, match=msg): mean_tweedie_deviance([0.], [0.], power=power) From f5133920b47070f5b252dc4d8015745195130c44 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 3 Sep 2019 18:34:08 +0200 Subject: [PATCH 163/269] EXA Tweedie: some improvements --- ...lot_tweedie_regression_insurance_claims.py | 56 +++++++++++-------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index eb9769814ade5..22a26d880a869 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -13,8 +13,8 @@ which are: 1. Model the number of claims with a Poisson distribution, the average - claim amount as a Gamma distribution and multiply the predictions of both in - order to get the total claim amount. + claim amount per claim, also known as severity, as a Gamma distribution and + multiply the predictions of both in order to get the total claim amount. 2. Model total claim amount directly, typically with a Tweedie distribution of Tweedie power :math:`p \\in (1, 2)`. @@ -42,6 +42,7 @@ from sklearn.compose import ColumnTransformer from sklearn.linear_model import PoissonRegressor, GammaRegressor from sklearn.linear_model import TweedieRegressor +from sklearn.metrics import mean_tweedie_deviance from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import FunctionTransformer, OneHotEncoder @@ -56,7 +57,8 @@ def load_mtpl2(n_samples=100000): Parameters ---------- n_samples: int, default=100000 - number of samples to select (for faster run time). + number of samples to select (for faster run time). Full dataset has + 678013 samples. """ # freMTPL2freq dataset from https://www.openml.org/d/41214 @@ -139,7 +141,7 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, df = load_mtpl2(n_samples=60000) # Note: filter out claims with zero amount, as the severity model -# requires a strictly positive target values. +# requires strictly positive target values. df.loc[(df.ClaimAmount == 0) & (df.ClaimNb >= 1), "ClaimNb"] = 0 # Correct for unreasonable observations (that might be data error) @@ -182,10 +184,10 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, # # The number of claims (``ClaimNb``) is a positive integer that can be modeled # as a Poisson distribution. 
It is then assumed to be the number of discrete -# events occuring with a constant rate in a given time interval (``Exposure``). -# Here we model the frequency ``y = ClaimNb / Exposure``, -# which is still a (scaled) Poisson distribution, and use ``Exposure`` as -# `sample_weight`. +# events occuring with a constant rate in a given time interval +# (``Exposure``, in units of years). Here we model the frequency +# ``y = ClaimNb / Exposure``, which is still a (scaled) Poisson distribution, +# and use ``Exposure`` as `sample_weight`. df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0) @@ -197,7 +199,10 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, def mean_deviance(estimator, y, y_pred, weights): if hasattr(estimator, "_family_instance"): - return estimator._family_instance.deviance(y, y_pred, weights) / len(y) + if weights is None: + weights = np.ones_like(y) + return (estimator._family_instance.deviance(y, y_pred, weights) + / np.sum(weights)) else: return np.nan @@ -320,10 +325,10 @@ def score_estimator( # # According to the observed data, the frequency of accidents is higher for # drivers younger than 30 years old, and it positively correlated with the -# `BonusMalus` variable. Out model is able to mostly correctly model +# `BonusMalus` variable. Our model is able to mostly correctly model # this behaviour. # -# 3. Severity model - Gamma Distribution +# 3. Severity model - Gamma distribution # --------------------------------------- # The mean claim amount or severity (`AvgClaimAmount`) can be empirically # shown to follow approximately a Gamma distribution. We fit a GLM model for @@ -333,7 +338,7 @@ def score_estimator( # # - We filter out ``ClaimAmount == 0`` as the Gamma distribution has support # on :math:`(0, \infty)`, not :math:`[0, \infty)`. -# - We use ``ClaimNb`` as sample weights. +# - We use ``ClaimNb`` as `sample_weight`. mask_train = df_train["ClaimAmount"] > 0 mask_test = df_test["ClaimAmount"] > 0 @@ -360,6 +365,8 @@ def score_estimator( ############################################################################## # +# Here, the scores for the test data call for caution as they are significantly +# worse than for the training data indicating an overfit. # Note that the resulting model is the average claim amount per claim. As such, # it is conditional on having at least one claim, and cannot be used to predict # the average claim amount per policy in general. @@ -412,10 +419,10 @@ def score_estimator( ############################################################################## # -# Overall the drivers age (``DrivAge``) has a weak impact on the claim +# Overall, the drivers age (``DrivAge``) has a weak impact on the claim # severity, both in observed and predicted data. # -# 4. Total Claims Amount -- Compound Poisson distribution +# 4. Total claim amount -- Compound Poisson distribution # ------------------------------------------------------- # # As mentionned in the introduction, the total claim amount can be modeled @@ -426,12 +433,16 @@ class ClaimProdEstimator: """Total claim amount estimator. Computed as the product of the frequency model by the serverity model, - denormalized by exposure. Use Tweedie deviance with `power=1.5`. + denormalized by exposure. For scores, use Tweedie deviance with + `power=1.5`. 
""" def __init__(self, est_freq, est_sev): + from sklearn.linear_model._glm.distribution import TweedieDistribution + self.est_freq = est_freq self.est_sev = est_sev + self._family_instance = TweedieDistribution(power=1.5) def predict(self, X, exposure): """Predict the total claim amount. @@ -442,14 +453,13 @@ def predict(self, X, exposure): def score(self, X, y, sample_weight=None): """Compute D², the percentage of deviance explained.""" - # TODO: remove this private import once d2_score is available - from sklearn.linear_model._glm.distribution import TweedieDistribution - + # TODO: use d2_score directly once it is available mu = self.predict(X, exposure=sample_weight) - family = TweedieDistribution(power=1.5) - dev = family.deviance(y, mu, weights=sample_weight) - y_mean = np.average(y, weights=sample_weight) - dev_null = family.deviance(y, y_mean, weights=sample_weight) + dev = mean_tweedie_deviance( + y, mu, sample_weight=sample_weight, power=1.5) + y_mean = np.average(y, weights=sample_weight) * np.ones_like(y) + dev_null = mean_tweedie_deviance( + y, y_mean, sample_weight=sample_weight, power=1.5) return 1. - dev / dev_null @@ -475,7 +485,7 @@ def score(self, X, y, sample_weight=None): from sklearn.model_selection import GridSearchCV -# exclude upper bound as power=2 does not support null y values. +# exclude upper bound as power>=2 does not support y=0. params = {"power": np.linspace(1 + 1e-4, 2 - 1e-4, 8)} From 84229a6d5fbe6cf9964f573496c66fe8c88bd2ab Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Tue, 3 Sep 2019 21:19:44 +0200 Subject: [PATCH 164/269] Fix doc test --- doc/modules/linear_model.rst | 2 +- .../linear_model/plot_poisson_regression_non_normal_loss.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 29c329f6f333a..ee418af1d414b 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -1004,7 +1004,7 @@ The estimator can be used as follows:: >>> from sklearn.linear_model import TweedieRegressor >>> reg = TweedieRegressor(power=1, alpha=0.5, link='log') >>> reg.fit([[0, 0], [0, 1], [2, 2]], [0, 1, 2]) - TweedieRegressor(alpha=0.5, power=1) + TweedieRegressor(alpha=0.5, link='log', power=1) >>> reg.coef_ array([0.2463..., 0.4337...]) >>> reg.intercept_ diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index b71fba9236ef5..d739c37d2bb60 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -206,8 +206,8 @@ def score_estimator(estimator, df_test, eps=1e-5): ############################################################################## # -# Finally we will consider a non linear model with Gradient boosting that -# still minimizes the least square error. Gradient Boostring Decision Trees do +# Finally, we will consider a non linear model with Gradient boosting that +# still minimizes the least square error. Gradient Boosting Decision Trees do # not require for categorical data to be one hot encoded, therefore here we use # a simpler pre-processing pipeline without ``KBinsDiscretizer`` and with # ``OrdinalEncoder`` instead of ``OneHotEncoder``. 
From 8c6c255cbad9cae5c82b5154f94f9a6a14cc6b3a Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 11 Sep 2019 15:20:10 +0200 Subject: [PATCH 165/269] Fix test --- sklearn/metrics/tests/test_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/tests/test_regression.py b/sklearn/metrics/tests/test_regression.py index 17bf7f828948c..f29e7d2ad1c13 100644 --- a/sklearn/metrics/tests/test_regression.py +++ b/sklearn/metrics/tests/test_regression.py @@ -118,7 +118,7 @@ def test_regression_metrics_at_limits(): power = 1.5 assert_allclose(mean_tweedie_deviance([0.], [1.], power=power), 2 / (2 - power)) - msg = "only be used on non-negative y_true and strictly positive y_pred." + msg = "only be used on non-negative y and strictly positive y_pred." with pytest.raises(ValueError, match=msg): mean_tweedie_deviance([0.], [0.], power=power) power = 2. From 0a2331385daca833429b82a3493471cb45329ac4 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 12 Sep 2019 17:17:41 +0200 Subject: [PATCH 166/269] EXA Use Ridge and remove eps --- ...plot_poisson_regression_non_normal_loss.py | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index d739c37d2bb60..4fb16f6419209 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -30,7 +30,7 @@ from sklearn.datasets import fetch_openml from sklearn.dummy import DummyRegressor from sklearn.compose import ColumnTransformer -from sklearn.linear_model import PoissonRegressor, LinearRegression +from sklearn.linear_model import Ridge, PoissonRegressor from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import FunctionTransformer, OneHotEncoder @@ -152,22 +152,28 @@ def load_mtpl2(n_samples=100000): ############################################################################## # # The Poisson deviance cannot be computed on negative values predicted by the -# model, so we set the minimum predicted value to eps, +# model, so all models need to return positive preditions if we intend to +# use this metric, -def score_estimator(estimator, df_test, eps=1e-5): +def score_estimator(estimator, df_test): """Score an estimatr on the test set""" + y_pred = estimator.predict(df_test) + print("MSE: %.3f" % mean_squared_error( - df_test.Frequency.values, estimator.predict(df_test), + df_test.Frequency.values, y_pred, df_test.Exposure.values)) print("MAE: %.3f" % mean_absolute_error( - df_test.Frequency.values, estimator.predict(df_test), + df_test.Frequency.values, y_pred, df_test.Exposure.values)) + # ignore negative predictions + mask = y_pred > 0 + print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test.Frequency.values, np.fmax(estimator.predict(df_test), eps), - df_test.Exposure.values)) + df_test.Frequency.values[mask], y_pred[mask], + df_test.Exposure.values[mask])) print("DummyRegressor") @@ -178,16 +184,16 @@ def score_estimator(estimator, df_test, eps=1e-5): # We start by modeling the target variable with the least squares linear # regression model, -linregr = make_pipeline(column_trans, LinearRegression()) +linregr = make_pipeline(column_trans, Ridge(alpha=1.0)) linregr.fit(df_train, df_train.Frequency, - linearregression__sample_weight=df_train.Exposure) + 
ridge__sample_weight=df_train.Exposure) print('Number Negatives: %s / total: %s' % ( (linregr.predict(df_train) < 0).sum(), df_train.shape[0])) -print("LinearRegression") +print("Ridge") score_estimator(linregr, df_test) ############################################################################## @@ -196,7 +202,7 @@ def score_estimator(estimator, df_test, eps=1e-5): glm_freq = make_pipeline( column_trans, - PoissonRegressor(alpha=0, max_iter=1000) + PoissonRegressor(alpha=1/df_train.shape[0], max_iter=1000) ) glm_freq.fit(df_train, df_train.Frequency, poissonregressor__sample_weight=df_train.Exposure) From 976b436ebbeff0f6d21f9ae06352cb6bb25c174e Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 16 Sep 2019 13:26:16 +0200 Subject: [PATCH 167/269] Address comments in plot_poisson_regression_non_normal_loss.py --- ...plot_poisson_regression_non_normal_loss.py | 73 +++++++++++-------- 1 file changed, 41 insertions(+), 32 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 4fb16f6419209..fa1bc09a76285 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -23,6 +23,8 @@ # Authors: Christian Lorentzen # Roman Yurchak # License: BSD 3 clause +import warnings + import numpy as np import matplotlib.pyplot as plt import pandas as pd @@ -36,7 +38,7 @@ from sklearn.preprocessing import FunctionTransformer, OneHotEncoder from sklearn.preprocessing import OrdinalEncoder from sklearn.preprocessing import StandardScaler, KBinsDiscretizer -from sklearn.ensemble import GradientBoostingRegressor +from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import mean_squared_error, mean_absolute_error from sklearn.metrics import mean_poisson_deviance @@ -149,11 +151,6 @@ def load_mtpl2(n_samples=100000): dummy.fit(df_train, df_train.Frequency, dummyregressor__sample_weight=df_train.Exposure) -############################################################################## -# -# The Poisson deviance cannot be computed on negative values predicted by the -# model, so all models need to return positive preditions if we intend to -# use this metric, def score_estimator(estimator, df_test): @@ -168,11 +165,17 @@ def score_estimator(estimator, df_test): df_test.Frequency.values, y_pred, df_test.Exposure.values)) - # ignore negative predictions + # ignore negative predictions, as they are invalid for + # the Poisson deviance mask = y_pred > 0 + if (~mask).any(): + warnings.warn("estimator yields negative predictions for {} samples " + "out of {}. 
These will be ignored while computing the " + "poisson deviance".format((~mask).sum(), mask.shape[0])) print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test.Frequency.values[mask], y_pred[mask], + df_test.Frequency.values[mask], + y_pred[mask], df_test.Exposure.values[mask])) @@ -184,14 +187,21 @@ def score_estimator(estimator, df_test): # We start by modeling the target variable with the least squares linear # regression model, -linregr = make_pipeline(column_trans, Ridge(alpha=1.0)) +linregr = make_pipeline( + column_trans, + Ridge(alpha=1.0) +) linregr.fit(df_train, df_train.Frequency, ridge__sample_weight=df_train.Exposure) - -print('Number Negatives: %s / total: %s' % ( - (linregr.predict(df_train) < 0).sum(), - df_train.shape[0])) +############################################################################## +# +# The Poisson deviance cannot be computed on negative values predicted by the +# model. For models that do return a few negative predictions +# (e.g. :class:`linear_model.Ridge`) we ignore the corresponding samples, +# meaning that the obtained Poisson deviance is approximate. An alternative +# apporach could be to use class:`compose.TransformedTargetRegressor` +# meta-estimator to map ``y_pred`` to strictly positive domain. print("Ridge") score_estimator(linregr, df_test) @@ -212,40 +222,38 @@ def score_estimator(estimator, df_test): ############################################################################## # -# Finally, we will consider a non linear model with Gradient boosting that -# still minimizes the least square error. Gradient Boosting Decision Trees do +# Finally, we will consider a non linear model with a random forest that +# still minimizes the least square error. Random forest does # not require for categorical data to be one hot encoded, therefore here we use -# a simpler pre-processing pipeline without ``KBinsDiscretizer`` and with -# ``OrdinalEncoder`` instead of ``OneHotEncoder``. +# a simpler pre-processing pipeline with :class:`preprocessing.OrdinalEncoder`, gbr = make_pipeline( ColumnTransformer( [ ( - "Veh_Brand_Gas_Region", - OrdinalEncoder(), + "Veh_Brand_Gas_Region", OrdinalEncoder(), ["VehBrand", "VehPower", "VehGas", "Region", "Area"], ), - ("Continious", "passthrough", ["VehAge", "DrivAge", "BonusMalus"]), - ("Density_log", make_pipeline( - FunctionTransformer(np.log, validate=False), StandardScaler()), - ["Density"]), + ( + "Continious", "passthrough", + ["VehAge", "DrivAge", "BonusMalus", "Density"] + ), ], remainder="drop", ), - GradientBoostingRegressor() + RandomForestRegressor(min_weight_fraction_leaf=1e-2) ) gbr.fit(df_train, df_train.Frequency.values, - gradientboostingregressor__sample_weight=df_train.Exposure.values) + randomforestregressor__sample_weight=df_train.Exposure.values) -print("GradientBoostingRegressor") +print("RandomForestRegressor") score_estimator(gbr, df_test) ############################################################################## # -# In this example, although Gradient boosting minimizes the least square error, +# In this example, although random forest minimizes the least square error, # because of a higher predictive power it also results in a smaller Poisson # deviance than the Poisson regression model. # @@ -281,11 +289,12 @@ def score_estimator(estimator, df_test): # The experimental data presents a long tail distribution for ``y``. In all # models we predict the mean expected value, so we will have necessairily fewer # extreme values. 
Additionally normal distribution used in ``Ridge`` and -# ``GradientBoostingRegressor`` has a constant variance, while for the Poisson +# ``RandomForestRegressor`` has a constant variance, while for the Poisson # distribution used in ``PoissonRegressor``, the variance is proportional to # the mean predicted value. # -# Thus, among the considered estimators, -# ``PoissonRegressor`` and ``GradientBoostingRegressor`` are better suited for -# modeling the long tail distribution of the data as compared to the ``Ridge`` -# estimator. +# Thus, among the considered estimators, ``PoissonRegressor`` is better suited +# for modeling the long tail distribution of the data as compared to the +# ``Ridge`` and ``RandomForestRegressor`` estimators. + +plt.show() From 7c850d1a7c9a5f468ee0136ee91e2d662aefa4da Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 16 Sep 2019 13:28:06 +0200 Subject: [PATCH 168/269] Lint --- examples/linear_model/plot_poisson_regression_non_normal_loss.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index fa1bc09a76285..5098016f22913 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -152,7 +152,6 @@ def load_mtpl2(n_samples=100000): dummyregressor__sample_weight=df_train.Exposure) - def score_estimator(estimator, df_test): """Score an estimatr on the test set""" From f64dc4a4c51128c6d1017911e23760a866ca4007 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 16 Sep 2019 15:24:30 +0200 Subject: [PATCH 169/269] Simplify plot_tweedie_regression_insurance_claims.py example --- ...plot_poisson_regression_non_normal_loss.py | 2 - ...lot_tweedie_regression_insurance_claims.py | 85 ++++++------------- 2 files changed, 25 insertions(+), 62 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 5098016f22913..769f321ff1562 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -295,5 +295,3 @@ def score_estimator(estimator, df_test): # Thus, among the considered estimators, ``PoissonRegressor`` is better suited # for modeling the long tail distribution of the data as compared to the # ``Ridge`` and ``RandomForestRegressor`` estimators. 
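A small numeric illustration of the variance argument above: under the squared error implicitly assumed by ``Ridge`` and ``RandomForestRegressor``, observing ``y=10`` when predicting ``20`` costs one hundred times more than observing ``y=1`` when predicting ``2``, whereas the Poisson deviance, whose implied variance grows with the predicted mean, penalizes it only about ten times more.

from sklearn.metrics import mean_poisson_deviance, mean_squared_error

print(mean_squared_error([1.], [2.]), mean_squared_error([10.], [20.]))
# -> 1.0 and 100.0
print(mean_poisson_deviance([1.], [2.]), mean_poisson_deviance([10.], [20.]))
# -> approximately 0.61 and 6.14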
- -plt.show() diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 22a26d880a869..5a312f656d9ce 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -197,16 +197,6 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) -def mean_deviance(estimator, y, y_pred, weights): - if hasattr(estimator, "_family_instance"): - if weights is None: - weights = np.ones_like(y) - return (estimator._family_instance.deviance(y, y_pred, weights) - / np.sum(weights)) - else: - return np.nan - - def score_estimator( estimator, X_train, X_test, df_train, df_test, target, weights ): @@ -221,18 +211,25 @@ def score_estimator( for score_label, metric in [ ("D² explained", None), - ("mean deviance", partial(mean_deviance, estimator)), + ("mean deviance", mean_tweedie_deviance), ("mean abs. error", mean_absolute_error), ("mean squared error", mean_squared_error), ]: - if estimator.__class__.__name__ == "ClaimProdEstimator": - # ClaimProdEstimator is the product of frequency and severity - # models, denormalized by the exposure values. - # It does not fully follow the scikit-learn API and we - # must handle it separately. - y_pred = estimator.predict(X, exposure=df.Exposure.values) + if isinstance(estimator, tuple) and len(estimator) == 2: + # Score the model consisting of the product of frequency and + # severity models, denormalized by the exposure values. + est_freq, est_sev = estimator + y_pred = (df.Exposure.values * est_freq.predict(X) + * est_sev.predict(X)) + power = 1.5 else: y_pred = estimator.predict(X) + power = getattr(getattr(estimator, "_family_instance"), + "power") + + if score_label == "mean deviance": + metric = partial(mean_tweedie_deviance, power=power) + if metric is None: if not hasattr(estimator, "score"): continue @@ -248,7 +245,8 @@ def score_estimator( pd.DataFrame(res) .set_index(["metric", "subset"]) .score.unstack(-1) - .round(3) + .round(2) + .loc[:, ['train', 'test']] ) return res @@ -425,48 +423,16 @@ def score_estimator( # 4. Total claim amount -- Compound Poisson distribution # ------------------------------------------------------- # -# As mentionned in the introduction, the total claim amount can be modeled +# As mentioned in the introduction, the total claim amount can be modeled # either as the product of the frequency model by the severity model, +# denormalized by exposure. In the following code sample, the +# ``score_estimator`` is extended to score such a model. The mean deviance +# is computed assuming a Tweedie distribution with ``power=1.5`` to be +# comparable with the model from the following section, -class ClaimProdEstimator: - """Total claim amount estimator. - - Computed as the product of the frequency model by the serverity model, - denormalized by exposure. For scores, use Tweedie deviance with - `power=1.5`. - """ - - def __init__(self, est_freq, est_sev): - from sklearn.linear_model._glm.distribution import TweedieDistribution - - self.est_freq = est_freq - self.est_sev = est_sev - self._family_instance = TweedieDistribution(power=1.5) - - def predict(self, X, exposure): - """Predict the total claim amount. - - The predict method is not compatible with the scikit-learn API. 
- """ - return exposure * self.est_freq.predict(X) * self.est_sev.predict(X) - - def score(self, X, y, sample_weight=None): - """Compute D², the percentage of deviance explained.""" - # TODO: use d2_score directly once it is available - mu = self.predict(X, exposure=sample_weight) - dev = mean_tweedie_deviance( - y, mu, sample_weight=sample_weight, power=1.5) - y_mean = np.average(y, weights=sample_weight) * np.ones_like(y) - dev_null = mean_tweedie_deviance( - y, y_mean, sample_weight=sample_weight, power=1.5) - return 1. - dev / dev_null - - -est_prod = ClaimProdEstimator(glm_freq, glm_sev) - scores = score_estimator( - est_prod, + (glm_freq, glm_sev), X_train, X_test, df_train, @@ -479,7 +445,8 @@ def score(self, X, y, sample_weight=None): ############################################################################## # -# or as a unique Compound Poisson model, also corresponding to a Tweedie model +# Indeed, an alternative approach for modeling the total loss is with a unique +# Compound Poisson model, also corresponding to a Tweedie model # with a power :math:`p \in (1, 2)`. We determine the optimal hyperparameter # ``p`` with a grid search, @@ -535,7 +502,7 @@ def score(self, X, y, sample_weight=None): "subset": subset_label, "observed": df.ClaimAmount.values.sum(), "predicted, frequency*severity model": np.sum( - est_prod.predict(X, exposure=df.Exposure.values) + df.Exposure.values*glm_freq.predict(X)*glm_sev.predict(X) ), "predicted, tweedie, power=%.2f" % glm_total.best_estimator_.family.power: np.sum( @@ -545,5 +512,3 @@ def score(self, X, y, sample_weight=None): ) print(pd.DataFrame(res).set_index("subset").T) - -plt.show() From b1f5bde2ea12f7195d5efcf30d3a28fa9db1ef7f Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 18 Sep 2019 16:57:48 +0200 Subject: [PATCH 170/269] Add "lift curve" for model validation in Poisson example --- ...plot_poisson_regression_non_normal_loss.py | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 769f321ff1562..3cdbc7cc1a789 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -39,6 +39,7 @@ from sklearn.preprocessing import OrdinalEncoder from sklearn.preprocessing import StandardScaler, KBinsDiscretizer from sklearn.ensemble import RandomForestRegressor +from sklearn.utils import gen_batches from sklearn.metrics import mean_squared_error, mean_absolute_error from sklearn.metrics import mean_poisson_deviance @@ -295,3 +296,93 @@ def score_estimator(estimator, df_test): # Thus, among the considered estimators, ``PoissonRegressor`` is better suited # for modeling the long tail distribution of the data as compared to the # ``Ridge`` and ``RandomForestRegressor`` estimators. +# +# To ensure that estimators yield reasonable predictions for different +# policyholder types, we can bin test samples according to `y_pred` returned by +# each model. Then for each bin, compare the mean predicted `y_pred`, with +# the mean observed target. + + +def _lift_curve(y_true, y_pred, sample_weights=None, n_bins=100): + """Compare predictions and observations for bins + ordered by y_pred + + We order the samples by ``y_pred`` and split it in bins. + In each bin the observed mean is compared with the predicted + mean. 
+ + Parameters + ---------- + y_true: array-like of shape (n_samples,) + Ground truth (correct) target values. + y_pred: array-like of shape (n_samples,) + Estimated target values. + sample_weight : array-like of shape (n_samples,) + Sample weights. + n_bins: int + number of bins to use + + Returns + ------- + bin_centers: ndarray of shape (n_bins,) + bin centers + y_true_bin: ndarray of shape (n_bins,) + average y_pred for each bin + y_pred_bin: ndarray of shape (n_bins,) + average y_pred for each bin + """ + idx_sort = np.argsort(y_pred) + + bin_centers = np.arange(0, 1, 1/n_bins) + 0.5/n_bins + + y_pred_bin = np.zeros(n_bins) + y_true_bin = np.zeros(n_bins) + bin_size = len(y_true) // n_bins + for n, sl in enumerate(gen_batches(len(y_true), bin_size)): + weights = sample_weights[idx_sort][sl] + y_pred_bin[n] = np.average( + y_pred[idx_sort][sl], weights=weights + ) + y_true_bin[n] = np.average( + y_true[idx_sort][sl], + weights=weights + ) + return bin_centers, y_true_bin, y_pred_bin + + +fig, ax = plt.subplots(1, 3, figsize=(12, 3.2)) +plt.subplots_adjust(wspace=0.3) + + +for axi, (label, model, color) in zip(ax, [ + ('Ridge', linregr, 'b'), + ('PoissonRegressor', glm_freq, 'k'), + ('Random Forest', gbr, 'r') +]): + y_pred = model.predict(df_test) + + q, y_true_seg, y_pred_seg = _lift_curve( + df_test.Frequency.values, + y_pred, + sample_weights=df_test.Exposure.values, + n_bins=10) + + axi.plot(q, y_pred_seg, 'o'+color, label="predictions", ms=5) + axi.step(q, y_true_seg, '--'+color, label="observations", + where='mid') + axi.set_xlim(0, 1.0) + axi.set( + title=label, + xlabel='Fraction of samples sorted by y_pred', + ylabel='Mean Frequency (y_pred)' + + ) + + axi.legend() + + +############################################################################## +# +# On the above figure, ``PoissonRegressor`` is the model which presents the +# best consistency between predicted and observed targets, both for low +# and high target values. 
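The per-bin comparison implemented by this helper can also be sketched compactly with pandas; the arrays below are synthetic and only stand in for test-set predictions, observations and exposures.

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
y_pred = rng.gamma(shape=1.0, scale=0.1, size=1000)   # toy predicted frequencies
y_true = rng.poisson(y_pred).astype(float)            # toy observed frequencies
exposure = np.ones_like(y_pred)                       # toy exposures

df_eval = pd.DataFrame({"y_true": y_true, "y_pred": y_pred,
                        "exposure": exposure})
# group the samples into deciles of y_pred and compare the exposure-weighted
# mean prediction with the exposure-weighted mean observation in each decile
df_eval["bin"] = pd.qcut(df_eval["y_pred"], 10, labels=False)
summary = df_eval.groupby("bin").apply(
    lambda g: pd.Series({
        "mean y_pred": np.average(g["y_pred"], weights=g["exposure"]),
        "mean y_true": np.average(g["y_true"], weights=g["exposure"]),
    })
)
print(summary)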
From a9ab4e4975d3b8b1bf27e73561bbadc921aac5bf Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 25 Sep 2019 19:24:11 +0200 Subject: [PATCH 171/269] Various improvements to the model comparison example --- ...plot_poisson_regression_non_normal_loss.py | 153 +++++++++--------- 1 file changed, 79 insertions(+), 74 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 3cdbc7cc1a789..5c044d5530bd8 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -39,7 +39,7 @@ from sklearn.preprocessing import OrdinalEncoder from sklearn.preprocessing import StandardScaler, KBinsDiscretizer from sklearn.ensemble import RandomForestRegressor -from sklearn.utils import gen_batches +from sklearn.utils import gen_even_slices from sklearn.metrics import mean_squared_error, mean_absolute_error from sklearn.metrics import mean_poisson_deviance @@ -149,8 +149,8 @@ def load_mtpl2(n_samples=100000): column_trans, DummyRegressor(strategy='mean') ) -dummy.fit(df_train, df_train.Frequency, - dummyregressor__sample_weight=df_train.Exposure) +dummy.fit(df_train, df_train["Frequency"], + dummyregressor__sample_weight=df_train["Exposure"]) def score_estimator(estimator, df_test): @@ -159,11 +159,11 @@ def score_estimator(estimator, df_test): y_pred = estimator.predict(df_test) print("MSE: %.3f" % mean_squared_error( - df_test.Frequency.values, y_pred, - df_test.Exposure.values)) + df_test["Frequency"], y_pred, + df_test["Exposure"])) print("MAE: %.3f" % mean_absolute_error( - df_test.Frequency.values, y_pred, - df_test.Exposure.values)) + df_test["Frequency"], y_pred, + df_test["Exposure"])) # ignore negative predictions, as they are invalid for # the Poisson deviance @@ -174,12 +174,12 @@ def score_estimator(estimator, df_test): "poisson deviance".format((~mask).sum(), mask.shape[0])) print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test.Frequency.values[mask], + df_test["Frequency"][mask], y_pred[mask], - df_test.Exposure.values[mask])) + df_test["Exposure"][mask])) -print("DummyRegressor") +print("Constant mean frequency evaluation:") score_estimator(dummy, df_test) ############################################################################## @@ -187,12 +187,12 @@ def score_estimator(estimator, df_test): # We start by modeling the target variable with the least squares linear # regression model, -linregr = make_pipeline( +ridge = make_pipeline( column_trans, Ridge(alpha=1.0) ) -linregr.fit(df_train, df_train.Frequency, - ridge__sample_weight=df_train.Exposure) +ridge.fit(df_train, df_train["Frequency"], + ridge__sample_weight=df_train["Exposure"]) ############################################################################## # @@ -203,32 +203,33 @@ def score_estimator(estimator, df_test): # apporach could be to use class:`compose.TransformedTargetRegressor` # meta-estimator to map ``y_pred`` to strictly positive domain. 
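As a complement to the single train / test split used in this example, a cross-validated check of the Poisson model could look as follows, assuming the fitted ``poisson`` pipeline and ``df_train`` defined in the example; exposure weights are omitted here for brevity.

from sklearn.model_selection import cross_validate

cv_result = cross_validate(poisson, df_train, df_train["Frequency"],
                           cv=3, scoring="neg_mean_poisson_deviance")
print("CV mean Poisson deviance: %.3f"
      % -cv_result["test_score"].mean())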
-print("Ridge") -score_estimator(linregr, df_test) +print("Ridge evaluation:") +score_estimator(ridge, df_test) ############################################################################## # # Next we fit the Poisson regressor on the target variable, -glm_freq = make_pipeline( +poisson = make_pipeline( column_trans, PoissonRegressor(alpha=1/df_train.shape[0], max_iter=1000) ) -glm_freq.fit(df_train, df_train.Frequency, - poissonregressor__sample_weight=df_train.Exposure) +poisson.fit(df_train, df_train["Frequency"], + poissonregressor__sample_weight=df_train["Exposure"]) -print("PoissonRegressor") -score_estimator(glm_freq, df_test) +print("PoissonRegressor evaluation:") +score_estimator(poisson, df_test) ############################################################################## # -# Finally, we will consider a non linear model with a random forest that -# still minimizes the least square error. Random forest does -# not require for categorical data to be one hot encoded, therefore here we use -# a simpler pre-processing pipeline with :class:`preprocessing.OrdinalEncoder`, - - -gbr = make_pipeline( +# Finally, we will consider a non-linear model, namely a random forest. Random +# forests do not require the categorical data to be one-hot encoded, instead +# we encode each category label with an arbirtrary integer using +# :class:`preprocessing.OrdinalEncoder` to make the model faster to train (the +# same information is encoded with a small number of features than with +# one-hot encoding). + +rf = make_pipeline( ColumnTransformer( [ ( @@ -242,53 +243,55 @@ def score_estimator(estimator, df_test): ], remainder="drop", ), - RandomForestRegressor(min_weight_fraction_leaf=1e-2) + RandomForestRegressor(min_weight_fraction_leaf=0.01, n_jobs=2) ) -gbr.fit(df_train, df_train.Frequency.values, - randomforestregressor__sample_weight=df_train.Exposure.values) +rf.fit(df_train, df_train["Frequency"].values, + randomforestregressor__sample_weight=df_train["Exposure"].values) + +print("RandomForestRegressor evaluation:") +score_estimator(rf, df_test) -print("RandomForestRegressor") -score_estimator(gbr, df_test) ############################################################################## # -# In this example, although random forest minimizes the least square error, -# because of a higher predictive power it also results in a smaller Poisson -# deviance than the Poisson regression model. +# The random forest model also minimizes the conditional least square error. +# However because of a higher predictive power it also results in a smaller +# Poisson deviance than the Poisson regression model. # -# Evaluating models with a single train / test split is prone to numerical -# errors, we can verify that we would also get equivalent resuts with the -# cross-validation score. +# Not that Evaluating models with a single train / test split is prone to +# random fluctuations. We can verify that we would also get equivalent +# conclusions with cross-validated performance metrics. 
# -# The difference between these models can also be visualized by comparing the +# The qualitative difference between these models can also be visualized by comparing the # histogram of observed target values with that of predicted values, fig, axes = plt.subplots(1, 4, figsize=(16, 3)) fig.subplots_adjust(bottom=0.2) +n_bins = 20 +df_train["Frequency"].hist(bins=np.linspace(-1, 10, n_bins), ax=axes[0]) -df_train.Frequency.hist(bins=np.linspace(-1, 10, 50), ax=axes[0]) +axes[0].set_title("Data") +axes[0].set_xlabel("y (observed Frequency)") -axes[0].set_title('Experimental data') - -for idx, model in enumerate([linregr, glm_freq, gbr]): +for idx, model in enumerate([ridge, poisson, rf]): y_pred = model.predict(df_train) - pd.Series(y_pred).hist(bins=np.linspace(-1, 8, 50), ax=axes[idx+1]) + pd.Series(y_pred).hist(bins=np.linspace(-1, 4, n_bins), ax=axes[idx+1]) axes[idx + 1].set_title(model[-1].__class__.__name__) for axi in axes: axi.set( yscale='log', - xlabel="y (Frequency)" + xlabel="y_pred (predicted expected Frequency)" ) ############################################################################## # # The experimental data presents a long tail distribution for ``y``. In all -# models we predict the mean expected value, so we will have necessairily fewer -# extreme values. Additionally normal distribution used in ``Ridge`` and +# models we predict the mean expected value, so we will have necessairily +# fewer extreme values. Additionally normal distribution used in ``Ridge`` and # ``RandomForestRegressor`` has a constant variance, while for the Poisson # distribution used in ``PoissonRegressor``, the variance is proportional to # the mean predicted value. @@ -298,14 +301,13 @@ def score_estimator(estimator, df_test): # ``Ridge`` and ``RandomForestRegressor`` estimators. # # To ensure that estimators yield reasonable predictions for different -# policyholder types, we can bin test samples according to `y_pred` returned by -# each model. Then for each bin, compare the mean predicted `y_pred`, with -# the mean observed target. +# policyholder types, we can bin test samples according to `y_pred` returned +# by each model. Then for each bin, compare the mean predicted `y_pred`, with +# the mean observed target: -def _lift_curve(y_true, y_pred, sample_weights=None, n_bins=100): - """Compare predictions and observations for bins - ordered by y_pred +def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, n_bins=100): + """Compare predictions and observations for bins ordered by y_pred We order the samples by ``y_pred`` and split it in bins. 
In each bin the observed mean is compared with the predicted @@ -332,14 +334,12 @@ def _lift_curve(y_true, y_pred, sample_weights=None, n_bins=100): average y_pred for each bin """ idx_sort = np.argsort(y_pred) - bin_centers = np.arange(0, 1, 1/n_bins) + 0.5/n_bins - y_pred_bin = np.zeros(n_bins) y_true_bin = np.zeros(n_bins) - bin_size = len(y_true) // n_bins - for n, sl in enumerate(gen_batches(len(y_true), bin_size)): - weights = sample_weights[idx_sort][sl] + + for n, sl in enumerate(gen_even_slices(len(y_true), n_bins)): + weights = sample_weight[idx_sort][sl] y_pred_bin[n] = np.average( y_pred[idx_sort][sl], weights=weights ) @@ -350,39 +350,44 @@ def _lift_curve(y_true, y_pred, sample_weights=None, n_bins=100): return bin_centers, y_true_bin, y_pred_bin -fig, ax = plt.subplots(1, 3, figsize=(12, 3.2)) +fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 3.2)) plt.subplots_adjust(wspace=0.3) - -for axi, (label, model, color) in zip(ax, [ - ('Ridge', linregr, 'b'), - ('PoissonRegressor', glm_freq, 'k'), - ('Random Forest', gbr, 'r') +for axi, (label, model) in zip(ax, [ + ('Ridge', ridge), + ('PoissonRegressor', poisson), + ('Random Forest', rf) ]): y_pred = model.predict(df_test) - q, y_true_seg, y_pred_seg = _lift_curve( - df_test.Frequency.values, + q, y_true_seg, y_pred_seg = _mean_frequency_by_risk_group( + df_test["Frequency"].values, y_pred, - sample_weights=df_test.Exposure.values, - n_bins=10) + sample_weights=df_test["Exposure"].values, + n_bins=5) - axi.plot(q, y_pred_seg, 'o'+color, label="predictions", ms=5) - axi.step(q, y_true_seg, '--'+color, label="observations", - where='mid') + axi.plot(q, y_pred_seg, marker='o', linestyle="-", label="predictions") + axi.plot(q, y_true_seg, marker='x', linestyle="--", label="observations") axi.set_xlim(0, 1.0) + axi.set_ylim(0, 0.3) axi.set( title=label, xlabel='Fraction of samples sorted by y_pred', ylabel='Mean Frequency (y_pred)' ) - axi.legend() ############################################################################## # # On the above figure, ``PoissonRegressor`` is the model which presents the -# best consistency between predicted and observed targets, both for low -# and high target values. +# best consistency between predicted and observed targets, both for low and +# high target values. +# +# The ridge regression model tends to predict very low expected frequencies +# that do not match the data. +# +# The random forest regression model also tends to exaggerate low predicted +# frequencies although to a lower extent than ridge. It also tends to +# exaggerate high frequencies on the other hand. From be7bb67e2859b6c74854aeb942a008c3947757e2 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 25 Sep 2019 19:31:51 +0200 Subject: [PATCH 172/269] Add cumulated claims plot --- ...plot_poisson_regression_non_normal_loss.py | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 5c044d5530bd8..cbfbfc45dd703 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -391,3 +391,48 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, n_bins=100 # The random forest regression model also tends to exaggerate low predicted # frequencies although to a lower extent than ridge. It also tends to # exaggerate high frequencies on the other hand. 
+ + + +def _cumulated_claims(y_true, y_pred, exposure): + idx_sort = np.argsort(y_pred)[::-1] + sorted_exposure = exposure[idx_sort] + sorted_frequencies = y_true[idx_sort] + cumulated_exposure = np.cumsum(sorted_exposure) + cumulated_exposure /= cumulated_exposure[-1] + cumulated_claims = np.cumsum(sorted_exposure * sorted_frequencies) + cumulated_claims /= cumulated_claims[-1] + return cumulated_exposure, cumulated_claims + + +fig, ax = plt.subplots(figsize=(8, 8)) +plt.subplots_adjust(wspace=0.3) + +for (label, model) in [ + ('Ridge', ridge), + ('PoissonRegressor', poisson), + ('Random Forest', rf) +]: + y_pred = model.predict(df_test) + cum_exposure, cum_claims = _cumulated_claims( + df_test["Frequency"].values, + y_pred, + df_test["Exposure"].values) + ax.plot(cum_exposure, cum_claims, linestyle="-", label=label) + +# Oracle model +cum_exposure, cum_claims = _cumulated_claims( + df_test["Frequency"].values, + df_test["Frequency"].values, + df_test["Exposure"].values) +ax.plot(cum_exposure, cum_claims, linestyle="-.", color="gray", label="Oracle") + +# Random Baseline +ax.plot([0, 1], [0, 1], linestyle="--", color="black", label="Random baseline") +ax.set( + title="Cumulated claims by model", + xlabel='Fraction of cumulated exposure (from riskiest to safest)', + ylabel='Fraction of cumulated number of claims' + +) +ax.legend() From 4125c20c9a5d06a0d3ed8241cceb6aaf582b350f Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 26 Sep 2019 09:05:17 +0200 Subject: [PATCH 173/269] Improve the cumulated nb claims plot --- ...plot_poisson_regression_non_normal_loss.py | 44 +++++++++++++++---- 1 file changed, 36 insertions(+), 8 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index cbfbfc45dd703..22a4b419cf483 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -263,8 +263,9 @@ def score_estimator(estimator, df_test): # random fluctuations. We can verify that we would also get equivalent # conclusions with cross-validated performance metrics. # -# The qualitative difference between these models can also be visualized by comparing the -# histogram of observed target values with that of predicted values, +# The qualitative difference between these models can also be visualized by +# comparing the histogram of observed target values with that of predicted +# values, fig, axes = plt.subplots(1, 4, figsize=(16, 3)) @@ -306,7 +307,8 @@ def score_estimator(estimator, df_test): # the mean observed target: -def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, n_bins=100): +def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, + n_bins=100): """Compare predictions and observations for bins ordered by y_pred We order the samples by ``y_pred`` and split it in bins. @@ -363,7 +365,7 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, n_bins=100 q, y_true_seg, y_pred_seg = _mean_frequency_by_risk_group( df_test["Frequency"].values, y_pred, - sample_weights=df_test["Exposure"].values, + sample_weight=df_test["Exposure"].values, n_bins=5) axi.plot(q, y_pred_seg, marker='o', linestyle="-", label="predictions") @@ -391,11 +393,21 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, n_bins=100 # The random forest regression model also tends to exaggerate low predicted # frequencies although to a lower extent than ridge. 
It also tends to # exaggerate high frequencies on the other hand. - +# +# However for some business applications we are not necessarily interested in +# the the ability of the model in predicting the expected frequency value but +# instead in predicting which customer profiles are the riskiest and which are +# the safest. In this case the model evaluation would cast the problem as a +# ranking problem rather than a regression problem. +# +# To compare the 3 models under this light on, one can plot the fraction +# of cumulated number of claims vs the fraction of cumulated of exposure +# for test samples ordered by the model predictions, from riskiest to safest +# according to each model: def _cumulated_claims(y_true, y_pred, exposure): - idx_sort = np.argsort(y_pred)[::-1] + idx_sort = np.argsort(y_pred)[::-1] # from riskiest to safest sorted_exposure = exposure[idx_sort] sorted_frequencies = y_true[idx_sort] cumulated_exposure = np.cumsum(sorted_exposure) @@ -420,7 +432,7 @@ def _cumulated_claims(y_true, y_pred, exposure): df_test["Exposure"].values) ax.plot(cum_exposure, cum_claims, linestyle="-", label=label) -# Oracle model +# Oracle model: y_pred == y_test cum_exposure, cum_claims = _cumulated_claims( df_test["Frequency"].values, df_test["Frequency"].values, @@ -433,6 +445,22 @@ def _cumulated_claims(y_true, y_pred, exposure): title="Cumulated claims by model", xlabel='Fraction of cumulated exposure (from riskiest to safest)', ylabel='Fraction of cumulated number of claims' - ) ax.legend() + +############################################################################## +# +# This plot reveals that the random forest model is almost uniformly the best +# at sorting customers by risk profiles even if the absolute value of the +# predicted expected frequencies are less well calibrated than for the linear +# Poisson model. +# +# +# All three models are significantly better than chance but also very far from +# making perfect predictions. +# +# This last point is expected due to the nature of the problem: the occurence +# of accidents is mostly dominated by environmental causes that are not +# captured in the columns of the dataset. 
+ +plt.show() From 0070d527e7b3399cae9a6bd6447b4de636191481 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 26 Sep 2019 10:41:02 +0200 Subject: [PATCH 174/269] Fix wrong xlabel in histogram plot --- .../plot_poisson_regression_non_normal_loss.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 22a4b419cf483..47e08b618758c 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -274,16 +274,15 @@ def score_estimator(estimator, df_test): df_train["Frequency"].hist(bins=np.linspace(-1, 10, n_bins), ax=axes[0]) axes[0].set_title("Data") +axes[0].set_yscale('log') axes[0].set_xlabel("y (observed Frequency)") for idx, model in enumerate([ridge, poisson, rf]): y_pred = model.predict(df_train) pd.Series(y_pred).hist(bins=np.linspace(-1, 4, n_bins), ax=axes[idx+1]) - axes[idx + 1].set_title(model[-1].__class__.__name__) - -for axi in axes: - axi.set( + axes[idx + 1].set( + title=model[-1].__class__.__name__, yscale='log', xlabel="y_pred (predicted expected Frequency)" ) From 9d6bb5258ac604bc523beb1c8b85357344a9929e Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 26 Sep 2019 12:03:51 +0200 Subject: [PATCH 175/269] More example improvements (preprocessors + plots) --- ...plot_poisson_regression_non_normal_loss.py | 130 +++++++++--------- 1 file changed, 63 insertions(+), 67 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 47e08b618758c..76f957c57b6da 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -40,6 +40,7 @@ from sklearn.preprocessing import StandardScaler, KBinsDiscretizer from sklearn.ensemble import RandomForestRegressor from sklearn.utils import gen_even_slices +from sklearn.metrics import auc from sklearn.metrics import mean_squared_error, mean_absolute_error from sklearn.metrics import mean_poisson_deviance @@ -95,26 +96,33 @@ def load_mtpl2(n_samples=100000): df["ClaimNb"] = df["ClaimNb"].clip(upper=4) df["Exposure"] = df["Exposure"].clip(upper=1) -column_trans = ColumnTransformer( +############################################################################## +# +# The remaining columns can be used to predict the frequency of claim events. +# Those columns are very heterogeneous with a mix of categorical and numeric +# variables with different scales, possibly with heavy tails. 
+# +# In order to fit linear models with those predictors it is therefore +# necessary to perform standard feature transformation as follows: + +log_scale_transformer = make_pipeline( + FunctionTransformer(np.log, validate=False), + StandardScaler() +) + +linear_model_preprocessor = ColumnTransformer( [ - ("Veh_Driv_Age", KBinsDiscretizer(n_bins=10), ["VehAge", "DrivAge"]), - ( - "Veh_Brand_Gas_Region", - OneHotEncoder(), - ["VehBrand", "VehPower", "VehGas", "Region", "Area"], - ), - ("BonusMalus", "passthrough", ["BonusMalus"]), - ( - "Density_log", - make_pipeline( - FunctionTransformer(np.log, validate=False), StandardScaler() - ), - ["Density"], - ), + ("passthrough_numeric", "passthrough", + ["BonusMalus"]), + ("binned_numeric", KBinsDiscretizer(n_bins=10), + ["VehAge", "DrivAge"]), + ("log_scaled_numeric", log_scale_transformer, + ["Density"]), + ("onehot_categorical", OneHotEncoder(), + ["VehBrand", "VehPower", "VehGas", "Region", "Area"]), ], remainder="drop", ) -X = column_trans.fit_transform(df) ############################################################################## # @@ -141,12 +149,13 @@ def load_mtpl2(n_samples=100000): # significantly imbalanced. # # To evaluate the pertinence of the used metrics, we will consider as a -# baseline an estimator that returns the mean of the training sample. +# baseline an estimator that constantly predicts the mean frequency of the +# training sample. df_train, df_test = train_test_split(df, random_state=0) dummy = make_pipeline( - column_trans, + linear_model_preprocessor, DummyRegressor(strategy='mean') ) dummy.fit(df_train, df_train["Frequency"], @@ -187,10 +196,7 @@ def score_estimator(estimator, df_test): # We start by modeling the target variable with the least squares linear # regression model, -ridge = make_pipeline( - column_trans, - Ridge(alpha=1.0) -) +ridge = make_pipeline(linear_model_preprocessor, Ridge(alpha=1.0)) ridge.fit(df_train, df_train["Frequency"], ridge__sample_weight=df_train["Exposure"]) @@ -211,7 +217,7 @@ def score_estimator(estimator, df_test): # Next we fit the Poisson regressor on the target variable, poisson = make_pipeline( - column_trans, + linear_model_preprocessor, PoissonRegressor(alpha=1/df_train.shape[0], max_iter=1000) ) poisson.fit(df_train, df_train["Frequency"], @@ -229,20 +235,17 @@ def score_estimator(estimator, df_test): # same information is encoded with a small number of features than with # one-hot encoding). 
+rf_preprocessor = ColumnTransformer( + [ + ("categorical", OrdinalEncoder(), + ["VehBrand", "VehPower", "VehGas", "Region", "Area"]), + ("numeric", "passthrough", + ["VehAge", "DrivAge", "BonusMalus", "Density"]), + ], + remainder="drop", +) rf = make_pipeline( - ColumnTransformer( - [ - ( - "Veh_Brand_Gas_Region", OrdinalEncoder(), - ["VehBrand", "VehPower", "VehGas", "Region", "Area"], - ), - ( - "Continious", "passthrough", - ["VehAge", "DrivAge", "BonusMalus", "Density"] - ), - ], - remainder="drop", - ), + rf_preprocessor, RandomForestRegressor(min_weight_fraction_leaf=0.01, n_jobs=2) ) rf.fit(df_train, df_train["Frequency"].values, @@ -351,14 +354,10 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, return bin_centers, y_true_bin, y_pred_bin -fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 3.2)) +fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 3.5)) plt.subplots_adjust(wspace=0.3) -for axi, (label, model) in zip(ax, [ - ('Ridge', ridge), - ('PoissonRegressor', poisson), - ('Random Forest', rf) -]): +for axi, model in zip(ax, [ridge, poisson, rf]): y_pred = model.predict(df_test) q, y_true_seg, y_pred_seg = _mean_frequency_by_risk_group( @@ -372,19 +371,19 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, axi.set_xlim(0, 1.0) axi.set_ylim(0, 0.3) axi.set( - title=label, + title=model[-1].__class__.__name__, xlabel='Fraction of samples sorted by y_pred', ylabel='Mean Frequency (y_pred)' ) axi.legend() - +plt.tight_layout() ############################################################################## # # On the above figure, ``PoissonRegressor`` is the model which presents the # best consistency between predicted and observed targets, both for low and -# high target values. +# high predicted target values. # # The ridge regression model tends to predict very low expected frequencies # that do not match the data. @@ -393,16 +392,16 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, # frequencies although to a lower extent than ridge. It also tends to # exaggerate high frequencies on the other hand. # -# However for some business applications we are not necessarily interested in -# the the ability of the model in predicting the expected frequency value but -# instead in predicting which customer profiles are the riskiest and which are -# the safest. In this case the model evaluation would cast the problem as a -# ranking problem rather than a regression problem. +# However, for some business applications, we are not necessarily interested +# in the the ability of the model in predicting the expected frequency value +# but instead in predicting which policyholder groups are the riskiest and +# which are the safest. In this case the model evaluation would cast the +# problem as a ranking problem rather than a regression problem. 
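Before turning to this ranking view, one quick global sanity check that could be added here (a sketch, not part of the example) is to compare the exposure-weighted mean prediction of each pipeline with the observed mean frequency on the test set; ``ridge``, ``poisson``, ``rf`` and ``df_test`` are the objects already defined above:

import numpy as np

for model in [ridge, poisson, rf]:
    mean_pred = np.average(model.predict(df_test),
                           weights=df_test["Exposure"])
    print(model[-1].__class__.__name__, "weighted mean y_pred:", mean_pred)
print("observed weighted mean Frequency:",
      np.average(df_test["Frequency"], weights=df_test["Exposure"]))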
# -# To compare the 3 models under this light on, one can plot the fraction -# of cumulated number of claims vs the fraction of cumulated of exposure -# for test samples ordered by the model predictions, from riskiest to safest -# according to each model: +# To compare the 3 models under this light on, one can plot the fraction of +# cumulated number of claims vs the fraction of cumulated of exposure for test +# samples ordered by the model predictions, from riskiest to safest according +# to each model: def _cumulated_claims(y_true, y_pred, exposure): @@ -417,18 +416,16 @@ def _cumulated_claims(y_true, y_pred, exposure): fig, ax = plt.subplots(figsize=(8, 8)) -plt.subplots_adjust(wspace=0.3) -for (label, model) in [ - ('Ridge', ridge), - ('PoissonRegressor', poisson), - ('Random Forest', rf) -]: +for model in [ridge, poisson, rf]: y_pred = model.predict(df_test) cum_exposure, cum_claims = _cumulated_claims( df_test["Frequency"].values, y_pred, df_test["Exposure"].values) + area = auc(cum_exposure, cum_claims) + label = "{} (area under curve: {:.3f})".format( + model[-1].__class__.__name__, area) ax.plot(cum_exposure, cum_claims, linestyle="-", label=label) # Oracle model: y_pred == y_test @@ -449,17 +446,16 @@ def _cumulated_claims(y_true, y_pred, exposure): ############################################################################## # -# This plot reveals that the random forest model is almost uniformly the best -# at sorting customers by risk profiles even if the absolute value of the -# predicted expected frequencies are less well calibrated than for the linear -# Poisson model. -# +# This plot reveals that the random forest model is slightly better at ranking +# policyholders by risk profiles even if the absolute value of the predicted +# expected frequencies are less well calibrated than for the linear Poisson +# model. # # All three models are significantly better than chance but also very far from # making perfect predictions. # -# This last point is expected due to the nature of the problem: the occurence -# of accidents is mostly dominated by environmental causes that are not +# This last point is expected due to the nature of the problem: the occurrence +# of accidents is mostly dominated by circumstantial causes that are not # captured in the columns of the dataset. plt.show() From b353b2dcdb25bd2e55dffaacda1b80856f4b5b78 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 26 Sep 2019 13:14:09 +0200 Subject: [PATCH 176/269] Simplify dataset + use more data --- ...plot_poisson_regression_non_normal_loss.py | 99 ++++++++----------- 1 file changed, 39 insertions(+), 60 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 76f957c57b6da..da2a4bd2dccf8 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -9,10 +9,6 @@ number of insurance claims (or frequency) following car accidents for a policyholder given historical data over a population of policyholders. -We start by defining a few helper functions for loading the data and -visualizing results. - - .. [1] A. Noll, R. Salzmann and M.V. Wuthrich, Case Study: French Motor Third-Party Liability Claims (November 8, 2018). 
`doi:10.2139/ssrn.3164764 `_ @@ -46,7 +42,7 @@ from sklearn.metrics import mean_poisson_deviance -def load_mtpl2(n_samples=100000): +def load_mtpl2(n_samples=None): """Fetcher for French Motor Third-Party Liability Claims dataset Parameters @@ -57,43 +53,27 @@ def load_mtpl2(n_samples=100000): """ # freMTPL2freq dataset from https://www.openml.org/d/41214 - df_freq = fetch_openml(data_id=41214, as_frame=True)['data'] - df_freq['IDpol'] = df_freq['IDpol'].astype(np.int) - df_freq.set_index('IDpol', inplace=True) - - # freMTPL2sev dataset from https://www.openml.org/d/41215 - df_sev = fetch_openml(data_id=41215, as_frame=True)['data'] - - # sum ClaimAmount over identical IDs - df_sev = df_sev.groupby('IDpol').sum() - - df = df_freq.join(df_sev, how="left") - df["ClaimAmount"].fillna(0, inplace=True) + df = fetch_openml(data_id=41214, as_frame=True)['data'] # unquote string fields for column_name in df.columns[df.dtypes.values == np.object]: df[column_name] = df[column_name].str.strip("'") - return df.iloc[:n_samples] + if n_samples is not None: + return df.iloc[:n_samples] + return df ############################################################################## # -# 1. Loading datasets and pre-processing -# -------------------------------------- +# Let's load the motor claim dataset. We ignore the severity data for this +# study for the sake of simplicitly. # -# We construct the freMTPL2 dataset by joining the freMTPL2freq table, -# containing the number of claims (``ClaimNb``) with the freMTPL2sev table -# containing the claim amount (``ClaimAmount``) for the same policy ids -# (``IDpol``). - -df = load_mtpl2(n_samples=50000) +# We also subsample the data for the sake of computational cost and running +# time. Using the full dataset would lead to similar conclusions. -# Note: filter out claims with zero amount, as the severity model -# requires strictly positive target values. -df.loc[(df.ClaimAmount == 0) & (df.ClaimNb >= 1), "ClaimNb"] = 0 +df = load_mtpl2(n_samples=300000) -# correct for unreasonable observations (that might be data error) -df["ClaimNb"] = df["ClaimNb"].clip(upper=4) +# Correct for unreasonable observations (that might be data error) df["Exposure"] = df["Exposure"].clip(upper=1) ############################################################################## @@ -133,14 +113,14 @@ def load_mtpl2(n_samples=100000): # ``y = ClaimNb / Exposure``, which is still a (scaled) Poisson distribution, # and use ``Exposure`` as `sample_weight`. -df["Frequency"] = df.ClaimNb / df.Exposure +df["Frequency"] = df["ClaimNb"] / df["Exposure"] print( - pd.cut(df.Frequency, [-1e-6, 1e-6, 1, 2, 3, 4, 5]).value_counts() + pd.cut(df["Frequency"], [-1e-6, 1e-6, 1, 2, 3, 4, 5]).value_counts() ) print("Average Frequency = {}" - .format(np.average(df.Frequency, weights=df.Exposure))) + .format(np.average(df["Frequency"], weights=df["Exposure"]))) ############################################################################## # @@ -262,13 +242,13 @@ def score_estimator(estimator, df_test): # However because of a higher predictive power it also results in a smaller # Poisson deviance than the Poisson regression model. # -# Not that Evaluating models with a single train / test split is prone to -# random fluctuations. We can verify that we would also get equivalent -# conclusions with cross-validated performance metrics. +# Evaluating models with a single train / test split is prone to random +# fluctuations. 
If computation resources allow, it should be verified that +# cross-validated performance metrics would lead to similar conclusions. # # The qualitative difference between these models can also be visualized by # comparing the histogram of observed target values with that of predicted -# values, +# values: fig, axes = plt.subplots(1, 4, figsize=(16, 3)) @@ -293,8 +273,8 @@ def score_estimator(estimator, df_test): ############################################################################## # # The experimental data presents a long tail distribution for ``y``. In all -# models we predict the mean expected value, so we will have necessairily -# fewer extreme values. Additionally normal distribution used in ``Ridge`` and +# models we predict the mean expected value, so we will have necessarily fewer +# extreme values. Additionally normal distribution used in ``Ridge`` and # ``RandomForestRegressor`` has a constant variance, while for the Poisson # distribution used in ``PoissonRegressor``, the variance is proportional to # the mean predicted value. @@ -364,12 +344,12 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, df_test["Frequency"].values, y_pred, sample_weight=df_test["Exposure"].values, - n_bins=5) + n_bins=10) axi.plot(q, y_pred_seg, marker='o', linestyle="-", label="predictions") axi.plot(q, y_true_seg, marker='x', linestyle="--", label="observations") axi.set_xlim(0, 1.0) - axi.set_ylim(0, 0.3) + axi.set_ylim(0, 0.6) axi.set( title=model[-1].__class__.__name__, xlabel='Fraction of samples sorted by y_pred', @@ -381,16 +361,13 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, ############################################################################## # -# On the above figure, ``PoissonRegressor`` is the model which presents the -# best consistency between predicted and observed targets, both for low and -# high predicted target values. -# -# The ridge regression model tends to predict very low expected frequencies -# that do not match the data. +# The ``Ridge`` regression model can predict very low expected frequencies +# that do not match the data. It can therefore severly under-estimate the risk +# for some policyholders. # -# The random forest regression model also tends to exaggerate low predicted -# frequencies although to a lower extent than ridge. It also tends to -# exaggerate high frequencies on the other hand. +# ``PoissonRegressor`` and ``RandomForestRegressor`` show better consistency +# between predicted and observed targets, especially for low predicted target +# values. # # However, for some business applications, we are not necessarily interested # in the the ability of the model in predicting the expected frequency value @@ -399,9 +376,8 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, # problem as a ranking problem rather than a regression problem. 
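A crude way to quantify this ranking ability directly (not used in the example, and of limited value here because most policies have zero observed claims, which produces many ties) would be a rank correlation between predictions and observed frequencies; the cumulative plot introduced next gives a much richer picture:

from scipy.stats import spearmanr

for model in [ridge, poisson, rf]:
    rho, _ = spearmanr(model.predict(df_test), df_test["Frequency"])
    print("%s: Spearman rho = %.4f" % (model[-1].__class__.__name__, rho))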
# # To compare the 3 models under this light on, one can plot the fraction of -# cumulated number of claims vs the fraction of cumulated of exposure for test -# samples ordered by the model predictions, from riskiest to safest according -# to each model: +# the number of claims vs the fraction of exposure for test samples ordered by +# the model predictions, from riskiest to safest according to each model: def _cumulated_claims(y_true, y_pred, exposure): @@ -433,16 +409,19 @@ def _cumulated_claims(y_true, y_pred, exposure): df_test["Frequency"].values, df_test["Frequency"].values, df_test["Exposure"].values) -ax.plot(cum_exposure, cum_claims, linestyle="-.", color="gray", label="Oracle") +area = auc(cum_exposure, cum_claims) +label = "Oracle (area under curve: {:.3f})".format(area) +ax.plot(cum_exposure, cum_claims, linestyle="-.", color="gray", label=label) # Random Baseline -ax.plot([0, 1], [0, 1], linestyle="--", color="black", label="Random baseline") +ax.plot([0, 1], [0, 1], linestyle="--", color="black", + label="Random baseline") ax.set( - title="Cumulated claims by model", - xlabel='Fraction of cumulated exposure (from riskiest to safest)', - ylabel='Fraction of cumulated number of claims' + title="Cumulated number of claims by model", + xlabel='Fraction of exposure (from riskiest to safest)', + ylabel='Fraction of number of claims' ) -ax.legend() +ax.legend(loc="lower right") ############################################################################## # From 88757fdb99cc516be230fe08ec1ebfb7bea0b694 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 26 Sep 2019 15:59:36 +0200 Subject: [PATCH 177/269] Remove solver parameter from {Poisson,Gamma,Tweedie}Regression --- sklearn/linear_model/_glm/glm.py | 54 +++++++++++--------------------- sklearn/neighbors/base.py | 8 ++--- 2 files changed, 22 insertions(+), 40 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index d6da8b8b80949..f7985c0f3bae3 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -80,7 +80,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Calls scipy's L-BFGS-B optimizer. max_iter : int, optional (default=100) - The maximal number of iterations for solver algorithms. + The maximal number of iterations for the solver. tol : float, optional (default=1e-4) Stopping criterion. For the lbfgs solver, @@ -113,7 +113,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Intercept (a.k.a. bias) added to linear predictor. n_iter_ : int - Actual number of iterations used in solver. + Actual number of iterations used in the solver. """ def __init__(self, alpha=1.0, fit_intercept=True, family='normal', link='auto', @@ -423,14 +423,8 @@ class PoissonRegressor(GeneralizedLinearRegressor): Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). - solver : {'lbfgs'}, optional (default='lbfgs') - Algorithm to use in the optimization problem: - - 'lbfgs' - Calls scipy's L-BFGS-B optimizer. - max_iter : int, optional (default=100) - The maximal number of iterations for solver algorithms. + The maximal number of iterations for the solver. tol : float, optional (default=1e-4) Stopping criterion. For the lbfgs solver, @@ -458,16 +452,16 @@ class PoissonRegressor(GeneralizedLinearRegressor): Intercept (a.k.a. bias) added to linear predictor. n_iter_ : int - Actual number of iterations used in solver. + Actual number of iterations used in the solver. 
""" def __init__(self, alpha=1.0, fit_intercept=True, - solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, + max_iter=100, tol=1e-4, warm_start=False, copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, - family="poisson", link='log', - solver=solver, max_iter=max_iter, tol=tol, - warm_start=warm_start, copy_X=copy_X, verbose=verbose) + family="poisson", link='log', max_iter=max_iter, + tol=tol, warm_start=warm_start, copy_X=copy_X, + verbose=verbose) @property def family(self): @@ -511,14 +505,8 @@ class GammaRegressor(GeneralizedLinearRegressor): Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). - solver : {'lbfgs'}, optional (default='lbfgs') - Algorithm to use in the optimization problem: - - 'lbfgs' - Calls scipy's L-BFGS-B optimizer. - max_iter : int, optional (default=100) - The maximal number of iterations for solver algorithms. + The maximal number of iterations for the solver. tol : float, optional (default=1e-4) Stopping criterion. For the lbfgs solver, @@ -546,16 +534,16 @@ class GammaRegressor(GeneralizedLinearRegressor): Intercept (a.k.a. bias) added to linear predictor. n_iter_ : int - Actual number of iterations used in solver. + Actual number of iterations used in the solver. """ - def __init__(self, alpha=1.0, fit_intercept=True, solver='lbfgs', + def __init__(self, alpha=1.0, fit_intercept=True, max_iter=100, tol=1e-4, warm_start=False, copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, - family="gamma", link='log', - solver=solver, max_iter=max_iter, tol=tol, - warm_start=warm_start, copy_X=copy_X, verbose=verbose) + family="gamma", link='log', max_iter=max_iter, + tol=tol, warm_start=warm_start, copy_X=copy_X, + verbose=verbose) @property def family(self): @@ -632,14 +620,8 @@ class TweedieRegressor(GeneralizedLinearRegressor): Specifies if a constant (a.k.a. bias or intercept) should be added to the linear predictor (X*coef+intercept). - solver : {'lbfgs'}, optional (default='lbfgs') - Algorithm to use in the optimization problem: - - 'lbfgs' - Calls scipy's L-BFGS-B optimizer. - max_iter : int, optional (default=100) - The maximal number of iterations for solver algorithms. + The maximal number of iterations for the solver. tol : float, optional (default=1e-4) Stopping criterion. For the lbfgs solver, @@ -666,15 +648,15 @@ class TweedieRegressor(GeneralizedLinearRegressor): Intercept (a.k.a. bias) added to linear predictor. n_iter_ : int - Actual number of iterations used in solver. + Actual number of iterations used in the solver. 
""" def __init__(self, power=0.0, alpha=1.0, fit_intercept=True, - link='auto', solver='lbfgs', max_iter=100, tol=1e-4, + link='auto', max_iter=100, tol=1e-4, warm_start=False, copy_X=True, check_input=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family=TweedieDistribution(power=power), link=link, - solver=solver, max_iter=max_iter, tol=tol, + max_iter=max_iter, tol=tol, warm_start=warm_start, copy_X=copy_X, verbose=verbose) @property diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index 9548a619b0b14..d178d607d3636 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -289,13 +289,13 @@ def _pairwise(self): return self.metric == 'precomputed' -def _tree_query_parallel_helper(tree, data, n_neighbors, return_distance): +def _tree_query_parallel_helper(tree, data, n_neighbors, return_distance, **kwargs): """Helper for the Parallel calls in KNeighborsMixin.kneighbors The Cython method tree.query is not directly picklable by cloudpickle under PyPy. """ - return tree.query(data, n_neighbors, return_distance) + return tree.query(data, n_neighbors, return_distance, **kwargs) class KNeighborsMixin: @@ -336,7 +336,7 @@ def _kneighbors_reduce_func(self, dist, start, result = neigh_ind return result - def kneighbors(self, X=None, n_neighbors=None, return_distance=True): + def kneighbors(self, X=None, n_neighbors=None, return_distance=True, **kwargs): """Finds the K-neighbors of a point. Returns indices of and distances to the neighbors of each point. @@ -458,7 +458,7 @@ class from an array representing our data set and ask who's parallel_kwargs = {"prefer": "threads"} result = Parallel(n_jobs, **parallel_kwargs)( delayed_query( - self._tree, X[s], n_neighbors, return_distance) + self._tree, X[s], n_neighbors, return_distance, **kwargs) for s in gen_even_slices(X.shape[0], n_jobs) ) else: From 6d119d43577e46f1a0dde29980df20434a920739 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 26 Sep 2019 16:02:15 +0200 Subject: [PATCH 178/269] Revert some accidental changes from 88757fdb99cc516be230fe08ec1ebfb7bea0b694. --- sklearn/neighbors/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index d178d607d3636..9548a619b0b14 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -289,13 +289,13 @@ def _pairwise(self): return self.metric == 'precomputed' -def _tree_query_parallel_helper(tree, data, n_neighbors, return_distance, **kwargs): +def _tree_query_parallel_helper(tree, data, n_neighbors, return_distance): """Helper for the Parallel calls in KNeighborsMixin.kneighbors The Cython method tree.query is not directly picklable by cloudpickle under PyPy. """ - return tree.query(data, n_neighbors, return_distance, **kwargs) + return tree.query(data, n_neighbors, return_distance) class KNeighborsMixin: @@ -336,7 +336,7 @@ def _kneighbors_reduce_func(self, dist, start, result = neigh_ind return result - def kneighbors(self, X=None, n_neighbors=None, return_distance=True, **kwargs): + def kneighbors(self, X=None, n_neighbors=None, return_distance=True): """Finds the K-neighbors of a point. Returns indices of and distances to the neighbors of each point. 
@@ -458,7 +458,7 @@ class from an array representing our data set and ask who's parallel_kwargs = {"prefer": "threads"} result = Parallel(n_jobs, **parallel_kwargs)( delayed_query( - self._tree, X[s], n_neighbors, return_distance, **kwargs) + self._tree, X[s], n_neighbors, return_distance) for s in gen_even_slices(X.shape[0], n_jobs) ) else: From b735eb786ca9ef03a9bbfc25851e62a4fbb71f3b Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 26 Sep 2019 16:23:12 +0200 Subject: [PATCH 179/269] Additional comment about the use of properties with setters --- sklearn/linear_model/_glm/distribution.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/linear_model/_glm/distribution.py index a5e42bcee5d1c..5f9e9ed06847c 100644 --- a/sklearn/linear_model/_glm/distribution.py +++ b/sklearn/linear_model/_glm/distribution.py @@ -245,6 +245,9 @@ def power(self): @power.setter def power(self, power): + # We use a property with a setter, to update lower and + # upper bound when the power parameter is updated e.g. in grid + # search. if not isinstance(power, numbers.Real): raise TypeError('power must be a real number, input was {0}' .format(power)) From 2d911143067a9ab0fb206cbbb0b13fa228955969 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 26 Sep 2019 16:43:13 +0200 Subject: [PATCH 180/269] Add additional tests for link derivatives --- sklearn/linear_model/_glm/tests/test_link.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_link.py b/sklearn/linear_model/_glm/tests/test_link.py index 36219e09b58e3..27ec4ed19bdc2 100644 --- a/sklearn/linear_model/_glm/tests/test_link.py +++ b/sklearn/linear_model/_glm/tests/test_link.py @@ -4,6 +4,7 @@ import numpy as np from numpy.testing import assert_allclose import pytest +from scipy.optimize import check_grad from sklearn.linear_model._glm.link import ( IdentityLink, @@ -15,12 +16,12 @@ LINK_FUNCTIONS = [IdentityLink, LogLink, LogitLink] -@pytest.mark.parametrize('link', LINK_FUNCTIONS) -def test_link_properties(link): +@pytest.mark.parametrize('Link', LINK_FUNCTIONS) +def test_link_properties(Link): """Test link inverse and derivative.""" rng = np.random.RandomState(42) x = rng.rand(100) * 100 - link = link() # instantiate object + link = Link() if isinstance(link, LogitLink): # careful for large x, note expit(36) = 1 # limit max eta to 15 @@ -30,3 +31,15 @@ def test_link_properties(link): # g = link, h = link.inverse assert_allclose(link.derivative(link.inverse(x)), 1 / link.inverse_derivative(x)) + + +@pytest.mark.parametrize('Link', LINK_FUNCTIONS) +def test_link_derivative(Link): + link = Link() + x = np.random.RandomState(0).rand(1) + err = check_grad(link, link.derivative, x) / link.derivative(x) + assert abs(err) < 1e-6 + + err = (check_grad(link.inverse, link.inverse_derivative, x) + / link.derivative(x)) + assert abs(err) < 1e-6 From 89103bc417646864a1aa85c616a3148cb26ac2ed Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Sun, 29 Sep 2019 23:06:18 +0200 Subject: [PATCH 181/269] cosmits + typos --- doc/modules/linear_model.rst | 4 +-- ...plot_poisson_regression_non_normal_loss.py | 36 +++++++++---------- ...lot_tweedie_regression_insurance_claims.py | 6 ++-- 3 files changed, 23 insertions(+), 23 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index a9b2e66599537..e53f309076b3b 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -996,8 +996,8 @@ 
of the unit variance function: weights s=exposure (time, money, persons years, ...). Then you fit y = z/s, i.e. ``PoissonRegressor.fit(X, y, sample_weight=s)``. The weights are necessary for the right (finite sample) mean. - Consider :math:`\bar{y} = \frac{\\sum_i s_i y_i}{\sum_i s_i}`, - in this case one might say that y has a 'scaled' Poisson distributions. + Considering :math:`\bar{y} = \frac{\\sum_i s_i y_i}{\sum_i s_i}`, + in this case one might say that y has a 'scaled' Poisson distribution. The same holds for other distributions. The estimator can be used as follows:: diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index da2a4bd2dccf8..d99654cf04080 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -129,8 +129,8 @@ def load_mtpl2(n_samples=None): # significantly imbalanced. # # To evaluate the pertinence of the used metrics, we will consider as a -# baseline an estimator that constantly predicts the mean frequency of the -# training sample. +# baseline a "dummy" estimator that constantly predicts the mean frequency of +# the training sample. df_train, df_test = train_test_split(df, random_state=0) @@ -143,16 +143,16 @@ def load_mtpl2(n_samples=None): def score_estimator(estimator, df_test): - """Score an estimatr on the test set""" + """Score an estimator on the test set""" y_pred = estimator.predict(df_test) - print("MSE: %.3f" % mean_squared_error( - df_test["Frequency"], y_pred, - df_test["Exposure"])) - print("MAE: %.3f" % mean_absolute_error( - df_test["Frequency"], y_pred, - df_test["Exposure"])) + print("MSE: %.3f" % + mean_squared_error(df_test["Frequency"], y_pred, + df_test["Exposure"])) + print("MAE: %.3f" % + mean_absolute_error(df_test["Frequency"], y_pred, + df_test["Exposure"])) # ignore negative predictions, as they are invalid for # the Poisson deviance @@ -160,12 +160,12 @@ def score_estimator(estimator, df_test): if (~mask).any(): warnings.warn("estimator yields negative predictions for {} samples " "out of {}. These will be ignored while computing the " - "poisson deviance".format((~mask).sum(), mask.shape[0])) + "Poisson deviance".format((~mask).sum(), mask.shape[0])) - print("mean Poisson deviance: %.3f" % mean_poisson_deviance( - df_test["Frequency"][mask], - y_pred[mask], - df_test["Exposure"][mask])) + print("mean Poisson deviance: %.3f" % + mean_poisson_deviance(df_test["Frequency"][mask], + y_pred[mask], + df_test["Exposure"][mask])) print("Constant mean frequency evaluation:") @@ -285,8 +285,8 @@ def score_estimator(estimator, df_test): # # To ensure that estimators yield reasonable predictions for different # policyholder types, we can bin test samples according to `y_pred` returned -# by each model. Then for each bin, compare the mean predicted `y_pred`, with -# the mean observed target: +# by each model. 
Then for each bin, we compare the mean predicted `y_pred`, +# with the mean observed target: def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, @@ -325,7 +325,7 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, for n, sl in enumerate(gen_even_slices(len(y_true), n_bins)): weights = sample_weight[idx_sort][sl] y_pred_bin[n] = np.average( - y_pred[idx_sort][sl], weights=weights + y_pred[idx_sort][sl], weights=weights ) y_true_bin[n] = np.average( y_true[idx_sort][sl], @@ -337,7 +337,7 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(12, 3.5)) plt.subplots_adjust(wspace=0.3) -for axi, model in zip(ax, [ridge, poisson, rf]): +for axi, model in zip(ax, [ridge, poisson, rf]): y_pred = model.predict(df_test) q, y_true_seg, y_pred_seg = _mean_frequency_by_risk_group( diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 5a312f656d9ce..1c50541fcd85c 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -98,7 +98,7 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, predicted : frame a dataframe, with the same index as df, with the predicted target fill_legend : bool, default=False - wgether to show fill_between legend + whether to show fill_between legend """ # aggregate observed and predicted variables by feature level df_ = df.loc[:, [feature, weight]].copy() @@ -219,8 +219,8 @@ def score_estimator( # Score the model consisting of the product of frequency and # severity models, denormalized by the exposure values. est_freq, est_sev = estimator - y_pred = (df.Exposure.values * est_freq.predict(X) - * est_sev.predict(X)) + y_pred = (df.Exposure.values * est_freq.predict(X) * + est_sev.predict(X)) power = 1.5 else: y_pred = estimator.predict(X) From 4f28a44bff0a7d84de13f66e0d2b5264897f6c95 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 30 Sep 2019 11:20:25 +0200 Subject: [PATCH 182/269] Address some of Alex's comments - Link -> BaseLink - Removed reference to none existing notes - Use X.dtype for dtype of y - remove check_input --- sklearn/linear_model/_glm/glm.py | 51 ++++++++------------- sklearn/linear_model/_glm/link.py | 8 ++-- sklearn/linear_model/_glm/tests/test_glm.py | 2 +- 3 files changed, 25 insertions(+), 36 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index f7985c0f3bae3..819e36e13addf 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -21,7 +21,7 @@ EDM_DISTRIBUTIONS ) from .link import ( - Link, + BaseLink, IdentityLink, LogLink, ) @@ -47,10 +47,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): ---------- alpha : float, optional (default=1) Constant that multiplies the penalty terms and thus determines the - regularization strength. - See the notes for the exact mathematical meaning of this - parameter. ``alpha = 0`` is equivalent to unpenalized GLMs. In this - case, the design matrix X must have full column rank + regularization strength. ``alpha = 0`` is equivalent to unpenalized + GLMs. In this case, the design matrix X must have full column rank (no collinearities). 
fit_intercept : boolean, optional (default=True) @@ -63,7 +61,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): The distributional assumption of the GLM, i.e. which distribution from the EDM, specifies the loss function to be minimized. - link : {'auto', 'identity', 'log'} or an instance of class Link, \ + link : {'auto', 'identity', 'log'} or an instance of class BaseLink, \ optional (default='auto') The link function of the GLM, i.e. mapping from linear predictor (X*coef) to expectation (y_pred). Option 'auto' sets the link @@ -167,8 +165,8 @@ def fit(self, X, y, sample_weight=None): "; got (family={0})".format(self.family)) # Guarantee that self._link_instance is set to an instance of - # class Link - if isinstance(self.link, Link): + # class BaseLink + if isinstance(self.link, BaseLink): self._link_instance = self.link else: if self.link == 'auto': @@ -227,7 +225,7 @@ def fit(self, X, y, sample_weight=None): X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'], dtype=[np.float64, np.float32], y_numeric=True, multi_output=False, copy=self.copy_X) - y = np.asarray(y, dtype=np.float64) + y = np.asarray(y, dtype=X.dtype) weights = _check_sample_weight(sample_weight, X) @@ -247,8 +245,7 @@ def fit(self, X, y, sample_weight=None): # deviance = sum(sample_weight * unit_deviance), # we rescale weights such that sum(weights) = 1 and this becomes # 1/2*deviance + L2 with deviance=sum(weights * unit_deviance) - weights_sum = np.sum(weights) - weights = weights / weights_sum + weights = weights / weights.sum() if self.warm_start and hasattr(self, 'coef_'): if self.fit_intercept: @@ -318,7 +315,7 @@ def _linear_predictor(self, X): """ check_is_fitted(self) X = check_array(X, accept_sparse=['csr', 'csc', 'coo'], - dtype='numeric', ensure_2d=True, + dtype=[np.float64, np.float32], ensure_2d=True, allow_nd=False) return X @ self.coef_ + self.intercept_ @@ -413,10 +410,8 @@ class PoissonRegressor(GeneralizedLinearRegressor): ---------- alpha : float, optional (default=1) Constant that multiplies the penalty terms and thus determines the - regularization strength. - See the notes for the exact mathematical meaning of this - parameter. ``alpha = 0`` is equivalent to unpenalized GLMs. In this - case, the design matrix X must have full column rank + regularization strength. ``alpha = 0`` is equivalent to unpenalized + GLMs. In this case, the design matrix X must have full column rank (no collinearities). fit_intercept : boolean, optional (default=True) @@ -454,9 +449,8 @@ class PoissonRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in the solver. """ - def __init__(self, alpha=1.0, fit_intercept=True, - max_iter=100, tol=1e-4, warm_start=False, - copy_X=True, check_input=True, verbose=0): + def __init__(self, alpha=1.0, fit_intercept=True, max_iter=100, tol=1e-4, + warm_start=False, copy_X=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="poisson", link='log', max_iter=max_iter, @@ -495,10 +489,8 @@ class GammaRegressor(GeneralizedLinearRegressor): ---------- alpha : float, optional (default=1) Constant that multiplies the penalty terms and thus determines the - regularization strength. - See the notes for the exact mathematical meaning of this - parameter. ``alpha = 0`` is equivalent to unpenalized GLMs. In this - case, the design matrix X must have full column rank + regularization strength. ``alpha = 0`` is equivalent to unpenalized + GLMs. 
In this case, the design matrix X must have full column rank (no collinearities). fit_intercept : boolean, optional (default=True) @@ -536,9 +528,8 @@ class GammaRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in the solver. """ - def __init__(self, alpha=1.0, fit_intercept=True, - max_iter=100, tol=1e-4, warm_start=False, - copy_X=True, check_input=True, verbose=0): + def __init__(self, alpha=1.0, fit_intercept=True, max_iter=100, tol=1e-4, + warm_start=False, copy_X=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="gamma", link='log', max_iter=max_iter, @@ -601,10 +592,8 @@ class TweedieRegressor(GeneralizedLinearRegressor): alpha : float, optional (default=1) Constant that multiplies the penalty terms and thus determines the - regularization strength. - See the notes for the exact mathematical meaning of this - parameter.``alpha = 0`` is equivalent to unpenalized GLMs. In this - case, the design matrix X must have full column rank + regularization strength. ``alpha = 0`` is equivalent to unpenalized + GLMs. In this case, the design matrix X must have full column rank (no collinearities). link : {'auto', 'identity', 'log'}, default='auto' @@ -652,7 +641,7 @@ class TweedieRegressor(GeneralizedLinearRegressor): """ def __init__(self, power=0.0, alpha=1.0, fit_intercept=True, link='auto', max_iter=100, tol=1e-4, - warm_start=False, copy_X=True, check_input=True, verbose=0): + warm_start=False, copy_X=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family=TweedieDistribution(power=power), link=link, diff --git a/sklearn/linear_model/_glm/link.py b/sklearn/linear_model/_glm/link.py index cfdc6f181a832..7c404798b2c18 100644 --- a/sklearn/linear_model/_glm/link.py +++ b/sklearn/linear_model/_glm/link.py @@ -11,7 +11,7 @@ from scipy.special import expit, logit -class Link(metaclass=ABCMeta): +class BaseLink(metaclass=ABCMeta): """Abstract base class for Link functions.""" @abstractmethod @@ -65,7 +65,7 @@ def inverse_derivative(self, lin_pred): pass # pragma: no cover -class IdentityLink(Link): +class IdentityLink(BaseLink): """The identity link function g(x)=x.""" def __call__(self, y_pred): @@ -81,7 +81,7 @@ def inverse_derivative(self, lin_pred): return np.ones_like(lin_pred) -class LogLink(Link): +class LogLink(BaseLink): """The log link function g(x)=log(x).""" def __call__(self, y_pred): @@ -97,7 +97,7 @@ def inverse_derivative(self, lin_pred): return np.exp(lin_pred) -class LogitLink(Link): +class LogitLink(BaseLink): """The logit link function g(x)=logit(x).""" def __call__(self, y_pred): diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 898d3c4edf9c0..030cdf8a9b141 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -338,7 +338,7 @@ def test_tweedie_regression_family(regression_data): with pytest.raises(TypeError, match=msg): est.family = None - # TODO: the following should not be allowed + # XXX: following is currently allowed, but maybe it shouldn't be # est.family.power = 2 From d4dfd0b13c9bd3ce7fae0d7ed2b0ad40411f46cc Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 30 Sep 2019 12:10:01 +0200 Subject: [PATCH 183/269] Removing unnecessary comments / asarray call --- sklearn/linear_model/_glm/glm.py | 1 - sklearn/linear_model/_glm/tests/test_glm.py | 3 --- 2 files changed, 4 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py 
b/sklearn/linear_model/_glm/glm.py index 819e36e13addf..65de7f9532717 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -225,7 +225,6 @@ def fit(self, X, y, sample_weight=None): X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'], dtype=[np.float64, np.float32], y_numeric=True, multi_output=False, copy=self.copy_X) - y = np.asarray(y, dtype=X.dtype) weights = _check_sample_weight(sample_weight, X) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 030cdf8a9b141..ef49fabdee4b1 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -338,9 +338,6 @@ def test_tweedie_regression_family(regression_data): with pytest.raises(TypeError, match=msg): est.family = None - # XXX: following is currently allowed, but maybe it shouldn't be - # est.family.power = 2 - @pytest.mark.parametrize( 'estimator, value', From 64d6fbd0dd99d66789c3043558f80883a974e99e Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 3 Oct 2019 10:57:18 +0200 Subject: [PATCH 184/269] Update doc/modules/linear_model.rst Co-Authored-By: Nicolas Hug --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index e53f309076b3b..8945235279d9d 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -923,7 +923,7 @@ likelihood as \left( \log p(y|\mu,\phi) - \log p(y|y,\phi)\right). -The following table lists some specific EDM distributions—all are Tweedie +The following table lists some specific EDM distributions—all are instances of Tweedie distributions—and some of their properties. ================= =============================== ====================================== ============================================ From 82ace9f399c83d7f023f9dafcdada88540c0fd25 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 3 Oct 2019 11:14:34 +0200 Subject: [PATCH 185/269] Remove unused solver parameter in tests --- sklearn/linear_model/_glm/tests/test_glm.py | 30 +++++++-------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index ef49fabdee4b1..a5df69b50c967 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -26,8 +26,6 @@ from sklearn.exceptions import ConvergenceWarning from sklearn.model_selection import train_test_split -GLM_SOLVERS = ['lbfgs'] - @pytest.fixture(scope="module") def regression_data(): @@ -176,14 +174,13 @@ def test_glm_check_input_argument(check_input): glm.fit(X, y) -@pytest.mark.parametrize('solver', GLM_SOLVERS) -def test_glm_identity_regression(solver): +def test_glm_identity_regression(): """Test GLM regression with identity link on a simple dataset.""" coef = [1., 2.] 
X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.dot(X, coef) glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', - fit_intercept=False, solver=solver) + fit_intercept=False) res = glm.fit(X, y) assert_allclose(res.coef_, coef, rtol=1e-6) @@ -193,15 +190,14 @@ def test_glm_identity_regression(solver): [NormalDistribution(), PoissonDistribution(), GammaDistribution(), InverseGaussianDistribution(), TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)]) -@pytest.mark.parametrize('solver, tol', [('lbfgs', 1e-6)]) -def test_glm_log_regression(family, solver, tol): +def test_glm_log_regression(family): """Test GLM regression with log link on a simple dataset.""" coef = [0.2, -0.1] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.exp(np.dot(X, coef)) glm = GeneralizedLinearRegressor( alpha=0, family=family, link='log', fit_intercept=False, - solver=solver, tol=tol) + tol=1e-6) res = glm.fit(X, y) assert_allclose(res.coef_, coef, rtol=5e-6) @@ -239,8 +235,7 @@ def test_warm_start(fit_intercept): @pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)]) @pytest.mark.parametrize('fit_intercept', [True, False]) -@pytest.mark.parametrize('solver', GLM_SOLVERS) -def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): +def test_normal_ridge_comparison(n_samples, n_features, fit_intercept): """Compare with Ridge regression for Normal distributions.""" alpha = 1.0 test_size = 10 @@ -264,8 +259,7 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): glm = GeneralizedLinearRegressor(alpha=1.0, family='normal', link='identity', fit_intercept=True, - solver=solver, check_input=False, - max_iter=300) + check_input=False, max_iter=300) glm.fit(X_train, y_train) assert glm.coef_.shape == (X.shape[1], ) assert_allclose(glm.coef_, ridge.coef_, atol=5e-5) @@ -274,8 +268,7 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, solver): assert_allclose(glm.predict(X_test), ridge.predict(X_test), rtol=5e-5) -@pytest.mark.parametrize('solver, tol', [('lbfgs', 1e-7)]) -def test_poisson_glmnet(solver, tol): +def test_poisson_glmnet(): """Compare Poisson regression with L2 regularization and LogLink to glmnet """ # library("glmnet") @@ -294,19 +287,16 @@ def test_poisson_glmnet(solver, tol): glm = GeneralizedLinearRegressor(alpha=1, fit_intercept=True, family='poisson', link='log', tol=1e-7, - solver=solver, max_iter=300, - ) + max_iter=300) glm.fit(X, y) assert_allclose(glm.intercept_, -0.12889386979, rtol=1e-5) assert_allclose(glm.coef_, [0.29019207995, 0.03741173122], rtol=1e-5) -@pytest.mark.parametrize("solver", GLM_SOLVERS) -def test_convergence_warning(solver, regression_data): +def test_convergence_warning(regression_data): X, y = regression_data - est = GeneralizedLinearRegressor(solver=solver, - max_iter=1, tol=1e-20) + est = GeneralizedLinearRegressor(max_iter=1, tol=1e-20) with pytest.warns(ConvergenceWarning): est.fit(X, y) From 5288a0ff156c254df7e465971127685f5532fbf3 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 3 Oct 2019 12:13:35 +0200 Subject: [PATCH 186/269] Add test for sample_weight consistency --- sklearn/linear_model/_glm/tests/test_glm.py | 36 +++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index a5df69b50c967..2a54b759011e7 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ 
b/sklearn/linear_model/_glm/tests/test_glm.py @@ -181,8 +181,40 @@ def test_glm_identity_regression(): y = np.dot(X, coef) glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', fit_intercept=False) - res = glm.fit(X, y) - assert_allclose(res.coef_, coef, rtol=1e-6) + glm.fit(X, y) + assert_allclose(glm.coef_, coef, rtol=1e-6) + + +def test_glm_sample_weight_consistentcy(): + """Test that the impact of sample_weight is consistent""" + rng = np.random.RandomState(0) + n_samples, n_features = 10, 5 + + X = rng.rand(n_samples, n_features) + y = rng.rand(n_samples) + glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', + fit_intercept=False) + glm.fit(X, y) + coef = glm.coef_.copy() + + # sample_weight=np.ones(..) should be equivalent to sample_weight=None + sample_weight = np.ones(y.shape) + glm.fit(X, y, sample_weight=sample_weight) + assert_allclose(glm.coef_, coef, rtol=1e-6) + + # sample_weight are normalized to 1 so, scaling them has no effect + sample_weight = 2*np.ones(y.shape) + glm.fit(X, y, sample_weight=sample_weight) + assert_allclose(glm.coef_, coef, rtol=1e-6) + + # setting one element of sample_weight to 0 is equivalent to removing + # the correspoding sample + sample_weight = np.ones(y.shape) + sample_weight[-1] = 0 + glm.fit(X, y, sample_weight=sample_weight) + coef1 = glm.coef_.copy() + glm.fit(X[:-1], y[:-1]) + assert_allclose(glm.coef_, coef1, rtol=1e-6) @pytest.mark.parametrize( From 499e8d244b5f6bf3ad7856dace4338b1cf5d31e1 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 3 Oct 2019 14:41:47 +0200 Subject: [PATCH 187/269] Move GLM losses under sklearn._loss.glm_distribution --- sklearn/_loss/__init__.py | 0 .../_glm/distribution.py => _loss/glm_distribution.py} | 0 sklearn/_loss/tests/__init__.py | 0 .../tests/test_glm_distribution.py} | 2 +- sklearn/linear_model/_glm/glm.py | 2 +- sklearn/linear_model/_glm/tests/test_glm.py | 2 +- sklearn/metrics/regression.py | 2 +- 7 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 sklearn/_loss/__init__.py rename sklearn/{linear_model/_glm/distribution.py => _loss/glm_distribution.py} (100%) create mode 100644 sklearn/_loss/tests/__init__.py rename sklearn/{linear_model/_glm/tests/test_distribution.py => _loss/tests/test_glm_distribution.py} (98%) diff --git a/sklearn/_loss/__init__.py b/sklearn/_loss/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sklearn/linear_model/_glm/distribution.py b/sklearn/_loss/glm_distribution.py similarity index 100% rename from sklearn/linear_model/_glm/distribution.py rename to sklearn/_loss/glm_distribution.py diff --git a/sklearn/_loss/tests/__init__.py b/sklearn/_loss/tests/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/sklearn/linear_model/_glm/tests/test_distribution.py b/sklearn/_loss/tests/test_glm_distribution.py similarity index 98% rename from sklearn/linear_model/_glm/tests/test_distribution.py rename to sklearn/_loss/tests/test_glm_distribution.py index 97c3a485ef4bb..cb4c5ae07e4d1 100644 --- a/sklearn/linear_model/_glm/tests/test_distribution.py +++ b/sklearn/_loss/tests/test_glm_distribution.py @@ -9,7 +9,7 @@ from scipy.optimize import check_grad import pytest -from sklearn.linear_model._glm.distribution import ( +from sklearn._loss.glm_distribution import ( TweedieDistribution, NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 
65de7f9532717..360db6e4f741e 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -15,7 +15,7 @@ from ...utils import check_array, check_X_y from ...utils.optimize import _check_optimize_result from ...utils.validation import check_is_fitted, _check_sample_weight -from .distribution import ( +from ..._loss.glm_distribution import ( ExponentialDispersionModel, TweedieDistribution, EDM_DISTRIBUTIONS diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 2a54b759011e7..9e21ae7775cf4 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -17,7 +17,7 @@ IdentityLink, LogLink, ) -from sklearn.linear_model._glm.distribution import ( +from sklearn._loss.glm_distribution import ( TweedieDistribution, NormalDistribution, PoissonDistribution, GammaDistribution, InverseGaussianDistribution, diff --git a/sklearn/metrics/regression.py b/sklearn/metrics/regression.py index 706c484334d21..f7ef99794727b 100644 --- a/sklearn/metrics/regression.py +++ b/sklearn/metrics/regression.py @@ -25,6 +25,7 @@ import numpy as np import warnings +from .._loss.glm_distribution import TweedieDistribution from ..utils.validation import (check_array, check_consistent_length, _num_samples) from ..utils.validation import column_or_1d @@ -672,7 +673,6 @@ def mean_tweedie_deviance(y_true, y_pred, sample_weight=None, power=0): >>> mean_tweedie_deviance(y_true, y_pred, power=1) 1.4260... """ - from ..linear_model._glm.distribution import TweedieDistribution y_type, y_true, y_pred, _ = _check_reg_targets( y_true, y_pred, None, dtype=[np.float64, np.float32]) if y_type == 'continuous-multioutput': From f4aa839da1f0226ff8ed98adb22d6e90446d7120 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 3 Oct 2019 15:21:42 +0200 Subject: [PATCH 188/269] Update sklearn/linear_model/_glm/glm.py Co-Authored-By: Nicolas Hug --- sklearn/linear_model/_glm/glm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 360db6e4f741e..dd6f847895434 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -571,7 +571,7 @@ class TweedieRegressor(GeneralizedLinearRegressor): mean (:math:`\y_\textrm{pred}`): :math:`v(\y_\textrm{pred}) = \y_\textrm{pred}^{power}`. 
- For ``0 Date: Thu, 3 Oct 2019 14:55:48 +0200 Subject: [PATCH 189/269] Add missing config.add_subpackage in setup.py --- sklearn/linear_model/setup.py | 1 + sklearn/setup.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/sklearn/linear_model/setup.py b/sklearn/linear_model/setup.py index 5cf7040d4c9d4..e50a30eca73da 100644 --- a/sklearn/linear_model/setup.py +++ b/sklearn/linear_model/setup.py @@ -43,6 +43,7 @@ def configuration(parent_package='', top_path=None): # add other directories config.add_subpackage('tests') config.add_subpackage('_glm') + config.add_subpackage('_glm/tests') return config diff --git a/sklearn/setup.py b/sklearn/setup.py index 53f6d3f6eb30c..3913965a375cd 100644 --- a/sklearn/setup.py +++ b/sklearn/setup.py @@ -47,6 +47,8 @@ def configuration(parent_package='', top_path=None): config.add_subpackage('experimental/tests') config.add_subpackage('ensemble/_hist_gradient_boosting') config.add_subpackage('ensemble/_hist_gradient_boosting/tests') + config.add_subpackage('_loss/') + config.add_subpackage('_loss/tests') # submodules which have their own setup.py config.add_subpackage('cluster') From d71fb9f8fb54608124f4947931ecf1cbaaba425f Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 3 Oct 2019 15:28:50 +0200 Subject: [PATCH 190/269] Address Nicolas comments in the documentation (partial) --- doc/modules/linear_model.rst | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 8945235279d9d..6667057dc5073 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -907,13 +907,13 @@ Generalized Linear Models (GLM) extend linear models in two ways combination of the input variables :math:`X` via an inverse link function :math:`h` as -.. math:: \hat{y}(w, x) = h(x^\top w) = h(w_0 + w_1 x_1 + ... + w_p x_p). +.. math:: \hat{y}(w, X) = h(x^\top w) = h(w_0 + w_1 X_1 + ... + w_p X_p). Secondly, the squared loss function is replaced by the unit deviance :math:`d` of a reproductive exponential dispersion model (EDM) [11]_. The minimization problem becomes -.. math:: \min_{w} \frac{1}{2 \sum_i s_i} \sum_i s_i \cdot d(y_i, \hat{y}(w, x_i)) + \frac{\alpha}{2} ||w||_2 +.. math:: \min_{w} \frac{1}{2 \sum_i s_i} \sum_i s_i \cdot d(y_i, \hat{y}(w, X_i)) + \frac{\alpha}{2} ||w||_2 with sample weights :math:`s`, and L2 regularization penalty :math:`\alpha`. The unit deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` @@ -939,8 +939,8 @@ Inverse Gaussian :math:`y \in (0, \infty)` :math:`\mu^3` Usage ----- -In the following use cases, a loss different from the squared loss might be -appropriate: +A GLM loss different from the classical squared loss might be appropriate in +the following cases: * If the target values :math:`y` are counts (non-negative integer valued) or frequencies (non-negative), you might use a Poisson deviance with log-link. @@ -985,13 +985,8 @@ of the unit variance function: * If you want to model a relative frequency, i.e. counts per exposure (time, volume, ...) you can do so by a Poisson distribution and passing :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values - together with :math:`s=\mathrm{exposure}` as sample weights. This is done - in both examples linked below. - * The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\frac{\phi}{s_i} v(\mu_i)`. 
- * If the target `y` is a ratio, appropriate sample weights ``s`` should be - provided. + together with :math:`s=\mathrm{exposure}` as sample weights. + As an example, consider Poisson distributed counts z (integers) and weights s=exposure (time, money, persons years, ...). Then you fit y = z/s, i.e. ``PoissonRegressor.fit(X, y, sample_weight=s)``. @@ -1000,6 +995,10 @@ of the unit variance function: in this case one might say that y has a 'scaled' Poisson distribution. The same holds for other distributions. + * The fit itself does not need Y to be from an EDM, but only assumes + the first two moments to be :math:`E[Y_i]=\mu_i=h((Xw)_i)` and + :math:`Var[Y_i]=\frac{\phi}{s_i} v(\mu_i)`. + The estimator can be used as follows:: >>> from sklearn.linear_model import TweedieRegressor From fa90272e4d1925b4caa967febf417c4ea9e96457 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 3 Oct 2019 16:10:40 +0200 Subject: [PATCH 191/269] More cleanups in the plot_tweedie_regression_insurance_claims.py example --- ...lot_tweedie_regression_insurance_claims.py | 57 ++++++++----------- 1 file changed, 24 insertions(+), 33 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 1c50541fcd85c..dfd5555c83af4 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -142,7 +142,7 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, # Note: filter out claims with zero amount, as the severity model # requires strictly positive target values. -df.loc[(df.ClaimAmount == 0) & (df.ClaimNb >= 1), "ClaimNb"] = 0 +df.loc[(df["ClaimAmount"] == 0) & (df["ClaimNb"] >= 1), "ClaimNb"] = 0 # Correct for unreasonable observations (that might be data error) # and a few exceptionally large claim amounts @@ -150,30 +150,26 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, df["Exposure"] = df["Exposure"].clip(upper=1) df["ClaimAmount"] = df["ClaimAmount"].clip(upper=200000) +log_scale_transformer = make_pipeline( + FunctionTransformer(np.log, validate=False), + StandardScaler() +) + column_trans = ColumnTransformer( [ - ("Veh_Driv_Age", KBinsDiscretizer(n_bins=10), ["VehAge", "DrivAge"]), - ( - "Veh_Brand_Gas_Region", - OneHotEncoder(), - ["VehBrand", "VehPower", "VehGas", "Region", "Area"], - ), - ("BonusMalus", "passthrough", ["BonusMalus"]), - ( - "Density_log", - make_pipeline( - FunctionTransformer(np.log, validate=False), StandardScaler() - ), - ["Density"], - ), + ("binned_numeric", KBinsDiscretizer(n_bins=10), ["VehAge", "DrivAge"]), + ("onehot_categorical", OneHotEncoder(), + ["VehBrand", "VehPower", "VehGas", "Region", "Area"]), + ("passthrough_numeric", "passthrough", ["BonusMalus"]), + ("log_scaled_numeric", log_scale_transformer, ["Density"]), ], remainder="drop", ) X = column_trans.fit_transform(df) -df["Frequency"] = df.ClaimNb / df.Exposure -df["AvgClaimAmount"] = df.ClaimAmount / np.fmax(df.ClaimNb, 1) +df["Frequency"] = df["ClaimNb"] / df["Exposure"] +df["AvgClaimAmount"] = df["ClaimAmount"] / np.fmax(df["ClaimNb"], 1) print(df[df.ClaimAmount > 0].head()) @@ -268,7 +264,7 @@ def score_estimator( # the drivers age (``DrivAge``), vehicle age (``VehAge``) and the insurance # bonus/malus (``BonusMalus``). 
-fig, ax = plt.subplots(2, 2, figsize=(16, 8)) +fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(16, 8)) fig.subplots_adjust(hspace=0.3, wspace=0.2) plot_obs_pred( @@ -369,18 +365,12 @@ def score_estimator( # it is conditional on having at least one claim, and cannot be used to predict # the average claim amount per policy in general. -print( - "Mean AvgClaim Amount per policy: %.2f " - % df_train.AvgClaimAmount.mean() -) -print( - "Mean AvgClaim Amount | NbClaim > 0: %.2f" - % df_train.AvgClaimAmount[df_train.AvgClaimAmount > 0].mean() -) -print( - "Predicted Mean AvgClaim Amount | NbClaim > 0: %.2f" - % glm_sev.predict(X_train).mean() -) +print("Mean AvgClaim Amount per policy: %.2f " + % df_train["AvgClaimAmount"].mean()) +print("Mean AvgClaim Amount | NbClaim > 0: %.2f" + % df_train["AvgClaimAmount"][df_train["AvgClaimAmount"] > 0].mean()) +print("Predicted Mean AvgClaim Amount | NbClaim > 0: %.2f" + % glm_sev.predict(X_train).mean()) ############################################################################## @@ -388,7 +378,7 @@ def score_estimator( # We can visually compare observed and predicted values, aggregated for # the drivers age (``DrivAge``). -fig, ax = plt.subplots(1, 2, figsize=(16, 4)) +fig, ax = plt.subplots(ncols=1, nrows=2, figsize=(16, 4)) # plot DivAge plot_obs_pred( @@ -500,9 +490,9 @@ def score_estimator( res.append( { "subset": subset_label, - "observed": df.ClaimAmount.values.sum(), + "observed": df["ClaimAmount"].values.sum(), "predicted, frequency*severity model": np.sum( - df.Exposure.values*glm_freq.predict(X)*glm_sev.predict(X) + df["Exposure"].values*glm_freq.predict(X)*glm_sev.predict(X) ), "predicted, tweedie, power=%.2f" % glm_total.best_estimator_.family.power: np.sum( @@ -512,3 +502,4 @@ def score_estimator( ) print(pd.DataFrame(res).set_index("subset").T) +plt.plot() From 4d16f318ca07cf8a00200201042bba177d4d27ba Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 6 Oct 2019 16:13:21 +0200 Subject: [PATCH 192/269] Typos and text improvement in poisson example --- doc/modules/linear_model.rst | 2 +- ...plot_poisson_regression_non_normal_loss.py | 65 ++++++++++--------- 2 files changed, 36 insertions(+), 31 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 6667057dc5073..4a5aeab305b8a 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -991,7 +991,7 @@ of the unit variance function: weights s=exposure (time, money, persons years, ...). Then you fit y = z/s, i.e. ``PoissonRegressor.fit(X, y, sample_weight=s)``. The weights are necessary for the right (finite sample) mean. - Considering :math:`\bar{y} = \frac{\\sum_i s_i y_i}{\sum_i s_i}`, + Considering :math:`\bar{y} = \frac{\sum_i s_i y_i}{\sum_i s_i}`, in this case one might say that y has a 'scaled' Poisson distribution. The same holds for other distributions. diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index d99654cf04080..3ecb02108de4f 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -6,8 +6,8 @@ This example illustrates the use of log-linear Poisson regression on the French Motor Third-Party Liability Claims dataset [1] and compares it with models learned with least squared error. 
The goal is to predict the -number of insurance claims (or frequency) following car accidents for a -policyholder given historical data over a population of policyholders. +expected number of insurance claims (or frequency) following car accidents for +a policyholder given historical data over a population of policyholders. .. [1] A. Noll, R. Salzmann and M.V. Wuthrich, Case Study: French Motor Third-Party Liability Claims (November 8, 2018). @@ -42,8 +42,8 @@ from sklearn.metrics import mean_poisson_deviance -def load_mtpl2(n_samples=None): - """Fetcher for French Motor Third-Party Liability Claims dataset +def load_mtpl2(n_samples=100000): + """Fetch the French Motor Third-Party Liability Claims dataset. Parameters ---------- @@ -122,9 +122,13 @@ def load_mtpl2(n_samples=None): print("Average Frequency = {}" .format(np.average(df["Frequency"], weights=df["Exposure"]))) +print("Percentage of zero claims = {0:%}" + .format(df.loc[df["ClaimNb"] == 0, "Exposure"].sum() / + df["Exposure"].sum())) + ############################################################################## # -# It worth noting that 96 % of policyholders have zero claims, and if we were +# It worth noting that 92 % of policyholders have zero claims, and if we were # to convert this problem into a binary classification task, it would be # significantly imbalanced. # @@ -143,7 +147,7 @@ def load_mtpl2(n_samples=None): def score_estimator(estimator, df_test): - """Score an estimator on the test set""" + """Score an estimator on the test set.""" y_pred = estimator.predict(df_test) @@ -154,13 +158,14 @@ def score_estimator(estimator, df_test): mean_absolute_error(df_test["Frequency"], y_pred, df_test["Exposure"])) - # ignore negative predictions, as they are invalid for + # ignore non-positive predictions, as they are invalid for # the Poisson deviance mask = y_pred > 0 if (~mask).any(): - warnings.warn("estimator yields negative predictions for {} samples " - "out of {}. These will be ignored while computing the " - "Poisson deviance".format((~mask).sum(), mask.shape[0])) + warnings.warn("Estimator yields non-positive predictions for {} " + "samples out of {}. These will be ignored while " + "computing the Poisson deviance" + .format((~mask).sum(), mask.shape[0])) print("mean Poisson deviance: %.3f" % mean_poisson_deviance(df_test["Frequency"][mask], @@ -182,12 +187,12 @@ def score_estimator(estimator, df_test): ############################################################################## # -# The Poisson deviance cannot be computed on negative values predicted by the -# model. For models that do return a few negative predictions +# The Poisson deviance cannot be computed on non-positive values predicted by +# the model. For models that do return a few non-positive predictions # (e.g. :class:`linear_model.Ridge`) we ignore the corresponding samples, # meaning that the obtained Poisson deviance is approximate. An alternative -# apporach could be to use class:`compose.TransformedTargetRegressor` -# meta-estimator to map ``y_pred`` to strictly positive domain. +# approach could be to use :class:`compose.TransformedTargetRegressor` +# meta-estimator to map ``y_pred`` to a strictly positive domain. print("Ridge evaluation:") score_estimator(ridge, df_test) @@ -210,9 +215,9 @@ def score_estimator(estimator, df_test): # # Finally, we will consider a non-linear model, namely a random forest. 
Random # forests do not require the categorical data to be one-hot encoded, instead -# we encode each category label with an arbirtrary integer using +# we encode each category label with an arbitrary integer using # :class:`preprocessing.OrdinalEncoder` to make the model faster to train (the -# same information is encoded with a small number of features than with +# same information is encoded with a smaller number of features than with # one-hot encoding). rf_preprocessor = ColumnTransformer( @@ -238,12 +243,13 @@ def score_estimator(estimator, df_test): ############################################################################## # -# The random forest model also minimizes the conditional least square error. -# However because of a higher predictive power it also results in a smaller -# Poisson deviance than the Poisson regression model. +# Like the Ridge regression above, the random forest model minimizes the +# conditional squared error, too. However, because of a higher predictive +# power, it also results in a smaller Poisson deviance than the Poisson +# regression model. # # Evaluating models with a single train / test split is prone to random -# fluctuations. If computation resources allow, it should be verified that +# fluctuations. If computing resources allow, it should be verified that # cross-validated performance metrics would lead to similar conclusions. # # The qualitative difference between these models can also be visualized by @@ -274,7 +280,7 @@ def score_estimator(estimator, df_test): # # The experimental data presents a long tail distribution for ``y``. In all # models we predict the mean expected value, so we will have necessarily fewer -# extreme values. Additionally normal distribution used in ``Ridge`` and +# extreme values. Additionally, normal distribution used in ``Ridge`` and # ``RandomForestRegressor`` has a constant variance, while for the Poisson # distribution used in ``PoissonRegressor``, the variance is proportional to # the mean predicted value. @@ -291,11 +297,10 @@ def score_estimator(estimator, df_test): def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, n_bins=100): - """Compare predictions and observations for bins ordered by y_pred + """Compare predictions and observations for bins ordered by y_pred. We order the samples by ``y_pred`` and split it in bins. - In each bin the observed mean is compared with the predicted - mean. + In each bin the observed mean is compared with the predicted mean. Parameters ---------- @@ -306,7 +311,7 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, sample_weight : array-like of shape (n_samples,) Sample weights. n_bins: int - number of bins to use + Number of bins to use. Returns ------- @@ -370,10 +375,10 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, # values. # # However, for some business applications, we are not necessarily interested -# in the the ability of the model in predicting the expected frequency value -# but instead in predicting which policyholder groups are the riskiest and -# which are the safest. In this case the model evaluation would cast the -# problem as a ranking problem rather than a regression problem. +# in the ability of the model to predict the expected frequency value, but +# instead to predict which policyholder groups are the riskiest and which are +# the safest. In this case, the model evaluation would cast the problem as a +# ranking problem rather than a regression problem. 
# # To compare the 3 models under this light on, one can plot the fraction of # the number of claims vs the fraction of exposure for test samples ordered by @@ -435,6 +440,6 @@ def _cumulated_claims(y_true, y_pred, exposure): # # This last point is expected due to the nature of the problem: the occurrence # of accidents is mostly dominated by circumstantial causes that are not -# captured in the columns of the dataset. +# captured in the columns of the dataset or that are indeed random. plt.show() From 15eb1d39c6c2bbbdceeb64996d0f297ef4f80ebf Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 6 Oct 2019 16:20:33 +0200 Subject: [PATCH 193/269] EXA sharey for histograms --- .../linear_model/plot_poisson_regression_non_normal_loss.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 3ecb02108de4f..769c072c624fb 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -257,7 +257,7 @@ def score_estimator(estimator, df_test): # values: -fig, axes = plt.subplots(1, 4, figsize=(16, 3)) +fig, axes = plt.subplots(1, 4, figsize=(16, 3), sharey=True) fig.subplots_adjust(bottom=0.2) n_bins = 20 df_train["Frequency"].hist(bins=np.linspace(-1, 10, n_bins), ax=axes[0]) @@ -265,6 +265,7 @@ def score_estimator(estimator, df_test): axes[0].set_title("Data") axes[0].set_yscale('log') axes[0].set_xlabel("y (observed Frequency)") +axes[0].set_ylim([1E2, 5E5]) for idx, model in enumerate([ridge, poisson, rf]): y_pred = model.predict(df_train) From 3d097c686dfb016a4561afca37f28ad0d40dc0f3 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Tue, 8 Oct 2019 15:07:56 +0200 Subject: [PATCH 194/269] Plot y_pred histograms on the test set --- ...plot_poisson_regression_non_normal_loss.py | 42 +++++++++++-------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 769c072c624fb..0e948873da570 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -256,26 +256,32 @@ def score_estimator(estimator, df_test): # comparing the histogram of observed target values with that of predicted # values: - -fig, axes = plt.subplots(1, 4, figsize=(16, 3), sharey=True) +fig, axes = plt.subplots(2, 4, figsize=(16, 6), sharey=True) fig.subplots_adjust(bottom=0.2) n_bins = 20 -df_train["Frequency"].hist(bins=np.linspace(-1, 10, n_bins), ax=axes[0]) - -axes[0].set_title("Data") -axes[0].set_yscale('log') -axes[0].set_xlabel("y (observed Frequency)") -axes[0].set_ylim([1E2, 5E5]) - -for idx, model in enumerate([ridge, poisson, rf]): - y_pred = model.predict(df_train) - - pd.Series(y_pred).hist(bins=np.linspace(-1, 4, n_bins), ax=axes[idx+1]) - axes[idx + 1].set( - title=model[-1].__class__.__name__, - yscale='log', - xlabel="y_pred (predicted expected Frequency)" - ) +for row_idx, label, df in zip(range(2), + ["train", "test"], + [df_train, df_test]): + df["Frequency"].hist(bins=np.linspace(-1, 30, n_bins), + ax=axes[row_idx, 0]) + + axes[row_idx, 0].set_title("Data") + axes[row_idx, 0].set_yscale('log') + axes[row_idx, 0].set_xlabel("y (observed Frequency)") + axes[row_idx, 0].set_ylim([1e1, 5e5]) + axes[row_idx, 0].set_ylabel(label + " 
samples") + + for idx, model in enumerate([ridge, poisson, rf]): + y_pred = model.predict(df) + + pd.Series(y_pred).hist(bins=np.linspace(-1, 4, n_bins), + ax=axes[row_idx, idx+1]) + axes[row_idx, idx + 1].set( + title=model[-1].__class__.__name__, + yscale='log', + xlabel="y_pred (predicted expected Frequency)" + ) +plt.tight_layout() ############################################################################## # From 31f5b3d6a7a5e01055bc8bd914b51d3d92bcafb5 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 9 Oct 2019 16:30:37 +0200 Subject: [PATCH 195/269] Compound Poisson => Compound Poisson Gamma --- ...lot_tweedie_regression_insurance_claims.py | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index dfd5555c83af4..3bdb7d93f0130 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -410,15 +410,15 @@ def score_estimator( # Overall, the drivers age (``DrivAge``) has a weak impact on the claim # severity, both in observed and predicted data. # -# 4. Total claim amount -- Compound Poisson distribution -# ------------------------------------------------------- +# 4. Total claim amount -- Compound Poisson Gamma distribution +# ------------------------------------------------------------ # # As mentioned in the introduction, the total claim amount can be modeled # either as the product of the frequency model by the severity model, # denormalized by exposure. In the following code sample, the -# ``score_estimator`` is extended to score such a model. The mean deviance -# is computed assuming a Tweedie distribution with ``power=1.5`` to be -# comparable with the model from the following section, +# ``score_estimator`` is extended to score such a model. The mean deviance is +# computed assuming a Tweedie distribution with ``power=1.5`` to be comparable +# with the model from the following section, scores = score_estimator( @@ -436,9 +436,9 @@ def score_estimator( ############################################################################## # # Indeed, an alternative approach for modeling the total loss is with a unique -# Compound Poisson model, also corresponding to a Tweedie model -# with a power :math:`p \in (1, 2)`. We determine the optimal hyperparameter -# ``p`` with a grid search, +# Compound Poisson Gamma model, also corresponding to a Tweedie model with a +# power :math:`p \in (1, 2)`. We determine the optimal hyperparameter ``p`` +# with a grid search: from sklearn.model_selection import GridSearchCV @@ -475,12 +475,13 @@ def score_estimator( ############################################################################## # # In this example, the mean absolute error is lower for the Compound Poisson -# model than when using separate models for frequency and severity. +# Gamma model than when using the product of the predictions of separate +# models for frequency and severity. # -# We can additionally validate these models by comparing observed and predicted -# total claim amount over the test and train subsets. We see that in our case -# the frequency-severity model underestimates the total claim amount, whereas -# the Tweedie model overestimates. +# We can additionally validate these models by comparing observed and +# predicted total claim amount over the test and train subsets. 
We see that, +# on average, the frequency-severity model underestimates the total claim +# amount, whereas the Tweedie model overestimates. res = [] for subset_label, X, df in [ From a498ff546da062fb4f6c33bef9d18e45e18249c8 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 9 Oct 2019 17:07:02 +0200 Subject: [PATCH 196/269] Compound Poisson => Compound Poisson Gamma --- sklearn/linear_model/_glm/glm.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index dd6f847895434..aae438733d424 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -28,7 +28,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): - """Regression via a Generalized Linear Model (GLM) with penalties. + """Regression via a penalized Generalized Linear Model (GLM). GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at fitting and predicting the mean of the target y as y_pred=h(X*w). @@ -575,19 +575,19 @@ class TweedieRegressor(GeneralizedLinearRegressor): Special cases are: - +-------+------------------+ - | Power | Distribution | - +=======+==================+ - | 0 | Normal | - +-------+------------------+ - | 1 | Poisson | - +-------+------------------+ - | (1,2) | Compound Poisson | - +-------+------------------+ - | 2 | Gamma | - +-------+------------------+ - | 3 | Inverse Gaussian | - +-------+------------------+ + +-------+------------------------+ + | Power | Distribution | + +=======+========================+ + | 0 | Normal | + +-------+------------------------+ + | 1 | Poisson | + +-------+------------------------+ + | (1,2) | Compound Poisson Gamma | + +-------+------------------------+ + | 2 | Gamma | + +-------+------------------------+ + | 3 | Inverse Gaussian | + +-------+------------------------+ alpha : float, optional (default=1) Constant that multiplies the penalty terms and thus determines the From 3fae28a06968ab8ce0d95b5e4a53b2e7a7d66205 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Wed, 9 Oct 2019 19:23:19 +0200 Subject: [PATCH 197/269] Various improvement in Tweedie regression example --- ...lot_tweedie_regression_insurance_claims.py | 156 +++++++++++++----- 1 file changed, 118 insertions(+), 38 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 3bdb7d93f0130..4b450fe34bb1e 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -48,7 +48,7 @@ from sklearn.preprocessing import FunctionTransformer, OneHotEncoder from sklearn.preprocessing import StandardScaler, KBinsDiscretizer -from sklearn.metrics import mean_absolute_error, mean_squared_error +from sklearn.metrics import mean_absolute_error, mean_squared_error, auc def load_mtpl2(n_samples=100000): @@ -157,11 +157,14 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, column_trans = ColumnTransformer( [ - ("binned_numeric", KBinsDiscretizer(n_bins=10), ["VehAge", "DrivAge"]), + ("binned_numeric", KBinsDiscretizer(n_bins=10), + ["VehAge", "DrivAge"]), ("onehot_categorical", OneHotEncoder(), - ["VehBrand", "VehPower", "VehGas", "Region", "Area"]), - ("passthrough_numeric", "passthrough", ["BonusMalus"]), - ("log_scaled_numeric", log_scale_transformer, ["Density"]), + ["VehBrand", "VehPower", "VehGas", "Region", "Area"]), + 
("passthrough_numeric", "passthrough", + ["BonusMalus"]), + ("log_scaled_numeric", log_scale_transformer, + ["Density"]), ], remainder="drop", ) @@ -194,7 +197,8 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, def score_estimator( - estimator, X_train, X_test, df_train, df_test, target, weights + estimator, X_train, X_test, df_train, df_test, target, weights, + power=None, ): """Evaluate an estimator on train and test sets with different metrics""" res = [] @@ -217,13 +221,15 @@ def score_estimator( est_freq, est_sev = estimator y_pred = (df.Exposure.values * est_freq.predict(X) * est_sev.predict(X)) - power = 1.5 else: y_pred = estimator.predict(X) - power = getattr(getattr(estimator, "_family_instance"), - "power") + if power is None: + power = getattr(getattr(estimator, "_family_instance"), + "power") if score_label == "mean deviance": + if power is None: + continue metric = partial(mean_tweedie_deviance, power=power) if metric is None: @@ -378,7 +384,7 @@ def score_estimator( # We can visually compare observed and predicted values, aggregated for # the drivers age (``DrivAge``). -fig, ax = plt.subplots(ncols=1, nrows=2, figsize=(16, 4)) +fig, ax = plt.subplots(ncols=1, nrows=2, figsize=(16, 6)) # plot DivAge plot_obs_pred( @@ -403,7 +409,7 @@ def score_estimator( ax=ax[1], fill_legend=True ) - +plt.tight_layout() ############################################################################## # @@ -411,16 +417,16 @@ def score_estimator( # severity, both in observed and predicted data. # # 4. Total claim amount -- Compound Poisson Gamma distribution -# ------------------------------------------------------------ +# ------------------------------------------------------------ # # As mentioned in the introduction, the total claim amount can be modeled # either as the product of the frequency model by the severity model, # denormalized by exposure. In the following code sample, the # ``score_estimator`` is extended to score such a model. The mean deviance is -# computed assuming a Tweedie distribution with ``power=1.5`` to be comparable -# with the model from the following section, - +# computed assuming a Tweedie distribution with ``power=2`` to be comparable +# with the model from the following section: +eps = 1e-4 scores = score_estimator( (glm_freq, glm_sev), X_train, @@ -429,40 +435,54 @@ def score_estimator( df_test, target="ClaimAmount", weights="Exposure", + power=2-eps, ) print(scores) ############################################################################## # -# Indeed, an alternative approach for modeling the total loss is with a unique -# Compound Poisson Gamma model, also corresponding to a Tweedie model with a -# power :math:`p \in (1, 2)`. We determine the optimal hyperparameter ``p`` -# with a grid search: +# Instead of taking the product of two independently fit models for frequency +# and severity one can directly model the total loss is with a unique Compound +# Poisson Gamma generalized linear model (with a log link function). This +# model is a special case of the Tweedie model with a power parameter :math:`p +# \in (1, 2)`. +# +# We determine the optimal hyperparameter ``p`` with a grid search so as to +# minimize the deviance: from sklearn.model_selection import GridSearchCV -# exclude upper bound as power>=2 does not support y=0. 
-params = {"power": np.linspace(1 + 1e-4, 2 - 1e-4, 8)} - - -# this takes a while -glm_total = GridSearchCV( - TweedieRegressor(tol=1e-3, max_iter=500), cv=3, - param_grid=params, n_jobs=-1 +# exclude upper bound as power>=2 as p=2 would lead to an undefined unit +# deviance on data points with y=0. +params = {"power": np.linspace(1 + eps, 2 - eps, 5)} + +X_train_small, _, df_train_small, _ = train_test_split( + X_train, df_train, train_size=5000) + +# This can takes a while on the full training set, therefore we do the +# hyper-parameter search on a random subset, hoping that the best value of +# power does not depend too much on the dataset size. We use a bit +# penalization to avoid numerical issues with colinear features and speed-up +# convergence. +glm_total = TweedieRegressor(max_iter=10000, alpha=1e-2) +search = GridSearchCV( + glm_total, cv=3, + param_grid=params, n_jobs=-1, verbose=10, + refit=False, ) -glm_total.fit( - X_train, df_train["ClaimAmount"], sample_weight=df_train["Exposure"] +search.fit( + X_train_small, df_train_small["ClaimAmount"], + sample_weight=df_train_small["Exposure"] ) +print("Best hyper-parameters: %s" % search.best_params_) - -print( - "Best hyperparameters: power=%.2f\n" - % glm_total.best_estimator_.family.power -) +glm_total.set_params(**search.best_params_) +glm_total.fit(X_train, df_train["ClaimAmount"], + sample_weight=df_train["Exposure"]) scores = score_estimator( - glm_total.best_estimator_, + glm_total, X_train, X_test, df_train, @@ -496,11 +516,71 @@ def score_estimator( df["Exposure"].values*glm_freq.predict(X)*glm_sev.predict(X) ), "predicted, tweedie, power=%.2f" - % glm_total.best_estimator_.family.power: np.sum( - glm_total.best_estimator_.predict(X) - ), + % glm_total.power: np.sum(glm_total.predict(X)), } ) print(pd.DataFrame(res).set_index("subset").T) + +############################################################################## +# +# Finally, we can compare the two models using a plot of cumulated claims: for +# each model, the policyholders are ranked from riskiest to safest and the +# actual cumulated claims are plotted against the cumulated exposure. +# +# The area under the curve can be used as a model selection metric to quantify +# the ability of the model to rank policyholders. Note that this metric does +# not reflect the ability of the models to make accurate predictions in terms +# of absolute value of total claim amounts but only in terms of relative +# amounts as a ranking metric. +# +# Both models are able to rank policyholders by risky-ness significantly +# better than chance although they are also both far from perfect due to the +# natural difficulty of the prediction problem from few features. 
+ + +def _cumulated_claims(y_true, y_pred, exposure): + idx_sort = np.argsort(y_pred)[::-1] # from riskiest to safest + sorted_exposure = exposure[idx_sort] + sorted_frequencies = y_true[idx_sort] + cumulated_exposure = np.cumsum(sorted_exposure) + cumulated_exposure /= cumulated_exposure[-1] + cumulated_claims = np.cumsum(sorted_exposure * sorted_frequencies) + cumulated_claims /= cumulated_claims[-1] + return cumulated_exposure, cumulated_claims + + +fig, ax = plt.subplots(figsize=(8, 8)) + +y_pred_product = glm_freq.predict(X_test) * glm_sev.predict(X_test) +y_pred_total = glm_total.predict(X_test) + +for label, y_pred in [("Frequency * Severity model", y_pred_product), + ("Compound Poisson Gamma", y_pred_total)]: + cum_exposure, cum_claims = _cumulated_claims( + df_test["Frequency"].values, + y_pred, + df_test["Exposure"].values) + area = auc(cum_exposure, cum_claims) + label += " (area under curve: {:.3f})".format(area) + ax.plot(cum_exposure, cum_claims, linestyle="-", label=label) + +# Oracle model: y_pred == y_test +cum_exposure, cum_claims = _cumulated_claims( + df_test["Frequency"].values, + df_test["Frequency"].values, + df_test["Exposure"].values) +area = auc(cum_exposure, cum_claims) +label = "Oracle (area under curve: {:.3f})".format(area) +ax.plot(cum_exposure, cum_claims, linestyle="-.", color="gray", label=label) + +# Random Baseline +ax.plot([0, 1], [0, 1], linestyle="--", color="black", + label="Random baseline") +ax.set( + title="Cumulated claim amount by model", + xlabel='Fraction of exposure (from riskiest to safest)', + ylabel='Fraction of total claim amount' +) +ax.legend(loc="lower right") plt.plot() From a47798afe6e30e75b66f274a2323838c6a1401ea Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 10 Oct 2019 11:07:09 +0200 Subject: [PATCH 198/269] Update doc/modules/linear_model.rst Co-Authored-By: Thomas J Fan --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index d9e60a3517f8e..3119b9b0db94b 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -915,7 +915,7 @@ problem becomes .. math:: \min_{w} \frac{1}{2 \sum_i s_i} \sum_i s_i \cdot d(y_i, \hat{y}(w, X_i)) + \frac{\alpha}{2} ||w||_2 -with sample weights :math:`s`, and L2 regularization penalty :math:`\alpha`. +with sample weights :math:`s_i`, and L2 regularization penalty :math:`\alpha`. The unit deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` likelihood as From 83391dd56bac107b21eea4cb258f3831a56d02ff Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 10 Oct 2019 11:49:53 +0200 Subject: [PATCH 199/269] Use latest docstring conventions everywhere --- sklearn/_loss/glm_distribution.py | 36 +++++------ sklearn/linear_model/_glm/glm.py | 100 +++++++++++++++--------------- sklearn/linear_model/_glm/link.py | 8 +-- sklearn/metrics/regression.py | 6 +- 4 files changed, 74 insertions(+), 76 deletions(-) diff --git a/sklearn/_loss/glm_distribution.py b/sklearn/_loss/glm_distribution.py index 5f9e9ed06847c..4020f74427c44 100644 --- a/sklearn/_loss/glm_distribution.py +++ b/sklearn/_loss/glm_distribution.py @@ -60,7 +60,7 @@ def in_y_range(self, y): Parameters ---------- - y : array, shape (n_samples,) + y : array of shape (n_samples,) Target values. 
""" # Note that currently supported distributions have +inf upper bound @@ -92,7 +92,7 @@ def unit_variance(self, y_pred): Parameters ---------- - y_pred : array, shape (n_samples,) + y_pred : array of shape (n_samples,) Predicted mean. """ pass # pragma: no cover @@ -105,7 +105,7 @@ def unit_variance_derivative(self, y_pred): Parameters ---------- - y_pred : array, shape (n_samples,) + y_pred : array of shape (n_samples,) Target values. """ pass # pragma: no cover @@ -121,10 +121,10 @@ def unit_deviance(self, y, y_pred, check_input=False): Parameters ---------- - y : array, shape (n_samples,) + y : array of shape (n_samples,) Target values. - y_pred : array, shape (n_samples,) + y_pred : array of shape (n_samples,) Predicted mean. check_input : bool, default=False @@ -132,7 +132,7 @@ def unit_deviance(self, y, y_pred, check_input=False): they will be propagated as NaN. Returns ------- - deviance: array, shape (n_samples,) + deviance: array of shape (n_samples,) Computed deviance """ pass # pragma: no cover @@ -147,10 +147,10 @@ def unit_deviance_derivative(self, y, y_pred): Parameters ---------- - y : array, shape (n_samples,) + y : array of shape (n_samples,) Target values. - y_pred : array, shape (n_samples,) + y_pred : array of shape (n_samples,) Predicted mean. """ return -2 * (y - y_pred) / self.unit_variance(y_pred) @@ -168,13 +168,13 @@ def deviance(self, y, y_pred, weights=1): Parameters ---------- - y : array, shape (n_samples,) + y : array of shape (n_samples,) Target values. - y_pred : array, shape (n_samples,) + y_pred : array of shape (n_samples,) Predicted mean. - weights : array, shape (n_samples,) (default=1) + weights : {int, array of shape (n_samples,)}, default=1 Weights or exposure to which variance is inverse proportional. """ return np.sum(weights * self.unit_deviance(y, y_pred)) @@ -193,7 +193,7 @@ def deviance_derivative(self, y, y_pred, weights=1): y_pred : array, shape (n_samples,) Predicted mean. - weights : array, shape (n_samples,) (default=1) + weights : {int, array of shape (n_samples,)}, default=1 Weights or exposure to which variance is inverse proportional. """ return weights * self.unit_deviance_derivative(y, y_pred) @@ -231,7 +231,7 @@ class TweedieDistribution(ExponentialDispersionModel): Parameters ---------- - power : float (default=0) + power : float, default=0 The variance power of the `unit_variance` :math:`v(y_\textrm{pred}) = y_\textrm{pred}^{power}`. For ``0 0. - sample_weight : array-like, shape (n_samples,), optional + sample_weight : array-like of shape (n_samples,), default=None Sample weights. Returns @@ -743,7 +743,7 @@ def mean_gamma_deviance(y_true, y_pred, sample_weight=None): y_pred : array-like of shape (n_samples,) Estimated target values. Requires y_pred > 0. - sample_weight : array-like, shape (n_samples,), optional + sample_weight : array-like of shape (n_samples,), default=None Sample weights. 
Returns From 3bfb54e9e172b6f2cae3e48a7ece4d011937e786 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 10 Oct 2019 11:53:42 +0200 Subject: [PATCH 200/269] Drop check_input parameter --- sklearn/linear_model/_glm/glm.py | 22 ++++++--------------- sklearn/linear_model/_glm/tests/test_glm.py | 12 +---------- 2 files changed, 7 insertions(+), 27 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 2012f3cbb32a9..990de8114a717 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -92,11 +92,6 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): copy_X : bool, default=True If ``True``, X will be copied; else, it may be overwritten. - check_input : bool, default=True - Allow to bypass several checks on input: y values in range of family, - sample_weight non-negative. - Don't use this parameter unless you know what you do. - verbose : int, default=0 For the lbfgs solver set verbose to any positive number for verbosity. @@ -115,7 +110,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): def __init__(self, alpha=1.0, fit_intercept=True, family='normal', link='auto', solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, - copy_X=True, check_input=True, verbose=0): + copy_X=True, verbose=0): self.alpha = alpha self.fit_intercept = fit_intercept self.family = family @@ -125,7 +120,6 @@ def __init__(self, alpha=1.0, self.tol = tol self.warm_start = warm_start self.copy_X = copy_X - self.check_input = check_input self.verbose = verbose def fit(self, X, y, sample_weight=None): @@ -213,9 +207,6 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.copy_X, bool): raise ValueError("The argument copy_X must be bool;" " got {0}".format(self.copy_X)) - if not isinstance(self.check_input, bool): - raise ValueError("The argument check_input must be bool; got " - "(check_input={0})".format(self.check_input)) family = self._family_instance link = self._link_instance @@ -228,12 +219,11 @@ def fit(self, X, y, sample_weight=None): _, n_features = X.shape - if self.check_input: - if not np.all(family.in_y_range(y)): - raise ValueError("Some value(s) of y are out of the valid " - "range for family {0}" - .format(family.__class__.__name__)) - # TODO: if alpha=0 check that X is not rank deficient + if not np.all(family.in_y_range(y)): + raise ValueError("Some value(s) of y are out of the valid " + "range for family {0}" + .format(family.__class__.__name__)) + # TODO: if alpha=0 check that X is not rank deficient # rescaling of sample_weight # diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 9e21ae7775cf4..c0ff6508db9c9 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -164,16 +164,6 @@ def test_glm_copy_X_argument(copy_X): glm.fit(X, y) -@pytest.mark.parametrize('check_input', ['not bool', 1, 0, [True]]) -def test_glm_check_input_argument(check_input): - """Test GLM for invalid check_input argument.""" - y = np.array([1, 2]) - X = np.array([[1], [1]]) - glm = GeneralizedLinearRegressor(check_input=check_input) - with pytest.raises(ValueError, match="check_input must be bool"): - glm.fit(X, y) - - def test_glm_identity_regression(): """Test GLM regression with identity link on a simple dataset.""" coef = [1., 2.] 
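# Editorial sketch (not part of this patch): with ``check_input`` removed, the
# y-range validation always runs. A hypothetical test for it, reusing the
# imports already present in this module, could look like:
def test_glm_y_out_of_valid_range():
    # The Poisson family only accepts non-negative targets, so a negative y
    # must be rejected during fit.
    X = np.array([[1.], [2.]])
    y = np.array([-1., 2.])
    glm = GeneralizedLinearRegressor(family='poisson')
    with pytest.raises(ValueError, match="out of the valid range"):
        glm.fit(X, y)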
@@ -291,7 +281,7 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept): glm = GeneralizedLinearRegressor(alpha=1.0, family='normal', link='identity', fit_intercept=True, - check_input=False, max_iter=300) + max_iter=300) glm.fit(X_train, y_train) assert glm.coef_.shape == (X.shape[1], ) assert_allclose(glm.coef_, ridge.coef_, atol=5e-5) From d325fe23348f8dabfcc55dd4fbd8fa82fd60ff8d Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 10 Oct 2019 11:57:10 +0200 Subject: [PATCH 201/269] Use keyword only arguments SLEP009 --- sklearn/linear_model/_glm/glm.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 990de8114a717..8ef912f9596b6 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -107,7 +107,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): n_iter_ : int Actual number of iterations used in the solver. """ - def __init__(self, alpha=1.0, + def __init__(self, *, alpha=1.0, fit_intercept=True, family='normal', link='auto', solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, copy_X=True, verbose=0): @@ -435,8 +435,8 @@ class PoissonRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in the solver. """ - def __init__(self, alpha=1.0, fit_intercept=True, max_iter=100, tol=1e-4, - warm_start=False, copy_X=True, verbose=0): + def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100, + tol=1e-4, warm_start=False, copy_X=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="poisson", link='log', max_iter=max_iter, @@ -514,8 +514,8 @@ class GammaRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in the solver. """ - def __init__(self, alpha=1.0, fit_intercept=True, max_iter=100, tol=1e-4, - warm_start=False, copy_X=True, verbose=0): + def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100, + tol=1e-4, warm_start=False, copy_X=True, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="gamma", link='log', max_iter=max_iter, @@ -626,7 +626,7 @@ class TweedieRegressor(GeneralizedLinearRegressor): n_iter_ : int Actual number of iterations used in the solver. 
""" - def __init__(self, power=0.0, alpha=1.0, fit_intercept=True, + def __init__(self, *, power=0.0, alpha=1.0, fit_intercept=True, link='auto', max_iter=100, tol=1e-4, warm_start=False, copy_X=True, verbose=0): From 661cf56e04bc23e53a7173bc43d998187892d9ec Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 10 Oct 2019 12:04:37 +0200 Subject: [PATCH 202/269] Move _y_pred_deviance_derivative from losses as a private function --- sklearn/_loss/glm_distribution.py | 19 ------------------- sklearn/linear_model/_glm/glm.py | 25 +++++++++++++++++++++++-- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/sklearn/_loss/glm_distribution.py b/sklearn/_loss/glm_distribution.py index 4020f74427c44..55365d382c03b 100644 --- a/sklearn/_loss/glm_distribution.py +++ b/sklearn/_loss/glm_distribution.py @@ -13,14 +13,6 @@ from scipy.special import xlogy -def _safe_lin_pred(X, coef): - """Compute the linear predictor taking care if intercept is present.""" - if coef.size == X.shape[1] + 1: - return X @ coef[1:] + coef[0] - else: - return X @ coef - - DistributionBoundary = namedtuple("DistributionBoundary", ("value", "inclusive")) @@ -198,17 +190,6 @@ def deviance_derivative(self, y, y_pred, weights=1): """ return weights * self.unit_deviance_derivative(y, y_pred) - def _y_pred_deviance_derivative(self, coef, X, y, weights, link): - """Compute y_pred and the derivative of the deviance w.r.t coef.""" - lin_pred = _safe_lin_pred(X, coef) - y_pred = link.inverse(lin_pred) - d1 = link.inverse_derivative(lin_pred) - temp = d1 * self.deviance_derivative(y, y_pred, weights) - if coef.size == X.shape[1] + 1: - devp = np.concatenate(([temp.sum()], temp @ X)) - else: - devp = temp @ X # same as X.T @ temp - return y_pred, devp class TweedieDistribution(ExponentialDispersionModel): diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 8ef912f9596b6..b29dcd89a35a6 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -27,6 +27,27 @@ ) +def _safe_lin_pred(X, coef): + """Compute the linear predictor taking care if intercept is present.""" + if coef.size == X.shape[1] + 1: + return X @ coef[1:] + coef[0] + else: + return X @ coef + + +def _y_pred_deviance_derivative(coef, X, y, weights, family, link): + """Compute y_pred and the derivative of the deviance w.r.t coef.""" + lin_pred = _safe_lin_pred(X, coef) + y_pred = link.inverse(lin_pred) + d1 = link.inverse_derivative(lin_pred) + temp = d1 * family.deviance_derivative(y, y_pred, weights) + if coef.size == X.shape[1] + 1: + devp = np.concatenate(([temp.sum()], temp @ X)) + else: + devp = temp @ X # same as X.T @ temp + return y_pred, devp + + class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): """Regression via a penalized Generalized Linear Model (GLM). 
@@ -251,8 +272,8 @@ def fit(self, X, y, sample_weight=None): if solver == 'lbfgs': def func(coef, X, y, weights, alpha, family, link): - y_pred, devp = family._y_pred_deviance_derivative( - coef, X, y, weights, link + y_pred, devp = _y_pred_deviance_derivative( + coef, X, y, weights, family, link ) dev = family.deviance(y, y_pred, weights) intercept = (coef.size == X.shape[1] + 1) From 560c180fee5c1217353af106beeca04a0b9bd732 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 10 Oct 2019 15:28:21 +0200 Subject: [PATCH 203/269] Fix cumulated claim amount curve in Tweedie regression example --- ...lot_tweedie_regression_insurance_claims.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 4b450fe34bb1e..7e4a8599dec0e 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -539,15 +539,15 @@ def score_estimator( # natural difficulty of the prediction problem from few features. -def _cumulated_claims(y_true, y_pred, exposure): - idx_sort = np.argsort(y_pred)[::-1] # from riskiest to safest - sorted_exposure = exposure[idx_sort] - sorted_frequencies = y_true[idx_sort] - cumulated_exposure = np.cumsum(sorted_exposure) +def _cumulated_claim_amount(y_true, y_pred, exposure): + ranking = np.argsort(y_pred)[::-1] # from riskiest to safest + ranked_exposure = exposure[ranking] + ranked_claim_amount = y_true[ranking] + cumulated_exposure = np.cumsum(ranked_exposure) cumulated_exposure /= cumulated_exposure[-1] - cumulated_claims = np.cumsum(sorted_exposure * sorted_frequencies) - cumulated_claims /= cumulated_claims[-1] - return cumulated_exposure, cumulated_claims + cumulated_claim_amount = np.cumsum(ranked_claim_amount) + cumulated_claim_amount /= cumulated_claim_amount[-1] + return cumulated_exposure, cumulated_claim_amount fig, ax = plt.subplots(figsize=(8, 8)) @@ -557,8 +557,8 @@ def _cumulated_claims(y_true, y_pred, exposure): for label, y_pred in [("Frequency * Severity model", y_pred_product), ("Compound Poisson Gamma", y_pred_total)]: - cum_exposure, cum_claims = _cumulated_claims( - df_test["Frequency"].values, + cum_exposure, cum_claims = _cumulated_claim_amount( + df_test["ClaimAmount"].values, y_pred, df_test["Exposure"].values) area = auc(cum_exposure, cum_claims) @@ -566,9 +566,9 @@ def _cumulated_claims(y_true, y_pred, exposure): ax.plot(cum_exposure, cum_claims, linestyle="-", label=label) # Oracle model: y_pred == y_test -cum_exposure, cum_claims = _cumulated_claims( - df_test["Frequency"].values, - df_test["Frequency"].values, +cum_exposure, cum_claims = _cumulated_claim_amount( + df_test["ClaimAmount"].values, + df_test["ClaimAmount"].values, df_test["Exposure"].values) area = auc(cum_exposure, cum_claims) label = "Oracle (area under curve: {:.3f})".format(area) From 0ea2dce29a52829fb2c0ace0c992f68e1f052f0a Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Thu, 10 Oct 2019 16:15:25 +0200 Subject: [PATCH 204/269] PEP8 --- sklearn/_loss/glm_distribution.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/_loss/glm_distribution.py b/sklearn/_loss/glm_distribution.py index 55365d382c03b..dbfac6af673ae 100644 --- a/sklearn/_loss/glm_distribution.py +++ b/sklearn/_loss/glm_distribution.py @@ -191,7 +191,6 @@ def deviance_derivative(self, y, y_pred, weights=1): return weights * 
self.unit_deviance_derivative(y, y_pred) - class TweedieDistribution(ExponentialDispersionModel): r"""A class for the Tweedie distribution. From 4ca2e95535fd3ba55980fbf437a88df0175ab819 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Mon, 14 Oct 2019 14:50:48 +0200 Subject: [PATCH 205/269] MNT remove function body in abstract methods --- sklearn/_loss/glm_distribution.py | 3 --- sklearn/linear_model/_glm/link.py | 4 ---- 2 files changed, 7 deletions(-) diff --git a/sklearn/_loss/glm_distribution.py b/sklearn/_loss/glm_distribution.py index dbfac6af673ae..920218ea7f674 100644 --- a/sklearn/_loss/glm_distribution.py +++ b/sklearn/_loss/glm_distribution.py @@ -87,7 +87,6 @@ def unit_variance(self, y_pred): y_pred : array of shape (n_samples,) Predicted mean. """ - pass # pragma: no cover @abstractmethod def unit_variance_derivative(self, y_pred): @@ -100,7 +99,6 @@ def unit_variance_derivative(self, y_pred): y_pred : array of shape (n_samples,) Target values. """ - pass # pragma: no cover @abstractmethod def unit_deviance(self, y, y_pred, check_input=False): @@ -127,7 +125,6 @@ def unit_deviance(self, y, y_pred, check_input=False): deviance: array of shape (n_samples,) Computed deviance """ - pass # pragma: no cover def unit_deviance_derivative(self, y, y_pred): r"""Compute the derivative of the unit deviance w.r.t. y_pred. diff --git a/sklearn/linear_model/_glm/link.py b/sklearn/linear_model/_glm/link.py index e8d3c792d3efe..878d8e835bc42 100644 --- a/sklearn/linear_model/_glm/link.py +++ b/sklearn/linear_model/_glm/link.py @@ -26,7 +26,6 @@ def __call__(self, y_pred): y_pred : array of shape (n_samples,) Usually the (predicted) mean. """ - pass # pragma: no cover @abstractmethod def derivative(self, y_pred): @@ -37,7 +36,6 @@ def derivative(self, y_pred): y_pred : array of shape (n_samples,) Usually the (predicted) mean. """ - pass # pragma: no cover @abstractmethod def inverse(self, lin_pred): @@ -51,7 +49,6 @@ def inverse(self, lin_pred): lin_pred : array of shape (n_samples,) Usually the (fitted) linear predictor. """ - pass # pragma: no cover @abstractmethod def inverse_derivative(self, lin_pred): @@ -62,7 +59,6 @@ def inverse_derivative(self, lin_pred): lin_pred : array of shape (n_samples,) Usually the (fitted) linear predictor. """ - pass # pragma: no cover class IdentityLink(BaseLink): From 89b429d921bb65f2f8dcd431dee2720152a4abbe Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 14 Oct 2019 16:05:59 +0200 Subject: [PATCH 206/269] Improvements to Pure Premium example --- ...lot_tweedie_regression_insurance_claims.py | 258 +++++++++--------- 1 file changed, 132 insertions(+), 126 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 7e4a8599dec0e..0dd0ed25f4a02 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -130,8 +130,8 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, ############################################################################## # -# 1. 
Loading datasets and pre-processing -# -------------------------------------- +# Loading datasets, basic feature extraction and target definitions +# ----------------------------------------------------------------- # # We construct the freMTPL2 dataset by joining the freMTPL2freq table, # containing the number of claims (``ClaimNb``), with the freMTPL2sev table, @@ -170,7 +170,13 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, ) X = column_trans.fit_transform(df) +# Insurances companies are interested in modeling the Pure Premium, that is +# the expected total claim amount per unit of exposure for each policyholder +# in their portfolio: +df["PurePremium"] = df["ClaimAmount"] / df["Exposure"] +# This can be inderectly approximated by a 2-step modeling the product of the +# Frequency times the average claim amount per claim: df["Frequency"] = df["ClaimNb"] / df["Exposure"] df["AvgClaimAmount"] = df["ClaimAmount"] / np.fmax(df["ClaimNb"], 1) @@ -178,8 +184,8 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, ############################################################################## # -# 2. Frequency model -- Poisson distribution -# ------------------------------------------- +# Frequency model -- Poisson distribution +# --------------------------------------- # # The number of claims (``ClaimNb``) is a positive integer that can be modeled # as a Poisson distribution. It is then assumed to be the number of discrete @@ -190,47 +196,50 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0) -# Some of the features are colinear, we use a weak penalization to avoid -# numerical issues. -glm_freq = PoissonRegressor(alpha=1e-2) -glm_freq.fit(X_train, df_train.Frequency, sample_weight=df_train.Exposure) +# The parameters of the model are estimated by minimizing the Poisson deviance +# on the training set via a quasi-Newton solver: l-BFGS. Some of the features +# are colinear, we use a weak penalization to avoid numerical issues. +glm_freq = PoissonRegressor(alpha=1e-3) +glm_freq.fit(X_train, df_train["Frequency"], + sample_weight=df_train["Exposure"]) def score_estimator( estimator, X_train, X_test, df_train, df_test, target, weights, - power=None, + tweedie_powers=None, ): """Evaluate an estimator on train and test sets with different metrics""" - res = [] + if isinstance(estimator, tuple): + model_name = " * ".join(e.__class__.__name__ for e in estimator) + else: + model_name = estimator.__class__.__name__ + print("\nEvaluation of {} of target {} ".format(model_name, target)) + + metrics = [ + ("D² explained", None), + ("mean abs. error", mean_absolute_error), + ("mean squared error", mean_squared_error), + ] + if tweedie_powers: + metrics += [( + "mean Tweedie deviance (p={:.4f})".format(power), + partial(mean_tweedie_deviance, power=power) + ) for power in tweedie_powers] + res = [] for subset_label, X, df in [ ("train", X_train, df_train), ("test", X_test, df_test), ]: y, _weights = df[target], df[weights] - - for score_label, metric in [ - ("D² explained", None), - ("mean deviance", mean_tweedie_deviance), - ("mean abs. error", mean_absolute_error), - ("mean squared error", mean_squared_error), - ]: + for score_label, metric in metrics: if isinstance(estimator, tuple) and len(estimator) == 2: # Score the model consisting of the product of frequency and - # severity models, denormalized by the exposure values. + # severity models. 
est_freq, est_sev = estimator - y_pred = (df.Exposure.values * est_freq.predict(X) * - est_sev.predict(X)) + y_pred = est_freq.predict(X) * est_sev.predict(X) else: y_pred = estimator.predict(X) - if power is None: - power = getattr(getattr(estimator, "_family_instance"), - "power") - - if score_label == "mean deviance": - if power is None: - continue - metric = partial(mean_tweedie_deviance, power=power) if metric is None: if not hasattr(estimator, "score"): @@ -266,8 +275,8 @@ def score_estimator( ############################################################################## # -# We can visually compare observed and predicted values, aggregated by -# the drivers age (``DrivAge``), vehicle age (``VehAge``) and the insurance +# We can visually compare observed and predicted values, aggregated by the +# drivers age (``DrivAge``), vehicle age (``VehAge``) and the insurance # bonus/malus (``BonusMalus``). fig, ax = plt.subplots(ncols=2, nrows=2, figsize=(16, 8)) @@ -325,11 +334,11 @@ def score_estimator( # # According to the observed data, the frequency of accidents is higher for # drivers younger than 30 years old, and it positively correlated with the -# `BonusMalus` variable. Our model is able to mostly correctly model -# this behaviour. +# `BonusMalus` variable. Our model is able to mostly correctly model this +# behaviour. # -# 3. Severity model - Gamma distribution -# --------------------------------------- +# Severity Model - Gamma distribution +# ------------------------------------ # The mean claim amount or severity (`AvgClaimAmount`) can be empirically # shown to follow approximately a Gamma distribution. We fit a GLM model for # the severity with the same features as the frequency model. @@ -343,7 +352,7 @@ def score_estimator( mask_train = df_train["ClaimAmount"] > 0 mask_test = df_test["ClaimAmount"] > 0 -glm_sev = GammaRegressor() +glm_sev = GammaRegressor(alpha=10., max_iter=10000) glm_sev.fit( X_train[mask_train.values], @@ -351,7 +360,6 @@ def score_estimator( sample_weight=df_train.loc[mask_train, "ClaimNb"], ) - scores = score_estimator( glm_sev, X_train[mask_train.values], @@ -365,11 +373,13 @@ def score_estimator( ############################################################################## # -# Here, the scores for the test data call for caution as they are significantly -# worse than for the training data indicating an overfit. -# Note that the resulting model is the average claim amount per claim. As such, -# it is conditional on having at least one claim, and cannot be used to predict -# the average claim amount per policy in general. +# Here, the scores for the test data call for caution as they are +# significantly worse than for the training data indicating an overfit despite +# the strong regularization. +# +# Note that the resulting model is the average claim amount per claim. As +# such, it is conditional on having at least one claim, and cannot be used to +# predict the average claim amount per policy in general. print("Mean AvgClaim Amount per policy: %.2f " % df_train["AvgClaimAmount"].mean()) @@ -386,7 +396,6 @@ def score_estimator( fig, ax = plt.subplots(ncols=1, nrows=2, figsize=(16, 6)) -# plot DivAge plot_obs_pred( df=df_train.loc[mask_train], feature="DrivAge", @@ -416,79 +425,63 @@ def score_estimator( # Overall, the drivers age (``DrivAge``) has a weak impact on the claim # severity, both in observed and predicted data. # -# 4. 
Total claim amount -- Compound Poisson Gamma distribution -# ------------------------------------------------------------ +# Pure Premium Modeling via a Product of Frequency and Severity Models +# -------------------------------------------------------------------- +# As mentioned in the introduction, the total claim amount per unit of +# exposure can be modeled either as the product of the frequency model by the +# severity model. +# +# To quantify the aggregate performance of this product model, one can compute +# the deviance of Tweedie distribution which is equivalent to a com. +# In the following code sample, the ``score_estimator`` is extended to score +# such a model. # -# As mentioned in the introduction, the total claim amount can be modeled -# either as the product of the frequency model by the severity model, -# denormalized by exposure. In the following code sample, the -# ``score_estimator`` is extended to score such a model. The mean deviance is -# computed assuming a Tweedie distribution with ``power=2`` to be comparable -# with the model from the following section: - -eps = 1e-4 +# The mean deviance is computed assuming a Tweedie distribution with a fixed +# grid of values for the power parameter to be comparable with the model from +# the following section: + +tweedie_powers = [1.5, 1.7, 1.8, 1.9, 1.99, 1.999, 1.9999] scores = score_estimator( (glm_freq, glm_sev), X_train, X_test, df_train, df_test, - target="ClaimAmount", + target="PurePremium", weights="Exposure", - power=2-eps, + tweedie_powers=tweedie_powers, ) print(scores) ############################################################################## # +# Pure Premium Modeling Using a Single Compound Poisson Gamma Model +# ----------------------------------------------------------------- # Instead of taking the product of two independently fit models for frequency # and severity one can directly model the total loss is with a unique Compound # Poisson Gamma generalized linear model (with a log link function). This -# model is a special case of the Tweedie model with a power parameter :math:`p +# model is a special case of the Tweedie GLM with a "power" parameter :math:`p # \in (1, 2)`. # -# We determine the optimal hyperparameter ``p`` with a grid search so as to -# minimize the deviance: - -from sklearn.model_selection import GridSearchCV - -# exclude upper bound as power>=2 as p=2 would lead to an undefined unit -# deviance on data points with y=0. -params = {"power": np.linspace(1 + eps, 2 - eps, 5)} - -X_train_small, _, df_train_small, _ = train_test_split( - X_train, df_train, train_size=5000) - -# This can takes a while on the full training set, therefore we do the -# hyper-parameter search on a random subset, hoping that the best value of -# power does not depend too much on the dataset size. We use a bit -# penalization to avoid numerical issues with colinear features and speed-up -# convergence. -glm_total = TweedieRegressor(max_iter=10000, alpha=1e-2) -search = GridSearchCV( - glm_total, cv=3, - param_grid=params, n_jobs=-1, verbose=10, - refit=False, -) -search.fit( - X_train_small, df_train_small["ClaimAmount"], - sample_weight=df_train_small["Exposure"] -) -print("Best hyper-parameters: %s" % search.best_params_) +# Here we fix apriori the "power" parameter of the Tweedie model to some +# arbitrary value in the valid range. 
Ideally one would select this value via +# grid-search by minimizing the negative log-likelihood of the Tweedie model +# but unfortunately the current implementation does not allow for this (yet). -glm_total.set_params(**search.best_params_) -glm_total.fit(X_train, df_train["ClaimAmount"], - sample_weight=df_train["Exposure"]) +glm_pure_premium = TweedieRegressor(power=1.999, alpha=.1, max_iter=10000) +glm_pure_premium.fit(X_train, df_train["PurePremium"], + sample_weight=df_train["Exposure"]) scores = score_estimator( - glm_total, + glm_pure_premium, X_train, X_test, df_train, df_test, - target="ClaimAmount", + target="PurePremium", weights="Exposure", + tweedie_powers=tweedie_powers ) print(scores) @@ -500,23 +493,25 @@ def score_estimator( # # We can additionally validate these models by comparing observed and # predicted total claim amount over the test and train subsets. We see that, -# on average, the frequency-severity model underestimates the total claim -# amount, whereas the Tweedie model overestimates. +# on average, both model tend to underestimate the total claim (but this +# behavior depends on the amount of regularization). res = [] for subset_label, X, df in [ ("train", X_train, df_train), ("test", X_test, df_test), ]: + exposure = df["Exposure"].values res.append( { "subset": subset_label, "observed": df["ClaimAmount"].values.sum(), "predicted, frequency*severity model": np.sum( - df["Exposure"].values*glm_freq.predict(X)*glm_sev.predict(X) + exposure * glm_freq.predict(X) * glm_sev.predict(X) ), "predicted, tweedie, power=%.2f" - % glm_total.power: np.sum(glm_total.predict(X)), + % glm_pure_premium.power: np.sum( + exposure * glm_pure_premium.predict(X)), } ) @@ -525,62 +520,73 @@ def score_estimator( ############################################################################## # # Finally, we can compare the two models using a plot of cumulated claims: for -# each model, the policyholders are ranked from riskiest to safest and the -# actual cumulated claims are plotted against the cumulated exposure. +# each model, the policyholders are ranked from safest to riskiest and the +# fraction of observed total cumulated claims is plotted on the y axis. This +# plot is often called the ordered Lorenz curve of the model. # -# The area under the curve can be used as a model selection metric to quantify -# the ability of the model to rank policyholders. Note that this metric does -# not reflect the ability of the models to make accurate predictions in terms -# of absolute value of total claim amounts but only in terms of relative -# amounts as a ranking metric. +# The Gini coefficient (based on the area under the curve) can be used as a +# model selection metric to quantify the ability of the model to rank +# policyholders. Note that this metric does not reflect the ability of the +# models to make accurate predictions in terms of absolute value of total +# claim amounts but only in terms of relative amounts as a ranking metric. # # Both models are able to rank policyholders by risky-ness significantly # better than chance although they are also both far from perfect due to the # natural difficulty of the prediction problem from few features. +# +# Note that the Gini index only characterize the ranking performance of the +# model but not its calibration: any monotonic transformation of the +# predictions leaves the Gini index of the model unchanged. 
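#
# The following self-contained sketch on made-up toy data illustrates that
# last point: a strictly monotonic transformation of the predictions does not
# change their ranking, so the Lorenz curve and the resulting Gini index are
# unchanged. The helper below only mirrors the lorenz_curve/auc logic used in
# this example; the data are fabricated purely for illustration.

import numpy as np
from sklearn.metrics import auc


def _gini_from_ranking(y_true, y_pred, exposure):
    # rank policies by increasing predicted risk and accumulate the observed
    # claim amounts, exactly as in the Lorenz curve computation
    ranking = np.argsort(y_pred)
    cum_claims = np.cumsum(y_true[ranking] * exposure[ranking])
    cum_claims /= cum_claims[-1]
    cum_samples = np.linspace(0, 1, len(cum_claims))
    return 1 - 2 * auc(cum_samples, cum_claims)


rng = np.random.RandomState(0)
toy_true = rng.gamma(shape=1., scale=100., size=1000)   # toy pure premiums
toy_pred = toy_true * rng.uniform(.5, 1.5, size=1000)   # noisy predictions
toy_exposure = np.ones_like(toy_true)

gini = _gini_from_ranking(toy_true, toy_pred, toy_exposure)
gini_monotone = _gini_from_ranking(toy_true, np.log1p(2 * toy_pred),
                                   toy_exposure)
assert np.isclose(gini, gini_monotone)   # same ranking => same Gini index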
+# +# Finally on should highlight that the Compound Poisson Gamma model that +# is directly fit on the pure premium is operationally simpler to develop and +# maintain as it consists in a single scikit-learn estimator instead of a +# pair of models. + +def ordered_lorenz_curve(y_true, y_pred, exposure): + y_true, y_pred = np.asarray(y_true), np.asarray(y_pred) + exposure = np.asarray(exposure) -def _cumulated_claim_amount(y_true, y_pred, exposure): - ranking = np.argsort(y_pred)[::-1] # from riskiest to safest + # order samples by increasing predicted risk: + ranking = np.argsort(y_pred) ranked_exposure = exposure[ranking] - ranked_claim_amount = y_true[ranking] - cumulated_exposure = np.cumsum(ranked_exposure) - cumulated_exposure /= cumulated_exposure[-1] - cumulated_claim_amount = np.cumsum(ranked_claim_amount) + ranked_pure_premium = y_true[ranking] + cumulated_claim_amount = np.cumsum(ranked_pure_premium * ranked_exposure) cumulated_claim_amount /= cumulated_claim_amount[-1] - return cumulated_exposure, cumulated_claim_amount + cumulated_samples = np.linspace(0, 1, len(cumulated_claim_amount)) + return cumulated_samples, cumulated_claim_amount fig, ax = plt.subplots(figsize=(8, 8)) y_pred_product = glm_freq.predict(X_test) * glm_sev.predict(X_test) -y_pred_total = glm_total.predict(X_test) +y_pred_total = glm_pure_premium.predict(X_test) for label, y_pred in [("Frequency * Severity model", y_pred_product), ("Compound Poisson Gamma", y_pred_total)]: - cum_exposure, cum_claims = _cumulated_claim_amount( - df_test["ClaimAmount"].values, - y_pred, - df_test["Exposure"].values) - area = auc(cum_exposure, cum_claims) - label += " (area under curve: {:.3f})".format(area) - ax.plot(cum_exposure, cum_claims, linestyle="-", label=label) + ordered_samples, cum_claims = ordered_lorenz_curve( + df_test["PurePremium"], y_pred, df_test["Exposure"]) + gini = 1 - 2 * auc(ordered_samples, cum_claims) + label += " (Gini index: {:.3f})".format(gini) + ax.plot(ordered_samples, cum_claims, linestyle="-", label=label) # Oracle model: y_pred == y_test -cum_exposure, cum_claims = _cumulated_claim_amount( - df_test["ClaimAmount"].values, - df_test["ClaimAmount"].values, - df_test["Exposure"].values) -area = auc(cum_exposure, cum_claims) -label = "Oracle (area under curve: {:.3f})".format(area) -ax.plot(cum_exposure, cum_claims, linestyle="-.", color="gray", label=label) - -# Random Baseline +ordered_samples, cum_claims = ordered_lorenz_curve( + df_test["PurePremium"], df_test["PurePremium"], df_test["Exposure"]) +gini = 1 - 2 * auc(ordered_samples, cum_claims) +label = "Oracle (Gini index: {:.3f})".format(gini) +ax.plot(ordered_samples, cum_claims, linestyle="-.", color="gray", + label=label) + +# Random baseline ax.plot([0, 1], [0, 1], linestyle="--", color="black", label="Random baseline") ax.set( - title="Cumulated claim amount by model", - xlabel='Fraction of exposure (from riskiest to safest)', + title="Ordered Lorenz Curves", + xlabel=('Fraction of policyholds\n' + '(ordered by model from safest to riskiest)'), ylabel='Fraction of total claim amount' ) -ax.legend(loc="lower right") +ax.legend(loc="upper left") plt.plot() From 2d0b195bae7566d4a0ebd36211d869d1a86d703a Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 14 Oct 2019 18:38:49 +0200 Subject: [PATCH 207/269] s/ordered Lorenz/Lorenz/ --- .../plot_tweedie_regression_insurance_claims.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py 
b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 0dd0ed25f4a02..27e0449d84ce8 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -538,13 +538,13 @@ def score_estimator( # model but not its calibration: any monotonic transformation of the # predictions leaves the Gini index of the model unchanged. # -# Finally on should highlight that the Compound Poisson Gamma model that +# Finally one should highlight that the Compound Poisson Gamma model that # is directly fit on the pure premium is operationally simpler to develop and # maintain as it consists in a single scikit-learn estimator instead of a -# pair of models. +# pair of models, each with its own set of hyperparameters. -def ordered_lorenz_curve(y_true, y_pred, exposure): +def lorenz_curve(y_true, y_pred, exposure): y_true, y_pred = np.asarray(y_true), np.asarray(y_pred) exposure = np.asarray(exposure) @@ -565,14 +565,14 @@ def ordered_lorenz_curve(y_true, y_pred, exposure): for label, y_pred in [("Frequency * Severity model", y_pred_product), ("Compound Poisson Gamma", y_pred_total)]: - ordered_samples, cum_claims = ordered_lorenz_curve( + ordered_samples, cum_claims = lorenz_curve( df_test["PurePremium"], y_pred, df_test["Exposure"]) gini = 1 - 2 * auc(ordered_samples, cum_claims) label += " (Gini index: {:.3f})".format(gini) ax.plot(ordered_samples, cum_claims, linestyle="-", label=label) # Oracle model: y_pred == y_test -ordered_samples, cum_claims = ordered_lorenz_curve( +ordered_samples, cum_claims = lorenz_curve( df_test["PurePremium"], df_test["PurePremium"], df_test["Exposure"]) gini = 1 - 2 * auc(ordered_samples, cum_claims) label = "Oracle (Gini index: {:.3f})".format(gini) @@ -583,8 +583,8 @@ def ordered_lorenz_curve(y_true, y_pred, exposure): ax.plot([0, 1], [0, 1], linestyle="--", color="black", label="Random baseline") ax.set( - title="Ordered Lorenz Curves", - xlabel=('Fraction of policyholds\n' + title="Lorenz Curves", + xlabel=('Fraction of policyholders\n' '(ordered by model from safest to riskiest)'), ylabel='Fraction of total claim amount' ) From ea6a3e8bc57c7775cab5f89bd923fee062d2d94d Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Mon, 14 Oct 2019 19:02:46 +0200 Subject: [PATCH 208/269] More doc improvements to the pure premium example --- ...lot_tweedie_regression_insurance_claims.py | 165 +++++++++--------- 1 file changed, 83 insertions(+), 82 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 27e0449d84ce8..a95b8f0301663 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -3,17 +3,19 @@ Tweedie regression on insurance claims ====================================== -This example illustrates the use of Poisson, Gamma and Tweedie regression -on the French Motor Third-Party Liability Claims dataset, and is inspired -by an R tutorial [1]. +This example illustrates the use of Poisson, Gamma and Tweedie regression on +the French Motor Third-Party Liability Claims dataset, and is inspired by an R +tutorial [1]. Insurance claims data consist of the number of claims and the total claim amount. Often, the final goal is to predict the expected value, i.e. the mean, -of the total claim amount. 
There are several possibilities to do that, two of -which are: +of the total claim amount per exposure unit also referred to as the pure +premium. -1. Model the number of claims with a Poisson distribution, the average - claim amount per claim, also known as severity, as a Gamma distribution and +There are several possibilities to do that, two of which are: + +1. Model the number of claims with a Poisson distribution, the average claim + amount per claim, also known as severity, as a Gamma distribution and multiply the predictions of both in order to get the total claim amount. 2. Model total claim amount directly, typically with a Tweedie distribution of Tweedie power :math:`p \\in (1, 2)`. @@ -21,16 +23,16 @@ In this example we will illustrate both approaches. We start by defining a few helper functions for loading the data and visualizing results. - .. [1] A. Noll, R. Salzmann and M.V. Wuthrich, Case Study: French Motor - Third-Party Liability Claims (November 8, 2018). - `doi:10.2139/ssrn.3164764 `_ + Third-Party Liability Claims (November 8, 2018). `doi:10.2139/ssrn.3164764 + `_ """ print(__doc__) # Authors: Christian Lorentzen # Roman Yurchak +# Olivier Grisel # License: BSD 3 clause from functools import partial @@ -128,6 +130,64 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, ) +def score_estimator( + estimator, X_train, X_test, df_train, df_test, target, weights, + tweedie_powers=None, +): + """Evaluate an estimator on train and test sets with different metrics""" + if isinstance(estimator, tuple): + model_name = " * ".join(e.__class__.__name__ for e in estimator) + else: + model_name = estimator.__class__.__name__ + print("\nEvaluation of {} of target {} ".format(model_name, target)) + + metrics = [ + ("D² explained", None), + ("mean abs. error", mean_absolute_error), + ("mean squared error", mean_squared_error), + ] + if tweedie_powers: + metrics += [( + "mean Tweedie deviance (p={:.4f})".format(power), + partial(mean_tweedie_deviance, power=power) + ) for power in tweedie_powers] + + res = [] + for subset_label, X, df in [ + ("train", X_train, df_train), + ("test", X_test, df_test), + ]: + y, _weights = df[target], df[weights] + for score_label, metric in metrics: + if isinstance(estimator, tuple) and len(estimator) == 2: + # Score the model consisting of the product of frequency and + # severity models. 
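                # (Multiplying the two predictions implicitly assumes that,
                # given the features, claim frequency and claim severity are
                # independent, so that the expected total claim amount
                # factorizes into expected frequency times expected severity.)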
+ est_freq, est_sev = estimator + y_pred = est_freq.predict(X) * est_sev.predict(X) + else: + y_pred = estimator.predict(X) + + if metric is None: + if not hasattr(estimator, "score"): + continue + score = estimator.score(X, y, _weights) + else: + score = metric(y, y_pred, _weights) + + res.append( + {"subset": subset_label, "metric": score_label, "score": score} + ) + + res = ( + pd.DataFrame(res) + .set_index(["metric", "subset"]) + .score.unstack(-1) + .round(2) + .loc[:, ['train', 'test']] + ) + return res + + ############################################################################## # # Loading datasets, basic feature extraction and target definitions @@ -203,65 +263,6 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, glm_freq.fit(X_train, df_train["Frequency"], sample_weight=df_train["Exposure"]) - -def score_estimator( - estimator, X_train, X_test, df_train, df_test, target, weights, - tweedie_powers=None, -): - """Evaluate an estimator on train and test sets with different metrics""" - if isinstance(estimator, tuple): - model_name = " * ".join(e.__class__.__name__ for e in estimator) - else: - model_name = estimator.__class__.__name__ - print("\nEvaluation of {} of target {} ".format(model_name, target)) - - metrics = [ - ("D² explained", None), - ("mean abs. error", mean_absolute_error), - ("mean squared error", mean_squared_error), - ] - if tweedie_powers: - metrics += [( - "mean Tweedie deviance (p={:.4f})".format(power), - partial(mean_tweedie_deviance, power=power) - ) for power in tweedie_powers] - - res = [] - for subset_label, X, df in [ - ("train", X_train, df_train), - ("test", X_test, df_test), - ]: - y, _weights = df[target], df[weights] - for score_label, metric in metrics: - if isinstance(estimator, tuple) and len(estimator) == 2: - # Score the model consisting of the product of frequency and - # severity models. - est_freq, est_sev = estimator - y_pred = est_freq.predict(X) * est_sev.predict(X) - else: - y_pred = estimator.predict(X) - - if metric is None: - if not hasattr(estimator, "score"): - continue - score = estimator.score(X, y, _weights) - else: - score = metric(y, y_pred, _weights) - - res.append( - {"subset": subset_label, "metric": score_label, "score": score} - ) - - res = ( - pd.DataFrame(res) - .set_index(["metric", "subset"]) - .score.unstack(-1) - .round(2) - .loc[:, ['train', 'test']] - ) - return res - - scores = score_estimator( glm_freq, X_train, @@ -425,20 +426,21 @@ def score_estimator( # Overall, the drivers age (``DrivAge``) has a weak impact on the claim # severity, both in observed and predicted data. # -# Pure Premium Modeling via a Product of Frequency and Severity Models -# -------------------------------------------------------------------- +# Pure Premium Modeling via a Product of Frequency and Severity +# ------------------------------------------------------------- # As mentioned in the introduction, the total claim amount per unit of -# exposure can be modeled either as the product of the frequency model by the -# severity model. +# exposure can be modeled either as the product of the prediction of the +# frequency model by the prediction of the severity model. # # To quantify the aggregate performance of this product model, one can compute -# the deviance of Tweedie distribution which is equivalent to a com. -# In the following code sample, the ``score_estimator`` is extended to score -# such a model. 
+# the mean deviance of the train and test data assuming a Compound +# Poisson-Gamma distribution of the total claim amount. This is equivalent to +# a Tweedie distribution with "power" parameter between 1 and 2. # -# The mean deviance is computed assuming a Tweedie distribution with a fixed -# grid of values for the power parameter to be comparable with the model from -# the following section: +# As we do not know the true value of the "power" parameter, we compute the +# mean deviances for a grid of possible values of the "power" parameter, +# hoping that a good model for one value of "power" will stay a good model for +# another: tweedie_powers = [1.5, 1.7, 1.8, 1.9, 1.99, 1.999, 1.9999] scores = score_estimator( @@ -487,9 +489,8 @@ def score_estimator( ############################################################################## # -# In this example, the mean absolute error is lower for the Compound Poisson -# Gamma model than when using the product of the predictions of separate -# models for frequency and severity. +# In this example, both modeling approaches yield comparable performance +# metrics. # # We can additionally validate these models by comparing observed and # predicted total claim amount over the test and train subsets. We see that, From ddae396ddc636c65f9ff53be8ce61373e477f4b5 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 15 Oct 2019 16:20:47 -0400 Subject: [PATCH 209/269] doc update, simplification and fixes --- doc/modules/classes.rst | 5 +- doc/modules/linear_model.rst | 179 +++++++++++-------------------- sklearn/linear_model/_glm/glm.py | 96 ++++++----------- 3 files changed, 97 insertions(+), 183 deletions(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index bb62b47945e6e..adbd960dfbb6a 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -857,9 +857,8 @@ Any estimator using the Huber loss would also be robust to outliers, e.g. Generalized linear models (GLM) for regression ---------------------------------------------- -A generalization of linear models that allows for response variables to -have error distribution other than a normal distribution is implemented -in the following models, +These models allow for response variables to have error distribution other +than a normal distribution: .. autosummary:: :toctree: generated/ diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 3119b9b0db94b..b930a0d2a8106 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -907,99 +907,74 @@ Generalized Linear Models (GLM) extend linear models in two ways combination of the input variables :math:`X` via an inverse link function :math:`h` as -.. math:: \hat{y}(w, X) = h(x^\top w) = h(w_0 + w_1 X_1 + ... + w_p X_p). +.. math:: \hat{y}(w, X) = h(Xw). -Secondly, the squared loss function is replaced by the unit deviance :math:`d` -of a reproductive exponential dispersion model (EDM) [11]_. The minimization -problem becomes +Secondly, the squared loss function is replaced by the unit deviance +:math:`d` of a distribution in the exponential family (or more precisely, a +reproductive exponential dispersion model (EDM) [11]_). -.. math:: \min_{w} \frac{1}{2 \sum_i s_i} \sum_i s_i \cdot d(y_i, \hat{y}(w, X_i)) + \frac{\alpha}{2} ||w||_2 +The minimization problem becomes: -with sample weights :math:`s_i`, and L2 regularization penalty :math:`\alpha`. -The unit deviance is defined by the log of the :math:`\mathrm{EDM}(\mu, \phi)` -likelihood as +.. 
math:: \min_{w} \frac{1}{2 \cdot n\text{_samples}} \sum_i d(y_i, \hat{y}_i) + \frac{\alpha}{2} ||w||_2, -.. math:: d(y, \mu) = -2\phi\cdot - \left( \log p(y|\mu,\phi) - - \log p(y|y,\phi)\right). +where :math:`\alpha` is the L2 regularization penalty. When sample weights are +provided, the average becomes a weighted average. -The following table lists some specific EDM distributions—all are instances of Tweedie -distributions—and some of their properties. +The following table lists some specific EDMs and their unit deviance (all of +these are instances of the Tweedie family): -================= =============================== ====================================== ============================================ -Distribution Target Domain Unit Variance Function :math:`v(\mu)` Unit Deviance :math:`d(y, \mu)` -================= =============================== ====================================== ============================================ -Normal :math:`y \in (-\infty, \infty)` :math:`1` :math:`(y-\mu)^2` -Poisson :math:`y \in [0, \infty)` :math:`\mu` :math:`2(y\log\frac{y}{\mu}-y+\mu)` -Gamma :math:`y \in (0, \infty)` :math:`\mu^2` :math:`2(\log\frac{\mu}{y}+\frac{y}{\mu}-1)` -Inverse Gaussian :math:`y \in (0, \infty)` :math:`\mu^3` :math:`\frac{(y-\mu)^2}{y\mu^2}` -================= =============================== ====================================== ============================================ +================= =============================== ============================================ +Distribution Target Domain Unit Deviance :math:`d(y, \hat{y})` +================= =============================== ============================================ +Normal :math:`y \in (-\infty, \infty)` :math:`(y-\hat{y})^2` +Poisson :math:`y \in [0, \infty)` :math:`2(y\log\frac{y}{\hat{y}}-y+\hat{y})` +Gamma :math:`y \in (0, \infty)` :math:`2(\log\frac{\hat{y}}{y}+\frac{y}{\hat{y}}-1)` +Inverse Gaussian :math:`y \in (0, \infty)` :math:`\frac{(y-\hat{y})^2}{y\hat{y}^2}` +================= =============================== ============================================ +The choice of the distribution depends on the problem at hand: -Usage ------ - -A GLM loss different from the classical squared loss might be appropriate in -the following cases: - - * If the target values :math:`y` are counts (non-negative integer valued) or - frequencies (non-negative), you might use a Poisson deviance with log-link. - - * If the target values are positive valued and skewed, you might try a - Gamma deviance with log-link. - - * If the target values seem to be heavier tailed than a Gamma distribution, - you might try an Inverse Gaussian deviance (or even higher variance powers - of the Tweedie family). - -Since the linear predictor :math:`x^\top w` can be negative and -Poisson, Gamma and Inverse Gaussian distributions don't support negative values, -it is convenient to apply a link function different from the identity link -:math:`h(x^\top w)=x^\top w` that guarantees the non-negativeness, e.g. the -log-link `link='log'` with :math:`h(x^\top w)=\exp(x^\top w)`. - -:class:`TweedieRegressor` implements a generalized linear model -for the Tweedie distribution, that allows to model any of the above mentioned -distributions using the appropriate ``power`` parameter, i.e. the exponent -of the unit variance function: - - - ``power = 0``: Normal distribution. Specialized solvers such as - :class:`Ridge`, :class:`ElasticNet` are generally - more appropriate in this case. 
+* If the target values :math:`y` are counts (non-negative integer valued) or + relative frequencies (non-negative), you might use a Poisson deviance + with log-link. +* If the target values are positive valued and skewed, you might try a + Gamma deviance with log-link. +* If the target values seem to be heavier tailed than a Gamma distribution, + you might try an Inverse Gaussian deviance (or even higher variance powers + of the Tweedie family). - - ``power = 1``: Poisson distribution. :class:`PoissonRegressor` is exposed for - convenience. However, it is strictly equivalent to - `TweedieRegressor(power=1)`. - - ``power = 2``: Gamma distribution. :class:`GammaRegressor` is exposed for - convenience. However, it is strictly equivalent to - `TweedieRegressor(power=2)`. +.. topic:: References: - - ``power = 3``: Inverse Gamma distribution. + .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, + Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. + .. [11] Jørgensen, B. (1992). The theory of exponential dispersion models + and analysis of deviance. Monografias de matemática, no. 51. See also + `Exponential dispersion model. + `_ -.. note:: +Usage +----- - * The feature matrix `X` should be standardized before fitting. This - ensures that the penalty treats features equally. - * If you want to model a relative frequency, i.e. counts per exposure (time, - volume, ...) you can do so by a Poisson distribution and passing - :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values - together with :math:`s=\mathrm{exposure}` as sample weights. +:class:`TweedieRegressor` implements a generalized linear model for the +Tweedie distribution, that allows to model any of the above mentioned +distributions using the appropriate ``power`` parameter. In particular: - As an example, consider Poisson distributed counts z (integers) and - weights s=exposure (time, money, persons years, ...). Then you fit - y = z/s, i.e. ``PoissonRegressor.fit(X, y, sample_weight=s)``. - The weights are necessary for the right (finite sample) mean. - Considering :math:`\bar{y} = \frac{\sum_i s_i y_i}{\sum_i s_i}`, - in this case one might say that y has a 'scaled' Poisson distribution. - The same holds for other distributions. +- ``power = 0``: Normal distribution. Specific estimators such as + :class:`Ridge`, :class:`ElasticNet` are generally more appropriate in + this case. +- ``power = 1``: Poisson distribution. :class:`PoissonRegressor` is exposed + for convenience. However, it is strictly equivalent to + `TweedieRegressor(power=1, link='log')`. +- ``power = 2``: Gamma distribution. :class:`GammaRegressor` is exposed for + convenience. However, it is strictly equivalent to + `TweedieRegressor(power=2, link='log')`. +- ``power = 3``: Inverse Gaussian distribution. - * The fit itself does not need Y to be from an EDM, but only assumes - the first two moments to be :math:`E[Y_i]=\mu_i=h((Xw)_i)` and - :math:`Var[Y_i]=\frac{\phi}{s_i} v(\mu_i)`. +The link function is determined by the `link` parameter. 
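The equivalence between :class:`PoissonRegressor` (or :class:`GammaRegressor`)
and the corresponding ``TweedieRegressor`` can be checked empirically, for
instance with the following sketch on synthetic, strictly positive targets
(the data below are made up purely for illustration)::

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.linear_model import PoissonRegressor, TweedieRegressor

    X, y = make_regression(n_samples=50, n_features=3, random_state=0)
    y = np.exp((y - y.mean()) / y.std())   # make the targets positive

    poisson = PoissonRegressor(alpha=1., max_iter=1000).fit(X, y)
    tweedie = TweedieRegressor(power=1, link='log', alpha=1.,
                               max_iter=1000).fit(X, y)
    assert np.allclose(poisson.coef_, tweedie.coef_, rtol=1e-4)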
-The estimator can be used as follows:: +Usage example:: >>> from sklearn.linear_model import TweedieRegressor >>> reg = TweedieRegressor(power=1, alpha=0.5, link='log') @@ -1016,49 +991,25 @@ The estimator can be used as follows:: * :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py` * :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_regression_non_normal_loss.py` -Mathematical formulation +Practical considerations ------------------------ -In the unpenalized case, the assumptions are the following: - - * The target values :math:`y_i` are realizations of random variables - :math:`Y_i \overset{i.i.d}{\sim} \mathrm{EDM}(\mu_i, \frac{\phi}{s_i})` - with expectation :math:`\mu_i=\mathrm{E}[Y]`, dispersion parameter - :math:`\phi` and sample weights :math:`s_i`. - * The aim is to predict the expectation :math:`\mu_i` with - :math:`\hat{y}_i = h(\eta_i)`, linear predictor - :math:`\eta_i=(Xw)_i` and inverse link function :math:`h`. - -Note that the first assumption implies -:math:`\mathrm{Var}[Y_i]=\frac{\phi}{s_i} v(\mu_i)` with unit variance -function :math:`v(\mu)`. Specifying a particular distribution of an EDM is the -same as specifying a unit variance function (they are one-to-one). - -A few remarks: - -* The deviance is independent of :math:`\phi`. Therefore, also the estimation - of the coefficients :math:`w` is independent of the dispersion parameter of - the EDM. -* The minimization is equivalent to (penalized) maximum likelihood estimation. -* The deviances for at least Normal, Poisson and Gamma distributions are - strictly consistent scoring functions for the mean :math:`\mu`, see Eq. - (19)-(20) in [12]_. This means that, given an appropriate feature matrix `X`, - you get good (asymptotic) estimators for the expectation when using these - deviances. - +The feature matrix `X` should be standardized before fitting. This ensures +that the penalty treats features equally. -.. topic:: References: - - .. [10] McCullagh, Peter; Nelder, John (1989). Generalized Linear Models, - Second Edition. Boca Raton: Chapman and Hall/CRC. ISBN 0-412-31760-5. +Since the linear predictor :math:`Xw` can be negative and Poisson, +Gamma and Inverse Gaussian distributions don't support negative values, it +is necessary to apply an inverse link function that guarantees the +non-negativeness. For example with `link='log'`, the inverse link function +becomes :math:`h(Xw)=\exp(Xw)`. - .. [11] Jørgensen, B. (1992). The theory of exponential dispersion models - and analysis of deviance. Monografias de matemática, no. 51. See also - `Exponential dispersion model. - `_ +If you want to model a relative frequency, i.e. counts per exposure (time, +volume, ...) you can do so by using a Poisson distribution and passing +:math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values +together with :math:`s=\mathrm{exposure}` as sample weights. For a concrete +example see e.g. +:ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py`. - .. [12] Gneiting, T. (2010). `Making and Evaluating Point Forecasts. 
- `_ Stochastic Gradient Descent - SGD ================================= diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index b29dcd89a35a6..557184fdd5c85 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -67,9 +67,9 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Parameters ---------- alpha : float, default=1 - Constant that multiplies the penalty terms and thus determines the - regularization strength. ``alpha = 0`` is equivalent to unpenalized - GLMs. In this case, the design matrix X must have full column rank + Constant that multiplies the penalty term and thus determines the + regularization strength. ``alpha = 0`` is equivalent to unpenalized + GLMs. In this case, the design matrix `X` must have full column rank (no collinearities). fit_intercept : bool, default=True @@ -81,15 +81,13 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): The distributional assumption of the GLM, i.e. which distribution from the EDM, specifies the loss function to be minimized. - link : {'auto', 'identity', 'log'} or an instance of class BaseLink, \ - default='auto' + link : {'auto', 'identity', 'log'}, default='auto' The link function of the GLM, i.e. mapping from linear predictor - (X*coef) to expectation (y_pred). Option 'auto' sets the link - depending on the chosen family as follows: - - - 'identity' for family 'normal' + `Xw` to prediction `y_pred`. Option 'auto' sets the link depending + on the chosen family as follows: - - 'log' for families 'poisson', 'gamma', 'inverse-gaussian' + - 'identity' for Normal distribution + - 'log' for Poisson, Gamma and Inverse Gaussian distributions solver : 'lbfgs', default='lbfgs' Algorithm to use in the optimization problem: @@ -155,12 +153,7 @@ def fit(self, X, y, sample_weight=None): Target values. sample_weight : array-like of shape (n_samples,), default=None - Individual weights w_i for each sample. Note that for an - Exponential Dispersion Model (EDM), one has - Var[Y_i]=phi/w_i * v(y_pred). - If Y_i ~ EDM(y_pred, phi/w_i), then - sum(w*Y)/sum(w) ~ EDM(y_pred, phi/sum(w)), i.e. the mean of y is a - weighted average with weights=sample_weight. + Sample weights. Returns ------- @@ -400,25 +393,16 @@ def _more_tags(self): class PoissonRegressor(GeneralizedLinearRegressor): - """Regression with the response variable y following a Poisson distribution - - GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean of the target y as y_pred=h(X*w). - The fit minimizes the following objective function with L2 regularization:: - - 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * ||w||_2^2 - - with inverse link function h and s=sample_weight. Note that for - ``sample_weight=None``, one has s_i=1 and sum(s)=n_samples). + """Generalized Linear Model with a Poisson distribution. Read more in the :ref:`User Guide `. Parameters ---------- alpha : float, default=1 - Constant that multiplies the penalty terms and thus determines the - regularization strength. ``alpha = 0`` is equivalent to unpenalized - GLMs. In this case, the design matrix X must have full column rank + Constant that multiplies the penalty term and thus determines the + regularization strength. ``alpha = 0`` is equivalent to unpenalized + GLMs. In this case, the design matrix `X` must have full column rank (no collinearities). 
fit_intercept : bool, default=True @@ -479,25 +463,16 @@ def family(self, value): class GammaRegressor(GeneralizedLinearRegressor): - """Regression with the response variable y following a Gamma distribution - - GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean of the target y as y_pred=h(X*w). - The fit minimizes the following objective function with L2 regularization:: - - 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * ||w||_2^2 - - with inverse link function h and s=sample_weight. Note that for - ``sample_weight=None``, one has s_i=1 and sum(s)=n_samples). + """Generalized Linear Model with a Gamma distribution. Read more in the :ref:`User Guide `. Parameters ---------- alpha : float, default=1 - Constant that multiplies the penalty terms and thus determines the - regularization strength. ``alpha = 0`` is equivalent to unpenalized - GLMs. In this case, the design matrix X must have full column rank + Constant that multiplies the penalty term and thus determines the + regularization strength. ``alpha = 0`` is equivalent to unpenalized + GLMs. In this case, the design matrix `X` must have full column rank (no collinearities). fit_intercept : bool, default=True @@ -558,30 +533,18 @@ def family(self, value): class TweedieRegressor(GeneralizedLinearRegressor): - r"""Regression with the response variable y following a Tweedie distribution - - GLMs based on a reproductive Exponential Dispersion Model (EDM) aim at - fitting and predicting the mean of the target y as y_pred=h(X*w). - The fit minimizes the following objective function with L2 regularization:: - - 1/(2*sum(s)) * deviance(y, h(X*w); s) + 1/2 * alpha * ||w||_2^2 + """Generalized Linear Model with a Tweedie distribution. - with inverse link function h and s=sample_weight. Note that for - ``sample_weight=None``, one has s_i=1 and sum(s)=n_samples). + This estimator can be used to model different GLMs depending on the + ``power`` parameter, which determines the underlying distribution. Read more in the :ref:`User Guide `. Parameters ---------- power : float, default=0 - The power determines the underlying target distribution. By - definition it links distribution variance (:math:`v`) and - mean (:math:`\y_\textrm{pred}`): - :math:`v(\y_\textrm{pred}) = \y_\textrm{pred}^{power}`. - - For ``0 < power < 1``, no distribution exists. - - Special cases are: + The power determines the underlying target distribution according + to the following table: +-------+------------------------+ | Power | Distribution | @@ -597,20 +560,21 @@ class TweedieRegressor(GeneralizedLinearRegressor): | 3 | Inverse Gaussian | +-------+------------------------+ + For ``0 < power < 1``, no distribution exists. + alpha : float, default=1 - Constant that multiplies the penalty terms and thus determines the - regularization strength. ``alpha = 0`` is equivalent to unpenalized - GLMs. In this case, the design matrix X must have full column rank + Constant that multiplies the penalty term and thus determines the + regularization strength. ``alpha = 0`` is equivalent to unpenalized + GLMs. In this case, the design matrix `X` must have full column rank (no collinearities). link : {'auto', 'identity', 'log'}, default='auto' The link function of the GLM, i.e. mapping from linear predictor - (X*coef) to expectation (y_pred). Option 'auto' sets the link - depending on the chosen family as follows: + `Xw` to prediction `y_pred`. 
Option 'auto' sets the link depending + on the chosen family as follows: - 'identity' for Normal distribution - - - 'log' for Poisson, Gamma or Inverse Gaussian distributions + - 'log' for Poisson, Gamma and Inverse Gaussian distributions fit_intercept : bool, default=True Specifies if a constant (a.k.a. bias or intercept) should be From 21572d93e07e2352e2b9e25fc044d4f03df607a9 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Tue, 15 Oct 2019 16:23:22 -0400 Subject: [PATCH 210/269] put back doc for BaseLink removed by mistake --- sklearn/linear_model/_glm/glm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 557184fdd5c85..9b17f1814a497 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -81,7 +81,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): The distributional assumption of the GLM, i.e. which distribution from the EDM, specifies the loss function to be minimized. - link : {'auto', 'identity', 'log'}, default='auto' + link : {'auto', 'identity', 'log'} or an instance of class BaseLink, \ + default='auto' The link function of the GLM, i.e. mapping from linear predictor `Xw` to prediction `y_pred`. Option 'auto' sets the link depending on the chosen family as follows: From d7ff6f44f4b5e477e6887850c4fcd9a5f80ea043 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 17 Oct 2019 11:10:42 +0200 Subject: [PATCH 211/269] TST GLM/Ridge comparison with sample_weight (xfail for now) --- sklearn/linear_model/_glm/tests/test_glm.py | 39 ++++++++++++++------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index c0ff6508db9c9..cf6681f56acde 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -257,37 +257,52 @@ def test_warm_start(fit_intercept): @pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)]) @pytest.mark.parametrize('fit_intercept', [True, False]) -def test_normal_ridge_comparison(n_samples, n_features, fit_intercept): +@pytest.mark.parametrize('sample_weight', [None, pytest.mark.xfail('rand')]) +def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, + sample_weight, request): """Compare with Ridge regression for Normal distributions.""" - alpha = 1.0 test_size = 10 X, y = make_regression(n_samples=n_samples + test_size, n_features=n_features, n_informative=n_features-2, noise=0.5, random_state=42) - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=test_size, random_state=0 - ) if n_samples > n_features: ridge_params = {"solver": "svd"} else: - ridge_params = {"solver": "sag", "max_iter": 10000, "tol": 1e-9} + ridge_params = {"solver": "saga", "max_iter": 1000000, "tol": 1e-9} + + X_train, X_test, y_train, y_test, = train_test_split( + X, y, test_size=test_size, random_state=0 + ) + + if sample_weight is None: + alpha = 1.0 + sw_train = None + else: + sw_train = np.random.RandomState(0).rand(len(y_train)) + alpha = 0.0 + sw_train /= sw_train.sum() + request.applymarker(pytest.mark.xfail( + run=False, reason=('TODO: GLM / Ridge comparison with ' + 'sample_weight should be fixed'))) # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 ridge = Ridge(alpha=alpha*n_samples, normalize=False, - random_state=42, **ridge_params) - ridge.fit(X_train, y_train) + random_state=42, fit_intercept=fit_intercept, + **ridge_params) + 
ridge.fit(X_train, y_train, sample_weight=sw_train) glm = GeneralizedLinearRegressor(alpha=1.0, family='normal', - link='identity', fit_intercept=True, + link='identity', + fit_intercept=fit_intercept, max_iter=300) - glm.fit(X_train, y_train) + glm.fit(X_train, y_train, sample_weight=sw_train) assert glm.coef_.shape == (X.shape[1], ) assert_allclose(glm.coef_, ridge.coef_, atol=5e-5) assert_allclose(glm.intercept_, ridge.intercept_, rtol=1e-5) - assert_allclose(glm.predict(X_train), ridge.predict(X_train), rtol=5e-5) - assert_allclose(glm.predict(X_test), ridge.predict(X_test), rtol=5e-5) + assert_allclose(glm.predict(X_train), ridge.predict(X_train), rtol=2e-4) + assert_allclose(glm.predict(X_test), ridge.predict(X_test), rtol=2e-4) def test_poisson_glmnet(): From 939d24012088df5d26693a06a8dc991c3af0d4fd Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 17 Oct 2019 11:36:17 +0200 Subject: [PATCH 212/269] TST More invariance checks for sample_weight --- sklearn/linear_model/_glm/tests/test_glm.py | 28 ++++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index cf6681f56acde..e526db5002ca2 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -175,16 +175,20 @@ def test_glm_identity_regression(): assert_allclose(glm.coef_, coef, rtol=1e-6) -def test_glm_sample_weight_consistentcy(): +@pytest.mark.parametrize('fit_intercept', [False, True]) +@pytest.mark.parametrize('alpha', [0.0, 1.0]) +@pytest.mark.parametrize('family', ['normal', 'poisson', 'gamma']) +def test_glm_sample_weight_consistentcy(fit_intercept, alpha, family): """Test that the impact of sample_weight is consistent""" rng = np.random.RandomState(0) n_samples, n_features = 10, 5 X = rng.rand(n_samples, n_features) y = rng.rand(n_samples) - glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', - fit_intercept=False) - glm.fit(X, y) + glm_params = dict(alpha=alpha, family=family, link='auto', + fit_intercept=fit_intercept) + + glm = GeneralizedLinearRegressor(**glm_params).fit(X, y) coef = glm.coef_.copy() # sample_weight=np.ones(..) 
should be equivalent to sample_weight=None @@ -206,6 +210,22 @@ def test_glm_sample_weight_consistentcy(): glm.fit(X[:-1], y[:-1]) assert_allclose(glm.coef_, coef1, rtol=1e-6) + # check that multiplying sample_weight by 2 is equivalent + # to repeating correspoding samples twice + X2 = np.concatenate([X, X[:n_samples//2]], axis=0) + y2 = np.concatenate([y, y[:n_samples//2]]) + sample_weight_1 = np.ones(len(y)) + sample_weight_1[:n_samples//2] = 2 + + glm1 = GeneralizedLinearRegressor(**glm_params).fit( + X, y, sample_weight=sample_weight_1 + ) + + glm2 = GeneralizedLinearRegressor(**glm_params).fit( + X2, y2, sample_weight=None + ) + assert_allclose(glm1.coef_, glm2.coef_) + @pytest.mark.parametrize( 'family', From da0d2a6f7fc372f7ae6b3daf359e8d1a4000a57a Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Thu, 17 Oct 2019 12:06:59 +0200 Subject: [PATCH 213/269] Remove copy_X parameter --- sklearn/linear_model/_glm/glm.py | 34 +++++---------------- sklearn/linear_model/_glm/tests/test_glm.py | 10 ------ 2 files changed, 8 insertions(+), 36 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 9b17f1814a497..9135cd2392952 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -109,9 +109,6 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_``. - copy_X : bool, default=True - If ``True``, X will be copied; else, it may be overwritten. - verbose : int, default=0 For the lbfgs solver set verbose to any positive number for verbosity. @@ -130,7 +127,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): def __init__(self, *, alpha=1.0, fit_intercept=True, family='normal', link='auto', solver='lbfgs', max_iter=100, tol=1e-4, warm_start=False, - copy_X=True, verbose=0): + verbose=0): self.alpha = alpha self.fit_intercept = fit_intercept self.family = family @@ -139,7 +136,6 @@ def __init__(self, *, alpha=1.0, self.max_iter = max_iter self.tol = tol self.warm_start = warm_start - self.copy_X = copy_X self.verbose = verbose def fit(self, X, y, sample_weight=None): @@ -219,16 +215,13 @@ def fit(self, X, y, sample_weight=None): if not isinstance(self.warm_start, bool): raise ValueError("The argument warm_start must be bool;" " got {0}".format(self.warm_start)) - if not isinstance(self.copy_X, bool): - raise ValueError("The argument copy_X must be bool;" - " got {0}".format(self.copy_X)) family = self._family_instance link = self._link_instance X, y = check_X_y(X, y, accept_sparse=['csc', 'csr'], dtype=[np.float64, np.float32], - y_numeric=True, multi_output=False, copy=self.copy_X) + y_numeric=True, multi_output=False) weights = _check_sample_weight(sample_weight, X) @@ -423,9 +416,6 @@ class PoissonRegressor(GeneralizedLinearRegressor): If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` . - copy_X : bool, default=True - If ``True``, X will be copied; else, it may be overwritten. - verbose : int, default=0 For the lbfgs solver set verbose to any positive number for verbosity. @@ -442,12 +432,11 @@ class PoissonRegressor(GeneralizedLinearRegressor): Actual number of iterations used in the solver. 
""" def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100, - tol=1e-4, warm_start=False, copy_X=True, verbose=0): + tol=1e-4, warm_start=False, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="poisson", link='log', max_iter=max_iter, - tol=tol, warm_start=warm_start, copy_X=copy_X, - verbose=verbose) + tol=tol, warm_start=warm_start, verbose=verbose) @property def family(self): @@ -493,9 +482,6 @@ class GammaRegressor(GeneralizedLinearRegressor): If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` . - copy_X : bool, default=True - If ``True``, X will be copied; else, it may be overwritten. - verbose : int, default=0 For the lbfgs solver set verbose to any positive number for verbosity. @@ -512,12 +498,11 @@ class GammaRegressor(GeneralizedLinearRegressor): Actual number of iterations used in the solver. """ def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100, - tol=1e-4, warm_start=False, copy_X=True, verbose=0): + tol=1e-4, warm_start=False, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family="gamma", link='log', max_iter=max_iter, - tol=tol, warm_start=warm_start, copy_X=copy_X, - verbose=verbose) + tol=tol, warm_start=warm_start, verbose=verbose) @property def family(self): @@ -594,9 +579,6 @@ class TweedieRegressor(GeneralizedLinearRegressor): If set to ``True``, reuse the solution of the previous call to ``fit`` as initialization for ``coef_`` and ``intercept_`` . - copy_X : bool, default=True - If ``True``, X will be copied; else, it may be overwritten. - verbose : int, default=0 For the lbfgs solver set verbose to any positive number for verbosity. @@ -614,12 +596,12 @@ class TweedieRegressor(GeneralizedLinearRegressor): """ def __init__(self, *, power=0.0, alpha=1.0, fit_intercept=True, link='auto', max_iter=100, tol=1e-4, - warm_start=False, copy_X=True, verbose=0): + warm_start=False, verbose=0): super().__init__(alpha=alpha, fit_intercept=fit_intercept, family=TweedieDistribution(power=power), link=link, max_iter=max_iter, tol=tol, - warm_start=warm_start, copy_X=copy_X, verbose=verbose) + warm_start=warm_start, verbose=verbose) @property def family(self): diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index e526db5002ca2..5bec1fb6f493a 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -154,16 +154,6 @@ def test_glm_warm_start_argument(warm_start): glm.fit(X, y) -@pytest.mark.parametrize('copy_X', ['not bool', 1, 0, [True]]) -def test_glm_copy_X_argument(copy_X): - """Test GLM for invalid copy_X arguments.""" - y = np.array([1, 2]) - X = np.array([[1], [1]]) - glm = GeneralizedLinearRegressor(copy_X=copy_X) - with pytest.raises(ValueError, match="copy_X must be bool"): - glm.fit(X, y) - - def test_glm_identity_regression(): """Test GLM regression with identity link on a simple dataset.""" coef = [1., 2.] 
From 162fb3b23c5132eeca7d0fb020a9b00d84ad1352 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 17 Oct 2019 20:08:23 +0200 Subject: [PATCH 214/269] Minor doc improvements --- doc/modules/linear_model.rst | 4 ++-- .../plot_poisson_regression_non_normal_loss.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index b930a0d2a8106..d9cbce4eebe8b 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -915,7 +915,7 @@ reproductive exponential dispersion model (EDM) [11]_). The minimization problem becomes: -.. math:: \min_{w} \frac{1}{2 \cdot n\text{_samples}} \sum_i d(y_i, \hat{y}_i) + \frac{\alpha}{2} ||w||_2, +.. math:: \min_{w} \frac{1}{2 n_{\text{samples}}} \sum_i d(y_i, \hat{y}_i) + \frac{\alpha}{2} ||w||_2, where :math:`\alpha` is the L2 regularization penalty. When sample weights are provided, the average becomes a weighted average. @@ -988,8 +988,8 @@ Usage example:: .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py` * :ref:`sphx_glr_auto_examples_linear_model_plot_poisson_regression_non_normal_loss.py` + * :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py` Practical considerations ------------------------ diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 0e948873da570..e84a60eb3d8ee 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -80,10 +80,10 @@ def load_mtpl2(n_samples=100000): # # The remaining columns can be used to predict the frequency of claim events. # Those columns are very heterogeneous with a mix of categorical and numeric -# variables with different scales, possibly with heavy tails. +# variables with different scales, possibly very uneven distributed. # # In order to fit linear models with those predictors it is therefore -# necessary to perform standard feature transformation as follows: +# necessary to perform standard feature transformations as follows: log_scale_transformer = make_pipeline( FunctionTransformer(np.log, validate=False), @@ -128,8 +128,8 @@ def load_mtpl2(n_samples=100000): ############################################################################## # -# It worth noting that 92 % of policyholders have zero claims, and if we were -# to convert this problem into a binary classification task, it would be +# It is worth noting that 92 % of policyholders have zero claims, and if we +# were to convert this problem into a binary classification task, it would be # significantly imbalanced. # # To evaluate the pertinence of the used metrics, we will consider as a From cafc92f0b55307d3dc0f0d9dedefcad1bf73d482 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 17 Oct 2019 20:13:59 +0200 Subject: [PATCH 215/269] DOC consistent coef_=w for OMP in user guide --- doc/modules/linear_model.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index d9cbce4eebe8b..7b4e3ca9d6750 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -525,13 +525,13 @@ orthogonal matching pursuit can approximate the optimum solution vector with a fixed number of non-zero elements: .. 
math:: - \underset{\gamma}{\operatorname{arg\,min\,}} ||y - X\gamma||_2^2 \text{ subject to } ||\gamma||_0 \leq n_{\text{nonzero\_coefs}} + \underset{w}{\operatorname{arg\,min\,}} ||y - Xw||_2^2 \text{ subject to } ||w||_0 \leq n_{\text{nonzero\_coefs}} Alternatively, orthogonal matching pursuit can target a specific error instead of a specific number of non-zero coefficients. This can be expressed as: .. math:: - \underset{\gamma}{\operatorname{arg\,min\,}} ||\gamma||_0 \text{ subject to } ||y-X\gamma||_2^2 \leq \text{tol} + \underset{w}{\operatorname{arg\,min\,}} ||w||_0 \text{ subject to } ||y-Xw||_2^2 \leq \text{tol} OMP is based on a greedy algorithm that includes at each step the atom most From d3235db48a063ba01c5dafa543cba2d0f42b252c Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 17 Oct 2019 20:31:31 +0200 Subject: [PATCH 216/269] EXA typos --- .../plot_tweedie_regression_insurance_claims.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index a95b8f0301663..606de6b2df609 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -235,7 +235,7 @@ def score_estimator( # in their portfolio: df["PurePremium"] = df["ClaimAmount"] / df["Exposure"] -# This can be inderectly approximated by a 2-step modeling the product of the +# This can be indirectly approximated by a 2-step modeling: the product of the # Frequency times the average claim amount per claim: df["Frequency"] = df["ClaimNb"] / df["Exposure"] df["AvgClaimAmount"] = df["ClaimAmount"] / np.fmax(df["ClaimNb"], 1) @@ -258,7 +258,7 @@ def score_estimator( # The parameters of the model are estimated by minimizing the Poisson deviance # on the training set via a quasi-Newton solver: l-BFGS. Some of the features -# are colinear, we use a weak penalization to avoid numerical issues. +# are collinear, we use a weak penalization to avoid numerical issues. glm_freq = PoissonRegressor(alpha=1e-3) glm_freq.fit(X_train, df_train["Frequency"], sample_weight=df_train["Exposure"]) @@ -461,14 +461,14 @@ def score_estimator( # Pure Premium Modeling Using a Single Compound Poisson Gamma Model # ----------------------------------------------------------------- # Instead of taking the product of two independently fit models for frequency -# and severity one can directly model the total loss is with a unique Compound +# and severity one can directly model the total loss with a unique Compound # Poisson Gamma generalized linear model (with a log link function). This # model is a special case of the Tweedie GLM with a "power" parameter :math:`p # \in (1, 2)`. # # Here we fix apriori the "power" parameter of the Tweedie model to some # arbitrary value in the valid range. Ideally one would select this value via -# grid-search by minimizing the negative log-likelihood of the Tweedie model +# grid-search by minimizing the negative log-likelihood of the Tweedie model, # but unfortunately the current implementation does not allow for this (yet). 
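#
# One pragmatic workaround (shown here only as a sketch, reusing the
# ``X_train``/``X_test`` and ``df_train``/``df_test`` variables defined
# earlier in this example and an arbitrary list of candidate values) is to
# refit the model for a few values of ``power`` and compare them all with the
# same, fixed-power mean Tweedie deviance on the held-out data:

from sklearn.metrics import mean_tweedie_deviance

for candidate_power in [1.5, 1.7, 1.9]:
    candidate = TweedieRegressor(power=candidate_power, alpha=.1,
                                 max_iter=10000)
    candidate.fit(X_train, df_train["PurePremium"],
                  sample_weight=df_train["Exposure"])
    deviance = mean_tweedie_deviance(
        df_test["PurePremium"], candidate.predict(X_test),
        sample_weight=df_test["Exposure"], power=1.9,
    )
    print("power=%.1f -> mean Tweedie deviance (p=1.9): %.2f"
          % (candidate_power, deviance))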
glm_pure_premium = TweedieRegressor(power=1.999, alpha=.1, max_iter=10000) From 9401230d387870a00cf50148aa74b43eccd059ad Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 17 Oct 2019 20:38:42 +0200 Subject: [PATCH 217/269] EXA set less extreme Tweedie power=1.9 --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 606de6b2df609..02e528e98b130 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -471,7 +471,7 @@ def score_estimator( # grid-search by minimizing the negative log-likelihood of the Tweedie model, # but unfortunately the current implementation does not allow for this (yet). -glm_pure_premium = TweedieRegressor(power=1.999, alpha=.1, max_iter=10000) +glm_pure_premium = TweedieRegressor(power=1.9, alpha=.1, max_iter=10000) glm_pure_premium.fit(X_train, df_train["PurePremium"], sample_weight=df_train["Exposure"]) From 5bc48e520135ea8ce4ef2a0be156a28cd8d33ce8 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Thu, 17 Oct 2019 21:35:09 +0200 Subject: [PATCH 218/269] TST fix normal_ridge_comparison with sample_weight --- sklearn/linear_model/_glm/tests/test_glm.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 5bec1fb6f493a..074c8c90a4898 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -267,7 +267,7 @@ def test_warm_start(fit_intercept): @pytest.mark.parametrize('n_samples, n_features', [(100, 10), (10, 100)]) @pytest.mark.parametrize('fit_intercept', [True, False]) -@pytest.mark.parametrize('sample_weight', [None, pytest.mark.xfail('rand')]) +@pytest.mark.parametrize('sample_weight', [None, True]) def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, sample_weight, request): """Compare with Ridge regression for Normal distributions.""" @@ -280,33 +280,31 @@ def test_normal_ridge_comparison(n_samples, n_features, fit_intercept, if n_samples > n_features: ridge_params = {"solver": "svd"} else: - ridge_params = {"solver": "saga", "max_iter": 1000000, "tol": 1e-9} + ridge_params = {"solver": "saga", "max_iter": 1000000, "tol": 1e-7} X_train, X_test, y_train, y_test, = train_test_split( X, y, test_size=test_size, random_state=0 ) + alpha = 1.0 if sample_weight is None: - alpha = 1.0 sw_train = None + alpha_ridge = alpha * n_samples else: sw_train = np.random.RandomState(0).rand(len(y_train)) - alpha = 0.0 - sw_train /= sw_train.sum() - request.applymarker(pytest.mark.xfail( - run=False, reason=('TODO: GLM / Ridge comparison with ' - 'sample_weight should be fixed'))) + alpha_ridge = alpha * sw_train.sum() # GLM has 1/(2*n) * Loss + 1/2*L2, Ridge has Loss + L2 - ridge = Ridge(alpha=alpha*n_samples, normalize=False, + ridge = Ridge(alpha=alpha_ridge, normalize=False, random_state=42, fit_intercept=fit_intercept, **ridge_params) ridge.fit(X_train, y_train, sample_weight=sw_train) - glm = GeneralizedLinearRegressor(alpha=1.0, family='normal', + glm = GeneralizedLinearRegressor(alpha=alpha, family='normal', link='identity', fit_intercept=fit_intercept, - max_iter=300) + max_iter=300, + tol=1e-5) glm.fit(X_train, y_train, sample_weight=sw_train) assert 
glm.coef_.shape == (X.shape[1], ) assert_allclose(glm.coef_, ridge.coef_, atol=5e-5) From e572c3100508ca74abb097d73f36795e404715fd Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 20 Oct 2019 18:38:46 +0200 Subject: [PATCH 219/269] DOC advice against cv for Tweedie power in UG --- doc/modules/linear_model.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 7b4e3ca9d6750..4fc65cf668045 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -1010,6 +1010,10 @@ together with :math:`s=\mathrm{exposure}` as sample weights. For a concrete example see e.g. :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py`. +The `power` parameter of `TweedieRegressor` does not qualify to be optimized +by cross-validation, because it defines the very scoring criteria itself, i.e. +:meth:`TweedieRegressor.score`. Without the full likelihood of Tweedie +distributions at hand, it is suggested to choose it's value a priori. Stochastic Gradient Descent - SGD ================================= From 8d70042c09539af11d4c6283f1b46584473d7bbd Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 20 Oct 2019 19:19:56 +0200 Subject: [PATCH 220/269] DOC improve advice for power cv --- doc/modules/linear_model.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 4fc65cf668045..fe5f5966016bd 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -1010,10 +1010,10 @@ together with :math:`s=\mathrm{exposure}` as sample weights. For a concrete example see e.g. :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py`. -The `power` parameter of `TweedieRegressor` does not qualify to be optimized -by cross-validation, because it defines the very scoring criteria itself, i.e. -:meth:`TweedieRegressor.score`. Without the full likelihood of Tweedie -distributions at hand, it is suggested to choose it's value a priori. +When performing cross-validation for the `power` parameter of +`TweedieRegressor`, it is advisable to specify an explicit `scoring` function, +because the default scorer :meth:`TweedieRegressor.score` is a function of +`power` itself. 
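(Illustrative aside, not part of the patch series: the user-guide advice above can be put into practice by fixing the evaluation metric while searching over `power`. The sketch below assumes the `TweedieRegressor` and `mean_tweedie_deviance` APIs introduced by this PR and uses a small synthetic dataset; the candidate powers and hyperparameters are placeholders.)

import numpy as np
from sklearn.linear_model import TweedieRegressor
from sklearn.metrics import make_scorer, mean_tweedie_deviance
from sklearn.model_selection import GridSearchCV

# Synthetic positive-valued target, stand-in for e.g. a pure premium.
rng = np.random.RandomState(0)
X = rng.uniform(size=(100, 3))
y = rng.gamma(shape=2.0, scale=1.0, size=100)

# Fix the comparison metric at a single Tweedie power so that all candidate
# values of `power` are scored on the same scale, instead of the
# power-dependent default TweedieRegressor.score.
fixed_power_scorer = make_scorer(mean_tweedie_deviance, power=1.9,
                                 greater_is_better=False)

search = GridSearchCV(
    TweedieRegressor(alpha=0.1, max_iter=10000),
    param_grid={"power": [1.5, 1.7, 1.8, 1.9, 1.99]},
    scoring=fixed_power_scorer,
)
search.fit(X, y)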
Stochastic Gradient Descent - SGD ================================= From c47988993f939b28949e1cbd73946c4d3b426a4c Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 10 Nov 2019 20:28:06 +0100 Subject: [PATCH 221/269] EXA rely on default of FunctionTransformer Co-Authored-By: Guillaume Lemaitre --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 02e528e98b130..ade6912efd80d 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -211,7 +211,7 @@ def score_estimator( df["ClaimAmount"] = df["ClaimAmount"].clip(upper=200000) log_scale_transformer = make_pipeline( - FunctionTransformer(np.log, validate=False), + FunctionTransformer(func=np.log), StandardScaler() ) From 88d150e77e56175dad823663804f7418e91fd055 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 10 Nov 2019 21:19:49 +0100 Subject: [PATCH 222/269] EXA print all columns of DataFrame with max_columns option --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index ade6912efd80d..ab2ca2e8bfc28 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -240,7 +240,8 @@ def score_estimator( df["Frequency"] = df["ClaimNb"] / df["Exposure"] df["AvgClaimAmount"] = df["ClaimAmount"] / np.fmax(df["ClaimNb"], 1) -print(df[df.ClaimAmount > 0].head()) +with pd.option_context("display.max_columns", 15): + print(df[df.ClaimAmount > 0].head()) ############################################################################## # From 0d31f47a1a4b644e281e4002437c4a54234ed88e Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 10 Nov 2019 21:27:11 +0100 Subject: [PATCH 223/269] EXA improve wording Poisson target --- .../plot_tweedie_regression_insurance_claims.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index ab2ca2e8bfc28..1edcebcd3609a 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -248,12 +248,12 @@ def score_estimator( # Frequency model -- Poisson distribution # --------------------------------------- # -# The number of claims (``ClaimNb``) is a positive integer that can be modeled -# as a Poisson distribution. It is then assumed to be the number of discrete -# events occuring with a constant rate in a given time interval -# (``Exposure``, in units of years). Here we model the frequency -# ``y = ClaimNb / Exposure``, which is still a (scaled) Poisson distribution, -# and use ``Exposure`` as `sample_weight`. +# The number of claims (``ClaimNb``) is a positive integer (0 included). +# Thus, this target can be modelled by a Poisson distribution. +# It is then assumed to be the number of discrete events occuring with a +# constant rate in a given time interval (``Exposure``, in units of years). 
+# Here we model the frequency ``y = ClaimNb / Exposure``, which is still a +# (scaled) Poisson distribution, and use ``Exposure`` as `sample_weight`. df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0) From 3ab2877685301d9c3ccc42fab314fd455eceb332 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 10 Nov 2019 21:40:51 +0100 Subject: [PATCH 224/269] EXA increase digits in scores --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 1edcebcd3609a..9570573caaaa9 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -182,7 +182,7 @@ def score_estimator( pd.DataFrame(res) .set_index(["metric", "subset"]) .score.unstack(-1) - .round(2) + .round(4) .loc[:, ['train', 'test']] ) return res From 87de01bcdfdc3ba63779c2f958d37356112e4064 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sun, 10 Nov 2019 21:42:01 +0100 Subject: [PATCH 225/269] EXA convergence issue solve with max_iter --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 9570573caaaa9..8b8cbe143ddb9 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -260,7 +260,7 @@ def score_estimator( # The parameters of the model are estimated by minimizing the Poisson deviance # on the training set via a quasi-Newton solver: l-BFGS. Some of the features # are collinear, we use a weak penalization to avoid numerical issues. -glm_freq = PoissonRegressor(alpha=1e-3) +glm_freq = PoissonRegressor(alpha=1e-3, max_iter=400) glm_freq.fit(X_train, df_train["Frequency"], sample_weight=df_train["Exposure"]) From fbc22d86523373ce2940084c07d50d1760eab8ca Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 29 Jan 2020 23:19:17 +0100 Subject: [PATCH 226/269] Update sklearn/metrics/_regression.py Co-Authored-By: Nicolas Hug --- sklearn/metrics/_regression.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 6ba0331ccd9d4..91bf350395fa2 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -761,7 +761,7 @@ def mean_gamma_deviance(y_true, y_pred, sample_weight=None): Gamma deviance is equivalent to the Tweedie deviance with the power parameter `power=2`. It is invariant to scaling of - the target variable, and mesures relative errors. + the target variable, and measures relative errors. Read more in the :ref:`User Guide `. From 27ae4a285e70081738e53bd5907745a388156a7c Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Wed, 29 Jan 2020 23:35:22 +0100 Subject: [PATCH 227/269] DOC Add what's new entry --- doc/whats_new/v0.23.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index 45219b8346b35..0a70b65809e35 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -120,6 +120,13 @@ Changelog :mod:`sklearn.linear_model` ........................... 
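(Illustrative aside, not part of the patch series: the `mean_gamma_deviance` docstring touched above states that the Gamma deviance equals the Tweedie deviance with `power=2`, is invariant to a rescaling of the target, and measures relative errors. A minimal check of both properties, assuming the metrics API introduced by this PR:)

import numpy as np
from sklearn.metrics import mean_gamma_deviance, mean_tweedie_deviance

y_true = np.array([2.0, 0.5, 1.0, 4.0])
y_pred = np.array([2.5, 0.4, 1.2, 3.5])

d = mean_gamma_deviance(y_true, y_pred)
# Equivalent to the Tweedie deviance with power=2 ...
assert np.isclose(d, mean_tweedie_deviance(y_true, y_pred, power=2))
# ... and unchanged when y_true and y_pred are scaled by the same factor,
# i.e. only relative errors matter.
assert np.isclose(d, mean_gamma_deviance(100 * y_true, 100 * y_pred))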
+- |MajorFeature| Added generalized linear models (GLM) with non normal error + distributions, including :class:`linear_model.PoissonRegressor`, + :class:`linear_model.GammaRegressor` and :class:`linear_model.TweedieRegressor` + which use Poisson, Gamma and Tweedie distributions respectively. + :pr:`14300` by :user:`Christian Lorentzen `, `Roman Yurchak`_, + `Olivier Grisel`_ and `Nicolas Hug`_. + - |Fix| Fixed a bug where if a `sample_weight` parameter was passed to the fit method of :class:`linear_model.RANSACRegressor`, it would not be passed to the wrapped `base_estimator` during the fitting of the final model. From 51be7e193e7b7891c13ee70ad65d48a0a6cbf3ee Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 30 Jan 2020 14:19:06 -0500 Subject: [PATCH 228/269] Removed myself from what's new --- doc/whats_new/v0.23.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index 0a70b65809e35..ee24943e5f210 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -125,7 +125,7 @@ Changelog :class:`linear_model.GammaRegressor` and :class:`linear_model.TweedieRegressor` which use Poisson, Gamma and Tweedie distributions respectively. :pr:`14300` by :user:`Christian Lorentzen `, `Roman Yurchak`_, - `Olivier Grisel`_ and `Nicolas Hug`_. + and `Olivier Grisel`_. - |Fix| Fixed a bug where if a `sample_weight` parameter was passed to the fit method of :class:`linear_model.RANSACRegressor`, it would not be passed to From 25f0e531ceaaaf55d9124c127304d6c578acc72e Mon Sep 17 00:00:00 2001 From: Thomas J Fan Date: Tue, 4 Feb 2020 11:39:26 -0500 Subject: [PATCH 229/269] CLN Minor link clean up --- .../plot_poisson_regression_non_normal_loss.py | 8 ++++---- .../plot_tweedie_regression_insurance_claims.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index e84a60eb3d8ee..4c8d49ed01463 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -4,7 +4,7 @@ ====================================== This example illustrates the use of log-linear Poisson regression -on the French Motor Third-Party Liability Claims dataset [1] and compares +on the French Motor Third-Party Liability Claims dataset [1]_ and compares it with models learned with least squared error. The goal is to predict the expected number of insurance claims (or frequency) following car accidents for a policyholder given historical data over a population of policyholders. @@ -47,9 +47,9 @@ def load_mtpl2(n_samples=100000): Parameters ---------- - n_samples: int, default=100000 - number of samples to select (for faster run time). Full dataset has - 678013 samples. + n_samples: int or None, default=100000 + number of samples to select (for faster run time). If None, the full + dataset has with 678013 samples is returned. 
""" # freMTPL2freq dataset from https://www.openml.org/d/41214 diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 8b8cbe143ddb9..7a01ebe1ea112 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -5,7 +5,7 @@ This example illustrates the use of Poisson, Gamma and Tweedie regression on the French Motor Third-Party Liability Claims dataset, and is inspired by an R -tutorial [1]. +tutorial [1]_. Insurance claims data consist of the number of claims and the total claim amount. Often, the final goal is to predict the expected value, i.e. the mean, From 5eddf9ced305678f2d00ff6ce55cdde8e0bf0bbe Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Wed, 5 Feb 2020 08:11:12 +0100 Subject: [PATCH 230/269] CLN one word too much --- .../linear_model/plot_poisson_regression_non_normal_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 4c8d49ed01463..7ddebacc4ec71 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -49,7 +49,7 @@ def load_mtpl2(n_samples=100000): ---------- n_samples: int or None, default=100000 number of samples to select (for faster run time). If None, the full - dataset has with 678013 samples is returned. + dataset with 678013 samples is returned. """ # freMTPL2freq dataset from https://www.openml.org/d/41214 From c700acfe787d7e50d588d87b428223ca1daa5dbc Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Feb 2020 15:23:05 -0500 Subject: [PATCH 231/269] minor typo --- doc/modules/classes.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 2fb8a9f25bd83..c138f51f6c06f 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -840,7 +840,7 @@ Any estimator using the Huber loss would also be robust to outliers, e.g. Generalized linear models (GLM) for regression ---------------------------------------------- -These models allow for response variables to have error distribution other +These models allow for response variables to have error distributions other than a normal distribution: .. autosummary:: From a126f4aa3aad97c79de88912ad369332e3ba7394 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Feb 2020 15:23:45 -0500 Subject: [PATCH 232/269] remove unused symbol --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index d16806446fd7c..fd17227bbadb1 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -1037,7 +1037,7 @@ becomes :math:`h(Xw)=\exp(Xw)`. If you want to model a relative frequency, i.e. counts per exposure (time, volume, ...) you can do so by using a Poisson distribution and passing :math:`y=\frac{\mathrm{counts}}{\mathrm{exposure}}` as target values -together with :math:`s=\mathrm{exposure}` as sample weights. For a concrete +together with :math:`\mathrm{exposure}` as sample weights. For a concrete example see e.g. :ref:`sphx_glr_auto_examples_linear_model_plot_tweedie_regression_insurance_claims.py`. 
From 82c44830625bdb4ac32b0f06c05d44056db72b4c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Feb 2020 15:23:58 -0500 Subject: [PATCH 233/269] use j instead of i to index features --- sklearn/linear_model/_glm/glm.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 9135cd2392952..76735678a8f0b 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -101,8 +101,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): tol : float, default=1e-4 Stopping criterion. For the lbfgs solver, - the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` - where ``g_i`` is the i-th component of the gradient (derivative) of + the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` + where ``g_j`` is the j-th component of the gradient (derivative) of the objective function. warm_start : bool, default=False @@ -408,8 +408,8 @@ class PoissonRegressor(GeneralizedLinearRegressor): tol : float, default=1e-4 Stopping criterion. For the lbfgs solver, - the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` - where ``g_i`` is the i-th component of the gradient (derivative) of + the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` + where ``g_j`` is the j-th component of the gradient (derivative) of the objective function. warm_start : bool, default=False @@ -474,8 +474,8 @@ class GammaRegressor(GeneralizedLinearRegressor): tol : float, default=1e-4 Stopping criterion. For the lbfgs solver, - the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` - where ``g_i`` is the i-th component of the gradient (derivative) of + the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` + where ``g_j`` is the j-th component of the gradient (derivative) of the objective function. warm_start : bool, default=False @@ -571,8 +571,8 @@ class TweedieRegressor(GeneralizedLinearRegressor): tol : float, default=1e-4 Stopping criterion. For the lbfgs solver, - the iteration will stop when ``max{|g_i|, i = 1, ..., n} <= tol`` - where ``g_i`` is the i-th component of the gradient (derivative) of + the iteration will stop when ``max{|g_j|, j = 1, ..., d} <= tol`` + where ``g_j`` is the j-th component of the gradient (derivative) of the objective function. 
warm_start : bool, default=False From d3083db043674c8f6fe938a7d9312119134ff007 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Feb 2020 15:25:54 -0500 Subject: [PATCH 234/269] removed redundant variable --- sklearn/linear_model/_glm/glm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 76735678a8f0b..a8df8931961db 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -263,12 +263,12 @@ def func(coef, X, y, weights, alpha, family, link): coef, X, y, weights, family, link ) dev = family.deviance(y, y_pred, weights) - intercept = (coef.size == X.shape[1] + 1) - idx = 1 if intercept else 0 # offset if coef[0] is intercept - coef_scaled = alpha * coef[idx:] - obj = 0.5 * dev + 0.5 * (coef[idx:] @ coef_scaled) + # offset if coef[0] is intercept + offset = 1 if self.fit_intercept else 0 + coef_scaled = alpha * coef[offset:] + obj = 0.5 * dev + 0.5 * (coef[offset:] @ coef_scaled) objp = 0.5 * devp - objp[idx:] += coef_scaled + objp[offset:] += coef_scaled return obj, objp args = (X, y, weights, self.alpha, family, link) From f63f795270b15837aae873d38412fa6dbf183f16 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Feb 2020 17:13:47 -0500 Subject: [PATCH 235/269] removed unused var --- sklearn/linear_model/_glm/tests/test_glm.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 074c8c90a4898..9b9bf08aebd5b 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -237,10 +237,9 @@ def test_glm_log_regression(family): @pytest.mark.parametrize('fit_intercept', [True, False]) def test_warm_start(fit_intercept): n_samples, n_features = 110, 10 - X, y, coef = make_regression(n_samples=n_samples, - n_features=n_features, - n_informative=n_features-2, noise=0.5, - coef=True, random_state=42) + X, y = make_regression(n_samples=n_samples, n_features=n_features, + n_informative=n_features-2, noise=0.5, + random_state=42) glm1 = GeneralizedLinearRegressor( warm_start=False, From 45de11068f083970516a793a0a2154d70662469b Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Feb 2020 17:20:45 -0500 Subject: [PATCH 236/269] Added comment and basic test to family attribute --- sklearn/linear_model/_glm/tests/test_glm.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 9b9bf08aebd5b..69fd25021d079 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -346,6 +346,8 @@ def test_convergence_warning(regression_data): def test_poisson_regression_family(regression_data): + # Make sure the family attribute is read-only to prevent searching over it + # e.g. in a grid search est = PoissonRegressor() est.family == "poisson" @@ -355,6 +357,8 @@ def test_poisson_regression_family(regression_data): def test_gamma_regression_family(regression_data): + # Make sure the family attribute is read-only to prevent searching over it + # e.g. 
in a grid search est = GammaRegressor() est.family == "gamma" @@ -364,10 +368,21 @@ def test_gamma_regression_family(regression_data): def test_tweedie_regression_family(regression_data): + # Make sure the family attribute is always a TweedieDistribution and that + # the power attribute is properly updated power = 2.0 est = TweedieRegressor(power=power) assert isinstance(est.family, TweedieDistribution) assert est.family.power == power + assert est.power == power + + new_power = 0 + new_family = TweedieDistribution(power=new_power) + est.family = new_family + assert isinstance(est.family, TweedieDistribution) + assert est.family.power == new_power + assert est.power == new_power + msg = "TweedieRegressor.family must be of type TweedieDistribution!" with pytest.raises(TypeError, match=msg): est.family = None From b8459b0540c1af4d239bc149155e12d83816b724 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Feb 2020 17:26:11 -0500 Subject: [PATCH 237/269] Added test for link='auto' --- sklearn/linear_model/_glm/tests/test_glm.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 69fd25021d079..1d1d0c55ceeae 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -92,6 +92,20 @@ def test_glm_link_argument(name, instance): glm.fit(X, y) +@pytest.mark.parametrize('family, expected_link_class', [ + ('normal', IdentityLink), + ('poisson', LogLink), + ('gamma', LogLink), + ('inverse-gaussian', LogLink), +]) +def test_glm_link_auto(family, expected_link_class): + # Make sure link='auto' delivers the expected link function + y = np.array([0.1, 0.5]) # in range of all distributions + X = np.array([[1], [2]]) + glm = GeneralizedLinearRegressor(family=family, link='auto').fit(X, y) + assert isinstance(glm._link_instance, expected_link_class) + + @pytest.mark.parametrize('alpha', ['not a number', -4.2]) def test_glm_alpha_argument(alpha): """Test GLM for invalid alpha argument.""" From 581b4d7492d39b8662fa2a8ec31960ecf545eed2 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Feb 2020 17:56:16 -0500 Subject: [PATCH 238/269] more comment about OneHotEncoder vs OrdinalEncoder for forests --- .../plot_poisson_regression_non_normal_loss.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 7ddebacc4ec71..d51c4a36c9322 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -80,7 +80,7 @@ def load_mtpl2(n_samples=100000): # # The remaining columns can be used to predict the frequency of claim events. # Those columns are very heterogeneous with a mix of categorical and numeric -# variables with different scales, possibly very uneven distributed. +# variables with different scales, possibly very unevenly distributed. # # In order to fit linear models with those predictors it is therefore # necessary to perform standard feature transformations as follows: @@ -214,11 +214,14 @@ def score_estimator(estimator, df_test): ############################################################################## # # Finally, we will consider a non-linear model, namely a random forest. 
Random -# forests do not require the categorical data to be one-hot encoded, instead -# we encode each category label with an arbitrary integer using -# :class:`preprocessing.OrdinalEncoder` to make the model faster to train (the -# same information is encoded with a smaller number of features than with -# one-hot encoding). +# forests do not require the categorical data to be one-hot encoded: instead, +# we can encode each category label with an arbitrary integer using +# :class:`preprocessing.OrdinalEncoder`. With this encoding, the forest will +# treat the categorical features as ordered features, which might not be always +# a desired behavior. However this effect is limited for deep enough trees +# which are able to recover the categorical nature of the features. The main +# advantage of the :class:`preprocessing.OrdinalEncoder` over the +# :class:`preprocessing.OneHotEncoder` is that it will make training faster. rf_preprocessor = ColumnTransformer( [ From bb75435964600a6dc92342472634415c2473e550 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 28 Feb 2020 18:45:42 -0500 Subject: [PATCH 239/269] minor typos or formulations --- .../plot_poisson_regression_non_normal_loss.py | 6 +++--- .../plot_tweedie_regression_insurance_claims.py | 13 +++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index d51c4a36c9322..70b4f36d014e3 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -289,8 +289,8 @@ def score_estimator(estimator, df_test): ############################################################################## # # The experimental data presents a long tail distribution for ``y``. In all -# models we predict the mean expected value, so we will have necessarily fewer -# extreme values. Additionally, normal distribution used in ``Ridge`` and +# models we predict a mean expected value, so we will have necessarily fewer +# extreme values. Additionally, the normal distribution used in ``Ridge`` and # ``RandomForestRegressor`` has a constant variance, while for the Poisson # distribution used in ``PoissonRegressor``, the variance is proportional to # the mean predicted value. @@ -390,7 +390,7 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, # the safest. In this case, the model evaluation would cast the problem as a # ranking problem rather than a regression problem. # -# To compare the 3 models under this light on, one can plot the fraction of +# To compare the 3 models within this perspective, one can plot the fraction of # the number of claims vs the fraction of exposure for test samples ordered by # the model predictions, from riskiest to safest according to each model: diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 7a01ebe1ea112..9154aa50e361d 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -14,11 +14,12 @@ There are several possibilities to do that, two of which are: -1. Model the number of claims with a Poisson distribution, the average claim - amount per claim, also known as severity, as a Gamma distribution and - multiply the predictions of both in order to get the total claim amount. -2. 
Model total claim amount directly, typically with a Tweedie distribution of - Tweedie power :math:`p \\in (1, 2)`. +1. Model the number of claims with a Poisson distribution, and the average + claim amount per claim, also known as severity, as a Gamma distribution + and multiply the predictions of both in order to get the total claim + amount. +2. Model the total claim amount per exposure directly, typically with a Tweedie + distribution of Tweedie power :math:`p \\in (1, 2)`. In this example we will illustrate both approaches. We start by defining a few helper functions for loading the data and visualizing results. @@ -430,7 +431,7 @@ def score_estimator( # Pure Premium Modeling via a Product of Frequency and Severity # ------------------------------------------------------------- # As mentioned in the introduction, the total claim amount per unit of -# exposure can be modeled either as the product of the prediction of the +# exposure can be modeled as the product of the prediction of the # frequency model by the prediction of the severity model. # # To quantify the aggregate performance of this product model, one can compute From 94dfc00a5179ddf63bdbd254280b693acfb0cc49 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 29 Feb 2020 09:33:29 +0100 Subject: [PATCH 240/269] Remove unused ExponentialDispersionModel.unit_variance_derivative --- sklearn/_loss/glm_distribution.py | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/sklearn/_loss/glm_distribution.py b/sklearn/_loss/glm_distribution.py index 920218ea7f674..cb20fda1c022d 100644 --- a/sklearn/_loss/glm_distribution.py +++ b/sklearn/_loss/glm_distribution.py @@ -40,7 +40,6 @@ class ExponentialDispersionModel(metaclass=ABCMeta): unit_deviance unit_deviance_derivative unit_variance - unit_variance_derivative References ---------- @@ -88,18 +87,6 @@ def unit_variance(self, y_pred): Predicted mean. """ - @abstractmethod - def unit_variance_derivative(self, y_pred): - r"""Compute the derivative of the unit variance w.r.t. y_pred. - - Return :math:`v'(y_\textrm{pred})`. - - Parameters - ---------- - y_pred : array of shape (n_samples,) - Target values. - """ - @abstractmethod def unit_deviance(self, y, y_pred, check_input=False): r"""Compute the unit deviance. @@ -258,17 +245,6 @@ def unit_variance(self, y_pred): """ return np.power(y_pred, self.power) - def unit_variance_derivative(self, y_pred): - """Compute the derivative of the unit variance of a Tweedie - distribution v(y_pred)=power*y_pred**(power-1). - - Parameters - ---------- - y_pred : array of shape (n_samples,) - Predicted mean. - """ - return self.power * np.power(y_pred, self.power - 1) - def unit_deviance(self, y, y_pred, check_input=False): r"""Compute the unit deviance. 
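(Illustrative aside, not part of the patch series: the two modelling routes discussed in the example above can be sketched as follows. The PoissonRegressor and TweedieRegressor settings mirror those shown in earlier patches; the GammaRegressor hyperparameters and the data names `X_train`, `X_test`, `df_train`, `mask` are placeholders, so the fit/predict calls are left commented.)

from sklearn.linear_model import PoissonRegressor, GammaRegressor, TweedieRegressor

# Route 1: product of a frequency model (Poisson) and a severity model (Gamma).
glm_freq = PoissonRegressor(alpha=1e-3, max_iter=400)
glm_sev = GammaRegressor(alpha=1.0)  # placeholder hyperparameters
# glm_freq.fit(X_train, df_train["Frequency"],
#              sample_weight=df_train["Exposure"])
# glm_sev.fit(X_train[mask], df_train.loc[mask, "AvgClaimAmount"],
#             sample_weight=df_train.loc[mask, "ClaimNb"])  # mask: rows with claims
# pure_premium_product = glm_freq.predict(X_test) * glm_sev.predict(X_test)

# Route 2: model the pure premium per unit of exposure directly with a
# compound Poisson-Gamma (Tweedie) distribution, 1 < power < 2.
glm_pure_premium = TweedieRegressor(power=1.9, alpha=.1, max_iter=10000)
# glm_pure_premium.fit(X_train, df_train["PurePremium"],
#                      sample_weight=df_train["Exposure"])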
From 0810bf31d467f31e301ccd5b4e84a222b5e0d41c Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 29 Feb 2020 11:07:07 +0100 Subject: [PATCH 241/269] DOC more detailed description of the dataset used in examples --- .../plot_poisson_regression_non_normal_loss.py | 4 +++- .../plot_tweedie_regression_insurance_claims.py | 11 ++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 70b4f36d014e3..d923afbc70891 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -4,10 +4,12 @@ ====================================== This example illustrates the use of log-linear Poisson regression -on the French Motor Third-Party Liability Claims dataset [1]_ and compares +on the `French Motor Third-Party Liability Claims dataset +`_ [1]_ and compares it with models learned with least squared error. The goal is to predict the expected number of insurance claims (or frequency) following car accidents for a policyholder given historical data over a population of policyholders. +Available features include driver age, vehicle age, vehicle power, etc. .. [1] A. Noll, R. Salzmann and M.V. Wuthrich, Case Study: French Motor Third-Party Liability Claims (November 8, 2018). diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 9154aa50e361d..f9898e8b73542 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -4,13 +4,14 @@ ====================================== This example illustrates the use of Poisson, Gamma and Tweedie regression on -the French Motor Third-Party Liability Claims dataset, and is inspired by an R -tutorial [1]_. +the `French Motor Third-Party Liability Claims dataset +`_, and is inspired by an R tutorial [1]_. Insurance claims data consist of the number of claims and the total claim -amount. Often, the final goal is to predict the expected value, i.e. the mean, -of the total claim amount per exposure unit also referred to as the pure -premium. +amount, together with policyholder features such as driver age, vehicle age, +vehicle power, etc. Often, the final goal is to predict the expected value, +i.e. the mean, of the total claim amount per exposure unit also referred to as +the pure premium. 
There are several possibilities to do that, two of which are: From a90a0aadb59830c338cb915ac61fdb03d5152450 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 11:17:38 +0100 Subject: [PATCH 242/269] EXA fix minor typo --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index f9898e8b73542..655ca578184fb 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -337,7 +337,7 @@ def score_estimator( ############################################################################## # # According to the observed data, the frequency of accidents is higher for -# drivers younger than 30 years old, and it positively correlated with the +# drivers younger than 30 years old, and is positively correlated with the # `BonusMalus` variable. Our model is able to mostly correctly model this # behaviour. # From f74ab9604b3f1c2e4810df11a63145545ec47acd Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 12:27:37 +0100 Subject: [PATCH 243/269] EXA compare metric by metric --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 655ca578184fb..98d515a4f9418 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -443,7 +443,8 @@ def score_estimator( # As we do not know the true value of the "power" parameter, we compute the # mean deviances for a grid of possible values of the "power" parameter, # hoping that a good model for one value of "power" will stay a good model for -# another: +# another. Here, every value of "power" defines a separate metric and models +# are to be compared metric by metric: tweedie_powers = [1.5, 1.7, 1.8, 1.9, 1.99, 1.999, 1.9999] scores = score_estimator( From a349be7e3d4ad3fca2151f55c3c60476dc147b51 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 29 Feb 2020 12:38:05 +0100 Subject: [PATCH 244/269] Add examples of use-cases --- doc/modules/linear_model.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index fd17227bbadb1..1721a67d7e6c5 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -975,6 +975,19 @@ The choice of the distribution depends on the problem at hand: of the Tweedie family). +Examples of use cases include: + +* Agriculture / weather modeling: number of rain events per year (Poisson), + amount of rainfall per event (Gamma), total rainfall per year (Tweedie / + Compound Poisson Gamma). +* Risk modeling / insurance policy pricing: number of claim events / + policyholder per year (Poisson), cost per event (Gamma), total cost per + policyholder per year (Tweedie / Compound Poisson Gamma). +* Predictive maintenance: number of production interruption event per year: + Poisson, duration of interruption: Gamma, total interruption time per year + (Tweedie / Compound Poisson Gamma). + + .. topic:: References: .. [10] McCullagh, Peter; Nelder, John (1989). 
Generalized Linear Models, From 497a76c46bf5a92ef995f5f3cd810c7b4b108233 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sat, 29 Feb 2020 13:07:04 +0100 Subject: [PATCH 245/269] Add figure with Poisson, Gamma and Tweedie distributions --- .../poisson_gamma_tweedie_distributions.png | Bin 0 -> 38253 bytes doc/modules/linear_model.rst | 10 ++++++++++ 2 files changed, 10 insertions(+) create mode 100644 doc/modules/glm_data/poisson_gamma_tweedie_distributions.png diff --git a/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png b/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png new file mode 100644 index 0000000000000000000000000000000000000000..b4cbc187ada9b28b4c971e96cd1ae83bf5f2c6ce GIT binary patch
[binary image data omitted: doc/modules/glm_data/poisson_gamma_tweedie_distributions.png, literal 38253 bytes]
From: Christian Lorentzen
Date: Sat, 29 Feb 2020 13:32:09 +0100
Subject: [PATCH 246/269] DOC fix minor typo

---
 doc/modules/linear_model.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst
index 103a305faa0c4..9ddf0cbf46971 100644
--- a/doc/modules/linear_model.rst
+++ b/doc/modules/linear_model.rst
@@ -993,7 +993,7 @@ Examples of use cases include:
 * Risk modeling / insurance policy pricing: number of claim events /
   policyholder per year (Poisson), cost per event (Gamma), total cost per
   policyholder per year (Tweedie / Compound Poisson Gamma).
-* Predictive maintenance: number of production interruption event per year:
+* Predictive maintenance: number of production interruption events per year:
   Poisson, duration of interruption: Gamma, total interruption time per year
   (Tweedie / Compound Poisson Gamma).
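The hunk above spells out the modeling pattern behind these use cases: event counts per exposure unit are fit with a Poisson deviance, per-event severities with a Gamma deviance, and totals with a Tweedie / Compound Poisson Gamma deviance. As a minimal illustration only (not part of this patch series), the sketch below fits frequency and severity models on purely synthetic data, using the estimator names PoissonRegressor and GammaRegressor under which this work eventually shipped in scikit-learn 0.23:

    # Illustrative sketch, not part of the patch; assumes scikit-learn >= 0.23,
    # where this PR's estimators are exposed as PoissonRegressor / GammaRegressor.
    import numpy as np
    from sklearn.linear_model import PoissonRegressor, GammaRegressor

    rng = np.random.RandomState(0)
    X = rng.normal(size=(1000, 3))                     # synthetic policyholder features
    claims_per_year = rng.poisson(lam=1.0, size=1000)  # count target -> Poisson deviance
    cost_per_event = rng.gamma(shape=2.0, scale=100.0, size=1000)  # positive target -> Gamma deviance

    freq_model = PoissonRegressor(alpha=1e-4).fit(X, claims_per_year)
    sev_model = GammaRegressor(alpha=1e-4).fit(X, cost_per_event)
    print(freq_model.coef_, sev_model.coef_)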
From bda7ad6867b661fb26c5cb4a496222c5a37c1b90 Mon Sep 17 00:00:00 2001
From: Christian Lorentzen
Date: Sat, 29 Feb 2020 17:24:26 +0100
Subject: [PATCH 247/269] DOC add point mass to plot

---
 .../poisson_gamma_tweedie_distributions.png | Bin 38253 -> 38430 bytes
 doc/modules/linear_model.rst                |   5 ++++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png b/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png
index b4cbc187ada9b28b4c971e96cd1ae83bf5f2c6ce..cfc8fef2ae40c2e422e939ff067b53b94dd08f8f 100644
GIT binary patch
literal 38430
zhVt9b*wHxZ-glMx3;hrM3c|Br+_3GonF9yBf$fNwlVNZ9r4r;3i1s6y(q7uZu)C{< zPo8f?y2}UX(l&|u zTm|Vp?uP7G&lW;-9lGsW&25D z#+LkR#Lrt(Yegzn4eDug@`I)?%BJW28->mJ(2+gY(D$E^{hW-ugau*7}$=Fy>FN3HF;CO7}29^B)Id8*=D(SqId#J zEA{kVSlO!;r*jrl)=9z(76CSbbF=xt%cV>=C!s)82R)LmUdD@UR|hkLJ?Jahy9K^^ zBv60KN3?tEmRAXO_0YZZw^wUHKunba(&ggm!4XQMG$Y(Qi?HZXxs?~(#-Xi?Cw=WaWc=7=4cf{f)YuT)nfm_5G zyoTkjV#2ZYF3*3VFvpP*5vmoblOaeJR1N*=!wp;j#<)p;f40f6>5l$ly|Nw@nn!EM zsQ`RqgQFV6r-UjxclPW=+O(n2DTYe+2-uM{cAI+tiS%@TUjPiPVc)CNPfSIsn1E5UFt%rP+mF3fp;A

uYea6)Rn$XMzfLKm@C(b^8sX7FDV?# zIR(5B)&$ht3{Uf?DKW~bsHm7soF*bCAGnBkP3j!B_iS3Uz<}Jzs<7>Gc&s zri9#r_dj%j#B#8M?}r|Guj!lMzbX!laWa;P)XJ1Cj&G$;*j{_a8nsN-79JdbQiwxP zFngf3qUQIMq5o#;lN|mxu2M!liW21fIrm5WW4eS!x=L0-kvawmqAt<$XY+)2!Sy;B zK`RQp=uBihn2mJ2Oc|;GY|rPCf&J%JCcUi+hy*IZw;N2YjI|L^)d>o&d1)Ff&E0s))`577!=tCxh^+kPQal&^)&v#hperwpKEKs zPg9mge0M>;(9uzT{hG_FGcMLcBEkLp=dU0&&704SN>~LG^y#{lC7@8t;f1BcAqvI! zQ&WbnuC9dcdj#gN<>h5ER2EWc+K`o-tIfq2j)Yo%BoU-e&>RtcG&HgNnI)_%N1!pC zyk{}ErX_{fSHu35~N0`@-UZT2ls5l*&!hY#*5tq`j2HgwSl>EUeDF!`3v_4 zQ|u94=C`tM63x{#*|}N1Vek0ufLn+Uh&v4V8l@VhjBbc^(x+ahUsd3hJcjGP>* zOkTX8{*$j|Ggf5KmnOpVj`|25JSJ-njvE(+3f-k%=l%Ka9%Psp|2{CxfxeX$$GC|n zia8F-!LFRPN3<+wd*p2k-@}`8rG37cgQPu+O2ieoFAbGZURYb zgKN0*IKLCAbL##8U5{_+$4sx&Y5Uf-t)0AOrR3|}DT>4Cmj@=HbQ|8Yrtau9K0-0A zt*!kbB}K&;As+Otz8=9S0t4;Nzz2SxHxjXW<@CD9;4ZMsD=TH5JEr{gyN|gw(%V`> zN$W1Iz=dBy*wx^#+=LRQa}jVyXZ~IRn$~}Yg7C`4(UNOy{wRJu@*&AKoP$R6_3PIU z2^f%nM+`6BoC_a_?teS7Ty*92}5D)R)p(Hc&P+3yvwS?T$7ABp5cE-o68 zlqR)#Aws6kxL?xXzo5{B)++sTB$}nA?*x_G^55{(&q7f?1!q0!qITbZ{vWF<{ zR2h@u)X2D2O;nVd8yw3d{_Az`E#Qv&wQjt}GjOd84^dIkN1~z$2?>fC8szcvR}$l9 zf&O!TsrrvNae1tQL00bM_W{?AxwQa*BGj@z<-RDTI$_>8X-%Zt+<++E?Gy z7q;tmwh#RG_V#81&bJ;IRz!3s^1xR<=?T04l~%TX^M+U2bK80#OSUgtK08gsM%W20 z!(X#Zb4$Q zUZRJGUp`Y)Q*tzF^z-gr`t|D)7B}Exd{YAC~dQ%n%_Jz@vab`jGjVRgiI{ zps-M=oR!{;hT9*u{)UFNM}6;^A3pqPS`!j(8qN0r)kdS z*r&aN(W>_UxtFr!50L$ANvH&r^I6|r-y-oFLN9=~+wT8^hYu;A9{VB6bokh#R3DME zkP?Yi^@0CDm?32<*Dk@(Mzq+)mj3w@l`?1TujqwAv6p!0cM8{5jK2mb{qd;8_ujpC&t6}Hz~Q@>|?cN&UJ z>ck#XcSOXzpvoo#cCi>wos;`tM1S_&Civ!UF6@;FM@bf$HL(U26clLsR-`{PuKuvO zxk)iyWL)#{<42mP=;){F>ek&KxqwJ08W*Cx=i(w*ag)N4OI9-Lui-TubIVPEBO=aH)ocd=gYn>3|DB1lkazFi zC1U{KtiRarwTSQu>gEr_&*)Z-wK-0hvQ^=L%;x>uE+9T{XMTFz4klMjy61bqRC<26 zyA5A)T5`T2ST2j0Yjd{D6dP>pokGrqrD08l)^YV{Prhv7XPqPbs7eD^p>!qN@)IBf_-kGc|SgBi^Qr8 zB0Kn0OUKNh97g{8H~K@>HFDTb3OhEQ!tTn*n!_=u*8sc#upU}pFO$q~(j$93^JnAo ziD7m&Q5%E{p;N=* zEnw#_j;fj*CUp%Bdr+Cuk@4}EqiH9bq0h?dSu$ySCG{VFyvjcs?Odm&mSo%CArLFD zAmX``{@uy`ufqg{MsZ;ioIzyoigF+nEdPqE3!Xo;V)?Kx&x*H6wgga}GZ{+yQ$BXb zp9L1ShL%oC^pU)PL)4%BEd=<3+j+w=+SjRWI$0nvADdrMku*F!%-TM#b#Qq)_(6y& z1-V=M%v@NM=T6r%9XXmv`UT@KIm!vvgZkmXhZC{L@V+7%eF{Ih7=ReAV`a3H&Nac@ z!l~MNdR8#m`;CNYjq+?HZAu#chC(T9Cbf5?0F%C_UW+Ke71= zl^RR~VyIvlMApz%zFQ#BrpM1s@UtCr4s}I)mK33eW4`z{JxgUk8*6 z;E$&pFggo3$HGyee1z1DGmuZfIIr9Vu1B%NC`m4XZ`(*q7D$>ygt&^1!Pp&(viecG zaUxk$QUpE9Dk|beG?ewNY1GwVnbc1iZioYP+3(&sA2P}z1z<0h>Uj)6I*at~R~vGA zA&?pYReLL@!7E8Ot@##+v^KhHn4Q&5${S*5@7CwLoJ-LQh@fAvq zc6A`d(3m6}wMblZng2i=uOvHijh$E9me+?m@*84rdCKC>TZ$*rG}P;a*MrJFPs-@I z1^oT`M}SPO_GfegL@iCq{l55&vr?Ud-v37EV$G9;?s?*Y)>W#Yl0f#Q_+KL=egn8E zKY+1|>-pK=ii%=H?Z!$i+eu7)*1w8v-C<&?*{ZDeIm=jI~SloZ1^!_!Lulx4J92~fT{%&miC@Jaeo2oFX zapeLoItGTs)YPtzTx$B2_6A<^NCn}F2tqvT@dCi4Tow(KsQ__(k+ktpx#{=c+vRQo zf{aixhpD#r#OT2)Adi33%BDcRBOv`=T_>T7Y`I(#8uHg}X?AGD*0tQKgs#NIKQ;JxTEy z$g|SnRiwTL1j03tE0{Eq?$uYO&t z`fF-VLy&o>efSBm=W`3DqxhVk$e;@)z!J|@RaNiu@kK9n#$(%PwRR6n zsxHa?XD}9Lg9S-siDtdejAfo~2p+juAEHo!)?PDhh%DgRJ-XLWtNK4f?OCHqorlYI z4My0igKQO**&E7W+VwTXbm~_FfOVPDo-}N116x&dsL#sLsWRTedZyl7uktmwdgJM^ ze}D1dJ+E7ey{7^Yu)arDj4v<8uZAmF9*uRA(9}^OWejfLy3WbcD`u?2V z>P-=F@-{X!eD>zen|Lc``$bot!TCkonV&0X%9f$mio?gYDEV`WVqR_S9gz)>JuZJf z4vtt7@psji3nobbUjJh+toQ8!a??unnyTx+Y);9L@#9CNQE||6$XOkAB>go70mcdx zC?)y%{vPCoFeg`VEPr;=m+72Fv8e@sKmFr!xz{EEs3&#g$D$Fa*rOF%71~WbfmGE8 z|3XIzfDS#|o1Y_~PYo@Dsvgey{_-?&3jx^GKL$q{D)|GjY{GXT5eiQFLG!&*EdVe7 z^HJcvH|hA?kQbk_(tFVF{U!>pQ91aUe3{-uX)NU=e`ZLy*Q5T_+$iTcU@oD@*N`JX zM~T&a?B|>9(+uU53d`f%t)xdb|9qZr%sD*wj}?Wwv<`#4eCLL)cRoZ%_br|s({HM9 z)uMkBCOuVprF!#r1|PYo!hd@KM)A3ab3cEE03nGgGAK_KvC*vC`rk!mtoi#gK0dDC 
zQJxDtdr$A{6>7GtNQ0;o^l!06d=cc)0A^^i`yahHy?xZt@#H#yv%D6G_p#VCX;1zX z6Awl}A4HlbmD$)DLN@He7sISA>&)UkBmgZ62=s`9;cAOF_PSv1kTyvSqV?<${nl4U zM@KzF?4kX+>HmZ}6G!sTSMs>EKw+~}AhA5Qt1^_V{ zNleQPdTwfpI#~Y$)-N>&m&5wsVzJAKT6pNo0{+@H2y7s$pT%>A+I3FaV>1W1R=fd; z3H-daq(yh`I-CfSNEc;1swdWneGvx~c>aMw!y`w1sl(W~Euy#=n*u%T!zmtfuB1F; zp~qQTPZN2B_A}--OEr|03B0|%+uFUyoq7RHD`Z6o8jw9U4&U*=I>mJN_FCH#;dd`JFjN5E z&;%q8z|rocoTFYIFbG0=hPuoL4gPPg_ZwtCQ=Ra-H`xW-FOJW095gyIGB`2M2FCrieGmxm5h{Xn_k|1T#2-3ZRBAu56tgBEG(z0LoinO43-^JM>l+*kG=#LM4! z3W(GR>;F&k(f4T6;_Bk`n)-QTS6#ZfQcAt{7L&KWe*L;RQ2d3!VcT+d zV$FUJACNoWng!&6+4)4NMJ57gnoFM=MkwWFliE?HradGKAh{7Foe6hvoVS4K;fdo> z+=HY1bH4a*|CDR~8x_*W&wK~ym*+#$9 z{p3zy9!bf`M-m;j(N8Ey+88H)L~4(YG%#Dweoi{O`_%z@3-E^2@xN01-N0P+Z*6S8L)& z&OOwpIQb@$S>i8)xr%4&P{YE+&xnIyX9`8cKi2Aj8U?Ebdse-LKink+jw?YJUov6&T%|Z$7Oj$T>zCB)+S$vw#SxTkFH#c=zOH@MlEi#w zdE7J@A4m!8spLAk0ZGAJ7*hXgVOQ+?G(hNH8T&!s z!{Z7H?te^7{KE2#3GkFw0H-GO{987__~c4->-jA{Y(PoX)!e&qyqMBwGf3c6-oz4l zZiB%F;`}mZ!|fO)^Ke|o)a&wR5K8ya6Y6!DGDk|DJ9nN&QNH&*7VcWXU|s=QQg4-$ z!mYczdl`dKk&gNB`BO-kjSoh}Hy&aBcOuQ)r4*}^>ij~2k^^Ef=m>_vT7%1jhLA4v z$*rmo{G& zG~T}{4ZAfY65WD$gknx<^)IaaGxDf9+%$sB?NW%yee^vw`dI|ik+zB?z`KXPm4}Fk?ryq0@uI_15>hn7PggY8-ki%|K8j~2`y!ah&CEcw9Cp{_oYUkww zr?>gE{A7O+LSt|#Oyu4UC~-U&pybAR)NaiBO!MZ4Pw7aYps*7E=e!5^!q>O|@3081Ci)m8Kx?rU5yTUZzWNQ-A2@QjrYJ<;EMU4!9(_~)Cg z<`kz(2#3T1KpW})pApKdrUZ~rG()Oy^yAa6hd-t_&j)b?jlwGLw(PmB^+*zoh>GnE zrjH#zqDuhh;Z}02&Mk1?_(fUdWkRCxJHP*Q+FZ(k=20Ft$&3)RiX=|>wz0jtoqdDj zn-7rkhf6AIek~yi2^(!;<-z%Yp0#Z;qDD(%U{HyUTd$tj2y+D9JZ|`vX@`2y()XD@ zD&im^AT5N#A$;1Vwe4l~2@wkgCQT56j{%az7xi8yn&!Jeu%EqV{rhf#v3pb62M>er zj7ySd=-CeKAiUY8ZIjg%bZh@fAIyW*nZX%2-o;^F;s(CgzI6m|clbc&)kD6B;0ShjjjJU?9;9sr^#w z>EMP>n~+#h!)yst3-+4_nxDow*j{#`Md6R;F3$~<8-Hb{5g1%E=$0a2vrjlB&I<)E z{hbGBPX72V;2*bE!>1oT?9t$0HwV%WXV_scuxT6@8A3afFV#E@A#{CMG5bKqCx^Y7 z(g}*Z%MfN%&FQ7+0svgj-3LQ5FDny)LhHZrOK__>o0cahBwdV@YVx;TU40+ljVr5p zb3Jb5;hfA!Fm)OLF-MOE@O$szGk&QD%1mdm&k6K@x(n9y^mU|7UdYA#c%`;V?|cBo z$K9Kw?-zvTIUVdUC#93K1q_~Jui}wq_zvkYYzeu5kenE6!akRN^7<>NbfeTPD9yXU zd>W8d2Rykj>s63lpS@`5oe3gNd@kf9GGLyGI$H3(WT+A^YjTHwi9IGtvbHL;3-o6P zPQ}Qv&{MbI8NJY~<a;ctJb5ci$c(6)c zp`r=$Tc=(-Tk}}Q#hE%ohCgdiroy-Lfxg5Bq~N?C?v!G_6;*RAi4lA9s7JRirf`+6 zv|#aW#-kwVjt&pM~UKToycDf`)c)L!s8p-FAMLV!q%6sW66S!RPts}4Gz44qtG&@ zToO1~HJT?^yy0c1CHe>Zo326iK~$Q|NY7f4wa1O)pjZUOUU#WKmwJPY4^SwG7LzrH z9eE8d;o|HiOvE2>L}Ga;F(rk3KkRIy0KQAZUG^~qav*Q1(_z)=l(Zf-e1aJPIyav2SsZg_-s}+&^6>hmpOB9E!@6!a1y0TFifkq8kbw} zEiQj-D-sZqG;%_TA)woaH!*XdjO<`{N_@0nP9%qjewER6ktXV=3>~KcUnqe3Z|q%n z>h%RFJyMir%hm|EArApq;BT|L9Bv_Qy`7fU2f(L_G#xc@c51ic!&N`#C+Z=cSNSV9 zYf}vtanz-cHf8y+iv1{?WLdv z*V{(!LAfFQkAJ?ig?WUeZ4sgI#8M~aMC#=B7u@?l!AU1v)+7Y#8|1G+OrYJ8;;m3c zR|e(LU$$-;Lq$ao61!`5)fc%Ulazo^fVuij;}zM0AB}DVm_sfVzDt^$-B`tkXTm2@ zHMz()NCVhWd&HA*Wd;pKML-CunGIPoL+};)R&o*_Tts;eWZF2w_Pf-_wyuvsLM){y zSI9MFy(w;=V3Byhsl+hl`};^aGUF}8^zg+yxT6Tr%*T;HQY8Jql8;t>O*p=&`!!zq z@T|HHJ{=BkWgKtL2wXz+vDp78SkO07s`x^aupPNBqn(=htUXv)4aM_X0sZ5eoU`53 z3F4B#OvS8VJLPx|{8eS)qceDa?_``z+ev?B0aAsg`f5%h;#5+Vu}8UX8!72!=jmR- z@!#<>dMXR7c;-mS>f7IW6ot{Me4$N^0OgXF#?^i}b2uK(OGxW`&~;6s)*GcEWe>UG zgHJz(BQFF#)?vIgf0L&Zantw$Yo=exdoe!IR@|Dq8Z2tdnyQ)vrEYk!+tmxk|mwvkHzZiXc zdolG7J3qKmhH23RLc@FQQ^rWilyyY!ZlG{T*71hpv6@W`go_>NZy2M*32S$ykb3%3 z%A*65>SXu5(2Hwo2q-YN`LF^9G5F?oI{n>DG=J(L_J97ohb&vo9t^ehgD@v z^7KC#=}$lZ8nb+ZKT{}5rN`qX{Ba^$+498$Ope&9NpG0~H-p|~a9GE;>k|nRx(#nz zYnX84o{NJo64a4N3M#?>io`|RLK`ZqO~In>u@_(b^eU97 z6S5-LncY6-k=^GuZmn@CDcgz-RFS?zybO?^Pi=v7x3-=` z^ENEyDCt-8Lbt(8Lgd3?t>5w?Y)SuYd$vl1s&R~65-D$o9VMqZ2$ws#4Q~4b4+z+6 zdLGTS-_{rQBr9>0ZVQt&JTu@VJd_|(#C0h(cf>F^XK%jTKbFC->p9)M3u;t3}NZA)I2kny1Tbh$N|4!2& 
z9^%J85y%-BQ)svtk)gr+)-D0Hd=>9XbxVQ&c z&6|-i@v?JOhv`@@2IL-oshx%yvmsP!Xy|@Kf8k{tkt}M{+zcC}0raS%YNX70cekmw z8cF@wZYEH-aEQxBl5)q-JC$tBLqm$UBicTiF5g{K@xC`v{myUut^SP-)?isX4i{DG z_0ZrC;T@D(L;#IBfia21qyiX~bks$wGdV^so^#kF+8~)TT(73?FuOBN}|ex6?SD#pEC{CbU{dFtZ#t}>i4X~TW;&sR-@ile|_Ni9B6b2OdA^j$AdSn zPr#hgCxHhc465V){5R1E0P6v!N8JDnH1Er9u`789b7_CygM<54b%xgCFizNR|-%y=ggj zHB>@u`;E=*#k`l_Nr;ETB6F5&An*>gfMV)N<1^}BNVsA*Wi7- zVWKKRRWI|UAYITXJ<&pufR{ky?w@;40Q1bAP%|n>^ReU6p~Caj_1VglEtGFW#Cw1; zrGC){__E*lc$a^0tBMCK@CVvT7+xnC#E8zEW4hcvIk7%j<9IG8P-GDuaf2y6xeKTm z)*HG|p)yI!i^KGAAkrp0MvgBmSrKFkHS3XpWBm&7ls=A&(>ieMskaFAbQ}r6KA>K2 zyfO;{n}6`Uzg%T-_q$r$*vMYW=qJRB)Yg~__UAPul?^u}Y`=#-8~`e+33B53f*GA- z+uwe4Q;XxGcc-!!LbIC}jXtxa1Gj;7j0~mKmwm~^BQqc7+FcDnN944KKo}5RcW>bn zqvwmaqM#~K3rDc>Z$Dwczz!=dnndZ*^n?%f`zoF~z45vp@izXvmjj^4H_?t~JU z9r532@baYt@^i5L-!=8WOeW`~E}XBu#9y1%^)UfG3I4@62J>}}7Gas)kBncQYLGe4 z@m&>I5n$_U81yb!HMLu#oDR-zLm(`?C=Cui(O?~VqI!lfji4VXt$e^k6!=5rnoz>L zH($a*yQ`>HmPbA%Rci+Aq|{N*terP-1p29YJnh(G#xLG^_6(!H4|nWu%T3miT%MVk zw=NBlK8F(4S4&tOrRDRmqPMZuzaj%P0-yA;G64yXX$`PeGa zu4!oGxCMaymB}swZedmO0=&MyY5;m)PgDMG;OR*HV~+lfCjQ`vdI0ga9!q`jad8PK zH;hUP#L2a1+`uggEI_4cSCoMx3qw=x--jCZQ`3q8yfw%=UihgQgdw%-uIP6X!Jy-; zbgjoov_TBV#Ob)ws;$=vcJMjF={(dLsTM^kSm%6CpwYyAKP=P?TP#;{=HP}$n*iQO z61pNOnx?3nyT?9hL}xqOnjXhK+(~ExPR>m3QhxPhziQkA=Bm-x}Di+?h5_*#6B{+RB&$B;Z6mM55vV1F4cd8td*=6gTqYqu8k{?-nZ-6Gd#=XxzC;g%3fUl=0>0M($ID`Wuwfsl1a)SB;5py})n?Cr*h8ho}YO6%$|WaXGWhNGn0} zGEn!9R)c>thS?GV(op3X2}IFjYltuv7tf8UR@;IOGvWpM9*da88HnF{5bGyr2 z)p%N|@b?7It$?C?Y2YjCceIR9LI?2JX1Yjp*Hf%6l#G`jlyb@Zma=sTM>`3`fP%Dd zi{Oz?w)yNfhl6+=g78mCP)Y#7=+?n-bc9FyHy!fah0G?``la{*du2GK@bd%^xZMXn0(I zxOn;A;FJ3ldd_QY>~ERui1mZ|+SFZNxiU&yKj1RpB2#0z6{f{WlR@YYCfv>aLt2Ba zd3dqASLNkwOaU9K9C}yc188ZAz?*m9)Gh zc+|6*aa<8qtr~t0g5PJrRdcahRy7fLaHD3O{T4f^*s-A##YMU=fJy8bk7~GecMSS} zY4mypbYitf8%0bw9AZo3n{a{VUA4JYH><9)c53D+LgEjt~Ajj`zu@cEi9 z?EQ?#j>iUaiOPM#b|5;>JDTDrIbh+9RQ$q0WM`7*{nusd)J<(iUwfrB;yqBPO$$N` z*^49kRLH>uz=|qHomtZ=aVa!*i;TGEHkh=L%;^$eOrGi5*N|yh;_m@QbbV4fk_V0H99J@hXrUuIJ~>44AOH3vJU1(Ir+oj#J+Kf&i`3zVcS=gHFc^R5#H z44)?cmX$5-j5;6yJ#aeyF>o&)e$L4gxif!pKzCN2sOa>X*p?j=K$Ectk zbV`Hg;6}Yyxubu2xal<^vA%j4?+W=Amlh@cN7iS&R%2z{+h5Iib;N@FFzq|f&&FR~ z)o%~nZ?jea9z$B5E#JQ_FO$h8yLT&g>+l^HWw6>6+onJ5)z6h&Wx{Bd@Hp%oWh3hm zq`=>r)9`i3t$X#DE-*iv-YJbYo9`nqeZAc}v+Mvxz56OeW3 zJLbH_p|@8j9?^e2k^j&ePF|dyM7uagMJ(G)NM7Hziy&Ks9`+xjay9Y{7cUL3Y?hfl|2mQ8$`JH}_ z)Mk}I0W7v^PS9WC!I{~_5v1tKKcnC7sCWT2zGK~w7PW{k6F7bdVPVP~z4WHY1n|%q z)n*}cwVHLr0LHuRPlDK2^jc3T^)#7S0-6z*-gR?)|1Q;WHaogi>Ait8GCn`~%}Mm} zw6#6Z`kAHJ1=g23W>{K4x@p&s)#q5r?=?#A-n3E2Icor0Rg*{EYWUMWpk0d{A!2bG zP7>~V{)ef?(nGGBICzlU#-FNZC!EJID3SpnN=UvBa(fdz-<=A0ME| z(_^hhHD4|$?xf#QCF69(D0>RqofMUB8bPI!KT^#XMspu^H!^53b$@Z%Tm{HNs)F4%WrKaNKrK6EMQxiWySRw zM~y)LW`BdZ9&VNE>co;@od1Di348qp&u*{C0-ISF*RgZ;_RXr|`cJjoPN8jQ-+Thf zRvG+hFp(X!7u&IOd#&YxsxNaAGkBv^OO{`M-5ph?^&a5OoDt6t2Wpsz_d$DW8+B6r z$YIy{mYtT??2uo+^GTG4Ph}v&r7L+nF1k%hYoDenbT(okws0rp!RqMn0WPY@r!H!{0c?ZDxAvC)tdJ z@BAQZIHtIt^ZhR52yK@xl%6%3B-;J0n%7LMxSKoU*Sf4wdt36i4zw==l{OvI-G*aE z^S|epdyu!5VCDk2*|@O?g_E}`i~@5kgE`GSJ;zuHGEz}Zq%|b&@pWJoj)|(JHy!Qi zD`7S91f(pVk|UI4{aGe)!(F%TltqL*Yc6$UEs{*`?4{-S@U$dx0y<0Dt=7KUH_ILI zxesWI+j`v0)_Ug{*v7VvFK%Nuwrw?5W7{?w+ji3=jcwbuZQYZ5*Ug_Kf8O=3XXcsNv-h5J zR2D6XQnb_h@xR*`<;chvIXbqZGHB?t9Dym&mYd4}sdC_3~uIExX&%hT>De2@|( zB8?&B47Z~`?o_r+AhbZj)A@<~Cy4=(XNw=3Cp?&UKX{wgdPXh%fi<{QZL`S#?+U|< z7s6xQZ?(hxx=yop3f26Sb54l~MvD?jb@BN4b%6aM{Qj`vS3F1?{wIB;Jl(;)e~qK( z;_9cz?`aS{M6f`t10vAonMxxIq+gi7+FCwUO8)Xw^;$u~H(X zIo=BJZ?%!Y90!mb`Q`ZkN+BkJ>5&7@;^fmE=89$aeOxZF6TN_p=h@VjD~au;^KDI^ 
z`D%BVxY>LCC;7LcHWjUIv;uAedIJ%65vJC|Gpywt=jVo3_eZ1crC{2eznK~}8XbHe zY`bbvyIEwH3c1ZO1?iCRSqVslp<<$L`nD8iCQ1|0$PZ^o;Mo}JJu@zU-?nHqYNv3x zR1p5C)8MSHC(IAtl+NNWXE$W^mte7c_ga|m$M%y!GTIEfLEyZRJwBsKoJV@dJ-E zoAqyZ4nBOhEXu+qH+He1>(&F6#qidq2p`5tptBjU!**qnmQf7aR6 z8yHZhiu?TEGdxxa{eb9x&uj07nambW-G0_pKW4k#Pt&`J=G{K1Sp*`m(BbBuLZ>?l0i*?*Wf355# zpJSY6m6-M(BJE&0hcbc$eIWq>+Zko+0s$jdp&4;?9d#Pj9kWYu>=Jq>=L;$Il4>W% z~F|EDS2M$v<%1kCBdRXYNu=U8Z_G-<{&mK;fKTvJBGPcg=1y6;npQ zO7lB10e71>2%GRi$eB5?Wp37dGE*=k9u%rKiAu6a!rCfsWywz^LZ8*IJ4~`dw6yGb zl$TtJ0*;?QvAespr>8aoMjR8>zh`lgdD%H~CY9580<+dFw?gDUSSSa7PrvUS{b=Fj zcu#mZn@fJPM0T|mWPe}3Jyam0>SK?^?)nzSn&ngw6R_g+Q1*mx3jPO~hRO4mEO**=QZbn#IcqqDGKKJ@psLCzZl)U@VLTS`BN#xQ!_@8gnjjC)KZ{P$U%M|y zK6uz(@MgZv0QF;E1QfnwXBX z_R1Y4OLamN3FUm`7|ciB&cZ$e=h$i=h_9A5TnvWnfB3L{Vo10Q9{$Js{jqM!CDj@dLEyi583;b1zq;?bC_x+) zXv3!h0VFV%K;>Z1rHa$@l}aXoQLb?vZ`1)rhwxWDRKM2k@tnguDW$`kn9KEaf1JP< zDIv7&$#(5uM8l2pf(+r=yaBg?Xb90#%hK-8Ply~^+(SPg7})dxJaYa<@sj-^vuu)* za|Gdo#d@^t)6=HyzS6p@LaQ?iER!kG1?zpJK+A8ew5FB+IWe7IKwTsi@{DZcHRTra zgxGvAqAX}a7`U13v&oL?;U^^(#WY1_cjQD!1GN)?Sw#Nwz2}>}-FJhtj z4demN^95*3Ky?Q`ppG+K^GM-uDCZp7S7#+ggYc(nqC3-VW81WcH@NmH&3jl?` z`g6~G+AiDBU_O~uK!4BLKoz5$;fC>332@9Q~(cRnVDk_@<4L1 zU0?5<#p%RMYTIrzy8osCaYkDa-kDWmWkbT1fnqnc+B@CVOVCUT&&eWY=`pp_PFb~6 zV!L>~d)g5}GD~#0+E*;a_%pEn)MZw?kD2;JbeA!EbPbj`pJZRS1rvIMRL5`2519@| zE^&0ck%0O^hI7G@sZgRY`ZzDtI)E0MKJapP-}d*BLO04g9_O%3=E;rWcEDwKIDXNl z-BH!Y8)d>OXz;;L{PdCyTZWSL^-=lyv_|D}4H62xJe#+Um>cSKx}XjY395@cRl~>Q z2Ql$GPMOF^TgUCc`qL~ryhVhfqB(L(rHs_DgD%j&hZEo=kJke>S14y8$h|IG0)^%H z>f9PfI2sTdEK~URU#GT7@ClWq)Bp7mRioAt6iN~0Cpa&iSy7kU{o;>n`uCKtFa)2F zzmxq7ka^lr&v>*BQ_R6u+H}y==?dLep#mmU3t0U?Bv5kp>FI}b#%7hrG}L11D$gPw zjjOAd?#?4+t!UZb4eL&3`v0Ug1R6?u^)2raE^|LIUJEl2pMS&yAms(y+eBX1UP=Y)*)Lk|9$wQIRWs%&v;K5$>2U#8`( z^&^86Ke}Acfdt3ivRoYE^h`Lqy-~Pa4K=;gWrapEoH37ETGHomqDC?r)aQ1>VQxK@ zxzRR%1P&Wr@|)C719(lmTG~KRE^OP>WpHQWW?KVklI=rr{c~EL)9v5RYf%>Dp5NSE zKVXP-77x^_0)|D_uXj~0SAsnjZT^^bNVd7uajf~G&YA-Ee-gp3VKVi;jVYBt$m2`&i!n@7*N-9L2a z{;)N^{Dk!^geEgg)w7g*u`4>?% z0X-v?+Hl4AnYIx43Jt+_9#F#3HOXQy#lm>NRv&$9k?O?t&F3R{#=-I?$6wry~$K%P4934U9esCGC zH!(E6izu36Vugq-{7r&@uj;Kze>#fd>fm{x;4gghtLjN^R&;Cz#>RRa%{R9#uuomb z-c<6Lkq?7@_Jfk~UMPJq5UdBFTiUDefLHZ26SWtQqd$K{ zx*II9)1M%wPUC?V&S4DW(i>{T;WnlE=t6-_!D2Q_a7W3j6pq0iOt`<|FaQ;^znp4z zUH@n~ZLcSilMzYDr;A~H4SQ5CQejip{$BLEe@R0Hbz5Q&Yq0U5t19)WFtPBe#{2WX zD(2|bmbC##rpfI}X8B=yfvSn2Tvr#A^UFS0+J&j16udZrE~?ctb8 zLaT|jvF)~A90+hhBtaR3?4iE6a0o7l#vyMyjel58h6EN!@>ZS!d-=u->m5kyr{BLj zB9){t?`8dJs%Q{^Zi@gj~@p720Ss! z#rlMV)@er;gBlP^Y!dWRfM;geJX#9`>>S+PY>E5M?tHrRw&L^-!QGXr{XO&PI-YWA zw4@r7qZ|ss&(oL1vcR-kKlk8Bajw;+0Jsf5J-9$drWC$FefUiq?|oc6vXm?mSYcMRl2Gc>1W?klk$=Y2oD~_zS*y1uy1fN>Gp#}xW=FP@A1QJ^#|XYOY-<6 z0>xMbEILO3NCgr4WB$Y&ZV%<(p(lDJMfL87%5$rsB>^u%9x$-@>UbHrw7#dUYb*{? 
z_^O^NjL`Ja>`dOVb-ol}j=a{7g|6`!`|1Z8a*&?7IR1g<;T80dG*{>5SLfi!l_3MH z&K*cE0bp>OKA|VF843^(Ti0*5ORD0iJg>zUbJ==S{VN!#@0<6X~aN?Fi> z@!&mqe=XChPd*{!MmafAczW4kG+~ZWE-}DFTPxnJzfz|KJHu;pexggx(uMMafORZ5 zxY4*AOm~W|aVfce7?_o1K;*jSu1rPDBiqW}&oys39q(u6Q+tbhD)7i)4Log8#NT~5;gJXsCa=1fZD6Z*O-5;6t&NkIL`b&uqp9=47p!j~0N4&T#mmibJPkq(k1 z$HVXkeS?!6b3b)(J~vIHRi{pAUa!R?I-k&FDA$NQF;c{d1T9SFD^J;8Is&>D#;V#J z_Xr~_BfrXJ_QkDJy%wl z{j%wV<&qL4x7*?ZHMVZ@2PYBhwFdWU#erH*y$9ht=xykqNP`ivs26Ax@O2C*r}Gd0 z4Jzo!Y_HlXH~0wmdZHDfg_bGXW%58~EOjKe8_~&>$a#4g3-uJZkgu-Xw%LMUiqSC) zwy*6x^kY9ywkXqb+4<8=B2mn(SO20?KM1Rzme7Atv$M)#-ia(!Ed zDbs$t&utmF>^}X??nN08fbl`f;zCLzC4Sj`BmN@4_y|5o3&6$o9+y(oi4L2B2-x3@ zBz9#6KgL3^w}uuO&*8&5nj}jWvVI6}j4~APCPl0tDY|vsF-vtW*weIyX_Xw@?oDdC zc;HT`O|+rBf}sac{mq!+_f|wzJ>^hn_GmCKrA#pbP*2Qku>pr6r;_ySA$#wCUG%~z z5MSJbC+sEaHeUB`J26$&f6*P;-WjwV^bYocZ}PEsM&=xk#aUj)q*kfW>$ZX1o#4y& z(%`aMbV(Izig(?iJ5tzwA_22exNhUV4XRmSkb?TR!Q)3s{0q9yLbg$pR>eZv z{sl>7M&N_YOm6>q^Su5oe@zXZkOXp^BMtdDimSLXJs@EuJcsJttG6@I1dotxb5k_6 zgqNSgIUl#HG?coTbRyd-+7B(+-hW~IZ-iH zu`GSsH(S5&U}nYeR>KMy4id>D*$TZ}wW?9h%8SX7d+%$a>gd;Jw$tnL9y?}( z_B0vAi_(eZZ>3GQuvENUliY8)FMYg#Y$pOH!Wd=BUB+L(m!(+goG$vPy7Xu`J5a@B ztO<#M8phgrHb9k@7J0y5afp=PSk;#QwydHanpcbBEl4_W;}C_3 ziSzwC1FyOo0Ih*~%J{3{e$pAnSqmcTVuttQ?os5w6tcQM<-XMM-})D+F;#~C8p9S9 z;=`9I;l=&noawLOXb*r!*+TbU8^>&}_^u=_E`6h)H|=!5=~Vwb^=Hnc&3TWmgXoK&qV2#w0O?Y|oDqqO?H_BXT@5&QL_f+-Oq57=(KitAb*ec2Ze3aLS72L@W zw1OQ@>vD?2Ulg=p2-8KU&p#&}|7|fkdQ5B&A5o2SjlM{GVKG#5b}v2-GFFGG&ob1$ z`qthehPlH~`a;}?hvcU+3KRyoZ_3EM{cJB~5 zyr2zsG;c+wkN_|sTI#&=`AVdYP9g_KVRH*uWesrQNpRsKaKGN`Matf0HRwCAP!Pl* zveYoFwFm>$zJIjJMV1vqDXU_JoO}zixNm^2Q6O=(7rZ!$82gGCz5@rxUUoLMSDT`n zJL%(t*ukDLss$F8GeJw5SE-m@Z(kBRtP8@ytNkX&ORFI>?Aq(OLCm2_?QvS(Q!8sP zPbITDZ6KD+)fJRo`;4X{3dA>`=cJy`5A3pb?^ccb7OZ{eeXs1dADvn6hyW++d-?8a z8Z;?DYVBJzM;RSbUZo9mbExrdVGu*TAgL?C@s{!u+_8~P)RVn3de18)- zBh?^!MVI9%kMnRb;>bol+aXJ(B_e*}1jwvJLFm7p);@ZNc@lWv)hE8!hTUxX2$z<1 z+uTvF5qV>r>b8&Zxi~uhxg)%OTsqP%k6aiY$RGxoj%-GY%l;%vRusL zb%Zizwt`0JvL7*)-t9G=Nkh*e@hytG=CCA(vrN5#y=!Te9k}lOK+x6pjHtPYH1OKf zqw<0f?25~gUq7CmD=NdoZ5xFPe~4bvag&yID^&hrsowT_y`#!zUm$@-^vw&qR_2MJ6GUjcIw4rOm~ZwMZ+ zmxiwerjYFQds;MdFV-EhdN#BUyov-+zA%p_aXkxwl9`rb@ddR$H%Q^YzolP>B+eoy+R>tj_T<&~YrqzVx zt-Db)5Smb;&*2tF!N~SoF6&9>AYe&t2VTIIm1%$0TV?)a`DKl=h&DbK0imzlQXQa%slmY&p`h~srZc$_ZusR;geems0ZR;<>*uufZ1o-J1m&Q);op2VcNsir>5`7Cs)c>%Da0M z-X`{&*(4VA;~(*RS>)qOzI3O`Qy$DSJrGuH`9}TQv-bbJ00T8R@_)ryb7EiGG#@{B znL^zGEwUMq0XHj|IS+NXxen*^N=Bm0^xFa zcF-R5I8-@8zCxB>Qjk+zqyGB-1RLPT&XzPXUs0yhL}c`fhM9_rfKsl@euY(~-J?Mp zbvgc@3FK0_V};xM)ae(Ww>&hdwMNimjP|8i{e?o;&LND@S$G0bay>vO((@IbFP4=k z+`>_bKJ7)$Z@cBENY>cF_KYJZ`u&T;80cDCZnedS~_zOn@K#E?619>z~J1!n>OB-fD7U zlvR2SQIXt;Fab=|m~T7;6*lW}D>@}Q)F0X!pFBYbp}5joZ9m?|<9;TM55VIur`E;a zVjjjz8uz&mGVz)7kb#TovQ@)y5dvd2a0h|ucFN48V01z(Co7nq8#=H0Vy0V(22chn z0|G_s@Cc%qIbkZ|Pyiw=Ij9iaAY9mYKcOI}_Nq_FC*wxDemoFDaI0A__FCM5ev=<( zG|Vp6^k?Xs5lm#UlFvj)zS|FGY;`YwH{y)ca@X(+*|3PR1KVz3C z$R(n0&HM|y)ep_Cf=G1t!-?#%u=Z=p!Sq6Msm>3m`|6*7>|vMgj!cit*1ECEKJILf{ z({IMp|5?3gh%D`Epb_61QPT)IUZJE1LQfO$gpulD=rlCOkaItwb2vkE_d?~KOt-tY z+y74KKB4NaC^=FxT@>M1WZ%+x{=seWZ|?fB^*HzLRCe4rMMG4R=PbYRgp1AQE|&RxwV0Z$83QKe;hkd9? 
z9YT<#h&Z>q_60T)d~ zW&YLBz=nWLV&bq??_DZV3;Dvh*Ry({OkTUak?I7QNR&HlKh)$6T66F=J_sdMM|?@U z=`D$P!iZ#V;#WY3!ktB+zW%5l9k{%gcs-|CjYIG?mDY_}hU zsRfe-l|OfjsNXkpXL=Ad6&0jr)&+N`wx!(e zXw4;5)v-KLvv31|>J7yEWz*l*tIR=yC zU-R=-_r&Iisd(03CgNK@w{wY{f43iSGOyRWb~yY*s>XYC-v?;J41=ktEpq1;ap z0!l(~T_FSjB3|ygU9R-4ppiu8d3PA1`yMmt4*V};uco?4uw>5d9Rr8>z(!5|^n+SE zLg^Q6r&cylAp9LM50|o=vw)a!YSY6lUR1B+5|TvJ=?`vYeAuM3;{C;@*U_RIm9Ryz zNU2F5&dgK@2{FV(1B#dk#xGJ*YAlb;kD!vCf}lICsGb7+07?|1K!`BY-pOc_C3F(fc^MTKGS{R=gN8oDt6p&u%l(D!c0+p_uR zc_R8B1;ou7Lh8NsXlwwpq;?~h{K|$qR{Tmt(w7J@WV|3!-$V>DHcDp_sPC#%~TC&I&Lyr zbSLYrWh=F)Kn;a)c9yWVt|R1v0EpE(eC9$$tZw*)Oyg;Pcv`Pq2BQ|wjkVknhOWs+ zp=lbNb`U7O^(m}^f_TlO;V(99a&6-J_b$lO@-x$%Ej3WCrWY$1q|%;cI3N;Wy>&aN zhh}DgOO-1vFP;(aU-$R#HYbqO%+xR@5@FO6LMNhH3RCxwr<&%h$(&B%zQj))ma_|g zJ|6DfR^2n4X}Kpcm|h1V^53(bwzrooBN&ZLDBNq>0z6h-&cvd+4y!(OaaR$vmKc#^ zKFHCOe}GgN zIVihue9C$?xVb$?jKZg>x}mJUI||E|KmB} zjU5d{)_twdyaKm+qFQR{w$>9m3d6#6)K;=A-H*NXt)+Fw#V)7cGC1#!y26#YWG6IJ z9i-7_S5sAN_=P^7(Ukkc3kLyekBDd~mMHlBXnLt{VB^LlB1_E=HlPcj5%v=b<-;6e zcK|!lw#E8`H<=SQy+WfCZvSX98Ak-KD?~U%tl5YEs!=T4Wxf_0Z3p4a@2%{jDeFvJ z@im_*yaub0rM?;9awiuV*8Dwupl@=Nf8u>wZK{kcYGzVXQv!lDpTVBTXD5JRv;oA5 zF!KG$UgPm%oyIJJHhfYKV9{I9sY*4Pe*?@e5M7dnra_-E90E$&K6ZMf@VA49@&na5 z=KFf@r3#F<>qkM(m9;%$K0!Z#{%?fTKSgr&0`C^Kxkx_`(h{b-hP*}$s1#5J5eSlo z2=s=Y-E8FtOb@0%30tuMM}oI0<;veX>7uCn3R59j*rWeXgG zi0H}Z33Q>|bMwjYc~HNffcgjc`(cDNKStGTWGx#LEak)gh0(7QC8RyVE}&%q!@=6} zy_`p@Yvr&Oa818T%tj05{f4(8TWA5{33G~ zp*lj6D$g@ZfwD3F@t#Ph8)Z7Ft3X|fbQwH9Y$PEo7RCId$1?|&*TDSNR!JR}Uk(!= zcjBc;c>>|52ns|DCb5y`33H>VO+Pgb?g>Yt=5{^K7E7)$BqfSjM7ir(uE(q?pmmcS z-4C_5+2L?t0Z5}R=7h&^?DvySkAjjApiQY%>iA!HX?fc=$8*YL2LWoItnP6O>NWNV zAT6ROe#pJuNf0bZv78kQNrzzi2F|b^eY)a2!%=fj63;}w4m|9g8OTo%0oIg3Y8=I3 zl|Hut#x66E_x9J>`R4CRlBeF{kb+$b3MAFZMT?g#($eB7+4K+Wtzkm`%p_YQN&|5y zz(_W9cBW*dOb$cXo@3+Uc77=lMJ7s9*gv6 zVzE7k;~WfWQEX5@5v{V2ui^Vm#}oLD=6=X)iQ-8sr~Co9MF12E5eWY)I3<*U%?gMt zH&P`|wXmE%+mydwsQPN!W#w=fa8`zfwfI~Zt$D&gUv>G%Q~kq<3sg-_eSn2KhqR13 zoT?yuZ(29DarslCKudPNoS$0n55!MXN@iD%OtZDX+o;xOuA<-0qa3sKy`4)BRY@o67^o$jJzd3xcWXlT2}lT)*1% zHJYH2GuufPcR#O@=VOH7*ep6w?(6}`kt_U&+d7Xz-f z1(@-yt_$7Xuf4Lhd=g22u4_IC=p{O^gh5r|HFm(k0ycFJnGci}7uvv>f&7KCaE7o$ zz(9($PyAmyGa(3e61@F}{g;&}$_xSY0o3}@wWI~_r=oxRHnfs62; zefuA;8K3Ubn~-R2LFBu&8C z;Vx8F+aC|lT~u~tg&p7EEp@#V^t>Pdfg>ii3PtR<8B(y?eIac)cWc%$I#L0n1<3E_ z)k7sE?H>HVO?|JaCwAOecCx^mqR~>F1~{N?IA3mXZn_d9?GF4`mNPFmMtitni=4-j z8b{Ji4Z~Tw$HUYHkBkgw(P8qp1qMqtAi^pv9kTrr^wY=m?RPeW$PRNgVtMHJv=x&W}vzSseFe{%nT-Kilii* z5Iksh;Vx28ULqL)VwGRm2mzv=^c~>FQFd{KiDUVk94|sDqq9~a+I6isvwSpZj9RE? 
zWGtM4N{F8U)Wwub4tN;zr`P%EX4Ty((H;05cjH?!9_(8JWO2T=Z@XwwVp1J%5x*Ha za>u5#p4-jPP_V~8iysHZFDSny-Z|G^3NJl^N*FWhS1%E(&Ie&OAPqg=+*a~*{&xO# zO1+qDR8woFvPDe^A)lM0-l=9?XH0+R?hdfk!tG|rz$DqvY}rh3y3t-2eHtpO<6Bwa z;hMe?ktOEm-S%Px6y?yQu?Tr_A1V3A>?$s(ZeyW(TVj*Ql-%X-$+%o{nSQt4j|S_v zCq|M$z!W@a-i3AQ6i3`#XMee5(+tdZN0^DZ8hJz!J7}_)mde*)XN{nUv@VqC3C!_w zJi@z>e0k-Tk4>yPYH7ci2K!R7RmYzkIsQ5wrdS+G6VOq6)+4t(EGe|j-Q$N$dv z(w~2IYq?mz*rc>WmP|yP>-Fpa=(C{ufH^_Vbv<}JuJJtK&z=HhO|={;`&6(k@kwMa zB5gi?v>^N|9~%09-7xmC-Er>v-bPIWF#t|BRxyp!NE`ypUj%eKQrVQqAb^q%v@oDT zP66PISI1Mo#EO{04;x?ZbcSWBSnqhkt*(p%gc%pk);1CJgZd4;&y4>w#MiZum!L_B z;DsfC#O`z8by)qd{v+nTbdRpnS4vMqVx!Gh37t5$caFwwB4_dsIz?L@AEq{t0aRjk z>csqoGr*%iRFgruH~bPeyH@}Q9W)a%V70SBs^mESk?ETnjr(PTCXCh%j2sD`1}(t7 z;Lr5gcwcdXmvdw~cABH4%m)D4_;xRV4g7|K`bOYR`KFf!?gtxv3Cf9hxhm7dOHlNQ z42S|2Ki#JRwK^x}ZSG91e6hDwO}W_tb6+|oe{@Xb;v#02m&LB*)n@PFLbhXL`%Y9B%`uT8)aX{RIjjn9twF4(CNcuwa=?>L1SabvSTPS601=-jAwl(wFU99f} z{uIg;Ci%L9J2*YVGYN?Fk%Yk^d`2F;XlV!~o~HUklO>dVpnxw8#DSs4LvR5*V@V@X zQ1DzTZXGT>wk|&)BB_G<^V#f9Q*%g1$1a_Z@UXR{;L4i6{cM3~bhlYA0oJZ=`}Fm7 zroT5TAorVml;H4aU)s@!e3;=_%kDq7v_LOspFk36S{W;9%|sH?KBgdvCJB$`$Ny&N zepgTYT+LelD_J1unZ{kf#{&OT4(X+`G9=XzHLac{0>n4*1C;vhT|pP)*Z(?zSLgW_ zv`LYEM*duGb+Z^$9wTPVIbPRlbdDy0QK{M#==S{x%$?n4^W~Fk(z7V`YOGC9QO_n} zs?S^^G1JY!lXT6=7?Z#GE#jDa_cPZaz$Z4MZm@A*Mr;M%IfUyAum}%80lx|s6e6S7 zLIgVkaexSub_jDNChY9vR8*9O%jX_kk+Oy&6#z^)NVrt6voZO?^t8rf_yE_p@?O^F zOAN3vt|hIb2Q7{c+;>EKKKm*Md20_SAhZ!CA#7|K;-cXuAx491PH7<|FjhZQhd$YJ zw!PxhH$LRrIQgRd^P&+*mc>PckqVHE;lkW;m|Y^=NVtv zEZt%Dr&?Rlj?n*HxpO@q3zyF!;tg`1wJET8_kgIvI!(pNG@I!2Im4;Pw@8f){8hx$ zd`7Jm82(uNXzEp%;%*z~OW@KDX~yn1LuB|#8NuYqvRM+S0C*pO^Z(z$+Ana>&fheafY)_!i+f3udD{B#%TKQ_}(#3m7^0vIaz^x{KO~P zl^_z8m4IO(|LR(B`#id*>0@~iIye^DL4H!r+45}6{-~vBijvp0x)~i1@#p`l(tUeE zG%>+wd*GVS;4oTm<=C+83|6(f(u-k9gYc0b+pdZ(Q<*zSn;8u?8+0ODI;T`uaYXA& zdYU~IutAJ>4Qr0m+tmK=5}0%dhWUG{&wiebY=|maqXH%ZqH12iXVbmtcm;O zrUF=UStVwG<6t|pSA~L(44~zLI~|e$?0LUZDvW_@9o&vJkT5OJ{I!PB8mrHox7?~u z%d*yl94_SFi#UQ{3l%usH&W2-_3xbaImsE}{9popwGx8FfcSAMV%|RXq@Pd>{^|r%&hrC#E;unv53c#gWE%$TP zt8-wS1O}`n3jW?u#tR1y9@IYhC7-1J7&}t6$)<{nWgQS|B$67gJRdIrMLOVi%u^va zwCO$$PgGZwPM%hBPeYEvwg2h*WbcAgE8{s*Z90pg_1!g=du3NVsRPId}q9y>Y$ z`cdBGXlwp8D~J*e5Kd{ZD&15XuQq&$^tp)%t4>F=7~w^9eQ7zm?W`x)C#Ss%kA|2+o4PrI65Shn$>dn^e|<~Q#Ci?emWiF}96vB-JJna`UKq-J|B9J8`D zoih#1968083}UUM)cdVnB(LimErMz+QK^R$c228{^(}8zmXO~v#3~H$$82D*5q5e* z;n|Kw;I}`}Eq_@@lIcZr(kPN~C)&hmS)44NP0qR6<8zR7w{Ek3sVtnNt*nL~4!V#{ zHKehKXf-T}+#kuudE*p1z+_NE6-}WU2FyuH0@kRBz+z$WFd~`RV_m!J`8z z514+jm+oRM znL_&~GN0VsHQ?X*aQckEb4-+m3LA;!5~lE?yjtFQ^e*Ij*C!%Us6tp8nqr?5-ey>* z>6m`O&Z+u&a^7XrB#Uu7SL*uE1xqrABRUT*&i zuI@xGw!_zY!GlXeHBOeo3d>mr^DJR&|Fkeu)?~^6AjgaCr2-4}swI;E?(-H3m=xT)Fs(hlL7QW*CNoU!fqF8(R;@aiM-yZ1>q3vK$8NlEe#L zDb)Q(Z73CLOSy?L$Hf*N9FBM(c=&w+JU8*%@%BO>bu4xMdcpDebZhVOnjSDDez5?q zCm7CD@`|oE)-cfI z8doulP#cM@Xlo04`cY`EeQS+r9o$yLL6I3Y7}j?gJh^6q3gCzOdPIOc0JcUzK`fiu zrirG%FwtT?1F$VZ=#qT&AE4q`?Cj!?VBM!_rub0D>$B*CuzSCREzAMxU0^_*7GS^d z=62jMBDA7&(wu@!lIaidhv&+K0q4wC?jHRvZ^v8{@zr|(E8qty-Bg#ZIyjZO(9VBD z;n{dVeI9(CR$q4wcBc(w!XOGzg(W51sYiPiG=jc-6^f)G`GyF=!8j2S)lMN*;s-kp zlDY0-`7#FwA#3A^%^BPdt{jzMU|iM&Ey*CB=UbMQTul*}%IYX5#=3{932M^I`NTBO zH_~xZ_P#fgIv1Un#W#iJT)dqFX1Bc`@Ox@=hBd!gno~Gt^SzGokJfksnNceIBjf}} zq?n+RiXsiiYI2tN3Py3RP9V?CCk>^Q0l-w3Z$yqs5wDWj2Y6p_WOjhZ0aYQRR-=mi z6G7R=6Lej?pj-LdN`WdvdAGcLpD%-9yyAFRbCgBhP>&9&sje6o{wz-dfYJICNmn?~ zg*m8F-(rK{ts_N?gx_Vn9)I(QyG1ii$9p_KDmb`R>J>Cp2M1Rtucf`%)f{N*LxUL2 zBp4q!c8yJ^!e3cqJ|KQ^_dN=QL{da-CkSj@>p0mY|6V~;jKd`Vt&lp!C~ZtppP%Ac z10}dH+K#<&lYvcTExG+ltnhH&v_niKaI!@*f${TUJ>}KS2vdb&NGPBV4oi>2WZ*~* 
zMIU6Nal@+FQW`(D)IC)O?0bTPyX`G5T=_{DtkXeV9%)p}Rojo0KbDHgq{*Tsl_=9Gu5MbpHU<*fH9O21CtGg+1ntGdLbeLuP?o>5<#Z;`^34=fm2De zu=;2y{mR7V=DUw>n0L3;BQSY=TdXX4nw4kV+c)AjMaceqWQGI2ZC;uT81$^PrafzD zG!;bz2;H!$)N*_7?GmD5m17fpa6Vx1px`QKBYD5jJu6<>+`8-qAXEY|J`DBi` z=SAid=XXAQzz{@OsE~ViFN%vp4fm%@;M!TA1Wta^RF-}1H`rL=tYV6TQYE@97RS|3 zR=T*A<#76My4fmKnt}SPq>9uqPus69i%;zB8so3plYc(k_~;v@4q}dayx+lVe{UM+ zmJWsN6&a=}m^7LnuXq-luWh3mWOus?d^`CjL5rq7hz5tosq6y9;R?$;*=vnajW z(HvWm>0g{pqF%;?5D9{70+)+Te7JZZK6U!@e6(w;EFKBjfcyk(E9|Kp#wlkByEwb_ z?(Xo_M*{0ZUkUN|x^Hwr+H1*e``2lBXf!l%=zvXVHA{In8YN0BYJVR`N{SI91uzT@ zJd?zZLVl#D_E`J30%Dl7MvK_ymph~&GRAPQx*n%%0#!> zgDzcu866MzQ|5?geA;7;%E4!x$|9oCL}d^jPn>AjXF@t*-@U=J>>I63gCkmCCTy{( z^6HTyo{__#gnXE`*0psAZ!Zw(^P1+qr2)=EnP*%jX=O=Zs|=8EfrKw*SzsgBH9cQY zz&kfPTx8tdHM?B281EPl2J%~4s*ix01${kuOuDB24~m5)10iivgvpfpYUc@nXa^t5 zt+{MqK~1UYs$8Wc^B>I70O)~O6dd6PVwGP|8*>rp?8K_}Qz(8&1k{mju=!!=IumYV z-6_QTQ2NUG&jH|lfU#@XmJq`QX8dqit73|hVQQ^rqIbzmQs(3Sy^=}rEgTT{@Dv$7 z8$lo;5ci{iwpWNzqQ5s2<>==P0<`Ru-?-WgZPxAAf9`vF2MX3Hn4{m@^fovoT=R4@ z=iynrZ7z~3OgA<(AfZVc8VZ2TwF@ra>;&fqLbLRC*gFMHc)bTW!(}_zL7!%xda113B=o{l=w?qE=O;@d1`8 zFCwKWPiOP!Rywx7gky^Vx(>js8;r0U-ozGaPbh{@5;e=pSD} z#J0rNQLX-`75V9++PstR(A_-(O7)%U4C8=hf{`lAo#JEic?#Ge)&Quo&Dhs$jhdy5 zmd3K;kOUO7a6Wt`2 zOWGKCkpN&SnLwA$KULqH{Wc3YPYRW^-TUie)MB1i!UM)u7x0nsk$J3mul}M^xvf>L zbB`?`38DaHv0$_ffOW4U?-kgOwYrVoumTpAk!!uXe?qx+8QP{=z-0c5kAmidGVW>$ zAZdFq$SX1Ut-O!_fPixe7TRq6@u?`pO24^!|$}z@PA0iou6VG zZz~;F+@=sovp~w zA{bNgjmvr1%(MkHu=NH9_QWnRNIuPP2j;}s$=kZMY}+0yE!iiW-?b`AlcYA{^VJWK zCZwQ|a`%~C33*30uyFjU#E^bsN1k#3A~;4!xctVrHs9UW2?lX)_b?cD4tGDYF4y>l z(e50^3{wie-u(FZs>e}hTVw@?%T)au+0>zOW)Ql3Z`Y@LQxmZVXtCTx2hr&blxcg- zHJ1ply3nH9(Go)0=)xS0$ajMl_HlFj#Jn{WvS(DW$bu*=Fvq#xeG`U zEO`W8@H&*8y)GuSff&ri@K|kfzP4jBHRj;ao}xsq;LIoNU>;WEEkT&VvINkKj%| zk|^5|@$}U9L+BO?c;*dvY|5XlSlZ{OJT%BOq&sYKJX~E)B2Q=%!~Uq0 z7Cn*{H!D$b+Hi>KaJqXFEQH&9H@cKFWg&+^T*RBBcr}GKjt;Dkk=+2HYaedCoqE&D ze{PzxMUQsXK;Yi~;krMjeuJPjBo&Wn3_rD4&`&^!|e%OCyoV>kT&^W zyo)Vo|HX`gbMSZFdoL!~p_8vcT-#*^WoSGQ;;TE_rTDra5Ar_=Zg}f=c4*K-d2IYo zW!D)E2lMq;5M9WMmh~hyT7qa%HvT$6glHSldzWCNMO)qKSstAzQCADH>SCjd64ATW zqO)v>?%n78=lS@4c;~~Eb7tnwZ|?l=oHOUlz1>c0!vg?X?EIIgGOtUwE4dZ32 z9z5~wZ638+!O~)+TZ$y#PmeaT>E69Ry&d43@_KzR>FPfd=prpaBdXKqt+Mi&e44^K z85T89AkxXa>BI&!1Jx#R&D0yNtfZ5;v}zbyt1P837YFoS>6~M&){afPYwQNu>)!4c z+tx+u<48Y`f6GfJnW|iAZn#$c{cbcO(`TbDGR4DtVCzIGe7?C)_K0cvo{Wm84VgOE zM#H67)1JTN!PQmD?uEoRCjK31@vc0ifa)_C2FBBaL<;bB)4Z7cdFV5Lu}|#=Z5l_WEX*x{%)xDC9@zLYxfGOJB$n&#j#+B=Go< zXL?e&f>89DS6LbZiuP12xAroX3RI_^(@*Xj;pzytj36v{JcTzc{MDBrY{qVnkGaUMM{$+=<+t+ z9(6_!KOGI^%yFIisSeM)zQf4EdgJWZe^drM{M(HslKfXP5x;*YP5pT3vprEq8I@dj zJ|(puuv+|gnYdy+zSY=FMF3>QrI(Ri!~Lw5+ll6)tSEmEa?@eK-#Jidw$)Ek)Ko#q z619IRS@8S0Vk2kP&izETkbzuDO*+M&Og0hsC1J<+n4?cZMc{5c^ymQT7A(vpt3L4N zqkbNuU}H-4J^*JDO&Xe_44ev^s3tQ~&l-<8C@bhM}A;>r!=#xusDt zl{!MM@sz}D_R`3thntJjbuY}VD|2#Ey`&Z7)qdoqrf7e;Cv&>7-mrf4-5?>3<;JCx z2l`#_B1sk8aj{XNg)46D+l~yRFF23(JW>=SCDVmOf=&h+BrkPO%jrOkG#s);2jH0O=ju5kl)6tJ!Cqu(y{cg`UiTfW z=e;MKZgaG?)KFCFCo7`0@5RR5=}F_$9GgvtMYurv^=+eFAR2X9+FJT#!bt5f1dtQtCRN zt9Rpl6-j30L+eIZfV>1eNFwp!L~3#LJ2{+JZ~xpfrt_t9)w*H!dDbn}`>T}kl$`cn z(Yl4-(``ss%2P$oSXV^5H%8qjHu(aGF1}_Q+XZ$zewjE?LQRUgD!x0T*-f{y^GmU8 z_8fP3Bf$4VLMH73XN_+v*w)3h#etQJ2rk*O!MS4$@!@s4^nw?`g(kU4XnEcpZN3hB zq0Az1NEu!lNI%bB%=cV9m+$!xM+c$LCNOq0;4pM&8?u#cuyS&sb9)VMe=7;jJ*CHB zARaj}ENKywhl*Y2r;S}_V-*q>{%FsoXEnBui(UH`Yo`&r*3ZPxBHQqpK^3D{(GefI zLKz?QK(t;kWW{Iq10|T@(~EkWjgf~iY^axNYod=y?!b+CL2r69%Vx{h~gp7f@^_MI@i~cn5nSRV0X5Yndctz@YYj~Y= zc}uh|Gat?0o&6b!IWgP^X8e%aZP|cDu?k^;aWcqPQ&q9ejZn5h?fsd%no5-%1^x0W 
zbkorlCk{~;%#_(~**iPxtZa+#q?2S=Y|TD7-VK&3mry%}CI4_qKft6KtTc)?>L!<3 zCA*Y9pUAO~hSn5PPwOW8J7(yjpH+)$RxhK=XgD^_tz)CJrxc_AaZxt}Z-IN$3s zA@snHZLR)RJK6PFnaeRTG%GTTbp#c;mn>;NP}a=yjsNNbcYJHC4e?$qYpoE#%a*uK zp7`GBg?s>V?#nBL<*%$MxbyIWwk|&<;@Y%icpu$>9vl`f3mF8V1|Ay-OBd)u`}PJG@5p;^dc0j8 z^YHR;;69$8o^CuEkMhqsG0^4c<5lJ1ay3mMcH0mgb@JlqeG7Vq#5kyA=hyM9u)W<{pC@J z{*fil62SQ04)dD!B4lQb0cjs1d%ND`m7nlT%WbU#%i5WU`t^EhBYHV3`iyLVQKERp#Wo} ziSm`P@^Mse3>aoUEH0Rg2vfF6KwPK}qXohd9#OsZ$~J8X%&Hu1tstWJ-l?3BpUUbs z@m;^Eh`@+cRk4|Zrwk4>{l$1H#0c5=&Jw6*dbb)x9lxecFr-KG^ylGT(~Fx+uMKzE zXlu}8mAS|+IPh9HcHCRWMxH4Ya|QKW6>1Vt4}cFpLw|3$<@0a@oU#;y1)_Y#KW9sv zN+Wvha6qgk%9mq7Wws(v;n*6xOPC0tKFLFwkzWj(zco%#3a$k?f&23&Y#>Zh{dqi2 z9U$LWTf2Yc1(^frOSc%If~429bBu19G5M%a)LHrB5bko=mA($u1w;yBt4I>aO#5{a zYP@(mjI>4`$`+bUS%>0w4{q@J9;yx~zEQc3(Ld_(@!oUtpuj`hE}=NdOrx=HwIFK5 znzn^Nq*20}w!A>(jZq1RhX^r7I_c*@Y*fS=$;?KoR|LX^^ei}s+_*3CLJ$worfK@X z-E@PMq;)yxF&=^73VrM(qJPANC&x(%Q@%iL!EYLm*OVKKE40awmEWpKyJG=i0ufOW zF&d^QeXRyL)o5@kkP>N;3Sn-`4$}MU4ckAbzOnVUBFF!&z~DJ_4pel z9CJ4@`KSjdBa&7+j0i3xW#tRPg$%#mK{ENSS|BpS62e6LUmXz(3lnO{ml9g1agwQj zS%iO#on<3x2n-JKUf9(@ok=Q8)-s{a9HI01s9F5gpm(X0G3dSaI43R)ff+LQcpe*aZTGzATk4IC8~xQ1^!FS^14?tmV4Th7i2627R8JD}&z-2waZVj=CrOH?vp0Oj&uMbY_^lN|5 z)}Rj6AW5}}HMIUS)@XJvmqyq*2K7^U^{)Ixdp9eP?2qILiDs%uD#gK^SP>q+6s7L8 tTtoT_o&N*cd|1c7cB!+KMB~wm5LT2{{!^Nmbm}` literal 38253 zcmagF2Q-{v*ETu`f*^Gwcu?i{ap~~ z9!O2;@hdOX#+Qk+shK7^5!iHl?*XG^SQ$%KtkGmz+@(sDyG`Jw%@@#Q3 zQV^^(){kq`&qoeh&kV<~ga17c^z>^WrGJl(&-j}Ee#uSI4(9sz>aco&0u9ymV~z7w zc2wl`kgCn?aQGWO|hTPz>+=snV3>dyJm z@$m%Y+s(a4OQW=mi6#xXSc=hQh0z5vIBZA3FX$JE}!&gR8Vr%pLQ zif+9zXFs34+P;bp@`vreb-dYmrCirONT?|vgLnF?0O`*+yeo+{bH->=vpv7!V#9vFU#-2izgwCo(0DK%S- zlXi00g}JZ>?K_p8-Cvx^`JiF>nUwNv3HT1 z;UBhv>O?m=LIWKtaJ(ouW!75L7srP~pW2C0@VhOqETncyZg!9GhJDS~$Q9qmwGZsT z+r%y|7I7D_+~sH@olIUQf~!f}qS7M#D}s~3dowW7i|RipUp|W&^B~>0qq`m3I2YWpo+ETS zb?PAd>XUL^FZvO>xC?!DI@oLv=lWipZ$IrT(d%i&Swj8tS`5R!in-nrgV9_0Ao>bPtu z@ai$dc)h!P4&C&J{sxXX@y5z~6JP~2t8isEx|lI_yZJ~vdU;_L8_(w(A=N7x{cHRqchVXX7ve_aWQ_GElk74$c;ahJp zwFv?ByaP-|z8!OZZG>C9E(Hgxi7g@8yU zGtQtye<=3Dl1?z4GP?LC55fGNxe@3UzG4t1!2+c6 zxO#V73*S(I>SMI$@3m0xutBY0z{`;>-YpMtvf^j*^mv>KL_7ELrP&zBO>WIPLf_7i zTw)c-g&1n4aJGZ6Amd9fE|8!kHuhIW;>N8SqZtwOT1X3xg4mGRcB^uW>f)nCrt#i# z5C}U@=*cT*(JNu>j8> zd);y74nk%ST;SJ`2wGp@c@3kf(Y!}=_&D;FMN=8%jGoNufWN_w3z}`;N5nP#XaNW8 z{R>iwAkVOiN;ed50GVIgAnnT_PE75?MZ4$QbA@{|=43J-Qpj+#T~|xN@ZG(+q!v#; zi^(^>C^#G55q`>P_qi?8z}1c7_Woct!T?L?HrZsrcbxaPVESOTYInYU(xKL?CVW#+ z;_{{GT->qe*sEMzmwc=Jy0xKoRvCfvRn#QcJ}GXLZf1e-iQK5}844b3*?<~Bylvs$ zX;Re3&l0Vq{B8pmo3Cw=Y1p2`wbw@qvDTC-n@KSVrmW(}Enb?-5%3VoQOMZj=cV6( zJG9i-5ctv7jK+QWypJeb{JrhhfbycB=UIxyXbIft!cVEL9MoF;J=UrqJ1=Pwaq(+( zfq->CZeslSOy!9uhNnOwnocWC{MUtOLqgH^Y3KBL5KIDVhk_RcMeH>B@sIuDM83e9 zgutD`;9HT+B~+;v&_YbDDW=x*VnUwiA|LLQ5IB6S*-(TD)_>EqjFSF^;_a4}v^o58 zs5MkdxaEOifnyqDJmW_*r7Td$LpX-T4O6nQ;<;Z3-%`Vxe1>l+Njo+zv-yhdNH3l} zKbk7M7l*ZQEkeb+-!6sD~n=i1)7LG->tK*ZGEzJ^G3P~)*a>nG~ z@v#fa#?{iY&Z?CQ84-|J490m*?W4!4fo zdH?lk2Hf}Zr4tsZ#y4oBTK2hj5j@kA7Uq zJ^40mWsHJLXTa?*X@+CGxZkol{p6#$IrA)e*!mTocQDRV3X6fygJpD2uoX~`AIL@r z=s9{rXx>}XEZTpQ#LMqf3>LV4r{o0m{9fFg<;_-X4I4hwZnzfgQKhuB%}j2k%*5Vg zR27@Oe&}D6jvtWlC!XKyZhkm#$y5H>N)gxB?}>sh?}Q7Rg)NLMqkP9DJs-^LrhRWI zBRG=hvk+I@t!pQsc_nmoy3K1>Q6?sdop;0d-`-Z*Ra@N6)7)(2jASrkt+(}B5x&Q7 zVceh46??>Tux*|j(HQP&wfC&yjU%lPQCa?HTd!q-hV9>xLGy`xSd)=Y6f?U%jRd~N z!zg&*?t<~;QtGCEel>x;@do#Zq^CQRqvx_e_T2z&nKZVvO&ZUO0>(gnfH6TVyt!#n z?FvMA5UxpwU+nwgaRp5pwu;#iYptyR)<#gIVM%JpprL35Y`x^Z$T3fdjFovOo|N4q znL>CGKaS&S|H9inue>8!IxTrji!~vc|ML#u3QKtdn~fz22F9%Q6q8!f%-G|vzdc_! 
z=SW%K$s(=d`Hh;16)#&pU)9Xa3Kh339Q$hq^*|ss51O5y?lgUjfr;Ypk?ioy ze|)-A_ovwb+J5OWuDKjn?^bp%(5uBf2$pV`=1oRk<-KqVqW%b0hz0DB!Djzb#y4m- zPc7f{pzNxmdwHPH*2vL;RvQYHgo@ofc@cfQ*~+9oH;yGeI~E*HK*1w|Ja<-|8~>v0 zJJ!|CMFN6X1DgZLE;<^b{y^v7u%?{VpVICSLejTj{X&(s&}5QUr zZ~SF=sZ%Kz+iLHSnvW_A9Xm`3WMagRdB&Qn`6m~eRkeI+w@J79g0O2RgbCZ~UO11GPz^; zD?C3c82d*>FPwMa=*-IVMSim^w>?rRg2-w|Bk?@EX5c?QnFc3peK7R$V9d4`(t zaI0+FRlY^1a1}ZH4**e!!!rjUTIKjcd>%{~d=)DmxC)wYBc70enZ4HcBZ5ti$L-Jw zNd(LHEXv(CGb>Aie;`^Se{F)lGR^~DPtj-eJ3vX zScxMsAa3tt2aulxefxJ*Y$vvLfHw==G!9Tb6-=rl4XHCcBZU3!h~; zz=%BH$&|KNmzhlBGTb|<%0I!13gLYH$$x8&D0PC_p&n#r)$(g=Ya?AB?$Z2J?9Jqx!cStlWToMafm5A|4HoCoKSrSYa?@ezx&(SG9pn_kQv4`hIiRA9MtV^q$tTXrTHmM4_bIN_JlQrk{!Db7HO{jDq}$r8Sr{ALtNy6Y@Z^S+IFsA@$mbQ_QHh-r*I~X zJNd8Z7fL=FlRiV}4IK-ykJiwn7Fh+bSJ894=%7ap(uHiqPNe#L;9>Cxlu@d=VS<#B z1@k0Rig>D;A3m!Prm1RNn+;Xpl(0!rBWu!4Pxa3tWcI95RVB3w>gvxHl}boO5C*qt zqeB4&p=a&y-@gUL#kqNTT*Em^CeNOchJ}T3aB-qYKz zCyEMT!l9(Z@i3zpwld^mTA1APwXl#~T)bL~-Wmi|adL86SXcX?CDW*&{$T{n4iuj?ocSjx^`RX%+F;!SL=#(DZ%KDjZvo#hu*aivKC&V+YM%U7kOZ{tLFS?bTjEi z{L**TNYxM5g2^zs@lg@6rV^NkorGfBritFj?u`uhI+Lw~8x@6y5+;#UgbE?>>_gm3 z%Cw%lMh#xVpP$C%RaGUPZ#Os3d31JmYM>+#4_@X8`JWxIekf0g*214yoFgMIQy>sQ z>u?a+&_2Ds!1Ktr(SS9b>}_lok7i)lQ*2g#dHKh?5U7ynmPuC(6Tm4_%ma`O4GpZZ z-+-&^T^($PB`3RmRZ!O&U&~Mx0X7h+O3%sJQK1VQ2uR5H$YH$HluSWamzIW0OWK?l;5-QCxH ziR5-#2$-eZB0qilv^qgbMrK&&W}AL44iTu|c!d673~Nj>cB=g%&BlGy!A<2QSoNxz zSo8P^O;o@?x8RXvk2SI|9&_QkW2r2~jEJ17bLS-_B<$$)CUAZD@L{TmEj36zna}pv zUrT0JX{g(4^ZNu350Az6e4~la+LsS^A>m{KHH&EcI6~+UtfvrP6F#K$Cj37qwMSOP zuiO19TdnN2hL*lWVoHiQy(I_;W{dd-Z-s7I8(<}eg{H>S?|psGNJ&YjWmHDi35aej z{w*qlPJOd_IFx2d85T1C;=GV?iH90;t)RK-ym&WZ3>a{DqQvO?`?NGhkeI_{OIsLO z?RL|7P^|CJ-}UvW^4A3E;;_8z>>xrix}Bc;aaDGn88P@Xh3y!sc>m!q8pFr z`)gd5y-t2bNc)>Vd4f+VcXAJK7s>Lc8d6y{@%20+0&Ko8NK1-vS*@Sd#R4p~$TKDdXRrKE!BRPyAvmN#g<|MjA%1xXOq% zuV2ml_o|%{kL3*B;io9o&;vFi2Ikpf;KC~ZuqrRlH^*_R4A``(`~HA9#G(bnrI9-H zy1p}-9^!Lg^Gsd67I$^-%Tvb_|8ZlEa6XJE}4)|uYnqW0rEe?G^31EQGqz0msu!El0fHTCz{)Ya@LC0W{QPe@EkR`oFY z1UR3Z0SCIu#UgfmJRvEWVJcNAcc#t9 zyA!>dIwb{}v4ML2sPHeiE+k<{!!Tr07=)E-&9^NhA{_f=nb~3D#rZJt|3Z}X2nhkh zZ=t$Hdf(4Ten=3c3cn>{5_7QFUmX}7AOB|2ioc4tfVKM5Zj4z%@P%&DW7{!d!%vb$A*3|m%o;` zw`5FAj7Fvm(s#Qt#bvo`cc-0__;7x`tT_`tD4XKGKCWY6&;twDPhWNXKj;z~dV|$( z=dnQZg=<$VOTYVsY4^$Qa{Hl6+oz|PhL4njNN z?L5Bc>~BJRu+L_VC*A&!*}NW9Y~R1|&9Tk7y%qvszd^0*HCHeV*!~kIhsyX zUVxYPb1~mn;>iQ_skYMtBwXo4YwfQwh@$+R9PiU1IOJAohbW+1;|wSz4%kNV-slUGPcm7>v8RGB)aC);N|fs_zM=UWDrr;B5cRh7gR@uBVlzR}cxVScvnMa;k&&U# z;#P8A*RE~IxV=j_Ogm-=mt32q)I7U$bEV)qj=z`F+F)vZ7WZ2!Tk5@vXd2%_kJAiC z7;DY(;`aS$E$ zoND`l#{(rkfA!ekBe|w@P=whvrTa@hIQHTkJWRF{^QLBVNj1g?a9HFu96}N^l2w}% zZvg&6ZRq{wPMs;UtQpOks?y4>YekARfe6~{Ix zdptYhH1oe8Rj7|=Meuu;c|Cty@O3Tn@7kTUMt0^L2EDz#4F|*O0GX(?8+%A6XodzL z_UBhqMMZ+@g#hV6nf+V#8vwy)@Vk0cfoLopbd~d)73iHp=A7pzp-v;ci)PUAuN-kPCmR<^X)>YNB6>3Q&3!~!O%d|~jYCZRPk=qVr_5T>^^76~k_xR5ToW|8oUjPbJ zBLP!*uB=R$z@>?A(Hqan(j+b?mkFSn%by|k(xx-Jd0?GvpM~7F+?DFR3$-5|iH|-e zn2nw?DdkT5=UkyGE@pK=Y*A7DUx*x5)<*MNWo7N3PtgVqUM@xWCBUMaGu7hqWk8LR zoqcbV?S9@UHU=-->6noRNEFmD*J(Xe6<`%GM|p-dAtfL5SKXq5!d<8L4X@jY=mbo9 z0B%O?HXHH-wjf7ML(_}Ftae1v$lxya{g#dR>?dX*E^V#a3drmA!sJ>y60@6VEyK4{Y*sD7FWGzpRjdnf?;5 zO*<_a1s;Zv0$ub(XqSK>GbSpNR-ugFkS;0Sp2OF1BrO7-n@U=dj3phi~7$IdxA?9H?u)3E|D&83M2k-~qYUtnT*R9{ z|F`VOF{Xc>m+;T2?`BR^jWz+4VKN1Y>^(dD`(@1$phHut?1kJmS)}Nij9=OcAl0SPPvNzb-pKnsb8_mlzur<=6G|K&s{0%RB#P1|7y3} z4>7XeN1Gy5IS#bcEoH;znMJT*me@!)#J}w`W^R|Q)Mx?elj9Na(OoA~yiw-xzjFwR zr%H9I*G%(fOwYcPcB3us=gp^Ju)pqeBWhZA#RhJp-n$;~&Hv5<=y3eKMk|t>;wq)R zNAX^;GGGTBkG_p07|a-~$WHBhScU*~;LB@NdBf>__wQqxH6<4pVH)n|mVJp2G_vFx 
z=RBh61)&FjeggE$6?b`jY!UjCmzzH-7adEnDSmXS<=ZLKcuoj+nk|g6%SwtNTRjjH~-IMEbf?aP3FsLm~C+`bZNrG{W z%p-8y7v(2jb=>fbRXY5!=iHcWeGHTMBfC)^&~>#Z;Q|I#LI*z8)z#^q6SH~+sSDKJ zfGmIO7idS<9F7~;{!TVF`abu+NTH|vi!8)4y-ocK^}B0~4NQ9U8ZpkVz|?%#q>fz- zykOkB+vwk&DyBi0)=^qwT>U~@Tfqb+elP#ir*~=zTzKV{z3~(a=hOCOI@;PT^Nqf> z0Kfwgev973`SIl2EwXQy?6JnwD$aObWMRP3Mlt?##NzV6hf%v-zUNiikp~|veCpbO z&~B)`M_^F#h7jOr1XA1AOc7+!5`<^se>lcx{G(VmSPxj@XmchV=ql-nHET_$pl)mV zypSg#Kf-i;#T9a8IUHr+bQK|*Uka<-wsF=gDbrW zNJ%)CzL_^Cd=+i`{|`&hm0WI@MKAtaX@B^?FHAD5eN^I5eZBb@7`t$JXj)!P4jmzJnidHQqdCE$vq830T*i@fV7p_9R?9Q+8 z=H>rHGTsLduFXET8BND;7hs~Nr($SmSp9Z`9E^wn{L-7Zmm=*Z?23`XF&GsOnfelW z@Uc8YG}TaM2fj#Z!R*|KjIsX-<=D|&^vBfhwxmeSY-ipV_1nl1H>fzYVE9CbFrY4# zx$^;tj!|)`A8Zn=@8_Zpq{ha^QVmn0E1S4pc=L$MMqNI=#3y%aw?ptyWBThL!7-Rx zO`w-(fw$92=O{;T0Z{MTEt1vZsVoy?;(J|rUq2G@f`uPAco#r4)i;~-y=|yJKY~CE zn*;oF+armndKJCzs%^;oP^ZWf@tIvydQ}OsDtrsymi=eA?+TxTbf8C+5Z`t-{Gs>A zxi`@M0f$-~3jWNsow_QrBe8rX$N&7u-Di=?8%P4_fM4a7MXR#*4Qi|cWz$n88umbu z4ZA?q!pH*wL?#0Q^x+1SV1l4ota2kc2I!EwVl+K5np%+%(zE~p- zOlJebzdztmgXF~956(BL@}}Olqk<+!OmsFEe$IgAbH&FqbiWg?4wHLB6(#z*SFx2* zSOE|*yd)1JL5)|(w-GXv$D7suipfupJN9*I8b?_=RJYK*qerw!-Oql(h})u-6XAUR|l;CGq%8MU6p z7Zv*K^=tN|q@){_Hk!riYHB&<<;R(nN?^)EEp@OQrh&C8)RIBM>{=KB|G^(X6M?n~ z!a>|Y)xp0pwXDsem*`)Tw+7X*72?%^__3zw^OBXcs~~v(SuE3$9(?O&i41Q}Qfg|? zKQUG%i-zBuM*!2KskOJ79W0J^Iiwkyyy=24yZ>BCYIse26fN+m(mJ^L*PNw`(0mMC zGhwk<<#wuJ^G^8GL$1{Uo;Op;1ZfNG!^8(&K>R$ue9@!v+~S`O4Aj~@jQiRF&`0NK z;RcjQQb1S<{-{wwix48sBQCjh@&R5t_LUi$3^E}!A-$07sBr(2R!DfUGvzLGwd&Di zDE5P!k{>KvroWnlNxgfY7+M|>C%_Ta5me9*#7*ypJmXLZ(Y#Q1!*CBFCpd#NFYX-c z$a#N4rJH-YN!Ipqt znLOW5Q5|*jNf8u|VOGuIG{n-5(+%2@fUKod5B&9=kPrk}N7Nm>d`0+){pD+YLXlBr zF(Ilo?fh#`270Pii_k?|i0=&gA==?PQa4n-yL`0nkl{>E!rDu*>&zO60)I|xl}$`Pszz? z8ptDfl+gQ3)AG{n+IpKlx~!yai+tOwbJCQj^cB&VqpR5_ktbA9szOpMPaUCTK~EMx zV`|Mjd#!Ao7_l)uzbk&v)Lwpiv9!cD%pRp0dDtf-VoB<^^uTMaerGz+M=^Z$s(9=& z4E;fe03y<%!_f!4V>Ie z5o_(7<$b;0{jZ{`n=!(3b3K| zIq#lZr6)PnQqwqpI;V5#13{;GKf9cdpKtw1q0YIdymIS$y;yd`LVlkis867iIq8%# zg5;T!)E)H0Sb4lW{5bs6md>kvmKJhMt$hZZsg2R7uWyZTx`pIpYAK-D`cz~){rH2j2W1`y`h2Mf zITx-KiYDy;0blm+bQ2QRdz|VCCGm?QmqXs<+5ZPWh1BHcyY~l=bbU=hP#S})%j_AeeD2!ReHjc zws(8P>D@2-7C4UdsRurQa&L)`m*?iLL0I?I?;KD_<%6e0Z83exbM_X$= z;Kt`d(V;8EGFPfK_#|PJ8mt;X7bwoQ;x@v7M%4QzL?=r1DS#KBRvo~A#LUZ0zFMd&E#dKPGrjA* z>Z|u@GB*@3&Jx>X}CE>imy zco<=^nw^w7^)iT1#%UFm&Xn9!XM6PloN1ubyP&tdRJUf7p~|76q|1tLG0sx{m&$8( zeFvFxSG4!?NNp+*kwQ^+3aFl1kV6W>BtM%|`tWR_pO8{xfbftv3z3Tc!8W88RHh zKciG9l+9W3_aO0VY9HR72QTNtcrpoL7xbg75xG@8H@Lx$f2$OLM)&l=Bzo_1;wTJK z0C^2XXX$ht)0J2)&)Wg&k%|-tAoTo9j!4t2p}RVTc>O--(7X`3zX$R1oIt2P%(tadO4Ripc-x+rPL~pP{W+;mZ#?zF!2}oZ8sW1VrpJr8)t-eQllRj5CzX-L*mP&*LGldko8!0oaAnXasu@ zI5)zb;3H%JZEWcd)(YhS)CQfm-5NeV3oD@_0L20&_sQ zKniS{5wJzYboj`27?F@4-G%|)cOz51Tyfwi)!i_gu+vZSTT;RY>|tGm_xJAt(S%P9 zzY|5MgN#U{)d;D8KkzwbKZA#g)>Ax{0#mKk6$~gU!W)zojL6}9a%_z2=`dn2xKUAv z(8y7|TKSb93dkv87yj*X9ybK|dBHz<9_|m>3e=GmNn@IGWy@HBX3(AQ-UV~`C4ZPB z)rVZq!HpV1gwvJ~={L}^t3gB_wxz?F*o#YUNw5+Jq>t-j+L{M2PbJ>l*R#Y(Z076p zSQbRA%72XZIr8R+=$?!ciGmkV4vFZ z#jp&kc~eHX%EYyNr2l?Z#x0Gk`T19A3?Xq=c3vs_@K2z1ssv|!rup7l0qVNWDiTCR z-;SwXG|(L+<^nsCLXnquAm{N&SsQ3|s-cj0^_FO9x>-l8BKJvmmtK39GRHF|UsgLY zXPJ63f#t56&f=L*rxoCz9Mk(lLnx6SKXkK`%q~ZK8{( z2KPxT`nxDOr1=ru$?KM?9PgFgSxN86pRXTOE{inn_Fv~BQ<^bv8=w_oTr_P^;?i?R zb(0bp(z(8C%~q4kyI55EO$%r%0p0+a?OE?!NqYiRaBY+VH`Y~@F zZL>8uvx>Xu*$d)=Q?6W~zsf;H^86Jxhl*hMbieP}SNp#`8YB#eyY!Tt^sT;v@8A}6 zwfB^7+$<$7DP0SOm&$#p2pwp+t2`vc2?Ma`J{$d#K5xl2N>4sMvo(b>GS%^ z5RZv8;L8PTrk9{r23ekj2g*}2O@7jtYoO)1a|mp|nF``n@r!R(K9GX@`%05)V&q?$ z(O21x)!Q~qObgqWg|B15jZgITTpWxFdDs_SnvdqKp}Q@E!Ms5(Er(FC(ijh9OEXBl 
z9w#g%9;iaKe$4#4WHX}8n}LcX#GqGElZzI*TX|MgkruME3iDa|2zCwsBuh!&uX&#x zGc7Q5Mjjqa(*P_&?b3RX?LMJl@fnz7g_qJ9fc(m1JQfH8K%=3;bfsxBp;ftFtj2kr zPn6C+nk7Y~0h1iiB;GHNu*_bUj|&{%T8UCc(Kdo9IlH=rMZ{x{kA4XMqYx!!Mf&Es z^f#@8^*}fD6oJ;o0NUZ9_6q9U_^I?>N~P)l={=NKJc_5Ou^YC?ES>EAnV=)?W};M= zum3xT3m~VrdY}D@I*4nl@GoCFwyu(=S{w7xWMxETwRZjR83L&c1b0aR;l4%)bS2z- z&`UwM8%@K8#{aVuQSXoThJ?E|{0MH=-PkyiQ{wOdiV9KWtJHx6;_$Ze{aAELcCxIks8Q*?{Wi-pe5 zhq9zZ(h&rY40iIuCEp)V0VGruX^`5P{B@aG7E2*f$E0^DJR5Nb(ep?2pEtRa$2vOB zl}pLtshCo%XN85=jn63Oc869f_|h=gLfZ%UNKz?f#;SfwdQM8FU{Smt7+L}t2%Z7& zZ>Rl;6b9%W%u%}IOqTI2KTv_x$&6aMz7_+)I<-REkUP_HIZa}-5wwWR3Hv?mH4u!m zn|w2aAVwf0O|g9QP@Bz2sqP+7AqK{M93&XT-^P!V#?A{@`VKfa)x^#qiv}Ow6Q1|d zXavS6%Bok{gVdA_Q(KK^DBHu#Bid(j-aO_|q(%t+(e$=CXwg`@H9t4@JXmb$@^;oO zdr@E#B%Zph7LJ_-Tj)ejY&qp`Ze)A-SP~Zp;1g;7)zheLA^-Bd*rXzHdQIhp-$h}} z`9%AbyS5D3d#Hy!4m?~R%1Iff(445GLW#CK9S_;LQn%Q-W3b02bozN-`#x3F-9}Ez zFI=53Q7M@E`srFnV^^X?^wDb0z^Pnn81EgbHHCu=LgqQLz^l=_o1Y}_UwtbnAtHIO z)0_V)H%iq&lVv>v zS}RGBt>XE9&#tdObr7%?kFOtc)!uMUCF4zVmB21xg&+2edYK2^1e4 zl;4w1GQ7X<{pQqzkul@=%}=sK|URdK)zo7ZbQ1o?t_L#N|`1 z%%U(drAf@zVVohxk)f0|bI3S1Gi551!^v3Sy)B>jBhV=5`z$phxuTV;Ook6NQt(dy45t^W>P zH8d6e9gQYYbG}N+-qSo>idO9bwn?;D;>dCfuF=0s_uTh;erA zn`LPdBS8;`KsBzz7C@Wj^i-Yc^t7KzOLLr9bs+p3vBj}n88^^O>ENP=CZsV8G6lCY zx30Hp4eDQ;a~3cspk_YS?Uy_WrjCET^weT=!EK!#!J3OtuB!EX5!gvXl@l9ocUDU7 zXoxAF-gbGlZIZHu_gHbv7eV$)=?yUV!8{UuzJKP+Pg<2LB5}N5zGhWm!iro};@~0_ zr6jlL>(2hg(NF>?8wbGbZ_{JLLXe15K@rTspY!}Z+Y`S>GM9ozx|&K)wovgVO9q0Zbedjk(EiQt@OrdM5any?7M{FGt7>j+w2LxgEv1uB1)C-nc4^IXF6i) z#%3R#dT(LQ7yz;i#3iufGYMdW09@?D^tEmP!}HVSS0zlEmmGHn-(jz4zj_`!hk~j5 zx#%nr2vmzb({c5@#6BHNxtqS}GpipJ2@;TA4H}n)#zZst-ie+1sHz5wv;9T3{JSn* zX_jQ7dKblFS&^s9i5F9E-z~Wvnu4-mW%Kn@z~i9+GjN%YG`VUDFkd$g;K*wWTL|j2 zD&Io(+~5HVV>jYW7+LrhrhD=4*-8riGU5rmWhZ5rX0C)uC9R^{s&Nw;6Ft1JQkBw0jjFOGM&}oWiPwk z#1D}pckgZhYbv!pBQzpg4nJ7_suprOC;y|ox$%qj;5q)Sw3~tF4^e+C5ARbU?hUZY zf`atNH|-mR<*4Ry{FC)Wdbq-f`!ooH#I!qufNLfHK%Gw~zbh27{3?#tQ7lnzdkBmI z9jKT$x!s8d3RQZ+=I%K*hKahzeqi+F-HWr!oi^F|a$rCb^6FzoUk~2hYtbqzQ+K$o zwT7~Fk*&QSN3>iP9GaU+`9kSUU+sO@YiF1AYltnb>WF0;ytpNEYB+h}$x~ZC;?E}M zoht6x+*=p2KI!U3rYow8Dv*ABKgN>FDn0?F=%OdgImM~bo7I+m@RnFiQ;aa-n#(1U=7s+k|4rS-*U$d)H}9x4h%C5)P&KJh6(Nb-YUT z=55%Mojez~#W2%QnAa5EJuce?Te#vUwxbX#UJ!CZx{+bYCF5LbmsMl6oZu;r z-#Usvd$u1g&gw;Jc6pph4n*$ohhLX!njf&g-F;`AEQnG9DW9_j>jw^1UL$!_97KR2 zX^?mQ=;tIu$Lu#`R&a+1>;_^xOZuvJjI3-m_&23c9nL1e_Y~%Rrl~)BfV=`iFPe^f?|D zD6F?&1s`(CVNQ0*n5JQ`9x>(lF<jS>3bI%c^ge0QeUW9(JCZa0Z+s>}FpR{B{@ z>l^r5@Qcg9391{NnV2X%oOew)`hc%zcR13=-jjV0%qH91bg5!1seQCvZcod1S(Zr# z@QGnST#vtP%@^S7c^)&ozt~pR8jiJSCzGW|MRuSXK3HNH(*`QH-9Ezc9ljCC$+_{Q zYq2gq#Rf!_4V)N7v@;C8GJ3-({=5cZUFsb!oj3Qmvx!vh;vnL5YklFjg68e&o42DL z0p>s?WOfW-DY3|E&wY=l`jy@OH&s$v)p10`VuvO{U~2fcVQNWFC4s@FN~VdQm*LQP zQc2F84O4CN88{vg%@5LrQawFbZ3E{Uw=*uccX~Usa^-|$B0M*jlO($YnK0=1t^WU zn)che`t_S>x|Di)LB)8-jAK+EQq#i5M=mh@cbxzYam*!MBNtT$>S{rN1))x)sq3lJGR-Zlf<(ds2N5~Y8@~CgWy4Pk#{ip#_$QTEdXFH zelT@)wh{+CYZqMTUe=oH(UMRwRb(Ma4r)g*q%iEUmg@TI@2+9xG#wTBDY~=9aZv*i zIC)>}T~^&-(M3<6M-LL_rSYimDcnm83N=%t8YQHv{}fQ?QNB5CYHCBDeroU`dhPUr8*C4%&X4$NXKVzqAnH9nl`QPHWVP6-2QU8 zqvfNa(Ee~!O3zpHQaZ`P(A=&k-sgV9DpOc1x@_8L#mC`}ScA|BVV*~N9~1Gy;;HN) zWIj+vkkb1uy?u&DLjSw4ydgVT8|o+}B0eH6g2o_ePHMHX1(xKSN;lsP(HjuB{c#-h zq}XkrX-&g9sMo$;zLJ!j9ciS*J+k~cF&+KAy3}ea*Gh)!N~tK3?Z|U|aF|5h+qETu zkEJ;=EQBuJFsb*mG^^E7*OY_SLTs;t-A{X?@}awbHhgUC%(Yftkl53}ug(@*LuN?hc0zR1}}deuz$>tK{~jBqJ8I{Mel>-_-MyEnJSM zTmLbpLM-_t?!aSfpzlwTjO?QAo60xH+W&{BuZ*gr3zj9gLxAA!?(Po3J-E9&1b03h zg1ftWaCdiicXxY}d*54+Kf|B1&YazQS65e8&)AZuD|2kQZ~uXS!;4(RMPbJLO9(Ao zMhNYp&&IfS3I;B;Oa~qCI2+^OCr>jvXRYFhB_EA&=xTmy>C<0YW{XT>*%k`M&-Nf 
zCkH(FSON*0PPklUXoihy`y_&WgMa&vC(}>QNe5BBzBpyHin~>Y@04pu2pr)KgS17G6 zSXGd1?8FXS@lzod3jj|Q4Y7G*Jg?_4>}tW4NLJvr;k~JQ=$%_cCD0;ZrIz*VqJu+k z#mVXAnPS= z3Pr$Za22bab)PH`ET8Uz0G*o^&rtF|QQ}l6Q4o^qyLT^re!uK&Jshb=2{dM^smQAy zZt$s;ZhC+EQYCTDG9Y3YKM58vGdQ1dWy1H3r9gz2tGqgfy}R1%e6^CV*WjI} z|45#iJK%eage>N$Z`sgORR+DiFG)r@!ha#Pk4nGk8fvYwe`38%S&>`9L4vh5u->1X zVBXy^?H?G?()MfXM70z_)~iM6GZV#)2+~UsCzI4SOBBrzI@t>z?ncfIK?TP!Na+z2 zU`GwWG)xUxSt#)gMbUUKe-J>f$@Wh!vxG8@*9p@3{66oG7 zonn0t#tPM6A^pz_l)Q6}x{eJhex?6*|G+Eh87oVbIV(<%l(bJ&$C3FJ4g(qf*Qy{X z^$$lkH{+%z$py;-c6P$#z>y$et&bm4sA-rIlZ~huBuYBiG0Er56@r8Gf1m_Vvx1jT z>y`wxg&}4I(~BwJD_EIM@A_Y&gpnc-e>((ms^%9Yjtc_Q1B=Jbi{ysoVp~~;@RHCy zU}`TU^5U_f;=}cPf9a9JVTx$FAp6f7Z*Kz*Uv%j15tm+z}* zQntqA_$5nA^^^DWvP%j&E*yHAbm+W@Y0vm)#>acYEh>~FqCQ&W^XdyTk1=~(Rvf%D zfxB#fYq}~HQy7$n8*fZo=03JJbJ>&&G|7YIohCs_nSKoXt!A%`5~25h=?!2m+&GcuGBL_+xCw);OaLddr2 zf7i3HSOZtOW6XT^#|jwYewwJO%8KZ*2W?Enh?8~s-_%`!Ex#NV5|A=JW*dswKoQk5 z$6v{L54A>|A%pvm_{aaQnF<8wfdtEul@awM#ediqD(UOfdVj6-_JO+)k@!zr?@xZ; zF;S(^*wm4a>B5TYzRJIfX+AXuDkw3 zyf)i>^OU7`eOO)S<^3c`{`hGA3}ZRAeb=LZ1%Y&d28G#3;xaOlQ2i1-I+4zlm`J2Z zU5mvI|4<|O_!s@|>W`{qp(8rHX7~7ZDxGc;C1%ty3v&sN3+%;=k$DWhnC}fS2nr3& zya=Qb4>9c8_vV~?J3)|1;o^KMj}le9hQ*u?+y>==V)u{jvx0;|#z8lDfUBe?;8Hz# zxdm|)JsoZud|UE$Sp5Ec#mcxCCLlrQ$Mj{6+JYk@B>PPuO-coHVLbYXq?L= z=03UdK{<85s!CF|485&Lqqt8+n>I_2e&}q&*FeCftkCw2JSQJhyi&;MA$Bpx#AVd7 zQ{aKmq?H-+FT(a@EDzQh2YRr~Opa3p_n#5zuB`p!A!4lKtN9u@jO@^AiEzbpPYywn zdVL@PbzD<>(>z24I95 z%tX(8Byk=U47EzN%!Kh^Cu764+t{wE3)h}KATV|t=Ay7jV&qof2f*(sY zZawA|i5!eRbWVTR2!cjwd_Fh%w=_v$gcj~<0( zD{KB5wZGcoUA$s?ZF#i1XR~q^4|%tu*z;_;n>iOxKp?9OXpHGCN!Fi_i0ZsF0kS4V zgnS*36-TW`ZGl<===5AI1u7UhGm$>%=VQ`wb#QbrfY-R0KZh!oSIWJ6k9J)OC?u#t0)CflY)n?*-5al zVW$T6x#*dpkN?9!0{jnT7?i9C_h=rE^%>J1LvL}&g||oL4m*a`zkSf$c+3bMQ)9Xv^=A8$;*)QA-+6_D^F9YslL!kZW$ zyNct|yD0;olZ$V!*Tv{uy=@GMQ=bI<^ce}`#vEF&bR%CN=KBeHQfSmXhH-HfkXlC# z&Sv^{Yg|BETMe8Wwkfahe#ZcL0Bf7w&x|C?=8uspTXhxBz$uBwxvvAOt!otK?#&sA z{Vl~`Pdf0A6Z^m7@fxWkmlYd8))i7xL;=Z_%awsn=?4FTd0uJ0>eqoo@6HRw?wgI5 ze%W89@M8JIA@1mpOjWZZJ^k1Sa$l~0&&9EUd`4|3ygp!5Sm}A^%dzez9VRy{t0*u& zX-=Tmi_TdvWMPIqg2ET&b63dEsK_|Q)#Z7{4!uKU={JI|M)K^KC=yhS$#j0a+tci4`*D@$y!}N__#A`|@cZnAC^IxFjouIemNaEzSX;XW z4=ebhL~&UZQN)y{=qnyJ-uY`6GNF)`^I zMu-~DMOb0p3DL$xo)5ys3uAu@>-?a_f@2e76hOz-VxLRYi=TSDzpx%#9t_Jv^*gSR zg-;po_X|s6mV~tf48){S6(eG8M=?b?5WU zh)4K?l$gH(&|6=6>+CxJ&Xuau(=hdPH@|$z%hKsDmD{9!E=|J^<@N#ZuSj=|_=#Ju z8}oSmz03%RLR6)jbA`u4uj@RewM0yBncLNwY2hE(cu7jhoHh|RSCfm~=DV()9LMdO z&lo4In6-XB85=($4h$PFUN(v7oGAJ3SgJ*9>5FsG&MVDNo&b@wM%df}1i8<{PSgon?W)S&uNwakxhbReRmnkEQ|}gNw*U-(erU$vlX* zmAccN+$cBbrVh|h{?N6~;F>ipQC%VAlJ)k~-X{zLTFP$MeM#-?-`M4`;^0-E0qv)} zo1FKMP>x89-8LX!xUW;MI~EuI8r8+7SGv^f)&(9EXRpDusuJ-yr*>g!;zh9abioPLoUKBC3HW{C}hF7 zeL?y}(GvXqHke0T_H_9pvknaEdxIvan%XivPRK<`+;TK-Sno#1&m%edpNAjMPd_Oo z373gdAil$49`aQT7~4^XyuVAYurc)&O$~zxS{enq^b1-;A_$kw=u^`R@j3rzB5aH^ zmk3V+@FlY$exsEW6YyoI>&5G4+Y}c&#AGmG23f?NxR3-@y@{5Knp?YRvm|rcM#p`& zV(w5n3$5jZ)-Zst$4c*udWfOmp-q@?%^+y`Sp8dYDUXuU3|j7y^$DlnIa6S~EZ0+R zS#&(HgE%;oHTPN2gAbMk7eE5}cJhvTJ&kC8wL$ya1%p2JYjMpSiD0nI&2C-KGmTA5it_kX&whZQ{iKLKT$)w zHVjVmtVmkJ4H5v{2;hU0q?X!^#&@f+pYB!DyXCbzF_`?JQtP&3UV#=xd$sXIbnXQv z-UUj6(G>0HD=cLJn?B24%jLoXu?kvouo!uRBjT*j8mAj)lN8oG-FQq5tUYu<_s7~L zMF(-FY(2X{e$C6=9y~cqJpX2dhM$eRL~a7dt!D!@L1#3ITOWTrPDQmq&7Mb!l@-AC_(lEi0qbWg1lhax_gTJ3F zgjw@tPHjTuJJj0+`K!f&jggD!K&&^z@V)}_LeZ&ZIRATWU1IsEZ}Isio~$!#Q@%tP z7@H=OR29bwz>@;1<-MXNON``(aq%ynD6tcZ7OGYhUC8l#V)ONqGejRHzd)9mQMQ=f?49Pml?+VUEcudvdu>XKG~!*%u>wB!_xZmVaG(|5 z@X^%)SP?{Cx@XY8@x}vE^C2WwfDM0vwN^U&YsdJYAHa7}D6zSv!s^1}Ug3zDANLAQ 
zoGDJR60W?xz!Dt-T-DC~{4zoKQV)HKMp^hbXIY#F6yYB7yBjrlB(fF{_FqGALCrNMc$ik;$jw=5)OO{I-wST#zBWnH^Dof(p-^lDjbyT9>6+QZZ z!-)QasIhs-XO@(erO1d+-%qT$2^GNi??B+2i#reEjk2ew8VRx^KZ7dDXi;EM{WvF(IzcW2B<%rbPc!0IT_dwP2)Z@qc_?d1NbM^ViSnNo> z+-C2*=P@Hl%cc3kA-M+U^aZss#G!#GgpBOWJ>J*|khuU|Uoy(uw=0ZP_%Nkll|oTN zlVBl8s9VH3Di>7Gx~CkLlk3_C1F31Wyudt|S=Jut2K^msTUWk$Tu(`4@I&h~k>IKT zgl1dwX}LkD&KL^%#vtG*?3~R9p&axd)Lc3o3m}<#eI}bM0@*j5|1)Toz(;WBAoz&q zN}r|SU{xqBpvQRoZEoxCA*?#?r!nvPlfmVs3kS8v(1;2QIVzijgGz;#gz7kA(5fa{ zXQz3QtO$!ys|k@Sj*-JO_J5lNM^PenHJ`g4Mc{Hf6FYWI%LI!+1*1PPW9=#+0820= zPF=kVXGktf9r(r(I7bkLD&oUmIF_XNI5!2|*ti@;5Hm3xht~elbo?JB+B4l-)y8Y3 zl9aM~Ld}1Dze8c>!RrVOzma%}H$>20AE6RQ4GgH*ZipX*Ak5^YN#i#lG9>cRdA-b&3xsp6=pZFs?g z++*CUt`tz40|MOB;5%p}*+2?4HRCT8eQJgw6`zyNBFNk-M4@7<#kW{WG)Wcenx?!( zDXFlEBw-6SHfjY++?(5TIgYDNPLEd-Cs*;y^MLgMtd{zE6NcE}u6@%*k%`7ofET*6 zKWc2^f3mX#n6VzSaUC?uyqhz8c(Kv?+DpzcGq-I8^Id%$*bc7l4UOnLgT6+>;~s5> ztvOBU)a$o)jFgS;0;w=;B79(NF&)rQ%__E*5t9pN`yMS6`el_ud6!Un!1bR#=UcNm_j!?q84O=e*RM zB#4NLtN*34Lg|43dL0^rj?_TwgFbtHDm%haD1q>TEG~+DhvLJ9KOuC#DWK}kAPtv{ z>;GM{2WoIZj!4Gk6Bb;*H{D8s2VVgC!d=A2aaA8RxDahlC`B=%;9rtFaw zV3onY)H-@20`;tyHcN+o;Tt`z2peKVT3+k20#uBM2F}}q`!zxN*r-!T&r*=#uK{f( zNdC<#hGgOJ`9&NTC*hoVEVp?q4<}(`t(c)ru!>1=|0rx}B5c?wY?#z3k)dKMo5hz(^V7)ju<6~n8%JX&l))yjL+*=Q34UM4Xla9jMRMD} zyd1{GTXU(A6BmPIqQc>F_h32rjm2qB7BmKWOo2q$z3IPH9Lh798>Lnbrd?@1jD^5T)gw|u+@ex>ZlJ1Aif78Nq7&te+?#brcZ#n z(eaJnoI5TGIKcJT%0A;TW9d(T#kS)Yy+c6?!2frF^P8bl^dXDy9~2$&*pbxRy}*hn~u}rFo>AZFm3_s#irjQYOC#pk`?&bG4^SQz*-p zLbQmh2>%l1(IaoDRMGAnQ}*mqqaBSTg_24k2~eFzo*hh@xv|rw7jD!A?0&V2%SMBi zMqv-Nxa~M!@eS)}YpSm<7FSe70!iuL_ge<1NxbooW)iuvmuzL z22$H-00*o|Dq}=X)*qkhi%pIpVvzUBNDn0vAP{k7FRH7vnxt}a7Tr0Dm|c>0UZvll z0RxTY;?nqBH-YI}aZ^)CG41e_Ps9bh`vcsI3?O zp%bqSpVrS-n)a>-TD^W<<1u9t=m%@{dkJK4%SU?ZE`tB(1t7@TKdYW0CncO)-bGUg zGAf{`dr24EXu)XLrEKfY3lhT}=lid#LtsjRcSIJ)BrkQL}@Fe&HHz1q`|lb7X}|Pjo}G$5FjS%PiLX3SGpy_ z!axE4az^Q_!R;mR!*a}E5*8YJ<<%}P44^`#yUIX(#+&YK&=$}Q=$cIvt|-YNWsLtW zU7uBPGphbF8XgzTP7UU#K@~AYf32W}*U-Scy{#Idm%6x6goMn2g+|LqB-*OWMHbZS@_&LOAjQ3J|9~d z%6k?o_*KNx?*z@#I*}u&HNAh5YBO*!5Sy6Ju7v8Q&bm+UP4qKC>b+CX)M2)&7dSe! 
z0X>2jBk}XNoRSl{BJ)|C-^58!62n`!krz)hVes)rGT!pj`F&c6p^~;B3o#59Ii@{L z$2!B*x{cW#Dy}ip=h#u>zXMzh)T)K8*?p^*v&%vBMbx0&S{)`SXUS-ILM%HqjGqrr zM-%(C!TH|80uv6l3YRi5{NRuOw-Qh=ATBzN+n}*+t**y8%D?*ZLaW5ArkM|xsK6~j zUu<{XCO#{C{_+LOyXkBnP!WyCSbAl*)HWon2g8-F_=50kY4Q@J`dcrC?$$J<`@xxD zJk#E=0GdVb$Y_|S=hoPHR{h#Ix(#AT2JXL--tfcYzO+;_LCSh4`ukJ)h9ew`*ic2l z>3;xiC@-1)RjYpminx!v4aD*KyxRnCl zP`l2irON^?RWnDt56QGi`|C9niJ{v`+$69yi5$ zJA%VB>{!W~(>bX0VM*%{gg)+e0j0!nGhZn~9VL1JK9mu`FVp+qi%DTZxfOkkH+Yz@ zzP?n3)aAxtd-U6jXr+s|XeRBHeo*WEyo-iTBi(U0~bQPXzjv78b{| zN@7QJe0aH4RiB)tm2{&ygepQp$(KNkAad^?8ZlWf{KSxS#+oU*4)%-)?)vr#pxvx| z1ZS+#pWn(v*)zwDZq~nvKH0bj|6%9P$~ejzQqhfmi(uH@c1HQRZU~(U%hCm!s8fG% zMwvm$-LqRT@DD|Reo33~)o#^v8+ZTYa+feN_^DiG5pZ;*)^>7WF?AbSB}2%(w5Zg1 zh#}SahG?XVUpB1(xP48YW39j zG2_>^2eX^y0XlMt%$|N7X(T7+;()>@QRIcbhmavSa#XS{FOeCu?EL$Ofw#Sw8d!uM z*^zH3O&##`eEO48_LuH++39@%2-9-a1_5Rqg|UV4ORn@b&yA?^*TFASJP6=icx?Gl zFIE?OEYSnXnF&yy70Le-`h(qu2=tfm)p1z~OBuud>1I9F+xdzFO#SuV67}>e>7zX9 z@h>OnTA<4E`m|KJ)Z~!L7F3I1P`kIoUe@A~=IZx5&tKck5`^v<%Ou2CoYLRZrIZX% zknDUh0Rx06anIUddSn^=d4lUWY}PuMx!^aFh{=V!RY ztW@g91W{fe@5}k*^`RKsgz54de}5<3CfJUhKtPd`&kJmTprZvd53_3xixWlZk5UkD z9&pqHRIvnkv=8mK#^+IRWf!*Bc_3(|$I1*`b{NVtW~RC~wU}!J?|B)Z#+(<7`Pe-5w33e6yn#M*zR=X76-KPy;w?)-oHzl2pF!M#VP;v+DML>_p)$znQ$l5X;HD=l3Q4`nI|Xv zmUFst_otp6Ms<4PqCx{cAIAvT7LI3X)EsuXkH>`!&HUaqfFKUsXqq{^uq)zexCRU|AapYP?z;x zZ|U+Rd>BZu*NSr*L@moQb>`npKb%}#J{(XQHP0R3V1*axpiT5+O(cJk)E*PX{aHmJ z|Kd&s8jl*Rj*8{O%9{CRmt+(d<-ftO@OE7%?f~szpH|(kr4IJ#m+s(BG*8u6rf$+V zfepGUJZyUg8INDrN5sZ~`@I0jgaBu5Ap5PO^i8hG8xu=4RLzd;@YNUGPs5l|}8Bl-{786kl zU<_@CwToQR7k9=8d;y{xQ+Bady51VK+Aj*C!snsSVL|*DPo4`4YiS0fg&$#XFEP%I zx4`43^m%3n5K(ATmeFln^Su1pY-WhUF5h|+RB(R^HL|)FWBjcRo?|3&rFcqyhD+K} zfp3BM0Wfx3tR{=9uhfVK_=a|mx{n9v`3+TQrJ_}OADD~+ zk{$>_Nci}RyYX@dv=2-07}1Dap_uDo;$@;OW{%)9P!`(ZF1|2b+aW)$0#33;c8Ct~Eu(s(8laD7(DP4Zo%7+2^7WVgQBHwiv200$j)4yGKqLbsCtJjg zRbFiCs#2lktZ05V1f5f{T4QE5Ra_-Q#(4q?Cs*`(+hl3-!2a>{-Z7w5i*Ck1=L2t9 zXp#A_p`L4#J$f*xZf@0P<0kRdOB}AM59Y-$+KH~*QotdIF&%W~)%E^SDoes*VDSbd zfm~)N;>*z5YJZoZ4Vpz2VoULsdHgW4r3en7>-|SaWaOpUvnL+W70c75$8Lf<; zC$vtLhDz@a?l2ryB{!FIB0bdm9<7r%VWLQN?O53>4wZg`^6{*vt^Spne!lYe?O-Rx zW2h)E^bCP>+D|K4F=X&)3_LRc^ZE;h^igCYud#a8V_SeG3vr?||6ZSh0%7#!R#W0? 
z{*WND0;F4?&n{`gw@*yRJMN3<#wuG(tg+FO^_sX_Jzp`mM_QCZ6aexZ{@S3Q=89-- zsMD9t-ZWPuDQZ_5J-p}MKU(IFOIgxZ_@ayg&*0@_{FJS+9h@IVlqu}|^W5%^c!0NS zziBW_GxyED*43coNCC1H$rx}ffnI>EU9?FsrZO6vEj_j9&?n9p78zERlnBTfnvh*O zw6z*DQ$iPZeaYfYT%LMBZxg(I^&~aWS*JC^AmPNY$HGUFvq*o{XMED|53E{ue-0!= z|Fv?R`;v23t$EY4f2L7r>wf0QxjIj83V;?J^0)|Ye0Sa9xV5@HUyvx~y`5(dTyJ*h8KIJV$)R|#K0ry`m zARSe!%(EO*0}uQLWY?K>b~ssQ=k(<4gM=l=p=7Aine^w&PN4~$E?+i}N=#ht26g92 z@h)(%OE8V2lT>q9;E>=>@@QYxBrYa`?%uC_vjxN4rr-47ezbo2{4Rvdn3$-8rQ~>{ zl&xin4U{=IZ(xGN-`!dvAo5^OT?>tw2|-fporXZeh1tVKKye|I2m683lL z>Rg7$3ZIxJQhzveQ@&qw#e%Y#sg7_r*n(T8xp=W2Hw)qpZ7PK~txO7kz@gNt1yB(V zKt=l$KAa)0dPVoo7sS|nER+7iBu3WZ)J}eEAF1Kpc)TO3idiQ1{wRoe50u@RAcTtm zLOCApdbM{AY7B|F$CTxJj^(iF^n2^6<;k*r)eK60|ES&&td0KLp=d_ZqEa-kXQD#s zBweOF`rPYB;Njxag<9aSdRvI)DQ$lYj2A6Sm;c7qsg}=>QIIs&3b7bLBG!K`*62p4 zk(R5BZEWEe{Uz%3>RpqE|t%bU4M@90|fP{M*6JN4G z>FuTP{F-BP~(otL63q+qIF%y8GE8zdILee&~XA2uoKO*^QGdx6*3Mt zq5upnC>xAvuU<%3cV&!XFk(=uWMO7mPd6KS>K>|%JK5+U=&ow^8~d(E6(kxFAJZDu zQYsBnb!a^EQ!PtpWbwWdb3#1C6Uht!F?ez?fCIUNLo6qkZq=GH5-L(&yMM5aC#wGQ zh>(-&Q`3M6US9en_OSyzPB7GAxi!F`Kw35*@FY<$Rt;#Y2A4@7;T}zjvDjGdz0qpM z)dMUb$`d|SdtlD82I93%~ zH^)Y2B0xM1stkf!g1!UZ%fxo0M08kFmXLMc`%;Vz$P2wm~!6G4{ z`S?(Jdn-KadNMRNUm>@a&>ZKUIn*7KI&n-Le7$}V#5{ObsG58S_rF~zr*iiN#>zpb znzwp=dV^k%WKI1|pk|---?%{VqhWXBN(imf>Z@l;LV;2BFA0T2sH~6eBr`(eRoUzb zD9Umg5^|CdozynY3}4LrmHz@q{VB64|Hb*gBjodhOsAo|(mE6yVNig>n(raz^HBKM zlEOl$dB5u8;YYi;`_Av|0AlT`#%foQG^hEZ|DtdVhsX8Av)!ukP%1z__jW0DIs*Xo z6{H@p?RC#M)X^GT#vN_+=#1~7Ef9F&vZg�QA+teoybca;wl0M+U#g zpjSX8VBogCfk_FE_!isJniKt>AfGuhdmv;kJfdy$lLrfq_zs^3v{fjQgeN5;8tgfR zO!rMyi+U)B(~~-lQa($a{)7X2_BaLic)afftccM@09c|7q@*@Iw)6qk_99$}078dd z2bC*3doZ>L0zkd&>z+xA!{r@-3*c@6876ZKzzaOtSq{vK=7Vv7%`H}*Qp3C2!UY%V zR##WKYQU;v1}Odi2$Y#or3L$Gdxh-ml!iB7tcibXbBI5U(KHKwd{l*C%%6@7!NwoZJH>LsT zFMR`Q16xDjcg@gx6^Rqiw3sgTyS}>(Lx14%Hc)+{}|iM z=ukPPj{RwT?{LxwScdDqZnvBOUlmXw?2QiHwfs9RI|Lb~@Dlb;Fw6 z9i`geYH?^av3IQ;NQG8xmUqwH0B0}jK(pd z1XJ_($UB_gE(3%F4qrrOFk zPEO|Ifq4JBTq17VG(OqHGks0;sOXSz+X2#ZofG5!VR&=9b|hCzN)@&`sGl~y>2`a& zg1blFHF_oo&<}Ft;6S^ym1IjZh2pjSpP<_~oto+{KY`jIdd8=2m^F<)K-8z+>iOhe ze~3Yoqh7*c>@vRDA-oDmkJIjLOqg-~(G9PU`tue7VeeKTfOu+}P@O6pTL*#C85q&l z?>KacntO$3c=90oSK?v{kex|?kULObA^$TgQ==EpXpU849rMTfUxERK{A_-DZ*Fb$ zuH2Ldk5jqqqDOV&#-Cy4sPU&@3zCJDfuOaQ8#wUO3M=z#58UNx_h5; zj(m!tc$7 zFa?By`e;ELh`DpNt0OV8xj-M%sqjdWN_xloL}M~BlcbUuAzmM}-#R@e6Gas-$ajKOi|P!5I5rZEnldu{cDg&v2zY z-f6AM#bQSn;}$Yv$rTxd&O0%0z|JIvl{@W*KPwzMXJaV#_e3brZPPPK<>k!Y-mpgkEWLwutWW~zi?+=qB**UB~jHRIQ8MrNb_PU!cJi>V3 zxZ|S%i<~5#JXs@gVnaBq5{=SG*?2D{=2?u>ueLqAs~BJlbh>u#c2v4=DUOkS!T0EQ zD=7R|TLWSw^lh+L_~vLV1`du99gTQ?;bW(fIZE7lSz9Nrt(DNpLFjTf9B9agp3Jpo z;J@B47t?aJ1QC#b3I5d@gWQt&@dM5GhOBVx-{8=Y&5khOcrb6ubY}1R^TQ#vLUuH_ zZ1TUZ`ZOwoESbwap%gb3=2BBsWBdT+uHIA^!?REoVAg8dJD?;~+jH~2Ppqm4XtnN% z4gI8*Aq5oKIkMuJ)c#K;i9`~B!2?_zlv#O zex@wHb36QQDz)YXFycq;?^kxRL|oN>Hgz9qGth)p+>#He`qNN2Yc*NZ!Y2cIAz#PQ zpdy~(PnAYbng99!={Tvmts!7C76pp)KPk%n8%+sTOPd0Rv( zSvM7h%FS&EV09Yhl>*{CtEH|e9ck~F^_!096b7rBP3OZEiTl%EgO z*@N$KIjMSyX5g)TN~xuQl&=%}uxB!E$NdK>(qCRavjP zg8fJ;Djv7UzwJN_w5Ks6!Wq#qB1qc&b*aqS67Zz}`zP=?<9`NBp62#B!zJdW1;y9B zm;_pl@Rl;zP@c=~ixsn2u7>m8W6h2*y5igC2X(|yC27V~h<3aRES%H9YkiCmNG^^J zw?*Ti@j!ciNcwZXUY6Y}oAqux%4f0gCw%X={@A!NOc}^O((zotJ8Jf4jsHmh;nMTL z&)0wlFFiQ5s(aeNZT(&V)4R6VrrZ3a9K54{L;MumZiIW5#)4|v? 
z^P7vx5f zC-!r6$UFXqki99ryqL3ljj-TaXJ~!Cau8_-CIMc(y7Kyws!?vty8zqczxlV-kSQx5T!cDpZ#OS zFve$4h-pmCr?gHq7u~Jm_v4RDwyDnD>3sU>HealcpHsoNaHPqk@k&X;mSWWGnFgaE zAZY17MTJyK4@!ZC`vL)Q?>9@&OJ?WZ=U&&fKdw5gI}}E^m%5TQs)v9XFn&HTZ&OIX z!v&9pbv(!NFX^>CO43X0TiKHH3xDlani^ zr>F}=Yg^j+T96)rIPjXOXA8*uZrPk~j}F>2H$B*a=U;R2|AWmJ|K@)E-k+6Vh4A3H znARSt_6wre7q#AU<;D<8P?Gf-@mrwUIl&32bWOEHV`o}H9x2N0K;3Zs0L7W7sNlKD zUG3d>1pG#*tfnmYk^#v^K)9%~TtaNjE?~-bbJD?kjT{$n*&JV3co_OwQ5o0PVlX_q z`#7&6x3u7jUtaZnJrZ%%B)r_ELUw%TD9Bm~wk%dk-!GfQNMr+RW+PZZ-qTo?uD6-~$=iIl%i6HGbMSTYj>v?)}TG=(`ue(z~sw~a5p?%E}E z?L}U<%xcUuGZt@|dxypcLE@!7LI-~0g+00^F@dAsw1RCy9gbU-$iMzc-2bK_d0&kN zU!!amTM$}W5-L9lOv-;5RaJ=MefI~zHV>b*q{WZ}rej9N&=d=b9?fan5Qd_jW;$K; z17N&))iELXm#+8r&4q30cpK~13N$z`-db@a+`wYSx(UwxSvas@C@+a9rxoyW}FF1V_fuj-U((=g) zzpYG%XCfA*UM`e~=WX?KMOq}bAAhx%#w0$ueSG^ zo?V{05bq%H;8K1o0fYrx#~B|VAHjiEOL>S$LkjBZSN~a{#wfaxW6?)#s6c}M7rpFf z4-}=8fuCI+o-wt1v{#&;-iMBtZD%^wd(zf>qAlD-(O`KN7DTL~;Ek36gk;bPQd+rX z1;MZ?puP)2~VLPZ$R>C1s1x|>^J!l z#1J$&?Od8&9bO(?iYtQer1e-G%RjPcn^-SUeqyR&u{*!XAooTD#f9Jr4K=Jh5Opm{%~pu#DH&GK2sh zcV|b01o#(Vf3rqglL+kxvLD(VjmhOr_ci$f^{<{r$x1Zf*_TLPr029BXym$S1Px(e zIGbp$3G{IIB(rS@9?ek7R%Q&EZc_%H`I;B#FPd3lqu_gq4&-I1)mAaT22)F97&S6U z^(Gk7h}@EBY#H^X?`MeWI<2wcqg0M58mZN%qVpS43qum<6@Ya9=lP zt{j&3+-~B^1$xc49#>QsmeW#mpNC0gypMPivfWWck6TX2**^z|B(FBw%AT}o9!%3k zL|Ay;48zUz(s#Q@fK^B~ZwR>h`F(jY*>p%S?cxKg=?Ur#@57?WMv^5(I?LPg%N6H# zNdK4%lM7@o)L{2O{{%Pk@6Tg6r(!#amToXiZ?7tgz4;4q$J}nZBIh1^DF7ci31UiO(eh$XpwlAk7RmV-1@zBw+aH{1 zS7fWNIO)7Z1!ZV6$Q*aZ^9seOIC*riO-oo=m$LYb__VQgod-wUyqb)gWAB{WQ+ypR z|BS9x{f!!XWgxjI9C62*_4)faYy3X0&2v$h?0x7_3F%j#EZ_MREb{8rh;8wwN)@p( z^xLX%-<)a|FME&j5XJ9P^(+jDLwP}C6#8VH1f!vDF9~x`-?U0^D#$t*NM^fjW5r?* zs=sEg?~>Q;`C%kIIX1fo+sdDE(E{0gsY1i>;F$&EgZ7?AH3c3-ad6~6rHq#*Qm;A6 zR&1G7(+!K*`<%VgN>9<|9xRHi?#5Iu9^On(SrTv#$e>&^m2AT+vdPZOetKFc>4qmH z6JNMC^nGN$DcM@m!zt^ApZ%ZKt}CjEuI(Zy)iR5~aar3fSn(vccLkO0yI z0@6hxii9pI0)jM=5~Z3zq(lM+LKUe30s$kTH>GGOUtV?n|J8T*or_t|Jaf)|o_%Ka zUTbb<>YOBhvfL^Unwz$hoK-*A6xq6t%ZT)^4;}qj?``+m8Yodzb5EDtE>W`Et={K7 z?LTEU#qe`)Ew7t#7R#e!&LVDI&pP?N-!LZQQlgl+d?3Vm8b)j}Ea+@m@l%?wE|cqU zQ6jl~mR$HKJ!i`JQQR1?JdnLLXUq(v0=a;38Yb_BXnnLUy~tl~0Ufj-@oSS~19wv8 z80e#&P9Qh)2!kr+Au!G2w69zwefy`w+5m~amC@u;b~^S_%qJ@qd(l&=Dz+j>oYwi! zmYY}&vRm-uvN6!DTgpGLLv=MZ?H7%H^gWr&^7)v%l+_dWE{T(27W8hq*pNY5q$7t{ zD(XL2!tMfg&{(3}mwmBl?0$H=DloQvw-OW9tWst4(PA*y8u1?Q1CH-fLbfud%sr;? 
zMIoz~dRxzAO)O;DZQR+Jlq;Is){F2x@d$mTif;+qWJlKr(Ig zgM8VdADFbO#^o_2ivhZ3Q#Px#&c#W=5TaZ7qWkSZB4aK}D`;10vSv~H%SLayc80l4 z7S>czqA~qpc*v5&!z%{z5xOjTv-)L0)Y>YiH{hS7>Vtf^C^!yyJ@~?f$k3VixJ3eC zW(F!zK_H|wcpg|)_V^u4X}xTfg(ubC2e@ z*nK*3T~4pSGziLmen#v*_efj584MqZ33UPsO&&Fdy) z*gn?QmYtCYEyAxP^OG;7xt{P1UCO>TVr-2N#P)bs;n2aa916y;R$kVnb&JOPQ5<*o zE@~d_85rBt52U7yz`j;i{hKt@tw{vPmwes(jG8gz%J)?z&{*8 z(Z^E~&yjRcj)N$^c8A+JwlIqvrov>+q9c$MZeK=ZlUJ;(!kF2lVE zAzzBue`#1?S!53!j*XC_JdRk$Wq9Sq%SjLyp9Ba<=N#tJj9J|w5d$tvORqUePC@rj zxnY%yqCK_R?X3c-EPl2_Q`~jB+`DhElcT8an6*rS>8-A5)vqOy=A1fu zk<`xD^?v-RrP|-G+;!N_XF32vUAQq=iA)|BARoIGyGT5*L zmxgh!qFRSXN8GX&IF0Td9;=t7Crvo0G5*Dyh_kV{$oKRoM0>UpTJg0|VgJ=|`0jkp z$|jpphPkhJz7)Eo)A=9zyP{9U5PdRVC&XI3V2M+ZYuF}hsv^hl{nzd$Tiyb^&^BLi z*iW9zCnXcN_}lTFQlj~ajN}T>-9FyE#rI+x-LP}0B@M(upx+7u2Aoh(l#Y)ELNztHD|8b)igG-3s z$*jceTBZFor_aXcoIGAA(rq{Bw!gOV(a$+;V!SiM@hXdV*T0w4--xc2h2k!}81}zj z&%=CsXzC{Be5Lx2Uvj0JS&cXv{2FU2+Fl`xI=&Y`2YchwUGW_zc+%}xB<9iNl}Lps z2y2S%5lW9BoIzNNDt6vzumaY~CkrJ%>-VfJt3$h&t<)MPiU~%%g836x3QT~4{Z=PT zlh14p;1-KY)40ZUdR)?E<3~dX(zHA7zBrI4iwvqPSyqE#3HJ(Gz+>LtA@q zK~zCBI#JU@X)335vzNGSs0|GYEZpsX7q*JOd+_gtRnwI1XW~)=wmOc+IRj&w&QITA zANk$1xd@6lZRX{M+ihXajVYhkPA`&O{0g?E&!SP!A7^)&_xkOYe zRcN+v^1kvLtvGR^RZcroCD`KTi-u=r#}2<5n40$ z@&^7g@+=hh3@oC1#8C;7fiYwyh=&D`RDrCa50-%}y5|QrhuehT>OV7R%Pe&Oh#+m< zdUW)z4i#g*aB0C*%9r-AzZx;Ss%to`4(A<_9V|xdkSv#emoT!It4j)LGN=%jq$-au zW-4uAJ7!h+^LCv0^67wa?y0TZAPa=`n=McKt&Ba>8_ZQKD5-&guKVxG#jxt_f!kd_Un!TF#NNGROtV_-Z5z!wgK0TL+R zAsZc~J_s&86r9!$*;ut4sj-68=)oj-)4(vA#ZGgdJ#X3+@VzqtL(zmu3kI7S^QAH1 zZn&U@a6S>P;K6GTfS;;>f=Z6Eb`|)6w~wNCW-T1mIfwlLWRGZB zh|-!2wW-;UY5=LZFou0r&UFW26UzMYRvyN3nI-Qq0PghR^q%4xn@tnAn`^Sk; zP{UF5<_m*rk^DpcY$>G+mNw;VQ-}TH3B~S~LsB*qlC?aCb%nu~uMKTrC?V907)D`* z`zu%xFpPt=WMAs-WK&XD?hV&Eu1HcxsWq2j79AdAbuOuVw=cOCUHM?SlBF(pq_DT? 
z^KZs7vrtg>AI9|GjJuhPy#Qpd=n&O4MEe$(>yP`|#;}k>jA*%+^p4v;=xj{ z1GrS!cadR6Em4ri$@Qh*EQX#mJF`}@8Z_gu`zm8tTi>@>KdwvM96mJOF(WG9*o)-w#zIg)jo;3-(pw7Ib zS0n<&1M09VY*#+eHlD5hqFv)e2a7%uOiP7b8(N43I^s_u8={%uU?zCqZ*ce*HBQ&O z{|6s4a=F~TwH4s3fWesa8amqz`CoSWsMaLDW5WkE3lJeI$5TcM6}{kQEOOStTSq6@ z4oeAgtg@$+Eo5RfqW#%)B~hmGXCaVlhgvp0KWsKyR`(vS5=$_244TuYFr$Ujsy)Xs zp#bvP`!3>iiyU@O^FtYl6dB0l;{>BM62=E1#K(vBxd$v;aAsBxEL)&k2RC3>AUoUp zY1p>e?pQ2u*5alH#9#d*nc2_ms7QY{2sm zyTzcf`$ivRM3BQGe+|n-PBcHuFy8PPm1{kiDN{ruK^ob89{G3iJPqy??J#AHmxdg$ zf)taOWvj&0+F(6_l-o5%`X-v5afzyJFmfP8Sm11z6x(N(o Date: Sat, 29 Feb 2020 18:07:26 +0100 Subject: [PATCH 248/269] TST remove futile arguments --- sklearn/linear_model/_glm/tests/test_glm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 1d1d0c55ceeae..a0e9b3830703d 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -42,7 +42,7 @@ def test_sample_weights_validation(): X = [[1]] y = [1] weights = 0 - glm = GeneralizedLinearRegressor(fit_intercept=False) + glm = GeneralizedLinearRegressor() # Positive weights are accepted glm.fit(X, y, sample_weight=1) @@ -71,8 +71,7 @@ def test_glm_family_argument(name, instance): glm = GeneralizedLinearRegressor(family=name, alpha=0).fit(X, y) assert isinstance(glm._family_instance, instance.__class__) - glm = GeneralizedLinearRegressor(family='not a family', - fit_intercept=False) + glm = GeneralizedLinearRegressor(family='not a family') with pytest.raises(ValueError, match="family must be"): glm.fit(X, y) From 7498f3eb15b789c4f1266459c7b85d075af0c7c4 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 18:08:57 +0100 Subject: [PATCH 249/269] TST increase rtol --- sklearn/linear_model/_glm/tests/test_glm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index a0e9b3830703d..99fe5e8bc420e 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -197,12 +197,12 @@ def test_glm_sample_weight_consistentcy(fit_intercept, alpha, family): # sample_weight=np.ones(..) 
should be equivalent to sample_weight=None sample_weight = np.ones(y.shape) glm.fit(X, y, sample_weight=sample_weight) - assert_allclose(glm.coef_, coef, rtol=1e-6) + assert_allclose(glm.coef_, coef, rtol=1e-12) # sample_weight are normalized to 1 so, scaling them has no effect sample_weight = 2*np.ones(y.shape) glm.fit(X, y, sample_weight=sample_weight) - assert_allclose(glm.coef_, coef, rtol=1e-6) + assert_allclose(glm.coef_, coef, rtol=1e-12) # setting one element of sample_weight to 0 is equivalent to removing # the correspoding sample @@ -211,7 +211,7 @@ def test_glm_sample_weight_consistentcy(fit_intercept, alpha, family): glm.fit(X, y, sample_weight=sample_weight) coef1 = glm.coef_.copy() glm.fit(X[:-1], y[:-1]) - assert_allclose(glm.coef_, coef1, rtol=1e-6) + assert_allclose(glm.coef_, coef1, rtol=1e-12) # check that multiplying sample_weight by 2 is equivalent # to repeating correspoding samples twice From 90b1213673abff49672e3f0cfbdc987002b48c08 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 18:22:08 +0100 Subject: [PATCH 250/269] TST add fit_intercept to test_glm_identity_regression --- sklearn/linear_model/_glm/tests/test_glm.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index 99fe5e8bc420e..f75f9f8a180ae 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -167,15 +167,21 @@ def test_glm_warm_start_argument(warm_start): glm.fit(X, y) -def test_glm_identity_regression(): +@pytest.mark.parametrize('fit_intercept', [False, True]) +def test_glm_identity_regression(fit_intercept): """Test GLM regression with identity link on a simple dataset.""" coef = [1., 2.] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.dot(X, coef) glm = GeneralizedLinearRegressor(alpha=0, family='normal', link='identity', - fit_intercept=False) - glm.fit(X, y) - assert_allclose(glm.coef_, coef, rtol=1e-6) + fit_intercept=fit_intercept, tol=1e-12) + if fit_intercept: + glm.fit(X[:, 1:], y) + assert_allclose(glm.coef_, coef[1:], rtol=1e-10) + assert_allclose(glm.intercept_, coef[0], rtol=1e-10) + else: + glm.fit(X, y) + assert_allclose(glm.coef_, coef, rtol=1e-12) @pytest.mark.parametrize('fit_intercept', [False, True]) From 697bda293e6782cb68dce9bbddc0efe5d9066197 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 18:47:42 +0100 Subject: [PATCH 251/269] TST ignore one specific ConvergenceWarning --- sklearn/linear_model/_glm/tests/test_glm.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index f75f9f8a180ae..b1583e2f3a242 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -5,6 +5,7 @@ import numpy as np from numpy.testing import assert_allclose import pytest +import warnings from sklearn.datasets import make_regression from sklearn.linear_model._glm import GeneralizedLinearRegressor @@ -272,7 +273,11 @@ def test_warm_start(fit_intercept): fit_intercept=fit_intercept, max_iter=1 ) - glm2.fit(X, y) + # As we intentionally set max_iter=1, L-BFGS-B will issue a + # ConvergenceWarning which we here simply ignore. 
+ with warnings.catch_warnings(): + warnings.filterwarnings('ignore', category=ConvergenceWarning) + glm2.fit(X, y) assert glm1.score(X, y) > glm2.score(X, y) glm2.set_params(max_iter=1000) glm2.fit(X, y) From 578408c855be085e45ac1458fca5f831c5b6c768 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 18:53:38 +0100 Subject: [PATCH 252/269] TST add fit_intercept to test_glm_log_regression --- sklearn/linear_model/_glm/tests/test_glm.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index b1583e2f3a242..ece8f09c76acd 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -237,21 +237,27 @@ def test_glm_sample_weight_consistentcy(fit_intercept, alpha, family): assert_allclose(glm1.coef_, glm2.coef_) +@pytest.mark.parametrize('fit_intercept', [True, False]) @pytest.mark.parametrize( 'family', [NormalDistribution(), PoissonDistribution(), GammaDistribution(), InverseGaussianDistribution(), TweedieDistribution(power=1.5), TweedieDistribution(power=4.5)]) -def test_glm_log_regression(family): +def test_glm_log_regression(fit_intercept, family): """Test GLM regression with log link on a simple dataset.""" coef = [0.2, -0.1] X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T y = np.exp(np.dot(X, coef)) glm = GeneralizedLinearRegressor( - alpha=0, family=family, link='log', fit_intercept=False, - tol=1e-6) - res = glm.fit(X, y) - assert_allclose(res.coef_, coef, rtol=5e-6) + alpha=0, family=family, link='log', + fit_intercept=fit_intercept, tol=1e-7) + if fit_intercept: + res = glm.fit(X[:, 1:], y) + assert_allclose(res.coef_, coef[1:], rtol=1e-6) + assert_allclose(res.intercept_, coef[0], rtol=1e-6) + else: + res = glm.fit(X, y) + assert_allclose(res.coef_, coef, rtol=2e-6) @pytest.mark.parametrize('fit_intercept', [True, False]) From 266891012997c24e560119ae7d024477def5c60b Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 20:48:18 +0100 Subject: [PATCH 253/269] EXA comment on penalty strenght GLM vs Ridge --- .../plot_poisson_regression_non_normal_loss.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index d923afbc70891..d47c1579c70da 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -113,7 +113,7 @@ def load_mtpl2(n_samples=100000): # events occurring with a constant rate in a given time interval # (``Exposure``, in units of years). Here we model the frequency # ``y = ClaimNb / Exposure``, which is still a (scaled) Poisson distribution, -# and use ``Exposure`` as `sample_weight`. +# and use ``Exposure`` as ``sample_weight``. df["Frequency"] = df["ClaimNb"] / df["Exposure"] @@ -201,7 +201,10 @@ def score_estimator(estimator, df_test): ############################################################################## # -# Next we fit the Poisson regressor on the target variable, +# Next we fit the Poisson regressor on the target variable. We set the +# regularization strength ``alpha`` to 1 over number of samples in oder to +# mimic the Ridge regressor whose L2 penalty term scales differently with the +# number of samples. 
poisson = make_pipeline( linear_model_preprocessor, @@ -302,8 +305,8 @@ def score_estimator(estimator, df_test): # ``Ridge`` and ``RandomForestRegressor`` estimators. # # To ensure that estimators yield reasonable predictions for different -# policyholder types, we can bin test samples according to `y_pred` returned -# by each model. Then for each bin, we compare the mean predicted `y_pred`, +# policyholder types, we can bin test samples according to ``y_pred`` returned +# by each model. Then for each bin, we compare the mean predicted ``y_pred``, # with the mean observed target: From d1c3dc9405c0b19b778d5a4a56058dbfd994db4f Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 20:56:20 +0100 Subject: [PATCH 254/269] EXA fix nitpicks --- .../plot_poisson_regression_non_normal_loss.py | 16 +--------------- .../plot_tweedie_regression_insurance_claims.py | 11 +---------- 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index d47c1579c70da..78aa0a5f5090c 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -50,7 +50,7 @@ def load_mtpl2(n_samples=100000): Parameters ---------- n_samples: int or None, default=100000 - number of samples to select (for faster run time). If None, the full + Number of samples to select (for faster run time). If None, the full dataset with 678013 samples is returned. """ @@ -64,9 +64,7 @@ def load_mtpl2(n_samples=100000): return df.iloc[:n_samples] return df - ############################################################################## -# # Let's load the motor claim dataset. We ignore the severity data for this # study for the sake of simplicitly. # @@ -79,7 +77,6 @@ def load_mtpl2(n_samples=100000): df["Exposure"] = df["Exposure"].clip(upper=1) ############################################################################## -# # The remaining columns can be used to predict the frequency of claim events. # Those columns are very heterogeneous with a mix of categorical and numeric # variables with different scales, possibly very unevenly distributed. @@ -107,7 +104,6 @@ def load_mtpl2(n_samples=100000): ) ############################################################################## -# # The number of claims (``ClaimNb``) is a positive integer that can be modeled # as a Poisson distribution. It is then assumed to be the number of discrete # events occurring with a constant rate in a given time interval @@ -129,7 +125,6 @@ def load_mtpl2(n_samples=100000): df["Exposure"].sum())) ############################################################################## -# # It is worth noting that 92 % of policyholders have zero claims, and if we # were to convert this problem into a binary classification task, it would be # significantly imbalanced. @@ -179,7 +174,6 @@ def score_estimator(estimator, df_test): score_estimator(dummy, df_test) ############################################################################## -# # We start by modeling the target variable with the least squares linear # regression model, @@ -188,7 +182,6 @@ def score_estimator(estimator, df_test): ridge__sample_weight=df_train["Exposure"]) ############################################################################## -# # The Poisson deviance cannot be computed on non-positive values predicted by # the model. 
For models that do return a few non-positive predictions # (e.g. :class:`linear_model.Ridge`) we ignore the corresponding samples, @@ -200,7 +193,6 @@ def score_estimator(estimator, df_test): score_estimator(ridge, df_test) ############################################################################## -# # Next we fit the Poisson regressor on the target variable. We set the # regularization strength ``alpha`` to 1 over number of samples in oder to # mimic the Ridge regressor whose L2 penalty term scales differently with the @@ -217,7 +209,6 @@ def score_estimator(estimator, df_test): score_estimator(poisson, df_test) ############################################################################## -# # Finally, we will consider a non-linear model, namely a random forest. Random # forests do not require the categorical data to be one-hot encoded: instead, # we can encode each category label with an arbitrary integer using @@ -250,7 +241,6 @@ def score_estimator(estimator, df_test): ############################################################################## -# # Like the Ridge regression above, the random forest model minimizes the # conditional squared error, too. However, because of a higher predictive # power, it also results in a smaller Poisson deviance than the Poisson @@ -292,7 +282,6 @@ def score_estimator(estimator, df_test): plt.tight_layout() ############################################################################## -# # The experimental data presents a long tail distribution for ``y``. In all # models we predict a mean expected value, so we will have necessarily fewer # extreme values. Additionally, the normal distribution used in ``Ridge`` and @@ -374,13 +363,11 @@ def _mean_frequency_by_risk_group(y_true, y_pred, sample_weight=None, title=model[-1].__class__.__name__, xlabel='Fraction of samples sorted by y_pred', ylabel='Mean Frequency (y_pred)' - ) axi.legend() plt.tight_layout() ############################################################################## -# # The ``Ridge`` regression model can predict very low expected frequencies # that do not match the data. It can therefore severly under-estimate the risk # for some policyholders. 
@@ -444,7 +431,6 @@ def _cumulated_claims(y_true, y_pred, exposure): ax.legend(loc="lower right") ############################################################################## -# # This plot reveals that the random forest model is slightly better at ranking # policyholders by risk profiles even if the absolute value of the predicted # expected frequencies are less well calibrated than for the linear Poisson diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 98d515a4f9418..4a301f5fb43d2 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -99,7 +99,7 @@ def plot_obs_pred(df, feature, weight, observed, predicted, y_label=None, column name of df with the values of weights or exposure observed : str a column name of df with the observed target - predicted : frame + predicted : DataFrame a dataframe, with the same index as df, with the predicted target fill_legend : bool, default=False whether to show fill_between legend @@ -191,7 +191,6 @@ def score_estimator( ############################################################################## -# # Loading datasets, basic feature extraction and target definitions # ----------------------------------------------------------------- # @@ -278,7 +277,6 @@ def score_estimator( print(scores) ############################################################################## -# # We can visually compare observed and predicted values, aggregated by the # drivers age (``DrivAge``), vehicle age (``VehAge``) and the insurance # bonus/malus (``BonusMalus``). @@ -335,7 +333,6 @@ def score_estimator( ############################################################################## -# # According to the observed data, the frequency of accidents is higher for # drivers younger than 30 years old, and is positively correlated with the # `BonusMalus` variable. Our model is able to mostly correctly model this @@ -376,7 +373,6 @@ def score_estimator( print(scores) ############################################################################## -# # Here, the scores for the test data call for caution as they are # significantly worse than for the training data indicating an overfit despite # the strong regularization. @@ -394,7 +390,6 @@ def score_estimator( ############################################################################## -# # We can visually compare observed and predicted values, aggregated for # the drivers age (``DrivAge``). @@ -425,7 +420,6 @@ def score_estimator( plt.tight_layout() ############################################################################## -# # Overall, the drivers age (``DrivAge``) has a weak impact on the claim # severity, both in observed and predicted data. # @@ -461,7 +455,6 @@ def score_estimator( ############################################################################## -# # Pure Premium Modeling Using a Single Compound Poisson Gamma Model # ----------------------------------------------------------------- # Instead of taking the product of two independently fit models for frequency @@ -492,7 +485,6 @@ def score_estimator( print(scores) ############################################################################## -# # In this example, both modeling approaches yield comparable performance # metrics. 
# @@ -523,7 +515,6 @@ def score_estimator( print(pd.DataFrame(res).set_index("subset").T) ############################################################################## -# # Finally, we can compare the two models using a plot of cumulated claims: for # each model, the policyholders are ranked from safest to riskiest and the # fraction of observed total cumulated claims is plotted on the y axis. This From a9686f6b7b6edb6ba89f4e0acac99f1a536c0457 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 20:57:46 +0100 Subject: [PATCH 255/269] EXA remove empty line --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 4a301f5fb43d2..75f6b1f3fb7a1 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -64,7 +64,6 @@ def load_mtpl2(n_samples=100000): number of samples to select (for faster run time). Full dataset has 678013 samples. """ - # freMTPL2freq dataset from https://www.openml.org/d/41214 df_freq = fetch_openml(data_id=41214, as_frame=True)['data'] df_freq['IDpol'] = df_freq['IDpol'].astype(np.int) From 04e7aca7ef67f75f15c8221e265fe3b4a66cb542 Mon Sep 17 00:00:00 2001 From: Christian Lorentzen Date: Sat, 29 Feb 2020 21:11:36 +0100 Subject: [PATCH 256/269] EXA add blank line after function definition E305 --- examples/linear_model/plot_poisson_regression_non_normal_loss.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 78aa0a5f5090c..558269fe2d638 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -64,6 +64,7 @@ def load_mtpl2(n_samples=100000): return df.iloc[:n_samples] return df + ############################################################################## # Let's load the motor claim dataset. We ignore the severity data for this # study for the sake of simplicitly. From 21a739c6bd5ea072302020a72546214bd26f3db3 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 29 Feb 2020 16:53:29 -0500 Subject: [PATCH 257/269] Gamma -> Poisson --- doc/modules/linear_model.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 28b220110fc49..fc5f254035a53 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -970,7 +970,7 @@ in the following figure, :align: center :scale: 100% - PDF of a random variable Y following Gamma, Tweedie (power=1.5) and Gamma + PDF of a random variable Y following Poisson, Tweedie (power=1.5) and Gamma distributions with different mean values (:math:`\mu`). 
Observe the point mass at :math:`Y=0` for the Poisson distribution and the Tweedie (power=1.5) distribution, but not for the Gamma distribution which has a strictly From 79ada1e8072f4218ab9c78c52492485f5bf2e8ce Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 29 Feb 2020 17:38:23 -0500 Subject: [PATCH 258/269] Used X @ coef as suggested by Roman --- sklearn/linear_model/_glm/glm.py | 34 ++++++++++++++++---------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index a8df8931961db..90c87726a369d 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -74,7 +74,7 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): fit_intercept : bool, default=True Specifies if a constant (a.k.a. bias or intercept) should be - added to the linear predictor (X*coef+intercept). + added to the linear predictor (X @ coef + intercept). family : {'normal', 'poisson', 'gamma', 'inverse-gaussian'} \ or an ExponentialDispersionModel instance, default='normal' @@ -84,8 +84,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): link : {'auto', 'identity', 'log'} or an instance of class BaseLink, \ default='auto' The link function of the GLM, i.e. mapping from linear predictor - `Xw` to prediction `y_pred`. Option 'auto' sets the link depending - on the chosen family as follows: + `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets + the link depending on the chosen family as follows: - 'identity' for Normal distribution - 'log' for Poisson, Gamma and Inverse Gaussian distributions @@ -115,8 +115,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Attributes ---------- coef_ : array of shape (n_features,) - Estimated coefficients for the linear predictor (X*coef_+intercept_) in - the GLM. + Estimated coefficients for the linear predictor (X @ coef_ + + intercept_) in the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. @@ -296,7 +296,7 @@ def func(coef, X, y, weights, alpha, family, link): return self def _linear_predictor(self, X): - """Compute the linear_predictor = X*coef_ + intercept_. + """Compute the linear_predictor = X @ coef_ + intercept_. Parameters ---------- @@ -401,7 +401,7 @@ class PoissonRegressor(GeneralizedLinearRegressor): fit_intercept : bool, default=True Specifies if a constant (a.k.a. bias or intercept) should be - added to the linear predictor (X*coef+intercept). + added to the linear predictor (X @ coef + intercept). max_iter : int, default=100 The maximal number of iterations for the solver. @@ -422,8 +422,8 @@ class PoissonRegressor(GeneralizedLinearRegressor): Attributes ---------- coef_ : array of shape (n_features,) - Estimated coefficients for the linear predictor (X*coef_+intercept_) in - the GLM. + Estimated coefficients for the linear predictor (X @ coef_ + + intercept_) in the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. @@ -467,7 +467,7 @@ class GammaRegressor(GeneralizedLinearRegressor): fit_intercept : bool, default=True Specifies if a constant (a.k.a. bias or intercept) should be - added to the linear predictor (X*coef+intercept). + added to the linear predictor (X @ coef + intercept). max_iter : int, default=100 The maximal number of iterations for the solver. 
@@ -488,8 +488,8 @@ class GammaRegressor(GeneralizedLinearRegressor): Attributes ---------- coef_ : array of shape (n_features,) - Estimated coefficients for the linear predictor (X*coef_+intercept_) in - the GLM. + Estimated coefficients for the linear predictor (X * coef_ + + intercept_) in the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. @@ -556,15 +556,15 @@ class TweedieRegressor(GeneralizedLinearRegressor): link : {'auto', 'identity', 'log'}, default='auto' The link function of the GLM, i.e. mapping from linear predictor - `Xw` to prediction `y_pred`. Option 'auto' sets the link depending - on the chosen family as follows: + `X @ coeff + intercept` to prediction `y_pred`. Option 'auto' sets + the link depending on the chosen family as follows: - 'identity' for Normal distribution - 'log' for Poisson, Gamma and Inverse Gaussian distributions fit_intercept : bool, default=True Specifies if a constant (a.k.a. bias or intercept) should be - added to the linear predictor (X*coef+intercept). + added to the linear predictor (X @ coef + intercept). max_iter : int, default=100 The maximal number of iterations for the solver. @@ -585,8 +585,8 @@ class TweedieRegressor(GeneralizedLinearRegressor): Attributes ---------- coef_ : array of shape (n_features,) - Estimated coefficients for the linear predictor (X*coef_+intercept_) - in the GLM. + Estimated coefficients for the linear predictor (X @ coef_ + + intercept_) in the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. From 39eeb44d20afdb8b0c89cd87a537c56383879fe0 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 29 Feb 2020 17:40:41 -0500 Subject: [PATCH 259/269] minimal addition to clearly separate links in example --- .../linear_model/plot_poisson_regression_non_normal_loss.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 558269fe2d638..c447204a02eab 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -5,7 +5,7 @@ This example illustrates the use of log-linear Poisson regression on the `French Motor Third-Party Liability Claims dataset -`_ [1]_ and compares +`_ from [1]_ and compares it with models learned with least squared error. The goal is to predict the expected number of insurance claims (or frequency) following car accidents for a policyholder given historical data over a population of policyholders. 
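A note on the 'log' link referenced in the docstrings just updated: it means that predictions are the exponentiated linear predictor. The minimal sketch below reuses the toy data of ``test_glm_log_regression`` from the tests earlier in this series; the ``sklearn.linear_model._glm`` import path mirrors the in-tree layout used by these patches, and the data values are only illustrative.

import numpy as np
from sklearn.linear_model._glm import GeneralizedLinearRegressor

# Toy data as in test_glm_log_regression: y is exactly log-linear in X.
coef = [0.2, -0.1]
X = np.array([[1, 1, 1, 1, 1], [0, 1, 2, 3, 4]]).T
y = np.exp(np.dot(X, coef))

glm = GeneralizedLinearRegressor(alpha=0, family='poisson', link='log',
                                 fit_intercept=False, tol=1e-7)
glm.fit(X, y)

# With the 'log' link, predict(X) is exp(X @ coef_ + intercept_);
# here intercept_ is 0.0 because fit_intercept=False.
lin_pred = X @ glm.coef_ + glm.intercept_
assert np.allclose(glm.predict(X), np.exp(lin_pred))

For the 'identity' link the same relationship holds without the exponential, which is what ``test_glm_identity_regression`` checks.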
From e3cf69dd221e686570ff1d55fb5b576b52f4acd3 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 29 Feb 2020 18:05:53 -0500 Subject: [PATCH 260/269] Added a few definitions from the insurance jargon --- .../plot_poisson_regression_non_normal_loss.py | 16 ++++++++++++---- .../plot_tweedie_regression_insurance_claims.py | 17 ++++++++++++----- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index c447204a02eab..ee863dd4198ba 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -6,10 +6,18 @@ This example illustrates the use of log-linear Poisson regression on the `French Motor Third-Party Liability Claims dataset `_ from [1]_ and compares -it with models learned with least squared error. The goal is to predict the -expected number of insurance claims (or frequency) following car accidents for -a policyholder given historical data over a population of policyholders. -Available features include driver age, vehicle age, vehicle power, etc. +it with models learned with least squared error. In this dataset, each sample +corresponds to an insurance policy, i.e. a contract within an insurance +company and an individual (policiholder). Available features include driver +age, vehicle age, vehicle power, etc. + +A few definitions: a *claim* is the request made by a policyholder to the +insurer to compensate for a loss covered by the insurance. The *exposure* is +the duration of the insurance coverage of a given policy, in years. + +Our goal is to predict the expected number of insurance claims (or frequency) +following car accidents for a policyholder given the historical data over a +population of policyholders. .. [1] A. Noll, R. Salzmann and M.V. Wuthrich, Case Study: French Motor Third-Party Liability Claims (November 8, 2018). diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 75f6b1f3fb7a1..6cf7e8c6ae558 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -7,11 +7,18 @@ the `French Motor Third-Party Liability Claims dataset `_, and is inspired by an R tutorial [1]_. -Insurance claims data consist of the number of claims and the total claim -amount, together with policyholder features such as driver age, vehicle age, -vehicle power, etc. Often, the final goal is to predict the expected value, -i.e. the mean, of the total claim amount per exposure unit also referred to as -the pure premium. +In this dataset, each sample corresponds to an insurance policy, i.e. a +contract within an insurance company and an individual (policyholder). +Available features include driver age, vehicle age, vehicle power, etc. + +A few definitions: a *claim* is the request made by a policyholder to the +insurer to compensate for a loss covered by the insurance. The *claim amount* +is the amount of money that the insurer must pay. The *exposure* is the +duration of the insurance coverage of a given policy, in years. + +Here our goal goal is to predict the expected +value, i.e. the mean, of the total claim amount per exposure unit also +referred to as the pure premium. 
There are several possibilities to do that, two of which are: From 56aa0d78cf7df2cf3e74ad626dbd0cda68b116bd Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 29 Feb 2020 18:30:02 -0500 Subject: [PATCH 261/269] minor comment --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 6cf7e8c6ae558..61faf7c2225fb 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -354,7 +354,8 @@ def score_estimator( # # - We filter out ``ClaimAmount == 0`` as the Gamma distribution has support # on :math:`(0, \infty)`, not :math:`[0, \infty)`. -# - We use ``ClaimNb`` as `sample_weight`. +# - We use ``ClaimNb`` as `sample_weight` to account for policies that contain +# more than one claim. mask_train = df_train["ClaimAmount"] > 0 mask_test = df_test["ClaimAmount"] > 0 From 27a344c04fefed46e034fe21dc3016475ad4e204 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 29 Feb 2020 19:07:23 -0500 Subject: [PATCH 262/269] maybe fixed doc --- sklearn/linear_model/_glm/glm.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 90c87726a369d..c927114544d80 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -115,8 +115,8 @@ class GeneralizedLinearRegressor(BaseEstimator, RegressorMixin): Attributes ---------- coef_ : array of shape (n_features,) - Estimated coefficients for the linear predictor (X @ coef_ + - intercept_) in the GLM. + Estimated coefficients for the linear predictor (`X @ coef_ + + intercept_`) in the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. @@ -296,7 +296,7 @@ def func(coef, X, y, weights, alpha, family, link): return self def _linear_predictor(self, X): - """Compute the linear_predictor = X @ coef_ + intercept_. + """Compute the linear_predictor = `X @ coef_ + intercept_`. Parameters ---------- @@ -422,8 +422,8 @@ class PoissonRegressor(GeneralizedLinearRegressor): Attributes ---------- coef_ : array of shape (n_features,) - Estimated coefficients for the linear predictor (X @ coef_ + - intercept_) in the GLM. + Estimated coefficients for the linear predictor (`X @ coef_ + + intercept_`) in the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. @@ -585,8 +585,8 @@ class TweedieRegressor(GeneralizedLinearRegressor): Attributes ---------- coef_ : array of shape (n_features,) - Estimated coefficients for the linear predictor (X @ coef_ + - intercept_) in the GLM. + Estimated coefficients for the linear predictor (`X @ coef_ + + intercept_`) in the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. 
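The docstrings touched above consistently describe the linear predictor as ``X @ coef_ + intercept_``; ``predict`` then passes it through the inverse link function. A rough sketch of that mapping for an already fitted estimator ``glm`` follows; the ``_link_instance`` attribute name is an assumption here, chosen by analogy with the ``_family_instance`` attribute exercised in the tests.

import numpy as np

def predict_by_hand(glm, X):
    """Recompute GLM predictions from the documented quantities."""
    # The linear predictor exactly as documented: X @ coef_ + intercept_.
    lin_pred = np.asarray(X) @ glm.coef_ + glm.intercept_
    # The inverse link maps the linear predictor to y_pred, e.g. np.exp
    # for the 'log' link and the identity for the 'identity' link.
    return glm._link_instance.inverse(lin_pred)

With the 'identity' link this reduces to ordinary linear-regression-style predictions; with the 'log' link it applies ``np.exp``.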
From e817b2c82ff0e8e1484a59f339a7e148f76ecc04 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sat, 29 Feb 2020 19:21:48 -0500 Subject: [PATCH 263/269] forgot these --- sklearn/linear_model/_glm/glm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index c927114544d80..46773fd4f90c1 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -488,8 +488,8 @@ class GammaRegressor(GeneralizedLinearRegressor): Attributes ---------- coef_ : array of shape (n_features,) - Estimated coefficients for the linear predictor (X * coef_ + - intercept_) in the GLM. + Estimated coefficients for the linear predictor (`X * coef_ + + intercept_`) in the GLM. intercept_ : float Intercept (a.k.a. bias) added to linear predictor. From 0fdc518a186ef0b483ad28ad8fe428b4122d8cc4 Mon Sep 17 00:00:00 2001 From: Roman Yurchak Date: Sun, 1 Mar 2020 10:39:01 +0100 Subject: [PATCH 264/269] Update comment about read-only family attribute --- sklearn/linear_model/_glm/glm.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 46773fd4f90c1..4a44e4a1baa58 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -440,10 +440,7 @@ def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100, @property def family(self): - # We use a property with a setter, since the GLM solver relies - # on self.family attribute, but we can't set it in __init__ according - # to scikit-learn API constraints. This attribute is made read-only - # to disallow changing distribution to other than Poisson. + # Make this attribute read-only to avoid mis-uses e.g. in GridSearch. return "poisson" @family.setter @@ -506,10 +503,7 @@ def __init__(self, *, alpha=1.0, fit_intercept=True, max_iter=100, @property def family(self): - # We use a property with a setter, since the GLM solver relies - # on self.family attribute, but we can't set it in __init__ according - # to scikit-learn API constraints. This attribute is made read-only - # to disallow changing distribution to other than Gamma. + # Make this attribute read-only to avoid mis-uses e.g. in GridSearch. 
        return "gamma"

    @family.setter

From 6d4ecb29a84986540da4748cbdd724b419c30c5b Mon Sep 17 00:00:00 2001
From: Roman Yurchak
Date: Sun, 1 Mar 2020 10:46:48 +0100
Subject: [PATCH 265/269] Update figures to illustrate that they are not defined for Y<0

---
 .../poisson_gamma_tweedie_distributions.png | Bin 38430 -> 63830 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png b/doc/modules/glm_data/poisson_gamma_tweedie_distributions.png
index cfc8fef2ae40c2e422e939ff067b53b94dd08f8f..3b95b724a662389f0547e06049e65c729f1968e8 100644
GIT binary patch
literal 63830
[binary PNG image data omitted]
zGDjuHd346<&9@AQ8A8+&pY);x-QnON6jyxydY`E$`qY|e?~W%P?sZ4nJbVJ~nA!Z6 zp1K6j!N<8f{5U_Ay(0C~B`8w$8wia&4UQ-gcmpB;_7q|P>+(t-(5K7y*aMoZh()8{ z@R`V}d2XLc%QdZu^8k-mQzaIIL@`WfySxr8E3H0R(j!B|+Hl_+%qim<3q^YeODW?c zwX%F}OCwXGWB1D42BrW@$K%smxaLxZyYNy?kRRDN!fO+rA`n0iA`{y#A&3;z_Ymn_*bzogtfCY~D(Djf!)%ZVVWGxQj-+4W+3VbAa$ z#L$-6bkh|3mNIQocs&&KW;VA619BO-ORyinbHJUKsg@YEWN2QW)B3T*7pz`BW@KUy zxe_Xr-$?f@+ARD><6-i!mG0{d&Y~tg6D~<^%6y_VVXCADU0I)KAjKH90{_`S3&Wm1 z58b$(`Q!aK|M9kfKICC$sdrd#Nk-G24h|MzlN5NY-I%rUzi&Jon~Rg>>xiwFBn@ps zPA@XEAyoPPyHn@zl9xe7>?CZV&lKD31>3{Qk7L43*%t}HV!~qXZ^cg3phZfdwznY2 zpAX0QT6S#r&ArMsd0EUw=MhQ6e2`Hh=pN`-?+{tHW9V?H(S>IX#*iyXZmxt-g?0h)@=p-{bn$%~I9bL^h5iJL=5<76QMA z5J4KRl`icd%hIsbkcMMZv=z4%9i1j5rxCfdq6I|!$>L96n!R+0pWb>h(?)n>v^IM! z!JD9jev{xsA(K!gHtRaME{yUYhn`qpYj@!Y1#Q(pRsu4j(7J93=P&P@SVJ}WybZU- zjMl32;vP|nS8#@?aitM9yukZS zE6*d2yp<;uN<`2K{pNXV0xOS2^>FFhH-vu(o=Law2?=41SI_LD95pfq56_nYmq)6C z6=g2bqX9=X8IGi$B@UZ}bhB6sy|FEo{5qUvs5~>ZX3kOTsi@DHcxtMC14Zxi#$6sR ztnMfP2cR|+;MYnIAs0`K{~pV!Qw)}%QusJ3n0_|RF3OhF#P2N?T-?_8evtwtmf~5( ziz0gIC7}M`$OY1RYPgN)M8+rNB(`lQsD#lDRL$9sO_2w%y1hDGC!Ldn^=iWU6*>5wtC!3h!Hr>7|S16X{16>UIGiF z<~S4p@S}2?+F`G}#0*!q{Nv+lYSWG5k^W>1=~B`d7CMh8CLXzcCpl`!Hsym`al6YY zK!UC(oJBjK@zBU(~k(zlC) z9%;Phz-}|{3sFKbgEWar+rYm|`n_~!8TzC8>1mhrRqi9}KO*F#HB$$!1hq#a0&F(X zL|*KU*%2uY!P10b=qt-+iHWX zPXw-p^F*KT=W0!*x8M1IS$Z0+-3i{g(Q7U8gCdR`{TRz>8m0;wMcAh8uf0s5eqSq(aRMF6fzf96Ngs z2{dzDu|x3_C7GC=fr*ta`6!@m1#6>F&C)=mt$;J@TW>AXrClTQ`la1$M?H#%)dyLz z(hY?AAjCtc1GTCw*@bAGLKHu!(5e*1&4VULWk-zSK5pKWd!Dkv(`@K=q>Gwb_@R`t zFLgt=if6^g7AI-~6MSM^4CA+8Rsv$&r$}Mq)UxjR2 zUmsBiHL)CV5DD2F+wutRDn|Q7Z2C?{?2BZ%DN_g%n&a?W){2rPjF9__T?4(Fgu6AkN|$br!;LC`+w<& zwQ0mNXg!Oiwv0_^79V)H0TC-l0|R!fB|dLjNvS4~KFlR+LhM#ikw*An`yRmQj>aX@ z4!3>pCVEvwbT9anaTi#OQtHbef;`z@=orEqqBTq6IbGWNQ3h?u8)*KGXCbV`A96z- z{NILcJqZn*jn$#P>}2UQ+mWp7q_cUlGkQG%|FXGr*qo>~b0rbRGpP}tN~M{WSZpuJ z2d=+0(q&r_I<^XyCB~q4)yi#PtgB1-;TZ?wZ8PPr@v&&2OJGZ^K5NpbnUOCCopATR4`-F}oo0^;$+16dq zCx`|vvYs!3ixAEb*S7i@FCKGu&Z=%JZf8#K`+vL1e%{nM z?9Gs@3S)mgTp>sc__yE`l$1bgb0NfyBXu&}@n{%+k*APao{_&Vr6|g7Z%l$dU#X@a zK+f{Yi@?jj2QrHqO(=TEMZt4#6Q_(IibvkteX42{MkU@;MT^I;myW52jV&n4iyXTg z-jg?+o(`v20=$<5;&Q}m%QZKcMCgOV0XEy6HhAfpPq`7=dnZk-4F~^EfBNx{V}CS_ zUgr}%F`eKDZ%V=>dvj>>IiJIzBkgbc%;Nm;%4C)S%fZ5$nRIP8sjPDRkGI#m0eUtl ztsw%PqYA>VeGr{3T?jXb)ScA+5+@Qeu(6XmbuE2Pph*p_)~c&E^gi;DJ4KMqf3z;I z>YF^rllzGgz@28QR!AZiYKg6O^~caN z@cAZLC8b+!OYf)QFr!D^qH5!I7AXD{cq`U-5ixZ?>Jk;$7wBf z-HKE~i_^cc4a<%M7NqXyJ@V4ZPVD(HnBni=sNZbA-}!y;P0YsZq_DQ2!ZHn)Zs^PLN4svRW^hE5Cx5gy8P(?)KrK zj(Z#Ioc4acGtHOJi2P?|b8#10Zf8h1`fT^T)GwXB#Sxi|*fS&O0avkT8gmz;51Nup zUu-Dc(We6UouRua=AY8gKX)(Nz26uU8jfT3F$MT9X){P4<6@FAA>8k8Hse3lG4Y$Y z7l|7+tgpD3+RYxo-n3kA^hROf;J`3kcXYkZC~zf=zTaZsdbdecF5t+Y;>Z!J<80GL zC#z(}-Vq)enHOgJW^H*b&^36~^18eCFre-qQB_BHaPV`6n2OFX0pX%|VdqybnF>Wz zbqnwgEl~YMW8(HI(<aQ&gKiGqVGsylta8jGd|ePJEd)EV0Po6QHMRs z*K!+E{3W%ek>%&hJHfzy>Yy(eIj)xbT1Sl(SuAx1_b2ph*iLOP2qlB3CQ&YzPIY3M z_JmQ^?%^%HH&9F^3v8sMS4^MMKS7wO(y^PHE0b^uIY}Y1(-tZhGCbp$+1VmLhxcwF=>^iPMASFcn+j3&>j~FQB_=tI{noh0rlQiyxy}~!{wW!3XKHM z^#uNnhSSAH3C6LCal(q9FfDIN^9I^N|8{;Juo#^CSr&IT*FO%2qGog9Z1+O2fy71*f_j%_BuHo>ihrTSLR%kU2)%g6+=Hi-+bW^qlk_be;=R4l@cK+tiPxda}(n)r6HMtNmSCu z48Ap+xqF5`^cfZXj5#gf>^Bdbb(?ap)wK4}_1+^9+nssH+4DU=`72@7FNkQRKGAh9 zZNt_gwua78|BjA-f(c3be#(diAgP9-eq7(F&Y&&hXaC4F&zao6M8!($j@i{2vSxVA zz0o|l!FT&G0ZqT0KHds7b4s+Dz)1d)w6zJV^aVDcL#W!8ZM=0n9Dz(k-+|oGWatwC zPQPgXp z>J zs|2+x4(z+op`r1?A-Ctq+tTxU80`InmcBVo?CfH`y=8OX$OjW&X+X@h8cB5&*tB$V znj#gHlQp4Y*U*?UGWp`g7O~$Y{+q`W-_ng+<;M@@_f(3PM=tKj>FKU0gn??>!~%xB zt2lTe9g~x_J`BQh-&@4={aUq;;G_zzg`n&>SE1?OC`ft18(Ab@D?HtxLC8bSLKFgj 
zj=1I}=R4^SZIw;7K6>#HzR-Yb{)zTCEoo;&4(V5us81yxe&-ts&X1dQQdONXO%%?A zZ|X-rba>+m+7P*?==Sma#;a5eQ*twV5_$BvX=`xmD;YZGCW>7fB72%oC2o$1`90j< zu~&2r({?>%>31k7mOYrSL_r$Rsf4`q%$b3(4ztb7D zJ#z%TR$l~{>%Eh4!;cqyLzf;1dC|dJ z?-)=U&^Vze<$a&DPUf9yHA(fmL=-YzsHO~Vp;4uZQS1vb(FhqXpZv=%u2xY^G7BMd zNQfaSTF9EIT-Ov^#C$RR2P~|@#k!18ou=QnTX@CY-Ep=HVt2LH#6CMDaovh~Lxf?Y zxAv~6gn<^_W?U4heoNr1rt`f@;}AvUA%3PkCeS)VDkMkXDk4i(g@fs5 zP-(YVwLIGbMBhR7#q#!W)zq{61^U&ku3cS^4|pxn>;*f-GO$?Gt&cH>ZTxbaMZczQ zo<+cU?3k{1Th2eK)>&mMw>R{4-%|65|fr3Igsw@ z^<8gIRq@<9ANlGtZSjjxx6it3zmX>MXC6TY{q4Lg(ErzRda;r@iw%FU@JH#1-dkp$ z=ZB;YHRP37lLO{lbb}vC5>X%hu#xl#ZtbSk+SU={WLF!Ul3_jzyUB+D5jbfPSBlR~ z4IW@0O9{7@MshQD(0T(B=}wfSn-R8ZxseO5TI7EF@bY+F=SUO<;2*9~+b>0vUu)mX zPX1nOO6VLFEO6apsSXW|ec*#dZnwP;x|K@~+-dhyCR^y(AtI${qE89LsXJ}@)V3kt z^$Zs173Semv_s?Li8(oEDJ-H<3l@1i?7u?OnrP%DGOaF5EF2waSDKT~hf}z(2k7uc zM9$JYA-$FIS6OX{VevH0gzCxucbLokt7~b#dF<$YYY2(3gSkiK(OSiAufzu_D1GPA ze|&8B6!Zg8zZTSDotsl_U5qRAKhMVN(Sy+|#yHfLtB$R%3Z*g+3c4 zyWT*Cdft(*T#EnCFFi;MOZ)OIeJTpCfNw{F$mj(xo&^prxg6+y4yt?!>FI5`2^ zz3DJtH^(z@Aa_(c|0tI!xC`#6-jE>Xb4Q)zN~PV2FnF%jh99HjjP)5h8)~r7q%H1i z<3)IWNDLyAY=4v#X%?z~RLZ8%tW+m8K6-|>YRil#gnc0;y$vS{IhmrpmCcBxrRgg@ zLmbC!!Y%*2^BahWV4Zo|(TY}Xp(@NL;%aqhD03iiw8_jVzu%7}NVYx@kr$q=%dKAdr~2{e;MzFU*hV;A<7r%OWdwwIU=xR|~4 z2i2V3j4$ey)UuRE^9M{$KRQhhxjcI!J3NNehcMP6e~81cDVzx#r?-K9+vHYt`OHJDRx0`+~^ehP6zLq`_;j z6vnibh1#7Lm)+#!()i%=TDcW9bp}8)qL{BS<|^r+zo?IwdzxvCG8)K@tW7&spj($a z3btr{^B+a^6jS_y`HA$?BKM;lL@DvVhy))wBQVverHF{XP~^oa53wsi3I;+JmS-Pm zo((9~Jv9YJBa4lVQ*Y0uBVTTyEvl}jJ%4rkuM{V}0cX{Rai#!hc=TFdNy20eD&XMn zUhC}AZn?BL#&?Em6Fj*=9aNJljD;y#wJZ}YD$Vy?;E+-+$^SO}!J@K~MyD!xg59#u z?VJ`^tJ`P9>M>uo3m<GVdop+grC%I#6+uDXKww#XVxnm5T9KvX62dHXE-$jzd zm1;}&hyFqy9m+N6smEziaB;t{_;Bj;VBFbMYx6+kde*Fq(d@G}jQa+lKF^eVNxIKAq3X_1NJ zwTUW;PWS%hG`2EsIgLF=7$zRZG0sRhDQhCYYyhc)G^$_(BqaOK+vVpiYlvs!8&fDg zC5G@|iYC=A_9N9aQd;9psRg7teIBlttivnSPYDD` zGtM@~N!=3v{OyV>)fGJh-||@V0%3f17T4Vy`*e$SD}x{PQvB~$hYKYqBO^xy&*rri z0`@`2a5}eigggiWF%$IKw`>hxdVt^*L3(mvupTeja@)2Qay$E{3pDZaw!WxC{5L`f z(RttH6t?!1q>u?bj!^`gx@Avl#M)v%+1Wi%57F-lsjaiLT zIFuTE1gHD^m8q$fZubf@vD2YpSNWak1vYxtNdS-QZ8+9MpqMY1(Hs9sWk5vN8c-VJXNEN9kRx@sSq_l4=OdgsybPOW_&WB3{lh zcifzyi>CBZKy`M$9OS&)e0OR%5LGOp^+{Khl#J{)F$&F72JmCRw@g5dxkRZ1IuS+S zExX%Jf(uO{LzcWkMl6yoTm8|a`g0w7n$wTox!5m%S4&B5wO9AhF7&1a4XxaWMw;Gy z?Qv$hzn=V4MEiIyeZJxz)@XPKk=WFW?yw>IM%#fIHtKqX4-v>()3jyh))RGn8K-B@{VQWkf4awXe>i$wjy`TgVZWSGs)V3^sdXkXww(QW zQ4h0Xu7uFMiRnotBb1bzOMWQsJ+Ivk2p=Er3w~jDA;eHr+UQfxv7noHE-h#H!3gNn zNeZa;Oz{7+ZfiQmLzwkJc}YTO@K~){*%kd*rN&$Kx7Gp)iB_j2IOZ1}ZCI3eQ88b` zC_cLH$!7npE*HuQ0+t*H^`TOH*SrCh^Gz{jr2-|j#Fy1Fb+Q+Hu$EY4*~D#@scl+r zA7tz`UH|-NGV<#lsiOmq@QNhXSsELLZ)5h`0ZdGqmFA$z;gmu-sceTA?+-U81*=at z9Y&p&y9%B?;T@)n>U<%n-TD>Qu@V%YcckfMHf_&*3|G#)Ai^UE2fGYO^G$t^^6kW7 zT$xLJxCD7e{6icB}gUgnn@;!F$;(4J)ni~6HqSrYMAjQKX2!gt0i zqA)WEM$rld%c;CjK#QTdZijz);ezxj;DNPnR-0kp2_s84iz7nvQP5+DWM*pVX6?a9 z7%GrI?)_?~$_**kemg&JYK>BpR>{evL5+NZ&b+F1My*XrQ+pE$`8G%S+Y#sW11?-u#^1k#Aa`--}=n z*Hc8K;pm1U7S5H-d;VoMYWHa7!7tL#oXXTaO3btAg;4N~C`WO?-1?WUF)fDcXWf9R=Ta z+7}XHcVdDDk^P5;D7Em|dukr-$_Ora;T z7<3V1naSE-&@PHQJx=`4K2z9F0m`CoI^Mz}I3Ha(W5@c!#HN~k3>dq-_u*QQZI>dL zHDo8S*SD7 zE}r&IxZ}Tte_0SeGe|%q*yS#l8qpx2X=%1gg+V5N!pPEJkk+YI>lU>~{VmQF0mdw8I_)<2DGZk$V`$RO}(;3%Vm4(|JY7`9^g;Y z;(k?nHLyKgaX5=M=!PncTeXFRf+sqX9K)*E8p^W3 zC5Mhq0^&ogZ^GxxeGP??~0^$Q(78UAU#P{Tt1NK)B?CsjTvQ^8>6K>euKUatA4g zIlQK*quDw~fhWG+m`Hp2J$zgTr1VSA7z5odvz;l_9||=(rrkI`;oWXiP*srViFyD3 z@$`-1ku_brgV?rhn-klZ*yhCcgcIAw#I|kQ6WjK;^IY#azxrqI?p?LktyN`It^w-K z>o4ecc6(6evlVzv6Hy{eB%VBD0JVHPc68+}RQ72?t)RY6gZ_eE(dGnQO${sxLgPy-RylTPDes 
zBWRhgyD?VoYT;bCB3oNz)5kv$o_Dbue@YhoU1=Pw@!_X=z8Bhh{?dPZ5fC>5N#Bev zOGOSMkliWf)53l_QHkzt{M5gf-el)Ly(-4M?7~D}?+uyI<_tsQPmuk~K?q1Jk3U|K z`O!1Q0#}Pc2?K~p#VR&zoS z8Z729YrOtO?`75XUqtTz-Z@g<^bmVd$5*}IlMtyZY;S{Sn{G9fFT{$=4vc{gkeqdP zrFss84--?Sa5%3h&h4-jxLqUd27|BIP;mJB18Y&!Mft$(B8~L2AMD^FupIVYR-S6y zMzs7HAECPRuOsKl>*961+=h-!&15G%iE(P&XQu9>f6=@-Y_v9e})B`M;) z{TL7Gy2O{x3g`^p`9o#2s5@PdFOPu~Mn?)Nq)+Jf@=o&9{?N3LM}DPL6w_`33%qa$ z6B9MK!>Cd8tl7sU1s>J%KIiLQF|^8eIENI@%6hK|U&ODsR!hk30+Tg=MltXh(%Y*& z7~ktX^k{z|fQ+TGbUR-gttHYFN(@2ru-kNPl{d<}oP=d7D<)8y*}nA^ytQ3I#WD_Z|vgV=IhF^^XyWzI@|s$6AvYA~7`hdakAwALsRX_aKn#}J1u{?R>JkjSZUv{mD`5@Wx*%P`jmGMt z8mlWRo+@5GY60WUKNx3}vurh8EkkEs8pQ~(v?8*9&F?O4&KEt-&ko|&*3(5raZjBe zh`hfL?34;)TMfyBW@b{^e&)jwL#fsOK8>~N;JAc+BL~!i^&p?x}jILbOE zWOWijFk$YOYZ`vc{Ar>GoaQR|wVxGz7LQdP>}(hzS?@f{C+zZxNk88P!b0ZQ>1@UB zrRak)Ga^rubbh&U7Z?|K{(Al1!Sh6FJirv>2D{_(v3XTp{9{FjG&@=%d1f5hHw#|u zr&anFc{PONJ2FuRY;maI4BK_IF-we4f}8*c@O`JW!R{+?^(o7*yjb@?h{QRPyd&iV z`MPcM@NMzk(|RF;r!gG?cmrHjE1sawf~D?mL-}PVHN02-?ZHX?Vz>NVMqZp0M9kEl zgeh-3NIak6o?V_>aD_C#b)&4#j@b5suL2%dbmJbEAV%+H&B4_gx-0h~U>>ysZiS;x ze`hL!Xa1J*i(q|D_$q5;UZ|MmQcbYf(Ow`>j#>B|qk}f-1=mZ-A-;*^BNmghTn=6SN6Xg1;K#LDa>CI$HXbrdcr_8|&kL&&sj%H{)Sp&xte-AxyXXz27 z+>w;z15ir&Qg=%y-1;Vi*Ty@R*0M@YFRE_)NCZ58OX(MGW8)z$t*Q(z)F{?cbI(%> zSiU!u(GIVeL4{C(3Mj9? zcUEl!b~k){tQ}b>@qMI?qB2Q3?$AJk;SJ|bEV~pDW4I>v&Z_K?@j|mUaJFw}67|(@+bZXu?D^3$^4>y*-^QA}KZ^`Ud3>GEp87U67z> zyk|hiZ2Bv&o93)DKHE;9ZFNgKb%{m5bgG))hQAWeHQ_z&`NnuDN!UnqZe$2Jk`OT_ z48filn5Qz8mrUSUylw{Hm>gH_@1rY>uX^I9%bDLy6Swz!v6tE><-HTuQD#zTq>Qr2@|sk4`YCD z%-37HlYouCrh9t{FfWD}trZs~%yHPwwaj!k2kU6nBuF@$X%Rc8%M@uW-GAp}J5A{! z>D#6Av+#>`>9Ti^>r+`Zg!Ad_9-)8y=yH?`Nsvl8+#U<;5oVEu<-hv~mFS4GpfSGm zHXD=4u>6V!s$J@ef~IFRhn@Lac@*sR;TMvD%4edYZ)-CdkEXQ5ZAk&96aDg*NL7%NQ{#a zB9fd&tdEBHB3iYzTxE*tI)SY$Sho=8da(X9*G_NH2&(d zYOZA4IJjONS91$flSof)`D7p8Ft?Q8~qI3;TAXA)}TQtBC$nV}%;5=$Do;-*8KWj&BXp!Ia!x<+GjV zCM;NtC@AuYDH7^Bc*f0c@q2p)6d&oqIe!rhB`AYtkirWzK7$LAQjCf5$+=3s3M}kdBrw`xtR6udNwtRKlr*25|3Wzf0 ze$|M~mYeNP9<|IO*9D;Q?xyS9VX3Cs*j^iZ6J{-YmZ^ds32~-bNSq3(Fsh3?qEL?n z2|7EWRSMMX2gu+)d|jzR5wX`W5|ht6J_gLt`kL9}-OuF4rw2|+Ye7m1x6*Y@m>bN2 zHsqV8c(+iX;_G|dI`2DYXp-+^JqSfhRv%8|{=r+HHZ=y|A~(?e@**P1d{)`k{2tq1 zsxaB%#)lBMJ%x7hguj*sEqPiqXZ`l{?>5WU46yNBJ`rAwa2dOOU*oH~>A2W`KDDYzq$RgJ(+Fn|q&B|U=V;?X_EKe| zbT~+zfgnZF+Mg6U*Lot-F`gZ*N6*mas=c0(^yd74E>e_Zv>D{cw7F+t&P?_z=&31` zc@;C7m_YIHAa31h?I_|spU&HTNBr@gBNVR_NpNQmQViYEllzmNg;-2)xd`50P$A$a zA(0|<^^M=$D2{Rp<7#&#Qt0o*f`V5ylVELH)XIO(C<{GZ;%HzxhVoLT`B5{?_p%^g zgFL^xu@l(im!*A~eNZB+6-$c_p=122ZO`;v$Kq&iemFZjT~v@KCpW03bpvWO|6?(& zauvgufgoJ`=;e@RoUzv7hxLWOgB|0M=>(BWmDu_2Z@>(?wYL|vu+}ckJqKPoo-P%Z zm(!o>Gp#3HnoQ;^&aqXyP}0eUN`M0VNcnIv8cgr6U_e?34?OtZeVmm+welMzcEc+v z*5TmmR5(uPEI2+v{ofT%Rh5~w3Qo)5GUK=TJ5QfykBNdT!`H|wp4nGS9h)2b@+`|& zWF}3`(?Xek=VO$x$#hc79Z`FiGeLA@p%s77e6I)GM6VYEUgxr6OcmPUA;e;J;I*eR zVnxvKwI%})5T*;_0&(n&4ZT@dhxWjyLyTAzCu+|?5f8kwaJo?)6d*#oCYQHdYFqG> z=z4b)aY=pG4FvYOTO*B}aFrclfmymwy=@|scxF4>3a(9ykXOfOlEty;nw>-Pl{ubH z*VNy3zN;fw2k|m`xEA$t!`xwMn3B`@YegDwcBtK-{QA0u_=5o+E% zi9>bY*n-I!a(42dZGyH9!YXz0Sg4S!6}juS7N?AN^R&5P0`9vb2^ET?%~-_C%c3>Z z38&Qv>zUqY6iN}t;>F`l3SF3)K?*RDm`R6>3SXwn(Hr?Td8%}x1WNn=&g!?_XqWb? 
zX@uoeAqeSzH4Y5KfP#|YM@P}4pb<_>i{K;2HZR6=!yIx5BP7q{xe>)>JGf7m9_|NYOyTDZ@#b}@F1UgjsUnj99)Ar!bX-9O=#LX~_^?LR; z*vJ^foqJrj#3qrENN>v(gEc$_?Rl%)dv^fIq&K{fHy@Iel-9mi_5`ilxov|-93mfaG=x(lQ#1j@-<5iijS^DR6+VGQrteQa zuL8q?(7P9_V(%SZLs(6EyzVdyd18dN@{m5dL{4Z5T_vK9V7LHjuBLUz{sK_f&=k3` zvITGzZfMfDR`<)Bm=hYP@V-iO(7dWIV=N4I)HK)->Wlh!X2}iEUe(j_?hVfC{7^p| zqq(^kDYuR*S*pysT)a2!c#b%9e|rg7>Ck*|-s7`rulBuUY!($;&!)0f3l3RXRb@QV zG$3acuuz2oWY50w;zw4Z=zWWmQKe>Z&nh-h$IOD@#mxg&X z|Kv$wC(QWHZRVW?mt7AN(VV{c{Cv5ojp%uEz#7zGu_~ZQ0Rr6o-Q!^XdN_$|(-{{&HF%EZg zcJ#aeh1~=_br@9<(Kgt7*Awe1+Al&HV8-g|7*1S{!^4NMaUb`|ND0Sq0w?E5J-xWa z#_M+te}J9_7cNT7pYa0eTP-udXMa0StHCnGFgqD6A`s(=@hfH`;`CRQvg(p-#u(C@ z5_WVY1_L;Vrfx;@*8X!E&Rb2u=zFjI^f`ihKZ##^B?)RPFJu_#t!e9JaOA_z_K2 zTc`hH;v))xv>2TZJjkiC|K=Mi4GOAC%#o?XX9mFsT{imxdQ1VsCt&mA%afmTDcCZNL`(~!`eUx8P~g(&8! zir9(MK9Tm8wpOp>vIEwSPA6}Fh4lH(J&%xr*4A1Zn(xFZ>l;Lmb^>y%;U(qdcG)CR za&Drb*`rzN0x0?YOKcXJqKoGo&6tzntihDZ=uLdayc2_I58RiXUjbF|}mvi!! zE%S0rRE-tK0Iqjnm_eU*2qk`RH=4zgEl?~fl`Vq<5rFu|?SH59^x?VSFUD3v>R%n- z$?1%={w=RhGWB?X*w%UE_4pfEcbBO%r}Mkw1OGLlj}HNW6qHb~B)6vXQ1E!efM*;Z ziUB01fSJ+K!g17MGN5l@iPKA(noLO()etcxrQE1gRszy^>%eJsqdZe}EqEv1AAJDa zc%^bEk9^5v*N9Q!2rJ9@igD-mCAq$dQtPhxqh~$?5R@}*?|6jTgRbw2E0+O@akKJu zqGQSHj%e-y&0jeTXeMg6rvmEUd~O~OvDj4++n2wfqVS6@AMHpiSQ7)l^?B_{_Rpw~ z^hJg9BeUbFPIfoj6v_751;&$`&sjdWQTj17Y0(zx6dkG3b1#40vfHfHMiC_CS*@Tj z>*^Zm=%f#iWgjq48tk7sUbsw^^U~mdD@K>TLna4L&UHIs*k^J8^MD(0T^0}K!(B?M ziaJwR4V{QEu|3tfA2{!y7=!bn_X%>rkws4X`rt_)6dP+VAe9tv6t>Xvq@EV7M`f0k zAY!MA)^9qoW*9KHffv~NV(nL|KpelJL$S7g^7dO6z=homKojQmsJ&O2wc{wE>W+b> zFhc6UTMR<{6Lod4r@Rm}z+5dw%4%e9ZHrXB5big`uYbcLsWCx#HQqQ)N=B5Nvc=?s zm%7zUecY>~;o20C`mLfiCK?W{6~UGM{~ZCZm=EM}UgXT>dv#Qmm0H^3 zV`HCcxXF@yot@pT)w1LSzNSn>VInQD7LWK)T%;Q8#5P&vdP;sDj zkkIv0oKAXRVJs0K53@5J#xiOBf7;vCaFGU=f8#`onoN}r=TYB1k4ZBn(Gq*CU$7Hb zfQ(t)Ej14H8${y@UTmJ=GbK(6@gKOKJMlrihm6Ohxpm&WJgz(~ z)P*Jl&CMVI`u`_pwf5v`{<=D}-TJ_KlPn~J)%(A zqCD3J8>4&Y2lUHR?A~4L!spQ8BAGkntaU9@;_COXypfm#4ZguTwArR~rxe_2?j*xkIKPVqkQsRMsoSw~ZY>-)cM?%ivLpTop`!=0@!?rp z;jE-(jWRJ)jt*sW5C?3}7F}#8P_aU4>gWLGg}kdr&h5SE^pI5e3KF8**xxXKH9WFa z8Qgc7c(!o+14MszU_Mo~B4IcE@)S<)1MY+@x|Td7{v)&dFFTQoL+@Si1*7Fcnf-*) z;4gX3OY%uPq0Pou%}+0C2pgE_6jdV7zJp1EOo$)dDg3l}InzdT=Paddnl{E5Ye zmg`dddxI(A;EOnT(&vDa+${d`13>t+f z-}$|)L}+~J=(Lx3te7o{7iw^NMEB!ra20#=l*^pg*=ihpFf)g}Tx zrdiiystr`El_u9ZyXwR3o#}xAMR#7Xv%8bRo*PC+GeZLk2&fiI{*<^lO3BgD)!U6B z=z>Oc#g||oIs^a+t}CnD&S&s!)*|XxtDknJkAypIoqCp&W8I7(%7@#mON`?QI3F^0 zg{)}G^d?Zptb91Q^bGp3!C9*@yTbZi`O$E-hcvZu@Ik+YvnDNGing+2{Ij#2v(*-j z9v;D#IZ{*_ek1lxr`4PyC5f)}{?2*Ao^3Waw9A3dSEjQd?4boTdnyB;HSc|#lz@fo zf_g7Y@56`h4JP%YwXce{^b5ZS5b=^^emRn@d!6E&@q3w&rF7B~zdXD^{A~Bw?BI;l zg7RsDAp9d>4bHn+Tq0GByasiLyJp6)`P-*AGnv<_tsy32k?k9SK+l?u1LnG`Bdn%| z8w3*Mv22FC%kl56fQMmt7!r?Ou&~~X5I6>mrmOB?j5~Lr*Ap=^mvRP(2;(6GJGGbT_aehs zu!Q(!ey?Wtsj|bAR;b#i)zsU#wy_&EW671162iunF~4<^tQ5;@#}hibGaH#KL zm6Iu8ch?B}S5H@M4v)AROZ9pK$jpb!Ri^DoHD7jRWtTc%k(}u}ekiD6aHcgTTiguv zMR)unJc35gJzm2czuZ!T^F%7{f8$mUn}Y?qJCquBbs_*9`$A&)Y`oL}VR^r1GPW09^_eDj8_<_{%6#wofj@eeyXz z>L8}!A2nc;3+-G)wP>>c=2E@|^tuMaxg!cFc=k|L0f7g{9wj#Guoe%Ox%F1S?FK@5 znPu~}&#S~Fdnh;knbc$W3d81?vK@sBhoz!|s&&PAXSHcvM;jD*&;AKLVE_xl*kblC zi>3Lt^KBWsHA#uGh$GEAvG&MR{g?e$+zc+JztaTIEe~EsCN+4~zr`3eRkf^G%s9lmKZcwRlqag{(Ohgqe z2FQ@kua)c!k=m27q;<((kcBE48uWflsq?!Lm` zzGpfs)3&YV z_CoQpaLdcp9~h0_upTPcZsr65VfM2J?xaPMms3RW8-5tco#FwVhMa{oekxv`5Ax~v zz3kc1!A6w#`5P7Ox>m__w#6!%%4&lPEqi=oZbWDf3scz4Ukat4eSaw&AtB?9O_fm4 zL>1JQv|n7+HDYhYzL>~hZKBqbx&ycBS%B+P>Z{;+Ig!(1Tc~m5&{megnmUQ&;5k!e zUOYu-Y=?{5t5%Zjq}k?_!5GgLHoaf+Lb!r1r)y;eIHveRTR1pnDf4kZ$`C#Y2l 
zoJm@D>3$2t0@_;nS_}jdjA*a+sMLt0nwJ|Z(A1f_48JIO7EO)hsKBbLKMiGM*ro7C zVdIaphN`h!>X{FO=RA(7Upt=>*BF^aP_`&jn3BA=^!rHv-EJEfg zI5wHU8lxIJUJ#9M=$GO+h#YQJ^GoCS#0u=zLPBU|d069qs%Rt$v`ENl)xL{IxI1PW+b~Hkmkl^`> z*#5RXH|Uk>Tw-J65-D)!A=an5#@Pz;$?R3lfvyf?aWv)w#(prnjtC&RZ<_V~(`Q=v zX1YQOh!dTaJBQ!=E$Ih6gE1?ZH`t}$!A z*1Kmb#>WOto}B8VNyIKL?BlFKFapc@)f|g|`$N8|l13Me??imJ{Y7<$!W0_*w7$n#+reyV>hgT%knpuRo&K#Qm& zRO2r21Kn0Ipy8v#mUlq?Y;3KcsMGKKLbcWK)51z~oUPgSnl$KMWnlD^&C*I&%H~i`#FdjLFsMHxs_nI_XnA+zZ z3ma2qf$=W^I{}b>a67Uy7<4AOUxYi~{>akz46XzNRpantI;)UPrt~&P)4;}ilS{LJ zlM_dyJrzz?_Asw9NzD3ac4lg`$?tst7Agv_-4egK^hCS99t8jg?QM8!%0NiV?Nah~NZpB@Jo) zriWkgiU4WTAcgSvbt_d(ZnOxT^{A!faF1Wh;HKnoh0yloN#XyH-k;>pyS(q9G=W7s zz^eb`e*~0)lx;|lPzsz1fA=iVo@w*{NF}ARo#dG5Sx?LjP>I?shj)z=b%+ul-ch8n z_g$F(Acvw1yjqN?L~(wz97yFkyXQ-AKy-@#j760Tup?a~|EhJQmV-VzT8S%F z|EccehT3ZA?7(%#QZy^OV{$amw~vf@#Eg{CX5&%0_g@@N7tx4`#@%r+SIh+lf3o1; z5F$`U3V|dDKxK_+)CGZ15%B$DJNzJ}ghGA54O3C1ExShc-@XmK5r5GZ6nwHTu4hgB z4sO6|1W^KahOb`Ng3R0;ABeodz~ioZe_G5 z+CU|Xj53}OmIjNF{hwiJ8^)CIZ5Xⅆ$u-kSc6_;XPr*1myn71D^qjJz?9$ja%mv z8S(?!gL|Y6o7;>HKw)TtTDjH)~;P>ma(|p`thpb3v|V>(6(QdArl` zvy^!|v{_9VQ|&%3@6Ky(z`z~jXUyt1MPr_JAp+t8LV0bCb<$n}@)TA7rP|&YP|#w1 z4iSJ@rM&AS#LD5}b;Z$$$+)z^LKnG0$Q_;~D8LiHR+wlTi~fU|37h@J;$kH6LCR8Z ztubrD%hG=^!RWkMiNUZ@J7G70L-TGu3JouPe&V6r zbY`xB5-`@|*;m@TFMfKP?#xF3=HjK>(!r;JaV0Z&<5*i%N(<9g7)_DzzldZ!DvIJS zf{-=sRtg?GX>(x^b40{^uQ%BJvlV^TIeeflL@zH?L}mfDLTFA?GLY1?Z?Gw#AgOEf z$QCZ>vF`%P@pIn!Oo3!7L7n|PgNqt8A4JoTfBR3fl*NA^h}Vjw)(T2Kp~nZ8YsF&mzl^I^}26>+VjC50Os=YKpBr%3sH>nhXM zq=UtiuI*goe}5-Y#9)A5;}Yr`Eu<*Ra_isCa)jfB!I>NXa*`9wA6RAlkzTS zVF{i5iQk|9fc_Ra1{KYViG@c0)IHjGJs40>;wzP$&cp8x71ZD`>!isJ^^_pSckD!bU?j^Sg4sXG-VYZ13QVf1+uAV$#J7R)&2fkFC= z)T7wL5$pQ9H#As+QhG~#8!?gu?`w9kjFNlveQuGZoW^ov$ut-hqYqKdTua*8j@a9aXTIsUl~j-CODuKVksUl-_&g%BVt=E@4U|zLd}6Um zhA1cR`bXdWx&bChlpR}|e8?RGIZjr^ZbDy5R|%ZbgW4c$qc63Kw!+!4bF?ovJK2tJ zFY$JcKv!Sa=e{crgrr!;S-*WB-mFdPsbZLYM8OoP z<_+SkvpF0*$S-U=HQQbM9r_JzAq7DSz&)IR>$EVU0<3Tu=skNBIXDA#M@l)}^{6?s zd})No>T!(yRLj4P>ay0hC3e@VKg6SaosjkHzfjC2con(_N>*$inaDAB47i_{QmF^&^=|*ljx-HGqmVew>RKZB4F9%WV-f?ArArlW3@dPuo@`1tB?8ku2ZD-paY=zSPYE_ln#u~}Fsq^rUi_ySh8AG<+hQSn5P2V_RR5^I`KFA8 z>r}KK?ydZJh`2ybK{iLKMS^qS+u`|HTWQbwLe;7D2nyW{ikrbb1N1{6O&!)+;1Lmp z?N2~ZTPtK@8r(9v9Gc{p|eq3Z!wyaWMfB zt-zP}tJc2M+@Tcb^Kaf6&+#vulr&IeiLZk@&E5nonLDtcN-K}@?d?()fEl1qN98OsvS z9SYkdU+=Idq@@p}ah4dl78FyHzP?;gGiwTDH9cBiSa+X-R=lJgR1K248J$M0TD;ST~UY)EL*ZCS%be1Q|_9-uPOn) zUzNs&+G?%@$ak@=mgsJ+&Ux+L8Lw|I!<(J>p&Aj!AKq$DFL~!Hs9utiZi6z-ns4|( zDM2G$tcU?bHGq64iD^PYfvN_8=V0=JsSLtFAW1|lo0276ug1fLYamHmnIO*oOgCDG z)|RI(J**bAf$K#Rh!d!KV-;@&>3kHpc*tHoG055IB!KuPW0gA9R|q-T=$;A09sxme z2dE>mYa9YVcL7I+vuEf=8K3*H?pp!dGE*<`v!%(g*h#k82c;o5mog$u7#mj`uU#cE zK`IR>A2ZXpub8Ask!_tv&!hO9L;0Kl2Ik@6%x^HzJ7N0~l+s zEGfC?Fcg^0$M^Oen9wwAJpOlI^;yXgifGF0kHM+A&3MIROwf}7ftxLc!<)f|!tDUy zfvs^@XBMhy%|*`FhJ>kqe!_9{0k_YNuvu%OT$uzAKtg?A%mM zF8^}+{>d#bor4==AXwS`ToEKIbcliQ5U_Lo(=uV2Te%yQ$P9Tcl$*(GVrno#zhLe! 
z7$s{aUHCAurG4GRHL?FGr*pB25mAB!N;bHEd1?YUYPt8lg_KiQq)PfOGIydVb1E8~ zb=xN!x^c?UeSnQ-6|pAiXv2b^^_YAy={c!1o@pRB3%Qia%SV|CB_{_6Bz@ikFp`y8 zgtm)x_AVEqS)Ks^8y{Y8z!fOyQB(enjqq|}+ccIHe7981YTYtq7%5On>a&?;RH-Xx z=j!@o9~cb2AjcbMvI}9vR!{!cJv|bxGRw{AY|e0ir5v+32CtkstfZk18oCRhO(W zQzq^fmHXi}u2e-RcyA~|DH+`9yl**+G)j^!hhLt=a8x44Tn)hJ>)idmLxMjNAH(~{ z`E#b&$Hq~TKhfMWgRx@u0aGP)c4kT^2LlOyePb}A9o}m9cfEfXm4H*fK?T4mIP5{J zrC~($^-%%?yMO8uJJ@vt0U;XqJK^;LXr%gNJ7oosysM3#^}DYE2tXQRZB47J4DY>L zs%W;_>SiP6fg|}B!dJg#TvGt_8th?fO8|@#Dhl^Ajs_JFFq)LsXc>G~ zR}AD;JT@bhv1JEtd^G9x82p8O*`HbW9X;WKD+|8smB-$Qb`D6NZu@Czgt8Y6{fHCm zGh=w85tq)qcaEdJ!1?IO=(Kf1Md7QUyRLr%*3(F82x~q{A#ke;73Mx~I1oU*oLO6k zYUt2<=+tMypy2aJY|9{H9F2~1m}Fa+D)~vKFGBPnSu(gHH2E+z8R%NHAEg#0PrdkNE8W?mLV;!a9Y4_yF;nY!?g28KfeeS54dvKFuVxI_ROU2OZr zR^I)NN}Oa!Cv}+GT{m|I8WF^`Itl#(#w<*aru6ki@|YAD9_rgn$6t0$wV-OifUP%6 zFF$*ji|;{c5V78g`pYj)B~GP#p6jd+jI@BUV@Wfwn!w|_DX&((2uuwC;Chz=R16MK zOoOWHZto?VacuM^E~l_@Sb>5`prw`RfF}*mAR?&FPjF5D~=) z6)xQ-C*5Xho%dz2>ol&NPf0BMp(KfkWuQrsXtF$!fJVY-JG-wNg_spHIj|0n$=?U> z;vw|~PFrTDb{!FjEEZI|E|;b-H(>&SODb{_CU(dU%daj@0w{XNbeE?(S3CS)?#>2s zm=7Vsp^f$m60}CzlsnpzEa?j$WpjXi3?lH?W$}#w1q3|qa_zQLn|^37ng0r;gf0X4 z5&i(ZhX{M#$v`H@gBMeQQtL;jk7;*DZEk&t7aJN$@M?0zBiRPZqfXgc~s7-Ep zB>^pP=W<)AxlLii;cn_YzMut^9S(HaN~X86I{!>tp@W|m`9mIFO+|%hg-D4=qOKMk zK$s6P3EgFqEy5b>6Xu=atWFQ)z&b;>hladHZX~REq|9 zjV4m?@&ci;yk9P2=jRHQN=W^mtHS2%3DD+Dz{dNuOeW5!TdUbCIrTqeukYW6BjEYE zqO+prCL)72z5ld0kcZW|^DSd`U+5P;w~cF;JziYylVtMiJUyW1O)DdxOv2|djMt4d z*Hf-fwN@5G+rTRQ96$<~{sBDoSB0;Drlatennw4)d~f)lXaF!uhCm;&_ylv|qH_te(0ylM-R6)u@U0A}mI>txeDler`7y3&rn{-j5={l;zW z!1%6;E$hM@Acy^pGL+Czz$#LdwWFH94I7SVHpK>a0LQtDgfZjvb)Gl_8dEtsD^Pcd zjWIFF7*ARxrl=wqL$zA^glB~xZ2=Klp{Y6N^76;lR^-|`ZeZ(C?bvX!s+h_<_BO}$ zrE-z4F$-2omBTkgbwp`P&WOg<&bJWNpLl?DtH}S7o-PB#J(CwNTQAP-=1yDVd(vqS zhUsPqA*N=ByLyh^s_V=AhaP9XDM@Z!iDb7^Dz`cX;n}7f(=KF3tDq0YB6&dqBXHH+ zChP#Mm)CISz8+ei!R2S?2ZkeEd$u6+eVFMM`%k>(f!EUV=n5tG8_+EU6~rNoeTE@o z)HL(X=O9&HoRb?ZqttIqobFRD1rR{XI^|>h3@mq50cWhqv!+tP>~m5^XF=zq#`G>X z;tw$}ylDRp(WRyZ<2yDCxhjENjmM8?j+)=Pn{=q=PLZk`)t^0&jR1nNzL22)J#D+p zP1x4O28qVXjXUttTa6=SMrcK@JT~{w-BXa;pQSam1>rJ%%{ey75y^86aspt~LTi8p zQ%Pt+g&v6iH~@1;WQbBkl$CzKe>vUXuK-A|`eqO!Ir@!CEjC2eb;9$-$j1{I3Qk8_ z2Vu-%$Wg(0H6UevyzcT%*4)5sK(>@DTY3C&6&4T1>Tn!`?R<$nY$YKb>B03{SX};o zf&n3Wjg^Hdmm9b8l!KW` zJK7g4|nOK8r#6i4El4`^SctZFoDtn4zfgK&Bm#L>Cl>d;X1o3 zvz?u3QqngsNut6a7<(7?xa__Cu(lj(`^BKbhalJ(jk$9y%KxnzR3Ik_$P>b(O4Tl7 zI#y#A4bBEYGVXJ70?3dcfSl^MehmiZA0E>zhbbubX0;Jt?FQmg_w{oAm4B7N-0VhG zH%??|7SSgIOflJas77IQG2OW6c?0DLmgJHXLkYhI-vCJPPqPtun0yM;#wET^VPy)4 zLwlclJK7!jYk0qS|6Htm(PzMdiPvcr-3mp3@(-YdfECG|->8qsR@C1pWG%VJmCjW? 
zS-2|+N~F+x3lzhp0OpllfwK$uwo#Lh`A@NH;Z5ruP}QD){Z9;c0HBLMT)f9e{CJun zyET<+0GJBJ`jLPwPUy0TXZ;1$>O?9T;LHpl@cgnR}o zLf1RL%Y;IwMnO+dNESQ@+@iD|PwL^?caC)i2T6N1XFJSMdfHCY3?Tg0uI({9T!37G zR^cR>?4}lmjKtAmj?6n4(fexKf`A1JCP|z}0R@}GFsB@SRe3}*lLq=*C8IU_d|Wz5d% z(q zA3Jsh3kyTqG6snfOBNwL^Ye9O0^dnY=CklT$p2eINxlW_hnVeNER4;Jp9+2DbEkQ8gf*4PE8$;7D@}ZT0o;TH5l9jgl~B{&MxV zfo+)vRD(9!_T@$w-S(#QV1Z9?I|%oxr9GX=D+bJlisZjNweyq0Ma14)J zPwjL*%<0T)w^z=t-ky%ZFi=Y&F!~bEQBaA7hf+w)iA&#->7dY~%Xhm5U?gEgK-qkD27&p;JO(2)+2J@T?IL&aqUbU2@xd`a|1x%~sIsm-`EbLT<#4PFWIY|7vWhki z+D2QPn&_Io91OrVP4MG;$c*P&Pg6sdL7HGwI5(W{8p_Gx;U5GqzD8RYtnOF;X3ir< z1j}*@I>+bzkzGMoy?L`gO4;;evWBkBLXUovqQjtTd%7nTg>Udstn($T7WcK%a^Z4g z991nGGqLD{<(B1#_t_%>B4l^W;8s0;*YF4CdX^@rrcICxeo$HS`j)Pw-?oTV(!RoS3Lk94HF8s}fZ`?wD&MdyGTgXK4WFGg?`9hY2R8(zr1gX512VFP3+)@8+ zBY&Hb*`e6+@xE=M7~ae5RBlnpgZRs{tZoSYIe7*aSX&B~-z-fp`|Yj4NvyIl5e z+drI{aBb)=c~1f_q>08|9{ysxmLyrhZgT zTJ<~Vq$J5{Rr{Z?C62dM*wMXuU9LRn4?Ya6{b)?C{b$p>+}54Bi`Fj1n)usL|Gw^G z>#6X-E$1lE?t9mF;A@M+RjxC~FSc^8|NI@R(3)ZoJQ!8ZdKi*%{K#Af{`l@jQ}&p| z1qpsjBjc^Ls;`R!upI0*ef4(43o|n!R~~U73m^{Weoo`PiwM}W8MO1XRXx_$zhyI- zifkrE3O}}Vr*HoGkhNq`v}UBARFZ!X`-B?yhdp^+s82Xv@4mC0yImv5@R|UYD-(TV zrx$KgMf!mNF?UqYIezy>mLS19>10*6+Xd6$NJ)pIs@Fo2MRxK;yw3rK=dUwx&N+vW zmI*E*_gCEQlG74lCQHqu22f^#L#y#Y(+VNN*O&YLmzs;NP8WJE9J~iC4=2|1KjRDP z=Vz35yA$BoK_$CFF2wetTYl=HigVEoZk?1-soWZz~x!Xs?Ia^VWU^5^I0!&GS1qF zt07?DU~x|QhH}n@K0jhMh#!gHB`Q@YO4?M)Mzs4&cpTJIT4RA{Hy@OdNHX%=nC=!} z&S8z6!;1R{#ma&I>sc-S-`7L@y?NWCuT9(Ca3H?VBmyPfdqkreqvaV2CyplGoXd<3Ig@R&P+90_43jQS2lx(PKc4hwjpt7CMwW;UMaCmF$r3H=G<-)dFE7-wG{ZeTu`kJdoY*(sYg`wX1(?~|2BMfjwqTMj= zTGesTE{dbSo#72dV9hG>tgXiN^-${(*|p6x!Ggbke%pr@U2?a0Jh(33FuRXWdbe-N zYmUk;#^JOp)#|M?Jkg>iY092C3?8zDw+6t4T?Q z)HSBT$LH=s>k@oS^0Uxi_|W}VkATslO!vf#i&r!;uOhX~)^s_YM~RA(4k}JiZM}ha zAKbIst0?$+yQ=wD2ankj!H&ns8#V3N?ZmIsARR3;*#_UF%d0YwMh zP*zP3v7OPrVocZb{>kR9htp=>ispD_|qNG}?O7MCpo*4b8?#XbX%D#fo4t zC9M!?GHcm;gA0k!^_2ryPY`owy`4V%-N1MZR>rNy-miaZ^&`?|4_A_N{4I`3JHqC+QCz}bkHsm*U-KUsvR}rRB z*KxQJ!iHF-IgdAgN}p~MN{4R7yge(->rW%V6M_kFI+@KKdV?NP*M&#eGwu*lFev`5 zeN2E01)SP!j!!gXoLa391DpK+T6^oTsG_e?nC@;#rKGz-kS=L~p}V`g1VKPRkQ7O! zq`P5&fssbR0VHMs$)S<1@A%&Py}$SS>-*#0dCoKE>=XN}y?U*^&)yq_#vHIe^)_L< zfqY!u? zj`1+O8#1isxF|1?c0kfKY(pKFao*=)R{FElbg#M>81@8?BuvhMgo?ITHjjipnw-SJ z8dvS+E`RBK3b&6_(A(nt!d}6izVPh?rh5Q%ky~WU8JXdqoTXx-DgFLE@zuzUF1^iY zu+}5a%#3`gv82RLI@iXU^E*^5VXb<05{e`b;$cp(r@qsi_S~?i2%-O$i8F89&KeO!Dr$6s8rCq`Rvy; zVXJzS4p#HBfTgq1fihYi1xJh-byJ#9Itp?3)l6DRds<%&Tg%EF<-&Ck$2M;gh{+|w zA?8fd@rC8*b!81C(9_KYX6+S!#1DI$@T`_8?W1Dwu@aE0A zJNL@SMbm*h*J=x5L_{6xBzrs|{JHOZ8T)nXpRG^owZBd2xs9#sMaj)!O-&})l(KRB|IuP7{5&Wj#5al#C9rznw*B?7 zcusxsH|ss5pT6=N??37bx55$+HInJy0Y|&x;-KM_t8#$|)(nqM2W5QzFMY%)H3=}! 
zXBvrt(T9(p=_N)JAIJ&f%KP7&5OQx8`f@dKSAPE{FuzF2Gj6J)9EfRoyU(555G;4J zGeeRNg9VS*iZL@VC`m~t`ul%c0*<=6D|(WN1t?OQ?ym$tNcc`=)A)PMnli?IXmz17b^1To!F-~A2r=3r^sL2xjV7w z9J;YkFnF9tMk*7{*YIy9i7J0K-YR&6t6QU_zVKjgN|k*fbu680=g7Nc8>m+wA^h4M z9odkR zjz07R@Thb&ri0g5FjDE+X{>Za%V9V}2V!28^_9a$7Ad_=cwvTYZQrK5|bKgQO1B9GHO?oNWK ztwQfe5=^nLPRSFlPO2+94m{jC1BYaY1-2(*4x%uZJYzfzDMTh7>TYbJP_Jq5)D-2mLiucLa^ru-zpby+^#O=}H6v=9} z)Du4*u3QBjF$U3Z25x}*!p-%^ExXae3^(5(;e}P&7e(|rJ!mt|+JB4G3=*m0KPeDD zUTm8!1T-at70OH;8uZ@ztFp*N)h4=$XZp@waPd{~g4IUAJqC?Z*7x0$4joJTw!u4s zl~Igcse_uTYi0GZlj!4c`>9B?u(x^3V;@c?sL+yaO2*8Tvx35nS!+dm2Ue97koNs~ zb5fGt8CxZe%#AoHu_+=%V%j0*^(&eHS2`35X}l=Q?9${s$z*LyURDOBlm%(E+B?{o zwBkX(?RHP>IcGZS5U-`j!E!N-TdGJWFtR}?g>K=g)cIBrS5W8u`jSCL4{c-sHpAsg zPA+K7$WnXs!0IWI*GqkuCP9Of?;9_o^f%gJm+XWA_V3{R6e`QQqw>1WqOPsTzKc0h zjUJ*pkj2Dib@D==OjIO0g-_^YV}n@Vbk7st(p++*Lxpp`=hZ z0=;BJ#a5On=KdCUA1=2WtYSoU@r}#f&~Sm9yV20lw!UVtt%lxA9e1oSj?vhTr?;}_ zWt_AW*(ewMa0MfcK5tHR#?<_M7?R4|tbKj8i4yuP>#yAD_Z=Egy!@g?5MPJMgyWu$ z5!LCE8UM!G%4GP#E{n*;)fGhNvZ0neZ3!O9Nd0=s5AIM_d_(5fd0`oR|9lI4)cf29 z8Z=q9rPXlm2)flitGjuL{}%Wp4WYnMSqMM1|L2&_|FVPzw9cc>0JB%9IUWFdb;+A9 zOlpE143F6824#r~s`V(GG^Q|Zl(&lOA-F&pvBIehS=su5^93ItpuNUAhx$}!x3~oW zF8Wppc+Lqi=xA{Yq~2a!Z!cmthS(494kF>^0?ocu;NL=RlWhfr|$S(1P-*jpi z%GFMMntr}P{@mr+!K&7Le?-n&jwRXzohzm|{M7(2(~x)BC2j6Tlg|cB!*khGeO~uN z1yRe%-5SwDa8Le4%+vmbUl9I4QB+;C9UC((*`=KA<}#_YJ+#YZGB2ZqMbjXMyIXTB z7LgttLVj=}wM_>(t(9PoQcQL6okBA*bQH_4z~Gs`u6 z0sPr~akIJV2C}uHYvt_1CarGNeJkBKMW4dkr!6f~+>lhuW^L=MZIoup!s{)WyZM%J%y&x?XT)~q zI4t-e!TYZ|&65wy1{e<1rN|=1T^iFTl|hk1HDLSb*CKi_(^;8?iPSa*z-1Wa=B=Nh z-p@a66&q5?tC%ov8soUkG}z-Tlc|lkCKWovM6SY@_{Cz~~B2na)etiE@U*sqtA%(v8`=@)|itZStThd|&&vQwG@**uBtb^O) zhs&*n=acm*OqUoFCWOf_GVVoijAv|sS0^d5Nef|m7du{q-vYq%!JeI@Uo=(gF;2#0+%->Ylb;y9hxsg=O4+_?*UsG8{gHh5x>v0x=N?b zg}O*wYp?f{i#GENrgb-J*Ght9<7TBE?eO+M?@OpHi*IiqV;;}vCkw>rLc~|q)}G6z zwhBl#>rc-Iu#u!TohcDdGwmX>Zah<4h_&4fUbfAS-T3m;jrlEr(uaVfUzm*U*&!WO z-$ebH63T;3VVK%DBu=dB;T zymxJNS8Jzlqp)UK!xkfeT#+a11scKj{wRpmpi_cwD7p!MVzVhq zpu)(jTDbe0_<=x9NuV=4?MGix3X0ry-gWaT9)~$DX7$7qB7CDyZeGJZlSXWgP!GRm zE-%XJ)ABRCa+VQL}H$E{6gHSVr2V2mQRc;M^0v-q2ElL%o>#8~M3-sbQTDe}=& zGL`=HbBJTc_>-vp?<`A&FS-_*Mw^^?M-SiIUNFjD2uyzt#j0-dJ=(mS3OnUmUX_2W zkJ5q<5kK!!C)_dy~oUF=FP7|cvYd?}v^au&>9VQAHpQOn!wriUks}-4-%tG*e$ToAb z7UX#yR$XWMQ^$_~}=HU%#L;5RrSjJd@BD&nwAPQ7D^(qdnqeqIsdTyVeF3uALuYA7Z&i z+kv-RU^-umD}e~s``0|Vi`$T?{+mZ|%n!bpunQ8p+z+fRi{2i~xoV(JF^@r^@zP7Zom&dY&QZw3s(I%_^xbBWUyr`YjO9Ujp|8%%F z_oMuI4K|Y{&Us|Ho9+kWU%m>CiLa2Ai_BqxzOrY_2IPcJ%R6LzAdh=sgC0PbV5a=5 zv{ZBlj#w!COd?*$`H5>uK!<1}CkqMT%suj$eai6$V;&ad={WgqD@>`o zYv=J`!$17I+J$HB4#ea06}bdQEfZMBRUG0Wuz+6^t+w}xE_(GlcODq6_P1tSdewfN zre4pBJ&^HhC@|jZeuATPwGty18D#B`az(gl$yYhy;Nc?1o!ksE$<#`f9u4BJ4Xj*n zL^AS~>m6o>Afv1F|=7BK~6E{&2<`iY180 zcA@>em=o0gODp>YKH$n{^SXs+aQE3SHhC{CchnyKIA8F{r^^>iIMUUG*e0^ghSVO2 z_TBB}pkB?D>ooRR@Ad!4OZs~u^!IlAN%)NmkR0L^{Lr6XptV{Q5#2)^MPuf3($izy z^{2zbyt6Kjdp^%j_w5(K)K=og{<0$UirYoF5V!<}8eYOdSTH2wPc$@eGQ_`7i$&SV zm%Z6p`Q5LPXI|MPHM!JX)pj&!sY1tBviAgawVq~$8dqVP)ETbRvLVhMafE_GW38?v z|61Sx`#~))g67vO%8%ka#GaJU`AJP(u#j8c6FC{>EvagWzRc)~g;)NglOHk4C_<)W z03G7aRVd@u%fB!D(PW7CGozH&_=iqUCeMl?xO`51^1D>ly8BtSPL&0M{Oicn9?0#M z^x0_R<_v|qLc!}lUA`y3dpGKk+R1);=FM69ptG|{lR5EM5n4aj1kzK91{oOq8Rdw? 
zS0y$s)lJZVmBCOEvWN{Z5#lJOB2(Y31;${|^PrFDk6wy~qlu%Yag~X=)jm&S7fTC$ zKyJ6d1E{l1{Z7N+*wvKG8~N$n?R1xKssDLK^MF~Jl9Ll0c`p$}$L)Jn;Rf@bjoQ!s zyL?2fbJMKWV3rcB?-p)bEwQAYJ}<8x4u2AxFpQNiOTZr)(4zSaB9Ah5RUWX`1*Z~rP&nE<(v8Qy zyC}(-h-hYGQqO9-v`OJUG=Xw5L}Q4ziG|;%(XSpIybkCK++__^lxq=7t^VN$ZnINd ztu#F45@b?7w!UuSwYZ}87f%m`(~)g}FsD1e@0z{u@vlj?s0a=DQk51NmHQ$kV&8=x z&eqAXB=1oItpP!}X*N3@F~2MzA$IK(N7V9V6Ne0)h7hBktO7;42~U*tXFD=@E6`$O z?&908$Xv+|FA-_DFIgYb@!oOI_qYi8L=XwHC0#oh8UZ2i>)%td zNK+9i2LTZ^uZEmHYo^{9%bVEyt&Nvfwoz8aX#+FGopIRY2GCn(*>F-IklgKODsant zKGK%%k@M^A>>j-Z&&TN^0@fQOerA&xh|#q>T^z@gRKP~F@Ke$%K9e#V{F)x(eerEd;t0oJBvHqE~DnuiE;t2m6A( zC1oRGcrH%Vd83HC9zrN+Qxq)z`hX%*uBDP!UNy17+D>j$uIz?{y0fjq9>|e)MO!-l z1oyR3@f4}|0Yl&DK(*OE$B^pJ?RU0fH9b2ua>9WPtGkEsw}XA0W?m`oL03{EZ1pP6 zBKp>qOt@ zGc-yb*!m_TX1s~&xVdiMo+ke3rqbKbUFp5P>D+o_3 zc*BE@$tWG-DQ%+7y-38(m=JK+Hk1Ul8E|PvG|)=aQ0*IS>~HvVo)+&h`#s^N6*O}= z7fmsGc|)t(j%_*-Om>m?G3{@m%%xT4h&ZO$tw=T1 zcaa?}t;Vw`+GXn}HPJU4{>nE>VM1f4Px%33Dz2J`?pn%K+)mu6-GZI(c2C~E8}aWU z^1Bd>84=1{q+KLy_I~`4^WFEi_Y!X~z2)*nZSX$zMxcqey_xqMO$QI@>0rSh)!!%a z(%}#WuwLcEtX+gnx6z_aj!egv^X`o~Y8BY}h4DI*edf3-=Ws2QQyZ z{kXDSP(g~)_B^QgQCWBUy^4T3jubSeQNJXhUD6L?Ku%qmMnQ_y9M7>P99Rc_9UN^r zZ|KQfGsd$FZPab+F|d&(|C99RGkznNxwzdnP#YiX_FT7wUxJ5?E4EM4A2Tl*07tA0kYY()KCu_I~W_CFuSfY}F;rOn1o)ZPAs%$LdYwAXhm zw~4MP%A9?^L-8(9Fkajje{D6_lodwYrz=C-kqHf%p04i$wMR6ZzeKOQtCqg8)x%@y zYopn+mBp&Iv8F-1euh`NLb2Dy60J!a5p7@eEgA{FUi2`bcg> zBJ|`-Rr*2t{KsIK(@@oUd9kZNR;R>D5 z%!(fuIO@VoBzdawfdiEpBawvl(*l#hE`#FO3?4onx}V1YICs7AZTfs(nw_)u3@{_y z;CPWVL9+Ytia#X%3h6|tiB%Z+X*TN8(#P0gjzUMyVDAl z#+fp$VN8vF_Py#z?m?oUf>vkk^Wh@F>6H5y&O5sY&*b>wub3p zk!_-cbsbdB$eG8v?A`Eu=pa%QOVHtE_>05NtBdae5_PCvhNN|at&Qj;vZop^hjGYQidE`%?b^R29-!zkyVMQ6 zqBvKT{F|uQ@DkgHAA<)Bxn5d|X`Q`$wJTCz)YPA%KZYtXvOk8kv%C9@o~X!Ao@LcN zzD3N5f!2nGz{!bxzfiF)G#w{l=nCk{?Cv(8<*EC2-mq5BU)(vbqg&^)x zERsS_yFSIZ%8Z%Ty~;|e-$)}wFVepv9BvRgrb~(zqA}X?JD`f)NFB8ZZz|P$NoqiX z$*e(IP~Jp_Idz{TW|G4jctOqkNt>0efd0fDX=*$)W}>ZkjA6y#Sf9j$Lhr^ z>cz(b2c+R^90T6&=9+7|;dy(ezx}ge1KU$XnUI{_Cy-;rP<43l6Lw8ZA2Ns{Nrlin zA6m!ylJ=Y6*Cf>P^l=SU9D#!p@2pwgD=IfY+YgkyNk)VYlk2f;U-i(Ym|IXdrOIhe z&5g2i1Hg#@3VAzys8Uh#sfsM9DL1_{0-C5@VYXef>AJrD)JG!v&){NMk4^ zFmNDKAM(y>no8}xh7)}ru~G2^u7fr{MW>(}|7`NGNS`-F6vtRJ|92+K$gFc}cXkSN zpsdT0Bu6p8l$0mPNyb7UIKQU$-daROpfpj*7GooAh-E3AW6k+}Ol9(FWwLY22Q_#0 zQG%2d9LBp2!|&$gklHsLxVeo=d4dpa9BdnUt~eG(x&1hwZ{9Lf5Ne0ITBQb(VGV+6 zA*9sSM!J+PLG`D-Za3DxS8dLAx*1;BO3_b~iuL2>zSH_G(R6#=UF^u>aK++~7$ymj z7>(7mTf?jd@t8T7@%$3K60(@39HrkdQ{-Cc`*WVhbQ5;itxs)^=S)xkn4g|Tykl+_ zeZTl1JKHK<^i7I&YyPQSe{!Tm1g3gLcm^XEb=eL5)7o(ih3r z-0Ec&3b(>}#-6Xoqi%>+jo-WmhM2u$1N-|CwND9L_It>*mFmr$nle3VBHNkrl-ZOtx&2KDf13navT(Nj4cw%>K-~VYb3DXp zD`a}7D>nlp8z%KN^EaV6t_)srrqFs?zvyIIFIm(BU~B7AkBatQuwRyZ8nP%6L}w=A zYkRo&d$!HHUBD@HzDzwN(qo{mOrO}F2FVC{`tLOEpC0O>VH(XpM?B3_bV8TQ09+S6 zPOMoLI7jG=uQd+P0&TTTkftVsCe{5>SoZ(A$zn>nd19|8 z3nrYWU;wN{MgqJE_$dVAF|z;ntw0$x!x|%s3FP9q z%Cx!4&@`$TG^!4E`3#yk%RCJT7YH_h;#wat1_ojc2%P zR5@r=6>*exW$8zj&B>O{!TfW$^D_0R%H~!ys>o0L>Wnoa^f=3YTC`n%&FnJgJkQnt z#`bV|cZfU9T{dT-Q3bm?S~Dq|`{th&R5jKDV)lV#s{3HDW~=c4;}+b>G+DF1cJqv~ z``GV}kmf#un1KPXnfgp9hePzFQNqVq$C>)yBx*4=`^3@VSbSJ@Snl9i4Ne_X%`63A zL0>Q&C196N6KKk5v>ciZ!UG%vc8Xc`&*sLI#STZiqG;fIxGIV^n>DFqJ+`*&ehB=u z{fK)d4Wvwz!2(+WQ+c1(c_W_CYO;B{A|ANZ?e?T<~CC%LiF*lQF7ia!P?I~}4 zczxH1DuN21Gll2F2FpI^f5X{9S*>Y(Q0yB22;hsyn)p~95olcxyE&RQ`}RjqV+H>% z5q6^mz{$l~7Kz{DRx2mw2p@{Ja=YK&yJieNcA0e_X%yjSfVx zm?DcRY8y1)LC6wWcupkH{(+^&qb`=nMy3f`Z&X@W+k%kh7J-;~8%A_B7Grq9%`CSq zu*Rzm&7X#KBBP1*VLwfi=B~w#J7719&3(Qu!PE^v&PcZ>U;GyC>j(F7sT6^pze`GxAmcB%fnlnAp2@1&oILaw^vZ=kSn}_pt 
z@}m2>EP^|C`=MT=>I3X%pe389^ANKKY*Y4{;_UGrA82%WJBZVe)=DR!fLnPY;e8q# znW|7cfCQk~8LGC4kc?MPeOUcAa3+|u)zCCwZ7zYaCU}lB$}_)V^r4#9wi12qmf^HV z>284)rO8EFaJHg0AD!qEQHi55p?RVap-`h;a+jHA2Y;Yo{sj;o>tDrjYV1IxiXfzk zEW*qdI8Y>7>C4f$d$yNmKzl+ps=QE-P*zJ@J+*^E^sVuA3HJ3glr!~7Ck9v^`5qQB zR@d3IxV^mx`xIXq(vD?lSfL;OGJQ06>(z0)w?iPo9xi;wTq)+`LKjjwXipga@!P9^ z{6?lJ^j9i}`5$HN6?oDmGCF-Q97#0+iH(OzVeDf83xra8b%nAWYjlle;hpSq_ha^8 zUm4V%C;I1bDi@-Q**f;BW?O)DV(?pGKSZ*5^B!u9`!AL^{|ADdIv|Nc_5WDwnJ|FebiF((Rcoq7r7jZfK&Inj zeOKxR@~g0kDyFDRm_8f4T`*t3l9+(2Oy%bDg(KP7iEKpeFQH8K~bKiPamAjS>dH?7O;BH8(2D$Z4Q zi0x^t!7xNXA7nR$a_YRN*=_G&3!sE7=h^>PU+|m*TErd~N~)&7^TTvp$OcvRZQePd zZEAFsUe|c$8$Z}A50iS(;?LvmVAKw|l})vQkUsPUYz%??y$8epx?^gDp5QM90(&op zAN9gN$D?riEXn1ei0PeNeRQ|LQ80#p(*vmB(3iiJZT25ZeEO5$zhgP^%K|YsD%q<0 z%3=Q_-A~JG)}jhXIvcI(`nFVkqtbAc+W*kfn4GgWAiV!p)9;j3{(rPI+yATJ0ka~a zZjJk&M=`bj&mLus5qeRb8S0v}EnAhjXP{yD^^5yI;{`P1e;Y5525StP5}9+YsJF0p zy8fP*$^~~>Hu@vM@;89#jjsXFc)|fOAyJsff{-<4a-1ER;G`1(S_GpH^E2^hj6(FA z7MS?gX7hjb691=Oynpp(aF#J*`bMip^P?eKVFkOqS%4KAR{Cc%-*skyn5m?J=BC?g zRM{d>J`nd9yWKl~N@fL2S$aB%8Q9gaX1^QO9s!dvj{+%fa6fti>px8A68`5s`~TkK zskjB>#h8m|CX)H;M4yP}s`f ZK59Gu%GLS1D1rj~)RncB>J+Ts{6FKc$3Fl7 literal 38430 zcmZ^~1zc27_cl5M0@5uaB}0iQDcv#%N*Q#Q14=V=Bhn>;RiQ6IAbfM+=Zl+! zz$-Pn6AXO20e}2d`zCPt-L#GXekXQTHGqRaq%_xG;NjVT3*bK`+?4g*UOBJb&U*W4A2#6j#(5I`IXEpe9zVW$Uhu#%sLs5ivQnq4(4l13erF4fM(h4OG@L%$ zcJayZQd%jmMQcJMnOuaO+=MFyZd=Q?KRbKEw_J zn}+n0$i|w)Nn)s|udjqg28JyV^xs8DN^cJNcTs=Fvj+*5JEc!%4#jcMiPUxUWl{PB zjPymdR_r7XIKY%)t9=jT)X!O$I`7=$7(;+iDW;!=|Uj#(O+~lp*Eyc=yh5<>> zXgI&U*{{ZBgqkPHVf%Y*TBa53lgWhMgj^M_uIaAr-1LP$Q)^jZLe@T35v%p;d7K0= zVvL6$s)i>ME^=<(#B;GTkBXa0o3s>}9tpjiokyG-OIV7O&%(syZReN}aD#;j&j_o2 zf+z%>ucdyDGJP`oswto1cncSoVR&g&PlEKO?G7ZHxs5upb`~k;f*t)z8%>=w#qHVj z_kCtDl90s6Z6VGbZMF=vzf_c$lx9|?PR7EH@^#OfCL}#QosJ)}0;?OSL%`ROKakAx zMla&>%}DgLQd9#mxbXCCOsK<&WDD*TUa=HSAe1E_M}G0+kM677CELRSh0E`YNL#~7QYUW)PK&}SC;JNe?bWcOaFJ+{ za;G$d(7>6A9+W>{g zc~sih(`;Y2!B1mDS2lh~oV`kr-D8tnu$&`Iy5Bw}&7W(n+B0pkUva?5XR>cI9(jA7 z8s}wJuB&vXZuh)t4%eWQF_{4q%U25=f;5=J)?vDKu(T`ePWmLVh}vE@Zm`eskS#xT zQmfCtZY~517f~ab8ef6wZewCGefG}=E4^X5tGM$kT16hku>zu=K9WBBv}``HjLCfI zh`{Q!hsW6VXa$SGN^6{S>W+M1Roc&Es|iOKZasS!M}Ci@hd~nNQJ6Ms1v^eN5G`#B z+(PMg;wrNFJklpIA*N1@mXl{9>62zb3&^Fby{kWlfwuH=gP>5P&EZT{W%RodLRw<|TcK<|@) zO7+}!q1`pW%;2MH-eSD_C65&`GsI1^T&v{%vWu7yrjVnd3*2$%g#*#$PX9~}v#0O# zbGyDx0+I8wCWrF0%=M-%BcOJ%IVnaTW zlm-C%81!;p6!-P30+-+&FL~$av`mJ@pKJtSAOZynw9c2MReCXbe0~qqYmg0|-Nmu- z5)MI1hrf>8LfFd*QcJK^qx-n4XwL+K49)3wU2yrbO$2f)og(C3Q<8cio-Jh}YtQVk zRC%8Glpq5IU%jgVf)0EJuvwL6?IsujXIov)3T1n5J2tt46K>MDwJYcl7znYzWS=DC zlU}5tI?g?Pd0t{&i;*gCJ@X+YFBOE%FBBx9;pPo_-eB6tdSl1vFQdV-dRLf-(Vb67cWGAD2`0U*Xkn3wRmYF%COLMs5SrivS@P?>z;#@ADXf^wa6ZR zdoFc%Q|ZO)K}meomR?DgA7Ht6$1JrsPGZs;@^v!)?4pQ@Th8MyDbdRBejq>n+#i19 zRr83bnrH`ytx+J}WuN!(wRxth7f0^J;1pB>VmK>vbJN^^U4J1y+fAg1Kjd zpBSvXwTb2M@^+^i4Dw{1U=OUuK1BLo?LGD{DPi{mO#hLj^a{g{<%Y}0doH>~o5f@j z_{~b{Z6J*BXCjZ=-~MD32u_l~+{uI&X2Ml295j%RMATeRTdt^v^`G?`j53y`oeXDp$wM0B&EHdEsd4 zhLT-I*tw!uobvlUFRYp}9X2Jh ztVA;blL`~>6JS2IltCZkGOlDQb-cWHTNV4WJW#UKNdi+FV7n|mkBZTa>5wbAt02$e z%9}{ZV`7Z4uCzSX=cKIXa^Mlo&f=I~eV}}ELC7Ng`VV9CCOCh!%IVw{!TY7v<0C1| z)jGm=78j6$f4oS~(cHL!%f6H_9davyJC?||tVzx%Ac*vyNP6*h1ni9qL;|n4_aZ zSL$5^UxCMP^_{r*UxSjz1~@g*I}-DVLW;L~;N?&+99wW_(ANOZW6Hh<&A5G&&~c8+ z-o7gfSmMQT%+WDJ{f<{XpJ2ZSk*r*p=kG7SEw0WhtETjP(c$C0dKR#O^hrjRim#&q zb;IBAcFGTR^#X7|1LeEF2~E;44&Y(tO`1&zWMVyqop0^d27+OaK7i^cXPEG0-is$u z)aM4yk!?S0K9|Fs?l?5u)AXI&c%1WnG@$Xl=OTJa6}jfq=p}B7nt-9H>T6gkLf2NK z{eNmkNcA#oJ*;#a#FZZ&Jr88FFnB0ThndmD%=qBm!)ejoW9LSqy`VFDE>*@y1wk|O 
zIQdMtjg(VD#{CPdu84GhUzSk0Kg{Fy&J1_eXyefhSvl54ZyzED7gVVIT)UMxlg2oF zhB8>gt2IMBV^UnocDkrsX8de|xPr~j^ZiD>%iv4!EhZ1V$t(ITm{kzVPo^_Hjm9NJ z#yj!8-Io_xcQ7+ns2b9};?g3b0NM&%OU`Ue^F{%MrkqLHK%kjop?~Ny!na-W*s(ql z86I?=A^!U~nZIrDY@)xF!7gYd$Kj{eP3bAmlt3Kh#jXtcQ@h%3RGBD}vpy?obs@ZN zw=rYsosUwO1SXw9Vcb4R}c`zW3JTC;n|BS6TK zMn_%G!(k=?Jf2DqHfJ6uFwPJKPjmfBQ7_)Vt%?cEaV+n_OK+L(}2gRJ+`0<^r2cxQ3Uk1uobu=ya}Z4lFO~^tmdnO1~rY%5fVc=LCaa zIIP1w($c=-;aWC^E-Sp@EyW#7Q@4AtAlMNBu8zhDCepV#Lgg}!7eC+M@NBP1k3D5n zy!;V1`75fe_su8*ArNN7;5bR$LVi_dvq(-&hGIxbH%Lxf0cSfMHhN$Q(Da+*!5RH^ zDU(0Eyi4;>EhQbo#p!Qc`upA7;oNWm_Dpnpd6Xn15GKBZ?8Cbf9Q3+de7eJ@4J5;? zhVt9b*wHxZ-glMx3;hrM3c|Br+_3GonF9yBf$fNwlVNZ9r4r;3i1s6y(q7uZu)C{< zPo8f?y2}UX(l&|u zTm|Vp?uP7G&lW;-9lGsW&25D z#+LkR#Lrt(Yegzn4eDug@`I)?%BJW28->mJ(2+gY(D$E^{hW-ugau*7}$=Fy>FN3HF;CO7}29^B)Id8*=D(SqId#J zEA{kVSlO!;r*jrl)=9z(76CSbbF=xt%cV>=C!s)82R)LmUdD@UR|hkLJ?Jahy9K^^ zBv60KN3?tEmRAXO_0YZZw^wUHKunba(&ggm!4XQMG$Y(Qi?HZXxs?~(#-Xi?Cw=WaWc=7=4cf{f)YuT)nfm_5G zyoTkjV#2ZYF3*3VFvpP*5vmoblOaeJR1N*=!wp;j#<)p;f40f6>5l$ly|Nw@nn!EM zsQ`RqgQFV6r-UjxclPW=+O(n2DTYe+2-uM{cAI+tiS%@TUjPiPVc)CNPfSIsn1E5UFt%rP+mF3fp;A

    uYea6)Rn$XMzfLKm@C(b^8sX7FDV?# zIR(5B)&$ht3{Uf?DKW~bsHm7soF*bCAGnBkP3j!B_iS3Uz<}Jzs<7>Gc&s zri9#r_dj%j#B#8M?}r|Guj!lMzbX!laWa;P)XJ1Cj&G$;*j{_a8nsN-79JdbQiwxP zFngf3qUQIMq5o#;lN|mxu2M!liW21fIrm5WW4eS!x=L0-kvawmqAt<$XY+)2!Sy;B zK`RQp=uBihn2mJ2Oc|;GY|rPCf&J%JCcUi+hy*IZw;N2YjI|L^)d>o&d1)Ff&E0s))`577!=tCxh^+kPQal&^)&v#hperwpKEKs zPg9mge0M>;(9uzT{hG_FGcMLcBEkLp=dU0&&704SN>~LG^y#{lC7@8t;f1BcAqvI! zQ&WbnuC9dcdj#gN<>h5ER2EWc+K`o-tIfq2j)Yo%BoU-e&>RtcG&HgNnI)_%N1!pC zyk{}ErX_{fSHu35~N0`@-UZT2ls5l*&!hY#*5tq`j2HgwSl>EUeDF!`3v_4 zQ|u94=C`tM63x{#*|}N1Vek0ufLn+Uh&v4V8l@VhjBbc^(x+ahUsd3hJcjGP>* zOkTX8{*$j|Ggf5KmnOpVj`|25JSJ-njvE(+3f-k%=l%Ka9%Psp|2{CxfxeX$$GC|n zia8F-!LFRPN3<+wd*p2k-@}`8rG37cgQPu+O2ieoFAbGZURYb zgKN0*IKLCAbL##8U5{_+$4sx&Y5Uf-t)0AOrR3|}DT>4Cmj@=HbQ|8Yrtau9K0-0A zt*!kbB}K&;As+Otz8=9S0t4;Nzz2SxHxjXW<@CD9;4ZMsD=TH5JEr{gyN|gw(%V`> zN$W1Iz=dBy*wx^#+=LRQa}jVyXZ~IRn$~}Yg7C`4(UNOy{wRJu@*&AKoP$R6_3PIU z2^f%nM+`6BoC_a_?teS7Ty*92}5D)R)p(Hc&P+3yvwS?T$7ABp5cE-o68 zlqR)#Aws6kxL?xXzo5{B)++sTB$}nA?*x_G^55{(&q7f?1!q0!qITbZ{vWF<{ zR2h@u)X2D2O;nVd8yw3d{_Az`E#Qv&wQjt}GjOd84^dIkN1~z$2?>fC8szcvR}$l9 zf&O!TsrrvNae1tQL00bM_W{?AxwQa*BGj@z<-RDTI$_>8X-%Zt+<++E?Gy z7q;tmwh#RG_V#81&bJ;IRz!3s^1xR<=?T04l~%TX^M+U2bK80#OSUgtK08gsM%W20 z!(X#Zb4$Q zUZRJGUp`Y)Q*tzF^z-gr`t|D)7B}Exd{YAC~dQ%n%_Jz@vab`jGjVRgiI{ zps-M=oR!{;hT9*u{)UFNM}6;^A3pqPS`!j(8qN0r)kdS z*r&aN(W>_UxtFr!50L$ANvH&r^I6|r-y-oFLN9=~+wT8^hYu;A9{VB6bokh#R3DME zkP?Yi^@0CDm?32<*Dk@(Mzq+)mj3w@l`?1TujqwAv6p!0cM8{5jK2mb{qd;8_ujpC&t6}Hz~Q@>|?cN&UJ z>ck#XcSOXzpvoo#cCi>wos;`tM1S_&Civ!UF6@;FM@bf$HL(U26clLsR-`{PuKuvO zxk)iyWL)#{<42mP=;){F>ek&KxqwJ08W*Cx=i(w*ag)N4OI9-Lui-TubIVPEBO=aH)ocd=gYn>3|DB1lkazFi zC1U{KtiRarwTSQu>gEr_&*)Z-wK-0hvQ^=L%;x>uE+9T{XMTFz4klMjy61bqRC<26 zyA5A)T5`T2ST2j0Yjd{D6dP>pokGrqrD08l)^YV{Prhv7XPqPbs7eD^p>!qN@)IBf_-kGc|SgBi^Qr8 zB0Kn0OUKNh97g{8H~K@>HFDTb3OhEQ!tTn*n!_=u*8sc#upU}pFO$q~(j$93^JnAo ziD7m&Q5%E{p;N=* zEnw#_j;fj*CUp%Bdr+Cuk@4}EqiH9bq0h?dSu$ySCG{VFyvjcs?Odm&mSo%CArLFD zAmX``{@uy`ufqg{MsZ;ioIzyoigF+nEdPqE3!Xo;V)?Kx&x*H6wgga}GZ{+yQ$BXb zp9L1ShL%oC^pU)PL)4%BEd=<3+j+w=+SjRWI$0nvADdrMku*F!%-TM#b#Qq)_(6y& z1-V=M%v@NM=T6r%9XXmv`UT@KIm!vvgZkmXhZC{L@V+7%eF{Ih7=ReAV`a3H&Nac@ z!l~MNdR8#m`;CNYjq+?HZAu#chC(T9Cbf5?0F%C_UW+Ke71= zl^RR~VyIvlMApz%zFQ#BrpM1s@UtCr4s}I)mK33eW4`z{JxgUk8*6 z;E$&pFggo3$HGyee1z1DGmuZfIIr9Vu1B%NC`m4XZ`(*q7D$>ygt&^1!Pp&(viecG zaUxk$QUpE9Dk|beG?ewNY1GwVnbc1iZioYP+3(&sA2P}z1z<0h>Uj)6I*at~R~vGA zA&?pYReLL@!7E8Ot@##+v^KhHn4Q&5${S*5@7CwLoJ-LQh@fAvq zc6A`d(3m6}wMblZng2i=uOvHijh$E9me+?m@*84rdCKC>TZ$*rG}P;a*MrJFPs-@I z1^oT`M}SPO_GfegL@iCq{l55&vr?Ud-v37EV$G9;?s?*Y)>W#Yl0f#Q_+KL=egn8E zKY+1|>-pK=ii%=H?Z!$i+eu7)*1w8v-C<&?*{ZDeIm=jI~SloZ1^!_!Lulx4J92~fT{%&miC@Jaeo2oFX zapeLoItGTs)YPtzTx$B2_6A<^NCn}F2tqvT@dCi4Tow(KsQ__(k+ktpx#{=c+vRQo zf{aixhpD#r#OT2)Adi33%BDcRBOv`=T_>T7Y`I(#8uHg}X?AGD*0tQKgs#NIKQ;JxTEy z$g|SnRiwTL1j03tE0{Eq?$uYO&t z`fF-VLy&o>efSBm=W`3DqxhVk$e;@)z!J|@RaNiu@kK9n#$(%PwRR6n zsxHa?XD}9Lg9S-siDtdejAfo~2p+juAEHo!)?PDhh%DgRJ-XLWtNK4f?OCHqorlYI z4My0igKQO**&E7W+VwTXbm~_FfOVPDo-}N116x&dsL#sLsWRTedZyl7uktmwdgJM^ ze}D1dJ+E7ey{7^Yu)arDj4v<8uZAmF9*uRA(9}^OWejfLy3WbcD`u?2V z>P-=F@-{X!eD>zen|Lc``$bot!TCkonV&0X%9f$mio?gYDEV`WVqR_S9gz)>JuZJf z4vtt7@psji3nobbUjJh+toQ8!a??unnyTx+Y);9L@#9CNQE||6$XOkAB>go70mcdx zC?)y%{vPCoFeg`VEPr;=m+72Fv8e@sKmFr!xz{EEs3&#g$D$Fa*rOF%71~WbfmGE8 z|3XIzfDS#|o1Y_~PYo@Dsvgey{_-?&3jx^GKL$q{D)|GjY{GXT5eiQFLG!&*EdVe7 z^HJcvH|hA?kQbk_(tFVF{U!>pQ91aUe3{-uX)NU=e`ZLy*Q5T_+$iTcU@oD@*N`JX zM~T&a?B|>9(+uU53d`f%t)xdb|9qZr%sD*wj}?Wwv<`#4eCLL)cRoZ%_br|s({HM9 z)uMkBCOuVprF!#r1|PYo!hd@KM)A3ab3cEE03nGgGAK_KvC*vC`rk!mtoi#gK0dDC 
zQJxDtdr$A{6>7GtNQ0;o^l!06d=cc)0A^^i`yahHy?xZt@#H#yv%D6G_p#VCX;1zX z6Awl}A4HlbmD$)DLN@He7sISA>&)UkBmgZ62=s`9;cAOF_PSv1kTyvSqV?<${nl4U zM@KzF?4kX+>HmZ}6G!sTSMs>EKw+~}AhA5Qt1^_V{ zNleQPdTwfpI#~Y$)-N>&m&5wsVzJAKT6pNo0{+@H2y7s$pT%>A+I3FaV>1W1R=fd; z3H-daq(yh`I-CfSNEc;1swdWneGvx~c>aMw!y`w1sl(W~Euy#=n*u%T!zmtfuB1F; zp~qQTPZN2B_A}--OEr|03B0|%+uFUyoq7RHD`Z6o8jw9U4&U*=I>mJN_FCH#;dd`JFjN5E z&;%q8z|rocoTFYIFbG0=hPuoL4gPPg_ZwtCQ=Ra-H`xW-FOJW095gyIGB`2M2FCrieGmxm5h{Xn_k|1T#2-3ZRBAu56tgBEG(z0LoinO43-^JM>l+*kG=#LM4! z3W(GR>;F&k(f4T6;_Bk`n)-QTS6#ZfQcAt{7L&KWe*L;RQ2d3!VcT+d zV$FUJACNoWng!&6+4)4NMJ57gnoFM=MkwWFliE?HradGKAh{7Foe6hvoVS4K;fdo> z+=HY1bH4a*|CDR~8x_*W&wK~ym*+#$9 z{p3zy9!bf`M-m;j(N8Ey+88H)L~4(YG%#Dweoi{O`_%z@3-E^2@xN01-N0P+Z*6S8L)& z&OOwpIQb@$S>i8)xr%4&P{YE+&xnIyX9`8cKi2Aj8U?Ebdse-LKink+jw?YJUov6&T%|Z$7Oj$T>zCB)+S$vw#SxTkFH#c=zOH@MlEi#w zdE7J@A4m!8spLAk0ZGAJ7*hXgVOQ+?G(hNH8T&!s z!{Z7H?te^7{KE2#3GkFw0H-GO{987__~c4->-jA{Y(PoX)!e&qyqMBwGf3c6-oz4l zZiB%F;`}mZ!|fO)^Ke|o)a&wR5K8ya6Y6!DGDk|DJ9nN&QNH&*7VcWXU|s=QQg4-$ z!mYczdl`dKk&gNB`BO-kjSoh}Hy&aBcOuQ)r4*}^>ij~2k^^Ef=m>_vT7%1jhLA4v z$*rmo{G& zG~T}{4ZAfY65WD$gknx<^)IaaGxDf9+%$sB?NW%yee^vw`dI|ik+zB?z`KXPm4}Fk?ryq0@uI_15>hn7PggY8-ki%|K8j~2`y!ah&CEcw9Cp{_oYUkww zr?>gE{A7O+LSt|#Oyu4UC~-U&pybAR)NaiBO!MZ4Pw7aYps*7E=e!5^!q>O|@3081Ci)m8Kx?rU5yTUZzWNQ-A2@QjrYJ<;EMU4!9(_~)Cg z<`kz(2#3T1KpW})pApKdrUZ~rG()Oy^yAa6hd-t_&j)b?jlwGLw(PmB^+*zoh>GnE zrjH#zqDuhh;Z}02&Mk1?_(fUdWkRCxJHP*Q+FZ(k=20Ft$&3)RiX=|>wz0jtoqdDj zn-7rkhf6AIek~yi2^(!;<-z%Yp0#Z;qDD(%U{HyUTd$tj2y+D9JZ|`vX@`2y()XD@ zD&im^AT5N#A$;1Vwe4l~2@wkgCQT56j{%az7xi8yn&!Jeu%EqV{rhf#v3pb62M>er zj7ySd=-CeKAiUY8ZIjg%bZh@fAIyW*nZX%2-o;^F;s(CgzI6m|clbc&)kD6B;0ShjjjJU?9;9sr^#w z>EMP>n~+#h!)yst3-+4_nxDow*j{#`Md6R;F3$~<8-Hb{5g1%E=$0a2vrjlB&I<)E z{hbGBPX72V;2*bE!>1oT?9t$0HwV%WXV_scuxT6@8A3afFV#E@A#{CMG5bKqCx^Y7 z(g}*Z%MfN%&FQ7+0svgj-3LQ5FDny)LhHZrOK__>o0cahBwdV@YVx;TU40+ljVr5p zb3Jb5;hfA!Fm)OLF-MOE@O$szGk&QD%1mdm&k6K@x(n9y^mU|7UdYA#c%`;V?|cBo z$K9Kw?-zvTIUVdUC#93K1q_~Jui}wq_zvkYYzeu5kenE6!akRN^7<>NbfeTPD9yXU zd>W8d2Rykj>s63lpS@`5oe3gNd@kf9GGLyGI$H3(WT+A^YjTHwi9IGtvbHL;3-o6P zPQ}Qv&{MbI8NJY~<a;ctJb5ci$c(6)c zp`r=$Tc=(-Tk}}Q#hE%ohCgdiroy-Lfxg5Bq~N?C?v!G_6;*RAi4lA9s7JRirf`+6 zv|#aW#-kwVjt&pM~UKToycDf`)c)L!s8p-FAMLV!q%6sW66S!RPts}4Gz44qtG&@ zToO1~HJT?^yy0c1CHe>Zo326iK~$Q|NY7f4wa1O)pjZUOUU#WKmwJPY4^SwG7LzrH z9eE8d;o|HiOvE2>L}Ga;F(rk3KkRIy0KQAZUG^~qav*Q1(_z)=l(Zf-e1aJPIyav2SsZg_-s}+&^6>hmpOB9E!@6!a1y0TFifkq8kbw} zEiQj-D-sZqG;%_TA)woaH!*XdjO<`{N_@0nP9%qjewER6ktXV=3>~KcUnqe3Z|q%n z>h%RFJyMir%hm|EArApq;BT|L9Bv_Qy`7fU2f(L_G#xc@c51ic!&N`#C+Z=cSNSV9 zYf}vtanz-cHf8y+iv1{?WLdv z*V{(!LAfFQkAJ?ig?WUeZ4sgI#8M~aMC#=B7u@?l!AU1v)+7Y#8|1G+OrYJ8;;m3c zR|e(LU$$-;Lq$ao61!`5)fc%Ulazo^fVuij;}zM0AB}DVm_sfVzDt^$-B`tkXTm2@ zHMz()NCVhWd&HA*Wd;pKML-CunGIPoL+};)R&o*_Tts;eWZF2w_Pf-_wyuvsLM){y zSI9MFy(w;=V3Byhsl+hl`};^aGUF}8^zg+yxT6Tr%*T;HQY8Jql8;t>O*p=&`!!zq z@T|HHJ{=BkWgKtL2wXz+vDp78SkO07s`x^aupPNBqn(=htUXv)4aM_X0sZ5eoU`53 z3F4B#OvS8VJLPx|{8eS)qceDa?_``z+ev?B0aAsg`f5%h;#5+Vu}8UX8!72!=jmR- z@!#<>dMXR7c;-mS>f7IW6ot{Me4$N^0OgXF#?^i}b2uK(OGxW`&~;6s)*GcEWe>UG zgHJz(BQFF#)?vIgf0L&Zantw$Yo=exdoe!IR@|Dq8Z2tdnyQ)vrEYk!+tmxk|mwvkHzZiXc zdolG7J3qKmhH23RLc@FQQ^rWilyyY!ZlG{T*71hpv6@W`go_>NZy2M*32S$ykb3%3 z%A*65>SXu5(2Hwo2q-YN`LF^9G5F?oI{n>DG=J(L_J97ohb&vo9t^ehgD@v z^7KC#=}$lZ8nb+ZKT{}5rN`qX{Ba^$+498$Ope&9NpG0~H-p|~a9GE;>k|nRx(#nz zYnX84o{NJo64a4N3M#?>io`|RLK`ZqO~In>u@_(b^eU97 z6S5-LncY6-k=^GuZmn@CDcgz-RFS?zybO?^Pi=v7x3-=` z^ENEyDCt-8Lbt(8Lgd3?t>5w?Y)SuYd$vl1s&R~65-D$o9VMqZ2$ws#4Q~4b4+z+6 zdLGTS-_{rQBr9>0ZVQt&JTu@VJd_|(#C0h(cf>F^XK%jTKbFC->p9)M3u;t3}NZA)I2kny1Tbh$N|4!2& 
z9^%J85y%-BQ)svtk)gr+)-D0Hd=>9XbxVQ&c z&6|-i@v?JOhv`@@2IL-oshx%yvmsP!Xy|@Kf8k{tkt}M{+zcC}0raS%YNX70cekmw z8cF@wZYEH-aEQxBl5)q-JC$tBLqm$UBicTiF5g{K@xC`v{myUut^SP-)?isX4i{DG z_0ZrC;T@D(L;#IBfia21qyiX~bks$wGdV^so^#kF+8~)TT(73?FuOBN}|ex6?SD#pEC{CbU{dFtZ#t}>i4X~TW;&sR-@ile|_Ni9B6b2OdA^j$AdSn zPr#hgCxHhc465V){5R1E0P6v!N8JDnH1Er9u`789b7_CygM<54b%xgCFizNR|-%y=ggj zHB>@u`;E=*#k`l_Nr;ETB6F5&An*>gfMV)N<1^}BNVsA*Wi7- zVWKKRRWI|UAYITXJ<&pufR{ky?w@;40Q1bAP%|n>^ReU6p~Caj_1VglEtGFW#Cw1; zrGC){__E*lc$a^0tBMCK@CVvT7+xnC#E8zEW4hcvIk7%j<9IG8P-GDuaf2y6xeKTm z)*HG|p)yI!i^KGAAkrp0MvgBmSrKFkHS3XpWBm&7ls=A&(>ieMskaFAbQ}r6KA>K2 zyfO;{n}6`Uzg%T-_q$r$*vMYW=qJRB)Yg~__UAPul?^u}Y`=#-8~`e+33B53f*GA- z+uwe4Q;XxGcc-!!LbIC}jXtxa1Gj;7j0~mKmwm~^BQqc7+FcDnN944KKo}5RcW>bn zqvwmaqM#~K3rDc>Z$Dwczz!=dnndZ*^n?%f`zoF~z45vp@izXvmjj^4H_?t~JU z9r532@baYt@^i5L-!=8WOeW`~E}XBu#9y1%^)UfG3I4@62J>}}7Gas)kBncQYLGe4 z@m&>I5n$_U81yb!HMLu#oDR-zLm(`?C=Cui(O?~VqI!lfji4VXt$e^k6!=5rnoz>L zH($a*yQ`>HmPbA%Rci+Aq|{N*terP-1p29YJnh(G#xLG^_6(!H4|nWu%T3miT%MVk zw=NBlK8F(4S4&tOrRDRmqPMZuzaj%P0-yA;G64yXX$`PeGa zu4!oGxCMaymB}swZedmO0=&MyY5;m)PgDMG;OR*HV~+lfCjQ`vdI0ga9!q`jad8PK zH;hUP#L2a1+`uggEI_4cSCoMx3qw=x--jCZQ`3q8yfw%=UihgQgdw%-uIP6X!Jy-; zbgjoov_TBV#Ob)ws;$=vcJMjF={(dLsTM^kSm%6CpwYyAKP=P?TP#;{=HP}$n*iQO z61pNOnx?3nyT?9hL}xqOnjXhK+(~ExPR>m3QhxPhziQkA=Bm-x}Di+?h5_*#6B{+RB&$B;Z6mM55vV1F4cd8td*=6gTqYqu8k{?-nZ-6Gd#=XxzC;g%3fUl=0>0M($ID`Wuwfsl1a)SB;5py})n?Cr*h8ho}YO6%$|WaXGWhNGn0} zGEn!9R)c>thS?GV(op3X2}IFjYltuv7tf8UR@;IOGvWpM9*da88HnF{5bGyr2 z)p%N|@b?7It$?C?Y2YjCceIR9LI?2JX1Yjp*Hf%6l#G`jlyb@Zma=sTM>`3`fP%Dd zi{Oz?w)yNfhl6+=g78mCP)Y#7=+?n-bc9FyHy!fah0G?``la{*du2GK@bd%^xZMXn0(I zxOn;A;FJ3ldd_QY>~ERui1mZ|+SFZNxiU&yKj1RpB2#0z6{f{WlR@YYCfv>aLt2Ba zd3dqASLNkwOaU9K9C}yc188ZAz?*m9)Gh zc+|6*aa<8qtr~t0g5PJrRdcahRy7fLaHD3O{T4f^*s-A##YMU=fJy8bk7~GecMSS} zY4mypbYitf8%0bw9AZo3n{a{VUA4JYH><9)c53D+LgEjt~Ajj`zu@cEi9 z?EQ?#j>iUaiOPM#b|5;>JDTDrIbh+9RQ$q0WM`7*{nusd)J<(iUwfrB;yqBPO$$N` z*^49kRLH>uz=|qHomtZ=aVa!*i;TGEHkh=L%;^$eOrGi5*N|yh;_m@QbbV4fk_V0H99J@hXrUuIJ~>44AOH3vJU1(Ir+oj#J+Kf&i`3zVcS=gHFc^R5#H z44)?cmX$5-j5;6yJ#aeyF>o&)e$L4gxif!pKzCN2sOa>X*p?j=K$Ectk zbV`Hg;6}Yyxubu2xal<^vA%j4?+W=Amlh@cN7iS&R%2z{+h5Iib;N@FFzq|f&&FR~ z)o%~nZ?jea9z$B5E#JQ_FO$h8yLT&g>+l^HWw6>6+onJ5)z6h&Wx{Bd@Hp%oWh3hm zq`=>r)9`i3t$X#DE-*iv-YJbYo9`nqeZAc}v+Mvxz56OeW3 zJLbH_p|@8j9?^e2k^j&ePF|dyM7uagMJ(G)NM7Hziy&Ks9`+xjay9Y{7cUL3Y?hfl|2mQ8$`JH}_ z)Mk}I0W7v^PS9WC!I{~_5v1tKKcnC7sCWT2zGK~w7PW{k6F7bdVPVP~z4WHY1n|%q z)n*}cwVHLr0LHuRPlDK2^jc3T^)#7S0-6z*-gR?)|1Q;WHaogi>Ait8GCn`~%}Mm} zw6#6Z`kAHJ1=g23W>{K4x@p&s)#q5r?=?#A-n3E2Icor0Rg*{EYWUMWpk0d{A!2bG zP7>~V{)ef?(nGGBICzlU#-FNZC!EJID3SpnN=UvBa(fdz-<=A0ME| z(_^hhHD4|$?xf#QCF69(D0>RqofMUB8bPI!KT^#XMspu^H!^53b$@Z%Tm{HNs)F4%WrKaNKrK6EMQxiWySRw zM~y)LW`BdZ9&VNE>co;@od1Di348qp&u*{C0-ISF*RgZ;_RXr|`cJjoPN8jQ-+Thf zRvG+hFp(X!7u&IOd#&YxsxNaAGkBv^OO{`M-5ph?^&a5OoDt6t2Wpsz_d$DW8+B6r z$YIy{mYtT??2uo+^GTG4Ph}v&r7L+nF1k%hYoDenbT(okws0rp!RqMn0WPY@r!H!{0c?ZDxAvC)tdJ z@BAQZIHtIt^ZhR52yK@xl%6%3B-;J0n%7LMxSKoU*Sf4wdt36i4zw==l{OvI-G*aE z^S|epdyu!5VCDk2*|@O?g_E}`i~@5kgE`GSJ;zuHGEz}Zq%|b&@pWJoj)|(JHy!Qi zD`7S91f(pVk|UI4{aGe)!(F%TltqL*Yc6$UEs{*`?4{-S@U$dx0y<0Dt=7KUH_ILI zxesWI+j`v0)_Ug{*v7VvFK%Nuwrw?5W7{?w+ji3=jcwbuZQYZ5*Ug_Kf8O=3XXcsNv-h5J zR2D6XQnb_h@xR*`<;chvIXbqZGHB?t9Dym&mYd4}sdC_3~uIExX&%hT>De2@|( zB8?&B47Z~`?o_r+AhbZj)A@<~Cy4=(XNw=3Cp?&UKX{wgdPXh%fi<{QZL`S#?+U|< z7s6xQZ?(hxx=yop3f26Sb54l~MvD?jb@BN4b%6aM{Qj`vS3F1?{wIB;Jl(;)e~qK( z;_9cz?`aS{M6f`t10vAonMxxIq+gi7+FCwUO8)Xw^;$u~H(X zIo=BJZ?%!Y90!mb`Q`ZkN+BkJ>5&7@;^fmE=89$aeOxZF6TN_p=h@VjD~au;^KDI^ 
z`D%BVxY>LCC;7LcHWjUIv;uAedIJ%65vJC|Gpywt=jVo3_eZ1crC{2eznK~}8XbHe zY`bbvyIEwH3c1ZO1?iCRSqVslp<<$L`nD8iCQ1|0$PZ^o;Mo}JJu@zU-?nHqYNv3x zR1p5C)8MSHC(IAtl+NNWXE$W^mte7c_ga|m$M%y!GTIEfLEyZRJwBsKoJV@dJ-E zoAqyZ4nBOhEXu+qH+He1>(&F6#qidq2p`5tptBjU!**qnmQf7aR6 z8yHZhiu?TEGdxxa{eb9x&uj07nambW-G0_pKW4k#Pt&`J=G{K1Sp*`m(BbBuLZ>?l0i*?*Wf355# zpJSY6m6-M(BJE&0hcbc$eIWq>+Zko+0s$jdp&4;?9d#Pj9kWYu>=Jq>=L;$Il4>W% z~F|EDS2M$v<%1kCBdRXYNu=U8Z_G-<{&mK;fKTvJBGPcg=1y6;npQ zO7lB10e71>2%GRi$eB5?Wp37dGE*=k9u%rKiAu6a!rCfsWywz^LZ8*IJ4~`dw6yGb zl$TtJ0*;?QvAespr>8aoMjR8>zh`lgdD%H~CY9580<+dFw?gDUSSSa7PrvUS{b=Fj zcu#mZn@fJPM0T|mWPe}3Jyam0>SK?^?)nzSn&ngw6R_g+Q1*mx3jPO~hRO4mEO**=QZbn#IcqqDGKKJ@psLCzZl)U@VLTS`BN#xQ!_@8gnjjC)KZ{P$U%M|y zK6uz(@MgZv0QF;E1QfnwXBX z_R1Y4OLamN3FUm`7|ciB&cZ$e=h$i=h_9A5TnvWnfB3L{Vo10Q9{$Js{jqM!CDj@dLEyi583;b1zq;?bC_x+) zXv3!h0VFV%K;>Z1rHa$@l}aXoQLb?vZ`1)rhwxWDRKM2k@tnguDW$`kn9KEaf1JP< zDIv7&$#(5uM8l2pf(+r=yaBg?Xb90#%hK-8Ply~^+(SPg7})dxJaYa<@sj-^vuu)* za|Gdo#d@^t)6=HyzS6p@LaQ?iER!kG1?zpJK+A8ew5FB+IWe7IKwTsi@{DZcHRTra zgxGvAqAX}a7`U13v&oL?;U^^(#WY1_cjQD!1GN)?Sw#Nwz2}>}-FJhtj z4demN^95*3Ky?Q`ppG+K^GM-uDCZp7S7#+ggYc(nqC3-VW81WcH@NmH&3jl?` z`g6~G+AiDBU_O~uK!4BLKoz5$;fC>332@9Q~(cRnVDk_@<4L1 zU0?5<#p%RMYTIrzy8osCaYkDa-kDWmWkbT1fnqnc+B@CVOVCUT&&eWY=`pp_PFb~6 zV!L>~d)g5}GD~#0+E*;a_%pEn)MZw?kD2;JbeA!EbPbj`pJZRS1rvIMRL5`2519@| zE^&0ck%0O^hI7G@sZgRY`ZzDtI)E0MKJapP-}d*BLO04g9_O%3=E;rWcEDwKIDXNl z-BH!Y8)d>OXz;;L{PdCyTZWSL^-=lyv_|D}4H62xJe#+Um>cSKx}XjY395@cRl~>Q z2Ql$GPMOF^TgUCc`qL~ryhVhfqB(L(rHs_DgD%j&hZEo=kJke>S14y8$h|IG0)^%H z>f9PfI2sTdEK~URU#GT7@ClWq)Bp7mRioAt6iN~0Cpa&iSy7kU{o;>n`uCKtFa)2F zzmxq7ka^lr&v>*BQ_R6u+H}y==?dLep#mmU3t0U?Bv5kp>FI}b#%7hrG}L11D$gPw zjjOAd?#?4+t!UZb4eL&3`v0Ug1R6?u^)2raE^|LIUJEl2pMS&yAms(y+eBX1UP=Y)*)Lk|9$wQIRWs%&v;K5$>2U#8`( z^&^86Ke}Acfdt3ivRoYE^h`Lqy-~Pa4K=;gWrapEoH37ETGHomqDC?r)aQ1>VQxK@ zxzRR%1P&Wr@|)C719(lmTG~KRE^OP>WpHQWW?KVklI=rr{c~EL)9v5RYf%>Dp5NSE zKVXP-77x^_0)|D_uXj~0SAsnjZT^^bNVd7uajf~G&YA-Ee-gp3VKVi;jVYBt$m2`&i!n@7*N-9L2a z{;)N^{Dk!^geEgg)w7g*u`4>?% z0X-v?+Hl4AnYIx43Jt+_9#F#3HOXQy#lm>NRv&$9k?O?t&F3R{#=-I?$6wry~$K%P4934U9esCGC zH!(E6izu36Vugq-{7r&@uj;Kze>#fd>fm{x;4gghtLjN^R&;Cz#>RRa%{R9#uuomb z-c<6Lkq?7@_Jfk~UMPJq5UdBFTiUDefLHZ26SWtQqd$K{ zx*II9)1M%wPUC?V&S4DW(i>{T;WnlE=t6-_!D2Q_a7W3j6pq0iOt`<|FaQ;^znp4z zUH@n~ZLcSilMzYDr;A~H4SQ5CQejip{$BLEe@R0Hbz5Q&Yq0U5t19)WFtPBe#{2WX zD(2|bmbC##rpfI}X8B=yfvSn2Tvr#A^UFS0+J&j16udZrE~?ctb8 zLaT|jvF)~A90+hhBtaR3?4iE6a0o7l#vyMyjel58h6EN!@>ZS!d-=u->m5kyr{BLj zB9){t?`8dJs%Q{^Zi@gj~@p720Ss! z#rlMV)@er;gBlP^Y!dWRfM;geJX#9`>>S+PY>E5M?tHrRw&L^-!QGXr{XO&PI-YWA zw4@r7qZ|ss&(oL1vcR-kKlk8Bajw;+0Jsf5J-9$drWC$FefUiq?|oc6vXm?mSYcMRl2Gc>1W?klk$=Y2oD~_zS*y1uy1fN>Gp#}xW=FP@A1QJ^#|XYOY-<6 z0>xMbEILO3NCgr4WB$Y&ZV%<(p(lDJMfL87%5$rsB>^u%9x$-@>UbHrw7#dUYb*{? 
zbkorlCk{~;%#_(~**iPxtZa+#q?2S=Y|TD7-VK&3mry%}CI4_qKft6KtTc)?>L!<3 zCA*Y9pUAO~hSn5PPwOW8J7(yjpH+)$RxhK=XgD^_tz)CJrxc_AaZxt}Z-IN$3s zA@snHZLR)RJK6PFnaeRTG%GTTbp#c;mn>;NP}a=yjsNNbcYJHC4e?$qYpoE#%a*uK zp7`GBg?s>V?#nBL<*%$MxbyIWwk|&<;@Y%icpu$>9vl`f3mF8V1|Ay-OBd)u`}PJG@5p;^dc0j8 z^YHR;;69$8o^CuEkMhqsG0^4c<5lJ1ay3mMcH0mgb@JlqeG7Vq#5kyA=hyM9u)W<{pC@J z{*fil62SQ04)dD!B4lQb0cjs1d%ND`m7nlT%WbU#%i5WU`t^EhBYHV3`iyLVQKERp#Wo} ziSm`P@^Mse3>aoUEH0Rg2vfF6KwPK}qXohd9#OsZ$~J8X%&Hu1tstWJ-l?3BpUUbs z@m;^Eh`@+cRk4|Zrwk4>{l$1H#0c5=&Jw6*dbb)x9lxecFr-KG^ylGT(~Fx+uMKzE zXlu}8mAS|+IPh9HcHCRWMxH4Ya|QKW6>1Vt4}cFpLw|3$<@0a@oU#;y1)_Y#KW9sv zN+Wvha6qgk%9mq7Wws(v;n*6xOPC0tKFLFwkzWj(zco%#3a$k?f&23&Y#>Zh{dqi2 z9U$LWTf2Yc1(^frOSc%If~429bBu19G5M%a)LHrB5bko=mA($u1w;yBt4I>aO#5{a zYP@(mjI>4`$`+bUS%>0w4{q@J9;yx~zEQc3(Ld_(@!oUtpuj`hE}=NdOrx=HwIFK5 znzn^Nq*20}w!A>(jZq1RhX^r7I_c*@Y*fS=$;?KoR|LX^^ei}s+_*3CLJ$worfK@X z-E@PMq;)yxF&=^73VrM(qJPANC&x(%Q@%iL!EYLm*OVKKE40awmEWpKyJG=i0ufOW zF&d^QeXRyL)o5@kkP>N;3Sn-`4$}MU4ckAbzOnVUBFF!&z~DJ_4pel z9CJ4@`KSjdBa&7+j0i3xW#tRPg$%#mK{ENSS|BpS62e6LUmXz(3lnO{ml9g1agwQj zS%iO#on<3x2n-JKUf9(@ok=Q8)-s{a9HI01s9F5gpm(X0G3dSaI43R)ff+LQcpe*aZTGzATk4IC8~xQ1^!FS^14?tmV4Th7i2627R8JD}&z-2waZVj=CrOH?vp0Oj&uMbY_^lN|5 z)}Rj6AW5}}HMIUS)@XJvmqyq*2K7^U^{)Ixdp9eP?2qILiDs%uD#gK^SP>q+6s7L8 tTtoT_o&N*cd|1c7cB!+KMB~wm5LT2{{!^Nmbm}` From b96e02110569e1bffc7a91bc3971810e27344667 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 1 Mar 2020 09:19:15 -0500 Subject: [PATCH 266/269] updated remaining comment about property --- sklearn/linear_model/_glm/glm.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index 4a44e4a1baa58..8607d6a1828ab 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -599,10 +599,9 @@ def __init__(self, *, power=0.0, alpha=1.0, fit_intercept=True, @property def family(self): - # We use a property with a setter, since the GLM solver relies - # on self.family attribute, but we can't set it in __init__ according - # to scikit-learn API constraints. This also ensures that self.power - # and self.family.power are identical by construction. + # We use a property with a setter to make sure that the family is + # always a Tweedie distribution, and that self.power and + # self.family.power are identical by construction. 
From 293214c6dc78483e3802df8a9ac7b8ce5cede626 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Sun, 1 Mar 2020 10:54:00 -0500
Subject: [PATCH 267/269] Compare perfs of models side by side in example

---
 ...lot_tweedie_regression_insurance_claims.py | 68 +++++++++----------
 1 file changed, 34 insertions(+), 34 deletions(-)

diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
index 61faf7c2225fb..5235cd3287731 100644
--- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py
+++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py
@@ -143,14 +143,9 @@ def score_estimator(
     tweedie_powers=None,
 ):
     """Evaluate an estimator on train and test sets with different metrics"""
-    if isinstance(estimator, tuple):
-        model_name = " * ".join(e.__class__.__name__ for e in estimator)
-    else:
-        model_name = estimator.__class__.__name__
-
-    print("\nEvaluation of {} of target {} ".format(model_name, target))
     metrics = [
-        ("D² explained", None),
+        ("D² explained", None),  # Use default scorer if it exists
         ("mean abs. error", mean_absolute_error),
         ("mean squared error", mean_squared_error),
     ]
@@ -280,6 +275,7 @@ def score_estimator(
     target="Frequency",
     weights="Exposure",
 )
+print("Evaluation of PoissonRegressor on target Frequency")
 print(scores)
 
 ##############################################################################
@@ -377,6 +373,7 @@ def score_estimator(
     target="AvgClaimAmount",
     weights="ClaimNb",
 )
+print("Evaluation of GammaRegressor on target AvgClaimAmount")
 print(scores)
 
 ##############################################################################
@@ -430,25 +427,37 @@ def score_estimator(
 # Overall, the drivers age (``DrivAge``) has a weak impact on the claim
 # severity, both in observed and predicted data.
 #
-# Pure Premium Modeling via a Product of Frequency and Severity
-# -------------------------------------------------------------
+# Pure Premium Modeling via a Product Model vs single TweedieRegressor
+# --------------------------------------------------------------------
 # As mentioned in the introduction, the total claim amount per unit of
 # exposure can be modeled as the product of the prediction of the
 # frequency model by the prediction of the severity model.
 #
-# To quantify the aggregate performance of this product model, one can compute
+# Alternatively, one can directly model the total loss with a unique
+# Compound Poisson Gamma generalized linear model (with a log link function).
+# This model is a special case of the Tweedie GLM with a "power" parameter
+# :math:`p \in (1, 2)`. Here, we fix a priori the `power` parameter of the
+# Tweedie model to some arbitrary value (1.9) in the valid range. Ideally one
+# would select this value via grid-search by minimizing the negative
+# log-likelihood of the Tweedie model, but unfortunately the current
+# implementation does not allow for this (yet).
+#
+# We will compare the performance of both approaches.
+# To quantify the performance of both models, one can compute
 # the mean deviance of the train and test data assuming a Compound
 # Poisson-Gamma distribution of the total claim amount. This is equivalent to
-# a Tweedie distribution with "power" parameter between 1 and 2.
+# a Tweedie distribution with a `power` parameter between 1 and 2.
# -# As we do not know the true value of the "power" parameter, we compute the -# mean deviances for a grid of possible values of the "power" parameter, -# hoping that a good model for one value of "power" will stay a good model for -# another. Here, every value of "power" defines a separate metric and models -# are to be compared metric by metric: +# The :func:`sklearn.metrics.mean_tweedie_deviance` depends on a `power` +# parameter. As we do not know the true value of the `power` parameter, we here +# compute the mean deviances for a grid of possible values, and compare the +# models side by side, i.e. we compare them at identical values of `power`. +# Ideally, we hope that one model will be consistently better than the other, +# regardless of `power`. tweedie_powers = [1.5, 1.7, 1.8, 1.9, 1.99, 1.999, 1.9999] -scores = score_estimator( + +scores_product_model = score_estimator( (glm_freq, glm_sev), X_train, X_test, @@ -458,28 +467,12 @@ def score_estimator( weights="Exposure", tweedie_powers=tweedie_powers, ) -print(scores) - - -############################################################################## -# Pure Premium Modeling Using a Single Compound Poisson Gamma Model -# ----------------------------------------------------------------- -# Instead of taking the product of two independently fit models for frequency -# and severity one can directly model the total loss with a unique Compound -# Poisson Gamma generalized linear model (with a log link function). This -# model is a special case of the Tweedie GLM with a "power" parameter :math:`p -# \in (1, 2)`. -# -# Here we fix apriori the "power" parameter of the Tweedie model to some -# arbitrary value in the valid range. Ideally one would select this value via -# grid-search by minimizing the negative log-likelihood of the Tweedie model, -# but unfortunately the current implementation does not allow for this (yet). glm_pure_premium = TweedieRegressor(power=1.9, alpha=.1, max_iter=10000) glm_pure_premium.fit(X_train, df_train["PurePremium"], sample_weight=df_train["Exposure"]) -scores = score_estimator( +scores_glm_pure_premium = score_estimator( glm_pure_premium, X_train, X_test, @@ -489,11 +482,18 @@ def score_estimator( weights="Exposure", tweedie_powers=tweedie_powers ) + +scores = pd.concat([scores_product_model, scores_glm_pure_premium], + axis=1, sort=True, + keys=('Product Model', 'TweedieRegressor')) +print("Evaluation of the Product Model and the Tweedie Regressor " + "on target PurePremium") print(scores) ############################################################################## # In this example, both modeling approaches yield comparable performance -# metrics. +# metrics. For implementation reasons, the percentage of explained variance +# :math:`D^2` is not available for the product model. # # We can additionally validate these models by comparing observed and # predicted total claim amount over the test and train subsets. 
We see that, From 987239ab23d8295d01c78536358790fb8c0a6c7c Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 1 Mar 2020 11:18:48 -0500 Subject: [PATCH 268/269] Shorten text for df to fit fully in width --- .../plot_tweedie_regression_insurance_claims.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 5235cd3287731..7df8bad102a21 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -151,7 +151,7 @@ def score_estimator( ] if tweedie_powers: metrics += [( - "mean Tweedie deviance (p={:.4f})".format(power), + "mean Tweedie dev p={:.4f}".format(power), partial(mean_tweedie_deviance, power=power) ) for power in tweedie_powers] @@ -455,6 +455,10 @@ def score_estimator( # Ideally, we hope that one model will be consistently better than the other, # regardless of `power`. +glm_pure_premium = TweedieRegressor(power=1.9, alpha=.1, max_iter=10000) +glm_pure_premium.fit(X_train, df_train["PurePremium"], + sample_weight=df_train["Exposure"]) + tweedie_powers = [1.5, 1.7, 1.8, 1.9, 1.99, 1.999, 1.9999] scores_product_model = score_estimator( @@ -468,10 +472,6 @@ def score_estimator( tweedie_powers=tweedie_powers, ) -glm_pure_premium = TweedieRegressor(power=1.9, alpha=.1, max_iter=10000) -glm_pure_premium.fit(X_train, df_train["PurePremium"], - sample_weight=df_train["Exposure"]) - scores_glm_pure_premium = score_estimator( glm_pure_premium, X_train, From edba3b8b5874d28eef0075a7034e2ccf9dc10fcc Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Sun, 1 Mar 2020 11:48:58 -0500 Subject: [PATCH 269/269] Use context manager instead? --- .../linear_model/plot_tweedie_regression_insurance_claims.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 7df8bad102a21..ccd18c8efff99 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -488,7 +488,8 @@ def score_estimator( keys=('Product Model', 'TweedieRegressor')) print("Evaluation of the Product Model and the Tweedie Regressor " "on target PurePremium") -print(scores) +with pd.option_context('display.expand_frame_repr', False): + print(scores) ############################################################################## # In this example, both modeling approaches yield comparable performance
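The side-by-side evaluation carried out in the three patches above amounts to scoring both prediction vectors with `sklearn.metrics.mean_tweedie_deviance` at identical `power` values. A minimal, self-contained sketch of that comparison, using made-up numbers rather than the example's insurance dataset:

    import numpy as np
    from sklearn.metrics import mean_tweedie_deviance

    # Hypothetical pure premium predictions for four policies; zeros in y_true
    # are allowed for 1 < power < 2, while predictions must be strictly positive.
    y_true = np.array([0.0, 150.0, 0.0, 1200.0])
    y_pred_product = np.array([15.0, 110.0, 8.0, 900.0])
    y_pred_tweedie = np.array([20.0, 130.0, 10.0, 800.0])

    for p in (1.5, 1.9, 1.99):
        print("power=%.2f  product=%.2f  tweedie=%.2f" % (
            p,
            mean_tweedie_deviance(y_true, y_pred_product, power=p),
            mean_tweedie_deviance(y_true, y_pred_tweedie, power=p)))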