diff --git a/mgwr/gwr.py b/mgwr/gwr.py index 52f4771..a1d6457 100755 --- a/mgwr/gwr.py +++ b/mgwr/gwr.py @@ -1,4 +1,4 @@ -#Main GWR classes +# Main GWR classes __author__ = "Taylor Oshan Tayoshan@gmail.com" @@ -11,14 +11,16 @@ import spreg.user_output as USER from spglm.family import Gaussian, Binomial, Poisson from spglm.glm import GLM, GLMResults -from spglm.iwls import iwls,_compute_betas_gwr +from spglm.iwls import iwls, _compute_betas_gwr from spglm.utils import cache_readonly from .diagnostics import get_AIC, get_AICc, get_BIC, corr from .kernels import * from .summary import * fk = {'gaussian': fix_gauss, 'bisquare': fix_bisquare, 'exponential': fix_exp} -ak = {'gaussian': adapt_gauss, 'bisquare': adapt_bisquare, 'exponential': adapt_exp} +ak = {'gaussian': adapt_gauss, 'bisquare': adapt_bisquare, + 'exponential': adapt_exp} + class GWR(GLM): """ @@ -28,182 +30,181 @@ class GWR(GLM): Parameters ---------- - coords : array-like - n*2, collection of n sets of (x,y) coordinates of - observatons; also used as calibration locations is - 'points' is set to None - - y : array - n*1, dependent variable - - X : array - n*k, independent variable, exlcuding the constant - - bw : scalar - bandwidth value consisting of either a distance or N - nearest neighbors; user specified or obtained using - Sel_BW - - family : family object - underlying probability model; provides - distribution-specific calculations - - offset : array - n*1, the offset variable at the ith location. For Poisson model - this term is often the size of the population at risk or - the expected size of the outcome in spatial epidemiology - Default is None where Ni becomes 1.0 for all locations; - only for Poisson models - - sigma2_v1 : boolean - specify form of corrected denominator of sigma squared to use for - model diagnostics; Acceptable options are: - - 'True': n-tr(S) (defualt) - 'False': n-2(tr(S)+tr(S'S)) - - kernel : string - type of kernel function used to weight observations; - available options: - 'gaussian' - 'bisquare' - 'exponential' - - fixed : boolean - True for distance based kernel function and False for - adaptive (nearest neighbor) kernel function (default) - - constant : boolean - True to include intercept (default) in model and False to exclude - intercept. - - dmat : array - n*n, distance matrix between calibration locations used - to compute weight matrix. Defaults to None and is - primarily for avoiding duplicate computation during - bandwidth selection. - - sorted_dmat : array - n*n, sorted distance matrix between calibration locations used - to compute weight matrix. Defaults to None and is - primarily for avoiding duplicate computation during - bandwidth selection. - - spherical : boolean - True for shperical coordinates (long-lat), - False for projected coordinates (defalut). - + coords : array-like + n*2, collection of n sets of (x,y) coordinates of + observatons; also used as calibration locations is + 'points' is set to None + + y : array + n*1, dependent variable + + X : array + n*k, independent variable, exlcuding the constant + + bw : scalar + bandwidth value consisting of either a distance or N + nearest neighbors; user specified or obtained using + Sel_BW + + family : family object + underlying probability model; provides + distribution-specific calculations + + offset : array + n*1, the offset variable at the ith location. 
For Poisson model + this term is often the size of the population at risk or + the expected size of the outcome in spatial epidemiology + Default is None where Ni becomes 1.0 for all locations; + only for Poisson models + + sigma2_v1 : boolean + specify form of corrected denominator of sigma squared to use for + model diagnostics; Acceptable options are: + + 'True': n-tr(S) (default) + 'False': n-2(tr(S)+tr(S'S)) + + kernel : string + type of kernel function used to weight observations; + available options: + 'gaussian' + 'bisquare' + 'exponential' + + fixed : boolean + True for distance based kernel function and False for + adaptive (nearest neighbor) kernel function (default) + + constant : boolean + True to include intercept (default) in model and False to exclude + intercept. + + dmat : array + n*n, distance matrix between calibration locations used + to compute weight matrix. Defaults to None and is + primarily for avoiding duplicate computation during + bandwidth selection. + + sorted_dmat : array + n*n, sorted distance matrix between calibration locations used + to compute weight matrix. Defaults to None and is + primarily for avoiding duplicate computation during + bandwidth selection. + + spherical : boolean + True for spherical coordinates (long-lat), + False for projected coordinates (default). + Attributes ---------- - coords : array-like - n*2, collection of n sets of (x,y) coordinates used for - calibration locations - - y : array - n*1, dependent variable - - X : array - n*k, independent variable, exlcuding the constant - - bw : scalar - bandwidth value consisting of either a distance or N - nearest neighbors; user specified or obtained using - Sel_BW - - family : family object - underlying probability model; provides - distribution-specific calculations - - offset : array - n*1, the offset variable at the ith location. For Poisson model - this term is often the size of the population at risk or - the expected size of the outcome in spatial epidemiology - Default is None where Ni becomes 1.0 for all locations - - sigma2_v1 : boolean - specify form of corrected denominator of sigma squared to use for - model diagnostics; Acceptable options are: - - 'True': n-tr(S) (defualt) - 'False': n-2(tr(S)+tr(S'S)) - - kernel : string - type of kernel function used to weight observations; - available options: - 'gaussian' - 'bisquare' - 'exponential' - - fixed : boolean - True for distance based kernel function and False for - adaptive (nearest neighbor) kernel function (default) - - constant : boolean - True to include intercept (default) in model and False to exclude - intercept - - dmat : array - n*n, distance matrix between calibration locations used - to compute weight matrix. Defaults to None and is - primarily for avoiding duplicate computation during - bandwidth selection. - - sorted_dmat : array - n*n, sorted distance matrix between calibration locations used - to compute weight matrix. Defaults to None and is - primarily for avoiding duplicate computation during - bandwidth selection. - - spherical : boolean - True for shperical coordinates (long-lat), - False for projected coordinates (defalut).
- - n : integer - number of observations - - k : integer - number of independent variables - - mean_y : float - mean of y - - std_y : float - standard deviation of y - - fit_params : dict - parameters passed into fit method to define estimation - routine - - W : array - n*n, spatial weights matrix for weighting all - observations from each calibration point - points : array-like - n*2, collection of n sets of (x,y) coordinates used for - calibration locations instead of all observations; - defaults to None unles specified in predict method - - P : array - n*k, independent variables used to make prediction; - exlcuding the constant; default to None unless specified - in predict method - - exog_scale : scalar - estimated scale using sampled locations; defualt is None - unless specified in predict method - - exog_resid : array-like - estimated residuals using sampled locations; defualt is None - unless specified in predict method + coords : array-like + n*2, collection of n sets of (x,y) coordinates used for + calibration locations + + y : array + n*1, dependent variable + + X : array + n*k, independent variable, exlcuding the constant + + bw : scalar + bandwidth value consisting of either a distance or N + nearest neighbors; user specified or obtained using + Sel_BW + + family : family object + underlying probability model; provides + distribution-specific calculations + + offset : array + n*1, the offset variable at the ith location. For Poisson model + this term is often the size of the population at risk or + the expected size of the outcome in spatial epidemiology + Default is None where Ni becomes 1.0 for all locations + + sigma2_v1 : boolean + specify form of corrected denominator of sigma squared to use for + model diagnostics; Acceptable options are: + + 'True': n-tr(S) (defualt) + 'False': n-2(tr(S)+tr(S'S)) + + kernel : string + type of kernel function used to weight observations; + available options: + 'gaussian' + 'bisquare' + 'exponential' + + fixed : boolean + True for distance based kernel function and False for + adaptive (nearest neighbor) kernel function (default) + + constant : boolean + True to include intercept (default) in model and False to exclude + intercept + + dmat : array + n*n, distance matrix between calibration locations used + to compute weight matrix. Defaults to None and is + primarily for avoiding duplicate computation during + bandwidth selection. + + sorted_dmat : array + n*n, sorted distance matrix between calibration locations used + to compute weight matrix. Defaults to None and is + primarily for avoiding duplicate computation during + bandwidth selection. + + spherical : boolean + True for shperical coordinates (long-lat), + False for projected coordinates (defalut). 
+ + n : integer + number of observations + + k : integer + number of independent variables + + mean_y : float + mean of y + + std_y : float + standard deviation of y + + fit_params : dict + parameters passed into fit method to define estimation + routine + + W : array + n*n, spatial weights matrix for weighting all + observations from each calibration point + points : array-like + n*2, collection of n sets of (x,y) coordinates used for + calibration locations instead of all observations; + defaults to None unles specified in predict method + + P : array + n*k, independent variables used to make prediction; + exlcuding the constant; default to None unless specified + in predict method + + exog_scale : scalar + estimated scale using sampled locations; defualt is None + unless specified in predict method + + exog_resid : array-like + estimated residuals using sampled locations; defualt is None + unless specified in predict method Examples -------- #basic model calibration - >>> import gwr - >>> import pysal - >>> from gwr.gwr import GWR - >>> data = pysal.open(pysal.examples.get_path('GData_utm.csv')) - >>> coords = zip(data.bycol('X'), data.by_col('Y')) + >>> import libpysal as ps + >>> from mgwr.gwr import GWR + >>> data = ps.io.open(ps.examples.get_path('GData_utm.csv')) + >>> coords = list(zip(data.by_col('X'), data.by_col('Y'))) >>> y = np.array(data.by_col('PctBach')).reshape((-1,1)) >>> rural = np.array(data.by_col('PctRural')).reshape((-1,1)) >>> pov = np.array(data.by_col('PctPov')).reshape((-1,1)) @@ -211,24 +212,25 @@ class GWR(GLM): >>> X = np.hstack([rural, pov, african_amer]) >>> model = GWR(coords, y, X, bw=90.000, fixed=False, kernel='bisquare') >>> results = model.fit() - >>> print results.params.shape + >>> print(results.params.shape) (159, 4) #predict at unsampled locations - - >>> index = np.arange(len(self.y)) + + >>> index = np.arange(len(y)) >>> test = index[-10:] >>> X_test = X[test] - >>> coords_test = list(coords[test]) + >>> coords_test = np.array(coords)[test] >>> model = GWR(coords, y, X, bw=94, fixed=False, kernel='bisquare') >>> results = model.predict(coords_test, X_test) - >>> print results.params.shape + >>> print(results.params.shape) (10, 4) """ + def __init__(self, coords, y, X, bw, family=Gaussian(), offset=None, - sigma2_v1=True, kernel='bisquare', fixed=False, constant=True, - dmat=None, sorted_dmat=None, spherical=False): + sigma2_v1=True, kernel='bisquare', fixed=False, constant=True, + dmat=None, sorted_dmat=None, spherical=False): """ Initialize class """ @@ -240,11 +242,11 @@ def __init__(self, coords, y, X, bw, family=Gaussian(), offset=None, self.kernel = kernel self.fixed = fixed if offset is None: - self.offset = np.ones((self.n, 1)) + self.offset = np.ones((self.n, 1)) else: self.offset = offset * 1.0 self.fit_params = {} - + self.points = None self.exog_scale = None self.exog_resid = None @@ -257,19 +259,23 @@ def __init__(self, coords, y, X, bw, family=Gaussian(), offset=None, def _build_W(self, fixed, kernel, coords, bw, points=None): if fixed: try: - W = fk[kernel](coords, bw, points, self.dmat, self.sorted_dmat,spherical=self.spherical) - except: - raise #TypeError('Unsupported kernel function ', kernel) + W = fk[kernel](coords, bw, points, self.dmat, + self.sorted_dmat, + spherical=self.spherical) + except BaseException: + raise # TypeError('Unsupported kernel function ', kernel) else: try: - W = ak[kernel](coords, bw, points, self.dmat, self.sorted_dmat,spherical=self.spherical) - except: - raise #TypeError('Unsupported kernel function 
', kernel) + W = ak[kernel](coords, bw, points, self.dmat, + self.sorted_dmat, + spherical=self.spherical) + except BaseException: + raise # TypeError('Unsupported kernel function ', kernel) return W - - - def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls',searching = False): + + def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, + solve='iwls',searching = False): """ Method that fits a model with a particular estimation routine. @@ -292,28 +298,32 @@ def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls',searching = self.fit_params['ini_params'] = ini_params self.fit_params['tol'] = tol self.fit_params['max_iter'] = max_iter - self.fit_params['solve']= solve + self.fit_params['solve'] = solve if solve.lower() == 'iwls': m = self.W.shape[0] - - #In bandwidth selection, return GWRResultsLite + + # In bandwidth selection, return GWRResultsLite if searching: resid = np.zeros((m, 1)) - influ = np.zeros((m,1)) + influ = np.zeros((m, 1)) for i in range(m): - wi = self.W[i].reshape((-1,1)) + wi = self.W[i].reshape((-1, 1)) if isinstance(self.family, Gaussian): - betas, inv_xtx_xt = _compute_betas_gwr(self.y,self.X,wi) - influ[i] = np.dot(self.X[i],inv_xtx_xt[:,i]) - predy = np.dot(self.X[i],betas)[0] + betas, inv_xtx_xt = _compute_betas_gwr( + self.y, self.X, wi) + influ[i] = np.dot(self.X[i], inv_xtx_xt[:, i]) + predy = np.dot(self.X[i], betas)[0] resid[i] = self.y[i] - predy elif isinstance(self.family, (Poisson, Binomial)): - rslt = iwls(self.y, self.X, self.family, self.offset, None, ini_params, tol, max_iter, wi=wi) + rslt = iwls(self.y, self.X, self.family, + self.offset, None, ini_params, tol, + max_iter, wi=wi) inv_xtx_xt = rslt[5] - influ[i] = np.dot(self.X[i],inv_xtx_xt[:,i])*rslt[3][i][0] + influ[i] = np.dot(self.X[i], inv_xtx_xt[:, i]) * \ + rslt[3][i][0] predy = rslt[1][i] resid[i] = self.y[i] - predy - return GWRResultsLite(self,resid,influ) + return GWRResultsLite(self, resid, influ) else: params = np.zeros((m, self.k)) @@ -322,22 +332,23 @@ def fit(self, ini_params=None, tol=1.0e-5, max_iter=20, solve='iwls',searching = S = np.zeros((m, self.n)) CCT = np.zeros((m, self.k)) for i in range(m): - wi = self.W[i].reshape((-1,1)) - rslt = iwls(self.y, self.X, self.family, self.offset, None, ini_params, tol, max_iter, wi=wi) - params[i,:] = rslt[0].T + wi = self.W[i].reshape((-1, 1)) + rslt = iwls(self.y, self.X, self.family, + self.offset, None, ini_params, tol, + max_iter, wi=wi) + params[i, :] = rslt[0].T predy[i] = rslt[1][i] w[i] = rslt[3][i] - S[i] = np.dot(self.X[i],rslt[5]) - #dont need unless f is explicitly passed for - #prediction of non-sampled points + S[i] = np.dot(self.X[i], rslt[5]) + # dont need unless f is explicitly passed for + # prediction of non-sampled points #cf = rslt[5] - np.dot(rslt[5], f) #CCT[i] = np.diag(np.dot(cf, cf.T/rslt[3])) CCT[i] = np.diag(np.dot(rslt[5], rslt[5].T)) return GWRResults(self, params, predy, S, CCT, w) - - - def predict(self, points, P, exog_scale=None, exog_resid=None, fit_params={}): + def predict(self, points, P, exog_scale=None, exog_resid=None, + fit_params={}): """ Method that predicts values of the dependent variable at un-sampled locations @@ -358,7 +369,7 @@ def predict(self, points, P, exog_scale=None, exog_resid=None, fit_params={}): which estimates a model using points from "coords"; if given it must be n*1 where n is the length of coords fit_params : dict - key-value pairs of parameters that will be passed into fit + key-value pairs of parameters that will be passed into fit method to 
define estimation routine; see fit method for more details """ @@ -371,14 +382,19 @@ def predict(self, points, P, exog_scale=None, exog_resid=None, fit_params={}): self.exog_resid = exog_resid else: raise InputError('exog_scale and exog_resid must both either be' - 'None or specified') + 'None or specified') self.points = points if self.constant: - P = np.hstack([np.ones((len(P),1)), P]) + P = np.hstack([np.ones((len(P), 1)), P]) self.P = P else: self.P = P - self.W = self._build_W(self.fixed, self.kernel, self.coords, self.bw, points) + self.W = self._build_W( + self.fixed, + self.kernel, + self.coords, + self.bw, + points) gwr = self.fit(**fit_params) return gwr @@ -391,186 +407,188 @@ def df_model(self): def df_resid(self): return None + class GWRResults(GLMResults): """ Basic class including common properties for all GWR regression models Parameters ---------- - model : GWR object - pointer to GWR object with estimation parameters + model : GWR object + pointer to GWR object with estimation parameters + + params : array + n*k, estimated coefficients - params : array - n*k, estimated coefficients + predy : array + n*1, predicted y values - predy : array - n*1, predicted y values + S : array + n*n, hat matrix - S : array - n*n, hat matrix + CCT : array + n*k, scaled variance-covariance matrix - CCT : array - n*k, scaled variance-covariance matrix - - w : array - n*1, final weight used for iteratively re-weighted least - sqaures; default is None + w : array + n*1, final weight used for iteratively re-weighted least + sqaures; default is None Attributes ---------- - model : GWR Object - points to GWR object for which parameters have been - estimated + model : GWR Object + points to GWR object for which parameters have been + estimated - params : array - n*k, parameter estimates + params : array + n*k, parameter estimates - predy : array - n*1, predicted value of y + predy : array + n*1, predicted value of y - y : array - n*1, dependent variable + y : array + n*1, dependent variable - X : array - n*k, independent variable, including constant + X : array + n*k, independent variable, including constant - family : family object - underlying probability model; provides - distribution-specific calculations + family : family object + underlying probability model; provides + distribution-specific calculations - n : integer - number of observations + n : integer + number of observations - k : integer - number of independent variables + k : integer + number of independent variables - df_model : integer - model degrees of freedom + df_model : integer + model degrees of freedom - df_resid : integer - residual degrees of freedom + df_resid : integer + residual degrees of freedom - offset : array - n*1, the offset variable at the ith location. - For Poisson model this term is often the size of - the population at risk or the expected size of - the outcome in spatial epidemiology; Default is - None where Ni becomes 1.0 for all locations + offset : array + n*1, the offset variable at the ith location. 
+ For Poisson model + this term is often the size of + the population at risk or the expected size of + the outcome in spatial epidemiology; Default is + None where Ni becomes 1.0 for all locations - scale : float - sigma squared used for subsequent computations + scale : float + sigma squared used for subsequent computations + + w : array + n*1, final weights from iteratively re-weighted least + squares routine + + resid_response : array + n*1, residuals of the response + + resid_ss : scalar + residual sum of squares - w : array - n*1, final weights from iteratively re-weighted least - sqaures routine + W : array + n*n; spatial weights for each observation from each + calibration point - resid_response : array - n*1, residuals of the repsonse + S : array + n*n, hat matrix - resid_ss : scalar - residual sum of sqaures + CCT : array + n*k, scaled variance-covariance matrix - W : array - n*n; spatial weights for each observation from each - calibration point + ENP : scalar + effective number of parameters, which depends on + sigma2 - S : array - n*n, hat matrix + tr_S : float + trace of S (hat) matrix - CCT : array - n*k, scaled variance-covariance matrix - - ENP : scalar - effective number of paramters, which depends on - sigma2 + tr_STS : float + trace of STS matrix - tr_S : float - trace of S (hat) matrix + y_bar : array + n*1, weighted mean value of y - tr_STS : float - trace of STS matrix + TSS : array + n*1, geographically weighted total sum of squares - y_bar : array - n*1, weighted mean value of y + RSS : array + n*1, geographically weighted residual sum of squares - TSS : array - n*1, geographically weighted total sum of squares + R2 : float + R-squared for the entire model (1- RSS/TSS) - RSS : array - n*1, geographically weighted residual sum of squares - - R2 : float - R-squared for the entire model (1- RSS/TSS) - - aic : float - Akaike information criterion + aic : float + Akaike information criterion - aicc : float - corrected Akaike information criterion to account - to account for model complexity (smaller - bandwidths) + aicc : float + corrected Akaike information criterion to account + for model complexity (smaller + bandwidths) - bic : float - Bayesian information criterio + bic : float + Bayesian information criterion - localR2 : array - n*1, local R square + localR2 : array + n*1, local R square - sigma2 : float - sigma squared (residual variance) that has been - corrected to account for the ENP + sigma2 : float + sigma squared (residual variance) that has been + corrected to account for the ENP - std_res : array - n*1, standardised residuals + std_res : array + n*1, standardised residuals - bse : array - n*k, standard errors of parameters (betas) + bse : array + n*k, standard errors of parameters (betas) - influ : array - n*1, leading diagonal of S matrix + influ : array + n*1, leading diagonal of S matrix - CooksD : array - n*1, Cook's D + CooksD : array + n*1, Cook's D - tvalues : array - n*k, local t-statistics + tvalues : array + n*k, local t-statistics - adj_alpha : array - 3*1, corrected alpha values to account for multiple - hypothesis testing for the 90%, 95%, and 99% confidence - levels; tvalues with an absolute value larger than the - corrected alpha are considered statistically - significant. + adj_alpha : array + 3*1, corrected alpha values to account for multiple + hypothesis testing for the 90%, 95%, and 99% confidence + levels; tvalues with an absolute value larger than the + corrected alpha are considered statistically + significant.
- deviance : array - n*1, local model deviance for each calibration point + deviance : array + n*1, local model deviance for each calibration point - resid_deviance : array - n*1, local sum of residual deviance for each - calibration point + resid_deviance : array + n*1, local sum of residual deviance for each + calibration point - llf : scalar - log-likelihood of the full model; see - pysal.contrib.glm.family for damily-sepcific - log-likelihoods + llf : scalar + log-likelihood of the full model; see + pysal.contrib.glm.family for damily-sepcific + log-likelihoods - pDev : float - local percent of deviation accounted for; analogous to - r-squared for GLM's + pDev : float + local percent of deviation accounted for; analogous to + r-squared for GLM's - mu : array - n*, flat one dimensional array of predicted mean - response value from estimator + mu : array + n*, flat one dimensional array of predicted mean + response value from estimator - fit_params : dict - parameters passed into fit method to define estimation - routine + fit_params : dict + parameters passed into fit method to define estimation + routine - predictions : array - p*1, predicted values generated by calling the GWR - predict method to predict dependent variable at - unsampled points () + predictions : array + p*1, predicted values generated by calling the GWR + predict method to predict dependent variable at + unsampled points () """ + def __init__(self, model, params, predy, S, CCT, w=None): GLMResults.__init__(self, model, params, predy, w) self.W = model.W @@ -612,23 +630,23 @@ def cov_params(self, cov, exog_scale=None): """ if exog_scale is not None: - return cov*exog_scale + return cov * exog_scale else: - return cov*self.scale + return cov * self.scale @cache_readonly def tr_S(self): """ trace of S (hat) matrix """ - return np.trace(self.S*self.w) + return np.trace(self.S * self.w) @cache_readonly def tr_STS(self): """ trace of STS matrix """ - return np.trace(np.dot(self.S.T*self.w,self.S*self.w)) + return np.trace(np.dot(self.S.T * self.w, self.S * self.w)) @cache_readonly def ENP(self): @@ -645,23 +663,23 @@ def ENP(self): if self.model.sigma2_v1: return self.tr_S else: - return 2*self.tr_S - self.tr_STS - + return 2 * self.tr_S - self.tr_STS + @cache_readonly def y_bar(self): """ weighted mean of y """ if self.model.points is not None: - n = len(self.model.points) + n = len(self.model.points) else: n = self.n - off = self.offset.reshape((-1,1)) - arr_ybar = np.zeros(shape=(self.n,1)) + off = self.offset.reshape((-1, 1)) + arr_ybar = np.zeros(shape=(self.n, 1)) for i in range(n): - w_i= np.reshape(np.array(self.W[i]), (-1, 1)) - sum_yw = np.sum(self.y.reshape((-1,1)) * w_i) - arr_ybar[i] = 1.0 * sum_yw / np.sum(w_i*off) + w_i = np.reshape(np.array(self.W[i]), (-1, 1)) + sum_yw = np.sum(self.y.reshape((-1, 1)) * w_i) + arr_ybar[i] = 1.0 * sum_yw / np.sum(w_i * off) return arr_ybar @cache_readonly @@ -676,13 +694,13 @@ def TSS(self): """ if self.model.points is not None: - n = len(self.model.points) + n = len(self.model.points) else: n = self.n - TSS = np.zeros(shape=(n,1)) + TSS = np.zeros(shape=(n, 1)) for i in range(n): - TSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1,1)) * - (self.y.reshape((-1,1)) - self.y_bar[i])**2) + TSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1, 1)) * + (self.y.reshape((-1, 1)) - self.y_bar[i])**2) return TSS @cache_readonly @@ -696,15 +714,15 @@ def RSS(self): relationships. 
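A sketch of the quantity computed in the loop below, in the plain-text notation used by the other diagnostics in this docstring: for each calibration location i,

    RSS(i) = sum_j w(i,j) * (y(j) - yhat(j))**2

where w(i,j) is the kernel weight given to observation j from calibration point i, and y(j) - yhat(j) is the response residual (taken from exog_resid when the model was used to predict at unsampled points).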
""" if self.model.points is not None: - n = len(self.model.points) - resid = self.model.exog_resid.reshape((-1,1)) + n = len(self.model.points) + resid = self.model.exog_resid.reshape((-1, 1)) else: n = self.n - resid = self.resid_response.reshape((-1,1)) - RSS = np.zeros(shape=(n,1)) + resid = self.resid_response.reshape((-1, 1)) + RSS = np.zeros(shape=(n, 1)) for i in range(n): - RSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1,1)) - * resid**2) + RSS[i] = np.sum(np.reshape(np.array(self.W[i]), (-1, 1)) + * resid**2) return RSS @cache_readonly @@ -718,7 +736,7 @@ def localR2(self): relationships. """ if isinstance(self.family, Gaussian): - return (self.TSS - self.RSS)/self.TSS + return (self.TSS - self.RSS) / self.TSS else: raise NotImplementedError('Only applicable to Gaussian') @@ -745,10 +763,10 @@ def sigma2(self): """ if self.model.sigma2_v1: - return (self.resid_ss / (self.n-self.tr_S)) + return (self.resid_ss / (self.n - self.tr_S)) else: - return self.resid_ss / (self.n - 2.0*self.tr_S + - self.tr_STS) #could be changed to SWSTW - nothing to test against + # could be changed to SWSTW - nothing to test against + return self.resid_ss / (self.n - 2.0 * self.tr_S + self.tr_STS) @cache_readonly def std_res(self): @@ -760,7 +778,8 @@ def std_res(self): Geographically weighted regression: the analysis of spatially varying relationships. """ - return self.resid_response.reshape((-1,1))/(np.sqrt(self.scale * (1.0 - self.influ))) + return self.resid_response.reshape( + (-1, 1)) / (np.sqrt(self.scale * (1.0 - self.influ))) @cache_readonly def bse(self): @@ -779,7 +798,7 @@ def influ(self): """ Influence: leading diagonal of S Matrix """ - return np.reshape(np.diag(self.S),(-1,1)) + return np.reshape(np.diag(self.S), (-1, 1)) @cache_readonly def cooksD(self): @@ -792,34 +811,37 @@ def cooksD(self): relationships. Note: in (9.11), p should be tr(S), that is, the effective number of parameters """ - return self.std_res**2 * self.influ / (self.tr_S * (1.0-self.influ)) + return self.std_res**2 * self.influ / (self.tr_S * (1.0 - self.influ)) @cache_readonly def deviance(self): - off = self.offset.reshape((-1,1)).T + off = self.offset.reshape((-1, 1)).T y = self.y ybar = self.y_bar if isinstance(self.family, Gaussian): - raise NotImplementedError('deviance not currently used for Gaussian') + raise NotImplementedError( + 'deviance not currently used for Gaussian') elif isinstance(self.family, Poisson): - dev = np.sum(2.0*self.W*(y*np.log(y/(ybar*off))-(y-ybar*off)),axis=1) + dev = np.sum( + 2.0 * self.W * (y * np.log(y / (ybar * off)) - (y - ybar * off)), axis=1) elif isinstance(self.family, Binomial): dev = self.family.deviance(self.y, self.y_bar, self.W, axis=1) - return dev.reshape((-1,1)) + return dev.reshape((-1, 1)) @cache_readonly def resid_deviance(self): if isinstance(self.family, Gaussian): - raise NotImplementedError('deviance not currently used for Gaussian') + raise NotImplementedError( + 'deviance not currently used for Gaussian') else: - off = self.offset.reshape((-1,1)).T + off = self.offset.reshape((-1, 1)).T y = self.y ybar = self.y_bar global_dev_res = ((self.family.resid_dev(self.y, self.mu))**2) - dev_res = np.repeat(global_dev_res.flatten(),self.n) + dev_res = np.repeat(global_dev_res.flatten(), self.n) dev_res = dev_res.reshape((self.n, self.n)) dev_res = np.sum(dev_res * self.W.T, axis=0) - return dev_res.reshape((-1,1)) + return dev_res.reshape((-1, 1)) @cache_readonly def pDev(self): @@ -828,9 +850,9 @@ def pDev(self): manual. 
Equivalent to 1 - (deviance/null deviance) """ if isinstance(self.family, Gaussian): - raise NotImplementedError('Not implemented for Gaussian') + raise NotImplementedError('Not implemented for Gaussian') else: - return 1.0 - (self.resid_deviance/self.deviance) + return 1.0 - (self.resid_deviance / self.deviance) @cache_readonly def adj_alpha(self): @@ -846,7 +868,7 @@ def adj_alpha(self): alpha = np.array([.1, .05, .001]) pe = self.ENP p = self.k - return (alpha*p)/pe + return (alpha * p) / pe def critical_tval(self, alpha=None): """ @@ -869,13 +891,13 @@ def critical_tval(self, alpha=None): """ n = self.n if alpha is not None: - alpha = np.abs(alpha)/2.0 - critical = t.ppf(1-alpha, n-1) + alpha = np.abs(alpha) / 2.0 + critical = t.ppf(1 - alpha, n - 1) else: - alpha = np.abs(self.adj_alpha[1])/2.0 - critical = t.ppf(1-alpha, n-1) + alpha = np.abs(self.adj_alpha[1]) / 2.0 + critical = t.ppf(1 - alpha, n - 1) return critical - + def filter_tvals(self, critical_t=None, alpha=None): """ Utility function to set tvalues with an absolute value smaller than the @@ -885,14 +907,14 @@ def filter_tvals(self, critical_t=None, alpha=None): are critical_t nor alpha are provided, an adjusted alpha at the 95 percent CI will automatically be used to define the critical t-value and used to filter. If both critical_t and alpha are supplied then the alpha - value will be ignored. + value will be ignored. Parameters ---------- critical : scalar critical t-value to determine whether parameters are statistically significant - + alpha : scalar alpha value to determine which tvalues are associated with statistically significant parameter @@ -907,11 +929,11 @@ def filter_tvals(self, critical_t=None, alpha=None): """ n = self.n if critical_t is not None: - critical = critical_t + critical = critical_t else: critical = self.critical_tval(alpha=alpha) - - subset = (self.tvalues < critical) & (self.tvalues > -1.0*critical) + + subset = (self.tvalues < critical) & (self.tvalues > -1.0 * critical) tvalues = self.tvalues.copy() tvalues[subset] = 0 return tvalues @@ -922,7 +944,7 @@ def df_model(self): @cache_readonly def df_resid(self): - return self.n - 2.0*self.tr_S + self.tr_STS + return self.n - 2.0 * self.tr_S + self.tr_STS @cache_readonly def normalized_cov_params(self): @@ -955,13 +977,14 @@ def llnull(self): @cache_readonly def null_deviance(self): return None - + @cache_readonly def R2(self): if isinstance(self.family, Gaussian): - TSS = np.sum((self.y.reshape((-1,1)) - np.mean(self.y.reshape((-1,1))))**2) - RSS = np.sum((self.y.reshape((-1,1)) - - self.predy.reshape((-1,1)))**2) + TSS = np.sum((self.y.reshape((-1, 1)) - + np.mean(self.y.reshape((-1, 1))))**2) + RSS = np.sum((self.y.reshape((-1, 1)) - + self.predy.reshape((-1, 1)))**2) return 1 - (RSS / TSS) else: raise NotImplementedError('Only available for Gaussian GWR') @@ -1005,37 +1028,37 @@ def conf_int(self): @cache_readonly def use_t(self): return None - + def local_collinearity(self): """ Computes several indicators of multicollinearity within a geographically weighted design matrix, including: - + local correlation coefficients (n, ((p**2) + p) / 2) local variance inflation factors (VIF) (n, p-1) local condition number (n, 1) - local variance-decomposition proportions (n, p) - + local variance-decomposition proportions (n, p) + Returns four arrays with the order and dimensions listed above where n is the number of locations used as calibrations points and p is the nubmer of explanatory variables. 
Local correlation coefficient and local - VIF are not calculated for constant term. + VIF are not calculated for constant term. """ x = self.X - w = self.W + w = self.W nvar = x.shape[1] nrow = len(w) if self.model.constant: - ncor = (((nvar-1)**2 + (nvar-1)) / 2) - (nvar-1) + ncor = (((nvar - 1)**2 + (nvar - 1)) / 2) - (nvar - 1) jk = list(combo(range(1, nvar), 2)) else: ncor = (((nvar)**2 + (nvar)) / 2) - nvar jk = list(combo(range(nvar), 2)) corr_mat = np.ndarray((nrow, int(ncor))) if self.model.constant: - vifs_mat = np.ndarray((nrow, nvar-1)) - else: + vifs_mat = np.ndarray((nrow, nvar - 1)) + else: vifs_mat = np.ndarray((nrow, nvar)) vdp_idx = np.ndarray((nrow, nvar)) vdp_pi = np.ndarray((nrow, nvar, nvar)) @@ -1043,36 +1066,39 @@ def local_collinearity(self): for i in range(nrow): wi = w[i] sw = np.sum(wi) - wi = wi/sw + wi = wi / sw tag = 0 - + for j, k in jk: - corr_mat[i, tag] = corr(np.cov(x[:,j], x[:, k], aweights=wi))[0][1] + corr_mat[i, tag] = corr( + np.cov(x[:, j], x[:, k], aweights=wi))[0][1] tag = tag + 1 - + if self.model.constant: - corr_mati = corr(np.cov(x[:,1:].T, aweights=wi)) - vifs_mat[i,] = np.diag(np.linalg.solve(corr_mati, np.identity((nvar-1)))) + corr_mati = corr(np.cov(x[:, 1:].T, aweights=wi)) + vifs_mat[i, ] = np.diag(np.linalg.solve( + corr_mati, np.identity((nvar - 1)))) else: corr_mati = corr(np.cov(x.T, aweights=wi)) - vifs_mat[i,] = np.diag(np.linalg.solve(corr_mati, np.identity((nvar)))) - - xw = x * wi.reshape((nrow,1)) + vifs_mat[i, ] = np.diag(np.linalg.solve( + corr_mati, np.identity((nvar)))) + + xw = x * wi.reshape((nrow, 1)) sxw = np.sqrt(np.sum(xw**2, axis=0)) - sxw = np.transpose(xw.T / sxw.reshape((nvar,1))) - svdx = np.linalg.svd(sxw) - vdp_idx[i,] = svdx[1][0]/svdx[1] - phi = np.dot(svdx[2].T, np.diag(1/svdx[1])) + sxw = np.transpose(xw.T / sxw.reshape((nvar, 1))) + svdx = np.linalg.svd(sxw) + vdp_idx[i, ] = svdx[1][0] / svdx[1] + phi = np.dot(svdx[2].T, np.diag(1 / svdx[1])) phi = np.transpose(phi**2) pi_ij = phi / np.sum(phi, axis=0) - vdp_pi[i,:,:] = pi_ij - - local_CN = vdp_idx[:, nvar-1].reshape((-1,1)) - VDP = vdp_pi[:,nvar-1,:] - + vdp_pi[i, :, :] = pi_ij + + local_CN = vdp_idx[:, nvar - 1].reshape((-1, 1)) + VDP = vdp_pi[:, nvar - 1, :] + return corr_mat, vifs_mat, local_CN, VDP - + def spatial_variability(self, selector, n_iters=1000, seed=None): """ Method to compute a Monte Carlo test of spatial variability for each @@ -1086,24 +1112,24 @@ def spatial_variability(self, selector, n_iters=1000, seed=None): should be the sel_bw object used to select a bandwidth for the gwr model that produced the surfaces that are being tested for spatial variation - + n_iters : int the number of Monte Carlo iterations to include for the tests of spatial variability. - seed : int + seed : int optional parameter to select a custom seed to ensure stochastic results are replicable. Default is none which automatically sets the seed to 5536 - Returns - ------- + Returns + ------- - p values : list + p values : list a list of psuedo p-values that correspond to the model parameter surfaces. Allows us to assess the probability of obtaining the observed spatial - variation of a given surface by random chance. + variation of a given surface by random chance. 
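Examples
--------
#hypothetical usage sketch, assuming coords, y, and X hold the Georgia
#example data loaded in the GWR class docstring above; n_iters is kept
#deliberately small here only so the sketch runs quickly (the default
#of 1000 iterations is more appropriate in practice)

>>> from mgwr.sel_bw import Sel_BW
>>> selector = Sel_BW(coords, y, X)
>>> bw = selector.search()
>>> model = GWR(coords, y, X, bw, fixed=False, kernel='bisquare')
>>> results = model.fit()
>>> p_vals = results.spatial_variability(selector, n_iters=10)
>>> #p_vals holds one pseudo p-value per parameter surface,
>>> #i.e. len(p_vals) == results.params.shape[1]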
""" @@ -1111,7 +1137,7 @@ def spatial_variability(self, selector, n_iters=1000, seed=None): temp_gwr = copy.deepcopy(self.model) if seed is None: - np.random.seed(5536) + np.random.seed(5536) else: np.random.seed(seed) @@ -1120,27 +1146,26 @@ def spatial_variability(self, selector, n_iters=1000, seed=None): kernel = temp_gwr.kernel fixed = temp_gwr.fixed - if self.model.constant: - X = self.X[:,1:] + X = self.X[:, 1:] else: X = self.X - init_sd = np.std(self.params, axis=0) + init_sd = np.std(self.params, axis=0) SDs = [] - + for x in range(n_iters): temp_coords = np.random.permutation(self.model.coords) temp_sel.coords = temp_coords temp_sel._build_dMat() temp_bw = temp_sel.search(**search_params) - + temp_gwr.W = temp_gwr._build_W(fixed, kernel, temp_coords, temp_bw) temp_params = temp_gwr.fit(**fit_params).params - + temp_sd = np.std(temp_params, axis=0) SDs.append(temp_sd) - + p_vals = (np.sum(np.array(SDs) > init_sd, axis=0) / float(n_iters)) return p_vals @@ -1148,10 +1173,10 @@ def spatial_variability(self, selector, n_iters=1000, seed=None): def predictions(self): P = self.model.P if P is None: - raise TypeError('predictions only avaialble if predict' - 'method is previously called on GWR model') + raise TypeError('predictions only avaialble if predict' + 'method is previously called on GWR model') else: - predictions = np.sum(P*self.params, axis=1).reshape((-1,1)) + predictions = np.sum(P * self.params, axis=1).reshape((-1, 1)) return predictions def summary(self): @@ -1163,37 +1188,36 @@ def summary(self): return - class GWRResultsLite(object): """ Lightweight GWR that computes the minimum diagnostics needed for bandwidth selection - + Parameters ---------- model : GWR object pointer to GWR object with estimation parameters - + resid : array n*1, residuals of the repsonse - + influ : array n*1, leading diagonal of S matrix - + Attributes ---------- tr_S : float trace of S (hat) matrix - + llf : scalar log-likelihood of the full model; see pysal.contrib.glm.family for damily-sepcific log-likelihoods - + mu : array n*, flat one dimensional array of predicted mean response value from estimator - + resid_ss : scalar residual sum of sqaures @@ -1206,15 +1230,15 @@ def __init__(self, model, resid, influ): self.influ = influ self.resid_response = resid self.model = model - + @cache_readonly def tr_S(self): return np.sum(self.influ) - + @cache_readonly def llf(self): - return self.family.loglike(self.y,self.mu) - + return self.family.loglike(self.y, self.mu) + @cache_readonly def mu(self): return self.y - self.resid_response @@ -1223,191 +1247,196 @@ def mu(self): def resid_ss(self): u = self.resid_response.flatten() return np.dot(u, u.T) - + + class MGWR(GWR): """ Parameters ---------- - coords : array-like - n*2, collection of n sets of (x,y) coordinates of - observatons; also used as calibration locations is - 'points' is set to None - - y : array - n*1, dependent variable - - X : array - n*k, independent variable, exlcuding the constant - - selector : sel_bw object - valid sel_bw object that has successfully called - the "search" method. This parameter passes on - information from GAM model estimation including optimal - bandwidths. 
- - family : family object - underlying probability model; provides - distribution-specific calculations - - sigma2_v1 : boolean - specify form of corrected denominator of sigma squared to use for - model diagnostics; Acceptable options are: - - 'True': n-tr(S) (defualt) - 'False': n-2(tr(S)+tr(S'S)) - - kernel : string - type of kernel function used to weight observations; - available options: - 'gaussian' - 'bisquare' - 'exponential' - - fixed : boolean - True for distance based kernel function and False for - adaptive (nearest neighbor) kernel function (default) - - constant : boolean - True to include intercept (default) in model and False to exclude - intercept. - - dmat : array - n*n, distance matrix between calibration locations used - to compute weight matrix. Defaults to None and is - primarily for avoiding duplicate computation during - bandwidth selection. - - sorted_dmat : array - n*n, sorted distance matrix between calibration locations used - to compute weight matrix. Defaults to None and is - primarily for avoiding duplicate computation during - bandwidth selection. - spherical : boolean - True for shperical coordinates (long-lat), - False for projected coordinates (defalut). + coords : array-like + n*2, collection of n sets of (x,y) coordinates of + observatons; also used as calibration locations is + 'points' is set to None + + y : array + n*1, dependent variable + + X : array + n*k, independent variable, exlcuding the constant + + selector : sel_bw object + valid sel_bw object that has successfully called + the "search" method. This parameter passes on + information from GAM model estimation including optimal + bandwidths. + + family : family object + underlying probability model; provides + distribution-specific calculations + + sigma2_v1 : boolean + specify form of corrected denominator of sigma squared to use for + model diagnostics; Acceptable options are: + + 'True': n-tr(S) (defualt) + 'False': n-2(tr(S)+tr(S'S)) + + kernel : string + type of kernel function used to weight observations; + available options: + 'gaussian' + 'bisquare' + 'exponential' + + fixed : boolean + True for distance based kernel function and False for + adaptive (nearest neighbor) kernel function (default) + + constant : boolean + True to include intercept (default) in model and False to exclude + intercept. + + dmat : array + n*n, distance matrix between calibration locations used + to compute weight matrix. Defaults to None and is + primarily for avoiding duplicate computation during + bandwidth selection. + + sorted_dmat : array + n*n, sorted distance matrix between calibration locations used + to compute weight matrix. Defaults to None and is + primarily for avoiding duplicate computation during + bandwidth selection. + spherical : boolean + True for shperical coordinates (long-lat), + False for projected coordinates (defalut). Attributes ---------- - coords : array-like - n*2, collection of n sets of (x,y) coordinates of - observatons; also used as calibration locations is - 'points' is set to None - - y : array - n*1, dependent variable - - X : array - n*k, independent variable, exlcuding the constant - - selector : sel_bw object - valid sel_bw object that has successfully called - the "search" method. This parameter passes on - information from GAM model estimation including optimal - bandwidths. - - bw : array-like - collection of bandwidth values consisting of either a distance or N - nearest neighbors; user specified or obtained using - Sel_BW with fb=True. 
Order of values should the same as - the order of columns associated with X - - family : family object - underlying probability model; provides - distribution-specific calculations - - sigma2_v1 : boolean - specify form of corrected denominator of sigma squared to use for - model diagnostics; Acceptable options are: - - 'True': n-tr(S) (defualt) - 'False': n-2(tr(S)+tr(S'S)) - - kernel : string - type of kernel function used to weight observations; - available options: - 'gaussian' - 'bisquare' - 'exponential' - - fixed : boolean - True for distance based kernel function and False for - adaptive (nearest neighbor) kernel function (default) - - constant : boolean - True to include intercept (default) in model and False to exclude - intercept. - - dmat : array - n*n, distance matrix between calibration locations used - to compute weight matrix. Defaults to None and is - primarily for avoiding duplicate computation during - bandwidth selection. - - sorted_dmat : array - n*n, sorted distance matrix between calibration locations used - to compute weight matrix. Defaults to None and is - primarily for avoiding duplicate computation during - bandwidth selection. - - spherical : boolean - True for shperical coordinates (long-lat), - False for projected coordinates (defalut). - - n : integer - number of observations - - k : integer - number of independent variables - - mean_y : float - mean of y - - std_y : float - standard deviation of y - - fit_params : dict - parameters passed into fit method to define estimation - routine - - W : array-like - list of n*n arrays, spatial weights matrices for weighting all - observations from each calibration point: one for each - covariate (k) + coords : array-like + n*2, collection of n sets of (x,y) coordinates of + observatons; also used as calibration locations is + 'points' is set to None + + y : array + n*1, dependent variable + + X : array + n*k, independent variable, exlcuding the constant + + selector : sel_bw object + valid sel_bw object that has successfully called + the "search" method. This parameter passes on + information from GAM model estimation including optimal + bandwidths. + + bw : array-like + collection of bandwidth values consisting of either a distance or N + nearest neighbors; user specified or obtained using + Sel_BW with fb=True. Order of values should the same as + the order of columns associated with X + + family : family object + underlying probability model; provides + distribution-specific calculations + + sigma2_v1 : boolean + specify form of corrected denominator of sigma squared to use for + model diagnostics; Acceptable options are: + + 'True': n-tr(S) (defualt) + 'False': n-2(tr(S)+tr(S'S)) + + kernel : string + type of kernel function used to weight observations; + available options: + 'gaussian' + 'bisquare' + 'exponential' + + fixed : boolean + True for distance based kernel function and False for + adaptive (nearest neighbor) kernel function (default) + + constant : boolean + True to include intercept (default) in model and False to exclude + intercept. + + dmat : array + n*n, distance matrix between calibration locations used + to compute weight matrix. Defaults to None and is + primarily for avoiding duplicate computation during + bandwidth selection. + + sorted_dmat : array + n*n, sorted distance matrix between calibration locations used + to compute weight matrix. Defaults to None and is + primarily for avoiding duplicate computation during + bandwidth selection. 
+ + spherical : boolean + True for shperical coordinates (long-lat), + False for projected coordinates (defalut). + + n : integer + number of observations + + k : integer + number of independent variables + + mean_y : float + mean of y + + std_y : float + standard deviation of y + + fit_params : dict + parameters passed into fit method to define estimation + routine + + W : array-like + list of n*n arrays, spatial weights matrices for weighting all + observations from each calibration point: one for each + covariate (k) Examples -------- #basic model calibration - - >>> import gwr - >>> import pysal - >>> from gwr.gwr import MGWR - >>> from gwr.sel_bw import Sel_BW - >>> data = pysal.open(pysal.examples.get_path('GData_utm.csv')) - >>> coords = zip(data.bycol('X'), data.by_col('Y')) + >>> import libpysal as ps + >>> from mgwr.gwr import MGWR + >>> from mgwr.sel_bw import Sel_BW + >>> data = ps.io.open(ps.examples.get_path('GData_utm.csv')) + >>> coords = list(zip(data.by_col('X'), data.by_col('Y'))) >>> y = np.array(data.by_col('PctBach')).reshape((-1,1)) >>> rural = np.array(data.by_col('PctRural')).reshape((-1,1)) >>> fb = np.array(data.by_col('PctFB')).reshape((-1,1)) >>> african_amer = np.array(data.by_col('PctBlack')).reshape((-1,1)) >>> X = np.hstack([fb, african_amer, rural]) + >>> X = (X - X.mean(axis=0)) / X.std(axis=0) + >>> y = (y - y.mean(axis=0)) / y.std(axis=0) >>> selector = Sel_BW(coords, y, X, multi=True) - >>> selector.search(bw_min=2, bw_max=159) + >>> selector.search(multi_bw_min=[2]) + [92.0, 101.0, 136.0, 158.0] >>> model = MGWR(coords, y, X, selector, fixed=False, kernel='bisquare', sigma2_v1=True) >>> results = model.fit() - >>> print results.params.shape + >>> print(results.params.shape) (159, 4) """ - def __init__(self, coords, y, X, selector, sigma2_v1=True, kernel='bisquare', - fixed=False, constant=True, dmat=None, sorted_dmat=None, spherical=False): + + def __init__(self, coords, y, X, selector, sigma2_v1=True, + kernel='bisquare', + fixed=False, constant=True, dmat=None, + sorted_dmat=None, spherical=False): """ Initialize class """ self.selector = selector self.bw = self.selector.bw[0] - self.family = Gaussian() #manually set since we only support Gassian MGWR for now + self.family = Gaussian() # manually set since we only support Gassian MGWR for now GWR.__init__(self, coords, y, X, self.bw, family=self.family, - sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed, - constant=constant, dmat=dmat, sorted_dmat=sorted_dmat, - spherical=spherical) + sigma2_v1=sigma2_v1, kernel=kernel, fixed=fixed, + constant=constant, dmat=dmat, sorted_dmat=sorted_dmat, + spherical=spherical) self.selector = selector self.sigma2_v1 = sigma2_v1 self.points = None @@ -1416,21 +1445,25 @@ def __init__(self, coords, y, X, selector, sigma2_v1=True, kernel='bisquare', self.exog_resid = None self.exog_scale = None self_fit_params = None - - #overwrite GWR method to handle multiple BW's + + # overwrite GWR method to handle multiple BW's def _build_W(self, fixed, kernel, coords, bw, points=None): Ws = [] for bw_i in bw: if fixed: try: - W = fk[kernel](coords, bw_i, points, self.dmat, self.sorted_dmat,spherical=self.spherical) - except: - raise #TypeError('Unsupported kernel function ', kernel) + W = fk[kernel](coords, bw_i, points, self.dmat, + self.sorted_dmat, + spherical=self.spherical) + except BaseException: + raise # TypeError('Unsupported kernel function ', kernel) else: try: - W = ak[kernel](coords, bw_i, points, self.dmat, self.sorted_dmat,spherical=self.spherical) - except: - 
raise #TypeError('Unsupported kernel function ', kernel) + W = ak[kernel](coords, bw_i, points, self.dmat, + self.sorted_dmat, + spherical=self.spherical) + except BaseException: + raise # TypeError('Unsupported kernel function ', kernel) Ws.append(W) return Ws @@ -1446,173 +1479,176 @@ def fit(self): predy = np.dot(S, self.y) CCT = np.zeros((self.n, self.k)) for j in range(self.k): - C = np.dot(np.linalg.inv(np.diag(self.X[:,j])), R[:,:,j]) - CCT[:,j] = np.diag(np.dot(C,C.T)) - w = np.ones(self.n) #manually set since we onlly support Gaussian MGWR for now + C = np.dot(np.linalg.inv(np.diag(self.X[:, j])), R[:, :, j]) + CCT[:, j] = np.diag(np.dot(C, C.T)) + # manually set since we onlly support Gaussian MGWR for now + w = np.ones(self.n) return MGWRResults(self, params, predy, S, CCT, R, w) def predict(self): raise NotImplementedError('N/A') + class MGWRResults(GWRResults): """ Parameters ---------- - model : MGWR object - pointer to MGWR object with estimation parameters + model : MGWR object + pointer to MGWR object with estimation parameters - params : array - n*k, estimated coefficients + params : array + n*k, estimated coefficients - predy : array - n*1, predicted y values + predy : array + n*1, predicted y values - S : array - n*n, hat matrix + S : array + n*n, hat matrix - R : array - n*n*k, partial hat matrices for each covariate + R : array + n*n*k, partial hat matrices for each covariate - CCT : array - n*k, scaled variance-covariance matrix - - w : array - n*1, final weight used for iteratively re-weighted least - sqaures; default is None + CCT : array + n*k, scaled variance-covariance matrix + + w : array + n*1, final weight used for iteratively re-weighted least + sqaures; default is None Attributes ---------- - model : GWR Object - points to GWR object for which parameters have been - estimated - - params : array - n*k, parameter estimates - - predy : array - n*1, predicted value of y - - y : array - n*1, dependent variable - - X : array - n*k, independent variable, including constant - - family : family object - underlying probability model; provides - distribution-specific calculations - - n : integer - number of observations - - k : integer - number of independent variables - - df_model : integer - model degrees of freedom - - df_resid : integer - residual degrees of freedom - - scale : float - sigma squared used for subsequent computations - - w : array - n*1, final weights from iteratively re-weighted least - sqaures routine - - resid_response : array - n*1, residuals of the repsonse - - resid_ss : scalar - residual sum of sqaures - - W : array-like - list of n*n arrays, spatial weights matrices for weighting all - observations from each calibration point: one for each - covariate (k) - - S : array - n*n, hat matrix - - R : array - n*n*k, partial hat matrices for each covariate - - CCT : array - n*k, scaled variance-covariance matrix - - ENP : scalar - effective number of paramters, which depends on - sigma2, for the entire model - - ENP_j : array-like - effective number of paramters, which depends on - sigma2, for each covariate in the model - - adj_alpha : array - 3*1, corrected alpha values to account for multiple - hypothesis testing for the 90%, 95%, and 99% confidence - levels; tvalues with an absolute value larger than the - corrected alpha are considered statistically - significant. 
- - adj_alpha_j : array - k*3, corrected alpha values to account for multiple - hypothesis testing for the 90%, 95%, and 99% confidence - levels; tvalues with an absolute value larger than the - corrected alpha are considered statistically - significant. A set of alpha calues is computed for - each covariate in the model. - - tr_S : float - trace of S (hat) matrix - - tr_STS : float - trace of STS matrix - - R2 : float - R-squared for the entire model (1- RSS/TSS) - - aic : float - Akaike information criterion - - aicc : float - corrected Akaike information criterion to account - to account for model complexity (smaller - bandwidths) - - bic : float - Bayesian information criterio - - sigma2 : float - sigma squared (residual variance) that has been - corrected to account for the ENP - - std_res : array - n*1, standardised residuals - - bse : array - n*k, standard errors of parameters (betas) - - influ : array - n*1, leading diagonal of S matrix - - CooksD : array - n*1, Cook's D - - tvalues : array - n*k, local t-statistics - - llf : scalar - log-likelihood of the full model; see - pysal.contrib.glm.family for damily-sepcific - log-likelihoods - - mu : array - n*, flat one dimensional array of predicted mean - response value from estimator + model : GWR Object + points to GWR object for which parameters have been + estimated + + params : array + n*k, parameter estimates + + predy : array + n*1, predicted value of y + + y : array + n*1, dependent variable + + X : array + n*k, independent variable, including constant + + family : family object + underlying probability model; provides + distribution-specific calculations + + n : integer + number of observations + + k : integer + number of independent variables + + df_model : integer + model degrees of freedom + + df_resid : integer + residual degrees of freedom + + scale : float + sigma squared used for subsequent computations + + w : array + n*1, final weights from iteratively re-weighted least + sqaures routine + + resid_response : array + n*1, residuals of the repsonse + + resid_ss : scalar + residual sum of sqaures + + W : array-like + list of n*n arrays, spatial weights matrices for weighting all + observations from each calibration point: one for each + covariate (k) + + S : array + n*n, hat matrix + + R : array + n*n*k, partial hat matrices for each covariate + + CCT : array + n*k, scaled variance-covariance matrix + + ENP : scalar + effective number of paramters, which depends on + sigma2, for the entire model + + ENP_j : array-like + effective number of paramters, which depends on + sigma2, for each covariate in the model + + adj_alpha : array + 3*1, corrected alpha values to account for multiple + hypothesis testing for the 90%, 95%, and 99% confidence + levels; tvalues with an absolute value larger than the + corrected alpha are considered statistically + significant. + + adj_alpha_j : array + k*3, corrected alpha values to account for multiple + hypothesis testing for the 90%, 95%, and 99% confidence + levels; tvalues with an absolute value larger than the + corrected alpha are considered statistically + significant. A set of alpha calues is computed for + each covariate in the model. 
         """
         n = self.n
         if critical_t is not None:
-            critical = np.array(critical_t)
+            critical = np.array(critical_t)
         elif alpha is not None and critical_t is None:
             critical = self.critical_tval(alpha=alpha)
         elif alpha is None and critical_t is None:
             critical = self.critical_tval()
-
-        subset = (self.tvalues < critical) & (self.tvalues > -1.0*critical)
+
+        subset = (self.tvalues < critical) & (self.tvalues > -1.0 * critical)
         tvalues = self.tvalues.copy()
         tvalues[subset] = 0
         return tvalues
-
+
     @cache_readonly
     def RSS(self):
-        raise NotImplementedError('Not yet implemented for multiple bandwidths')
-
+        raise NotImplementedError(
+            'Not yet implemented for multiple bandwidths')
+
     @cache_readonly
     def TSS(self):
-        raise NotImplementedError('Not yet implemented for multiple bandwidths')
-
+        raise NotImplementedError(
+            'Not yet implemented for multiple bandwidths')
+
     @cache_readonly
     def localR2(self):
-        raise NotImplementedError('Not yet implemented for multiple bandwidths')
-
+        raise NotImplementedError(
+            'Not yet implemented for multiple bandwidths')
+
     @cache_readonly
     def y_bar(self):
-        raise NotImplementedError('Not yet implemented for multiple bandwidths')
-
+        raise NotImplementedError(
+            'Not yet implemented for multiple bandwidths')
+
     @cache_readonly
     def predictions(self):
         raise NotImplementedError('Not yet implemented for MGWR')
-
+
     def local_collinearity(self):
         """
         Computes several indicators of multicollinearity within a geographically
         weighted design matrix, including:
-
+
         local condition number (n, 1)
-        local variance-decomposition proportions (n, p)
-
+        local variance-decomposition proportions (n, p)
+
         Returns four arrays with the order and dimensions listed above where n
         is the number of locations used as calibrations points and p is the
-        nubmer of explanatory variables
+        number of explanatory variables
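+
+        Examples
+        --------
+        Illustrative call, assuming ``results`` is a fitted MGWRResults
+        instance::
+
+            local_CN, VDP = results.local_collinearity()
+            # local_CN: (n, 1) condition numbers; VDP: (n, p) proportions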
         """
         x = self.X
-        w = self.W
+        w = self.W
         nvar = x.shape[1]
         nrow = self.n
         vdp_idx = np.ndarray((nrow, nvar))
@@ -1755,24 +1795,24 @@ def local_collinearity(self):
             for j in range(nvar):
                 wi = w[j][i]
                 sw = np.sum(wi)
-                wi = wi/sw
-                xw[:,j] = x[:,j] * wi
+                wi = wi / sw
+                xw[:, j] = x[:, j] * wi
             sxw = np.sqrt(np.sum(xw**2, axis=0))
-            sxw = np.transpose(xw.T / sxw.reshape((nvar,1)))
-            svdx = np.linalg.svd(sxw)
-            vdp_idx[i,] = svdx[1][0]/svdx[1]
-
-            phi = np.dot(svdx[2].T, np.diag(1/svdx[1]))
+            sxw = np.transpose(xw.T / sxw.reshape((nvar, 1)))
+            svdx = np.linalg.svd(sxw)
+            vdp_idx[i, ] = svdx[1][0] / svdx[1]
+
+            phi = np.dot(svdx[2].T, np.diag(1 / svdx[1]))
             phi = np.transpose(phi**2)
             pi_ij = phi / np.sum(phi, axis=0)
-            vdp_pi[i,:,:] = pi_ij
-
-        local_CN = vdp_idx[:, nvar-1].reshape((-1,1))
-        VDP = vdp_pi[:,nvar-1,:]
-
+            vdp_pi[i, :, :] = pi_ij
+
+        local_CN = vdp_idx[:, nvar - 1].reshape((-1, 1))
+        VDP = vdp_pi[:, nvar - 1, :]
+
         return local_CN, VDP
-
+
     def spatial_variability(self, selector, n_iters=1000, seed=None):
         """
         Method to compute a Monte Carlo test of spatial variability for each
@@ -1786,7 +1826,7 @@ def spatial_variability(self, selector, n_iters=1000, seed=None):
                           should be the sel_bw object used to select a bandwidth
                           for the gwr model that produced the surfaces that are
                           being tested for spatial variation
-
+
         n_iters         : int
                           the number of Monte Carlo iterations to include for the
                           tests of spatial variability.
@@ -1803,27 +1843,27 @@ def spatial_variability(self, selector, n_iters=1000, seed=None):
                          a list of psuedo p-values that correspond to the
                          model parameter surfaces. Allows us to assess the
                          probability of obtaining the observed spatial
-                         variation of a given surface by random chance.
+                         variation of a given surface by random chance.
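+
+        Examples
+        --------
+        A sketch, assuming ``results`` is a fitted MGWRResults instance and
+        ``selector`` is the Sel_BW object used to calibrate it (a small
+        n_iters keeps the example cheap; use more in practice)::
+
+            p_vals = results.spatial_variability(selector, n_iters=100)
+            # one pseudo p-value per parameter surface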
         """
         temp_sel = copy.deepcopy(selector)

         if seed is None:
-            np.random.seed(5536)
+            np.random.seed(5536)
         else:
             np.random.seed(seed)
         search_params = temp_sel.search_params

         if self.model.constant:
-            X = self.X[:,1:]
+            X = self.X[:, 1:]
         else:
             X = self.X

-        init_sd = np.std(self.params, axis=0)
+        init_sd = np.std(self.params, axis=0)
         SDs = []
-
+
         for x in range(n_iters):
             temp_coords = np.random.permutation(self.model.coords)
             temp_sel.coords = temp_coords
@@ -1832,10 +1872,10 @@ def spatial_variability(self, selector, n_iters=1000, seed=None):
             temp_params = temp_sel.params
             temp_sd = np.std(temp_params, axis=0)
             SDs.append(temp_sd)
-
+
         p_vals = (np.sum(np.array(SDs) > init_sd, axis=0) / float(n_iters))
         return p_vals
-
+
     def summary(self):
         """
         Print out MGWR summary