"""Plain-text summary tables for fitted GWR and MGWR results.

Each ``summary*`` function takes a fitted results object (passed as
``self``) and returns one section of the report as a string.  The
``summary()`` methods added to ``mgwr/gwr.py`` in the same change print
``summaryModel(self) + summaryGLM(self) + summaryGWR(self)`` for GWR and
``summaryModel(self) + summaryGLM(self) + summaryMGWR(self)`` for MGWR.
"""
import numpy as np
from spglm.family import Gaussian, Binomial, Poisson
from spglm.glm import GLM
from .diagnostics import get_AICc

# Every row of the report is laid out on a 75-character grid.
_WIDTH = 75


def _rule(char):
    """Return a full-width horizontal rule made of ``char``."""
    return char * _WIDTH + '\n'


def _stat_lines(pairs):
    """Format ``(label, number)`` tuples as label-left / value-right rows."""
    return ''.join("%-62s %12.3f\n" % pair for pair in pairs)


def _kernel_line(model):
    """Format the 'Spatial kernel:' row for a fixed or adaptive kernel.

    Both variants use the same ``%-54s %20s`` layout so the value is
    right-aligned with the 75-character rules whichever branch is taken.
    """
    kind = 'Fixed ' if model.fixed else 'Adaptive '
    return "%-54s %20s\n" % ('Spatial kernel:', kind + model.kernel)


def _param_table(params, names):
    """Tabulate mean/std/min/median/max of each column of ``params``."""
    rows = [
        "%-20s %10s %10s %10s %10s %10s\n"
        % ('Variable', 'Mean', 'STD', 'Min', 'Median', 'Max'),
        "%-20s %10s %10s %10s %10s %10s\n"
        % ('-' * 20, '-' * 10, '-' * 10, '-' * 10, '-' * 10, '-' * 10),
    ]
    for i, name in enumerate(names):
        col = params[:, i]
        rows.append(
            "%-20s %10.3f %10.3f %10.3f %10.3f %10.3f\n"
            % (name, np.mean(col), np.std(col), np.min(col),
               np.median(col), np.max(col)))
    return ''.join(rows)


def summaryModel(self):
    """Build the report header.

    Parameters
    ----------
    self : results object
           must expose ``family``, ``n`` and ``k``.

    Returns
    -------
    summary : string
              '=' rule followed by the family class name, the number of
              observations and the number of covariates.
    """
    summary = _rule('=')
    summary += "%-54s %20s\n" % ('Model type',
                                 self.family.__class__.__name__)
    summary += "%-60s %14d\n" % ('Number of observations:', self.n)
    summary += "%-60s %14d\n\n" % ('Number of covariates:', self.k)
    return summary


def summaryGLM(self):
    """Fit a global GLM on the model data and report its diagnostics.

    Parameters
    ----------
    self : results object
           must expose ``family``, ``k``, ``model.y`` and ``model.X``.

    Returns
    -------
    summary : string
              fit statistics of the global model followed by a
              coefficient table (estimate, SE, t value, p-value).
    """
    XNames = ["X" + str(i) for i in range(self.k)]
    # NOTE(review): constant=False presumably because model.X already
    # carries the intercept column - confirm against the caller.
    glm_rslt = GLM(self.model.y, self.model.X, constant=False,
                   family=self.family).fit()

    # Only three labels differ between Gaussian and other families; the
    # quantities reported are the same attributes of the GLM fit.
    if isinstance(self.family, Gaussian):
        fit_label = 'Residual sum of squares:'
        d2_label = 'R2:'
        adj_d2_label = 'Adj. R2:'
    else:
        fit_label = 'Deviance:'
        d2_label = 'Percent deviance explained:'
        adj_d2_label = 'Adj. percent deviance explained:'

    summary = "%s\n" % ('Global Regression Results')
    summary += _rule('-')
    summary += _stat_lines([
        (fit_label, glm_rslt.deviance),
        ('Log-likelihood:', glm_rslt.llf),
        ('AIC:', glm_rslt.aic),
        ('AICc:', get_AICc(glm_rslt)),
        ('BIC:', glm_rslt.bic),
        (d2_label, glm_rslt.D2),
        (adj_d2_label, glm_rslt.adj_D2),
    ])
    summary += "\n"

    summary += "%-31s %10s %10s %10s %10s\n" % (
        'Variable', 'Est.', 'SE', 't(Est/SE)', 'p-value')
    summary += "%-31s %10s %10s %10s %10s\n" % (
        '-' * 31, '-' * 10, '-' * 10, '-' * 10, '-' * 10)
    for i in range(self.k):
        summary += "%-31s %10.3f %10.3f %10.3f %10.3f\n" % (
            XNames[i], glm_rslt.params[i], glm_rslt.bse[i],
            glm_rslt.tvalues[i], glm_rslt.pvalues[i])
    summary += "\n"
    return summary


def summaryGWR(self):
    """Build the GWR section of the report.

    Parameters
    ----------
    self : results object
           fitted GWR results; must expose the diagnostics read below
           (``tr_S``, ``df_model``, ``llf``, ``aic``, ``aicc``, ``bic``,
           ``adj_alpha``, ``critical_tval``, ``params`` and, for a
           Gaussian family, ``resid_ss``, ``sigma2`` and ``R2``).

    Returns
    -------
    summary : string
              kernel and bandwidth, diagnostic information and summary
              statistics of the local parameter estimates, closed by a
              '=' rule.
    """
    XNames = ["X" + str(i) for i in range(self.k)]

    summary = "%s\n" % ('Geographically Weighted Regression (GWR) Results')
    summary += _rule('-')
    summary += _kernel_line(self.model)
    summary += "%-62s %12.3f\n" % ('Bandwidth used:', self.model.bw)

    summary += "\n%s\n" % ('Diagnostic information')
    summary += _rule('-')

    if isinstance(self.family, Gaussian):
        stats = [
            ('Residual sum of squares:', self.resid_ss),
            ('Effective number of parameters (trace(S)):', self.tr_S),
            ('Degree of freedom (n - trace(S)):', self.df_model),
            ('Sigma estimate:', np.sqrt(self.sigma2)),
            ('Log-likelihood:', self.llf),
            ('AIC:', self.aic),
            ('AICc:', self.aicc),
            ('BIC:', self.bic),
            ('R2:', self.R2),
        ]
    else:
        stats = [
            ('Effective number of parameters (trace(S)):', self.tr_S),
            ('Degree of freedom (n - trace(S)):', self.df_model),
            ('Log-likelihood:', self.llf),
            ('AIC:', self.aic),
            ('AICc:', self.aicc),
            ('BIC:', self.bic),
        ]
    summary += _stat_lines(stats)

    adj_alpha_95 = self.adj_alpha[1]
    summary += _stat_lines([
        ('Adj. alpha (95%):', adj_alpha_95),
        ('Adj. critical t value (95%):', self.critical_tval(adj_alpha_95)),
    ])

    summary += "\n%s\n" % ('Summary Statistics For GWR Parameter Estimates')
    summary += _rule('-')
    summary += _param_table(self.params, XNames)
    summary += _rule('=')
    return summary


def summaryMGWR(self):
    """Build the MGWR section of the report.

    Parameters
    ----------
    self : results object
           fitted MGWR results; must expose ``model`` (with ``fixed``,
           ``kernel``, ``bw`` and ``selector``), ``ENP_j``,
           ``adj_alpha_j``, ``critical_tval`` and the diagnostics read
           below.

    Returns
    -------
    summary : string
              kernel and bandwidth-search settings, per-covariate
              bandwidths, diagnostic information and summary statistics
              of the local parameter estimates, closed by a '=' rule.
    """
    XNames = ["X" + str(i) for i in range(self.k)]
    selector = self.model.selector

    summary = ''
    summary += "%s\n" % (
        'Multi-Scale Geographically Weighted Regression (MGWR) Results')
    summary += _rule('-')
    summary += _kernel_line(self.model)
    summary += "%-54s %20s\n" % ('Criterion for optimal bandwidth:',
                                 selector.criterion)

    soc_type = 'RSS' if selector.rss_score else 'Smoothing f'
    summary += "%-54s %20s\n" % ('Score of Change (SOC) type:', soc_type)
    summary += "%-54s %20s\n\n" % ('Termination criterion for MGWR:',
                                   selector.tol_multi)

    summary += "%s\n" % ('MGWR bandwidths')
    summary += _rule('-')
    summary += "%-15s %14s %10s %16s %16s\n" % (
        'Variable', 'Bandwidth', 'ENP_j', 'Adj t-val(95%)',
        'Adj alpha(95%)')
    # Evaluate the critical t values once instead of once per row.
    crit_tvals = self.critical_tval()
    for j in range(self.k):
        summary += "%-14s %15.3f %10.3f %16.3f %16.3f\n" % (
            XNames[j], self.model.bw[j], self.ENP_j[j], crit_tvals[j],
            self.adj_alpha_j[j, 1])

    summary += "\n%s\n" % ('Diagnostic information')
    summary += _rule('-')
    summary += _stat_lines([
        ('Residual sum of squares:', self.resid_ss),
        ('Effective number of parameters (trace(S)):', self.tr_S),
        ('Degree of freedom (n - trace(S)):', self.df_model),
        ('Sigma estimate:', np.sqrt(self.sigma2)),
        ('Log-likelihood:', self.llf),
        ('AIC:', self.aic),
        ('AICc:', self.aicc),
        ('BIC:', self.bic),
    ])

    summary += "\n%s\n" % ('Summary Statistics For MGWR Parameter Estimates')
    summary += _rule('-')
    summary += _param_table(self.params, XNames)
    summary += _rule('=')
    return summary