9 changes: 8 additions & 1 deletion README.md
@@ -19,5 +19,12 @@ Features
- Monte Carlo test for spatial variability of parameter estimate surfaces
- GWR-based spatial prediction
- MGWR model calibration via GAM iterative backfitting for Gaussian model
- Parallel computing for GWR and MGWR
- MGWR covariate-specific inference, including a multiple hypothesis test
correction and local collinearity
- Bandwidth confidence intervals for GWR and MGWR

Citation
--------
Oshan, T. M., Li, Z., Kang, W., Wolf, L. J., & Fotheringham, A. S. (2019). mgwr: A Python implementation of multiscale geographically weighted regression for investigating process spatial heterogeneity and scale. ISPRS International Journal of Geo-Information, 8(6), 269.

71 changes: 71 additions & 0 deletions mgwr/gwr.py
@@ -1053,6 +1053,40 @@ def conf_int(self):
@cache_readonly
def use_t(self):
return None

def get_bws_intervals(self, selector, level=0.95):
    """
    Computes the bandwidth confidence interval (CI) for GWR.
    The CI is based on Akaike weights and the bandwidth search history;
    details are in Li et al. (2020), Annals of the American Association
    of Geographers.

    Returns a tuple with the lower and upper bounds of the bandwidth CI,
    e.g. (100, 300).
    """

    try:
        import pandas as pd
    except ImportError:
        return

    # Get the AICc values and associated bandwidths from the bandwidth
    # search history and assemble them into a DataFrame
    aiccs = pd.DataFrame(list(zip(*selector.sel_hist))[1], columns=["aicc"])
    aiccs['bw'] = list(zip(*selector.sel_hist))[0]
    # Sort the DataFrame by AICc
    aiccs = aiccs.sort_values(by=['aicc'])
    # Calculate delta AICc relative to the best bandwidth
    d_aic_ak = aiccs.aicc - aiccs.aicc.min()
    # Calculate Akaike weights
    w_aic_ak = np.exp(-0.5 * d_aic_ak) / np.sum(np.exp(-0.5 * d_aic_ak))
    aiccs['w_aic_ak'] = w_aic_ak / np.sum(w_aic_ak)
    # Calculate cumulative Akaike weights
    aiccs['cum_w_ak'] = aiccs.w_aic_ak.cumsum()
    # Find the first index at which the cumulative weight reaches the confidence level
    index = len(aiccs[aiccs.cum_w_ak < level]) + 1
    # The CI bounds are the smallest and largest bandwidths in that subset
    interval = (aiccs.iloc[:index, :].bw.min(), aiccs.iloc[:index, :].bw.max())
    return interval
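
To make the Akaike-weight arithmetic concrete, the following self-contained sketch reproduces the same computation on made-up (bandwidth, AICc) pairs; the sel_hist list and all numbers here are purely illustrative.

import numpy as np
import pandas as pd

# Hypothetical (bandwidth, AICc) pairs recorded during a bandwidth search
sel_hist = [(50, 812.4), (80, 805.1), (100, 803.7), (120, 804.2), (150, 809.8)]

aiccs = pd.DataFrame(sel_hist, columns=["bw", "aicc"]).sort_values(by="aicc")
d_aicc = aiccs.aicc - aiccs.aicc.min()                      # delta AICc relative to the best bandwidth
w = np.exp(-0.5 * d_aicc) / np.sum(np.exp(-0.5 * d_aicc))   # Akaike weights, which sum to 1
aiccs["cum_w"] = w.cumsum()                                 # cumulative weights in order of model support

level = 0.95
index = len(aiccs[aiccs.cum_w < level]) + 1                 # smallest set of bandwidths covering 95% of the weight
interval = (aiccs.iloc[:index].bw.min(), aiccs.iloc[:index].bw.max())
print(interval)                                             # spans bandwidths 80 to 120 for these made-up numbers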


def local_collinearity(self):
"""
@@ -1870,6 +1904,43 @@ def y_bar(self):
@cache_readonly
def predictions(self):
raise NotImplementedError('Not yet implemented for MGWR')

# Method for computing bandwidth confidence intervals
def get_bws_intervals(self, selector, level=0.95):
    """
    Computes bandwidth confidence intervals (CIs) for MGWR.
    The CIs are based on Akaike weights and the bandwidth search history;
    details are in Li et al. (2020), Annals of the American Association
    of Geographers.

    Returns a list of confidence intervals, one per covariate,
    e.g. [(40, 60), (100, 180), (150, 300)].
    """
    intervals = []
    try:
        import pandas as pd
    except ImportError:
        return

    for j in range(self.k):
        # Get the AICc values and associated bandwidths for covariate j from the
        # last backfitting iteration and assemble them into a DataFrame
        aiccs = pd.DataFrame(list(zip(*selector.sel_hist[-self.k + j]))[1], columns=["aicc"])
        aiccs['bw'] = list(zip(*selector.sel_hist[-self.k + j]))[0]
        # Sort the DataFrame by AICc
        aiccs = aiccs.sort_values(by=['aicc'])
        # Calculate delta AICc relative to the best bandwidth
        d_aic_ak = aiccs.aicc - aiccs.aicc.min()
        # Calculate Akaike weights
        w_aic_ak = np.exp(-0.5 * d_aic_ak) / np.sum(np.exp(-0.5 * d_aic_ak))
        aiccs['w_aic_ak'] = w_aic_ak / np.sum(w_aic_ak)
        # Calculate cumulative Akaike weights
        aiccs['cum_w_ak'] = aiccs.w_aic_ak.cumsum()
        # Find the first index at which the cumulative weight reaches the confidence level
        index = len(aiccs[aiccs.cum_w_ak < level]) + 1
        # The CI bounds are the smallest and largest bandwidths in that subset
        interval = (aiccs.iloc[:index, :].bw.min(), aiccs.iloc[:index, :].bw.max())
        intervals += [interval]
    return intervals
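
As a usage sketch: the workflow below mirrors how the tests call this method, but the synthetic data, seed, and default settings are assumptions, not something prescribed by this patch.

import numpy as np
from mgwr.sel_bw import Sel_BW
from mgwr.gwr import MGWR

# Small synthetic data set, purely for illustration
np.random.seed(1)
n = 50
coords = np.random.uniform(0, 10, (n, 2))
X = np.random.normal(0, 1, (n, 2))
y = (2 + 1.5 * X[:, 0] - 0.5 * X[:, 1] + np.random.normal(0, 0.5, n)).reshape(-1, 1)

# Multi-scale bandwidth search, then MGWR calibration
selector = Sel_BW(coords, y, X, multi=True)
selector.search()
results = MGWR(coords, y, X, selector).fit()

# One (lower, upper) bandwidth interval per covariate (including the intercept)
print(results.get_bws_intervals(selector))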


def local_collinearity(self):
"""
52 changes: 31 additions & 21 deletions mgwr/search.py
@@ -89,10 +89,15 @@ def golden_section(a, c, delta, function, tol, max_iter, int_score=False,
d = c - delta * np.abs(c - a)

output.append((opt_val, opt_score))

opt_val = np.round(opt_val, 2)
if (opt_val, opt_score) not in output:
output.append((opt_val, opt_score))

diff = score_b - score_d
score = opt_score

return np.round(opt_val, 2), opt_score, output
return opt_val, opt_score, output
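
For context, since only the tail of golden_section appears in this hunk: a bare-bones golden-section bandwidth search follows the pattern sketched below. The names, default values, and stopping details are illustrative; the library version additionally caches scores, handles integer bandwidths, and prints verbose output.

import numpy as np

def golden_section_sketch(a, c, criterion, delta=0.38197, tol=1.0e-6, max_iter=200):
    # Minimal golden-section minimizer over the bandwidth range [a, c];
    # criterion maps a bandwidth to a score such as AICc.
    b = a + delta * np.abs(c - a)
    d = c - delta * np.abs(c - a)
    output = []
    opt_val, opt_score = None, np.inf
    for _ in range(max_iter):
        score_b, score_d = criterion(b), criterion(d)
        output.extend([(b, score_b), (d, score_d)])
        if score_b <= score_d:      # minimum lies in [a, d]; shrink from the right
            opt_val, opt_score = b, score_b
            c = d
        else:                       # minimum lies in [b, c]; shrink from the left
            opt_val, opt_score = d, score_d
            a = b
        b = a + delta * np.abs(c - a)
        d = c - delta * np.abs(c - a)
        if np.abs(score_b - score_d) <= tol:
            break
    return np.round(opt_val, 2), opt_score, output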


def equal_interval(l_bound, u_bound, interval, function, int_score=False,
@@ -138,19 +143,10 @@ def equal_interval(l_bound, u_bound, interval, function, int_score=False,
print(score_a)
print("Bandwidth:", a, ", score:", "{0:.2f}".format(score_a[0]))

score_c = function(c)
if verbose:
print("Bandwidth:", c, ", score:", "{0:.2f}".format(score_c[0]))

output.append((a, score_a))
output.append((c, score_c))

if score_a < score_c:
opt_val = a
opt_score = score_a
else:
opt_val = c
opt_score = score_c
opt_val = a
opt_score = score_a

while b < c:
score_b = function(b)
@@ -163,6 +159,16 @@ def equal_interval(l_bound, u_bound, interval, function, int_score=False,
opt_score = score_b
b = b + interval

score_c = function(c)
if verbose:
print("Bandwidth:", c, ", score:", "{0:.2f}".format(score_c[0]))

output.append((c, score_c))

if score_c < opt_score:
opt_val = c
opt_score = score_c

return opt_val, opt_score, output
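
The reordered block above simply defers evaluating the upper bound c until after the interior grid has been scanned. Conceptually, equal-interval search is a grid search over candidate bandwidths, roughly as in this illustrative sketch (names are not the library's):

import numpy as np

def equal_interval_sketch(l_bound, u_bound, interval, criterion):
    # Evaluate the criterion (e.g. AICc) on a fixed grid of bandwidths,
    # including both bounds, and keep the best-scoring candidate.
    candidates = list(np.arange(l_bound, u_bound, interval)) + [u_bound]
    scores = [criterion(bw) for bw in candidates]
    best = int(np.argmin(scores))
    return candidates[best], scores[best], list(zip(candidates, scores))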


@@ -189,8 +195,9 @@ def multi_bw(init, y, X, n, k, family, tol, max_iter, rss_score, gwr_func,
scores = []
delta = 1e6
BWs = []
bw_stable_counter = np.ones(k)
bw_stable_counter = 0
bws = np.empty(k)
gwr_sel_hist = []

try:
from tqdm.auto import tqdm #if they have it, let users have a progress bar
@@ -209,23 +216,26 @@ def tqdm(x, desc=''): #otherwise, just passthrough the range
temp_X = X[:, j].reshape((-1, 1))
bw_class = bw_func(temp_y, temp_X)

if np.all(bw_stable_counter == bws_same_times):
#If in backfitting, all bws not changing in bws_same_times (default 3) iterations
if bw_stable_counter >= bws_same_times:
#If in backfitting, all bws not changing in bws_same_times (default 5) iterations
bw = bws[j]
else:
bw = sel_func(bw_class, multi_bw_min[j], multi_bw_max[j])
if bw == bws[j]:
bw_stable_counter[j] += 1
else:
bw_stable_counter = np.ones(k)
gwr_sel_hist.append(deepcopy(bw_class.sel_hist))

optim_model = gwr_func(temp_y, temp_X, bw)
err = optim_model.resid_response.reshape((-1, 1))
param = optim_model.params.reshape((-1, ))
new_XB[:, j] = optim_model.predy.reshape(-1)
params[:, j] = param
bws[j] = bw


#If bws remain the same as in the previous iteration
if (iters > 1) and np.all(BWs[-1] == bws):
bw_stable_counter += 1
else:
bw_stable_counter = 0

num = np.sum((new_XB - XB)**2) / n
den = np.sum(np.sum(new_XB, axis=1)**2)
score = (num / den)**0.5
@@ -248,4 +258,4 @@ def tqdm(x, desc=''): #otherwise, just passthrough the range
break

opt_bws = BWs[-1]
return (opt_bws, np.array(BWs), np.array(scores), params, err, bw_gwr)
return (opt_bws, np.array(BWs), np.array(scores), params, err, gwr_sel_hist, bw_gwr)
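
The selector stores this enlarged return value, which is why sel_bw.py below indexes self.bw[-2] for the search history and self.bw[-1] for the initial GWR bandwidth. Unpacked explicitly it reads roughly as follows; the variable names are descriptive, not the library's, and selector is assumed to be a Sel_BW instance after a completed multi-bandwidth search.

(opt_bws,      # final bandwidth vector, one bandwidth per covariate
 bw_history,   # bandwidth vectors from every backfitting iteration
 soc,          # convergence (SOC) scores per iteration
 params,       # backfitted local parameter estimates
 err,          # residuals from the final backfitting iteration
 sel_hist,     # per-covariate (bandwidth, AICc) search histories, read by get_bws_intervals
 bw_init       # single bandwidth of the initial GWR model
 ) = selector.bw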
20 changes: 14 additions & 6 deletions mgwr/sel_bw.py
@@ -10,7 +10,7 @@
from scipy.spatial.distance import pdist
from scipy.optimize import minimize_scalar
from spglm.family import Gaussian, Poisson, Binomial
from .kernels import Kernel
from .kernels import Kernel, local_cdist
from .gwr import GWR
from .search import golden_section, equal_interval, multi_bw
from .diagnostics import get_AICc, get_AIC, get_BIC, get_CV
@@ -201,7 +201,7 @@ def search(self, search_method='golden_section', criterion='AICc',
max_iter=200, init_multi=None, tol_multi=1.0e-5,
rss_score=False, max_iter_multi=200, multi_bw_min=[None],
multi_bw_max=[None
], bws_same_times=3, pool=None, verbose=False):
], bws_same_times=5, pool=None, verbose=False):
"""
Method to select one unique bandwidth for a gwr model or a
bandwidth vector for a mgwr model.
@@ -245,7 +245,7 @@ def search(self, search_method='golden_section', criterion='AICc',
routine and False to use a smooth function; default is
False
bws_same_times : If bandwidths stay the same between iterations for
bws_same_times (default 3) in backfitting, then use the
bws_same_times (default 5) in backfitting, then use the
current set of bandwidths as final bandwidths.
pool : A multiprocessing Pool object to enable parallel fitting;
default is None
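
An illustrative call combining the two parameters documented above, assuming selector is a Sel_BW instance set up for MGWR (multi=True); the pool size and flag values are arbitrary.

from multiprocessing import Pool

with Pool(4) as pool:
    bws = selector.search(bws_same_times=5, pool=pool, verbose=True)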
@@ -312,10 +312,12 @@ def search(self, search_method='golden_section', criterion='AICc',
if self.multi:
self._mbw()
self.params = self.bw[3] #params n by k
self.sel_hist = self.bw[-2] #bw searching history
self.bw_init = self.bw[
-1] #scalar, optimal bw from initial gwr model
else:
self._bw()
self.sel_hist = self.bw[-1]

self.pool = None
return self.bw[0]
@@ -418,9 +420,15 @@ def _init_section(self, X_glob, X_loc, coords, constant):
a = 40 + 2 * n_vars
c = n
else:
sq_dists = pdist(coords)
a = np.min(sq_dists) / 2.0
c = np.max(sq_dists) * 2.0
#Smallest and largest pairwise distances, computed with local_cdist
#so that spherical coordinates are supported
min_dist = np.min(np.array([np.min(np.delete(
    local_cdist(coords[i], coords, spherical=self.spherical), i))
    for i in range(n)]))
max_dist = np.max(np.array([np.max(
    local_cdist(coords[i], coords, spherical=self.spherical))
    for i in range(n)]))

a = min_dist / 2.0
c = max_dist * 2.0

if self.bw_min is not None:
a = self.bw_min
6 changes: 5 additions & 1 deletion mgwr/tests/test_gwr.py
@@ -361,9 +361,13 @@ def test_MGWR(self):
rslt.filter_tvals(),
self.MGWR[[s + "_filter_tvalues" for s in varnames]].values,
atol=1e-07)

np.testing.assert_allclose(rslt.get_bws_intervals(selector),
[(92.0, 99.0), (99.0, 101.0), (99.0, 136.0), (122.0, 158.0)])

np.testing.assert_allclose(rslt.local_collinearity()[0].flatten(),
self.MGWR.local_collinearity, atol=1e-07)

def test_Prediction(self):
coords = np.array(self.coords)
index = np.arange(len(self.y))