Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
161 changes: 161 additions & 0 deletions esda/local_join_count.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator
from libpysal import weights
from esda.crand import (
crand as _crand_plus,
njit as _njit,
_prepare_univariate
)


class Local_Join_Count(BaseEstimator):

    """Univariate Local Join Count Statistic"""

    def __init__(self, connectivity=None, permutations=999, n_jobs=1,
                 keep_simulations=True, seed=None):
        """
        Initialize a Local_Join_Count estimator

        Arguments
        ---------
        connectivity     : spatial weights object
                           the connectivity structure describing
                           the relationships between observed units,
                           e.g. the result of ``libpysal.weights.lat2W`` or
                           ``libpysal.weights.Queen.from_dataframe`` as in
                           the examples of ``fit``.
                           Need not be row-standardized.
        permutations     : int
                           number of random permutations for calculation of
                           pseudo p_values. A falsy value (0/None) skips
                           the conditional randomization entirely.
        n_jobs           : int
                           Number of cores to be used in the conditional
                           randomisation. If -1, all available cores are
                           used.
        keep_simulations : Boolean
                           (default=True)
                           If True, the entire matrix of replications under
                           the null is stored in memory and accessible;
                           otherwise, replications are not saved
        seed             : None/int
                           Seed to ensure reproducibility of conditional
                           randomizations. Must be set here, and not outside
                           of the function, since numba does not correctly
                           interpret external seeds nor
                           numpy.random.RandomState instances.

        Attributes
        ----------
        LJC    : numpy array
                 array containing the univariate
                 Local Join Count (LJC).
        p_sim  : numpy array
                 array containing the simulated
                 p-values for each unit. Entries whose LJC is 0 are NaN.
                 Only set when ``permutations`` is truthy.
        rjoins : second value returned by the conditional randomization
                 routine. Only set when ``permutations`` is truthy.
                 NOTE(review): presumably the simulated statistics when
                 ``keep_simulations`` is True - confirm against esda.crand.

        """

        self.connectivity = connectivity
        self.permutations = permutations
        self.n_jobs = n_jobs
        self.keep_simulations = keep_simulations
        self.seed = seed

    def fit(self, x):
        """
        Arguments
        ---------
        x : numpy.ndarray
            array containing binary (0/1) data

        Returns
        -------
        the fitted estimator.

        Notes
        -----
        Technical details and derivations found in :cite:`AnselinLi2019`.

        The constructor parameters are left untouched on the instance so
        that the estimator can be printed, cloned and re-fitted.

        Examples
        --------
        >>> import numpy as np
        >>> import libpysal
        >>> w = libpysal.weights.lat2W(4, 4)
        >>> x = np.ones(16)
        >>> x[0:8] = 0
        >>> LJC_uni = Local_Join_Count(connectivity=w).fit(x)
        >>> LJC_uni.LJC
        >>> LJC_uni.p_sim

        Guerry data replicating GeoDa tutorial

        >>> import libpysal
        >>> import geopandas as gpd
        >>> guerry = libpysal.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> guerry_ds['SELECTED'] = 0
        >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10997), 'SELECTED'] = 1
        >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
        >>> LJC_uni = Local_Join_Count(connectivity=w).fit(guerry_ds['SELECTED'])
        >>> LJC_uni.LJC
        >>> LJC_uni.p_sim
        """
        # Need to ensure that the np.array() are of
        # dtype='float' for numba
        x = np.array(x, dtype='float')

        # Fill the diagonal with 0s and use binary weights
        w = weights.util.fill_diagonal(self.connectivity, val=0)
        w.transform = 'b'

        self.LJC = self._statistic(x, w)

        if self.permutations:
            self.p_sim, self.rjoins = _crand_plus(
                z=x,
                w=w,
                observed=self.LJC,
                permutations=self.permutations,
                keep=self.keep_simulations,
                n_jobs=self.n_jobs,
                stat_func=_ljc_uni,
                # Forward the seed so the documented reproducibility
                # guarantee actually holds.
                seed=self.seed
            )
            # Set p-values for those with LJC of 0 to NaN
            self.p_sim[self.LJC == 0] = np.nan

        return self

    @staticmethod
    def _statistic(x, w):
        """
        Count, for every unit, the neighbors that share a value of 1 with it.

        Arguments
        ---------
        x : numpy.ndarray
            binary (0/1) values, ordered like ``w.id_order``
        w : spatial weights object providing ``to_adjlist`` and ``id_order``

        Returns
        -------
        numpy array of floats with one count per entry of ``w.id_order``,
        in that order.
        """
        # Create adjacency list. Note that remove_symmetric=False - this is
        # different from the esda.Join_Counts() function.
        adj_list = w.to_adjlist(remove_symmetric=False)
        zseries = pd.Series(x, index=w.id_order)
        focal = zseries.loc[adj_list.focal].values
        neighbor = zseries.loc[adj_list.neighbor].values
        joins = ((focal == 1) & (neighbor == 1)).astype('uint8')
        counts = pd.Series(joins, index=adj_list.focal.values)
        counts = counts.groupby(level=0).sum()
        # groupby sorts by ID and only knows units present in the adjacency
        # list; realign with id_order so the result matches x element-wise.
        counts = counts.reindex(w.id_order, fill_value=0)
        return np.array(counts.values, dtype='float')

# --------------------------------------------------------------
# Conditional Randomization Function Implementations
# --------------------------------------------------------------

# Note: the `scaling` argument is part of the stat_func signature
# but is not used by the function below.

@_njit(fastmath=True)
def _ljc_uni(i, z, permuted_ids, weights_i, scaling):
    """
    Statistic function handed to the conditional randomization routine
    for the univariate local join count.

    ``i``, ``z``, ``permuted_ids`` and ``weights_i`` are forwarded unchanged
    to ``_prepare_univariate``; ``scaling`` is accepted but ignored.
    Returns the first prepared value multiplied by the product of the
    second prepared value with ``weights_i``.
    """
    focal_value, shuffled_values = _prepare_univariate(
        i, z, permuted_ids, weights_i
    )
    weighted_joins = shuffled_values @ weights_i
    return focal_value * weighted_joins
229 changes: 229 additions & 0 deletions esda/local_join_count_bv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
import numpy as np
import pandas as pd
import warnings
from scipy import sparse
from sklearn.base import BaseEstimator
from libpysal import weights
from esda.crand import (
crand as _crand_plus,
njit as _njit,
_prepare_univariate,
_prepare_bivariate
)


class Local_Join_Count_BV(BaseEstimator):

    """Bivariate Local Join Count Statistic"""

    def __init__(self, connectivity=None, permutations=999, n_jobs=1,
                 keep_simulations=True, seed=None):
        """
        Initialize a Local_Join_Count_BV estimator

        Arguments
        ---------
        connectivity     : spatial weights object
                           the connectivity structure describing
                           the relationships between observed units,
                           e.g. the result of ``libpysal.weights.lat2W`` or
                           ``libpysal.weights.Queen.from_dataframe`` as in
                           the examples of ``fit``.
                           Need not be row-standardized.
        permutations     : int
                           number of random permutations for calculation of
                           pseudo p_values. A falsy value (0/None) skips
                           the conditional randomization entirely.
        n_jobs           : int
                           Number of cores to be used in the conditional
                           randomisation. If -1, all available cores are
                           used.
        keep_simulations : Boolean
                           (default=True)
                           If True, the entire matrix of replications under
                           the null is stored in memory and accessible;
                           otherwise, replications are not saved
        seed             : None/int
                           Seed to ensure reproducibility of conditional
                           randomizations. Must be set here, and not outside
                           of the function, since numba does not correctly
                           interpret external seeds nor
                           numpy.random.RandomState instances.

        Attributes
        ----------
        case   : str
                 the ``case`` value passed to ``fit``.
        LJC    : numpy array
                 array containing the requested bivariate or co-location
                 local join count.
        p_sim  : numpy array
                 array containing the simulated
                 p-values for each unit. Entries whose LJC is 0 are NaN.
                 Only set when ``permutations`` is truthy.
        rjoins : second value returned by the conditional randomization
                 routine. Only set when ``permutations`` is truthy.
                 NOTE(review): presumably the simulated statistics when
                 ``keep_simulations`` is True - confirm against esda.crand.

        """

        self.connectivity = connectivity
        self.permutations = permutations
        self.n_jobs = n_jobs
        self.keep_simulations = keep_simulations
        self.seed = seed

    def fit(self, x, y, case="CLC"):
        """
        Arguments
        ---------
        x    : numpy.ndarray
               array containing binary (0/1) data
        y    : numpy.ndarray
               array containing binary (0/1) data
        case : str
               "BJC" for bivariate local join count,
               "CLC" for co-location local join count.
               Details in :cite:`AnselinLi2019`.

        Returns
        -------
        the fitted estimator.

        Raises
        ------
        NotImplementedError
            if ``case`` is neither "BJC" nor "CLC".

        Notes
        -----
        Technical details and derivations can be found in :cite:`AnselinLi2019`.

        The constructor parameters are left untouched on the instance so
        that the estimator can be printed, cloned and re-fitted.

        Examples
        --------
        >>> import numpy as np
        >>> import libpysal
        >>> w = libpysal.weights.lat2W(4, 4)
        >>> x = np.ones(16)
        >>> x[0:8] = 0
        >>> y = [0,1,0,1,1,1,1,1,0,0,1,1,0,0,1,1]
        >>> LJC_BV_C1 = Local_Join_Count_BV(connectivity=w).fit(x, y, case="BJC")
        >>> LJC_BV_C2 = Local_Join_Count_BV(connectivity=w).fit(x, y, case="CLC")
        >>> LJC_BV_C1.LJC
        >>> LJC_BV_C1.p_sim
        >>> LJC_BV_C2.LJC
        >>> LJC_BV_C2.p_sim

        Commpop data replicating GeoDa tutorial (Case 1)

        >>> import libpysal
        >>> import geopandas as gpd
        >>> commpop = gpd.read_file("https://github.com/jeffcsauer/GSOC2020/raw/master/validation/data/commpop.gpkg")
        >>> w = libpysal.weights.Queen.from_dataframe(commpop)
        >>> LJC_BV_Case1 = Local_Join_Count_BV(connectivity=w).fit(commpop['popneg'], commpop['popplus'], case='BJC')
        >>> LJC_BV_Case1.LJC
        >>> LJC_BV_Case1.p_sim

        Guerry data replicating GeoDa tutorial (Case 2)

        >>> import libpysal
        >>> import geopandas as gpd
        >>> guerry = libpysal.examples.load_example('Guerry')
        >>> guerry_ds = gpd.read_file(guerry.get_path('Guerry.shp'))
        >>> guerry_ds['infq5'] = 0
        >>> guerry_ds['donq5'] = 0
        >>> guerry_ds.loc[(guerry_ds['Infants'] > 23574), 'infq5'] = 1
        >>> guerry_ds.loc[(guerry_ds['Donatns'] > 10973), 'donq5'] = 1
        >>> w = libpysal.weights.Queen.from_dataframe(guerry_ds)
        >>> LJC_BV_Case2 = Local_Join_Count_BV(connectivity=w).fit(guerry_ds['infq5'], guerry_ds['donq5'], case='CLC')
        >>> LJC_BV_Case2.LJC
        >>> LJC_BV_Case2.p_sim
        """
        # Reject unknown cases before doing any work.
        if case not in ("BJC", "CLC"):
            raise NotImplementedError(
                f'The requested LJC method ({case}) '
                'is not currently supported!'
            )

        # Need to ensure that the np.array() are of
        # dtype='float' for numba
        x = np.array(x, dtype='float')
        y = np.array(y, dtype='float')

        # Fill the diagonal with 0s and use binary weights
        w = weights.util.fill_diagonal(self.connectivity, val=0)
        w.transform = 'b'

        self.case = case
        self.LJC = self._statistic(x, y, w, case=case)

        if self.permutations:
            # Both cases share the same randomization call and differ only
            # in the statistic evaluated on each permutation.
            stat_func = _ljc_bv_case1 if case == "BJC" else _ljc_bv_case2
            self.p_sim, self.rjoins = _crand_plus(
                z=np.column_stack((x, y)),
                w=w,
                observed=self.LJC,
                permutations=self.permutations,
                # Honour the constructor argument instead of always keeping.
                keep=self.keep_simulations,
                n_jobs=self.n_jobs,
                stat_func=stat_func,
                # Forward the seed so the documented reproducibility
                # guarantee actually holds.
                seed=self.seed
            )
            # Set p-values for those with LJC of 0 to NaN
            self.p_sim[self.LJC == 0] = np.nan

        return self

    @staticmethod
    def _statistic(x, y, w, case):
        """
        Count, for every unit, the neighbor pairs matching the given case.

        Arguments
        ---------
        x    : numpy.ndarray
               binary (0/1) values, ordered like ``w.id_order``
        y    : numpy.ndarray
               binary (0/1) values, ordered like ``w.id_order``
        w    : spatial weights object providing ``to_adjlist`` and
               ``id_order``
        case : str
               "BJC" counts pairs with focal x=1, y=0 and neighbor x=0, y=1;
               "CLC" counts pairs where x and y are 1 for both units.

        Returns
        -------
        numpy array of floats with one count per entry of ``w.id_order``,
        in that order.

        Raises
        ------
        NotImplementedError
            if ``case`` is neither "BJC" nor "CLC".
        """
        if case not in ("BJC", "CLC"):
            raise NotImplementedError(
                f'The requested LJC method ({case}) '
                'is not currently supported!'
            )

        # Create adjacency list. Note that remove_symmetric=False - this is
        # different from the esda.Join_Counts() function.
        adj_list = w.to_adjlist(remove_symmetric=False)

        # First, set up a series that maps the values
        # to the weights table
        zseries_x = pd.Series(x, index=w.id_order)
        zseries_y = pd.Series(y, index=w.id_order)

        # Map the values to the focal (i) values
        focal_x = zseries_x.loc[adj_list.focal].values
        focal_y = zseries_y.loc[adj_list.focal].values

        # Map the values to the neighbor (j) values
        neighbor_x = zseries_x.loc[adj_list.neighbor].values
        neighbor_y = zseries_y.loc[adj_list.neighbor].values

        if case == "BJC":
            joins = (focal_x == 1) & (focal_y == 0) & \
                    (neighbor_x == 0) & (neighbor_y == 1)
        else:
            joins = (focal_x == 1) & (focal_y == 1) & \
                    (neighbor_x == 1) & (neighbor_y == 1)

        counts = pd.Series(joins.astype('uint8'),
                           index=adj_list.focal.values)
        counts = counts.groupby(level=0).sum()
        # groupby sorts by ID and only knows units present in the adjacency
        # list; realign with id_order so the result matches x element-wise.
        counts = counts.reindex(w.id_order, fill_value=0)
        return np.array(counts.values, dtype='float')

# --------------------------------------------------------------
# Conditional Randomization Function Implementations
# --------------------------------------------------------------

# Note: the `scaling` argument is part of the stat_func signature
# but is not used by the functions below.

@_njit(fastmath=True)
def _ljc_bv_case1(i, z, permuted_ids, weights_i, scaling):
    """
    Statistic function handed to the conditional randomization routine
    for the "BJC" case.

    Column 0 of ``z`` holds x and column 1 holds y. The y column is passed
    through ``_prepare_univariate``; the result is x at position ``i``
    multiplied by the product of the prepared y values with ``weights_i``.
    ``scaling`` is accepted but ignored.
    """
    x_column = z[:, 0]
    y_column = z[:, 1]
    _focal_y, shuffled_y = _prepare_univariate(
        i, y_column, permuted_ids, weights_i
    )
    weighted_y = shuffled_y @ weights_i
    return x_column[i] * weighted_y

@_njit(fastmath=True)
def _ljc_bv_case2(i, z, permuted_ids, weights_i, scaling):
    """
    Statistic function handed to the conditional randomization routine
    for the "CLC" (co-location) case.

    Column 0 of ``z`` holds x and column 1 holds y. The prepared x and y
    values from ``_prepare_bivariate`` are multiplied element-wise and
    combined with ``weights_i``; the result is scaled by both focal values
    so that it is zero unless x and y are both non-zero at position ``i``,
    matching the CLC condition used for the observed statistic.
    ``scaling`` is accepted but ignored.
    """
    zx = z[:, 0]
    zy = z[:, 1]
    zxi, zxrand, zyi, zyrand = _prepare_bivariate(i, z, permuted_ids, weights_i)
    # Neighbors count only when x and y are both 1 there.
    zf = zxrand * zyrand
    # The focal unit must be co-located too, hence both focal factors.
    return zx[i] * zy[i] * (zf @ weights_i)
Loading