pysal · sjsrey · Sep 17, 2025 · Feb 14, 2024 · Feb 14, 2024 · Feb 15, 2024
diff --git a/esda/crand.py b/esda/crand.py
@@ -4,25 +4,13 @@
 
 import os
 import warnings
-
+from .significance import _permutation_significance
 import numpy as np
 
 try:
-    from numba import boolean, jit, njit, prange
+    from numba import boolean, njit, prange
 except (ImportError, ModuleNotFoundError):
-
-    def jit(*dec_args, **dec_kwargs):  # noqa: ARG001
-        """
-        decorator mimicking numba.jit
-        """
-
-        def intercepted_function(f, *f_args, **f_kwargs):  # noqa: ARG001
-            return f
-
-        return intercepted_function
-
-    njit = jit
-
+    from libpysal.common import jit as njit
     prange = range
     boolean = bool
 
@@ -35,7 +23,9 @@ def intercepted_function(f, *f_args, **f_kwargs):  # noqa: ARG001
 
 
 @njit(fastmath=True)
-def vec_permutations(max_card: int, n: int, k_replications: int, seed: int):
+def vec_permutations(
+    max_card: int, n: int, k_replications: int, seed: int
+    ):
     """
     Generate `max_card` permuted IDs, sampled from `n` without replacement,
     `k_replications` times
@@ -75,6 +65,7 @@ def crand(
     scaling=None,
     seed=None,
     island_weight=0,
+    alternative=None
 ):
     """
     Conduct conditional randomization of a given input using the provided
@@ -147,6 +138,24 @@ def crand(
             f"conditional randomization. Recieved `z` of shape {z.shape}"
         )
 
+    if alternative is None:
+        warnings.warn(
+            "The alternative hypothesis for conditional randomization"
+            " is changing in the next major release of esda. We recommend"
+            " setting alternative='two-sided', which will generally"
+            " double the p-value returned." 
+            " To retain the current behavior, set alternative='directed'."
+            " We strongly recommend moving to alternative='two-sided'.",
+            DeprecationWarning,
+        )
+        # TODO: replace this with 'two-sided' by next major release
+        alternative = 'directed'
+    if alternative not in ("two-sided", "greater", "lesser", "directed", "folded"):
+        raise ValueError(
+            f"alternative='{alternative}' provided, but is not"
+            f" one of the supported options: 'two-sided', 'greater', 'lesser', 'directed', 'folded')"
+        )
+
     # paralellise over permutations?
     if seed is None:
         seed = np.random.randint(12345, 12345000)
@@ -163,7 +172,7 @@ def crand(
         adj_matrix.setdiag(0)
         adj_matrix.eliminate_zeros()
     # extract the weights from a now no-self-weighted adj_matrix
-    other_weights = adj_matrix.data
+    other_weights = adj_matrix.data.astype(z.dtype) # cast is forced by @ in numba
     # use the non-self weight as the cardinality, since
     # this is the set we have to randomize.
     # if there is a self-neighbor, we need to *not* shuffle the
@@ -185,7 +194,7 @@ def crand(
             n_jobs = 1
 
     if n_jobs == 1:
-        larger, rlocals = compute_chunk(
+        p_sims, rlocals = compute_chunk(
             0,  # chunk start
             z,  # chunked z, for serial this is the entire data
             z,  # all z, for serial this is also the entire data
@@ -198,14 +207,15 @@ def crand(
             keep,  # whether or not to keep the local statistics
             stat_func,
             island_weight,
+            alternative=alternative
         )
     else:
         if n_jobs == -1:
             n_jobs = os.cpu_count()
         if n_jobs > len(z):
             n_jobs = len(z)
         # Parallel implementation
-        larger, rlocals = parallel_crand(
+        p_sims, rlocals = parallel_crand(
             z,
             observed,
             cardinalities,
@@ -217,13 +227,10 @@ def crand(
             keep,
             stat_func,
             island_weight,
+            alternative=alternative
         )
 
-    low_extreme = (permutations - larger) < larger
-    larger[low_extreme] = permutations - larger[low_extreme]
-    p_sim = (larger + 1.0) / (permutations + 1.0)
-
-    return p_sim, rlocals
+    return p_sims, rlocals
 
 
 @njit(parallel=False, fastmath=True)
@@ -240,6 +247,7 @@ def compute_chunk(
     keep: bool,
     stat_func,
     island_weight: float,
+    alternative: str 
 ):
     """
     Compute conditional randomisation for a single chunk
@@ -302,11 +310,13 @@ def compute_chunk(
         the null of spatial randomness
     """
     chunk_n = z_chunk.shape[0]
-    n = z.shape[0]
-    larger = np.zeros((chunk_n,), dtype=np.int64)
+    n_samples = z.shape[0]
+    p_permutations, k_max_card = permuted_ids.shape
+    p_sims = np.zeros((chunk_n,), dtype=np.float32)
     rlocals = np.empty((chunk_n, permuted_ids.shape[0])) if keep else np.empty((1, 1))
 
-    mask = np.ones((n,), dtype=np.int8) == 1
+
+    mask = np.ones((n_samples,), dtype=np.int8) == 1
     wloc = 0
 
     for i in range(chunk_n):
@@ -323,10 +333,13 @@ def compute_chunk(
         wloc += cardinality
         mask[chunk_start + i] = False
         rstats = stat_func(chunk_start + i, z, permuted_ids, weights_i, scaling)
+        p_sims[i] = _permutation_significance(
+            observed[i], rstats, alternative=alternative
+        ).item()
         if keep:
             rlocals[i] = rstats
-        larger[i] = np.sum(rstats >= observed[i])
-    return larger, rlocals
+
+    return p_sims, rlocals
 
 
 #######################################################################
@@ -449,6 +462,7 @@ def parallel_crand(
     keep: bool,
     stat_func,
     island_weight,
+    alternative: str = 'directed'
 ):
     """
     Conduct conditional randomization in parallel using numba
@@ -516,8 +530,8 @@ def parallel_crand(
     starts = np.arange(n_jobs + 1) * chunk_size
     # ------------------------------------------------------------------
     # Set up output holders
-    larger = np.zeros((n,), dtype=np.int64)
     rlocals = np.empty((n, permuted_ids.shape[0])) if keep else np.empty((1, 1))
+
     # ------------------------------------------------------------------
     # Joblib parallel loop by chunks
 
@@ -536,14 +550,15 @@ def parallel_crand(
     with parallel_backend("loky", inner_max_num_threads=1):
         worker_out = Parallel(n_jobs=n_jobs)(
             delayed(compute_chunk)(
-                *pars, permuted_ids, scaling, keep, stat_func, island_weight
+                *pars, permuted_ids, scaling, keep, stat_func, island_weight, alternative
             )
             for pars in chunks
         )
-    larger, rlocals = zip(*worker_out, strict=True)
-    larger = np.hstack(larger).squeeze()
+
+    p_sims, rlocals = zip(*worker_out)
+    p_sims = np.hstack(p_sims).squeeze()
     rlocals = np.row_stack(rlocals).squeeze()
-    return larger, rlocals
+    return p_sims, rlocals
 
 
 #######################################################################

diff --git a/esda/moran.py b/esda/moran.py
@@ -217,7 +217,8 @@ def __init__(
             self.EI_sim = sim.sum() / permutations
             self.seI_sim = np.array(sim).std()
             self.VI_sim = self.seI_sim**2
-            self.z_sim = (self.I - self.EI_sim) / self.seI_sim
+            with np.errstate(divide='ignore'):
+                self.z_sim = (self.I - self.EI_sim) / self.seI_sim
             if self.z_sim > 0:
                 self.p_z_sim = stats.norm.sf(self.z_sim)
             else:
@@ -539,7 +540,8 @@ def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS):
             self.EI_sim = sim.sum() / permutations
             self.seI_sim = np.array(sim).std()
             self.VI_sim = self.seI_sim**2
-            self.z_sim = (self.I - self.EI_sim) / self.seI_sim
+            with np.errstate(divide='ignore'):
+                self.z_sim = (self.I - self.EI_sim) / self.seI_sim
             if self.z_sim > 0:
                 self.p_z_sim = stats.norm.sf(self.z_sim)
             else:
@@ -1348,7 +1350,8 @@ def __init__(
                 self.EI_sim = self.sim.mean(axis=0)
                 self.seI_sim = self.sim.std(axis=0)
                 self.VI_sim = self.seI_sim * self.seI_sim
-                self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
+                with np.errstate(divide='ignore'):
+                    self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
                 self.p_z_sim = stats.norm.sf(np.abs(self.z_sim))
             else:
                 self.sim = self.rlisas = None
@@ -1825,7 +1828,8 @@ def __init__(
                 self.EI_sim = sim.mean(axis=0)
                 self.seI_sim = sim.std(axis=0)
                 self.VI_sim = self.seI_sim * self.seI_sim
-                self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
+                with np.errstate(divide='ignore'):
+                    self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
                 self.p_z_sim = stats.norm.sf(np.abs(self.z_sim))
 
     def __calc(self):

diff --git a/esda/significance.py b/esda/significance.py
@@ -0,0 +1,109 @@
+import numpy as np
+import warnings
+
+try:
+    from numba import njit
+except (ImportError, ModuleNotFoundError):
+    from libpysal.common import jit as njit
+
+
+def calculate_significance(test_stat, reference_distribution, alternative="two-sided"):
+    """
+    Calculate a pseudo p-value from a reference distribution.
+
+    Pseudo-p values are calculated using the formula (M + 1) / (R + 1). Where R is the number of simulations
+    and M is the number of times that the simulated value was equal to, or more extreme than the observed test statistic.
+
+    Parameters
+    ----------
+    test_stat: float or numpy.ndarray
+        The observed test statistic, or a vector of observed test statistics
+    reference_distribution: numpy.ndarray
+        A numpy array containing simulated test statistics as a result of conditional permutation.
+    alternative: string
+        One of 'two-sided', 'lesser', 'greater', 'folded', or 'directed'. Indicates the alternative hypothesis.
+        - 'two-sided': the observed test statistic is in either tail of the reference distribution. This is an un-directed alternative hypothesis.
+        - 'folded': the observed test statistic is an extreme value of the reference distribution folded about its mean. This is an un-directed alternative hypothesis.
+        - 'lesser': the observed test statistic is small relative to the reference distribution. This is a directed alternative hypothesis.
+        - 'greater': the observed test statistic is large relative to the reference distribution. This is a directed alternative hypothesis. 
+        - 'directed': the observed test statistic is in either tail of the reference distribution, but the tail is selected depending on the test statistic. This is a directed alternative hypothesis, but the direction is chosen dependent on the data. This is not advised, and included solely to reproduce past results.
+
+    Notes
+    -----
+
+    the directed p-value is half of the two-sided p-value, and corresponds to running the
+    lesser and greater tests, then picking the smaller significance value. This is not advised, 
+    since the p-value will be uniformly too small. 
+    """
+    reference_distribution = np.atleast_2d(reference_distribution)
+    n_samples, p_permutations = reference_distribution.shape
+    test_stat = np.atleast_2d(test_stat).reshape(n_samples, -1)
+    if alternative not in (
+        'folded',
+        'two-sided',
+        'greater',
+        'lesser',
+        'directed'
+    ):
+        raise ValueError(
+            f"alternative='{alternative}' provided, but is not"
+            f" one of the supported options: 'two-sided', 'greater', 'lesser', 'directed', 'folded')"
+            )
+    return _permutation_significance(
+        test_stat, 
+        reference_distribution,
+        alternative=alternative
+        )
+
+@njit(parallel=False, fastmath=False)
+def _permutation_significance(test_stat, reference_distribution, alternative='two-sided'):
+    reference_distribution = np.atleast_2d(reference_distribution)
+    n_samples, p_permutations = reference_distribution.shape
+    if alternative == "directed":
+        larger = (reference_distribution >= test_stat).sum(axis=1)
+        low_extreme = (p_permutations - larger) < larger
+        larger[low_extreme] = p_permutations - larger[low_extreme]
+        p_value = (larger + 1.0) / (p_permutations + 1.0)
+    elif alternative == "lesser":
+        p_value = (np.sum(reference_distribution <= test_stat, axis=1) + 1) / (
+            p_permutations + 1
+        )
+    elif alternative == "greater":
+        p_value = (np.sum(reference_distribution >= test_stat, axis=1) + 1) / (
+            p_permutations + 1
+        )
+    elif alternative == "two-sided":
+        # find percentile p at which the test statistic sits
+        # find "synthetic" test statistic at 1-p
+        # count how many observations are outisde of (p, 1-p)
+        # including the test statistic and its synthetic pair
+        lows = np.empty(n_samples).astype(reference_distribution.dtype)
+        highs = np.empty(n_samples).astype(reference_distribution.dtype)
+        for i in range(n_samples):
+            percentile_i = (reference_distribution[i] <= test_stat).mean()*100
+            p_low = np.minimum(percentile_i, 100-percentile_i)
+            lows[i] = np.percentile(
+                reference_distribution[i], 
+                p_low
+            )
+            highs[i] = np.percentile(
+                reference_distribution[i], 
+                100 - p_low
+            )
+        n_outside = (reference_distribution <= lows[:,None]).sum(axis=1)
+        n_outside += (reference_distribution >= highs[:,None]).sum(axis=1)
+        p_value = (n_outside + 1) / (p_permutations + 1)
+    elif alternative == "folded":
+        means = np.empty((n_samples,1)).astype(reference_distribution.dtype)
+        for i in range(n_samples):
+            means[i] = reference_distribution[i].mean()
+        folded_test_stat = np.abs(test_stat - means)
+        folded_reference_distribution = np.abs(reference_distribution - means)
+        p_value = ((folded_reference_distribution >= folded_test_stat).sum(axis=1) + 1) / (
+            p_permutations + 1
+    	) 
+    else:
+        p_value = np.ones((n_samples, ))*np.nan
+    return p_value
+
+