sccn
diff --git a/src/eegprep/eeg_eegrej.py b/src/eegprep/eeg_eegrej.py
Lines changed: 201 additions & 3 deletions
diff --git a/src/eegprep/pop_saveset.py b/src/eegprep/pop_saveset.py
Lines changed: 73 additions & 17 deletions
@@ -1,6 +1,204 @@
 import numpy as np
 from copy import deepcopy
-from eegprep.eegrej import eegrej  # expects: eegrej(data, regions, xdur, events) -> (data_out, xmax_rel, event2, boundevents)
+import numpy as np
+from typing import List, Dict, Optional, Tuple
+
+def _is_boundary_event(event: Dict) -> bool:
+    """Return True when ``event`` is an EEGLAB-style boundary marker.
+
+    An event counts as a boundary when its ``"type"`` is the string
+    ``"boundary"`` (compared case-insensitively) or the numeric code ``-99``.
+    Plain ``int``/``float`` values and NumPy integer/floating scalars are
+    accepted for the numeric form. Any other type, a missing ``"type"`` key,
+    NaN and infinities all yield False.
+    """
+    t = event.get("type")
+    if isinstance(t, str):
+        return t.lower() == "boundary"
+    if isinstance(t, (int, float, np.integer, np.floating)):
+        # Exact comparison on purpose: truncating with int() would treat
+        # values such as -99.9 as a boundary, and NaN/inf simply compare False.
+        return bool(t == -99)
+    return False
+
+def _eegrej_unchanged(x, timelength, events):
+    """Build the ``_eegrej`` result for the case where no sample is removed."""
+    events_out = [] if events is None else [dict(ev) for ev in events]
+    # Only sort when every event carries a latency; otherwise keep input order.
+    if events_out and all("latency" in ev for ev in events_out):
+        events_out.sort(key=lambda ev: ev.get("latency", float("inf")))
+    return x, float(timelength), events_out, np.array([], dtype=float)
+
+
+def _eegrej(indata, regions, timelength, events: Optional[List[Dict]] = None) -> Tuple[np.ndarray, float, List[Dict], np.ndarray]:
+    """
+    Remove [beg end] sample ranges (1-based, inclusive) from continuous data
+    and update events (list of dictionaries) in the MATLAB EEGLAB style.
+
+    Inputs
+      - indata: 2D array shaped (channels, frames)
+      - regions: array-like with shape (n_regions, 2), 1-based [beg end] per row.
+                 Rows are rounded, clamped to [1, frames], sorted, and trimmed
+                 so that they do not overlap.
+      - timelength: total duration of the original data in seconds
+      - events: list of dicts with at least key 'latency'; optional keys include
+                'type' and 'duration'. Input dicts are copied, never mutated.
+                Latencies are only shifted when every event has a 'latency';
+                boundary events are only inserted into the returned list when
+                a non-empty events list was given.
+
+    Returns
+      - outdata: data with the rejected columns removed
+      - newt: new total time in seconds (scaled by the fraction of kept frames)
+      - events_out: updated events list of dictionaries (with inserted boundaries)
+      - boundevents: boundary latencies (float, 1-based, with +0.5 convention)
+
+    Raises
+      - ValueError: if indata is not 2D or regions is not shaped (n_regions, 2)
+    """
+    x = np.asarray(indata)
+    if x.ndim != 2:
+        raise ValueError("indata must be 2D (channels, frames)")
+    n = x.shape[1]
+
+    r = np.asarray(regions, dtype=float)
+    if r.size == 0:
+        # Nothing to remove; still return sorted event copies.
+        return _eegrej_unchanged(x, timelength, events)
+
+    if r.ndim != 2 or r.shape[1] != 2:
+        raise ValueError("regions must be of shape (n_regions, 2)")
+
+    if n == 0:
+        # No frames to reject; also avoids dividing by zero when rescaling time.
+        return _eegrej_unchanged(x, timelength, events)
+
+    # Round, clamp to [1, n], sort each row then sort rows (EEGLAB parity)
+    r = np.rint(r).astype(int)
+    r[:, 0] = np.clip(r[:, 0], 1, n)
+    r[:, 1] = np.clip(r[:, 1], 1, n)
+    r.sort(axis=1)
+    r = r[np.lexsort((r[:, 1], r[:, 0]))]
+
+    # Enforce non-overlap by shifting starts forward. The comparison uses the
+    # furthest end seen so far (not just the previous row) so that a region
+    # following a fully nested one cannot overlap an earlier, longer region;
+    # otherwise spans would be counted twice when shifting latencies.
+    prev_end = r[0, 1]
+    for i in range(1, r.shape[0]):
+        if prev_end >= r[i, 0]:
+            r[i, 0] = prev_end + 1
+        prev_end = max(prev_end, r[i, 1])
+    # Drop regions that became empty (start past end) after the adjustment
+    r = r[r[:, 0] <= r[:, 1]]
+    if r.size == 0:
+        return _eegrej_unchanged(x, timelength, events)
+
+    # Reject mask: 1-based inclusive [beg, end] maps to the 0-based slice [beg-1, end)
+    reject = np.zeros(n, dtype=bool)
+    for beg, end in r:
+        reject[beg - 1:end] = True
+
+    # Work on copies so the caller's event dicts are left untouched
+    ori_events: List[Dict] = [] if events is None else [dict(ev) for ev in events]
+    events_out: List[Dict] = [dict(ev) for ev in ori_events]
+
+    # Number of samples removed per region; boundary durations start from these
+    spans = (r[:, 1] - r[:, 0] + 1).astype(int)
+    durations = spans.astype(float)
+
+    if ori_events and all("latency" in ev for ev in ori_events):
+        ori_lat = np.array([float(ev["latency"]) for ev in ori_events], dtype=float)
+        lat = ori_lat.copy()
+        removed = set()
+        for i_region, (beg, end) in enumerate(r):
+            # Events strictly inside (beg, end) are removed with the region
+            inside = np.flatnonzero((ori_lat > beg) & (ori_lat < end)).tolist()
+            removed.update(inside)
+            # Shift every event whose original latency is after the region start
+            lat[ori_lat > beg] -= spans[i_region]
+            # A removed boundary event hands its duration to the new boundary
+            for j in inside:
+                if _is_boundary_event(ori_events[j]):
+                    durations[i_region] += float(ori_events[j].get("duration", 0.0) or 0.0)
+
+        for ev, new_lat in zip(events_out, lat):
+            ev["latency"] = float(new_lat)
+        events_out = [ev for j, ev in enumerate(events_out) if j not in removed]
+
+    # Boundary latencies: start-1, minus cumulative prior durations, then +0.5
+    boundevents = r[:, 0].astype(float) - 1.0
+    if len(durations) > 1:
+        boundevents = boundevents - np.concatenate([[0.0], np.cumsum(durations[:-1])])
+    boundevents = boundevents + 0.5
+
+    # Excise samples
+    newx = x[:, ~reject]
+    newn = int(newx.shape[1])
+
+    # Update total time proportionally
+    newt = float(timelength) * (newn / float(n))
+
+    # Keep only boundaries that fall inside the new data, and filter the
+    # durations with the same mask so both arrays stay index-aligned.
+    keep = (boundevents >= 0) & (boundevents < (newn + 1))
+    boundevents = boundevents[keep]
+    durations = durations[keep]
+
+    # Merge adjacent boundaries that share a latency, summing their durations
+    merged_be: List[float] = []
+    merged_du: List[float] = []
+    for be, du in zip(np.round(boundevents, 12), durations):
+        if merged_be and np.isclose(be, merged_be[-1]):
+            merged_du[-1] += float(du)
+        else:
+            merged_be.append(be)
+            merged_du.append(float(du))
+    boundevents = np.asarray(merged_be, dtype=float)
+    durations = np.asarray(merged_du, dtype=float)
+
+    # Insert boundary events into events list only if input events were provided
+    if ori_events:
+        for be, du in zip(boundevents, durations):
+            if 0 < be < (newn + 1):
+                events_out.append({
+                    "type": "boundary",
+                    "latency": float(be),
+                    "duration": float(du),
+                })
+
+    # Remove events with latency out of bound (> newn+1); events without a
+    # latency default to +inf and are therefore dropped as well.
+    events_out = [
+        ev for ev in events_out
+        if float(ev.get("latency", float("inf"))) <= (newn + 1)
+    ]
+
+    # Sort by latency
+    events_out.sort(key=lambda ev: ev.get("latency", float("inf")))
+
+    # Handle contiguous boundary events with same latency: merge durations
+    merged_events: List[Dict] = []
+    for ev in events_out:
+        if merged_events and _is_boundary_event(ev) and _is_boundary_event(merged_events[-1]) \
+           and np.isclose(float(ev.get("latency", 0.0)), float(merged_events[-1].get("latency", 0.0))):
+            prev_dur = float(merged_events[-1].get("duration", 0.0) or 0.0)
+            cur_dur = float(ev.get("duration", 0.0) or 0.0)
+            merged_events[-1]["duration"] = prev_dur + cur_dur
+        else:
+            merged_events.append(ev)
+    events_out = merged_events
+
+    return newx, newt, events_out, boundevents
+
 
 def eeg_eegrej(EEG, regions):
     EEG = deepcopy(EEG)
@@ -26,9 +224,9 @@ def eeg_eegrej(EEG, regions):
     # Use original events; backend will handle pruning, shifting, and boundary insertion
     events = list(EEG.get("event", []))
 
-    # call eegrej backend
+    # call _eegrej backend
     xdur = float(EEG["xmax"] - EEG["xmin"])
-    data_out, xmax_rel, event2, boundevents = eegrej(EEG["data"], regions, xdur, events)
+    data_out, xmax_rel, event2, boundevents = _eegrej(EEG["data"], regions, xdur, events)
 
     # finalize core fields
     old_pnts = int(EEG["pnts"])
 
@@ -157,29 +157,74 @@ def pop_saveset(EEG, file_name):
         'icawinv'         : EEG['icawinv'],
         'icasphere'       : EEG['icasphere'],
         'icaweights'      : EEG['icaweights'],
-        'icachansind'     : _as_array_or_empty(EEG['icachansind']),
+        'icachansind'     : EEG['icachansind'] if EEG['icachansind'] is not None else {},
         'chanlocs'        : EEG['chanlocs'],
         'urchanlocs'      : EEG['urchanlocs'],
         'chaninfo'        : EEG['chaninfo'],
         'ref'             : EEG['ref'],
-        'event'           : _as_array_or_empty(EEG['event']),
-        'urevent'         : _as_array_or_empty(EEG['urevent']),
-        'eventdescription': _as_array_or_empty(EEG['eventdescription']),
-        'epoch'           : _as_array_or_empty(EEG['epoch']),
-        'epochdescription': _as_array_or_empty(EEG['epochdescription']),
-        'reject'          : _as_array_or_empty(EEG['reject']),
-        'stats'           : _as_array_or_empty(EEG['stats']),
-        'specdata'        : _as_array_or_empty(EEG['specdata']),
-        'specicaact'      : _as_array_or_empty(EEG['specicaact']),
-        'splinefile'      : _as_array_or_empty(EEG['splinefile']),
-        'icasplinefile'   : _as_array_or_empty(EEG['icasplinefile']),
-        'dipfit'          : _as_array_or_empty(EEG['dipfit']),
+        'event'           : EEG['event'] if EEG['event'] is not None else {},
+        'urevent'         : EEG['urevent'] if EEG['urevent'] is not None else {},
+        'eventdescription': EEG['eventdescription'] if EEG['eventdescription'] is not None else {},
+        'epoch'           : EEG['epoch'] if EEG['epoch'] is not None else {},
+        'epochdescription': EEG['epochdescription'] if EEG['epochdescription'] is not None else {},
+        'reject'          : EEG['reject'] if EEG['reject'] is not None else {},
+        'stats'           : EEG['stats'] if EEG['stats'] is not None else {},
+        'specdata'        : EEG['specdata'] if EEG['specdata'] is not None else {},
+        'specicaact'      : EEG['specicaact'] if EEG['specicaact'] is not None else {},
+        'splinefile'      : EEG['splinefile'] if EEG['splinefile'] is not None else {},
+        'icasplinefile'   : EEG['icasplinefile'] if EEG['icasplinefile'] is not None else {},
+        'dipfit'          : EEG['dipfit'] if EEG['dipfit'] is not None else {},
         'history'         : EEG['history'],
         'saved'           : EEG['saved'],
         'etc'             : EEG['etc'],
-        'run'             : _as_array_or_empty(EEG['run']),
-        'roi'             : _as_array_or_empty(EEG['roi']),
-    }
+        'run'             : EEG['run'] if EEG['run'] is not None else {},
+        'roi'             : EEG['roi'] if EEG['roi'] is not None else {}
+        }
+    
+    # eeglab_dict = {
+    #     'setname'         : '',
+    #     'filename'        : '',
+    #     'filepath'        : '',
+    #     'subject'         : '',
+    #     'group'           : '',
+    #     'condition'       : '',
+    #     'session'         : np.array([]),
+    #     'comments'        : '',
+    #     'nbchan'          : float(EEG['nbchan']),
+    #     'trials'          : float(EEG['trials']),
+    #     'pnts'            : float(EEG['pnts']),
+    #     'srate'           : float(EEG['srate']),
+    #     'xmin'            : float(EEG['xmin']),
+    #     'xmax'            : float(EEG['xmax']),
+    #     'times'           : EEG['times'],
+    #     'data'            : EEG['data'],
+    #     'icaact'          : EEG['icaact'],
+    #     'icawinv'         : EEG['icawinv'],
+    #     'icasphere'       : EEG['icasphere'],
+    #     'icaweights'      : EEG['icaweights'],
+    #     'icachansind'     : _as_array_or_empty(EEG['icachansind']),
+    #     'chanlocs'        : EEG['chanlocs'],
+    #     'urchanlocs'      : EEG['urchanlocs'],
+    #     'chaninfo'        : EEG['chaninfo'],
+    #     'ref'             : EEG['ref'],
+    #     'event'           : _as_array_or_empty(EEG['event']),
+    #     'urevent'         : _as_array_or_empty(EEG['urevent']),
+    #     'eventdescription': _as_array_or_empty(EEG['eventdescription']),
+    #     'epoch'           : _as_array_or_empty(EEG['epoch']),
+    #     'epochdescription': _as_array_or_empty(EEG['epochdescription']),
+    #     'reject'          : _as_array_or_empty(EEG['reject']),
+    #     'stats'           : _as_array_or_empty(EEG['stats']),
+    #     'specdata'        : _as_array_or_empty(EEG['specdata']),
+    #     'specicaact'      : _as_array_or_empty(EEG['specicaact']),
+    #     'splinefile'      : _as_array_or_empty(EEG['splinefile']),
+    #     'icasplinefile'   : _as_array_or_empty(EEG['icasplinefile']),
+    #     'dipfit'          : _as_array_or_empty(EEG['dipfit']),
+    #     'history'         : EEG['history'],
+    #     'saved'           : EEG['saved'],
+    #     'etc'             : EEG['etc'],
+    #     'run'             : _as_array_or_empty(EEG['run']),
+    #     'roi'             : _as_array_or_empty(EEG['roi']),
+    # }
 
      # add 1 to EEG['icachansind'] to make it 1-based
     if ('icachansind' in eeglab_dict and 
@@ -242,8 +287,19 @@ def pop_saveset(EEG, file_name):
         eeglab_dict['event'] = np.array(eeglab_dict['event'])
 
     for key in eeglab_dict:
-        if isinstance(eeglab_dict[key], np.ndarray) and not(is_effectively_empty(eeglab_dict[key])) and len(eeglab_dict[key]) > 0 and isinstance(eeglab_dict[key][0], dict):
+        if isinstance(eeglab_dict[key], np.ndarray) and len(eeglab_dict[key]) > 0 and isinstance(eeglab_dict[key][0], dict):
             eeglab_dict[key] = flatten_dict(eeglab_dict[key])    
+    # for key in eeglab_dict:
+    #     arr = eeglab_dict[key]
+    #     if isinstance(arr, np.ndarray) and not is_effectively_empty(arr):
+    #         if not arr.ndim == 0:
+    #             if arr.shape != () and arr.shape[0] > 0 and isinstance(arr[0], dict):
+    #                 eeglab_dict[key] = flatten_dict(arr)
+    #         else:
+    #             elem = arr.item()
+    #             if isinstance(elem, dict):
+    #                 eeglab_dict[key] = flatten_dict([elem])  # wrap single dict
+                
     # # Step 4: Save the EEGLAB dataset as a .mat file
     scipy.io.savemat(file_name, eeglab_dict, appendmat=False)