1
1
import numpy as np
2
2
from copy import deepcopy
3
- from eegprep .eegrej import eegrej # expects: eegrej(data, regions, xdur, events) -> (data_out, xmax_rel, event2, boundevents)
3
+ import numpy as np
4
+ from typing import List , Dict , Optional , Tuple
5
+
6
def _is_boundary_event(event: Dict) -> bool:
    """Return True when *event* is an EEGLAB-style boundary marker.

    An event counts as a boundary when its ``"type"`` value is the string
    ``"boundary"`` (compared case-insensitively) or the numeric code ``-99``.

    Inputs
    - event: event dictionary; a missing ``"type"`` key means "not a boundary"

    Returns
    - True for boundary events, False otherwise
    """
    event_type = event.get("type")
    if isinstance(event_type, str):
        return event_type.lower() == "boundary"
    # NumPy scalar integers are not ``int`` instances, so they are listed
    # explicitly; otherwise a numeric -99 stored as np.int64 would be missed.
    if isinstance(event_type, (int, float, np.integer, np.floating)):
        # A direct comparison never raises (NaN and inf simply compare
        # unequal) and, unlike int(), does not truncate -99.5 down to -99.
        return bool(event_type == -99)
    return False
16
+
17
def _copy_events_sorted(events: Optional[List[Dict]]) -> List[Dict]:
    """Shallow-copy *events*, sorted by latency when every event has one."""
    copies: List[Dict] = [] if events is None else [dict(ev) for ev in events]
    if copies and all("latency" in ev for ev in copies):
        copies.sort(key=lambda ev: ev.get("latency", float("inf")))
    return copies


def _eegrej(indata, regions, timelength, events: Optional[List[Dict]] = None) -> Tuple[np.ndarray, float, List[Dict], np.ndarray]:
    """
    Remove [beg end] sample ranges (1-based, inclusive) from continuous data
    and update events (list of dictionaries) in the MATLAB EEGLAB style.

    Inputs
    - indata: 2D array shaped (channels, frames)
    - regions: array-like with shape (n_regions, 2), 1-based [beg end] per row;
               values are rounded (half away from zero, as MATLAB ``round``),
               clamped to [1, frames], sorted, and made non-overlapping
    - timelength: total duration of the original data in seconds
    - events: list of dicts with at least key 'latency'; optional keys include
              'type' and 'duration'. The input dicts are never modified.
              If None or empty, no boundary events are added to the returned
              event list; the boundary latencies are still returned.

    Returns
    - outdata: data with the rejected columns removed
    - newt: new total time in seconds (timelength scaled by kept/total frames)
    - events_out: updated events (shifted latencies, events strictly inside a
                  region dropped, boundary events inserted, sorted by latency)
    - boundevents: boundary latencies (float, 1-based, with +0.5 convention)

    Raises
    - ValueError: if indata is not 2D or regions is not (n_regions, 2)
    """
    x = np.asarray(indata)
    if x.ndim != 2:
        raise ValueError("indata must be 2D (channels, frames)")
    n = x.shape[1]

    r = np.asarray(regions, dtype=float)
    if r.size == 0:
        # Nothing to remove; still hand back sorted copies of the events.
        return x, float(timelength), _copy_events_sorted(events), np.array([], dtype=float)

    if r.ndim != 2 or r.shape[1] != 2:
        raise ValueError("regions must be of shape (n_regions, 2)")

    if n == 0:
        # No frames to reject from; also avoids a 0/0 when rescaling the time.
        return x, float(timelength), _copy_events_sorted(events), np.array([], dtype=float)

    # MATLAB round() rounds halves away from zero whereas np.rint rounds them
    # to even. floor(v + 0.5) matches MATLAB for every value that survives the
    # clamp to [1, n] below (anything <= 0 ends up at 1 either way).
    r = np.floor(r + 0.5).astype(int)
    r[:, 0] = np.clip(r[:, 0], 1, n)
    r[:, 1] = np.clip(r[:, 1], 1, n)
    r.sort(axis=1)
    r = r[np.lexsort((r[:, 1], r[:, 0]))]

    # Enforce non-overlap by shifting starts forward (like MATLAB). Each row
    # depends on the already-adjusted previous row, hence the explicit loop.
    for i in range(1, r.shape[0]):
        if r[i - 1, 1] >= r[i, 0]:
            r[i, 0] = r[i - 1, 1] + 1
    # Drop regions that became empty/inverted after the adjustment.
    r = r[r[:, 0] <= r[:, 1]]
    if r.size == 0:
        return x, float(timelength), _copy_events_sorted(events), np.array([], dtype=float)

    # Reject mask: MATLAB reject(beg:end) with 1-based inclusive bounds is the
    # Python half-open slice [beg-1:end].
    reject = np.zeros(n, dtype=bool)
    for beg, end in r:
        reject[beg - 1:end] = True

    # ori_events keeps the untouched originals (needed to look up nested
    # boundaries); events_out holds the copies whose latencies get rewritten.
    ori_events: List[Dict] = [] if events is None else [dict(ev) for ev in events]
    events_out: List[Dict] = [dict(ev) for ev in ori_events]

    # Number of samples removed per region, and the duration reported on each
    # inserted boundary (removed samples + durations of swallowed boundaries).
    spans = (r[:, 1] - r[:, 0] + 1).astype(int)
    durations = spans.astype(float)

    if events_out and all("latency" in ev for ev in events_out):
        ori_lat = np.array([float(ev["latency"]) for ev in events_out], dtype=float)
        lat = ori_lat.copy()
        removed = np.zeros(len(events_out), dtype=bool)
        for i_region, (beg, end) in enumerate(r):
            # Events strictly inside (beg, end) are dropped.
            inside = (ori_lat > beg) & (ori_lat < end)
            removed |= inside
            # Every event located after the region start moves back by the span.
            lat[ori_lat > beg] -= spans[i_region]
            # Old boundaries swallowed by this region pass their duration on
            # to the new boundary so the total amount of missing data is kept.
            durations[i_region] += sum(
                float(ori_events[j].get("duration", 0.0) or 0.0)
                for j in np.where(inside)[0]
                if _is_boundary_event(ori_events[j])
            )

        for ev, new_lat in zip(events_out, lat):
            ev["latency"] = float(new_lat)
        events_out = [ev for ev, gone in zip(events_out, removed) if not gone]

    # Boundary latency = samples kept before the region + 0.5. The shift must
    # use the number of samples actually removed by earlier regions (spans),
    # not `durations`, which also contains nested boundary durations.
    removed_before = np.concatenate(([0], np.cumsum(spans[:-1])))
    boundevents = r[:, 0].astype(float) - 1.0 - removed_before + 0.5

    # Excise samples and rescale the total time proportionally.
    newx = x[:, ~reject]
    newn = int(newx.shape[1])
    newt = float(timelength) * (newn / float(n))

    # Discard out-of-range boundaries, filtering durations with the same mask
    # so both arrays stay aligned.
    in_range = (boundevents >= 0) & (boundevents < (newn + 1))
    boundevents = boundevents[in_range]
    durations = durations[in_range]

    # Latencies are compared with an absolute tolerance only: np.isclose's
    # default relative tolerance (1e-5) would treat boundaries several samples
    # apart as identical once latencies exceed ~1e5 samples.
    latency_atol = 1e-9

    # Merge boundaries that share a latency (adjacent regions), summing durations.
    merged_lat: List[float] = []
    merged_dur: List[float] = []
    for be, dur in zip(np.round(boundevents, 12), durations):
        if merged_lat and np.isclose(be, merged_lat[-1], rtol=0.0, atol=latency_atol):
            merged_dur[-1] += float(dur)
        else:
            merged_lat.append(be)
            merged_dur.append(float(dur))
    boundevents = np.asarray(merged_lat, dtype=float)
    durations = np.asarray(merged_dur, dtype=float)

    # Insert boundary events into the event list only if input events were provided.
    if ori_events:
        for be, dur in zip(boundevents, durations):
            be = float(be)
            if 0 < be < (newn + 1):
                events_out.append({
                    "type": "boundary",
                    "latency": be,
                    "duration": float(dur),
                })

    # Remove events whose latency is out of bounds (> newn + 1); events without
    # a latency count as infinitely late and are dropped as well.
    events_out = [
        ev for ev in events_out
        if float(ev.get("latency", float("inf"))) <= (newn + 1)
    ]

    # Sort by latency (stable, so equal latencies keep their relative order).
    events_out.sort(key=lambda ev: ev.get("latency", float("inf")))

    # Collapse consecutive boundary events at the same latency into one event
    # carrying the summed duration.
    merged_events: List[Dict] = []
    for ev in events_out:
        prev = merged_events[-1] if merged_events else None
        if (
            prev is not None
            and _is_boundary_event(ev)
            and _is_boundary_event(prev)
            and np.isclose(
                float(ev.get("latency", 0.0)),
                float(prev.get("latency", 0.0)),
                rtol=0.0,
                atol=latency_atol,
            )
        ):
            prev_dur = float(prev.get("duration", 0.0) or 0.0)
            cur_dur = float(ev.get("duration", 0.0) or 0.0)
            prev["duration"] = prev_dur + cur_dur
        else:
            merged_events.append(ev)
    events_out = merged_events

    return newx, newt, events_out, boundevents
201
+
4
202
5
203
def eeg_eegrej (EEG , regions ):
6
204
EEG = deepcopy (EEG )
@@ -26,9 +224,9 @@ def eeg_eegrej(EEG, regions):
26
224
# Use original events; backend will handle pruning, shifting, and boundary insertion
27
225
events = list (EEG .get ("event" , []))
28
226
29
- # call eegrej backend
227
+ # call _eegrej backend
30
228
xdur = float (EEG ["xmax" ] - EEG ["xmin" ])
31
- data_out , xmax_rel , event2 , boundevents = eegrej (EEG ["data" ], regions , xdur , events )
229
+ data_out , xmax_rel , event2 , boundevents = _eegrej (EEG ["data" ], regions , xdur , events )
32
230
33
231
# finalize core fields
34
232
old_pnts = int (EEG ["pnts" ])
0 commit comments