@@ -188,35 +188,6 @@ def test_log_normal_distribution(self):
188188 # We should activate this when the distribution could be properly detected as log-normal
189189 # self.assertLess(tf.math.reduce_variance(outputs), tf.math.reduce_variance(inputs))
190190
191- def test_discrete_distribution(self): #########
192- # Generate discrete data
193- np.random.seed(42)
194- data = np.random.choice(5, 1000)
195- inputs = tf.convert_to_tensor(data, dtype=tf.float32)
196-
197- # Process data
198- outputs = self.encoder(inputs)
199-
200- # Check output properties
201- self.assertEqual(outputs.shape, inputs.shape)
202- self.assertAllInRange(outputs, -1, 1)
203-
204- # Verify distribution detection
205- dist_info = self.encoder._estimate_distribution(inputs)
206- self.assertEqual(dist_info["type"], DistributionType.DISCRETE)
207-
208- # Check value mapping consistency
209- unique_inputs = tf.unique(inputs)[0]
210- unique_outputs = tf.unique(outputs)[0]
211- self.assertEqual(len(unique_inputs), len(unique_outputs))
212-
213- # Check ordering preservation
214- self.assertTrue(
215- tf.reduce_all(
216- tf.equal(tf.argsort(unique_inputs), tf.argsort(unique_outputs))
217- )
218- )
219-
220191 def test_beta_distribution(self):
221192 # Generate beta distribution data
222193 np.random.seed(42)
@@ -287,101 +258,86 @@ def test_cauchy_distribution(self):
287258 # self.assertLess(tf.abs(tf.reduce_mean(outputs)), 1.0)
288259 # self.assertLess(tf.math.reduce_variance(outputs), tf.math.reduce_variance(inputs))
289260
290- # def test_poisson_distribution(self): #########
291- # # Generate Poisson distribution data
292- # np.random.seed(42)
293- # data = np.random.poisson(5, 1000)
294- # inputs = tf.convert_to_tensor(data, dtype=tf.float32)
295-
296- # # Process data
297- # outputs = self.encoder(inputs)
298-
299- # # Check output properties
300- # self.assertEqual(outputs.shape, inputs.shape)
301- # self.assertAllInRange(outputs, 0, 1)
302-
303- # # Verify distribution detection
304- # dist_info = self.encoder._estimate_distribution(inputs)
305- # self.assertEqual(dist_info["type"], DistributionType.POISSON)
306-
307- # def test_weibull_distribution(self):
308- # # Generate Weibull distribution data
309- # np.random.seed(42)
310- # data = np.random.weibull(1.5, 1000)
311- # inputs = tf.convert_to_tensor(data, dtype=tf.float32)
261+ def test_poisson_distribution(self): #########
262+ # Generate Poisson distribution data
263+ np.random.seed(42)
264+ data = np.random.poisson(5, 100)
265+ inputs = tf.convert_to_tensor(data, dtype=tf.float32)
312266
313- # # Process data
314- # outputs = self.encoder(inputs)
267+ mean = tf.reduce_mean(inputs)
268+ variance = tf.math.reduce_variance(inputs)
315269
316- # # Check output properties
317- # self.assertEqual(outputs.shape, inputs.shape)
318- # self.assertAllInRange(outputs, 0, 1)
270+ self.assertGreater(variance / mean, 0.8)
271+ self.assertLess(variance / mean, 1.2)
319272
320- # # Verify distribution detection
321- # dist_info = self.encoder._estimate_distribution(inputs)
322- # self.assertEqual(dist_info["type"], DistributionType.WEIBULL)
273+ # Process data
274+ outputs = self.encoder(inputs)
323275
324- # def test_zero_inflated_distribution(self):
325- # # Generate zero-inflated data
326- # np.random.seed(42)
327- # data = np.zeros(1000)
328- # non_zero_mask = np.random.random(1000) > 0.7
329- # data[non_zero_mask] = np.random.poisson(3, size=non_zero_mask.sum())
330- # inputs = tf.convert_to_tensor(data, dtype=tf.float32)
276+ # Check output properties
277+ self.assertEqual(outputs.shape, inputs.shape)
278+ self.assertAllInRange(outputs, -1, 1)
331279
332- # # Process data
333- # outputs = self.encoder(inputs)
280+ # Verify distribution detection
281+ dist_info = self.encoder._estimate_distribution(inputs)
282+ self.assertEqual(dist_info["type"], DistributionType.POISSON)
334283
335- # # Check output properties
336- # self.assertEqual(outputs.shape, inputs.shape)
337- # self.assertAllInRange(outputs, 0, 1)
284+ def test_exponential_distribution(self):
285+ """Test that the encoder correctly identifies exponential distributions."""
286+ # Generate exponential data
287+ np.random.seed(42)
288+ data = np.random.exponential(scale=2.0, size=1000)
289+ inputs = tf.convert_to_tensor(data, dtype=tf.float32)
338290
339- # # Verify distribution detection
340- # dist_info = self.encoder._estimate_distribution(inputs)
341- # self.assertEqual(dist_info["type"], DistributionType.ZERO_INFLATED)
291+ # Calculate skewness manually to verify
292+ mean = tf.reduce_mean(inputs)
293+ variance = tf.math.reduce_variance(inputs)
294+ skewness = tf.reduce_mean(
295+ tf.pow((inputs - mean) / tf.sqrt(variance + self.encoder.epsilon), 3)
296+ )
342297
343- # # Check zero preservation
344- # zero_mask = tf.abs(inputs) < self.encoder.epsilon
345- # self.assertTrue(tf.reduce_all(tf.abs(outputs[zero_mask]) < self.encoder.epsilon))
298+ # Verify skewness is close to 2.0 (characteristic of exponential)
299+ self.assertLess(tf.abs(skewness - 2.0), 0.5)
346300
347- # def test_bounded_distribution(self):
348- # # Generate bounded data
349- # np.random.seed(42)
350- # data = np.clip(np.random.normal(0, 1, 1000), -2, 2)
351- # inputs = tf.convert_to_tensor(data, dtype=tf.float32)
301+ # Process data
302+ outputs = self.encoder(inputs)
352303
353- # # Process data
354- # outputs = self.encoder(inputs)
304+ # Check output properties
305+ self.assertEqual(outputs.shape, inputs.shape)
306+ self.assertAllInRange(outputs, -1, 1)
355307
356- # # Check output properties
357- # self.assertEqual(outputs.shape, inputs.shape)
358- # self.assertAllInRange(outputs, -1, 1)
308+ # Verify distribution detection
309+ dist_info = self.encoder._estimate_distribution(inputs)
310+ self.assertEqual(dist_info["type"], DistributionType.EXPONENTIAL)
359311
360- # # Verify distribution detection
361- # dist_info = self.encoder._estimate_distribution(inputs)
362- # self.assertEqual(dist_info["type"], DistributionType.BOUNDED)
312+ # Additional exponential properties
313+ self.assertGreaterEqual(
314+ tf.reduce_min(inputs), -self.encoder.epsilon
315+ ) # Non-negative
316+ self.assertNear(variance, tf.square(mean), 0.5) # Variance ≈ mean²
363317
364- # def test_ordinal_distribution(self):
365- # # Generate ordinal data
366- # np.random.seed(42)
367- # data = np.random.choice([1, 2, 3, 4, 5], 1000, p=[0.1, 0.2, 0.4, 0.2, 0.1])
368- # inputs = tf.convert_to_tensor(data, dtype=tf.float32)
318+ def test_zero_inflated_distribution(self):
319+ # Generate zero-inflated data
320+ np.random.seed(42)
321+ data = np.random.random(100) # Generate 100 random numbers between 0 and 1
322+ zero_mask = np.random.random(100) < 0.4 # Mask selecting ~40% of positions
323+ data[zero_mask] = 0 # Zero out the selected ~40% of values
324+ inputs = tf.convert_to_tensor(data, dtype=tf.float32)
369325
370- # # Process data
371- # outputs = self.encoder(inputs)
326+ # Process data
327+ outputs = self.encoder(inputs)
372328
373- # # Check output properties
374- # self.assertEqual(outputs.shape, inputs.shape)
375- # self.assertAllInRange(outputs, 0, 1)
329+ # Check output properties
330+ self.assertEqual(outputs.shape, inputs.shape)
376331
377- # # Verify distribution detection
378- # dist_info = self.encoder._estimate_distribution(inputs)
379- # self.assertEqual(dist_info["type"], DistributionType.ORDINAL)
332+ # Verify distribution detection
333+ dist_info = self.encoder._estimate_distribution(inputs)
334+ self.assertEqual(dist_info["type"], DistributionType.ZERO_INFLATED)
380335
381- # # Check ordering preservation
382- # unique_inputs = tf.unique(inputs)[0]
383- # unique_outputs = tf.unique(outputs)[0]
384- # self.assertTrue(tf.reduce_all(tf.equal(tf.argsort(unique_inputs), tf.argsort(unique_outputs))))
336+ # Check zero preservation
337+ zero_mask = tf.abs(inputs) < self.encoder.epsilon
338+ self.assertTrue(
339+ tf.reduce_all(tf.abs(outputs[zero_mask]) < self.encoder.epsilon)
340+ )
385341
386342 def test_config(self):
387343 config = self.encoder.get_config()
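
The new tests pin the encoder's detection heuristics to simple moment statistics: Poisson data should show a variance/mean ratio near 1, exponential data a skewness near 2 with variance ≈ mean², and zero-inflated data a large share of exact zeros. The sketch below illustrates that kind of logic only; it is not the repository's actual `_estimate_distribution`. The function name `estimate_distribution_sketch`, the `EPSILON` constant, the thresholds, and the string labels are assumptions (the real code returns `DistributionType` members).

# Hypothetical moment-based detector consistent with the assertions above.
import tensorflow as tf

EPSILON = 1e-6  # stands in for `self.encoder.epsilon` (assumed value)


def estimate_distribution_sketch(inputs: tf.Tensor) -> dict:
    """Classify a 1-D float tensor by simple moment statistics (eager mode)."""
    x = tf.cast(inputs, tf.float32)
    mean = tf.reduce_mean(x)
    variance = tf.math.reduce_variance(x)
    skewness = tf.reduce_mean(tf.pow((x - mean) / tf.sqrt(variance + EPSILON), 3))
    zero_fraction = tf.reduce_mean(tf.cast(tf.abs(x) < EPSILON, tf.float32))
    non_negative = bool(tf.reduce_min(x) >= -EPSILON)

    if zero_fraction > 0.3:
        return {"type": "zero_inflated"}  # many exact zeros
    if non_negative and abs(float(variance / (mean + EPSILON)) - 1.0) < 0.2:
        return {"type": "poisson"}  # variance ≈ mean
    if non_negative and abs(float(skewness) - 2.0) < 0.5:
        return {"type": "exponential"}  # skewness ≈ 2, variance ≈ mean²
    return {"type": "unknown"}


# Example: Poisson(5) samples should report a variance/mean ratio near 1.
samples = tf.random.poisson([1000], lam=5.0, seed=42)
print(estimate_distribution_sketch(samples)["type"])  # expected: poisson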