Commit 5cb4e8e

test(KDP): add tests

1 parent 83f6996 commit 5cb4e8e

3 files changed: +690 −122 lines changed

kdp/processor.py

Lines changed: 3 additions & 2 deletions

@@ -699,8 +699,9 @@ def _add_pipeline_numeric(
             )
         elif _feature.feature_type == FeatureType.FLOAT_DISCRETIZED:
             logger.debug("Adding Float Discretized Feature")
-            # output dimensions will be > 1
-            _out_dims = len(_feature.kwargs.get("bin_boundaries", 1.0)) + 1
+            # Use an empty list as the default value instead of 1.0.
+            boundaries = _feature.kwargs.get("bin_boundaries", [])
+            _out_dims = len(boundaries) + 1
             preprocessor.add_processing_step(
                 layer_class="Discretization",
                 **_feature.kwargs,
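The old default broke whenever a FLOAT_DISCRETIZED feature omitted bin_boundaries, because len() is undefined for a float; the empty-list default instead falls back to a single output dimension. A minimal, self-contained sketch of the difference (the kwargs dict below is a hypothetical stand-in for _feature.kwargs):

# Hypothetical kwargs for a FLOAT_DISCRETIZED feature declared without boundaries.
kwargs = {}

# Old default: len() of a float is not defined, so this raises TypeError.
try:
    _out_dims = len(kwargs.get("bin_boundaries", 1.0)) + 1
except TypeError as exc:
    print(f"old default fails: {exc}")

# New default: an empty list gives a well-defined fallback of one output dimension.
boundaries = kwargs.get("bin_boundaries", [])
_out_dims = len(boundaries) + 1
print(_out_dims)  # -> 1

# With explicit boundaries the behavior is unchanged: n boundaries -> n + 1 bins.
kwargs = {"bin_boundaries": [0.0, 1.0, 2.0]}
print(len(kwargs.get("bin_boundaries", [])) + 1)  # -> 4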
Lines changed: 220 additions & 119 deletions

@@ -1,127 +1,228 @@
 import tensorflow as tf
 import numpy as np
-from kdp.custom_layers import AdvancedNumericalEmbedding
-
-
-class TestAdvancedNumericalEmbedding:
-    def test_multi_feature_input(self):
-        """Test with input having multiple features."""
-        batch_size = 32
-        num_features = 3
-        embedding_dim = 8
-
-        # Create random multi-feature input.
-        x_multi = tf.random.normal((batch_size, num_features))
-        layer = AdvancedNumericalEmbedding(
-            embedding_dim=embedding_dim,
-            mlp_hidden_units=16,
-            num_bins=10,
-            init_min=[-3.0, -2.0, -4.0],
-            init_max=[3.0, 2.0, 4.0],
-            dropout_rate=0.1,
-            use_batch_norm=True,
-        )
-        # Run in inference mode.
-        y_multi = layer(x_multi, training=False)
-        # Expected output shape: (batch_size, num_features, embedding_dim)
-        assert (
-            y_multi.shape == (batch_size, num_features, embedding_dim)
-        ), f"Expected shape {(batch_size, num_features, embedding_dim)} but got {y_multi.shape}"
-        # Ensure outputs are finite.
-        assert np.all(
-            np.isfinite(y_multi.numpy())
-        ), "Output contains non-finite values."
-
-    def test_single_feature_input(self):
-        """Test with a single numeric feature."""
-        batch_size = 32
-        num_features = 1
-        embedding_dim = 8
-
-        x_single = tf.random.normal((batch_size, num_features))
-        layer = AdvancedNumericalEmbedding(
-            embedding_dim=embedding_dim,
-            mlp_hidden_units=16,
-            num_bins=10,
-            init_min=-3.0,
-            init_max=3.0,
-            dropout_rate=0.1,
-            use_batch_norm=False,
+import pytest
+from kdp.custom_layers import GlobalAdvancedNumericalEmbedding
+
+
+def test_basic_functionality():
+    """Test basic functionality with default parameters."""
+    batch_size = 32
+    num_features = 3
+    embedding_dim = 10
+
+    layer = GlobalAdvancedNumericalEmbedding(
+        global_embedding_dim=embedding_dim,
+        global_mlp_hidden_units=16,
+        global_num_bins=10,
+        global_init_min=-3.0,
+        global_init_max=3.0,
+        global_dropout_rate=0.1,
+        global_use_batch_norm=True,
+        global_pooling="average",
+    )
+
+    # Input shape: (batch_size, num_features)
+    x = tf.random.normal((batch_size, num_features))
+    y = layer(x, training=False)
+
+    # Output shape should be (batch_size, embedding_dim)
+    assert y.shape == (
+        batch_size,
+        embedding_dim,
+    ), f"Expected shape {(batch_size, embedding_dim)}, got {y.shape}"
+    assert np.all(np.isfinite(y.numpy())), "Output contains non-finite values"
+
+
+def test_different_pooling_methods():
+    """Test both average and max pooling options."""
+    batch_size = 16
+    num_features = 4
+    embedding_dim = 8
+
+    x = tf.random.normal((batch_size, num_features))
+
+    for pooling in ["average", "max"]:
+        layer = GlobalAdvancedNumericalEmbedding(
+            global_embedding_dim=embedding_dim,
+            global_mlp_hidden_units=16,
+            global_num_bins=10,
+            global_init_min=-3.0,
+            global_init_max=3.0,
+            global_dropout_rate=0.1,
+            global_use_batch_norm=True,
+            global_pooling=pooling,
         )
-        y_single = layer(x_single, training=False)
-        assert (
-            y_single.shape == (batch_size, num_features, embedding_dim)
-        ), f"Expected shape {(batch_size, num_features, embedding_dim)} but got {y_single.shape}"
-        assert np.all(
-            np.isfinite(y_single.numpy())
-        ), "Output contains non-finite values."
-
-    def test_dropout_behavior(self):
-        """When dropout is 0.0 and no batch norm is used, training and inference should match."""
-        batch_size = 16
-        num_features = 2
-        embedding_dim = 8

-        x = tf.random.normal((batch_size, num_features))
-        layer = AdvancedNumericalEmbedding(
-            embedding_dim=embedding_dim,
-            mlp_hidden_units=16,
-            num_bins=10,
-            init_min=[-3.0, -2.0],
-            init_max=[3.0, 2.0],
-            dropout_rate=0.0,
-            use_batch_norm=False,
+        y = layer(x, training=False)
+        assert y.shape == (
+            batch_size,
+            embedding_dim,
+        ), f"Shape mismatch with {pooling} pooling"
+
+
+def test_training_inference_modes():
+    """Test behavior in training and inference modes."""
+    batch_size = 16
+    num_features = 3
+    embedding_dim = 12
+
+    layer = GlobalAdvancedNumericalEmbedding(
+        global_embedding_dim=embedding_dim,
+        global_mlp_hidden_units=16,
+        global_num_bins=10,
+        global_init_min=-3.0,
+        global_init_max=3.0,
+        global_dropout_rate=0.0,  # No dropout for deterministic comparison
+        global_use_batch_norm=False,  # No batch norm for deterministic comparison
+        global_pooling="average",
+    )
+
+    x = tf.random.normal((batch_size, num_features))
+    y_train = layer(x, training=True)
+    y_infer = layer(x, training=False)
+
+    # With no dropout and no batch norm, outputs should match
+    assert np.allclose(
+        y_train.numpy(), y_infer.numpy(), atol=1e-5
+    ), "Training and inference outputs should match when dropout=0 and batch_norm=False"
+
+
+def test_different_input_ranges():
+    """Test with different input value ranges and initialization boundaries."""
+    batch_size = 16
+    num_features = 2
+    embedding_dim = 8
+
+    # Test with different input ranges
+    x_small = tf.random.normal((batch_size, num_features)) * 0.1
+    x_large = tf.random.normal((batch_size, num_features)) * 10.0
+
+    layer = GlobalAdvancedNumericalEmbedding(
+        global_embedding_dim=embedding_dim,
+        global_mlp_hidden_units=16,
+        global_num_bins=10,
+        global_init_min=[-5.0, -5.0],
+        global_init_max=[5.0, 5.0],
+        global_dropout_rate=0.1,
+        global_use_batch_norm=True,
+        global_pooling="average",
+    )
+
+    y_small = layer(x_small, training=False)
+    y_large = layer(x_large, training=False)
+
+    assert np.all(
+        np.isfinite(y_small.numpy())
+    ), "Output contains non-finite values for small inputs"
+    assert np.all(
+        np.isfinite(y_large.numpy())
+    ), "Output contains non-finite values for large inputs"
+
+
+def test_config_round_trip():
+    """Test get_config and from_config round-trip functionality."""
+    original_layer = GlobalAdvancedNumericalEmbedding(
+        global_embedding_dim=8,
+        global_mlp_hidden_units=16,
+        global_num_bins=10,
+        global_init_min=[-3.0, -2.0],
+        global_init_max=[3.0, 2.0],
+        global_dropout_rate=0.1,
+        global_use_batch_norm=True,
+        global_pooling="average",
+        name="global_numeric_test",
+    )
+
+    config = original_layer.get_config()
+    new_layer = GlobalAdvancedNumericalEmbedding.from_config(config)
+    # Test both layers with same input
+    x = tf.random.normal((16, 2))
+    y1 = original_layer(x, training=False)
+    y2 = new_layer(x, training=False)
+    assert (
+        y1.shape == y2.shape
+    ), "Shapes from original and reconstructed layers should match"
+
+    # Verify config values
+    assert (
+        config["global_embedding_dim"] == 8
+    ), "global_embedding_dim not preserved in config"
+    assert (
+        config["global_pooling"] == "average"
+    ), "global_pooling not preserved in config"
+
+
+def test_invalid_pooling():
+    """Test that invalid pooling method raises ValueError."""
+    with pytest.raises(ValueError):
+        GlobalAdvancedNumericalEmbedding(
+            global_embedding_dim=8,
+            global_mlp_hidden_units=16,
+            global_num_bins=10,
+            global_init_min=-3.0,
+            global_init_max=3.0,
+            global_dropout_rate=0.1,
+            global_use_batch_norm=True,
+            global_pooling="invalid_pooling",
         )
-        y_train = layer(x, training=True)
-        y_infer = layer(x, training=False)
-        assert np.allclose(
-            y_train.numpy(), y_infer.numpy(), atol=1e-5
-        ), "Outputs in training and inference modes should match when dropout is disabled."
-
-    def test_config_round_trip(self):
-        """Test get_config and from_config round-trip functionality."""
-        layer = AdvancedNumericalEmbedding(
-            embedding_dim=8,
-            mlp_hidden_units=16,
-            num_bins=10,
-            init_min=-3.0,
-            init_max=3.0,
-            dropout_rate=0.1,
-            use_batch_norm=True,
-            name="advanced_numeric_test",
+
+
+def test_gradient_flow():
+    """Test that gradients can flow through the layer."""
+    batch_size = 8
+    num_features = 3
+    embedding_dim = 8
+
+    layer = GlobalAdvancedNumericalEmbedding(
+        global_embedding_dim=embedding_dim,
+        global_mlp_hidden_units=16,
+        global_num_bins=10,
+        global_init_min=[-5.0, -4.0, -6.0],
+        global_init_max=[5.0, 2.0, 8.0],
+        global_dropout_rate=0.15,
+        global_use_batch_norm=True,
+        global_pooling="max",
+    )
+
+    x = tf.random.normal((batch_size, num_features))
+
+    with tf.GradientTape() as tape:
+        tape.watch(x)
+        y = layer(x, training=True)
+        loss = tf.reduce_mean(y)
+
+    grads = tape.gradient(loss, layer.trainable_variables)
+
+    # Check that at least one gradient is not None
+    assert any(
+        g is not None for g in grads
+    ), "No gradients found for any trainable variable"
+
+
+def test_different_feature_dimensions():
+    """Test the layer with different numbers of input features."""
+    embedding_dim = 8
+    batch_size = 16
+
+    # Test with different feature dimensions
+    feature_dims = [1, 5, 10]
+
+    for num_features in feature_dims:
+        layer = GlobalAdvancedNumericalEmbedding(
+            global_embedding_dim=embedding_dim,
+            global_mlp_hidden_units=12,
+            global_num_bins=10,
+            global_init_min=-3.0,
+            global_init_max=3.0,
+            global_dropout_rate=0.2,
+            global_use_batch_norm=False,
+            global_pooling="average",
        )
-        config = layer.get_config()
-        new_layer = AdvancedNumericalEmbedding.from_config(config)
-        # Create a dummy input to ensure the layers are built.
-        x = tf.random.normal((10, 1))
-        y1 = layer(x, training=False)
-        y2 = new_layer(x, training=False)
-        assert (
-            y1.shape == y2.shape
-        ), "Shapes from original and reloaded layers should match."
-
-    def test_gradient_flow(self):
-        """Test that gradients can be computed through the layer."""
-        batch_size = 8
-        num_features = 3
-        embedding_dim = 8

         x = tf.random.normal((batch_size, num_features))
-        layer = AdvancedNumericalEmbedding(
-            embedding_dim=embedding_dim,
-            mlp_hidden_units=16,
-            num_bins=10,
-            init_min=[-3.0, -2.0, -4.0],
-            init_max=[3.0, 2.0, 4.0],
-            dropout_rate=0.1,
-            use_batch_norm=True,
-        )
-        with tf.GradientTape() as tape:
-            tape.watch(x)
-            y = layer(x, training=True)
-            loss = tf.reduce_mean(y)
-        grads = tape.gradient(loss, layer.trainable_variables)
-        grad_not_none = [g for g in grads if g is not None]
-        assert (
-            len(grad_not_none) > 0
-        ), "Gradients should be computed for AdvancedNumericalEmbedding trainable variables."
+        y = layer(x, training=False)
+
+        assert y.shape == (
+            batch_size,
+            embedding_dim,
+        ), f"Output shape mismatch with {num_features} input features"

0 commit comments
