Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 790f102

Browse files
committed
docs(KDP): add to the docs to showcase new features
1 parent 0c6c65c commit 790f102

File tree

5 files changed

+351
-24
lines changed

5 files changed

+351
-24
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,3 +168,4 @@ my_tests/*
168168
# derivative files
169169
data.csv
170170
sample_data.csv
171+
stats.json

docs/complex_example.md

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,15 @@ df = pd.DataFrame({
8787
] * 20
8888
})
8989

90-
# Save to CSV
90+
# Format data
9191
df.to_csv("sample_data.csv", index=False)
92+
test_batch = tf.data.Dataset.from_tensor_slices(dict(df.head(3))).batch(3)
9293

9394
# Create preprocessor with both transformer blocks and attention
9495
ppr = PreprocessingModel(
9596
path_data="sample_data.csv",
97+
features_stats_path="features_stats.json",
98+
overwrite_stats=True, # Force stats generation, recommended to be set to True
9699
features_specs=features,
97100
output_mode=OutputModeOptions.CONCAT,
98101

@@ -111,32 +114,31 @@ ppr = PreprocessingModel(
111114
tabular_attention_dropout=0.1, # Attention dropout rate
112115
tabular_attention_embedding_dim=16, # Embedding dimension
113116

114-
# Other parameters
115-
overwrite_stats=True, # Force stats generation, recommended to be set to True
117+
# Feature selection configuration
118+
feature_selection_placement="all_features", # Choose between (all_features|numeric|categorical)
119+
feature_selection_units=32,
120+
feature_selection_dropout=0.15,
116121
)
117122

118123
# Build the preprocessor
119124
result = ppr.build_preprocessor()
120125
```
121126

122-
Now if one wants to plot, use the Neural Network for predictions or just get the statistics, use the following:
127+
Now if one wants to plot a block diagram of the model, get the output of the NN, or get the importance weights of the features, use the following:
123128

124129
```python
125130
# Plot the model architecture
126131
ppr.plot_model("complex_model.png")
127132

128133
# Get predictions with an example test batch from the example data
129-
test_batch = tf.data.Dataset.from_tensor_slices(dict(df.head(3))).batch(3)
130-
predictions = result["model"].predict(test_batch)
131-
print("Output shape:", predictions.shape)
132-
133-
# Print feature statistics
134-
print("\nFeature Statistics:")
135-
for feature_type, features in ppr.get_feature_statistics().items():
136-
if isinstance(features, dict):
137-
print(f"\n{feature_type}:")
138-
for feature_name, stats in features.items():
139-
print(f" {feature_name}: {list(stats.keys())}")
134+
processed_data = ppr.transform(test_batch) # this returns a dict with "transformed_data" and "feature_weights"
135+
print("Output shape:", processed_data["transformed_data"].shape)
136+
137+
# Analyze feature importance if feature selection is enabled
138+
if "feature_weights" in processed_data:
139+
for feature_name in features:
140+
weights = processed_data[f"{feature_name}_weights"]
141+
print(f"Feature {feature_name} importance: {weights.mean()}")
140142
```
141143

142144

docs/feature_selection.md

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -120,22 +120,32 @@ from kdp.features import NumericalFeature, CategoricalFeature
120120

121121
# Define features
122122
features = {
123-
"numeric_1": NumericalFeature("numeric_1"),
124-
"numeric_2": NumericalFeature("numeric_2"),
125-
"category_1": CategoricalFeature("category_1"),
123+
"numeric_1": NumericalFeature(
124+
name="numeric_1",
125+
feature_type=FeatureType.FLOAT_NORMALIZED
126+
),
127+
"numeric_2": NumericalFeature(
128+
name="numeric_2",
129+
feature_type=FeatureType.FLOAT_NORMALIZED
130+
),
131+
"category_1": CategoricalFeature(
132+
name="category_1",
133+
feature_type=FeatureType.STRING_CATEGORICAL
134+
)
126135
}
127136

128137
# Create model with feature selection
129138
model = PreprocessingModel(
139+
# ... other parameters ...
130140
features_specs=features,
131-
feature_selection_placement="all_features",
141+
feature_selection_placement="all_features", # or "numeric" or "categorical"
132142
feature_selection_units=64,
133143
feature_selection_dropout=0.2
134144
)
135145

136146
# Build and use the model
137147
preprocessor = model.build_preprocessor()
138-
processed_data = model.transform(data)
148+
processed_data = model.transform(data) # data can be pd.DataFrame, python Dict, or tf.data.Dataset
139149

140150
# Analyze feature importance
141151
for feature_name in features:

docs/imgs/complex_model.png

35 KB
Loading

0 commit comments

Comments
 (0)