@@ -87,12 +87,15 @@ df = pd.DataFrame({
8787 ] * 20
8888})
8989
90- # Save to CSV
90+ # Format data
9191df.to_csv(" sample_data.csv" , index = False )
92+ test_batch = tf.data.Dataset.from_tensor_slices(dict (df.head(3 ))).batch(3 )
9293
9394# Create preprocessor with both transformer blocks and attention
9495ppr = PreprocessingModel(
9596 path_data = " sample_data.csv" ,
97+ features_stats_path = " features_stats.json" ,
98+ overwrite_stats = True , # Force stats generation, recommended to be set to True
9699 features_specs = features,
97100 output_mode = OutputModeOptions.CONCAT ,
98101
@@ -111,32 +114,31 @@ ppr = PreprocessingModel(
111114 tabular_attention_dropout = 0.1 , # Attention dropout rate
112115 tabular_attention_embedding_dim = 16 , # Embedding dimension
113116
114- # Other parameters
115- overwrite_stats = True , # Force stats generation, recommended to be set to True
117+ # Feature selection configuration
118+ feature_selection_placement = " all_features" , # Choose between (all_features|numeric|categorical)
119+ feature_selection_units = 32 ,
120+ feature_selection_dropout = 0.15 ,
116121)
117122
118123# Build the preprocessor
119124result = ppr.build_preprocessor()
120125```
121126
122- Now if one wants to plot, use the Neural Network for predictions or just get the statistics , use the following:
127+ Now if one wants to plot a block diagram of the model, get the output of the NN, or get the importance weights of the features, use the following:
123128
124129``` python
125130# Plot the model architecture
126131ppr.plot_model(" complex_model.png" )
127132
128133# Get predictions with an example test batch from the example data
129- test_batch = tf.data.Dataset.from_tensor_slices(dict (df.head(3 ))).batch(3 )
130- predictions = result[" model" ].predict(test_batch)
131- print (" Output shape:" , predictions.shape)
132-
133- # Print feature statistics
134- print (" \n Feature Statistics:" )
135- for feature_type, features in ppr.get_feature_statistics().items():
136- if isinstance (features, dict ):
137- print (f " \n { feature_type} : " )
138- for feature_name, stats in features.items():
139- print (f " { feature_name} : { list (stats.keys())} " )
134+ processed_data = ppr.transform(test_batch) # this returns a dict with "transformed_data" and "feature_weights"
135+ print (" Output shape:" , processed_data[" transformed_data" ].shape)
136+
137+ # Analyze feature importance if feature selection is enabled
138+ if " feature_weights" in processed_data:
139+ for feature_name in features:
140+ weights = processed_data[f " { feature_name} _weights " ]
141+ print (f " Feature { feature_name} importance: { weights.mean()} " )
140142```
141143
142144
0 commit comments