@@ -314,6 +314,7 @@ def __init__(
314314 feature_moe_dropout : float = 0.1 ,
315315 feature_moe_freeze_experts : bool = False ,
316316 feature_moe_use_residual : bool = True ,
317+ include_passthrough_in_output : bool = True ,
317318 ) -> None :
318319 """Initialize a preprocessing model.
319320
@@ -419,6 +420,9 @@ def __init__(
419420 self .feature_moe_freeze_experts = feature_moe_freeze_experts
420421 self .feature_moe_use_residual = feature_moe_use_residual
421422
423+ # Passthrough features control
424+ self .include_passthrough_in_output = include_passthrough_in_output
425+
422426 # Initialize feature type lists
423427 self .numeric_features = []
424428 self .categorical_features = []
@@ -433,6 +437,7 @@ def __init__(
433437 self .signature = {}
434438 self .outputs = {} # Final outputs for DICT mode
435439 self .processed_features = {} # All processed features before final output
440+ self .passthrough_outputs = {} # Passthrough features (unprocessed)
436441 self .concat_all = None # Final concatenated output for CONCAT mode
437442 self ._preprocessed_cache = {} if use_caching else None
438443
@@ -1400,7 +1405,11 @@ def _add_pipeline_date(self, feature_name: str, input_layer) -> None:
14001405
14011406 @_monitor_performance
14021407 def _add_pipeline_passthrough (self , feature_name : str , input_layer ) -> None :
1403- """Add a passthrough feature to the pipeline without preprocessing.
1408+ """Add a passthrough feature to the pipeline.
1409+
1410+ Depending on include_passthrough_in_output setting:
1411+ - If True: Process minimally and include in main output (legacy behavior)
1412+ - If False: Store unprocessed for separate access (recommended for IDs/metadata)
14041413
14051414 Args:
14061415 feature_name (str): The name of the feature to be passed through.
@@ -1409,6 +1418,17 @@ def _add_pipeline_passthrough(self, feature_name: str, input_layer) -> None:
14091418 # getting feature object
14101419 _feature = self .features_specs [feature_name ]
14111420
1421+ if self .include_passthrough_in_output :
1422+ # Legacy behavior: minimal processing and include in main output
1423+ self ._process_passthrough_for_output (feature_name , input_layer , _feature )
1424+ else :
1425+ # New behavior: store unprocessed for separate access
1426+ self ._store_passthrough_unprocessed (feature_name , input_layer , _feature )
1427+
1428+ def _process_passthrough_for_output (
1429+ self , feature_name : str , input_layer , _feature
1430+ ) -> None :
1431+ """Process passthrough feature minimally for inclusion in main output."""
14121432 # initializing preprocessor
14131433 preprocessor = FeaturePreprocessor (name = feature_name )
14141434
@@ -1452,6 +1472,17 @@ def _add_pipeline_passthrough(self, feature_name: str, input_layer) -> None:
14521472
14531473 self .processed_features [feature_name ] = _output_pipeline
14541474
def _store_passthrough_unprocessed(
    self, feature_name: str, input_layer, _feature
) -> None:
    """Record a passthrough feature's raw input for separate access.

    The input layer is stored as-is in ``self.passthrough_outputs`` under
    the feature's name; no preprocessing layers are attached here.

    Args:
        feature_name (str): Name of the passthrough feature; used as the
            key in ``self.passthrough_outputs``.
        input_layer: The input layer for this feature, stored unmodified.
        _feature: Feature specification object. Not used by this method;
            accepted so the call signature matches
            ``_process_passthrough_for_output``.
    """
    # The quotes hug the feature name directly (no padding inside them) so
    # the logged name can be copied or grepped verbatim.
    logger.info(
        f"Storing passthrough feature '{feature_name}' unprocessed for separate access"
    )
    # Keep the untouched input; an existing entry under the same name is
    # overwritten.
    self.passthrough_outputs[feature_name] = input_layer
1485+
14551486 @_monitor_performance
14561487 def _add_pipeline_time_series (
14571488 self , feature_name : str , input_layer , feature
@@ -1650,6 +1681,37 @@ def _prepare_concat_mode_outputs(self) -> None:
16501681
16511682 logger .info ("Concatenating outputs mode enabled" )
16521683
def _combine_all_features(
    self, concat_num: Optional[tf.Tensor], concat_cat: Optional[tf.Tensor]
) -> None:
    """Set ``self.concat_all`` from the numeric and categorical tensors.

    When both tensors are present they are joined along the last axis by a
    Keras ``Concatenate`` layer named "ConcatenateAll". When only one is
    present it is used directly. When neither is present, ``self.concat_all``
    is set to ``None`` provided passthrough features were stored separately
    (``self.passthrough_outputs`` is non-empty and
    ``self.include_passthrough_in_output`` is false); otherwise an error is
    raised.

    Args:
        concat_num: Concatenated numeric features tensor, or None.
        concat_cat: Concatenated categorical features tensor, or None.

    Raises:
        ValueError: If there is nothing to concatenate and no separately
            stored passthrough features exist.
    """
    has_numeric = concat_num is not None
    has_categorical = concat_cat is not None

    # Both groups available: merge them on the feature axis.
    if has_numeric and has_categorical:
        merge_layer = tf.keras.layers.Concatenate(name="ConcatenateAll", axis=-1)
        self.concat_all = merge_layer([concat_num, concat_cat])
        return

    # Exactly one group available: use it unchanged.
    if has_numeric or has_categorical:
        self.concat_all = concat_num if has_numeric else concat_cat
        return

    # Nothing processed. This is only acceptable when passthrough features
    # exist and are kept out of the main output.
    if not self.passthrough_outputs or self.include_passthrough_in_output:
        raise ValueError("No features available for concatenation")

    logger.info(
        "No processed features to concatenate - only passthrough features exist"
    )
    # Left as None on purpose; the caller that builds the model is expected
    # to handle the passthrough-only case.
    self.concat_all = None
1714+
16531715 def _group_features_by_type (self ) -> Tuple [List , List ]:
16541716 """Group processed features by type for concatenation.
16551717
@@ -1659,7 +1721,8 @@ def _group_features_by_type(self) -> Tuple[List, List]:
16591721 # Initialize lists for features of different types
16601722 numeric_features = []
16611723 categorical_features = []
1662- passthrough_features = []
1724+ passthrough_features_numeric = []
1725+ passthrough_features_string = []
16631726
16641727 # Group processed features by type
16651728 for feature_name , feature in self .processed_features .items ():
@@ -1688,14 +1751,36 @@ def _group_features_by_type(self) -> Tuple[List, List]:
16881751 logger .debug (f"Adding { feature_name } to categorical features" )
16891752 categorical_features .append (feature )
16901753 elif feature_name in self .passthrough_features :
1691- logger .debug (f"Adding { feature_name } to passthrough features" )
1692- passthrough_features .append (feature )
1754+ # Only include passthrough features in concatenation if they're meant to be in output
1755+ # When include_passthrough_in_output=False, they should be stored separately
1756+ if self .include_passthrough_in_output :
1757+ # Separate passthrough features by dtype to avoid concatenation issues
1758+ feature_dtype = getattr (feature_spec , "dtype" , tf .float32 )
1759+ if feature_dtype == tf .string :
1760+ logger .debug (
1761+ f"Adding { feature_name } to string passthrough features"
1762+ )
1763+ passthrough_features_string .append (feature )
1764+ else :
1765+ logger .debug (
1766+ f"Adding { feature_name } to numeric passthrough features"
1767+ )
1768+ passthrough_features_numeric .append (feature )
1769+ else :
1770+ logger .debug (
1771+ f"Skipping { feature_name } from concatenation (stored separately)"
1772+ )
16931773 else :
16941774 logger .warning (f"Unknown feature type for { feature_name } " )
16951775
1696- # For concatenation purposes, add passthrough features to numeric features
1697- if passthrough_features :
1698- numeric_features .extend (passthrough_features )
1776+ # Add numeric passthrough features to numeric features (only if include_passthrough_in_output=True)
1777+ if passthrough_features_numeric :
1778+ numeric_features .extend (passthrough_features_numeric )
1779+
1780+ # Add string passthrough features to categorical features (only if include_passthrough_in_output=True)
1781+ # (since categorical features are typically strings and handled separately)
1782+ if passthrough_features_string :
1783+ categorical_features .extend (passthrough_features_string )
16991784
17001785 return numeric_features , categorical_features
17011786
@@ -1753,30 +1838,6 @@ def _concatenate_categorical_features(
17531838
17541839 return concat_cat
17551840
1756- def _combine_all_features (
1757- self , concat_num : Optional [tf .Tensor ], concat_cat : Optional [tf .Tensor ]
1758- ) -> None :
1759- """Combine numeric and categorical features.
1760-
1761- Args:
1762- concat_num: Concatenated numeric features tensor
1763- concat_cat: Concatenated categorical features tensor
1764-
1765- Raises:
1766- ValueError: If no features are available for concatenation
1767- """
1768- if concat_num is not None and concat_cat is not None :
1769- self .concat_all = tf .keras .layers .Concatenate (
1770- name = "ConcatenateAll" ,
1771- axis = - 1 ,
1772- )([concat_num , concat_cat ])
1773- elif concat_num is not None :
1774- self .concat_all = concat_num
1775- elif concat_cat is not None :
1776- self .concat_all = concat_cat
1777- else :
1778- raise ValueError ("No features available for concatenation" )
1779-
17801841 def _apply_multi_resolution_attention (
17811842 self , concat_num : tf .Tensor , concat_cat : tf .Tensor
17821843 ) -> None :
@@ -2388,24 +2449,77 @@ def build_preprocessor(self) -> dict:
23882449 # Build the model based on output mode
23892450 logger .info ("Building preprocessor Model" )
23902451 if self .output_mode == OutputModeOptions .CONCAT .value :
2391- if self .concat_all is None :
2452+ # Handle case where only passthrough features exist
2453+ if (
2454+ self .concat_all is None
2455+ and self .passthrough_outputs
2456+ and not self .include_passthrough_in_output
2457+ ):
2458+ logger .info (
2459+ "Only passthrough features detected - creating passthrough-only model"
2460+ )
2461+ self .model = tf .keras .Model (
2462+ inputs = self .inputs ,
2463+ outputs = self .passthrough_outputs ,
2464+ name = "preprocessor" ,
2465+ )
2466+ _output_dims = "passthrough_only"
2467+ elif self .concat_all is None :
23922468 raise ValueError (
23932469 "No features were concatenated. Check if features were properly processed."
23942470 )
2395- self .model = tf .keras .Model (
2396- inputs = self .inputs ,
2397- outputs = self .concat_all , # Use concat_all for CONCAT mode
2398- name = "preprocessor" ,
2399- )
2400- _output_dims = self .model .output_shape [1 ]
2471+ else :
2472+ # Determine outputs based on passthrough settings
2473+ if (
2474+ self .passthrough_outputs
2475+ and not self .include_passthrough_in_output
2476+ ):
2477+ # Include both processed (concat) and passthrough outputs
2478+ model_outputs = {
2479+ "processed" : self .concat_all ,
2480+ "passthrough" : self .passthrough_outputs ,
2481+ }
2482+ logger .info (
2483+ f"Creating model with separate passthrough outputs: { list (self .passthrough_outputs .keys ())} "
2484+ )
2485+ else :
2486+ # Standard concat output
2487+ model_outputs = self .concat_all
2488+
2489+ self .model = tf .keras .Model (
2490+ inputs = self .inputs ,
2491+ outputs = model_outputs ,
2492+ name = "preprocessor" ,
2493+ )
2494+ _output_dims = (
2495+ self .model .output_shape [1 ]
2496+ if isinstance (model_outputs , tf .Tensor )
2497+ else "mixed"
2498+ )
2499+
24012500 else : # DICT mode
2402- if not self .outputs :
2501+ if not self .outputs and not self . passthrough_outputs :
24032502 raise ValueError (
24042503 "No outputs were created. Check if features were properly processed."
24052504 )
2505+
2506+ # Include passthrough outputs in dict mode if they exist
2507+ final_outputs = self .outputs .copy () if self .outputs else {}
2508+ if self .passthrough_outputs and not self .include_passthrough_in_output :
2509+ final_outputs .update (self .passthrough_outputs )
2510+ logger .info (
2511+ f"Adding passthrough outputs to dict mode: { list (self .passthrough_outputs .keys ())} "
2512+ )
2513+ elif not final_outputs :
2514+ # Only passthrough features exist
2515+ final_outputs = self .passthrough_outputs
2516+ logger .info (
2517+ "Only passthrough features detected - creating passthrough-only dict model"
2518+ )
2519+
24062520 self .model = tf .keras .Model (
24072521 inputs = self .inputs ,
2408- outputs = self . outputs , # Use outputs dict for DICT mode
2522+ outputs = final_outputs ,
24092523 name = "preprocessor" ,
24102524 )
24112525 _output_dims = self .model .output_shape
0 commit comments