Thanks for visiting codestin.com
Credit goes to GitHub.com

Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 38 additions & 6 deletions sdk/python/feast/feature_store.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 Wrong exception type caught - KeyError vs FeatureViewNotFoundException

The code catches FeatureViewNotFoundException but dictionary key access raises KeyError when the key is missing.

Click to expand

Explanation

At sdk/python/feast/feature_store.py:1993-1996, the code builds a dictionary from feature views and then tries to access it by name:

feature_view_dict = {
    fv_proto.name: fv_proto
    for fv_proto in self.list_all_feature_views(...)
}
try:
    feature_view = feature_view_dict[feature_view_name]
except FeatureViewNotFoundException:
    raise FeatureViewNotFoundException(feature_view_name, self.project)

Actual vs Expected Behavior

  • Actual: When feature_view_name is not in the dictionary, Python raises KeyError, which is NOT caught by the except FeatureViewNotFoundException block. The KeyError propagates up with an unhelpful error message.
  • Expected: The code should catch KeyError and re-raise it as FeatureViewNotFoundException with proper project context.

Impact

When a user calls write_to_online_store(), write_to_online_store_async(), or push() with skip_feature_view_validation=True and an invalid feature view name, they get a confusing KeyError instead of the intended FeatureViewNotFoundException with helpful context about the project.

(Refers to lines 1995-1996)

Recommendation: Change except FeatureViewNotFoundException: to except KeyError: to properly catch the dictionary key lookup failure.

Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,17 @@ def list_feature_services(
return self._registry.list_feature_services(self.project, tags=tags)

def _list_all_feature_views(
self, allow_cache: bool = False, tags: Optional[dict[str, str]] = None
self,
allow_cache: bool = False,
tags: Optional[dict[str, str]] = None,
skip_feature_view_validation: bool = False,
) -> List[BaseFeatureView]:
feature_views = []
for fv in self.registry.list_all_feature_views(
self.project, allow_cache=allow_cache, tags=tags
self.project,
allow_cache=allow_cache,
tags=tags,
skip_udf=skip_feature_view_validation,
):
if (
isinstance(fv, FeatureView)
Expand All @@ -285,18 +291,27 @@ def _list_all_feature_views(
return feature_views

def list_all_feature_views(
self, allow_cache: bool = False, tags: Optional[dict[str, str]] = None
self,
allow_cache: bool = False,
tags: Optional[dict[str, str]] = None,
skip_feature_view_validation: bool = False,
) -> List[BaseFeatureView]:
"""
Retrieves the list of feature views from the registry.

Args:
allow_cache: Whether to allow returning entities from a cached registry.
tags: Filter by tags.
skip_feature_view_validation: Whether to skip validation of feature views (e.g., UDF deserialization).

Returns:
A list of feature views.
"""
return self._list_all_feature_views(allow_cache, tags=tags)
return self._list_all_feature_views(
allow_cache,
tags=tags,
skip_feature_view_validation=skip_feature_view_validation,
)

def list_feature_views(
self, allow_cache: bool = False, tags: Optional[dict[str, str]] = None
Expand Down Expand Up @@ -1741,6 +1756,7 @@ def push(
allow_registry_cache: bool = True,
to: PushMode = PushMode.ONLINE,
transform_on_write: bool = True,
skip_feature_view_validation: bool = False,
):
"""
Push features to a push source. This updates all the feature views that have the push source as stream source.
Expand All @@ -1751,6 +1767,7 @@ def push(
allow_registry_cache: Whether to allow cached versions of the registry.
to: Whether to push to online or offline store. Defaults to online store only.
transform_on_write: Whether to transform the data before pushing.
skip_feature_view_validation: Whether to skip validation of feature views (e.g., UDF deserialization).
"""
for fv in self._fvs_for_push_source_or_raise(
push_source_name, allow_registry_cache
Expand All @@ -1761,6 +1778,7 @@ def push(
df,
allow_registry_cache=allow_registry_cache,
transform_on_write=transform_on_write,
skip_feature_view_validation=skip_feature_view_validation,
)
if to == PushMode.OFFLINE or to == PushMode.ONLINE_AND_OFFLINE:
self.write_to_offline_store(
Expand All @@ -1773,6 +1791,7 @@ async def push_async(
df: pd.DataFrame,
allow_registry_cache: bool = True,
to: PushMode = PushMode.ONLINE,
skip_feature_view_validation: bool = False,
**kwargs,
):
fvs = self._fvs_for_push_source_or_raise(push_source_name, allow_registry_cache)
Expand All @@ -1781,7 +1800,10 @@ async def push_async(
_ = await asyncio.gather(
*[
self.write_to_online_store_async(

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot can you create a new GitHub Issue to clean up the write_to_online_store_async method? we should just be using async in the server going forward.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have direct access to create GitHub issues, but here's the recommended issue content:

Title: Refactor async methods in FeatureStore for server-only usage

Description:
The write_to_online_store_async() and push_async() methods should be cleaned up as async functionality should primarily be used in the server context going forward.

Proposed changes:

  • Review usage patterns of async methods
  • Consider deprecating or restructuring async methods for server-specific usage
  • Ensure clear separation between sync (client) and async (server) code paths

Related PR: #[this PR number]
Context: Line 1785 in feature_store.py

fv.name, df, allow_registry_cache=allow_registry_cache
fv.name,
df,
allow_registry_cache=allow_registry_cache,
skip_feature_view_validation=skip_feature_view_validation,
)
for fv in fvs
]
Expand Down Expand Up @@ -1959,10 +1981,14 @@ def _get_feature_view_and_df_for_online_write(
inputs: Optional[Union[Dict[str, List[Any]], pd.DataFrame]] = None,
allow_registry_cache: bool = True,
transform_on_write: bool = True,
skip_feature_view_validation: bool = False,
):
feature_view_dict = {
fv_proto.name: fv_proto
for fv_proto in self.list_all_feature_views(allow_registry_cache)
for fv_proto in self.list_all_feature_views(
allow_registry_cache,
skip_feature_view_validation=skip_feature_view_validation,
)
}
try:
feature_view = feature_view_dict[feature_view_name]
Expand Down Expand Up @@ -1992,6 +2018,7 @@ def write_to_online_store(
inputs: Optional[Union[Dict[str, List[Any]], pd.DataFrame]] = None,
allow_registry_cache: bool = True,
transform_on_write: bool = True,
skip_feature_view_validation: bool = False,
):
"""
Persists a dataframe to the online store.
Expand All @@ -2002,6 +2029,7 @@ def write_to_online_store(
inputs: Optional the dictionary object to be written
allow_registry_cache (optional): Whether to allow retrieving feature views from a cached registry.
transform_on_write (optional): Whether to transform the data before pushing.
skip_feature_view_validation (optional): Whether to skip validation of feature views (e.g., UDF deserialization).
"""

feature_view, df = self._get_feature_view_and_df_for_online_write(
Expand All @@ -2010,6 +2038,7 @@ def write_to_online_store(
inputs=inputs,
allow_registry_cache=allow_registry_cache,
transform_on_write=transform_on_write,
skip_feature_view_validation=skip_feature_view_validation,
)

# Validate that the dataframe has meaningful feature data
Expand Down Expand Up @@ -2037,6 +2066,7 @@ async def write_to_online_store_async(
df: Optional[pd.DataFrame] = None,
inputs: Optional[Union[Dict[str, List[Any]], pd.DataFrame]] = None,
allow_registry_cache: bool = True,
skip_feature_view_validation: bool = False,
):
"""
Persists a dataframe to the online store asynchronously.
Expand All @@ -2046,13 +2076,15 @@ async def write_to_online_store_async(
df: The dataframe to be persisted.
inputs: Optional the dictionary object to be written
allow_registry_cache (optional): Whether to allow retrieving feature views from a cached registry.
skip_feature_view_validation (optional): Whether to skip validation of feature views (e.g., UDF deserialization).
"""

feature_view, df = self._get_feature_view_and_df_for_online_write(
feature_view_name=feature_view_name,
df=df,
inputs=inputs,
allow_registry_cache=allow_registry_cache,
skip_feature_view_validation=skip_feature_view_validation,
)

# Validate that the dataframe has meaningful feature data
Expand Down
56 changes: 52 additions & 4 deletions sdk/python/feast/infra/registry/proto_registry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,33 @@ def list_feature_services(
return feature_services


@registry_proto_cache_with_tags

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot why did you remove this?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh i see you added it in _list_all_feature_views_cached

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the @registry_proto_cache_with_tags decorator because it only accepts 3 parameters (registry_proto, project, tags) and doesn't support the additional skip_udf parameter.

To maintain caching while supporting skip_udf, I:

  1. Created _list_all_feature_views_cached() with the decorator (for skip_udf=False)
  2. Made list_all_feature_views() conditionally call the cached version when skip_udf=False, or bypass caching when skip_udf=True

This prevents cache pollution with dummy UDFs while preserving the caching behavior for the default case (skip_udf=False).

Would you prefer a different approach, such as modifying the decorator to support additional parameters?

def list_all_feature_views(
registry_proto: RegistryProto,
project: str,
tags: Optional[dict[str, str]],
skip_udf: bool = False,
) -> List[BaseFeatureView]:
# Skip caching when skip_udf is True to avoid cache pollution
if skip_udf:
return (
list_feature_views(registry_proto, project, tags)
+ list_stream_feature_views(registry_proto, project, tags)
+ list_on_demand_feature_views(
registry_proto, project, tags, skip_udf=skip_udf
)
)
else:
return _list_all_feature_views_cached(registry_proto, project, tags)


@registry_proto_cache_with_tags
def _list_all_feature_views_cached(
registry_proto: RegistryProto, project: str, tags: Optional[dict[str, str]]
) -> List[BaseFeatureView]:
return (
list_feature_views(registry_proto, project, tags)
+ list_stream_feature_views(registry_proto, project, tags)
+ list_on_demand_feature_views(registry_proto, project, tags)
+ list_on_demand_feature_views(registry_proto, project, tags, skip_udf=False)
)


Expand Down Expand Up @@ -272,8 +291,37 @@ def list_stream_feature_views(
return stream_feature_views


@registry_proto_cache_with_tags
def list_on_demand_feature_views(
registry_proto: RegistryProto,
project: str,
tags: Optional[dict[str, str]],
skip_udf: bool = False,
) -> List[OnDemandFeatureView]:
# Skip caching when skip_udf is True to avoid cache pollution with dummy UDFs
if skip_udf:
on_demand_feature_views = []
for on_demand_feature_view in registry_proto.on_demand_feature_views:
if on_demand_feature_view.spec.project == project and utils.has_all_tags(
on_demand_feature_view.spec.tags, tags
):
# Only skip UDF deserialization for ODFVs that don't write to online store
# ODFVs with write_to_online_store=True need the actual UDF loaded
# because it will be executed during push operations
should_skip_udf = (
skip_udf and not on_demand_feature_view.spec.write_to_online_store
)
on_demand_feature_views.append(
OnDemandFeatureView.from_proto(
on_demand_feature_view, skip_udf=should_skip_udf
)
)
return on_demand_feature_views
else:
return _list_on_demand_feature_views_cached(registry_proto, project, tags)


@registry_proto_cache_with_tags
def _list_on_demand_feature_views_cached(
registry_proto: RegistryProto, project: str, tags: Optional[dict[str, str]]
) -> List[OnDemandFeatureView]:
on_demand_feature_views = []
Expand All @@ -282,7 +330,7 @@ def list_on_demand_feature_views(
on_demand_feature_view.spec.tags, tags
):
on_demand_feature_views.append(
OnDemandFeatureView.from_proto(on_demand_feature_view)
OnDemandFeatureView.from_proto(on_demand_feature_view, skip_udf=False)
)
return on_demand_feature_views

Expand Down
3 changes: 2 additions & 1 deletion sdk/python/feast/infra/registry/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -640,12 +640,13 @@ def list_all_feature_views(
project: str,
allow_cache: bool = False,
tags: Optional[dict[str, str]] = None,
skip_udf: bool = False,
) -> List[BaseFeatureView]:
registry_proto = self._get_registry_proto(
project=project, allow_cache=allow_cache
)
return proto_registry_utils.list_all_feature_views(
registry_proto, project, tags
registry_proto, project, tags, skip_udf=skip_udf
)

def get_any_feature_view(
Expand Down
15 changes: 8 additions & 7 deletions sdk/python/feast/on_demand_feature_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -531,7 +531,7 @@ def from_proto(

# Parse transformation from proto
transformation = cls._parse_transformation_from_proto(
on_demand_feature_view_proto
on_demand_feature_view_proto, skip_udf=skip_udf
)

# Parse optional fields with defaults
Expand Down Expand Up @@ -603,7 +603,7 @@ def _parse_sources_from_proto(

@classmethod
def _parse_transformation_from_proto(
cls, proto: OnDemandFeatureViewProto
cls, proto: OnDemandFeatureViewProto, skip_udf: bool = False
) -> Transformation:
"""Parse and convert the transformation from the protobuf representation."""
feature_transformation = proto.spec.feature_transformation
Expand All @@ -616,30 +616,30 @@ def _parse_transformation_from_proto(
# Check for non-empty UDF body
if udf_proto.body_text:
if mode == "pandas":
return PandasTransformation.from_proto(udf_proto)
return PandasTransformation.from_proto(udf_proto, skip_udf=skip_udf)
elif mode == "python":
return PythonTransformation.from_proto(udf_proto)
return PythonTransformation.from_proto(udf_proto, skip_udf=skip_udf)
else:
raise ValueError(ODFVErrorMessages.unsupported_mode_for_udf(mode))
else:
# Handle backward compatibility case with empty body_text
return cls._handle_backward_compatible_udf(proto)
return cls._handle_backward_compatible_udf(proto, skip_udf=skip_udf)

elif transformation_type == "substrait_transformation":
return SubstraitTransformation.from_proto(
feature_transformation.substrait_transformation
)
elif transformation_type is None:
# Handle backward compatibility case where feature_transformation is cleared
return cls._handle_backward_compatible_udf(proto)
return cls._handle_backward_compatible_udf(proto, skip_udf=skip_udf)
else:
raise ValueError(
ODFVErrorMessages.unsupported_transformation_type(transformation_type)
)

@classmethod
def _handle_backward_compatible_udf(
cls, proto: OnDemandFeatureViewProto
cls, proto: OnDemandFeatureViewProto, skip_udf: bool = False
) -> Transformation:
"""Handle backward compatibility for UDFs with empty body_text."""
if not hasattr(proto.spec, "user_defined_function"):
Expand All @@ -653,6 +653,7 @@ def _handle_backward_compatible_udf(
)
return PandasTransformation.from_proto(
user_defined_function_proto=backwards_compatible_udf,
skip_udf=skip_udf,
)

@classmethod
Expand Down
16 changes: 15 additions & 1 deletion sdk/python/feast/transformation/pandas_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,21 @@ def __eq__(self, other):
return True

@classmethod
def from_proto(cls, user_defined_function_proto: UserDefinedFunctionProto):
def from_proto(
cls,
user_defined_function_proto: UserDefinedFunctionProto,
skip_udf: bool = False,
):
if skip_udf:
# Return a dummy transformation when skipping UDF deserialization
# Identity function that preserves DataFrame structure
def identity_udf(df: pd.DataFrame) -> pd.DataFrame:
return df

return PandasTransformation(
udf=identity_udf,
udf_string=user_defined_function_proto.body_text,
)
return PandasTransformation(
udf=dill.loads(user_defined_function_proto.body),
udf_string=user_defined_function_proto.body_text,
Expand Down
16 changes: 15 additions & 1 deletion sdk/python/feast/transformation/python_transformation.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,21 @@ def __reduce__(self):
)

@classmethod
def from_proto(cls, user_defined_function_proto: UserDefinedFunctionProto):
def from_proto(
cls,
user_defined_function_proto: UserDefinedFunctionProto,
skip_udf: bool = False,
):
if skip_udf:
# Return a dummy transformation when skipping UDF deserialization
# Identity function that preserves dictionary structure
def identity_udf(features_dict: Dict[str, Any]) -> Dict[str, Any]:
return features_dict

return PythonTransformation(
udf=identity_udf,
udf_string=user_defined_function_proto.body_text,
)
return PythonTransformation(
udf=dill.loads(user_defined_function_proto.body),
udf_string=user_defined_function_proto.body_text,
Expand Down
Loading
Loading