-
Notifications
You must be signed in to change notification settings - Fork 1.2k
feat: Add skip_feature_view_validation parameter to push() for ODFVs with missing UDF dependencies #5894
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
feat: Add skip_feature_view_validation parameter to push() for ODFVs with missing UDF dependencies #5894
Changes from all commits
ee8a7d0
6b2bf7f
f9542e5
79d6744
a735bfb
2433df6
72c471e
885536e
d4dd5f5
98f92a2
c111fba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -268,11 +268,17 @@ def list_feature_services( | |
| return self._registry.list_feature_services(self.project, tags=tags) | ||
|
|
||
| def _list_all_feature_views( | ||
| self, allow_cache: bool = False, tags: Optional[dict[str, str]] = None | ||
| self, | ||
| allow_cache: bool = False, | ||
| tags: Optional[dict[str, str]] = None, | ||
| skip_feature_view_validation: bool = False, | ||
| ) -> List[BaseFeatureView]: | ||
| feature_views = [] | ||
| for fv in self.registry.list_all_feature_views( | ||
| self.project, allow_cache=allow_cache, tags=tags | ||
| self.project, | ||
| allow_cache=allow_cache, | ||
| tags=tags, | ||
| skip_udf=skip_feature_view_validation, | ||
| ): | ||
| if ( | ||
| isinstance(fv, FeatureView) | ||
|
|
@@ -285,18 +291,27 @@ def _list_all_feature_views( | |
| return feature_views | ||
|
|
||
| def list_all_feature_views( | ||
| self, allow_cache: bool = False, tags: Optional[dict[str, str]] = None | ||
| self, | ||
| allow_cache: bool = False, | ||
| tags: Optional[dict[str, str]] = None, | ||
| skip_feature_view_validation: bool = False, | ||
| ) -> List[BaseFeatureView]: | ||
| """ | ||
| Retrieves the list of feature views from the registry. | ||
|
|
||
| Args: | ||
| allow_cache: Whether to allow returning entities from a cached registry. | ||
| tags: Filter by tags. | ||
| skip_feature_view_validation: Whether to skip validation of feature views (e.g., UDF deserialization). | ||
|
|
||
| Returns: | ||
| A list of feature views. | ||
| """ | ||
| return self._list_all_feature_views(allow_cache, tags=tags) | ||
| return self._list_all_feature_views( | ||
| allow_cache, | ||
| tags=tags, | ||
| skip_feature_view_validation=skip_feature_view_validation, | ||
| ) | ||
|
|
||
| def list_feature_views( | ||
| self, allow_cache: bool = False, tags: Optional[dict[str, str]] = None | ||
|
|
@@ -1741,6 +1756,7 @@ def push( | |
| allow_registry_cache: bool = True, | ||
| to: PushMode = PushMode.ONLINE, | ||
| transform_on_write: bool = True, | ||
| skip_feature_view_validation: bool = False, | ||
| ): | ||
| """ | ||
| Push features to a push source. This updates all the feature views that have the push source as stream source. | ||
|
|
@@ -1751,6 +1767,7 @@ def push( | |
| allow_registry_cache: Whether to allow cached versions of the registry. | ||
| to: Whether to push to online or offline store. Defaults to online store only. | ||
| transform_on_write: Whether to transform the data before pushing. | ||
| skip_feature_view_validation: Whether to skip validation of feature views (e.g., UDF deserialization). | ||
| """ | ||
| for fv in self._fvs_for_push_source_or_raise( | ||
| push_source_name, allow_registry_cache | ||
|
|
@@ -1761,6 +1778,7 @@ def push( | |
| df, | ||
| allow_registry_cache=allow_registry_cache, | ||
| transform_on_write=transform_on_write, | ||
| skip_feature_view_validation=skip_feature_view_validation, | ||
| ) | ||
| if to == PushMode.OFFLINE or to == PushMode.ONLINE_AND_OFFLINE: | ||
| self.write_to_offline_store( | ||
|
|
@@ -1773,6 +1791,7 @@ async def push_async( | |
| df: pd.DataFrame, | ||
| allow_registry_cache: bool = True, | ||
| to: PushMode = PushMode.ONLINE, | ||
| skip_feature_view_validation: bool = False, | ||
| **kwargs, | ||
| ): | ||
| fvs = self._fvs_for_push_source_or_raise(push_source_name, allow_registry_cache) | ||
|
|
@@ -1781,7 +1800,10 @@ async def push_async( | |
| _ = await asyncio.gather( | ||
| *[ | ||
| self.write_to_online_store_async( | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @copilot can you create a new GitHub Issue to clean up the …
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't have direct access to create GitHub issues, but here's the recommended issue content: Title: Refactor async methods in FeatureStore for server-only usage Description: Proposed changes:
Related PR: #[this PR number] |
||
| fv.name, df, allow_registry_cache=allow_registry_cache | ||
| fv.name, | ||
| df, | ||
| allow_registry_cache=allow_registry_cache, | ||
| skip_feature_view_validation=skip_feature_view_validation, | ||
| ) | ||
| for fv in fvs | ||
| ] | ||
|
|
@@ -1959,10 +1981,14 @@ def _get_feature_view_and_df_for_online_write( | |
| inputs: Optional[Union[Dict[str, List[Any]], pd.DataFrame]] = None, | ||
| allow_registry_cache: bool = True, | ||
| transform_on_write: bool = True, | ||
| skip_feature_view_validation: bool = False, | ||
| ): | ||
| feature_view_dict = { | ||
| fv_proto.name: fv_proto | ||
| for fv_proto in self.list_all_feature_views(allow_registry_cache) | ||
| for fv_proto in self.list_all_feature_views( | ||
| allow_registry_cache, | ||
| skip_feature_view_validation=skip_feature_view_validation, | ||
| ) | ||
| } | ||
| try: | ||
| feature_view = feature_view_dict[feature_view_name] | ||
|
|
@@ -1992,6 +2018,7 @@ def write_to_online_store( | |
| inputs: Optional[Union[Dict[str, List[Any]], pd.DataFrame]] = None, | ||
| allow_registry_cache: bool = True, | ||
| transform_on_write: bool = True, | ||
| skip_feature_view_validation: bool = False, | ||
| ): | ||
| """ | ||
| Persists a dataframe to the online store. | ||
|
|
@@ -2002,6 +2029,7 @@ def write_to_online_store( | |
| inputs: Optional the dictionary object to be written | ||
| allow_registry_cache (optional): Whether to allow retrieving feature views from a cached registry. | ||
| transform_on_write (optional): Whether to transform the data before pushing. | ||
| skip_feature_view_validation (optional): Whether to skip validation of feature views (e.g., UDF deserialization). | ||
| """ | ||
|
|
||
| feature_view, df = self._get_feature_view_and_df_for_online_write( | ||
|
|
@@ -2010,6 +2038,7 @@ def write_to_online_store( | |
| inputs=inputs, | ||
| allow_registry_cache=allow_registry_cache, | ||
| transform_on_write=transform_on_write, | ||
| skip_feature_view_validation=skip_feature_view_validation, | ||
| ) | ||
|
|
||
| # Validate that the dataframe has meaningful feature data | ||
|
|
@@ -2037,6 +2066,7 @@ async def write_to_online_store_async( | |
| df: Optional[pd.DataFrame] = None, | ||
| inputs: Optional[Union[Dict[str, List[Any]], pd.DataFrame]] = None, | ||
| allow_registry_cache: bool = True, | ||
| skip_feature_view_validation: bool = False, | ||
| ): | ||
| """ | ||
| Persists a dataframe to the online store asynchronously. | ||
|
|
@@ -2046,13 +2076,15 @@ async def write_to_online_store_async( | |
| df: The dataframe to be persisted. | ||
| inputs: Optional the dictionary object to be written | ||
| allow_registry_cache (optional): Whether to allow retrieving feature views from a cached registry. | ||
| skip_feature_view_validation (optional): Whether to skip validation of feature views (e.g., UDF deserialization). | ||
| """ | ||
|
|
||
| feature_view, df = self._get_feature_view_and_df_for_online_write( | ||
| feature_view_name=feature_view_name, | ||
| df=df, | ||
| inputs=inputs, | ||
| allow_registry_cache=allow_registry_cache, | ||
| skip_feature_view_validation=skip_feature_view_validation, | ||
| ) | ||
|
|
||
| # Validate that the dataframe has meaningful feature data | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -233,14 +233,33 @@ def list_feature_services( | |
| return feature_services | ||
|
|
||
|
|
||
| @registry_proto_cache_with_tags | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @copilot why did you remove this?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. oh i see you added it in _list_all_feature_views_cached
Contributor
Author
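There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I removed the `@registry_proto_cache_with_tags` decorator from `list_all_feature_views`. To maintain caching while supporting `skip_udf`, the cached logic now lives in `_list_all_feature_views_cached`, which `list_all_feature_views` calls whenever `skip_udf` is False; when `skip_udf` is True the result is built directly and is not cached.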
This prevents cache pollution with dummy UDFs while preserving the caching behavior for the default case (skip_udf=False). Would you prefer a different approach, such as modifying the decorator to support additional parameters? |
||
| def list_all_feature_views( | ||
| registry_proto: RegistryProto, | ||
| project: str, | ||
| tags: Optional[dict[str, str]], | ||
| skip_udf: bool = False, | ||
| ) -> List[BaseFeatureView]: | ||
| # Skip caching when skip_udf is True to avoid cache pollution | ||
| if skip_udf: | ||
| return ( | ||
| list_feature_views(registry_proto, project, tags) | ||
| + list_stream_feature_views(registry_proto, project, tags) | ||
| + list_on_demand_feature_views( | ||
| registry_proto, project, tags, skip_udf=skip_udf | ||
| ) | ||
| ) | ||
| else: | ||
| return _list_all_feature_views_cached(registry_proto, project, tags) | ||
|
|
||
|
|
||
| @registry_proto_cache_with_tags | ||
| def _list_all_feature_views_cached( | ||
| registry_proto: RegistryProto, project: str, tags: Optional[dict[str, str]] | ||
| ) -> List[BaseFeatureView]: | ||
| return ( | ||
| list_feature_views(registry_proto, project, tags) | ||
| + list_stream_feature_views(registry_proto, project, tags) | ||
| + list_on_demand_feature_views(registry_proto, project, tags) | ||
| + list_on_demand_feature_views(registry_proto, project, tags, skip_udf=False) | ||
| ) | ||
|
|
||
|
|
||
|
|
@@ -272,8 +291,37 @@ def list_stream_feature_views( | |
| return stream_feature_views | ||
|
|
||
|
|
||
| @registry_proto_cache_with_tags | ||
| def list_on_demand_feature_views( | ||
| registry_proto: RegistryProto, | ||
| project: str, | ||
| tags: Optional[dict[str, str]], | ||
| skip_udf: bool = False, | ||
| ) -> List[OnDemandFeatureView]: | ||
| # Skip caching when skip_udf is True to avoid cache pollution with dummy UDFs | ||
| if skip_udf: | ||
| on_demand_feature_views = [] | ||
| for on_demand_feature_view in registry_proto.on_demand_feature_views: | ||
| if on_demand_feature_view.spec.project == project and utils.has_all_tags( | ||
| on_demand_feature_view.spec.tags, tags | ||
| ): | ||
| # Only skip UDF deserialization for ODFVs that don't write to online store | ||
| # ODFVs with write_to_online_store=True need the actual UDF loaded | ||
| # because it will be executed during push operations | ||
| should_skip_udf = ( | ||
| skip_udf and not on_demand_feature_view.spec.write_to_online_store | ||
| ) | ||
| on_demand_feature_views.append( | ||
| OnDemandFeatureView.from_proto( | ||
| on_demand_feature_view, skip_udf=should_skip_udf | ||
| ) | ||
| ) | ||
| return on_demand_feature_views | ||
| else: | ||
| return _list_on_demand_feature_views_cached(registry_proto, project, tags) | ||
|
|
||
|
|
||
| @registry_proto_cache_with_tags | ||
| def _list_on_demand_feature_views_cached( | ||
| registry_proto: RegistryProto, project: str, tags: Optional[dict[str, str]] | ||
| ) -> List[OnDemandFeatureView]: | ||
| on_demand_feature_views = [] | ||
|
|
@@ -282,7 +330,7 @@ def list_on_demand_feature_views( | |
| on_demand_feature_view.spec.tags, tags | ||
| ): | ||
| on_demand_feature_views.append( | ||
| OnDemandFeatureView.from_proto(on_demand_feature_view) | ||
| OnDemandFeatureView.from_proto(on_demand_feature_view, skip_udf=False) | ||
| ) | ||
| return on_demand_feature_views | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟡 Wrong exception type caught - KeyError vs FeatureViewNotFoundException
The code catches `FeatureViewNotFoundException`, but dictionary key access raises `KeyError` when the key is missing.
Click to expand
Explanation
At `sdk/python/feast/feature_store.py:1993-1996`, the code builds a dictionary from feature views and then tries to access it by name: `feature_view = feature_view_dict[feature_view_name]`.
Actual vs Expected Behavior
- Actual: when `feature_view_name` is not in the dictionary, Python raises `KeyError`, which is NOT caught by the `except FeatureViewNotFoundException` block. The `KeyError` propagates up with an unhelpful error message.
- Expected: the code should catch `KeyError` and re-raise it as `FeatureViewNotFoundException` with proper project context.
Impact
When a user calls `write_to_online_store()`, `write_to_online_store_async()`, or `push()` with `skip_feature_view_validation=True` and an invalid feature view name, they get a confusing `KeyError` instead of the intended `FeatureViewNotFoundException` with helpful context about the project.
(Refers to lines 1995-1996)
Recommendation: Change `except FeatureViewNotFoundException:` to `except KeyError:` to properly catch the dictionary key lookup failure.
Was this helpful? React with 👍 or 👎 to provide feedback.