-
Notifications
You must be signed in to change notification settings - Fork 1.3k
feat: Add skip_feature_view_validation parameter to push() for ODFVs with missing UDF dependencies #5894
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
feat: Add skip_feature_view_validation parameter to push() for ODFVs with missing UDF dependencies #5894
Changes from 1 commit
ee8a7d0
6b2bf7f
f9542e5
79d6744
a735bfb
2433df6
72c471e
885536e
d4dd5f5
98f92a2
c111fba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
Co-authored-by: franciscojavierarceo <[email protected]>
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,11 +7,20 @@ | |
| - Cases where the type/validation system is being too restrictive | ||
|
|
||
| Users should be encouraged to report issues on GitHub when they need to use this flag. | ||
|
|
||
| Also tests skip_validation parameter in push() and related methods to handle | ||
| On-Demand Feature Views with UDFs that reference modules not available in the | ||
| current environment. | ||
| """ | ||
|
|
||
| import inspect | ||
|
|
||
| import dill | ||
| import pandas as pd | ||
|
|
||
| from feast.feature_store import FeatureStore | ||
| from feast.on_demand_feature_view import PandasTransformation, PythonTransformation | ||
| from feast.protos.feast.core.Transformation_pb2 import UserDefinedFunctionV2 as UserDefinedFunctionProto | ||
|
|
||
|
|
||
| def test_apply_has_skip_feature_view_validation_parameter(): | ||
|
|
@@ -48,6 +57,108 @@ def test_plan_has_skip_feature_view_validation_parameter(): | |
| assert param.annotation == bool | ||
|
|
||
|
|
||
| def test_push_has_skip_validation_parameter(): | ||
| """Test that FeatureStore.push() method has skip_validation parameter""" | ||
| # Get the signature of the push method | ||
| sig = inspect.signature(FeatureStore.push) | ||
|
|
||
| # Check that skip_validation parameter exists | ||
| assert "skip_validation" in sig.parameters | ||
|
|
||
| # Check that it has a default value of False | ||
| param = sig.parameters["skip_validation"] | ||
| assert param.default is False | ||
|
|
||
| # Check that it's a boolean type hint (if type hints are present) | ||
| if param.annotation != inspect.Parameter.empty: | ||
| assert param.annotation == bool | ||
|
|
||
|
|
||
| def test_push_async_has_skip_validation_parameter(): | ||
| """Test that FeatureStore.push_async() method has skip_validation parameter""" | ||
| # Get the signature of the push_async method | ||
| sig = inspect.signature(FeatureStore.push_async) | ||
|
|
||
| # Check that skip_validation parameter exists | ||
| assert "skip_validation" in sig.parameters | ||
|
|
||
| # Check that it has a default value of False | ||
| param = sig.parameters["skip_validation"] | ||
| assert param.default is False | ||
|
|
||
| # Check that it's a boolean type hint (if type hints are present) | ||
| if param.annotation != inspect.Parameter.empty: | ||
| assert param.annotation == bool | ||
|
|
||
|
|
||
| def test_pandas_transformation_from_proto_with_skip_udf(): | ||
| """Test that PandasTransformation.from_proto works with skip_udf=True.""" | ||
|
|
||
| # Create a UDF that would reference a non-existent module | ||
| def udf_with_missing_module(df: pd.DataFrame) -> pd.DataFrame: | ||
| # This would normally fail if a module is missing during deserialization | ||
| import nonexistent_module # noqa: F401 | ||
| return df | ||
|
|
||
| # Serialize the UDF | ||
| serialized_udf = dill.dumps(udf_with_missing_module) | ||
| udf_string = "import nonexistent_module\ndef udf(df): return df" | ||
|
|
||
| # Create proto | ||
| udf_proto = UserDefinedFunctionProto( | ||
| name="test_udf", | ||
| body=serialized_udf, | ||
| body_text=udf_string, | ||
| ) | ||
|
|
||
| # Test that skip_udf=True doesn't try to deserialize the UDF | ||
| # This would normally fail with ModuleNotFoundError | ||
| transformation = PandasTransformation.from_proto(udf_proto, skip_udf=True) | ||
|
|
||
| # Should get a dummy transformation with identity function | ||
| assert transformation is not None | ||
| assert transformation.udf_string == udf_string | ||
|
|
||
| # The dummy UDF should be callable and act as identity | ||
| test_df = pd.DataFrame({"col1": [1, 2, 3]}) | ||
| result = transformation.udf(test_df) | ||
| assert result.equals(test_df) | ||
|
|
||
|
|
||
| def test_python_transformation_from_proto_with_skip_udf(): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @copilot we need to validate that the skip_udf is being applied only during push() for ODFVs that do not have
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in commit 2433df6. Added logic to only skip UDF deserialization for ODFVs with |
||
| """Test that PythonTransformation.from_proto works with skip_udf=True.""" | ||
|
|
||
| # Create a UDF that would reference a non-existent module | ||
| def udf_with_missing_module(features_dict): | ||
| # This would normally fail if a module is missing during deserialization | ||
| import nonexistent_module # noqa: F401 | ||
| return features_dict | ||
|
|
||
| # Serialize the UDF | ||
| serialized_udf = dill.dumps(udf_with_missing_module) | ||
| udf_string = "import nonexistent_module\ndef udf(d): return d" | ||
|
|
||
| # Create proto | ||
| udf_proto = UserDefinedFunctionProto( | ||
| name="test_udf", | ||
| body=serialized_udf, | ||
| body_text=udf_string, | ||
| ) | ||
|
|
||
| # Test that skip_udf=True doesn't try to deserialize the UDF | ||
| # This would normally fail with ModuleNotFoundError | ||
| transformation = PythonTransformation.from_proto(udf_proto, skip_udf=True) | ||
|
|
||
| # Should get a dummy transformation with identity function | ||
| assert transformation is not None | ||
| assert transformation.udf_string == udf_string | ||
|
|
||
| # The dummy UDF should be callable and act as identity | ||
| test_dict = {"col1": 1} | ||
| result = transformation.udf(test_dict) | ||
| assert result == test_dict | ||
|
|
||
|
|
||
| def test_skip_feature_view_validation_use_case_documentation(): | ||
| """ | ||
| Documentation test: This test documents the key use case for skip_feature_view_validation. | ||
|
|
@@ -69,3 +180,36 @@ def test_skip_feature_view_validation_use_case_documentation(): | |
| can improve the validation system. | ||
| """ | ||
| pass # This is a documentation test | ||
|
|
||
|
|
||
| def test_skip_validation_use_case_documentation(): | ||
| """ | ||
| Documentation test: This test documents the key use case for skip_validation in push(). | ||
|
|
||
| The skip_validation flag in push() addresses the ModuleNotFoundError issue when: | ||
| 1. An OnDemandFeatureView with a UDF is defined in an environment with specific modules | ||
| 2. The UDF references functions, classes, or constants from those modules (e.g., 'training') | ||
| 3. feast.apply() is run to save the definition to the remote registry | ||
| 4. store.push() is called from a different environment without those modules | ||
|
|
||
| Without skip_validation: | ||
| - push() calls list_all_feature_views() which deserializes ODFVs | ||
| - Deserialization uses dill.loads() which fails if referenced modules are missing | ||
| - Results in: ModuleNotFoundError: No module named 'training' | ||
|
|
||
| With skip_validation=True: | ||
| - push() calls list_all_feature_views(skip_validation=True) | ||
| - ODFVs are loaded with dummy UDFs (identity functions) | ||
| - No deserialization of the actual UDF happens | ||
| - push() can proceed successfully | ||
|
|
||
| Example usage: | ||
| store.push("my_push_source", df, skip_validation=True) | ||
|
|
||
| This is particularly useful in production environments where: | ||
| - Data ingestion services don't need the training/modeling code | ||
| - The UDF logic isn't needed during push operations | ||
| - Different teams manage training vs. serving infrastructure | ||
| """ | ||
| pass # This is a documentation test | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@copilot why did you remove this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
oh i see you added it in _list_all_feature_views_cached
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed the
@registry_proto_cache_with_tagsdecorator because it only accepts 3 parameters (registry_proto, project, tags) and doesn't support the additionalskip_udfparameter.To maintain caching while supporting
skip_udf, I:_list_all_feature_views_cached()with the decorator (for skip_udf=False)list_all_feature_views()conditionally call the cached version whenskip_udf=False, or bypass caching whenskip_udf=TrueThis prevents cache pollution with dummy UDFs while preserving the caching behavior for the default case (skip_udf=False).
Would you prefer a different approach, such as modifying the decorator to support additional parameters?