Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix caching decorator compatibility with skip_udf parameter
Co-authored-by: franciscojavierarceo <[email protected]>
  • Loading branch information
Copilot and franciscojavierarceo committed Jan 23, 2026
commit f9542e5436f46f1bd645997b1c39a932cb97f46f
40 changes: 36 additions & 4 deletions sdk/python/feast/infra/registry/proto_registry_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,17 +233,31 @@ def list_feature_services(
return feature_services


@registry_proto_cache_with_tags
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot why did you remove this?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

oh i see you added it in _list_all_feature_views_cached

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the @registry_proto_cache_with_tags decorator because it only accepts 3 parameters (registry_proto, project, tags) and doesn't support the additional skip_udf parameter.

To maintain caching while supporting skip_udf, I:

  1. Created _list_all_feature_views_cached() with the decorator (for skip_udf=False)
  2. Made list_all_feature_views() conditionally call the cached version when skip_udf=False, or bypass caching when skip_udf=True

This prevents cache pollution with dummy UDFs while preserving the caching behavior for the default case (skip_udf=False).

Would you prefer a different approach, such as modifying the decorator to support additional parameters?

def list_all_feature_views(
registry_proto: RegistryProto,
project: str,
tags: Optional[dict[str, str]],
skip_udf: bool = False,
) -> List[BaseFeatureView]:
# Skip caching when skip_udf is True to avoid cache pollution
if skip_udf:
return (
list_feature_views(registry_proto, project, tags)
+ list_stream_feature_views(registry_proto, project, tags)
+ list_on_demand_feature_views(registry_proto, project, tags, skip_udf=skip_udf)
)
else:
return _list_all_feature_views_cached(registry_proto, project, tags)


@registry_proto_cache_with_tags
def _list_all_feature_views_cached(
registry_proto: RegistryProto, project: str, tags: Optional[dict[str, str]]
) -> List[BaseFeatureView]:
return (
list_feature_views(registry_proto, project, tags)
+ list_stream_feature_views(registry_proto, project, tags)
+ list_on_demand_feature_views(registry_proto, project, tags, skip_udf=skip_udf)
+ list_on_demand_feature_views(registry_proto, project, tags, skip_udf=False)
)


Expand Down Expand Up @@ -275,20 +289,38 @@ def list_stream_feature_views(
return stream_feature_views


@registry_proto_cache_with_tags
def list_on_demand_feature_views(
registry_proto: RegistryProto,
project: str,
tags: Optional[dict[str, str]],
skip_udf: bool = False,
) -> List[OnDemandFeatureView]:
# Skip caching when skip_udf is True to avoid cache pollution with dummy UDFs
if skip_udf:
on_demand_feature_views = []
for on_demand_feature_view in registry_proto.on_demand_feature_views:
if on_demand_feature_view.spec.project == project and utils.has_all_tags(
on_demand_feature_view.spec.tags, tags
):
on_demand_feature_views.append(
OnDemandFeatureView.from_proto(on_demand_feature_view, skip_udf=skip_udf)
)
return on_demand_feature_views
else:
return _list_on_demand_feature_views_cached(registry_proto, project, tags)


@registry_proto_cache_with_tags
def _list_on_demand_feature_views_cached(
registry_proto: RegistryProto, project: str, tags: Optional[dict[str, str]]
) -> List[OnDemandFeatureView]:
on_demand_feature_views = []
for on_demand_feature_view in registry_proto.on_demand_feature_views:
if on_demand_feature_view.spec.project == project and utils.has_all_tags(
on_demand_feature_view.spec.tags, tags
):
on_demand_feature_views.append(
OnDemandFeatureView.from_proto(on_demand_feature_view, skip_udf=skip_udf)
OnDemandFeatureView.from_proto(on_demand_feature_view, skip_udf=False)
)
return on_demand_feature_views

Expand Down
144 changes: 144 additions & 0 deletions sdk/python/tests/unit/test_skip_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,20 @@
- Cases where the type/validation system is being too restrictive

Users should be encouraged to report issues on GitHub when they need to use this flag.

Also tests skip_validation parameter in push() and related methods to handle
On-Demand Feature Views with UDFs that reference modules not available in the
current environment.
"""

import inspect

import dill
import pandas as pd

from feast.feature_store import FeatureStore
from feast.on_demand_feature_view import PandasTransformation, PythonTransformation
from feast.protos.feast.core.Transformation_pb2 import UserDefinedFunctionV2 as UserDefinedFunctionProto


def test_apply_has_skip_feature_view_validation_parameter():
Expand Down Expand Up @@ -48,6 +57,108 @@ def test_plan_has_skip_feature_view_validation_parameter():
assert param.annotation == bool


def test_push_has_skip_validation_parameter():
"""Test that FeatureStore.push() method has skip_validation parameter"""
# Get the signature of the push method
sig = inspect.signature(FeatureStore.push)

# Check that skip_validation parameter exists
assert "skip_validation" in sig.parameters

# Check that it has a default value of False
param = sig.parameters["skip_validation"]
assert param.default is False

# Check that it's a boolean type hint (if type hints are present)
if param.annotation != inspect.Parameter.empty:
assert param.annotation == bool


def test_push_async_has_skip_validation_parameter():
"""Test that FeatureStore.push_async() method has skip_validation parameter"""
# Get the signature of the push_async method
sig = inspect.signature(FeatureStore.push_async)

# Check that skip_validation parameter exists
assert "skip_validation" in sig.parameters

# Check that it has a default value of False
param = sig.parameters["skip_validation"]
assert param.default is False

# Check that it's a boolean type hint (if type hints are present)
if param.annotation != inspect.Parameter.empty:
assert param.annotation == bool


def test_pandas_transformation_from_proto_with_skip_udf():
"""Test that PandasTransformation.from_proto works with skip_udf=True."""

# Create a UDF that would reference a non-existent module
def udf_with_missing_module(df: pd.DataFrame) -> pd.DataFrame:
# This would normally fail if a module is missing during deserialization
import nonexistent_module # noqa: F401
return df

# Serialize the UDF
serialized_udf = dill.dumps(udf_with_missing_module)
udf_string = "import nonexistent_module\ndef udf(df): return df"

# Create proto
udf_proto = UserDefinedFunctionProto(
name="test_udf",
body=serialized_udf,
body_text=udf_string,
)

# Test that skip_udf=True doesn't try to deserialize the UDF
# This would normally fail with ModuleNotFoundError
transformation = PandasTransformation.from_proto(udf_proto, skip_udf=True)

# Should get a dummy transformation with identity function
assert transformation is not None
assert transformation.udf_string == udf_string

# The dummy UDF should be callable and act as identity
test_df = pd.DataFrame({"col1": [1, 2, 3]})
result = transformation.udf(test_df)
assert result.equals(test_df)


def test_python_transformation_from_proto_with_skip_udf():
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@copilot we need to validate that the skip_udf is being applied only during push() for ODFVs that do not have write_to_online_store=True because that would hide legitimate breaks.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in commit 2433df6. Added logic to only skip UDF deserialization for ODFVs with write_to_online_store=False. ODFVs with write_to_online_store=True always load the actual UDF since it will be executed during push operations. Also added test test_skip_validation_only_applies_to_non_writing_odfvs() to validate this behavior.

"""Test that PythonTransformation.from_proto works with skip_udf=True."""

# Create a UDF that would reference a non-existent module
def udf_with_missing_module(features_dict):
# This would normally fail if a module is missing during deserialization
import nonexistent_module # noqa: F401
return features_dict

# Serialize the UDF
serialized_udf = dill.dumps(udf_with_missing_module)
udf_string = "import nonexistent_module\ndef udf(d): return d"

# Create proto
udf_proto = UserDefinedFunctionProto(
name="test_udf",
body=serialized_udf,
body_text=udf_string,
)

# Test that skip_udf=True doesn't try to deserialize the UDF
# This would normally fail with ModuleNotFoundError
transformation = PythonTransformation.from_proto(udf_proto, skip_udf=True)

# Should get a dummy transformation with identity function
assert transformation is not None
assert transformation.udf_string == udf_string

# The dummy UDF should be callable and act as identity
test_dict = {"col1": 1}
result = transformation.udf(test_dict)
assert result == test_dict


def test_skip_feature_view_validation_use_case_documentation():
"""
Documentation test: This test documents the key use case for skip_feature_view_validation.
Expand All @@ -69,3 +180,36 @@ def test_skip_feature_view_validation_use_case_documentation():
can improve the validation system.
"""
pass # This is a documentation test


def test_skip_validation_use_case_documentation():
"""
Documentation test: This test documents the key use case for skip_validation in push().

The skip_validation flag in push() addresses the ModuleNotFoundError issue when:
1. An OnDemandFeatureView with a UDF is defined in an environment with specific modules
2. The UDF references functions, classes, or constants from those modules (e.g., 'training')
3. feast.apply() is run to save the definition to the remote registry
4. store.push() is called from a different environment without those modules

Without skip_validation:
- push() calls list_all_feature_views() which deserializes ODFVs
- Deserialization uses dill.loads() which fails if referenced modules are missing
- Results in: ModuleNotFoundError: No module named 'training'

With skip_validation=True:
- push() calls list_all_feature_views(skip_validation=True)
- ODFVs are loaded with dummy UDFs (identity functions)
- No deserialization of the actual UDF happens
- push() can proceed successfully

Example usage:
store.push("my_push_source", df, skip_validation=True)

This is particularly useful in production environments where:
- Data ingestion services don't need the training/modeling code
- The UDF logic isn't needed during push operations
- Different teams manage training vs. serving infrastructure
"""
pass # This is a documentation test