Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix: Harden informer cache with label selectors and memory optimizations
Signed-off-by: Jitendra Yejare <[email protected]>
  • Loading branch information
jyejare authored and ntkathole committed Apr 13, 2026
commit 7676dd72a9a9af614616403e921366a7737a4ad3
31 changes: 31 additions & 0 deletions infra/feast-operator/cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,18 @@ import (
// to ensure that exec-entrypoint and run can make use of them.
_ "k8s.io/client-go/plugin/pkg/client/auth"

appsv1 "k8s.io/api/apps/v1"
autoscalingv2 "k8s.io/api/autoscaling/v2"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
rbacv1 "k8s.io/api/rbac/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/cache"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/healthz"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
Expand Down Expand Up @@ -59,6 +66,29 @@ func init() {
// +kubebuilder:scaffold:scheme
}

func newCacheOptions() cache.Options {
managedBySelector := labels.SelectorFromSet(labels.Set{
services.ManagedByLabelKey: services.ManagedByLabelValue,
})
managedByFilter := cache.ByObject{Label: managedBySelector}

return cache.Options{
DefaultTransform: cache.TransformStripManagedFields(),
ByObject: map[client.Object]cache.ByObject{
&corev1.ConfigMap{}: managedByFilter,
&appsv1.Deployment{}: managedByFilter,
&corev1.Service{}: managedByFilter,
&corev1.ServiceAccount{}: managedByFilter,
&corev1.PersistentVolumeClaim{}: managedByFilter,
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
&rbacv1.RoleBinding{}: managedByFilter,
&rbacv1.Role{}: managedByFilter,
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
&batchv1.CronJob{}: managedByFilter,
&autoscalingv2.HorizontalPodAutoscaler{}: managedByFilter,
&policyv1.PodDisruptionBudget{}: managedByFilter,
},
}
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
}

func main() {
var metricsAddr string
var enableLeaderElection bool
Expand Down Expand Up @@ -145,6 +175,7 @@ func main() {
// if you are doing or is intended to do any operation such as perform cleanups
// after the manager stops then its usage might be unsafe.
// LeaderElectionReleaseOnCancel: true,
Cache: newCacheOptions(),
Client: client.Options{
Cache: &client.CacheOptions{
DisableFor: []client.Object{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
- op: test
path: "/spec/template/spec/containers/0/env/1/name"
value: RELATED_IMAGE_FEATURE_SERVER
- op: replace
path: "/spec/template/spec/containers/0/env/0"
path: "/spec/template/spec/containers/0/env/1"
value:
name: RELATED_IMAGE_FEATURE_SERVER
value: ${FS_IMG}
- op: test
path: "/spec/template/spec/containers/0/env/2/name"
value: RELATED_IMAGE_CRON_JOB
- op: replace
path: "/spec/template/spec/containers/0/env/1"
path: "/spec/template/spec/containers/0/env/2"
value:
name: RELATED_IMAGE_CRON_JOB
value: ${CJ_IMG}
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
- op: test
path: "/spec/template/spec/containers/0/env/1/name"
value: RELATED_IMAGE_FEATURE_SERVER
- op: replace
path: "/spec/template/spec/containers/0/env/0"
path: "/spec/template/spec/containers/0/env/1"
value:
name: RELATED_IMAGE_FEATURE_SERVER
value: quay.io/feastdev/feature-server:0.62.0
- op: test
path: "/spec/template/spec/containers/0/env/2/name"
value: RELATED_IMAGE_CRON_JOB
- op: replace
path: "/spec/template/spec/containers/0/env/1"
path: "/spec/template/spec/containers/0/env/2"
value:
name: RELATED_IMAGE_CRON_JOB
value: quay.io/openshift/origin-cli:4.17
2 changes: 2 additions & 0 deletions infra/feast-operator/config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ spec:
drop:
- "ALL"
env:
- name: GOMEMLIMIT
value: "230MiB"
- name: RELATED_IMAGE_FEATURE_SERVER
value: feast:latest
- name: RELATED_IMAGE_CRON_JOB
Expand Down
2 changes: 2 additions & 0 deletions infra/feast-operator/dist/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20864,6 +20864,8 @@ spec:
command:
- /manager
env:
- name: GOMEMLIMIT
value: 230MiB
- name: RELATED_IMAGE_FEATURE_SERVER
value: quay.io/feastdev/feature-server:0.62.0
- name: RELATED_IMAGE_CRON_JOB
Expand Down
1 change: 1 addition & 0 deletions infra/feast-operator/internal/controller/authz/authz.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,7 @@ func (authz *FeastAuthorization) getLabels() map[string]string {
return map[string]string{
services.NameLabelKey: authz.Handler.FeatureStore.Name,
services.ServiceTypeLabelKey: string(services.AuthzFeastType),
services.ManagedByLabelKey: services.ManagedByLabelValue,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 removeOrphanedRoles silently skips pre-upgrade custom auth Roles due to stricter label selector

The authz.getLabels() function now includes ManagedByLabelKey (authz.go:334), and removeOrphanedRoles uses this label set as a list selector (authz.go:85). Pre-upgrade custom auth Roles only have {NameLabelKey, ServiceTypeLabelKey} without ManagedByLabelKey, so the API server's label selector will never match them. These orphaned Roles will never be cleaned up by removeOrphanedRoles.

The main feast Role and RoleBinding are still cleaned up correctly via DeleteOwnedFeastObj (which looks up by name, not labels). Only custom auth roles from KubernetesAuthz.Roles are affected. The practical impact is limited: orphaned Roles have empty rules (no security impact) and have owner references for eventual GC on FeatureStore CR deletion. The window is narrow — it requires changing the Roles list concurrently with or very shortly after the operator upgrade, before the first reconciliation adds the label to existing Roles.

Prompt for agents
In authz.go, the removeOrphanedRoles function at line 81-101 lists Roles using authz.getLabels() as the label selector. Since getLabels() now includes ManagedByLabelKey, pre-upgrade Roles without this label are invisible to this cleanup function.

To fix: either (a) use a separate label set for removeOrphanedRoles that omits ManagedByLabelKey (matching by NameLabelKey and ServiceTypeLabelKey only), or (b) run a one-time migration during reconciliation that adds ManagedByLabelKey to all existing authz Roles before removeOrphanedRoles is called.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.

}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@ func (feast *FeastServices) setNamespaceRegistryRoleBinding(rb *rbacv1.RoleBindi
ObjectMeta: metav1.ObjectMeta{
Name: roleName,
Namespace: rb.Namespace,
Labels: feast.getLabels(),
},
}
role.Rules = desiredRules
Expand All @@ -205,6 +206,7 @@ func (feast *FeastServices) setNamespaceRegistryRoleBinding(rb *rbacv1.RoleBindi
}
}
Comment thread
devin-ai-integration[bot] marked this conversation as resolved.
Outdated

rb.Labels = feast.getLabels()
rb.RoleRef = rbacv1.RoleRef{
APIGroup: "rbac.authorization.k8s.io",
Kind: "Role",
Expand Down
34 changes: 24 additions & 10 deletions infra/feast-operator/internal/controller/services/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -408,9 +408,10 @@ func (feast *FeastServices) setDeployment(deploy *appsv1.Deployment) error {
}

deploy.Labels = feast.getLabels()
selectorLabels := feast.getSelectorLabels()
deploy.Spec = appsv1.DeploymentSpec{
Replicas: replicas,
Selector: metav1.SetAsLabelSelector(deploy.GetLabels()),
Selector: metav1.SetAsLabelSelector(selectorLabels),
Strategy: feast.getDeploymentStrategy(),
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Expand Down Expand Up @@ -818,7 +819,7 @@ func (feast *FeastServices) setService(svc *corev1.Service, feastType FeastServi
}

svc.Spec = corev1.ServiceSpec{
Selector: feast.getLabels(),
Selector: feast.getSelectorLabels(),
Type: corev1.ServiceTypeClusterIP,
Ports: []corev1.ServicePort{
{
Expand Down Expand Up @@ -868,6 +869,7 @@ func (feast *FeastServices) setServiceAccount(sa *corev1.ServiceAccount) error {

func (feast *FeastServices) createNewPVC(pvcCreate *feastdevv1.PvcCreate, feastType FeastServiceType) (*corev1.PersistentVolumeClaim, error) {
pvc := feast.initPVC(feastType)
pvc.Labels = feast.getFeastTypeLabels(feastType)

pvc.Spec = corev1.PersistentVolumeClaimSpec{
AccessModes: pvcCreate.AccessModes,
Expand Down Expand Up @@ -976,7 +978,7 @@ func (feast *FeastServices) applyTopologySpread(podSpec *corev1.PodSpec) {
MaxSkew: 1,
TopologyKey: "topology.kubernetes.io/zone",
WhenUnsatisfiable: corev1.ScheduleAnyway,
LabelSelector: metav1.SetAsLabelSelector(feast.getLabels()),
LabelSelector: metav1.SetAsLabelSelector(feast.getSelectorLabels()),
}}
}

Expand All @@ -999,10 +1001,10 @@ func (feast *FeastServices) applyAffinity(podSpec *corev1.PodSpec) {
Weight: 100,
PodAffinityTerm: corev1.PodAffinityTerm{
TopologyKey: "kubernetes.io/hostname",
LabelSelector: metav1.SetAsLabelSelector(feast.getLabels()),
},
}},
},
LabelSelector: metav1.SetAsLabelSelector(feast.getSelectorLabels()),
},
}},
},
}
}

Expand Down Expand Up @@ -1060,12 +1062,24 @@ func (feast *FeastServices) getFeastTypeLabels(feastType FeastServiceType) map[s
return labels
}

func (feast *FeastServices) getLabels() map[string]string {
// getSelectorLabels returns the minimal label set used for immutable selectors
// (Deployment spec.selector, Service spec.selector, TopologySpreadConstraints, PodAffinity).
// This must NOT change after initial resource creation.
func (feast *FeastServices) getSelectorLabels() map[string]string {
return map[string]string{
NameLabelKey: feast.Handler.FeatureStore.Name,
}
}

// getLabels returns the full label set for mutable metadata (ObjectMeta.Labels).
// Includes the managed-by label used by the informer cache filter.
func (feast *FeastServices) getLabels() map[string]string {
return map[string]string{
NameLabelKey: feast.Handler.FeatureStore.Name,
ManagedByLabelKey: ManagedByLabelValue,
}
}

func (feast *FeastServices) setServiceHostnames() error {
feast.Handler.FeatureStore.Status.ServiceHostnames = feastdevv1.ServiceHostnames{}
domain := svcDomain + ":"
Expand Down Expand Up @@ -1438,10 +1452,10 @@ func IsDeploymentAvailable(conditions []appsv1.DeploymentCondition) bool {
// container that is in a failing state. Returns empty string if no failure found.
func (feast *FeastServices) GetPodContainerFailureMessage(deploy appsv1.Deployment) string {
podList := corev1.PodList{}
labels := feast.getLabels()
selectorLabels := feast.getSelectorLabels()
if err := feast.Handler.Client.List(feast.Handler.Context, &podList,
client.InNamespace(deploy.Namespace),
client.MatchingLabels(labels),
client.MatchingLabels(selectorLabels),
); err != nil {
return ""
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,11 @@ const (
OidcMissingSecretError string = "missing OIDC secret: %s"
)

const (
ManagedByLabelKey = "app.kubernetes.io/managed-by"
ManagedByLabelValue = "feast-operator"
)

var (
DefaultImage = "quay.io/feastdev/feature-server:" + feastversion.FeastVersion
DefaultCronJobImage = "quay.io/openshift/origin-cli:4.17"
Expand Down