diff --git a/api/v1alpha1/nicclusterpolicy_types.go b/api/v1alpha1/nicclusterpolicy_types.go index 44593547c..6e5b8ef24 100644 --- a/api/v1alpha1/nicclusterpolicy_types.go +++ b/api/v1alpha1/nicclusterpolicy_types.go @@ -462,6 +462,15 @@ type NicClusterPolicyStatus struct { Reason string `json:"reason,omitempty"` // AppliedStates provide a finer view of the observed state AppliedStates []AppliedState `json:"appliedStates,omitempty"` + // Conditions is a list of conditions describing the state of the NicClusterPolicy. + // Each enabled component exposes a Ready condition, and the aggregate + // Ready condition summarizes the overall policy health. + // +optional + // +listType=map + // +listMapKey=type + // +patchStrategy=merge + // +patchMergeKey=type + Conditions []metav1.Condition `json:"conditions,omitempty"` } // +kubebuilder:object:root=true @@ -558,6 +567,16 @@ func (n *NicClusterPolicy) SetReason(reason string) { n.Status.Reason = reason } +// GetConditions implements ConditionHolder; it returns the Status.Conditions slice itself, not a copy. +func (n *NicClusterPolicy) GetConditions() []metav1.Condition { + return n.Status.Conditions +} + +// SetConditions implements ConditionHolder; it replaces Status.Conditions with the given slice. +func (n *NicClusterPolicy) SetConditions(conditions []metav1.Condition) { + n.Status.Conditions = conditions +} + func init() { SchemeBuilder.Register(&NicClusterPolicy{}, &NicClusterPolicyList{}) } diff --git a/api/v1alpha1/nicclusterpolicyconditions.go b/api/v1alpha1/nicclusterpolicyconditions.go new file mode 100644 index 000000000..8444407ec --- /dev/null +++ b/api/v1alpha1/nicclusterpolicyconditions.go @@ -0,0 +1,91 @@ +/* +Copyright 2026 NVIDIA CORPORATION & AFFILIATES + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + +// Per-component condition type constants. +// Each component present in the spec exposes exactly one condition of the +// form {Component}Ready (for example OFEDDriverReady). Components absent from the spec are omitted. +const ( + ConditionTypeOFEDDriverReady = "OFEDDriverReady" + ConditionTypeRDMASharedDevicePluginReady = "RDMASharedDevicePluginReady" + ConditionTypeSRIOVDevicePluginReady = "SRIOVDevicePluginReady" + ConditionTypeIBKubernetesReady = "IBKubernetesReady" + ConditionTypeMultusCNIReady = "MultusCNIReady" + ConditionTypeCNIPluginsReady = "CNIPluginsReady" + ConditionTypeIPoIBCNIReady = "IPoIBCNIReady" + ConditionTypeNVIPAMReady = "NVIPAMReady" + ConditionTypeNICFeatureDiscoveryReady = "NICFeatureDiscoveryReady" + ConditionTypeDOCATelemetryServiceReady = "DOCATelemetryServiceReady" + ConditionTypeNICConfigurationOperatorReady = "NICConfigurationOperatorReady" + ConditionTypeSpectrumXOperatorReady = "SpectrumXOperatorReady" + + // ConditionTypeReady is the aggregate condition that is True only when every + // configured component is ready. + ConditionTypeReady = "Ready" +) + +// Reason constants for per-component {Component}Ready conditions. +const ( + // ConditionReasonComponentReady indicates the component's workloads are fully available. + ConditionReasonComponentReady = "ComponentReady" + // ConditionReasonComponentNotReady indicates the component is deploying and expected + // to become ready without operator intervention. 
+ ConditionReasonComponentNotReady = "ComponentNotReady" + // ConditionReasonComponentError indicates a reconcile failure that requires attention. + // Also used as a reason for the aggregate Ready condition. + ConditionReasonComponentError = "ComponentError" +) + +// Reason constants for the aggregate Ready condition. +const ( + // ConditionReasonAllComponentsReady indicates every configured component is ready. + ConditionReasonAllComponentsReady = "AllComponentsReady" + // ConditionReasonComponentsNotReady indicates at least one component is still deploying. + ConditionReasonComponentsNotReady = "ComponentsNotReady" +) + +// StateNameToConditionType maps the Name() of a pkg/state.State to the +// corresponding per-component condition type constant. +// NicNodePolicy reconciles a subset of these entries (OFED, RDMA shared device +// plugin, SR-IOV device plugin); NicClusterPolicy reconciles all entries. +// States absent from this map (e.g. those used by network CRDs) produce no +// condition entry. +var StateNameToConditionType = map[string]string{ + "state-OFED": ConditionTypeOFEDDriverReady, + "state-RDMA-device-plugin": ConditionTypeRDMASharedDevicePluginReady, + "state-SRIOV-device-plugin": ConditionTypeSRIOVDevicePluginReady, + "state-ib-kubernetes": ConditionTypeIBKubernetesReady, + "state-multus-cni": ConditionTypeMultusCNIReady, + "state-container-networking-plugins": ConditionTypeCNIPluginsReady, + "state-ipoib-cni": ConditionTypeIPoIBCNIReady, + "state-nv-ipam-cni": ConditionTypeNVIPAMReady, + "state-nic-feature-discovery": ConditionTypeNICFeatureDiscoveryReady, + "state-doca-telemetry-service": ConditionTypeDOCATelemetryServiceReady, + "state-nic-configuration-operator": ConditionTypeNICConfigurationOperatorReady, + "state-spectrum-x-operator": ConditionTypeSpectrumXOperatorReady, +} + +// ConditionHolder is implemented by CRDs that carry a status.conditions array. +// NicClusterPolicy and NicNodePolicy implement this interface. 
+// +// +kubebuilder:object:generate=false +type ConditionHolder interface { + GetConditions() []metav1.Condition + SetConditions([]metav1.Condition) +} diff --git a/api/v1alpha1/nicnodepolicy_types.go b/api/v1alpha1/nicnodepolicy_types.go index 9af84dcff..831089ef2 100644 --- a/api/v1alpha1/nicnodepolicy_types.go +++ b/api/v1alpha1/nicnodepolicy_types.go @@ -67,9 +67,6 @@ type NicNodePolicySpec struct { // NicNodePolicyStatus defines the observed state of NicNodePolicy type NicNodePolicyStatus struct { - // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster - // Important: Run "make" to regenerate code after modifying this file - // Reflects the current state of the cluster policy // +kubebuilder:validation:Enum={"ignore", "notReady", "ready", "error"} State State `json:"state"` @@ -77,6 +74,15 @@ type NicNodePolicyStatus struct { Reason string `json:"reason,omitempty"` // AppliedStates provide a finer view of the observed state AppliedStates []AppliedState `json:"appliedStates,omitempty"` + // Conditions is a list of conditions describing the state of the NicNodePolicy. + // Each enabled component exposes a Ready condition, and the aggregate + // Ready condition summarizes the overall policy health. + // +optional + // +listType=map + // +listMapKey=type + // +patchStrategy=merge + // +patchMergeKey=type + Conditions []metav1.Condition `json:"conditions,omitempty"` } // +kubebuilder:object:root=true @@ -174,6 +180,16 @@ func (n *NicNodePolicy) SetReason(reason string) { n.Status.Reason = reason } +// GetConditions implements ConditionHolder; it returns the Status.Conditions slice itself, not a copy. +func (n *NicNodePolicy) GetConditions() []metav1.Condition { + return n.Status.Conditions +} + +// SetConditions implements ConditionHolder; it replaces Status.Conditions with the given slice. 
+func (n *NicNodePolicy) SetConditions(conditions []metav1.Condition) { + n.Status.Conditions = conditions +} + func init() { SchemeBuilder.Register(&NicNodePolicy{}, &NicNodePolicyList{}) } diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 89b89aee4..85c9497a6 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -22,6 +22,7 @@ package v1alpha1 import ( "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) @@ -712,6 +713,13 @@ func (in *NicClusterPolicyStatus) DeepCopyInto(out *NicClusterPolicyStatus) { *out = make([]AppliedState, len(*in)) copy(*out, *in) } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NicClusterPolicyStatus. @@ -901,6 +909,13 @@ func (in *NicNodePolicyStatus) DeepCopyInto(out *NicNodePolicyStatus) { *out = make([]AppliedState, len(*in)) copy(*out, *in) } + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]metav1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NicNodePolicyStatus. diff --git a/config/crd/bases/mellanox.com_nicclusterpolicies.yaml b/config/crd/bases/mellanox.com_nicclusterpolicies.yaml index 67fe6f47d..75201a742 100644 --- a/config/crd/bases/mellanox.com_nicclusterpolicies.yaml +++ b/config/crd/bases/mellanox.com_nicclusterpolicies.yaml @@ -2101,6 +2101,69 @@ spec: - state type: object type: array + conditions: + description: |- + Conditions is a list of conditions describing the state of the NicClusterPolicy. 
+ Each enabled component exposes a Ready condition, and the aggregate + Ready condition summarizes the overall policy health. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map reason: description: Informative string in case the observed state is error type: string diff --git a/config/crd/bases/mellanox.com_nicnodepolicies.yaml b/config/crd/bases/mellanox.com_nicnodepolicies.yaml index 531c80d77..c74b3ea1f 100644 --- a/config/crd/bases/mellanox.com_nicnodepolicies.yaml +++ b/config/crd/bases/mellanox.com_nicnodepolicies.yaml @@ -687,6 +687,69 @@ spec: - state type: object type: array + conditions: + description: |- + Conditions is a list of conditions describing the state of the NicNodePolicy. + Each enabled component exposes a Ready condition, and the aggregate + Ready condition summarizes the overall policy health. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. 
+ format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map reason: description: Informative string in case the observed state is error type: string diff --git a/controllers/conditions.go b/controllers/conditions.go new file mode 100644 index 000000000..c2aca23a3 --- /dev/null +++ b/controllers/conditions.go @@ -0,0 +1,127 @@ +/* +Copyright 2026 NVIDIA CORPORATION & AFFILIATES + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package controllers
+
+import (
+	apimeta "k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	mellanoxv1alpha1 "github.com/Mellanox/network-operator/api/v1alpha1"
+	"github.com/Mellanox/network-operator/pkg/state"
+)
+
+// computePolicyConditions derives status.conditions from the sync results.
+//
+// Every non-ignored result whose state name is in StateNameToConditionType
+// yields one per-component condition. Components in SyncStateIgnore are
+// omitted from status.conditions (their presence is still reflected in
+// status.appliedStates), as are mapped components missing from results; any
+// stale condition for either is removed.
+//
+// Conditions are merged into a copy of existing so that lastTransitionTime is
+// preserved for conditions whose status has not changed (standard Kubernetes
+// semantics, implemented by apimeta.SetStatusCondition).
+//
+// The aggregate Ready condition accounts for every non-ignored result, even
+// one whose state has no mapped condition type, so Ready cannot be True while
+// such a state is failing. Error takes priority over NotReady: if any
+// component is in error state, the Ready reason is ComponentError regardless
+// of whether other components are also not ready.
+func computePolicyConditions(
+	existing []metav1.Condition,
+	results state.Results,
+	generation int64,
+) []metav1.Condition {
+	conditions := make([]metav1.Condition, len(existing))
+	copy(conditions, existing)
+
+	hasError := false
+	hasNotReady := false
+	reported := make(map[string]struct{})
+
+	for _, r := range results.StatesStatus {
+		if r.Status == state.SyncStateIgnore {
+			// Not configured; a stale condition, if any, is pruned below.
+			continue
+		}
+
+		cond := metav1.Condition{ObservedGeneration: generation}
+		switch r.Status {
+		case state.SyncStateReady:
+			cond.Status = metav1.ConditionTrue
+			cond.Reason = mellanoxv1alpha1.ConditionReasonComponentReady
+		case state.SyncStateError:
+			cond.Status = metav1.ConditionFalse
+			cond.Reason = mellanoxv1alpha1.ConditionReasonComponentError
+			cond.Message = errMessage(r.ErrInfo)
+			hasError = true
+		default:
+			// SyncStateNotReady, SyncStateReset, or any future value.
+			cond.Status = metav1.ConditionFalse
+			cond.Reason = mellanoxv1alpha1.ConditionReasonComponentNotReady
+			cond.Message = errMessage(r.ErrInfo)
+			hasNotReady = true
+		}
+
+		condType, mapped := mellanoxv1alpha1.StateNameToConditionType[r.StateName]
+		if !mapped {
+			// No per-component condition, but the result still feeds Ready.
+			continue
+		}
+		cond.Type = condType
+		reported[condType] = struct{}{}
+		apimeta.SetStatusCondition(&conditions, cond)
+	}
+
+	// Prune mapped component conditions that were not reported in this
+	// reconcile: the component is ignored or absent from the results.
+	for _, condType := range mellanoxv1alpha1.StateNameToConditionType {
+		if _, ok := reported[condType]; !ok {
+			apimeta.RemoveStatusCondition(&conditions, condType)
+		}
+	}
+
+	// Aggregate Ready: True only when no result was in error or not ready.
+	ready := metav1.Condition{
+		Type:               mellanoxv1alpha1.ConditionTypeReady,
+		ObservedGeneration: generation,
+	}
+	switch {
+	case hasError:
+		ready.Status = metav1.ConditionFalse
+		ready.Reason = mellanoxv1alpha1.ConditionReasonComponentError
+		ready.Message = "One or more components are in error state"
+	case hasNotReady:
+		ready.Status = metav1.ConditionFalse
+		ready.Reason = mellanoxv1alpha1.ConditionReasonComponentsNotReady
+		ready.Message = "One or more components are not yet ready"
+	default:
+		ready.Status = metav1.ConditionTrue
+		ready.Reason = mellanoxv1alpha1.ConditionReasonAllComponentsReady
+	}
+	apimeta.SetStatusCondition(&conditions, ready)
+
+	return conditions
+}
+
+// errMessage returns err.Error(), or the empty string when err is nil.
+func errMessage(err error) string {
+	if err == nil {
+		return ""
+	}
+	return err.Error()
+}
diff --git a/controllers/conditions_test.go b/controllers/conditions_test.go
new file mode 100644
index 000000000..1e77c3005
--- /dev/null
+++ b/controllers/conditions_test.go
@@ -0,0 +1,236 @@
+/*
+Copyright 2026 NVIDIA CORPORATION & AFFILIATES
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controllers
+
+import (
+	"errors"
+	"testing"
+
+	apimeta "k8s.io/apimachinery/pkg/api/meta"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	mellanoxv1alpha1 "github.com/Mellanox/network-operator/api/v1alpha1"
+	"github.com/Mellanox/network-operator/pkg/state"
+)
+
+const testObservedGeneration int64 = 3
+
+// TestComputePolicyConditionsIgnoredComponentsOmitted checks that ignored
+// components produce no condition and leave the aggregate Ready=True.
+func TestComputePolicyConditionsIgnoredComponentsOmitted(t *testing.T) {
+	t.Parallel()
+
+	results := state.Results{
+		StatesStatus: []state.Result{
+			{StateName: "state-OFED", Status: state.SyncStateIgnore},
+			{StateName: "state-multus-cni", Status: state.SyncStateIgnore},
+		},
+	}
+
+	conditions := computePolicyConditions(nil, results, testObservedGeneration)
+
+	if len(conditions) != 1 {
+		t.Fatalf("expected 1 condition, got %d: %+v", len(conditions), conditions)
+	}
+	ready := conditions[0]
+	if ready.Type != mellanoxv1alpha1.ConditionTypeReady {
+		t.Fatalf("expected Ready condition, got %s", ready.Type)
+	}
+	if ready.Status != metav1.ConditionTrue {
+		t.Fatalf("expected Ready=True, got %s", ready.Status)
+	}
+	if ready.Reason != mellanoxv1alpha1.ConditionReasonAllComponentsReady {
+		t.Fatalf("expected reason AllComponentsReady, got %s", ready.Reason)
+	}
+}
+
+// TestComputePolicyConditionsConfiguredReadyComponentGetsCondition checks that
+// a ready component gets a True condition while an ignored one gets none.
+func TestComputePolicyConditionsConfiguredReadyComponentGetsCondition(t *testing.T) {
+	t.Parallel()
+
+	results := state.Results{
+		StatesStatus: []state.Result{
+			{StateName: "state-OFED", Status: state.SyncStateReady},
+			{StateName: "state-multus-cni", Status: state.SyncStateIgnore},
+		},
+	}
+
+	conditions := computePolicyConditions(nil, results, testObservedGeneration)
+
+	if len(conditions) != 2 {
+		t.Fatalf("expected 2 conditions, got %d: %+v", len(conditions), conditions)
+	}
+
+	ofed := apimeta.FindStatusCondition(conditions, mellanoxv1alpha1.ConditionTypeOFEDDriverReady)
+	if ofed == nil {
+		t.Fatal("expected OFEDDriverReady condition")
+	}
+	if ofed.Status != metav1.ConditionTrue || ofed.Reason != mellanoxv1alpha1.ConditionReasonComponentReady {
+		t.Fatalf("unexpected OFED condition: %+v", *ofed)
+	}
+	if apimeta.FindStatusCondition(conditions, mellanoxv1alpha1.ConditionTypeMultusCNIReady) != nil {
+		t.Fatal("expected MultusCNIReady condition to be omitted")
+	}
+}
+
+// TestComputePolicyConditionsIgnoredComponentRemovesStaleCondition checks that
+// an existing condition is dropped once its component is ignored.
+func TestComputePolicyConditionsIgnoredComponentRemovesStaleCondition(t *testing.T) {
+	t.Parallel()
+
+	existing := []metav1.Condition{
+		{
+			Type:               mellanoxv1alpha1.ConditionTypeOFEDDriverReady,
+			Status:             metav1.ConditionTrue,
+			Reason:             mellanoxv1alpha1.ConditionReasonComponentReady,
+			ObservedGeneration: 1,
+		},
+	}
+	results := state.Results{
+		StatesStatus: []state.Result{
+			{StateName: "state-OFED", Status: state.SyncStateIgnore},
+		},
+	}
+
+	conditions := computePolicyConditions(existing, results, testObservedGeneration)
+
+	if apimeta.FindStatusCondition(conditions, mellanoxv1alpha1.ConditionTypeOFEDDriverReady) != nil {
+		t.Fatal("expected stale OFEDDriverReady condition to be removed")
+	}
+}
+
+// TestComputePolicyConditionsMissingStateRemovesStaleCondition checks that an
+// existing condition is dropped when its state is absent from the results.
+func TestComputePolicyConditionsMissingStateRemovesStaleCondition(t *testing.T) {
+	t.Parallel()
+
+	existing := []metav1.Condition{
+		{
+			Type:               mellanoxv1alpha1.ConditionTypeOFEDDriverReady,
+			Status:             metav1.ConditionTrue,
+			Reason:             mellanoxv1alpha1.ConditionReasonComponentReady,
+			ObservedGeneration: 1,
+		},
+		{
+			Type:               mellanoxv1alpha1.ConditionTypeMultusCNIReady,
+			Status:             metav1.ConditionTrue,
+			Reason:             mellanoxv1alpha1.ConditionReasonComponentReady,
+			ObservedGeneration: 1,
+		},
+	}
+	results := state.Results{
+		StatesStatus: []state.Result{
+			{StateName: "state-OFED", Status: state.SyncStateReady},
+		},
+	}
+
+	conditions := computePolicyConditions(existing, results, testObservedGeneration)
+
+	if apimeta.FindStatusCondition(conditions, mellanoxv1alpha1.ConditionTypeMultusCNIReady) != nil {
+		t.Fatal("expected stale MultusCNIReady condition to be removed when state is missing")
+	}
+
+	ofed := apimeta.FindStatusCondition(conditions, mellanoxv1alpha1.ConditionTypeOFEDDriverReady)
+	if ofed == nil {
+		t.Fatal("expected OFEDDriverReady condition")
+	}
+	if ofed.Status != metav1.ConditionTrue || ofed.Reason != mellanoxv1alpha1.ConditionReasonComponentReady {
+		t.Fatalf("unexpected OFED condition: %+v", *ofed)
+	}
+}
+
+// TestComputePolicyConditionsNotReadyComponentBlocksAggregateReady checks that
+// a not-ready component makes the aggregate Ready=False (ComponentsNotReady).
+func TestComputePolicyConditionsNotReadyComponentBlocksAggregateReady(t *testing.T) {
+	t.Parallel()
+
+	results := state.Results{
+		StatesStatus: []state.Result{
+			{StateName: "state-OFED", Status: state.SyncStateNotReady},
+			{StateName: "state-multus-cni", Status: state.SyncStateIgnore},
+		},
+	}
+
+	conditions := computePolicyConditions(nil, results, testObservedGeneration)
+
+	ofed := apimeta.FindStatusCondition(conditions, mellanoxv1alpha1.ConditionTypeOFEDDriverReady)
+	if ofed == nil {
+		t.Fatal("expected OFEDDriverReady condition")
+	}
+	if ofed.Status != metav1.ConditionFalse || ofed.Reason != mellanoxv1alpha1.ConditionReasonComponentNotReady {
+		t.Fatalf("unexpected OFED condition: %+v", *ofed)
+	}
+
+	ready := apimeta.FindStatusCondition(conditions, mellanoxv1alpha1.ConditionTypeReady)
+	if ready == nil {
+		t.Fatal("expected Ready condition")
+	}
+	if ready.Status != metav1.ConditionFalse || ready.Reason != mellanoxv1alpha1.ConditionReasonComponentsNotReady {
+		t.Fatalf("unexpected Ready condition: %+v", *ready)
+	}
+}
+
+// TestComputePolicyConditionsErrorTakesPriorityForAggregateReady checks that
+// an error outranks not-ready in the aggregate Ready reason.
+func TestComputePolicyConditionsErrorTakesPriorityForAggregateReady(t *testing.T) {
+	t.Parallel()
+
+	syncErr := errors.New("sync failed")
+	results := state.Results{
+		StatesStatus: []state.Result{
+			{StateName: "state-OFED", Status: state.SyncStateError, ErrInfo: syncErr},
+			{StateName: "state-RDMA-device-plugin", Status: state.SyncStateNotReady},
+		},
+	}
+
+	conditions := computePolicyConditions(nil, results, testObservedGeneration)
+
+	ready := apimeta.FindStatusCondition(conditions, mellanoxv1alpha1.ConditionTypeReady)
+	if ready == nil {
+		t.Fatal("expected Ready condition")
+	}
+	if ready.Status != metav1.ConditionFalse || ready.Reason != mellanoxv1alpha1.ConditionReasonComponentError {
+		t.Fatalf("unexpected Ready condition: %+v", *ready)
+	}
+}
+
+// TestComputePolicyConditionsUnmappedStateAffectsAggregateReady checks that a
+// failing state without a mapped condition type still makes Ready=False.
+func TestComputePolicyConditionsUnmappedStateAffectsAggregateReady(t *testing.T) {
+	t.Parallel()
+
+	results := state.Results{
+		StatesStatus: []state.Result{
+			{StateName: "state-OFED", Status: state.SyncStateReady},
+			{StateName: "state-unmapped", Status: state.SyncStateError, ErrInfo: errors.New("sync failed")},
+		},
+	}
+
+	conditions := computePolicyConditions(nil, results, testObservedGeneration)
+
+	if len(conditions) != 2 {
+		t.Fatalf("expected 2 conditions, got %d: %+v", len(conditions), conditions)
+	}
+	ready := apimeta.FindStatusCondition(conditions, mellanoxv1alpha1.ConditionTypeReady)
+	if ready == nil {
+		t.Fatal("expected Ready condition")
+	}
+	if ready.Status != metav1.ConditionFalse || ready.Reason != mellanoxv1alpha1.ConditionReasonComponentError {
+		t.Fatalf("unexpected Ready condition: %+v", *ready)
+	}
+}
diff --git
a/controllers/nic_policy_helpers.go b/controllers/nic_policy_helpers.go index c03c4954e..c9eb97411 100644 --- a/controllers/nic_policy_helpers.go +++ b/controllers/nic_policy_helpers.go @@ -71,6 +71,10 @@ NextResult: cr.SetAppliedStates(appliedStates) cr.SetPolicyState(mellanoxv1alpha1.State(status.Status)) + if ch, ok := cr.(mellanoxv1alpha1.ConditionHolder); ok { + ch.SetConditions(computePolicyConditions(ch.GetConditions(), status, cr.GetGeneration())) + } + reqLogger.V(consts.LogLevelInfo).Info( "Updating status", "Custom resource name", cr.GetName(), "namespace", cr.GetNamespace(), "Result:", cr.GetPolicyState()) diff --git a/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml b/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml index 67fe6f47d..75201a742 100644 --- a/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml +++ b/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml @@ -2101,6 +2101,69 @@ spec: - state type: object type: array + conditions: + description: |- + Conditions is a list of conditions describing the state of the NicClusterPolicy. + Each enabled component exposes a Ready condition, and the aggregate + Ready condition summarizes the overall policy health. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. 
+ maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. 
+ maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map reason: description: Informative string in case the observed state is error type: string diff --git a/deployment/network-operator/crds/mellanox.com_nicnodepolicies.yaml b/deployment/network-operator/crds/mellanox.com_nicnodepolicies.yaml index 531c80d77..c74b3ea1f 100644 --- a/deployment/network-operator/crds/mellanox.com_nicnodepolicies.yaml +++ b/deployment/network-operator/crds/mellanox.com_nicnodepolicies.yaml @@ -687,6 +687,69 @@ spec: - state type: object type: array + conditions: + description: |- + Conditions is a list of conditions describing the state of the NicNodePolicy. + Each enabled component exposes a Ready condition, and the aggregate + Ready condition summarizes the overall policy health. + items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. 
+ format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map reason: description: Informative string in case the observed state is error type: string