From b8a604c371f158f42cd7ed84cc8942b7ef61ed82 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 18 Feb 2026 12:11:56 +0100 Subject: [PATCH 01/36] Finalize API --- api/v1alpha1/decision_types.go | 105 ++---- api/v1alpha1/zz_generated.deepcopy.go | 120 +------ .../files/crds/cortex.cloud_decisions.yaml | 326 ++++-------------- 3 files changed, 118 insertions(+), 433 deletions(-) diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go index c3f02de1e..b2e871616 100644 --- a/api/v1alpha1/decision_types.go +++ b/api/v1alpha1/decision_types.go @@ -6,90 +6,63 @@ package v1alpha1 import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" ) -type DecisionSpec struct { - // SchedulingDomain defines in which scheduling domain this decision - // was or is processed (e.g., nova, cinder, manila). - SchedulingDomain SchedulingDomain `json:"schedulingDomain"` +// SchedulingReasons represents the reason for a scheduling event. +type SchedulingReason string - // A reference to the pipeline that should be used for this decision. +const ( + // SchedulingReasonInitialPlacement indicates that this is the initial placement of a resource. + SchedulingReasonInitialPlacement SchedulingReason = "InitialPlacement" + // SchedulingReasonLiveMigration indicates that this scheduling event is triggered by a live migration operation. + SchedulingReasonLiveMigration SchedulingReason = "LiveMigration" + // SchedulingReasonResize indicates that this scheduling event is triggered by a resize operation. + SchedulingReasonResize SchedulingReason = "Resize" + // SchedulingReasonRebuild indicates that this scheduling event is triggered by a rebuild operation. + SchedulingReasonRebuild SchedulingReason = "Rebuild" + // SchedulingReasonEvacuate indicates that this scheduling event is triggered by an evacuate operation. 
+ SchedulingReasonEvacuate SchedulingReason = "Evacuate" +) + +// SchedulingHistoryEntry represents a single entry in the scheduling history of a resource. +type SchedulingHistoryEntry struct { + // The host that was selected in this scheduling event. + Host string `json:"host"` + // Timestamp of when the scheduling event occurred. + Timestamp metav1.Time `json:"timestamp"` + // A reference to the pipeline that was used for this decision. // This reference can be used to look up the pipeline definition and its // scheduler step configuration for additional context. PipelineRef corev1.ObjectReference `json:"pipelineRef"` + // The reason for this scheduling event. + Reason SchedulingReason `json:"reason"` +} + +type DecisionSpec struct { + // SchedulingDomain defines in which scheduling domain this decision + // was or is processed (e.g., nova, cinder, manila). + SchedulingDomain SchedulingDomain `json:"schedulingDomain"` // An identifier for the underlying resource to be scheduled. // For example, this can be the UUID of a nova instance or cinder volume. - // This can be used to correlate multiple decisions for the same resource. ResourceID string `json:"resourceID"` - - // If the type is "nova", this field contains the raw nova decision request. - // +kubebuilder:validation:Optional - NovaRaw *runtime.RawExtension `json:"novaRaw,omitempty"` - // If the type is "cinder", this field contains the raw cinder decision request. - // +kubebuilder:validation:Optional - CinderRaw *runtime.RawExtension `json:"cinderRaw,omitempty"` - // If the type is "manila", this field contains the raw manila decision request. - // +kubebuilder:validation:Optional - ManilaRaw *runtime.RawExtension `json:"manilaRaw,omitempty"` - // If the type is "machine", this field contains the machine reference. - // +kubebuilder:validation:Optional - MachineRef *corev1.ObjectReference `json:"machineRef,omitempty"` - // If the type is "pod", this field contains the pod reference. 
- // +kubebuilder:validation:Optional - PodRef *corev1.ObjectReference `json:"podRef,omitempty"` -} - -type StepResult struct { - // object reference to the scheduler step. - StepName string `json:"stepName"` - // Activations of the step for each host. - Activations map[string]float64 `json:"activations"` -} - -type DecisionResult struct { - // Raw input weights to the pipeline. - // +kubebuilder:validation:Optional - RawInWeights map[string]float64 `json:"rawInWeights"` - // Normalized input weights to the pipeline. - // +kubebuilder:validation:Optional - NormalizedInWeights map[string]float64 `json:"normalizedInWeights"` - // Outputs of the decision pipeline including the activations used - // to make the final ordering of compute hosts. - // +kubebuilder:validation:Optional - StepResults []StepResult `json:"stepResults,omitempty"` - // Aggregated output weights from the pipeline. - // +kubebuilder:validation:Optional - AggregatedOutWeights map[string]float64 `json:"aggregatedOutWeights"` - // Final ordered list of hosts from most preferred to least preferred. - // +kubebuilder:validation:Optional - OrderedHosts []string `json:"orderedHosts,omitempty"` - // The first element of the ordered hosts is considered the target host. - // +kubebuilder:validation:Optional - TargetHost *string `json:"targetHost,omitempty"` } const ( - // The decision was successfully processed. + // The decision is ready and tracking the resource. DecisionConditionReady = "Ready" ) type DecisionStatus struct { - // The result of this decision. - // +kubebuilder:validation:Optional - Result *DecisionResult `json:"result,omitempty"` - - // If there were previous decisions for the underlying resource, they can - // be resolved here to provide historical context for the decision. + // The current host selected for the resource. Can be empty if no host could be determined. 
// +kubebuilder:validation:Optional - History *[]corev1.ObjectReference `json:"history,omitempty"` + CurrentHost string `json:"currentHost,omitempty"` - // The number of decisions that preceded this one for the same resource. + // The history of scheduling events for this resource. // +kubebuilder:validation:Optional - Precedence *int `json:"precedence,omitempty"` + SchedulingHistory []SchedulingHistoryEntry `json:"schedulingHistory,omitempty"` - // A human-readable explanation of the decision result. + // A human-readable explanation of the current scheduling state. // +kubebuilder:validation:Optional Explanation string `json:"explanation,omitempty"` @@ -103,12 +76,8 @@ type DecisionStatus struct { // +kubebuilder:resource:scope=Cluster // +kubebuilder:printcolumn:name="Domain",type="string",JSONPath=".spec.schedulingDomain" // +kubebuilder:printcolumn:name="Resource ID",type="string",JSONPath=".spec.resourceID" -// +kubebuilder:printcolumn:name="#",type="string",JSONPath=".status.precedence" +// +kubebuilder:printcolumn:name="Current Host",type="string",JSONPath=".status.currentHost" // +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp" -// +kubebuilder:printcolumn:name="Pipeline",type="string",JSONPath=".spec.pipelineRef.name" -// +kubebuilder:printcolumn:name="TargetHost",type="string",JSONPath=".status.result.targetHost" -// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" -// +kubebuilder:selectablefield:JSONPath=".spec.resourceID" // Decision is the Schema for the decisions API type Decision struct { diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 2551ef3ea..3c9cf255a 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -140,7 +140,7 @@ func (in *Decision) DeepCopyInto(out *Decision) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - 
in.Spec.DeepCopyInto(&out.Spec) + out.Spec = in.Spec in.Status.DeepCopyInto(&out.Status) } @@ -194,88 +194,9 @@ func (in *DecisionList) DeepCopyObject() runtime.Object { return nil } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *DecisionResult) DeepCopyInto(out *DecisionResult) { - *out = *in - if in.RawInWeights != nil { - in, out := &in.RawInWeights, &out.RawInWeights - *out = make(map[string]float64, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.NormalizedInWeights != nil { - in, out := &in.NormalizedInWeights, &out.NormalizedInWeights - *out = make(map[string]float64, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.StepResults != nil { - in, out := &in.StepResults, &out.StepResults - *out = make([]StepResult, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.AggregatedOutWeights != nil { - in, out := &in.AggregatedOutWeights, &out.AggregatedOutWeights - *out = make(map[string]float64, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.OrderedHosts != nil { - in, out := &in.OrderedHosts, &out.OrderedHosts - *out = make([]string, len(*in)) - copy(*out, *in) - } - if in.TargetHost != nil { - in, out := &in.TargetHost, &out.TargetHost - *out = new(string) - **out = **in - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DecisionResult. -func (in *DecisionResult) DeepCopy() *DecisionResult { - if in == nil { - return nil - } - out := new(DecisionResult) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *DecisionSpec) DeepCopyInto(out *DecisionSpec) { *out = *in - out.PipelineRef = in.PipelineRef - if in.NovaRaw != nil { - in, out := &in.NovaRaw, &out.NovaRaw - *out = new(runtime.RawExtension) - (*in).DeepCopyInto(*out) - } - if in.CinderRaw != nil { - in, out := &in.CinderRaw, &out.CinderRaw - *out = new(runtime.RawExtension) - (*in).DeepCopyInto(*out) - } - if in.ManilaRaw != nil { - in, out := &in.ManilaRaw, &out.ManilaRaw - *out = new(runtime.RawExtension) - (*in).DeepCopyInto(*out) - } - if in.MachineRef != nil { - in, out := &in.MachineRef, &out.MachineRef - *out = new(v1.ObjectReference) - **out = **in - } - if in.PodRef != nil { - in, out := &in.PodRef, &out.PodRef - *out = new(v1.ObjectReference) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DecisionSpec. @@ -291,25 +212,13 @@ func (in *DecisionSpec) DeepCopy() *DecisionSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *DecisionStatus) DeepCopyInto(out *DecisionStatus) { *out = *in - if in.Result != nil { - in, out := &in.Result, &out.Result - *out = new(DecisionResult) - (*in).DeepCopyInto(*out) - } - if in.History != nil { - in, out := &in.History, &out.History - *out = new([]v1.ObjectReference) - if **in != nil { - in, out := *in, *out - *out = make([]v1.ObjectReference, len(*in)) - copy(*out, *in) + if in.SchedulingHistory != nil { + in, out := &in.SchedulingHistory, &out.SchedulingHistory + *out = make([]SchedulingHistoryEntry, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.Precedence != nil { - in, out := &in.Precedence, &out.Precedence - *out = new(int) - **out = **in - } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions *out = make([]metav1.Condition, len(*in)) @@ -1173,23 +1082,18 @@ func (in *ReservationStatus) DeepCopy() *ReservationStatus { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *StepResult) DeepCopyInto(out *StepResult) { +func (in *SchedulingHistoryEntry) DeepCopyInto(out *SchedulingHistoryEntry) { *out = *in - if in.Activations != nil { - in, out := &in.Activations, &out.Activations - *out = make(map[string]float64, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } + in.Timestamp.DeepCopyInto(&out.Timestamp) + out.PipelineRef = in.PipelineRef } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StepResult. -func (in *StepResult) DeepCopy() *StepResult { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingHistoryEntry. 
+func (in *SchedulingHistoryEntry) DeepCopy() *SchedulingHistoryEntry { if in == nil { return nil } - out := new(StepResult) + out := new(SchedulingHistoryEntry) in.DeepCopyInto(out) return out } diff --git a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml index 3332a40e4..39fe4eba1 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml @@ -21,21 +21,12 @@ spec: - jsonPath: .spec.resourceID name: Resource ID type: string - - jsonPath: .status.precedence - name: '#' + - jsonPath: .status.currentHost + name: Current Host type: string - jsonPath: .metadata.creationTimestamp name: Created type: date - - jsonPath: .spec.pipelineRef.name - name: Pipeline - type: string - - jsonPath: .status.result.targetHost - name: TargetHost - type: string - - jsonPath: .status.conditions[?(@.type=='Ready')].status - name: Ready - type: string name: v1alpha1 schema: openAPIV3Schema: @@ -61,159 +52,10 @@ spec: spec: description: spec defines the desired state of Decision properties: - cinderRaw: - description: If the type is "cinder", this field contains the raw - cinder decision request. - type: object - x-kubernetes-preserve-unknown-fields: true - machineRef: - description: If the type is "machine", this field contains the machine - reference. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. 
- For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - manilaRaw: - description: If the type is "manila", this field contains the raw - manila decision request. - type: object - x-kubernetes-preserve-unknown-fields: true - novaRaw: - description: If the type is "nova", this field contains the raw nova - decision request. - type: object - x-kubernetes-preserve-unknown-fields: true - pipelineRef: - description: |- - A reference to the pipeline that should be used for this decision. - This reference can be used to look up the pipeline definition and its - scheduler step configuration for additional context. - properties: - apiVersion: - description: API version of the referent. 
- type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - podRef: - description: If the type is "pod", this field contains the pod reference. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. 
- For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic resourceID: description: |- An identifier for the underlying resource to be scheduled. For example, this can be the UUID of a nova instance or cinder volume. - This can be used to correlate multiple decisions for the same resource. type: string schedulingDomain: description: |- @@ -221,7 +63,6 @@ spec: was or is processed (e.g., nova, cinder, manila). type: string required: - - pipelineRef - resourceID - schedulingDomain type: object @@ -285,116 +126,87 @@ spec: - type type: object type: array + currentHost: + description: The current host selected for the resource. Can be empty + if no host could be determined. 
+ type: string explanation: - description: A human-readable explanation of the decision result. + description: A human-readable explanation of the current scheduling + state. type: string - history: - description: |- - If there were previous decisions for the underlying resource, they can - be resolved here to provide historical context for the decision. + schedulingHistory: + description: The history of scheduling events for this resource. items: - description: ObjectReference contains enough information to let - you inspect or modify the referred object. + description: SchedulingHistoryEntry represents a single entry in + the scheduling history of a resource. properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. + host: + description: The host that was selected in this scheduling event. type: string - kind: + pipelineRef: description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + A reference to the pipeline that was used for this decision. + This reference can be used to look up the pipeline definition and its + scheduler step configuration for additional context. + properties: + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + type: string + type: object + x-kubernetes-map-type: atomic + reason: + description: The reason for this scheduling event. type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids + timestamp: + description: Timestamp of when the scheduling event occurred. + format: date-time type: string + required: + - host + - pipelineRef + - reason + - timestamp type: object - x-kubernetes-map-type: atomic type: array - precedence: - description: The number of decisions that preceded this one for the - same resource. - type: integer - result: - description: The result of this decision. - properties: - aggregatedOutWeights: - additionalProperties: - type: number - description: Aggregated output weights from the pipeline. - type: object - normalizedInWeights: - additionalProperties: - type: number - description: Normalized input weights to the pipeline. - type: object - orderedHosts: - description: Final ordered list of hosts from most preferred to - least preferred. - items: - type: string - type: array - rawInWeights: - additionalProperties: - type: number - description: Raw input weights to the pipeline. - type: object - stepResults: - description: |- - Outputs of the decision pipeline including the activations used - to make the final ordering of compute hosts. - items: - properties: - activations: - additionalProperties: - type: number - description: Activations of the step for each host. - type: object - stepName: - description: object reference to the scheduler step. 
- type: string - required: - - activations - - stepName - type: object - type: array - targetHost: - description: The first element of the ordered hosts is considered - the target host. - type: string - type: object type: object required: - spec type: object - selectableFields: - - jsonPath: .spec.resourceID served: true storage: true subresources: From c19990b983d404a1feb146305eba5400d0a4a240 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 18 Feb 2026 12:15:32 +0100 Subject: [PATCH 02/36] Finalize the API this time frfr --- api/v1alpha1/decision_types.go | 10 +++++----- .../files/crds/cortex.cloud_decisions.yaml | 19 ++++++++++--------- .../kpis/plugins/deployment/decision_state.go | 3 --- 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go index b2e871616..6f0c66632 100644 --- a/api/v1alpha1/decision_types.go +++ b/api/v1alpha1/decision_types.go @@ -26,8 +26,8 @@ const ( // SchedulingHistoryEntry represents a single entry in the scheduling history of a resource. type SchedulingHistoryEntry struct { - // The host that was selected in this scheduling event. - Host string `json:"host"` + // The hosts that were selected in this scheduling event, in order of preference. + OrderedHosts string `json:"orderedHosts"` // Timestamp of when the scheduling event occurred. Timestamp metav1.Time `json:"timestamp"` // A reference to the pipeline that was used for this decision. @@ -54,9 +54,9 @@ const ( ) type DecisionStatus struct { - // The current host selected for the resource. Can be empty if no host could be determined. + // The target host selected for the resource. Can be empty if no host could be determined. // +kubebuilder:validation:Optional - CurrentHost string `json:"currentHost,omitempty"` + TargetHost string `json:"targetHost,omitempty"` // The history of scheduling events for this resource. 
// +kubebuilder:validation:Optional @@ -76,7 +76,7 @@ type DecisionStatus struct { // +kubebuilder:resource:scope=Cluster // +kubebuilder:printcolumn:name="Domain",type="string",JSONPath=".spec.schedulingDomain" // +kubebuilder:printcolumn:name="Resource ID",type="string",JSONPath=".spec.resourceID" -// +kubebuilder:printcolumn:name="Current Host",type="string",JSONPath=".status.currentHost" +// +kubebuilder:printcolumn:name="Target Host",type="string",JSONPath=".status.targetHost" // +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp" // Decision is the Schema for the decisions API diff --git a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml index 39fe4eba1..4d9cd701a 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml @@ -21,8 +21,8 @@ spec: - jsonPath: .spec.resourceID name: Resource ID type: string - - jsonPath: .status.currentHost - name: Current Host + - jsonPath: .status.targetHost + name: Target Host type: string - jsonPath: .metadata.creationTimestamp name: Created @@ -126,10 +126,6 @@ spec: - type type: object type: array - currentHost: - description: The current host selected for the resource. Can be empty - if no host could be determined. - type: string explanation: description: A human-readable explanation of the current scheduling state. @@ -140,8 +136,9 @@ spec: description: SchedulingHistoryEntry represents a single entry in the scheduling history of a resource. properties: - host: - description: The host that was selected in this scheduling event. + orderedHosts: + description: The hosts that were selected in this scheduling + event, in order of preference. 
type: string pipelineRef: description: |- @@ -197,12 +194,16 @@ spec: format: date-time type: string required: - - host + - orderedHosts - pipelineRef - reason - timestamp type: object type: array + targetHost: + description: The target host selected for the resource. Can be empty + if no host could be determined. + type: string type: object required: - spec diff --git a/internal/knowledge/kpis/plugins/deployment/decision_state.go b/internal/knowledge/kpis/plugins/deployment/decision_state.go index f11e1f9a4..bc6df8d60 100644 --- a/internal/knowledge/kpis/plugins/deployment/decision_state.go +++ b/internal/knowledge/kpis/plugins/deployment/decision_state.go @@ -69,9 +69,6 @@ func (k *DecisionStateKPI) Collect(ch chan<- prometheus.Metric) { // Error state: decision has a false Ready condition case meta.IsStatusConditionFalse(d.Status.Conditions, v1alpha1.DecisionConditionReady): errorCount++ - // Waiting state: decision has a target host set (waiting for migration/placement) - case d.Status.Result != nil && d.Status.Result.TargetHost != nil: - waitingCount++ // Success state: decision is complete (has result with ordered hosts or no result needed) default: successCount++ From b6ec5bd284b90fe7615b4ec3e0ee213ac13b18ba Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 18 Feb 2026 12:16:22 +0100 Subject: [PATCH 03/36] Remove waiting state in decision oberserver kpi --- .../knowledge/kpis/plugins/deployment/decision_state.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/internal/knowledge/kpis/plugins/deployment/decision_state.go b/internal/knowledge/kpis/plugins/deployment/decision_state.go index bc6df8d60..6620fafc6 100644 --- a/internal/knowledge/kpis/plugins/deployment/decision_state.go +++ b/internal/knowledge/kpis/plugins/deployment/decision_state.go @@ -63,7 +63,7 @@ func (k *DecisionStateKPI) Collect(ch chan<- prometheus.Metric) { decisions = append(decisions, d) } // For each decision, categorize by state: error, waiting, or 
success - var errorCount, waitingCount, successCount float64 + var errorCount, successCount float64 for _, d := range decisions { switch { // Error state: decision has a false Ready condition @@ -79,10 +79,6 @@ func (k *DecisionStateKPI) Collect(ch chan<- prometheus.Metric) { k.counter, prometheus.GaugeValue, errorCount, string(k.Options.DecisionSchedulingDomain), "error", ) - ch <- prometheus.MustNewConstMetric( - k.counter, prometheus.GaugeValue, waitingCount, - string(k.Options.DecisionSchedulingDomain), "waiting", - ) ch <- prometheus.MustNewConstMetric( k.counter, prometheus.GaugeValue, successCount, string(k.Options.DecisionSchedulingDomain), "success", From c29a6ebf85dbc9aaad2a2b5a24b59b7954ec638a Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 18 Feb 2026 12:20:37 +0100 Subject: [PATCH 04/36] Refactor DecisionStateKPI_Collect tests to remove waiting state metrics --- .../plugins/deployment/decision_state_test.go | 64 +++---------------- 1 file changed, 10 insertions(+), 54 deletions(-) diff --git a/internal/knowledge/kpis/plugins/deployment/decision_state_test.go b/internal/knowledge/kpis/plugins/deployment/decision_state_test.go index 0d6bd4568..dc84a90af 100644 --- a/internal/knowledge/kpis/plugins/deployment/decision_state_test.go +++ b/internal/knowledge/kpis/plugins/deployment/decision_state_test.go @@ -35,17 +35,15 @@ func TestDecisionStateKPI_Collect(t *testing.T) { expectedCount int description string expectedError int - expectedWaiting int expectedSuccess int }{ { name: "no decisions", decisions: []v1alpha1.Decision{}, operator: "test-operator", - expectedCount: 3, // always emits 3 metrics: error, waiting, success + expectedCount: 2, // always emits 2 metrics: error, success description: "should collect metrics with zero counts when no decisions exist", expectedError: 0, - expectedWaiting: 0, expectedSuccess: 0, }, { @@ -65,30 +63,9 @@ func TestDecisionStateKPI_Collect(t *testing.T) { }, }, operator: "test-operator", - expectedCount: 3, + 
expectedCount: 2, description: "should count decision in error state", expectedError: 1, - expectedWaiting: 0, - expectedSuccess: 0, - }, - { - name: "single decision in waiting state", - decisions: []v1alpha1.Decision{ - { - ObjectMeta: v1.ObjectMeta{Name: "dec2"}, - Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: &targetHost, - }, - }, - }, - }, - operator: "test-operator", - expectedCount: 3, - description: "should count decision with target host as waiting", - expectedError: 0, - expectedWaiting: 1, expectedSuccess: 0, }, { @@ -98,17 +75,14 @@ func TestDecisionStateKPI_Collect(t *testing.T) { ObjectMeta: v1.ObjectMeta{Name: "dec3"}, Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1", "host2"}, - }, + TargetHost: targetHost, }, }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should count decision without target host and no error as success", expectedError: 0, - expectedWaiting: 0, expectedSuccess: 1, }, { @@ -126,30 +100,18 @@ func TestDecisionStateKPI_Collect(t *testing.T) { }, }, }, - { - ObjectMeta: v1.ObjectMeta{Name: "dec-waiting"}, - Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: &targetHost, - }, - }, - }, { ObjectMeta: v1.ObjectMeta{Name: "dec-success"}, Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - }, + TargetHost: targetHost, }, }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should correctly count decisions across all states", expectedError: 1, - expectedWaiting: 1, expectedSuccess: 1, }, { @@ -181,10 +143,9 @@ func 
TestDecisionStateKPI_Collect(t *testing.T) { }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should only count decisions with matching operator", expectedError: 1, - expectedWaiting: 0, expectedSuccess: 0, }, { @@ -216,10 +177,9 @@ func TestDecisionStateKPI_Collect(t *testing.T) { }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should correctly aggregate multiple decisions in same state", expectedError: 2, - expectedWaiting: 0, expectedSuccess: 0, }, { @@ -235,7 +195,6 @@ func TestDecisionStateKPI_Collect(t *testing.T) { expectedCount: 3, description: "should count decision with no result as success", expectedError: 0, - expectedWaiting: 0, expectedSuccess: 1, }, { @@ -245,9 +204,7 @@ func TestDecisionStateKPI_Collect(t *testing.T) { ObjectMeta: v1.ObjectMeta{Name: "dec-error-with-target"}, Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: &targetHost, - }, + TargetHost: targetHost, Conditions: []v1.Condition{ { Type: v1alpha1.DecisionConditionReady, @@ -258,10 +215,9 @@ func TestDecisionStateKPI_Collect(t *testing.T) { }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should count as error even if target host is present", expectedError: 1, - expectedWaiting: 0, expectedSuccess: 0, }, } From 210081e2e26272410b332ede9e442cf23856a784 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 18 Feb 2026 12:31:16 +0100 Subject: [PATCH 05/36] Refactor Run method to return FilterWeigherPipelineDecision instead of v1alpha1.DecisionResult --- .../scheduling/lib/filter_weigher_pipeline.go | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/internal/scheduling/lib/filter_weigher_pipeline.go b/internal/scheduling/lib/filter_weigher_pipeline.go index 188d7b657..c7c44bd8b 100644 --- a/internal/scheduling/lib/filter_weigher_pipeline.go 
+++ b/internal/scheduling/lib/filter_weigher_pipeline.go @@ -17,9 +17,20 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) +type FilterWeigherPipelineDecision struct { + // The original weights provided as input to the pipeline, from the request that cortex received. + RawInWeights map[string]float64 + // The normalized input weights after applying the normalization function. + NormalizedInWeights map[string]float64 + // The output weights after applying the weigher step activations and multipliers. + AggregatedOutWeights map[string]float64 + // The hosts in order of preference, with the most preferred host first. + OrderedHosts []string +} + type FilterWeigherPipeline[RequestType FilterWeigherPipelineRequest] interface { // Run the scheduling pipeline with the given request. - Run(request RequestType) (v1alpha1.DecisionResult, error) + Run(request RequestType) (FilterWeigherPipelineDecision, error) } // Pipeline of scheduler steps. @@ -243,7 +254,7 @@ func (s *filterWeigherPipeline[RequestType]) sortHostsByWeights(weights map[stri } // Evaluate the pipeline and return a list of hosts in order of preference. -func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (v1alpha1.DecisionResult, error) { +func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (FilterWeigherPipelineDecision, error) { slogArgs := request.GetTraceLogArgs() slogArgsAny := make([]any, 0, len(slogArgs)) for _, arg := range slogArgs { @@ -281,14 +292,11 @@ func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (v1alpha1. // Collect some metrics about the pipeline execution. 
go p.monitor.observePipelineResult(request, hosts) - result := v1alpha1.DecisionResult{ + result := FilterWeigherPipelineDecision{ RawInWeights: request.GetWeights(), NormalizedInWeights: inWeights, AggregatedOutWeights: outWeights, OrderedHosts: hosts, } - if len(hosts) > 0 { - result.TargetHost = &hosts[0] - } return result, nil } From d59441a089bb6d2767e7e6ecc711c5c265272a17 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 18 Feb 2026 13:01:52 +0100 Subject: [PATCH 06/36] Example implementation for cinder api --- .../cinder/external_scheduler_api.go | 49 +++----- .../filter_weigher_pipeline_controller.go | 111 +++--------------- .../scheduling/lib/filter_weigher_pipeline.go | 8 +- 3 files changed, 34 insertions(+), 134 deletions(-) diff --git a/internal/scheduling/cinder/external_scheduler_api.go b/internal/scheduling/cinder/external_scheduler_api.go index f40ec3bae..db82c6bbc 100644 --- a/internal/scheduling/cinder/external_scheduler_api.go +++ b/internal/scheduling/cinder/external_scheduler_api.go @@ -14,19 +14,15 @@ import ( "net/http" api "github.com/cobaltcore-dev/cortex/api/external/cinder" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/metrics" ) type HTTPAPIDelegate interface { - // Process the decision from the API. Should create and return the updated decision. - ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error + // Process the scheduling request from the API. 
+ ProcessRequest(ctx context.Context, pipeline string, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } type HTTPAPI interface { @@ -106,7 +102,6 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R c.Respond(http.StatusInternalServerError, err, "failed to read request body") return } - raw := runtime.RawExtension{Raw: body} var requestData api.ExternalSchedulerRequest // Copy the raw body to a io.Reader for json deserialization. cp := body @@ -126,46 +121,30 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R return } + pipelineName := requestData.Pipeline + // If the pipeline name is not set, set it to a default value. - if requestData.Pipeline == "" { + if pipelineName == "" { var err error - requestData.Pipeline, err = httpAPI.inferPipelineName(requestData) + pipelineName, err = httpAPI.inferPipelineName(requestData) if err != nil { c.Respond(http.StatusBadRequest, err, err.Error()) return } - slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) + slog.Info("inferred pipeline name", "pipeline", pipelineName) } - // Create the decision object in kubernetes. - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "cinder-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: requestData.Pipeline, - }, - ResourceID: "", // TODO - CinderRaw: &raw, - }, - } ctx := r.Context() - if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision") - return - } - // Check if the decision contains status conditions indicating an error. 
- if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) { - c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") + result, err := httpAPI.delegate.ProcessRequest(ctx, pipelineName, requestData) + if err != nil { + c.Respond(http.StatusInternalServerError, err, "failed to process scheduling request") return } - if decision.Status.Result == nil { - c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") + if result == nil { + c.Respond(http.StatusInternalServerError, errors.New("pipeline didn't produce a result"), "failed to process scheduling request") return } - hosts := decision.Status.Result.OrderedHosts + hosts := result.OrderedHosts response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go index 0d8771081..00cd7e727 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go @@ -5,23 +5,18 @@ package cinder import ( "context" - "encoding/json" - "errors" "fmt" "sync" "time" api "github.com/cobaltcore-dev/cortex/api/external/cinder" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/cobaltcore-dev/cortex/internal/scheduling/cinder/plugins/filters" "github.com/cobaltcore-dev/cortex/internal/scheduling/cinder/plugins/weighers" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/pkg/multicluster" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" 
"sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" @@ -50,93 +45,27 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -// Callback executed when kubernetes asks to reconcile a decision resource. -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - c.processMu.Lock() - defer c.processMu.Unlock() - - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} - // Process the decision from the API. Should create and return the updated decision. -func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, pipelineName string, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { c.processMu.Lock() defer c.processMu.Unlock() - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] - if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - 
meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) - } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } - } - return err -} - -func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. + startedAt := time.Now() - pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipeline, ok := c.Pipelines[pipelineName] if !ok { - log.Error(nil, "pipeline not found or not ready", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline not found or not ready") - } - if decision.Spec.CinderRaw == nil { - log.Error(nil, "skipping decision, no cinderRaw spec defined") - return errors.New("no cinderRaw spec defined") - } - var request api.ExternalSchedulerRequest - if err := json.Unmarshal(decision.Spec.CinderRaw.Raw, &request); err != nil { - log.Error(err, "failed to unmarshal cinderRaw spec") - return err + log.Error(nil, "pipeline not found or not ready", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline %s not found or not ready", pipelineName) } result, err := pipeline.Run(request) if err != nil { - log.Error(err, "failed to run pipeline") - return err + log.Error(err, "failed to run pipeline", "pipeline", pipelineName) + return nil, err } - decision.Status.Result = &result - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - return nil + log.Info("request processed successfully", "duration", time.Since(startedAt)) + return &result, nil } // The base controller will delegate the pipeline creation down to this method. 
@@ -153,6 +82,13 @@ func (c *FilterWeigherPipelineController) InitPipeline( ) } +// Reconcile is required by the controller interface but does nothing. +// Decisions are now read-only tracking objects created by the HTTP API. +func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + // Nothing to reconcile - decisions are created directly by the HTTP API + return ctrl.Result{}, nil +} + func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainCinder @@ -191,20 +127,5 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainCinder }), ). - For( - &v1alpha1.Decision{}, - builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { - return false - } - // Ignore already decided schedulings. - if decision.Status.Result != nil { - return false - } - return true - })), - ). - Named("cortex-cinder-decisions"). Complete(c) } diff --git a/internal/scheduling/lib/filter_weigher_pipeline.go b/internal/scheduling/lib/filter_weigher_pipeline.go index c7c44bd8b..6be8304bc 100644 --- a/internal/scheduling/lib/filter_weigher_pipeline.go +++ b/internal/scheduling/lib/filter_weigher_pipeline.go @@ -17,7 +17,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -type FilterWeigherPipelineDecision struct { +type FilterWeigherPipelineResult struct { // The original weights provided as input to the pipeline, from the request that cortex received. RawInWeights map[string]float64 // The normalized input weights after applying the normalization function. 
@@ -30,7 +30,7 @@ type FilterWeigherPipelineDecision struct { type FilterWeigherPipeline[RequestType FilterWeigherPipelineRequest] interface { // Run the scheduling pipeline with the given request. - Run(request RequestType) (FilterWeigherPipelineDecision, error) + Run(request RequestType) (FilterWeigherPipelineResult, error) } // Pipeline of scheduler steps. @@ -254,7 +254,7 @@ func (s *filterWeigherPipeline[RequestType]) sortHostsByWeights(weights map[stri } // Evaluate the pipeline and return a list of hosts in order of preference. -func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (FilterWeigherPipelineDecision, error) { +func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (FilterWeigherPipelineResult, error) { slogArgs := request.GetTraceLogArgs() slogArgsAny := make([]any, 0, len(slogArgs)) for _, arg := range slogArgs { @@ -292,7 +292,7 @@ func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (FilterWei // Collect some metrics about the pipeline execution. 
go p.monitor.observePipelineResult(request, hosts) - result := FilterWeigherPipelineDecision{ + result := FilterWeigherPipelineResult{ RawInWeights: request.GetWeights(), NormalizedInWeights: inWeights, AggregatedOutWeights: outWeights, From 96b44c1e04a016162f4ec8721afd5b3f7df47a64 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 18 Feb 2026 13:20:54 +0100 Subject: [PATCH 07/36] Refactor FilterWeigherPipelineController tests to simplify request handling and remove redundant test cases --- ...filter_weigher_pipeline_controller_test.go | 197 ++---------------- 1 file changed, 20 insertions(+), 177 deletions(-) diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go index 3a828eae6..a3228bad8 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go @@ -28,191 +28,34 @@ func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) } - cinderRequest := api.ExternalSchedulerRequest{ - Spec: map[string]any{ - "volume_id": "test-volume-id", - "size": 10, - }, - Context: api.CinderRequestContext{ - ProjectID: "test-project", - UserID: "test-user", - RequestID: "req-123", - GlobalRequestID: "global-req-123", - }, - Hosts: []api.ExternalSchedulerHost{ - {VolumeHost: "cinder-volume-1"}, - {VolumeHost: "cinder-volume-2"}, - }, - Weights: map[string]float64{"cinder-volume-1": 1.0, "cinder-volume-2": 0.5}, - Pipeline: "test-pipeline", - } + client := fake.NewClientBuilder(). + WithScheme(scheme). 
+ Build() - cinderRaw, err := json.Marshal(cinderRequest) - if err != nil { - t.Fatalf("Failed to marshal cinder request: %v", err) + controller := &FilterWeigherPipelineController{ + BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ + Client: client, + Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), + }, + Monitor: lib.FilterWeigherPipelineMonitor{}, } - tests := []struct { - name string - decision *v1alpha1.Decision - pipeline *v1alpha1.Pipeline - expectError bool - expectResult bool - }{ - { - name: "successful cinder decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: false, - expectResult: true, - }, - { - name: "decision without cinderRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: nil, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - 
}, - expectError: true, - expectResult: false, - }, - { - name: "pipeline not found", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, - }, - }, - pipeline: nil, - expectError: true, - expectResult: false, + // Reconcile should always succeed and do nothing + req := ctrl.Request{ + NamespacedName: types.NamespacedName{ + Name: "any-name", + Namespace: "any-namespace", }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []client.Object{tt.decision} - if tt.pipeline != nil { - objects = append(objects, tt.pipeline) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, - Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - - if tt.pipeline != nil { - initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }) - if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { - t.Fatalf("Failed to init pipeline: %v", initResult) - } - controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline - } - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - 
Namespace: tt.decision.Namespace, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("Expected error but got none") - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - } + result, err := controller.Reconcile(context.Background(), req) - if result.RequeueAfter > 0 { - t.Error("Expected no requeue") - } - - var updatedDecision v1alpha1.Decision - if err := client.Get(context.Background(), req.NamespacedName, &updatedDecision); err != nil { - t.Fatalf("Failed to get updated decision: %v", err) - } + if err != nil { + t.Errorf("Expected no error but got: %v", err) + } - if tt.expectResult && updatedDecision.Status.Result == nil { - t.Error("Expected result to be set but was nil") - } - if !tt.expectResult && updatedDecision.Status.Result != nil { - t.Error("Expected result to be nil but was set") - } - }) + if result.RequeueAfter > 0 { + t.Error("Expected no requeue delay") } } From 8bdee65ce05a8b4ec94f1fc0a173a7dd78e1bbb3 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Wed, 18 Feb 2026 14:12:44 +0100 Subject: [PATCH 08/36] Example implementation of external scheduler api tests in cinder --- .../cinder/external_scheduler_api_test.go | 138 +++++++----------- 1 file changed, 56 insertions(+), 82 deletions(-) diff --git a/internal/scheduling/cinder/external_scheduler_api_test.go b/internal/scheduling/cinder/external_scheduler_api_test.go index d5f7f394f..cddc197c3 100644 --- a/internal/scheduling/cinder/external_scheduler_api_test.go +++ b/internal/scheduling/cinder/external_scheduler_api_test.go @@ -14,20 +14,20 @@ import ( "testing" cinderapi "github.com/cobaltcore-dev/cortex/api/external/cinder" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" ) type mockHTTPAPIDelegate struct { - processDecisionFunc func(ctx context.Context, decision 
*v1alpha1.Decision) error + processFunc func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } -func (m *mockHTTPAPIDelegate) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { - if m.processDecisionFunc != nil { - return m.processDecisionFunc(ctx, decision) +func (m *mockHTTPAPIDelegate) ProcessRequest(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + if m.processFunc != nil { + return m.processFunc(ctx, pipeline, request) } - return nil + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1"}, + }, nil } func TestNewAPI(t *testing.T) { @@ -142,13 +142,12 @@ func TestHTTPAPI_canRunScheduler(t *testing.T) { func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { tests := []struct { - name string - method string - body string - processDecisionErr error - decisionResult *v1alpha1.Decision - expectedStatus int - expectedHosts []string + name string + method string + body string + processFunc func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + expectedStatus int + expectedHosts []string }{ { name: "invalid method", @@ -168,27 +167,24 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { req := cinderapi.ExternalSchedulerRequest{ Hosts: []cinderapi.ExternalSchedulerHost{ {VolumeHost: "host1"}, + {VolumeHost: "host2"}, }, Weights: map[string]float64{ "host1": 1.0, + "host2": 0.5, }, Pipeline: "test-pipeline", } - data, err := json.Marshal(req) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } + data, _ := json.Marshal(req) return string(data) }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - }, - }, + processFunc: func(ctx context.Context, pipeline string, 
request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1", "host2"}, + }, nil }, expectedStatus: http.StatusOK, - expectedHosts: []string{"host1"}, + expectedHosts: []string{"host1", "host2"}, }, { name: "processing error", @@ -203,17 +199,16 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { }, Pipeline: "test-pipeline", } - data, err := json.Marshal(req) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } + data, _ := json.Marshal(req) return string(data) }(), - processDecisionErr: errors.New("processing failed"), - expectedStatus: http.StatusInternalServerError, + processFunc: func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return nil, errors.New("processing failed") + }, + expectedStatus: http.StatusInternalServerError, }, { - name: "decision failed", + name: "empty result", method: http.MethodPost, body: func() string { req := cinderapi.ExternalSchedulerRequest{ @@ -225,22 +220,13 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { }, Pipeline: "test-pipeline", } - data, err := json.Marshal(req) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } + data, _ := json.Marshal(req) return string(data) }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Conditions: []metav1.Condition{ - { - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "SchedulingError", - }, - }, - }, + processFunc: func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{}, + }, nil }, expectedStatus: http.StatusInternalServerError, }, @@ -249,16 +235,7 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { for _, tt := range tests { 
t.Run(tt.name, func(t *testing.T) { delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - if tt.processDecisionErr != nil { - return tt.processDecisionErr - } - if tt.decisionResult != nil { - decision.Status = tt.decisionResult.Status - return nil - } - return nil - }, + processFunc: tt.processFunc, } api := NewAPI(delegate).(*httpAPI) @@ -347,16 +324,17 @@ func TestHTTPAPI_inferPipelineName(t *testing.T) { } } -func TestHTTPAPI_CinderExternalScheduler_DecisionCreation(t *testing.T) { - var capturedDecision *v1alpha1.Decision +func TestHTTPAPI_CinderExternalScheduler_PipelineParameter(t *testing.T) { + var capturedPipeline string + var capturedRequest cinderapi.ExternalSchedulerRequest + delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - capturedDecision = decision - // Set a successful result to avoid "decision didn't produce a result" error - decision.Status.Result = &v1alpha1.DecisionResult{ + processFunc: func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + capturedPipeline = pipeline + capturedRequest = request + return &lib.FilterWeigherPipelineResult{ OrderedHosts: []string{"host1"}, - } - return nil + }, nil }, } @@ -370,6 +348,9 @@ func TestHTTPAPI_CinderExternalScheduler_DecisionCreation(t *testing.T) { "host1": 1.0, }, Pipeline: "test-pipeline", + Spec: map[string]any{ + "volume_id": "test-volume", + }, } body, err := json.Marshal(requestData) @@ -382,27 +363,20 @@ func TestHTTPAPI_CinderExternalScheduler_DecisionCreation(t *testing.T) { api.CinderExternalScheduler(w, req) if w.Code != http.StatusOK { - t.Errorf("Expected status %d, got %d", http.StatusOK, w.Code) + t.Errorf("Expected status %d, got %d. 
Body: %s", http.StatusOK, w.Code, w.Body.String()) } - if capturedDecision == nil { - t.Fatal("Decision was not captured") + // Verify the pipeline name was passed correctly + expectedPipeline := "cinder-external-scheduler" // Default pipeline from inferPipelineName + if capturedPipeline != expectedPipeline { + t.Errorf("Expected pipeline '%s', got '%s'", expectedPipeline, capturedPipeline) } - // Verify decision fields - if capturedDecision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { - t.Errorf("Expected scheduling domain %s, got %s", v1alpha1.SchedulingDomainCinder, capturedDecision.Spec.SchedulingDomain) + // Verify the request was passed correctly + if len(capturedRequest.Hosts) != 1 { + t.Errorf("Expected 1 host, got %d", len(capturedRequest.Hosts)) } - - if capturedDecision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("Expected pipeline 'test-pipeline', got %s", capturedDecision.Spec.PipelineRef.Name) - } - - if capturedDecision.GenerateName != "cinder-" { - t.Errorf("Expected generate name 'cinder-', got %s", capturedDecision.GenerateName) - } - - if capturedDecision.Spec.CinderRaw == nil { - t.Error("CinderRaw should not be nil") + if capturedRequest.Hosts[0].VolumeHost != "host1" { + t.Errorf("Expected host 'host1', got '%s'", capturedRequest.Hosts[0].VolumeHost) } } From 4f58ad2991922305922dca1b0ed3a09f6344a12b Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 19 Feb 2026 14:21:02 +0100 Subject: [PATCH 09/36] Remove decision as source for scheduling pipeline --- api/v1alpha1/decision_types.go | 2 + .../cinder/external_scheduler_api.go | 13 +- .../cinder/external_scheduler_api_test.go | 4 + .../filter_weigher_pipeline_controller.go | 70 ++- ...filter_weigher_pipeline_controller_test.go | 39 -- internal/scheduling/explanation/controller.go | 221 ------- .../scheduling/explanation/controller_test.go | 589 ------------------ .../{explanation => lib}/explainer.go | 4 +- .../explainer_templates.go} | 2 +- .../{explanation => 
lib}/explainer_test.go | 2 +- .../types.go => lib/explainer_types.go} | 2 +- .../scheduling/lib/pipeline_controller.go | 214 +++---- .../manila/external_scheduler_api.go | 43 +- .../filter_weigher_pipeline_controller.go | 148 ++--- .../nova/detector_pipeline_controller.go | 46 +- .../scheduling/nova/external_scheduler_api.go | 46 +- .../filter_weigher_pipeline_controller.go | 168 ++--- 17 files changed, 264 insertions(+), 1349 deletions(-) delete mode 100644 internal/scheduling/explanation/controller.go delete mode 100644 internal/scheduling/explanation/controller_test.go rename internal/scheduling/{explanation => lib}/explainer.go (99%) rename internal/scheduling/{explanation/templates.go => lib/explainer_templates.go} (99%) rename internal/scheduling/{explanation => lib}/explainer_test.go (99%) rename internal/scheduling/{explanation/types.go => lib/explainer_types.go} (99%) diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go index 6f0c66632..338334ab8 100644 --- a/api/v1alpha1/decision_types.go +++ b/api/v1alpha1/decision_types.go @@ -22,6 +22,8 @@ const ( SchedulingReasonRebuild SchedulingReason = "Rebuild" // SchedulingReasonEvacuate indicates that this scheduling event is triggered by an evacuate operation. SchedulingReasonEvacuate SchedulingReason = "Evacuate" + // SchedulingReasonUnknown indicates that the reason for this scheduling event is unknown. + SchedulingReasonUnknown SchedulingReason = "Unknown" ) // SchedulingHistoryEntry represents a single entry in the scheduling history of a resource. diff --git a/internal/scheduling/cinder/external_scheduler_api.go b/internal/scheduling/cinder/external_scheduler_api.go index db82c6bbc..4a327b8f7 100644 --- a/internal/scheduling/cinder/external_scheduler_api.go +++ b/internal/scheduling/cinder/external_scheduler_api.go @@ -22,7 +22,7 @@ import ( type HTTPAPIDelegate interface { // Process the scheduling request from the API. 
- ProcessRequest(ctx context.Context, pipeline string, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } type HTTPAPI interface { @@ -121,21 +121,19 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R return } - pipelineName := requestData.Pipeline - // If the pipeline name is not set, set it to a default value. - if pipelineName == "" { + if requestData.Pipeline == "" { var err error - pipelineName, err = httpAPI.inferPipelineName(requestData) + requestData.Pipeline, err = httpAPI.inferPipelineName(requestData) if err != nil { c.Respond(http.StatusBadRequest, err, err.Error()) return } - slog.Info("inferred pipeline name", "pipeline", pipelineName) + slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) } ctx := r.Context() - result, err := httpAPI.delegate.ProcessRequest(ctx, pipelineName, requestData) + result, err := httpAPI.delegate.ProcessRequest(ctx, requestData) if err != nil { c.Respond(http.StatusInternalServerError, err, "failed to process scheduling request") return @@ -151,5 +149,6 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R c.Respond(http.StatusInternalServerError, err, "failed to encode response") return } + c.Respond(http.StatusOK, nil, "Success") } diff --git a/internal/scheduling/cinder/external_scheduler_api_test.go b/internal/scheduling/cinder/external_scheduler_api_test.go index cddc197c3..acb640861 100644 --- a/internal/scheduling/cinder/external_scheduler_api_test.go +++ b/internal/scheduling/cinder/external_scheduler_api_test.go @@ -15,6 +15,8 @@ import ( cinderapi "github.com/cobaltcore-dev/cortex/api/external/cinder" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" + "k8s.io/apimachinery/pkg/runtime" + "sigs.k8s.io/controller-runtime/pkg/client/fake" ) type mockHTTPAPIDelegate struct { @@ -238,6 
+240,8 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { processFunc: tt.processFunc, } + tttscheme := runtime.NewScheme() + tttfakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() api := NewAPI(delegate).(*httpAPI) var body *strings.Reader diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go index 00cd7e727..9c0dff770 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go @@ -17,10 +17,12 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/pkg/multicluster" ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // The decision pipeline controller takes decision resources containing a @@ -45,26 +47,43 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -// Process the decision from the API. Should create and return the updated decision. -func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, pipelineName string, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { +// Process the request from the API. Returns the result of the pipeline execution. 
+func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { c.processMu.Lock() defer c.processMu.Unlock() log := ctrl.LoggerFrom(ctx) startedAt := time.Now() + pipelineName := request.Pipeline + pipeline, ok := c.Pipelines[pipelineName] if !ok { log.Error(nil, "pipeline not found or not ready", "pipelineName", pipelineName) return nil, fmt.Errorf("pipeline %s not found or not ready", pipelineName) } + pipelineConfig, ok := c.PipelineConfigs[pipelineName] + if !ok { + log.Error(nil, "pipeline config not found", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline config for %s not found", pipelineName) + } + result, err := pipeline.Run(request) if err != nil { log.Error(err, "failed to run pipeline", "pipeline", pipelineName) return nil, err } log.Info("request processed successfully", "duration", time.Since(startedAt)) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + ResourceID: request.Context.ResourceUUID, + PipelineName: pipelineName, + Result: result, + Reason: v1alpha1.SchedulingReasonUnknown, + } + } return &result, nil } @@ -82,13 +101,6 @@ func (c *FilterWeigherPipelineController) InitPipeline( ) } -// Reconcile is required by the controller interface but does nothing. -// Decisions are now read-only tracking objects created by the HTTP API. -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - // Nothing to reconcile - decisions are created directly by the HTTP API - return ctrl.Result{}, nil -} - func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainCinder @@ -98,34 +110,32 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, return multicluster.BuildController(mcl, mgr). 
// Watch pipeline changes so that we can reconfigure pipelines as needed. WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. - if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { - return false + &v1alpha1.Knowledge{}, + // Get all pipelines of the controller when knowledge changes and trigger reconciliation to update the candidates in the pipelines. + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { + knowledge := obj.(*v1alpha1.Knowledge) + if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { + return nil } - return pipeline.Spec.Type == c.PipelineType() + // When Knowledge changes, reconcile all pipelines + return c.GetAllPipelineReconcileRequests(ctx) }), - ). - // Watch knowledge changes so that we can reconfigure pipelines as needed. - WatchesMulticluster( - &v1alpha1.Knowledge{}, - handler.Funcs{ - CreateFunc: c.HandleKnowledgeCreated, - UpdateFunc: c.HandleKnowledgeUpdated, - DeleteFunc: c.HandleKnowledgeDeleted, - }, predicate.NewPredicateFuncs(func(obj client.Object) bool { knowledge := obj.(*v1alpha1.Knowledge) // Only react to knowledge matching the scheduling domain. return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainCinder }), ). + Named("cortex-cinder-pipelines"). + For( + &v1alpha1.Pipeline{}, + builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { + pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { + return false + } + return pipeline.Spec.Type == c.PipelineType() + })), + ). 
Complete(c) } diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go index a3228bad8..f99d244b2 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go @@ -10,8 +10,6 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -22,43 +20,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, - Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - - // Reconcile should always succeed and do nothing - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: "any-name", - Namespace: "any-namespace", - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - if result.RequeueAfter > 0 { - t.Error("Expected no requeue delay") - } -} - func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { diff --git a/internal/scheduling/explanation/controller.go b/internal/scheduling/explanation/controller.go deleted file mode 100644 index 
7b71e0c4a..000000000 --- a/internal/scheduling/explanation/controller.go +++ /dev/null @@ -1,221 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "sort" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/pkg/multicluster" - corev1 "k8s.io/api/core/v1" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/predicate" -) - -type ControllerConfig struct { - // The controller will scope to objects using this scheduling domain name. - // This allows multiple controllers to coexist in the same cluster without - // interfering with each other's decisions. - SchedulingDomain v1alpha1.SchedulingDomain `json:"schedulingDomain"` -} - -// The explanation controller populates two fields of the decision status. -// -// First, it reconstructs the history of each decision. It will look for -// previous decisions for the same resource (based on ResourceID) and provide -// them through the decision history field. -// -// Second, it will use the available context for a decision to generate a -// human-readable explanation of why the decision was made the way it was. -// This explanation is intended to help operators understand the reasoning -// behind scheduling decisions. -type Controller struct { - // The kubernetes client to use for processing decisions. - client.Client - // Config for the controller. - Config ControllerConfig - // If the field indexing should be skipped (useful for testing). - SkipIndexFields bool -} - -// Check if a decision should be processed by this controller. -func (c *Controller) shouldReconcileDecision(decision *v1alpha1.Decision) bool { - // Ignore decisions not created by this operator. 
- if decision.Spec.SchedulingDomain != c.Config.SchedulingDomain { - return false - } - // Ignore decisions that already have an explanation. - if decision.Status.Explanation != "" { - return false - } - // Ignore decisions that have no result yet. - if decision.Status.Result == nil { - return false - } - return true -} - -// This loop will be called by the controller-runtime for each decision -// resource that needs to be reconciled. -func (c *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - log := ctrl.LoggerFrom(ctx) - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - log.Error(err, "failed to get decision", "name", req.NamespacedName) - return ctrl.Result{}, client.IgnoreNotFound(err) - } - // Reconcile the history. - if err := c.reconcileHistory(ctx, decision); err != nil { - return ctrl.Result{}, err - } - // Reconcile the explanation. - if err := c.reconcileExplanation(ctx, decision); err != nil { - return ctrl.Result{}, err - } - log.Info("successfully reconciled decision explanation", "name", req.NamespacedName) - return ctrl.Result{}, nil -} - -// Process the history for the given decision. -func (c *Controller) reconcileHistory(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - // Get all previous decisions for the same ResourceID. 
- var previousDecisions v1alpha1.DecisionList - if c.SkipIndexFields { - // When field indexing is skipped, list all decisions and filter manually - if err := c.List(ctx, &previousDecisions); err != nil { - log.Error(err, "failed to list all decisions", "resourceID", decision.Spec.ResourceID) - return err - } - // Filter to only decisions with matching ResourceID - var filteredDecisions []v1alpha1.Decision - for _, prevDecision := range previousDecisions.Items { - if prevDecision.Spec.ResourceID == decision.Spec.ResourceID { - filteredDecisions = append(filteredDecisions, prevDecision) - } - } - previousDecisions.Items = filteredDecisions - } else { - // Use field indexing for efficient lookup - if err := c.List(ctx, &previousDecisions, client.MatchingFields{"spec.resourceID": decision.Spec.ResourceID}); err != nil { - log.Error(err, "failed to list previous decisions", "resourceID", decision.Spec.ResourceID) - return err - } - } - history := []corev1.ObjectReference{} // Not var-init so we see the empty slice. - // Make sure the resulting history will be in chronological order. - sort.Slice(previousDecisions.Items, func(i, j int) bool { - t1 := previousDecisions.Items[i].CreationTimestamp - t2 := previousDecisions.Items[j].CreationTimestamp - return t1.Before(&t2) - }) - for _, prevDecision := range previousDecisions.Items { - // Skip the current decision. - if prevDecision.Name == decision.Name && prevDecision.Namespace == decision.Namespace { - continue - } - // Skip decisions that were made after the current one. 
- if prevDecision.CreationTimestamp.After(decision.CreationTimestamp.Time) { - continue - } - history = append(history, corev1.ObjectReference{ - Kind: "Decision", - Namespace: prevDecision.Namespace, - Name: prevDecision.Name, - UID: prevDecision.UID, - }) - } - old := decision.DeepCopy() - decision.Status.History = &history - precedence := len(history) - decision.Status.Precedence = &precedence - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - log.Error(err, "failed to patch decision status with history", "name", decision.Name) - return err - } - log.Info("successfully reconciled decision history", "name", decision.Name) - return nil -} - -// Process the explanation for the given decision. -func (c *Controller) reconcileExplanation(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - explainer, err := NewExplainer(c.Client) - if err != nil { - log.Error(err, "failed to create explainer", "name", decision.Name) - return err - } - explanationText, err := explainer.Explain(ctx, decision) - if err != nil { - log.Error(err, "failed to explain decision", "name", decision.Name) - return err - } - old := decision.DeepCopy() - decision.Status.Explanation = explanationText - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - log.Error(err, "failed to patch decision status with explanation", "name", decision.Name) - return err - } - log.Info("successfully reconciled decision explanation", "name", decision.Name) - return nil -} - -// This function will be called when the manager starts up. Must block. -func (c *Controller) StartupCallback(ctx context.Context) error { - // Reprocess all existing decisions that need an explanation. 
- var decisions v1alpha1.DecisionList - if err := c.List(ctx, &decisions); err != nil { - return err - } - for _, decision := range decisions.Items { - if !c.shouldReconcileDecision(&decision) { - continue - } - if _, err := c.Reconcile(ctx, ctrl.Request{ - NamespacedName: client.ObjectKey{ - Namespace: decision.Namespace, - Name: decision.Name, - }, - }); err != nil { - return err - } - } - return nil -} - -// This function sets up the controller with the provided manager. -func (c *Controller) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { - if !c.SkipIndexFields { - ctx := context.Background() - obj := &v1alpha1.Decision{} - lst := &v1alpha1.DecisionList{} - idx := "spec.resourceID" - fnc := func(obj client.Object) []string { - decision := obj.(*v1alpha1.Decision) - return []string{decision.Spec.ResourceID} - } - if err := mcl.IndexField(ctx, obj, lst, idx, fnc); err != nil { - return err - } - } - if err := mgr.Add(manager.RunnableFunc(c.StartupCallback)); err != nil { - return err - } - return multicluster.BuildController(mcl, mgr). - Named("explanation-controller"). - For( - &v1alpha1.Decision{}, - builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - return c.shouldReconcileDecision(decision) - })), - ). 
- Complete(c) -} diff --git a/internal/scheduling/explanation/controller_test.go b/internal/scheduling/explanation/controller_test.go deleted file mode 100644 index f287b4995..000000000 --- a/internal/scheduling/explanation/controller_test.go +++ /dev/null @@ -1,589 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "testing" - "time" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestController_shouldReconcileDecision(t *testing.T) { - controller := &Controller{ - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expected bool - }{ - { - name: "should reconcile nova decision without explanation", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - expected: true, - }, - { - name: "should not reconcile decision from different operator", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: "different-operator", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - }, - }, - expected: false, - }, - { - name: "should not reconcile decision with existing explanation", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "Already has explanation", - }, - }, - expected: false, - }, - { - name: "should not reconcile non-nova decision", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: 
v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - }, - }, - expected: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := controller.shouldReconcileDecision(tt.decision) - if result != tt.expected { - t.Errorf("shouldReconcileDecision() = %v, expected %v", result, tt.expected) - } - }) - } -} - -func TestController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - existingDecisions []v1alpha1.Decision - expectError bool - expectRequeue bool - expectedExplanation string - expectedHistoryLength int - }{ - { - name: "decision not found", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nonexistent-decision", - Namespace: "default", - }, - }, - expectError: false, // controller-runtime ignores not found errors - }, - { - name: "reconcile decision without history", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-1", - }, - Status: v1alpha1.DecisionStatus{}, - }, - expectedExplanation: "Initial placement of the nova server", - expectedHistoryLength: 0, - }, - { - name: "reconcile decision with history", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-2", - }, - Status: v1alpha1.DecisionStatus{}, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: 
metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-2", - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - }, - expectedHistoryLength: 1, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - var objects []runtime.Object - if tt.name != "decision not found" { - objects = append(objects, tt.decision) - } - for i := range tt.existingDecisions { - objects = append(objects, &tt.existingDecisions[i]) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - SkipIndexFields: true, // Skip field indexing for testing - } - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - Namespace: tt.decision.Namespace, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Errorf("Expected error but got none") - return - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - if tt.expectRequeue && result.RequeueAfter == 0 { - t.Errorf("Expected requeue but got none") - } - if !tt.expectRequeue && result.RequeueAfter > 0 { - t.Errorf("Expected no requeue but got %v", result.RequeueAfter) - } - - // Only check results if we expect the decision to exist - if tt.name != "decision not found" { - // Verify the decision was updated - var updated v1alpha1.Decision - err = client.Get(context.Background(), req.NamespacedName, &updated) - if err != nil { - t.Errorf("Failed to get updated decision: %v", err) - return - } - - if tt.expectedExplanation != "" && !contains(updated.Status.Explanation, tt.expectedExplanation) { - 
t.Errorf("Expected explanation to contain '%s', but got: %s", tt.expectedExplanation, updated.Status.Explanation) - } - - if updated.Status.History != nil && len(*updated.Status.History) != tt.expectedHistoryLength { - t.Errorf("Expected history length %d, got %d", tt.expectedHistoryLength, len(*updated.Status.History)) - } - } - }) - } -} - -func TestController_reconcileHistory(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - existingDecisions []v1alpha1.Decision - expectedHistory int - expectError bool - }{ - { - name: "no previous decisions", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-1", - }, - }, - expectedHistory: 0, - }, - { - name: "one previous decision", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-2", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-2", - }, - }, - }, - expectedHistory: 1, - }, - { - name: "multiple previous decisions in correct order", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-3", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(2 * time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-3", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - 
CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-3", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-3", - }, - }, - }, - expectedHistory: 2, - }, - { - name: "exclude future decisions", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-4", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-4", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-3", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(2 * time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-4", - }, - }, - }, - expectedHistory: 1, // Only test-decision-1 should be included - }, - { - name: "exclude decisions with different ResourceID", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-target", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "target-resource", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-same", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "target-resource", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-different", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: 
time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "different-resource", - }, - }, - }, - expectedHistory: 1, // Only same ResourceID should be included - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.decision} - for i := range tt.existingDecisions { - objects = append(objects, &tt.existingDecisions[i]) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - SkipIndexFields: true, // Skip field indexing for testing - } - - err := controller.reconcileHistory(context.Background(), tt.decision) - - if tt.expectError && err == nil { - t.Errorf("Expected error but got none") - return - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - if tt.decision.Status.History == nil { - if tt.expectedHistory != 0 { - t.Errorf("Expected history length %d, got nil", tt.expectedHistory) - } - } else if len(*tt.decision.Status.History) != tt.expectedHistory { - t.Errorf("Expected history length %d, got %d", tt.expectedHistory, len(*tt.decision.Status.History)) - } - }) - } -} - -func TestController_reconcileExplanation(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: nil, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision). - WithStatusSubresource(&v1alpha1.Decision{}). 
- Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - } - - err := controller.reconcileExplanation(context.Background(), decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - if decision.Status.Explanation == "" { - t.Error("Expected explanation to be set but it was empty") - } - - if !contains(decision.Status.Explanation, "Initial placement of the nova server") { - t.Errorf("Expected explanation to contain nova server text, got: %s", decision.Status.Explanation) - } -} - -func TestController_StartupCallback(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - // Create a decision that should be reconciled - decision1 := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-1", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", // Empty explanation means it should be reconciled - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - } - - // Create a decision that should not be reconciled (already has explanation) - decision2 := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-2", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "Already has explanation", - }, - } - - // Create a decision from different operator that should not be reconciled - decision3 := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-3", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: "different-operator", - ResourceID: "test-resource-3", - }, - Status: 
v1alpha1.DecisionStatus{ - Explanation: "", - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision1, decision2, decision3). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - SkipIndexFields: true, // Skip field indexing for testing - } - - err := controller.StartupCallback(context.Background()) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - // Verify that decision1 now has an explanation - var updated1 v1alpha1.Decision - err = client.Get(context.Background(), types.NamespacedName{Name: "test-decision-1", Namespace: "default"}, &updated1) - if err != nil { - t.Errorf("Failed to get updated decision1: %v", err) - } - - if updated1.Status.Explanation == "" { - t.Error("Expected decision1 to have explanation after startup callback") - } - - // Verify that decision2 explanation remains unchanged - var updated2 v1alpha1.Decision - err = client.Get(context.Background(), types.NamespacedName{Name: "test-decision-2", Namespace: "default"}, &updated2) - if err != nil { - t.Errorf("Failed to get updated decision2: %v", err) - } - - if updated2.Status.Explanation != "Already has explanation" { - t.Errorf("Expected decision2 explanation to remain unchanged, got: %s", updated2.Status.Explanation) - } - - // Verify that decision3 explanation remains empty (different operator) - var updated3 v1alpha1.Decision - err = client.Get(context.Background(), types.NamespacedName{Name: "test-decision-3", Namespace: "default"}, &updated3) - if err != nil { - t.Errorf("Failed to get updated decision3: %v", err) - } - - if updated3.Status.Explanation != "" { - t.Errorf("Expected decision3 explanation to remain empty, got: %s", updated3.Status.Explanation) - } -} diff --git a/internal/scheduling/explanation/explainer.go b/internal/scheduling/lib/explainer.go similarity index 99% rename 
from internal/scheduling/explanation/explainer.go rename to internal/scheduling/lib/explainer.go index a5f199fae..c45f11ee9 100644 --- a/internal/scheduling/explanation/explainer.go +++ b/internal/scheduling/lib/explainer.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package explanation +package lib import ( "context" @@ -53,6 +53,8 @@ func (e *Explainer) getResourceType(schedulingDomain v1alpha1.SchedulingDomain) return "cinder volume" case v1alpha1.SchedulingDomainMachines: return "ironcore machine" + case v1alpha1.SchedulingDomainPods: + return "pod" default: return "resource" } diff --git a/internal/scheduling/explanation/templates.go b/internal/scheduling/lib/explainer_templates.go similarity index 99% rename from internal/scheduling/explanation/templates.go rename to internal/scheduling/lib/explainer_templates.go index dc7160c07..4476b8b1a 100644 --- a/internal/scheduling/explanation/templates.go +++ b/internal/scheduling/lib/explainer_templates.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package explanation +package lib import ( "bytes" diff --git a/internal/scheduling/explanation/explainer_test.go b/internal/scheduling/lib/explainer_test.go similarity index 99% rename from internal/scheduling/explanation/explainer_test.go rename to internal/scheduling/lib/explainer_test.go index ed1d52e13..0afc8bd9f 100644 --- a/internal/scheduling/explanation/explainer_test.go +++ b/internal/scheduling/lib/explainer_test.go @@ -1,7 +1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package explanation +package lib import ( "context" diff --git a/internal/scheduling/explanation/types.go b/internal/scheduling/lib/explainer_types.go similarity index 99% rename from internal/scheduling/explanation/types.go rename to internal/scheduling/lib/explainer_types.go index 31f6d0aa1..2d3c71be7 100644 --- a/internal/scheduling/explanation/types.go +++ b/internal/scheduling/lib/explainer_types.go @@ -1,7 
+1,7 @@ // Copyright SAP SE // SPDX-License-Identifier: Apache-2.0 -package explanation +package lib import "time" diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index 3201dd982..45a3b820f 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -11,10 +11,8 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -30,6 +28,64 @@ type BasePipelineController[PipelineType any] struct { client.Client // The scheduling domain to scope resources to. SchedulingDomain v1alpha1.SchedulingDomain + + DecisionQueue chan DecisionUpdate +} + +type DecisionUpdate struct { + ResourceID string + PipelineName string + Result FilterWeigherPipelineResult + Reason v1alpha1.SchedulingReason + SchedulingDomain v1alpha1.SchedulingDomain +} + +func (c *BasePipelineController[PipelineType]) StartExplainer(ctx context.Context) { + c.DecisionQueue = make(chan DecisionUpdate, 100) + log := ctrl.LoggerFrom(ctx) + for { + select { + case <-ctx.Done(): + return + case update := <-c.DecisionQueue: + if err := c.updateDecision(ctx, update); err != nil { + log.Error(err, "failed to update decision", "resourceID", update.ResourceID) + } + } + } +} + +func (c *BasePipelineController[PipelineType]) updateDecision(ctx context.Context, update DecisionUpdate) error { + log := ctrl.LoggerFrom(ctx) + log.Info("Explaining decision for resource", "resourceID", update.ResourceID, "pipelineName", update.PipelineName) + + explainer, err := NewExplainer(c.Client) + if err != nil { + return fmt.Errorf("failed to create explainer: %w", err) + } + + explanationText, err := explainer.Explain(ctx, 
update.ResourceID, update.PipelineName, update.RequestContext, update.Reason, update.Result) + if err != nil { + return fmt.Errorf("failed to generate explanation: %w", err) + } + + // Update the decision with the explanation. + decision := &v1alpha1.Decision{} + if err := c.Get(ctx, client.ObjectKey{Name: update.ResourceID}, decision); err != nil { + return fmt.Errorf("failed to get decision: %w", err) + } + + if decision.Status.Result == nil { + return errors.New("cannot update decision explanation: result is nil") + } + + decision.Status.Explanation = explanationText + if err := c.Status().Update(ctx, decision); err != nil { + return fmt.Errorf("failed to update decision status: %w", err) + } + + log.Info("Successfully updated decision explanation", "resourceID", update.ResourceID) + return nil } // Handle the startup of the manager by initializing the pipeline map. @@ -51,17 +107,40 @@ func (c *BasePipelineController[PipelineType]) InitAllPipelines(ctx context.Cont continue } log.Info("initializing existing pipeline", "pipelineName", pipelineConf.Name) - c.handlePipelineChange(ctx, &pipelineConf, nil) + c.handlePipelineChange(ctx, &pipelineConf) c.PipelineConfigs[pipelineConf.Name] = pipelineConf } return nil } +func (c *BasePipelineController[PipelineType]) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + log.Info("reconcile called for pipeline", "pipelineName", req.NamespacedName) + + pipeline := &v1alpha1.Pipeline{} + err := c.Get(ctx, req.NamespacedName, pipeline) + + if err != nil { + if client.IgnoreNotFound(err) != nil { + // Pipeline was deleted + log.Info("pipeline deleted, removing from cache", "pipelineName", req.Name) + delete(c.Pipelines, req.Name) + delete(c.PipelineConfigs, req.Name) + return ctrl.Result{}, nil + } + log.Error(err, "failed to get pipeline", "pipelineName", req.NamespacedName) + return ctrl.Result{}, fmt.Errorf("failed to get pipeline: %w", err) + } + + 
c.handlePipelineChange(ctx, pipeline) + + return ctrl.Result{}, nil +} + // Handle a pipeline creation or update event from watching pipeline resources. func (c *BasePipelineController[PipelineType]) handlePipelineChange( ctx context.Context, obj *v1alpha1.Pipeline, - _ workqueue.TypedRateLimitingInterface[reconcile.Request], ) { if obj.Spec.SchedulingDomain != c.SchedulingDomain { @@ -141,123 +220,14 @@ func (c *BasePipelineController[PipelineType]) handlePipelineChange( } } -// Handler bound to a pipeline watch to handle created pipelines. -// -// This handler will initialize new pipelines as needed and put them into the -// pipeline map. -func (c *BasePipelineController[PipelineType]) HandlePipelineCreated( - ctx context.Context, - evt event.CreateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - pipelineConf := evt.Object.(*v1alpha1.Pipeline) - c.handlePipelineChange(ctx, pipelineConf, queue) -} - -// Handler bound to a pipeline watch to handle updated pipelines. -// -// This handler will initialize new pipelines as needed and put them into the -// pipeline map. -func (c *BasePipelineController[PipelineType]) HandlePipelineUpdated( - ctx context.Context, - evt event.UpdateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - pipelineConf := evt.ObjectNew.(*v1alpha1.Pipeline) - c.handlePipelineChange(ctx, pipelineConf, queue) -} - -// Handler bound to a pipeline watch to handle deleted pipelines. -// -// This handler will remove pipelines from the pipeline map. -func (c *BasePipelineController[PipelineType]) HandlePipelineDeleted( - ctx context.Context, - evt event.DeleteEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - pipelineConf := evt.Object.(*v1alpha1.Pipeline) - delete(c.Pipelines, pipelineConf.Name) - delete(c.PipelineConfigs, pipelineConf.Name) -} - -// Handle a knowledge creation, readiness update, or delete event from watching knowledge resources. 
-func (c *BasePipelineController[PipelineType]) handleKnowledgeChange( - ctx context.Context, - obj *v1alpha1.Knowledge, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - if obj.Spec.SchedulingDomain != c.SchedulingDomain { - return - } - log := ctrl.LoggerFrom(ctx) - log.Info("knowledge changed, re-evaluating all pipelines", "knowledgeName", obj.Name) - // Find all pipelines depending on this knowledge and re-evaluate them. - var pipelines v1alpha1.PipelineList - if err := c.List(ctx, &pipelines); err != nil { - log.Error(err, "failed to list pipelines for knowledge change", "knowledgeName", obj.Name) - return - } - for _, pipeline := range pipelines.Items { - // TODO: Not all pipelines may depend on this knowledge. At the moment - // we re-evaluate all pipelines matching this controller. - if pipeline.Spec.SchedulingDomain != c.SchedulingDomain { - continue - } - if pipeline.Spec.Type != c.Initializer.PipelineType() { - continue - } - c.handlePipelineChange(ctx, &pipeline, queue) - } -} - -// Handler bound to a knowledge watch to handle created knowledges. -// -// This handler will re-evaluate all pipelines depending on the knowledge. -func (c *BasePipelineController[PipelineType]) HandleKnowledgeCreated( - ctx context.Context, - evt event.CreateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - knowledgeConf := evt.Object.(*v1alpha1.Knowledge) - c.handleKnowledgeChange(ctx, knowledgeConf, queue) -} - -// Handler bound to a knowledge watch to handle updated knowledges. -// -// This handler will re-evaluate all pipelines depending on the knowledge. 
-func (c *BasePipelineController[PipelineType]) HandleKnowledgeUpdated( - ctx context.Context, - evt event.UpdateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - before := evt.ObjectOld.(*v1alpha1.Knowledge) - after := evt.ObjectNew.(*v1alpha1.Knowledge) - errorBefore := meta.IsStatusConditionFalse(before.Status.Conditions, v1alpha1.KnowledgeConditionReady) - errorAfter := meta.IsStatusConditionFalse(after.Status.Conditions, v1alpha1.KnowledgeConditionReady) - errorChanged := errorBefore != errorAfter - dataBecameAvailable := before.Status.RawLength == 0 && after.Status.RawLength > 0 - if !errorChanged && !dataBecameAvailable { - // No relevant change, skip re-evaluation. - return +// GetAllPipelineReconcileRequests returns reconcile requests for all pipelines +// managed by this controller. Used when Knowledge changes require pipeline re-evaluation. +func (c *BasePipelineController[PipelineType]) GetAllPipelineReconcileRequests(ctx context.Context) []reconcile.Request { + var requests []reconcile.Request + for name := range c.Pipelines { + requests = append(requests, reconcile.Request{ + NamespacedName: client.ObjectKey{Name: name}, + }) } - c.handleKnowledgeChange(ctx, after, queue) -} - -// Handler bound to a knowledge watch to handle deleted knowledges. -// -// This handler will re-evaluate all pipelines depending on the knowledge. 
-func (c *BasePipelineController[PipelineType]) HandleKnowledgeDeleted( - ctx context.Context, - evt event.DeleteEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - knowledgeConf := evt.Object.(*v1alpha1.Knowledge) - c.handleKnowledgeChange(ctx, knowledgeConf, queue) + return requests } diff --git a/internal/scheduling/manila/external_scheduler_api.go b/internal/scheduling/manila/external_scheduler_api.go index 2ad5a9265..ece984407 100644 --- a/internal/scheduling/manila/external_scheduler_api.go +++ b/internal/scheduling/manila/external_scheduler_api.go @@ -14,19 +14,15 @@ import ( "net/http" api "github.com/cobaltcore-dev/cortex/api/external/manila" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/metrics" ) type HTTPAPIDelegate interface { - // Process the decision from the API. Should create and return the updated decision. - ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error + // Process the scheduling request from the API. + ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } type HTTPAPI interface { @@ -106,7 +102,6 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R c.Respond(http.StatusInternalServerError, err, "failed to read request body") return } - raw := runtime.RawExtension{Raw: body} var requestData api.ExternalSchedulerRequest // Copy the raw body to a io.Reader for json deserialization. 
cp := body @@ -137,35 +132,19 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) } - // Create the decision object in kubernetes. - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "manila-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: requestData.Pipeline, - }, - ResourceID: "", // TODO model out the spec. - ManilaRaw: &raw, - }, - } ctx := r.Context() - if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision") - return - } - // Check if the decision contains status conditions indicating an error. - if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) { - c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") + + result, err := httpAPI.delegate.ProcessRequest(ctx, requestData) + if err != nil { + c.Respond(http.StatusInternalServerError, err, "failed to process scheduling request") return } - if decision.Status.Result == nil { - c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") + if result == nil { + c.Respond(http.StatusInternalServerError, errors.New("pipeline didn't produce a result"), "failed to process scheduling request") return } - hosts := decision.Status.Result.OrderedHosts + + hosts := result.OrderedHosts response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller.go b/internal/scheduling/manila/filter_weigher_pipeline_controller.go index 3b63d64e0..9f9e1c23a 100644 --- 
a/internal/scheduling/manila/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller.go @@ -5,16 +5,12 @@ package manila import ( "context" - "encoding/json" - "errors" "fmt" "sync" "time" api "github.com/cobaltcore-dev/cortex/api/external/manila" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/internal/scheduling/manila/plugins/filters" @@ -26,6 +22,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // The decision pipeline controller takes decision resources containing a @@ -50,93 +47,43 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -// Callback executed when kubernetes asks to reconcile a decision resource. -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - c.processMu.Lock() - defer c.processMu.Unlock() - - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} - // Process the decision from the API. Should create and return the updated decision. 
-func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { c.processMu.Lock() defer c.processMu.Unlock() - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] - if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) - } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } - } - return err -} - -func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. 
+ startedAt := time.Now() + + pipelineName := request.Pipeline - pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipeline, ok := c.Pipelines[pipelineName] if !ok { - log.Error(nil, "skipping decision, pipeline not found or not ready") - return errors.New("pipeline not found or not ready") - } - if decision.Spec.ManilaRaw == nil { - log.Error(nil, "skipping decision, no manilaRaw spec defined") - return errors.New("no manilaRaw spec defined") + return nil, fmt.Errorf("pipeline %s not found or not ready", pipelineName) } - var request api.ExternalSchedulerRequest - if err := json.Unmarshal(decision.Spec.ManilaRaw.Raw, &request); err != nil { - log.Error(err, "failed to unmarshal manilaRaw spec") - return err + pipelineConfig, ok := c.PipelineConfigs[pipelineName] + if !ok { + log.Error(nil, "pipeline config not found", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline config for %s not found", pipelineName) } result, err := pipeline.Run(request) if err != nil { - log.Error(err, "failed to run pipeline") - return err + log.Error(err, "failed to run pipeline", "pipeline", pipelineName) + return nil, err + } + log.Info("request processed successfully", "duration", time.Since(startedAt)) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + // TODO model out the spec. + ResourceID: "", + PipelineName: pipelineName, + Result: result, + Reason: v1alpha1.SchedulingReasonUnknown, + } } - decision.Status.Result = &result - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - return nil + return &result, nil } // The base controller will delegate the pipeline creation down to this method. @@ -160,50 +107,33 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, return err } return multicluster.BuildController(mcl, mgr). - // Watch pipeline changes so that we can reconfigure pipelines as needed. 
- WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. - if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { - return false - } - return pipeline.Spec.Type == c.PipelineType() - }), - ). // Watch knowledge changes so that we can reconfigure pipelines as needed. WatchesMulticluster( &v1alpha1.Knowledge{}, - handler.Funcs{ - CreateFunc: c.HandleKnowledgeCreated, - UpdateFunc: c.HandleKnowledgeUpdated, - DeleteFunc: c.HandleKnowledgeDeleted, - }, + // Get all pipelines of the controller when knowledge changes and trigger reconciliation to update the candidates in the pipelines. + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { + knowledge := obj.(*v1alpha1.Knowledge) + if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { + return nil + } + // When Knowledge changes, reconcile all pipelines + return c.GetAllPipelineReconcileRequests(ctx) + }), predicate.NewPredicateFuncs(func(obj client.Object) bool { knowledge := obj.(*v1alpha1.Knowledge) // Only react to knowledge matching the scheduling domain. return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainManila }), ). - Named("cortex-manila-decisions"). + Named("cortex-manila-pipelines"). For( - &v1alpha1.Decision{}, + &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { - return false - } - // Ignore already decided schedulings. 
- if decision.Status.Result != nil { + pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { return false } - return true + return pipeline.Spec.Type == c.PipelineType() })), ). Complete(c) diff --git a/internal/scheduling/nova/detector_pipeline_controller.go b/internal/scheduling/nova/detector_pipeline_controller.go index 7df8a6f5e..65b92c38d 100644 --- a/internal/scheduling/nova/detector_pipeline_controller.go +++ b/internal/scheduling/nova/detector_pipeline_controller.go @@ -21,6 +21,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // The deschedulings pipeline controller is responsible for periodically running @@ -123,11 +124,6 @@ func (c *DetectorPipelineController) CreateDeschedulingsPeriodically(ctx context } } -func (c *DetectorPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - // This controller does not reconcile any resources directly. - return ctrl.Result{}, nil -} - func (c *DetectorPipelineController) SetupWithManager(mgr ctrl.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainNova @@ -137,40 +133,32 @@ func (c *DetectorPipelineController) SetupWithManager(mgr ctrl.Manager, mcl *mul return multicluster.BuildController(mcl, mgr). // Watch pipeline changes so that we can reconfigure pipelines as needed. WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. 
- if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - return false + &v1alpha1.Knowledge{}, + // Get all pipelines of the controller when knowledge changes and trigger reconciliation to update the candidates in the pipelines. + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { + knowledge := obj.(*v1alpha1.Knowledge) + if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { + return nil } - return pipeline.Spec.Type == c.PipelineType() + // When Knowledge changes, reconcile all pipelines + return c.GetAllPipelineReconcileRequests(ctx) }), - ). - // Watch knowledge changes so that we can reconfigure pipelines as needed. - WatchesMulticluster( - &v1alpha1.Knowledge{}, - handler.Funcs{ - CreateFunc: c.HandleKnowledgeCreated, - UpdateFunc: c.HandleKnowledgeUpdated, - DeleteFunc: c.HandleKnowledgeDeleted, - }, predicate.NewPredicateFuncs(func(obj client.Object) bool { knowledge := obj.(*v1alpha1.Knowledge) // Only react to knowledge matching the scheduling domain. return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainNova }), ). - Named("cortex-nova-deschedulings"). + // Reconcile pipelines matching this controller's scheduling domain and pipeline type. + Named("cortex-nova-pipelines"). For( - &v1alpha1.Descheduling{}, + &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - return false // This controller does not reconcile Descheduling resources directly. + pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { + return false + } + return pipeline.Spec.Type == c.PipelineType() })), ). 
Complete(c) diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index 0bd8c6c5e..12de74e62 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -15,13 +15,9 @@ import ( "slices" api "github.com/cobaltcore-dev/cortex/api/external/nova" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/metrics" ) @@ -32,8 +28,8 @@ type HTTPAPIConfig struct { } type HTTPAPIDelegate interface { - // Process the decision from the API. Should create and return the updated decision. - ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error + // Process the scheduling request from the API. + ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } type HTTPAPI interface { @@ -161,7 +157,6 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req c.Respond(http.StatusInternalServerError, err, "failed to read request body") return } - raw := runtime.RawExtension{Raw: body} var requestData api.ExternalSchedulerRequest // Copy the raw body to a io.Reader for json deserialization. cp := body @@ -181,7 +176,7 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req return } - // If the pipeline name is not set, infer it from the request data. + // If the pipeline name is not set, infer a default from the request data. 
if requestData.Pipeline == "" { var err error requestData.Pipeline, err = httpAPI.inferPipelineName(requestData) @@ -192,38 +187,17 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) } - decision := &v1alpha1.Decision{ - TypeMeta: metav1.TypeMeta{ - Kind: "Decision", - APIVersion: "cortex.cloud/v1alpha1", - }, - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "nova-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: requestData.Pipeline, - }, - ResourceID: requestData.Spec.Data.InstanceUUID, - NovaRaw: &raw, - }, - } ctx := r.Context() - if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision") - return - } - // Check if the decision contains status conditions indicating an error. - if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) { - c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") + result, err := httpAPI.delegate.ProcessRequest(ctx, requestData) + if err != nil { + c.Respond(http.StatusInternalServerError, err, "failed to process scheduling request") return } - if decision.Status.Result == nil { - c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") + if result == nil { + c.Respond(http.StatusInternalServerError, errors.New("pipeline didn't produce a result"), "failed to process scheduling request") return } - hosts := decision.Status.Result.OrderedHosts + hosts := result.OrderedHosts response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { diff --git 
a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go index 9c10a57b2..936a3f916 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -5,16 +5,12 @@ package nova import ( "context" - "encoding/json" - "errors" "fmt" "sync" "time" api "github.com/cobaltcore-dev/cortex/api/external/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins/filters" @@ -27,6 +23,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // The decision pipeline controller takes decision resources containing a @@ -53,115 +50,41 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -// Callback executed when kubernetes asks to reconcile a decision resource. -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +// Process the request from the API. Returns the result of the pipeline execution. 
+func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, pipelineName string, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { c.processMu.Lock() defer c.processMu.Unlock() - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} - -// Process the decision from the API. Should create and return the updated decision. -func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { - c.processMu.Lock() - defer c.processMu.Unlock() - - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] - if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) - } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } - } - return err -} - -func (c *FilterWeigherPipelineController) process(ctx 
context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. + startedAt := time.Now() - pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipeline, ok := c.Pipelines[pipelineName] if !ok { - log.Error(nil, "pipeline not found or not ready", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline not found or not ready") + log.Error(nil, "pipeline not found or not ready", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline %s not found or not ready", pipelineName) } - if decision.Spec.NovaRaw == nil { - log.Error(nil, "skipping decision, no novaRaw spec defined") - return errors.New("no novaRaw spec defined") - } - var request api.ExternalSchedulerRequest - if err := json.Unmarshal(decision.Spec.NovaRaw.Raw, &request); err != nil { - log.Error(err, "failed to unmarshal novaRaw spec") - return err - } - - // If necessary gather all placement candidates before filtering. - // This will override the hosts and weights in the nova request. 
- pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] + pipelineConfig, ok := c.PipelineConfigs[pipelineName] if !ok { - log.Error(nil, "pipeline config not found", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline config not found") - } - if pipelineConf.Spec.IgnorePreselection { - log.Info("gathering all placement candidates before filtering") - if err := c.gatherer.MutateWithAllCandidates(ctx, &request); err != nil { - log.Error(err, "failed to gather all placement candidates") - return err - } - log.Info("gathered all placement candidates", "numHosts", len(request.Hosts)) + log.Error(nil, "pipeline config not found", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline config for %s not found", pipelineName) } result, err := pipeline.Run(request) if err != nil { - log.Error(err, "failed to run pipeline") - return err + log.Error(err, "failed to run pipeline", "pipeline", pipelineName) + return nil, err + } + log.Info("request processed successfully", "duration", time.Since(startedAt)) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + ResourceID: request.Spec.Data.InstanceUUID, + PipelineName: pipelineName, + Result: result, + Reason: v1alpha1.SchedulingReasonUnknown, + } } - decision.Status.Result = &result - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - return nil + return &result, nil } // The base controller will delegate the pipeline creation down to this method. @@ -186,31 +109,18 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, return err } return multicluster.BuildController(mcl, mgr). - // Watch pipeline changes so that we can reconfigure pipelines as needed. 
- WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. - if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - return false - } - return pipeline.Spec.Type == c.PipelineType() - }), - ). // Watch knowledge changes so that we can reconfigure pipelines as needed. WatchesMulticluster( &v1alpha1.Knowledge{}, - handler.Funcs{ - CreateFunc: c.HandleKnowledgeCreated, - UpdateFunc: c.HandleKnowledgeUpdated, - DeleteFunc: c.HandleKnowledgeDeleted, - }, + // Get all pipelines of the controller when knowledge changes and trigger reconciliation to update the candidates in the pipelines. + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { + knowledge := obj.(*v1alpha1.Knowledge) + if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { + return nil + } + // When Knowledge changes, reconcile all pipelines + return c.GetAllPipelineReconcileRequests(ctx) + }), predicate.NewPredicateFuncs(func(obj client.Object) bool { knowledge := obj.(*v1alpha1.Knowledge) // Only react to knowledge matching the scheduling domain. @@ -219,19 +129,15 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, ). // Watch hypervisor changes so the cache gets updated. WatchesMulticluster(&hv1.Hypervisor{}, handler.Funcs{}). - Named("cortex-nova-decisions"). + Named("cortex-nova-pipelines"). For( - &v1alpha1.Decision{}, + &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - return false - } - // Ignore already decided schedulings. 
- if decision.Status.Result != nil { + pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { return false } - return true + return pipeline.Spec.Type == c.PipelineType() })), ). Complete(c) From 2e5f6e49d0cb2ce2e3a9f7babd3b728bebcbc567 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 19 Feb 2026 16:06:00 +0100 Subject: [PATCH 10/36] Remove explanation controller from config --- cmd/main.go | 22 ++++--------------- helm/bundles/cortex-cinder/values.yaml | 1 - helm/bundles/cortex-ironcore/values.yaml | 1 - helm/bundles/cortex-manila/values.yaml | 1 - helm/bundles/cortex-nova/values.yaml | 1 - helm/bundles/cortex-pods/values.yaml | 1 - helm/library/cortex/values.yaml | 4 +--- internal/scheduling/lib/explainer.go | 6 ++--- .../scheduling/lib/pipeline_controller.go | 9 ++++---- .../filter_weigher_pipeline_controller.go | 4 +++- 10 files changed, 15 insertions(+), 35 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 294d9b242..bf6cc018a 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -41,7 +41,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor" "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis" "github.com/cobaltcore-dev/cortex/internal/scheduling/cinder" - "github.com/cobaltcore-dev/cortex/internal/scheduling/explanation" schedulinglib "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/internal/scheduling/machines" "github.com/cobaltcore-dev/cortex/internal/scheduling/manila" @@ -293,17 +292,17 @@ func main() { metrics.Registry.MustRegister(&pipelineMonitor) if slices.Contains(mainConfig.EnabledControllers, "nova-decisions-pipeline-controller") { - decisionController := &nova.FilterWeigherPipelineController{ + pipelineController := &nova.FilterWeigherPipelineController{ Monitor: pipelineMonitor, } // Inferred through the base controller. 
- decisionController.Client = multiclusterClient - if err := (decisionController).SetupWithManager(mgr, multiclusterClient); err != nil { + pipelineController.Client = multiclusterClient + if err := (pipelineController).SetupWithManager(mgr, multiclusterClient); err != nil { setupLog.Error(err, "unable to create controller", "controller", "DecisionReconciler") os.Exit(1) } httpAPIConf := conf.GetConfigOrDie[nova.HTTPAPIConfig]() - nova.NewAPI(httpAPIConf, decisionController).Init(mux) + nova.NewAPI(httpAPIConf, pipelineController).Init(mux) } if slices.Contains(mainConfig.EnabledControllers, "nova-deschedulings-pipeline-controller") { // Deschedulings controller @@ -404,19 +403,6 @@ func main() { os.Exit(1) } } - if slices.Contains(mainConfig.EnabledControllers, "explanation-controller") { - // Setup a controller which will reconcile the history and explanation for - // decision resources. - explanationControllerConfig := conf.GetConfigOrDie[explanation.ControllerConfig]() - explanationController := &explanation.Controller{ - Client: multiclusterClient, - Config: explanationControllerConfig, - } - if err := explanationController.SetupWithManager(mgr, multiclusterClient); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "ExplanationController") - os.Exit(1) - } - } if slices.Contains(mainConfig.EnabledControllers, "reservations-controller") { monitor := reservationscontroller.NewControllerMonitor(multiclusterClient) metrics.Registry.MustRegister(&monitor) diff --git a/helm/bundles/cortex-cinder/values.yaml b/helm/bundles/cortex-cinder/values.yaml index 33f287722..350e7607f 100644 --- a/helm/bundles/cortex-cinder/values.yaml +++ b/helm/bundles/cortex-cinder/values.yaml @@ -94,7 +94,6 @@ cortex-scheduling-controllers: component: cinder-scheduling enabledControllers: - cinder-decisions-pipeline-controller - - explanation-controller enabledTasks: - cinder-decisions-cleanup-task diff --git a/helm/bundles/cortex-ironcore/values.yaml 
b/helm/bundles/cortex-ironcore/values.yaml index 29458a2b3..59c495c66 100644 --- a/helm/bundles/cortex-ironcore/values.yaml +++ b/helm/bundles/cortex-ironcore/values.yaml @@ -29,7 +29,6 @@ cortex: schedulingDomain: machines enabledControllers: - ironcore-decisions-pipeline-controller - - explanation-controller monitoring: labels: github_org: cobaltcore-dev diff --git a/helm/bundles/cortex-manila/values.yaml b/helm/bundles/cortex-manila/values.yaml index cf74e4444..9a734dd09 100644 --- a/helm/bundles/cortex-manila/values.yaml +++ b/helm/bundles/cortex-manila/values.yaml @@ -94,7 +94,6 @@ cortex-scheduling-controllers: component: manila-scheduling enabledControllers: - manila-decisions-pipeline-controller - - explanation-controller enabledTasks: - manila-decisions-cleanup-task diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index 6df34a9df..4fdcc143f 100644 --- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -106,7 +106,6 @@ cortex-scheduling-controllers: - nova-decisions-pipeline-controller - nova-deschedulings-pipeline-controller - nova-deschedulings-executor - - explanation-controller enabledTasks: - nova-decisions-cleanup-task diff --git a/helm/bundles/cortex-pods/values.yaml b/helm/bundles/cortex-pods/values.yaml index 598283003..3739aef6e 100644 --- a/helm/bundles/cortex-pods/values.yaml +++ b/helm/bundles/cortex-pods/values.yaml @@ -29,7 +29,6 @@ cortex: schedulingDomain: pods enabledControllers: - pods-decisions-pipeline-controller - - explanation-controller monitoring: labels: github_org: cobaltcore-dev diff --git a/helm/library/cortex/values.yaml b/helm/library/cortex/values.yaml index 50a9d48c0..4954c559f 100644 --- a/helm/library/cortex/values.yaml +++ b/helm/library/cortex/values.yaml @@ -106,6 +106,4 @@ conf: schedulingDomain: cortex # Used to differentiate different cortex deployments in the same cluster (e.g. 
leader election ID) leaderElectionID: cortex-unknown - enabledControllers: - # The explanation controller is available for all decision resources. - - explanation-controller + enabledControllers: [] diff --git a/internal/scheduling/lib/explainer.go b/internal/scheduling/lib/explainer.go index c45f11ee9..b32771719 100644 --- a/internal/scheduling/lib/explainer.go +++ b/internal/scheduling/lib/explainer.go @@ -38,7 +38,7 @@ func NewExplainer(client client.Client) (*Explainer, error) { } // Explain the given decision and return a human-readable explanation. -func (e *Explainer) Explain(ctx context.Context, decision *v1alpha1.Decision) (string, error) { +func (e *Explainer) Explain(ctx context.Context, decision DecisionUpdate) (string, error) { return e.ExplainWithTemplates(ctx, decision) } @@ -396,7 +396,7 @@ func (e *Explainer) calculateStepImpacts(inputWeights map[string]float64, stepRe // decision data into formats suitable for template rendering. // buildContextData creates context data for template rendering. -func (e *Explainer) buildContextData(decision *v1alpha1.Decision) ContextData { +func (e *Explainer) buildContextData(decision DecisionUpdate) ContextData { resourceType := e.getResourceType(decision.Spec.SchedulingDomain) history := decision.Status.History @@ -654,7 +654,7 @@ func (e *Explainer) buildChainData(ctx context.Context, decision *v1alpha1.Decis } // ExplainWithTemplates renders an explanation using Go templates. 
-func (e *Explainer) ExplainWithTemplates(ctx context.Context, decision *v1alpha1.Decision) (string, error) { +func (e *Explainer) ExplainWithTemplates(ctx context.Context, decision DecisionUpdate) (string, error) { // Build explanation context explanationCtx := ExplanationContext{ Context: e.buildContextData(decision), diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index 45a3b820f..6924de918 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -33,11 +33,10 @@ type BasePipelineController[PipelineType any] struct { } type DecisionUpdate struct { - ResourceID string - PipelineName string - Result FilterWeigherPipelineResult - Reason v1alpha1.SchedulingReason - SchedulingDomain v1alpha1.SchedulingDomain + ResourceID string + PipelineName string + Result FilterWeigherPipelineResult + Reason v1alpha1.SchedulingReason } func (c *BasePipelineController[PipelineType]) StartExplainer(ctx context.Context) { diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go index 68059852a..bc70e7fbf 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -51,13 +51,15 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { } // Process the request from the API. Returns the result of the pipeline execution. 
-func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, pipelineName string, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { +func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { c.processMu.Lock() defer c.processMu.Unlock() log := ctrl.LoggerFrom(ctx) startedAt := time.Now() + pipelineName := request.Pipeline + pipeline, ok := c.Pipelines[pipelineName] if !ok { log.Error(nil, "pipeline not found or not ready", "pipelineName", pipelineName) From b9d718565e097ee1c91cf23970ffff2ceec14c9f Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 19 Feb 2026 16:18:55 +0100 Subject: [PATCH 11/36] Fix external scheduler api tests for cinder --- .../cinder/external_scheduler_api_test.go | 23 ++++++++----------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/internal/scheduling/cinder/external_scheduler_api_test.go b/internal/scheduling/cinder/external_scheduler_api_test.go index acb640861..35b29fb4a 100644 --- a/internal/scheduling/cinder/external_scheduler_api_test.go +++ b/internal/scheduling/cinder/external_scheduler_api_test.go @@ -15,17 +15,15 @@ import ( cinderapi "github.com/cobaltcore-dev/cortex/api/external/cinder" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - "k8s.io/apimachinery/pkg/runtime" - "sigs.k8s.io/controller-runtime/pkg/client/fake" ) type mockHTTPAPIDelegate struct { - processFunc func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + processFunc func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } -func (m *mockHTTPAPIDelegate) ProcessRequest(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { +func (m *mockHTTPAPIDelegate) ProcessRequest(ctx 
context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { if m.processFunc != nil { - return m.processFunc(ctx, pipeline, request) + return m.processFunc(ctx, request) } return &lib.FilterWeigherPipelineResult{ OrderedHosts: []string{"host1"}, @@ -147,7 +145,7 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { name string method string body string - processFunc func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + processFunc func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) expectedStatus int expectedHosts []string }{ @@ -180,7 +178,7 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { data, _ := json.Marshal(req) return string(data) }(), - processFunc: func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { return &lib.FilterWeigherPipelineResult{ OrderedHosts: []string{"host1", "host2"}, }, nil @@ -204,7 +202,7 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { data, _ := json.Marshal(req) return string(data) }(), - processFunc: func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { return nil, errors.New("processing failed") }, expectedStatus: http.StatusInternalServerError, @@ -225,7 +223,7 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { data, _ := json.Marshal(req) return string(data) }(), - processFunc: func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + 
processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { return &lib.FilterWeigherPipelineResult{ OrderedHosts: []string{}, }, nil @@ -239,9 +237,6 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { delegate := &mockHTTPAPIDelegate{ processFunc: tt.processFunc, } - - tttscheme := runtime.NewScheme() - tttfakeClient := fake.NewClientBuilder().WithScheme(scheme).Build() api := NewAPI(delegate).(*httpAPI) var body *strings.Reader @@ -333,8 +328,8 @@ func TestHTTPAPI_CinderExternalScheduler_PipelineParameter(t *testing.T) { var capturedRequest cinderapi.ExternalSchedulerRequest delegate := &mockHTTPAPIDelegate{ - processFunc: func(ctx context.Context, pipeline string, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { - capturedPipeline = pipeline + processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + capturedPipeline = request.Pipeline capturedRequest = request return &lib.FilterWeigherPipelineResult{ OrderedHosts: []string{"host1"}, From 54ef0460f0d27c84f62ac1ff89ea61c95c9a9152 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 19 Feb 2026 16:24:24 +0100 Subject: [PATCH 12/36] Fix manila tests --- .../manila/external_scheduler_api_test.go | 142 +++--------------- 1 file changed, 17 insertions(+), 125 deletions(-) diff --git a/internal/scheduling/manila/external_scheduler_api_test.go b/internal/scheduling/manila/external_scheduler_api_test.go index dfdb3f534..871de4ff1 100644 --- a/internal/scheduling/manila/external_scheduler_api_test.go +++ b/internal/scheduling/manila/external_scheduler_api_test.go @@ -4,7 +4,6 @@ package manila import ( - "bytes" "context" "encoding/json" "errors" @@ -14,20 +13,20 @@ import ( "testing" manilaapi "github.com/cobaltcore-dev/cortex/api/external/manila" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - - metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" ) type mockHTTPAPIDelegate struct { - processDecisionFunc func(ctx context.Context, decision *v1alpha1.Decision) error + processFunc func(ctx context.Context, request manilaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } -func (m *mockHTTPAPIDelegate) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { - if m.processDecisionFunc != nil { - return m.processDecisionFunc(ctx, decision) +func (m *mockHTTPAPIDelegate) ProcessRequest(ctx context.Context, request manilaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + if m.processFunc != nil { + return m.processFunc(ctx, request) } - return nil + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1"}, + }, nil } func TestNewAPI(t *testing.T) { @@ -142,13 +141,12 @@ func TestHTTPAPI_canRunScheduler(t *testing.T) { func TestHTTPAPI_ManilaExternalScheduler(t *testing.T) { tests := []struct { - name string - method string - body string - processDecisionErr error - decisionResult *v1alpha1.Decision - expectedStatus int - expectedHosts []string + name string + method string + body string + processFunc func(ctx context.Context, request manilaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + expectedStatus int + expectedHosts []string }{ { name: "invalid method", @@ -180,13 +178,6 @@ func TestHTTPAPI_ManilaExternalScheduler(t *testing.T) { } return string(data) }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - }, - }, - }, expectedStatus: http.StatusOK, expectedHosts: []string{"host1"}, }, @@ -209,38 +200,8 @@ func TestHTTPAPI_ManilaExternalScheduler(t *testing.T) { } return string(data) }(), - processDecisionErr: errors.New("processing failed"), - expectedStatus: http.StatusInternalServerError, - }, - { - 
name: "decision failed", - method: http.MethodPost, - body: func() string { - req := manilaapi.ExternalSchedulerRequest{ - Hosts: []manilaapi.ExternalSchedulerHost{ - {ShareHost: "host1"}, - }, - Weights: map[string]float64{ - "host1": 1.0, - }, - Pipeline: "test-pipeline", - } - data, err := json.Marshal(req) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } - return string(data) - }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Conditions: []metav1.Condition{ - { - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "SchedulingError", - }, - }, - }, + processFunc: func(ctx context.Context, request manilaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return nil, errors.New("processing failed") }, expectedStatus: http.StatusInternalServerError, }, @@ -249,16 +210,7 @@ func TestHTTPAPI_ManilaExternalScheduler(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - if tt.processDecisionErr != nil { - return tt.processDecisionErr - } - if tt.decisionResult != nil { - decision.Status = tt.decisionResult.Status - return nil - } - return nil - }, + processFunc: tt.processFunc, } api := NewAPI(delegate).(*httpAPI) @@ -346,63 +298,3 @@ func TestHTTPAPI_inferPipelineName(t *testing.T) { }) } } - -func TestHTTPAPI_ManilaExternalScheduler_DecisionCreation(t *testing.T) { - var capturedDecision *v1alpha1.Decision - delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - capturedDecision = decision - // Set a successful result to avoid "decision didn't produce a result" error - decision.Status.Result = &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - } - return nil - }, - } - - api := NewAPI(delegate).(*httpAPI) - - requestData := 
manilaapi.ExternalSchedulerRequest{ - Hosts: []manilaapi.ExternalSchedulerHost{ - {ShareHost: "host1"}, - }, - Weights: map[string]float64{ - "host1": 1.0, - }, - Pipeline: "test-pipeline", - } - - body, err := json.Marshal(requestData) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } - req := httptest.NewRequest(http.MethodPost, "/scheduler/manila/external", bytes.NewReader(body)) - w := httptest.NewRecorder() - - api.ManilaExternalScheduler(w, req) - - if w.Code != http.StatusOK { - t.Errorf("Expected status %d, got %d", http.StatusOK, w.Code) - } - - if capturedDecision == nil { - t.Fatal("Decision was not captured") - } - - // Verify decision fields - if capturedDecision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { - t.Errorf("Expected scheduling domain %s, got %s", v1alpha1.SchedulingDomainManila, capturedDecision.Spec.SchedulingDomain) - } - - if capturedDecision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("Expected pipeline 'test-pipeline', got %s", capturedDecision.Spec.PipelineRef.Name) - } - - if capturedDecision.GenerateName != "manila-" { - t.Errorf("Expected generate name 'manila-', got %s", capturedDecision.GenerateName) - } - - if capturedDecision.Spec.ManilaRaw == nil { - t.Error("ManilaRaw should not be nil") - } -} From b4d6c189a86458d14e22e0fbb96a612956b06f6f Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 19 Feb 2026 16:27:08 +0100 Subject: [PATCH 13/36] Fix nova external scheduler api tests --- .../nova/external_scheduler_api_test.go | 160 +++--------------- 1 file changed, 21 insertions(+), 139 deletions(-) diff --git a/internal/scheduling/nova/external_scheduler_api_test.go b/internal/scheduling/nova/external_scheduler_api_test.go index ff711ef3d..dccf6d095 100644 --- a/internal/scheduling/nova/external_scheduler_api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -4,7 +4,6 @@ package nova import ( - "bytes" "context" "encoding/json" "errors" @@ -14,20 +13,20 
@@ import ( "testing" novaapi "github.com/cobaltcore-dev/cortex/api/external/nova" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" ) type mockHTTPAPIDelegate struct { - processDecisionFunc func(ctx context.Context, decision *v1alpha1.Decision) error + processFunc func(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } -func (m *mockHTTPAPIDelegate) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { - if m.processDecisionFunc != nil { - return m.processDecisionFunc(ctx, decision) +func (m *mockHTTPAPIDelegate) ProcessRequest(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + if m.processFunc != nil { + return m.processFunc(ctx, request) } - return nil + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1"}, + }, nil } func TestNewAPI(t *testing.T) { @@ -145,13 +144,12 @@ func TestHTTPAPI_canRunScheduler(t *testing.T) { func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { tests := []struct { - name string - method string - body string - processDecisionErr error - decisionResult *v1alpha1.Decision - expectedStatus int - expectedHosts []string + name string + method string + body string + processFunc func(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + expectedStatus int + expectedHosts []string }{ { name: "invalid method", @@ -188,12 +186,10 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { } return string(data) }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - }, - }, + processFunc: func(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return 
&lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1", "host2"}, + }, nil }, expectedStatus: http.StatusOK, expectedHosts: []string{"host1"}, @@ -222,43 +218,8 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { } return string(data) }(), - processDecisionErr: errors.New("processing failed"), - expectedStatus: http.StatusInternalServerError, - }, - { - name: "decision failed", - method: http.MethodPost, - body: func() string { - req := novaapi.ExternalSchedulerRequest{ - Spec: novaapi.NovaObject[novaapi.NovaSpec]{ - Data: novaapi.NovaSpec{ - InstanceUUID: "test-uuid", - }, - }, - Hosts: []novaapi.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - }, - Weights: map[string]float64{ - "host1": 1.0, - }, - Pipeline: "test-pipeline", - } - data, err := json.Marshal(req) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } - return string(data) - }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Conditions: []metav1.Condition{ - { - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "SchedulingError", - }, - }, - }, + processFunc: func(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return nil, errors.New("processing failed") }, expectedStatus: http.StatusInternalServerError, }, @@ -267,16 +228,7 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - if tt.processDecisionErr != nil { - return tt.processDecisionErr - } - if tt.decisionResult != nil { - decision.Status = tt.decisionResult.Status - return nil - } - return nil - }, + processFunc: tt.processFunc, } config := HTTPAPIConfig{} @@ -318,76 +270,6 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { } } -func TestHTTPAPI_NovaExternalScheduler_DecisionCreation(t 
*testing.T) { - var capturedDecision *v1alpha1.Decision - delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - capturedDecision = decision - // Set a successful result to avoid "decision didn't produce a result" error - decision.Status.Result = &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - } - return nil - }, - } - - config := HTTPAPIConfig{} - api := NewAPI(config, delegate).(*httpAPI) - - requestData := novaapi.ExternalSchedulerRequest{ - Spec: novaapi.NovaObject[novaapi.NovaSpec]{ - Data: novaapi.NovaSpec{ - InstanceUUID: "test-uuid-123", - }, - }, - Hosts: []novaapi.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - }, - Weights: map[string]float64{ - "host1": 1.0, - }, - Pipeline: "test-pipeline", - } - - body, err := json.Marshal(requestData) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } - req := httptest.NewRequest(http.MethodPost, "/scheduler/nova/external", bytes.NewReader(body)) - w := httptest.NewRecorder() - - api.NovaExternalScheduler(w, req) - - if w.Code != http.StatusOK { - t.Errorf("Expected status %d, got %d", http.StatusOK, w.Code) - } - - if capturedDecision == nil { - t.Fatal("Decision was not captured") - } - - // Verify decision fields - if capturedDecision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - t.Errorf("Expected scheduling domain %s, got %s", v1alpha1.SchedulingDomainNova, capturedDecision.Spec.SchedulingDomain) - } - - if capturedDecision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("Expected pipeline 'test-pipeline', got %s", capturedDecision.Spec.PipelineRef.Name) - } - - if capturedDecision.Spec.ResourceID != "test-uuid-123" { - t.Errorf("Expected resource ID 'test-uuid-123', got %s", capturedDecision.Spec.ResourceID) - } - - if capturedDecision.GenerateName != "nova-" { - t.Errorf("Expected generate name 'nova-', got %s", capturedDecision.GenerateName) - } - - if capturedDecision.Spec.NovaRaw 
== nil { - t.Error("NovaRaw should not be nil") - } -} - func TestHTTPAPI_inferPipelineName(t *testing.T) { delegate := &mockHTTPAPIDelegate{} config := HTTPAPIConfig{ From abedb1c0a63af91b532280b919eddde4d1bd4eb5 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 19 Feb 2026 17:01:18 +0100 Subject: [PATCH 14/36] Fix filter weigher pipeline controller for pods --- .../filter_weigher_pipeline_controller.go | 2 +- .../filter_weigher_pipeline_controller.go | 158 ++++-------------- 2 files changed, 38 insertions(+), 122 deletions(-) diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go index bc70e7fbf..68b309242 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -132,7 +132,7 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, // Watch hypervisor changes so the cache gets updated. WatchesMulticluster(&hv1.Hypervisor{}, handler.Funcs{}). WatchesMulticluster(&v1alpha1.Reservation{}, handler.Funcs{}). - Named("cortex-nova-pipelines"). + Named("cortex-nova-pipelines"). 
For( &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller.go b/internal/scheduling/pods/filter_weigher_pipeline_controller.go index 28e10ff88..917af0664 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller.go @@ -18,7 +18,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/pods/plugins/filters" "github.com/cobaltcore-dev/cortex/internal/scheduling/pods/plugins/weighers" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" @@ -53,108 +52,30 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - c.processMu.Lock() - defer c.processMu.Unlock() - - // Determine if this is a decision or pod reconciliation. - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} - func (c *FilterWeigherPipelineController) ProcessNewPod(ctx context.Context, pod *corev1.Pod) error { c.processMu.Lock() defer c.processMu.Unlock() - // Create a decision resource to schedule the pod. 
- decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "pod-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainPods, - ResourceID: pod.Name, - PipelineRef: corev1.ObjectReference{ - Name: "pods-scheduler", - }, - PodRef: &corev1.ObjectReference{ - Name: pod.Name, - Namespace: pod.Namespace, - }, - }, - } + log := ctrl.LoggerFrom(ctx) + startedAt := time.Now() + + pipelineName := "pods-scheduler" - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] + pipeline, ok := c.Pipelines[pipelineName] if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) + return fmt.Errorf("pipeline %s not found or not ready", pipelineName) } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } - } - return err -} -func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. 
- - pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipelineConfig, ok := c.PipelineConfigs[pipelineName] if !ok { - log.Error(nil, "pipeline not found or not ready", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline not found or not ready") + return fmt.Errorf("pipeline %s not configured", pipelineName) } - // Check if the pod is already assigned to a node. - pod := &corev1.Pod{} - if err := c.Get(ctx, client.ObjectKey{ - Name: decision.Spec.PodRef.Name, - Namespace: decision.Spec.PodRef.Namespace, - }, pod); err != nil { - log.Error(err, "failed to fetch pod for decision") - return err - } if pod.Spec.NodeName != "" { log.Info("pod is already assigned to a node", "node", pod.Spec.NodeName) return nil } - // Find all available nodes. nodes := &corev1.NodeList{} if err := c.List(ctx, nodes); err != nil { return err @@ -163,32 +84,48 @@ func (c *FilterWeigherPipelineController) process(ctx context.Context, decision return errors.New("no nodes available for scheduling") } - // Execute the scheduling pipeline. request := pods.PodPipelineRequest{Nodes: nodes.Items, Pod: *pod} result, err := pipeline.Run(request) if err != nil { log.V(1).Error(err, "failed to run scheduler pipeline") return errors.New("failed to run scheduler pipeline") } - decision.Status.Result = &result - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - // Assign the first node returned by the pipeline using a Binding. 
+ log.Info("pod processed successfully", "duration", time.Since(startedAt)) + + hosts := result.OrderedHosts + if len(hosts) == 0 { + log.Info("no suitable nodes found for pod") + return nil + } + + targetHost := hosts[0] + binding := &corev1.Binding{ ObjectMeta: metav1.ObjectMeta{ - Name: decision.Spec.PodRef.Name, - Namespace: decision.Spec.PodRef.Namespace, + Name: pod.Name, + Namespace: pod.Namespace, }, Target: corev1.ObjectReference{ Kind: "Node", - Name: *result.TargetHost, + Name: targetHost, }, } if err := c.Create(ctx, binding); err != nil { log.V(1).Error(err, "failed to assign node to pod via binding") return err } - log.V(1).Info("assigned node to pod", "node", *result.TargetHost) + log.V(1).Info("assigned node to pod", "node", targetHost) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + ResourceID: pod.Name, + PipelineName: pipelineName, + Result: result, + // TODO: Refine the reason + Reason: v1alpha1.SchedulingReasonUnknown, + } + } return nil } @@ -236,7 +173,7 @@ func (c *FilterWeigherPipelineController) handlePod() handler.EventHandler { return } for _, decision := range decisions.Items { - if decision.Spec.PodRef.Name == pod.Name && decision.Spec.PodRef.Namespace == pod.Namespace { + if decision.Spec.ResourceID == pod.Name && decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainPods { if err := c.Delete(ctx, &decision); err != nil { log.Error(err, "failed to delete decision for deleted pod") } @@ -266,36 +203,15 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, return pod.Spec.SchedulerName == string(v1alpha1.SchedulingDomainPods) }), ). - // Watch pipeline changes so that we can reconfigure pipelines as needed. 
- WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. - if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { - return false - } - return pipeline.Spec.Type == v1alpha1.PipelineTypeFilterWeigher - }), - ). Named("cortex-pod-scheduler"). For( - &v1alpha1.Decision{}, + &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { - return false - } - // Ignore already decided schedulings. - if decision.Status.Result != nil { + pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { return false } - return true + return pipeline.Spec.Type == c.PipelineType() })), ). 
Complete(c) From 2186be9514b57a41d6761006bfc26d04c20e9b94 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 19 Feb 2026 17:09:57 +0100 Subject: [PATCH 15/36] Fix machine scheduler --- .../filter_weigher_pipeline_controller.go | 152 +++++------------- 1 file changed, 37 insertions(+), 115 deletions(-) diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller.go b/internal/scheduling/machines/filter_weigher_pipeline_controller.go index 2b0c44f64..b2f1b9492 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller.go @@ -19,8 +19,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/machines/plugins/filters" "github.com/cobaltcore-dev/cortex/internal/scheduling/machines/plugins/weighers" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -54,91 +52,25 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - c.processMu.Lock() - defer c.processMu.Unlock() - - // Determine if this is a decision or machine reconciliation. 
- decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} - func (c *FilterWeigherPipelineController) ProcessNewMachine(ctx context.Context, machine *ironcorev1alpha1.Machine) error { c.processMu.Lock() defer c.processMu.Unlock() - // Create a decision resource to schedule the machine. - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "machine-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainMachines, - ResourceID: machine.Name, - PipelineRef: corev1.ObjectReference{ - Name: "machines-scheduler", - }, - MachineRef: &corev1.ObjectReference{ - Name: machine.Name, - Namespace: machine.Namespace, - }, - }, - } + log := ctrl.LoggerFrom(ctx) + startedAt := time.Now() - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] + pipelineName := "machines-scheduler" + + pipeline, ok := c.Pipelines[pipelineName] if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline 
run succeeded", - }) - } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } + log.Error(nil, "pipeline not found or not ready", "pipelineName", pipelineName) + return errors.New("pipeline not found or not ready") } - return err -} -func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. - - pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipelineConfig, ok := c.PipelineConfigs[pipelineName] if !ok { - log.Error(nil, "pipeline not found or not ready", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline not found or not ready") + log.Error(nil, "pipeline not configured", "pipelineName", pipelineName) + return fmt.Errorf("pipeline %s not configured", pipelineName) } // Find all available machine pools. @@ -157,27 +89,38 @@ func (c *FilterWeigherPipelineController) process(ctx context.Context, decision log.V(1).Error(err, "failed to run scheduler pipeline") return errors.New("failed to run scheduler pipeline") } - decision.Status.Result = &result - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - // Set the machine pool ref on the machine. - machine := &ironcorev1alpha1.Machine{} - if err := c.Get(ctx, client.ObjectKey{ - Name: decision.Spec.MachineRef.Name, - Namespace: decision.Spec.MachineRef.Namespace, - }, machine); err != nil { - log.Error(err, "failed to fetch machine for decision") - return err + log.Info("machine processed successfully", "duration", time.Since(startedAt)) + + hosts := result.OrderedHosts + if len(hosts) == 0 { + log.Info("no suitable machine pools found by pipeline") + return errors.New("no suitable machine pools found") } + + targetHost := hosts[0] + + // Set the machine pool ref on the machine. 
+ // Assign the first machine pool returned by the pipeline. old := machine.DeepCopy() - machine.Spec.MachinePoolRef = &corev1.LocalObjectReference{Name: *result.TargetHost} + machine.Spec.MachinePoolRef = &corev1.LocalObjectReference{Name: targetHost} patch := client.MergeFrom(old) if err := c.Patch(ctx, machine, patch); err != nil { log.V(1).Error(err, "failed to assign machine pool to instance") return err } - log.V(1).Info("assigned machine pool to instance", "machinePool", *result.TargetHost) + log.V(1).Info("assigned machine pool to instance", "machinePool", targetHost) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + ResourceID: machine.Name, + PipelineName: pipelineName, + Result: result, + // TODO: Refine the reason + Reason: v1alpha1.SchedulingReasonUnknown, + } + } return nil } @@ -225,7 +168,7 @@ func (c *FilterWeigherPipelineController) handleMachine() handler.EventHandler { return } for _, decision := range decisions.Items { - if decision.Spec.MachineRef.Name == machine.Name && decision.Spec.MachineRef.Namespace == machine.Namespace { + if decision.Spec.ResourceID == machine.Name && decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainMachines { if err := c.Delete(ctx, &decision); err != nil { log.Error(err, "failed to delete decision for deleted machine") } @@ -259,37 +202,16 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, return machine.Spec.Scheduler == "" }), ). - // Watch pipeline changes so that we can reconfigure pipelines as needed. - WatchesMulticluster( + For( &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { + builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. 
if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines { return false } return pipeline.Spec.Type == c.PipelineType() - }), - ). - Named("cortex-machine-scheduler"). - For( - &v1alpha1.Decision{}, - builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines { - return false - } - // Ignore already decided schedulings. - if decision.Status.Result != nil { - return false - } - return true })), ). + Named("cortex-machine-scheduler"). Complete(c) } From c8e46dda04784532dca94f082f8dd9ed1b31dfb4 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 19 Feb 2026 17:18:29 +0100 Subject: [PATCH 16/36] Fix pod scheduler tests --- ...filter_weigher_pipeline_controller_test.go | 190 +----------------- 1 file changed, 10 insertions(+), 180 deletions(-) diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go index 2dd48d339..d648b3857 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go @@ -15,166 +15,9 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheduling scheme: %v", err) - } - if err := corev1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add corev1 scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - nodes []corev1.Node - pod *corev1.Pod - expectError bool - expectDecision bool - expectTargetHost string - }{ - { - name: "successful pod 
decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainPods, - ResourceID: "test-pod", - PipelineRef: corev1.ObjectReference{ - Name: "pods-scheduler", - }, - PodRef: &corev1.ObjectReference{ - Name: "test-pod", - Namespace: "default", - }, - }, - }, - nodes: []corev1.Node{ - { - ObjectMeta: metav1.ObjectMeta{Name: "node1"}, - }, - { - ObjectMeta: metav1.ObjectMeta{Name: "node2"}, - }, - }, - pod: &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pod", - Namespace: "default", - }, - Spec: corev1.PodSpec{ - SchedulerName: "", - }, - }, - expectError: false, - expectDecision: true, - expectTargetHost: "node1", // NoopFilter returns first node - }, - { - name: "no nodes available", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-nodes", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainPods, - ResourceID: "test-pod", - PipelineRef: corev1.ObjectReference{ - Name: "pods-scheduler", - }, - PodRef: &corev1.ObjectReference{ - Name: "test-pod", - Namespace: "default", - }, - }, - }, - nodes: []corev1.Node{}, - expectError: true, - expectDecision: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.decision} - for i := range tt.nodes { - objects = append(objects, &tt.nodes[i]) - } - if tt.pod != nil { - objects = append(objects, tt.pod) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). 
- Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[pods.PodPipelineRequest]]{ - Pipelines: map[string]lib.FilterWeigherPipeline[pods.PodPipelineRequest]{ - "pods-scheduler": createMockPodPipeline(), - }, - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - controller.Client = client - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("expected error but got none") - return - } - - if !tt.expectError && err != nil { - t.Errorf("expected no error, got: %v", err) - return - } - - if result.RequeueAfter > 0 { - t.Errorf("unexpected requeue: %v", result.RequeueAfter) - } - - // Verify decision status if expected - if tt.expectDecision { - var updatedDecision v1alpha1.Decision - err := client.Get(context.Background(), req.NamespacedName, &updatedDecision) - if err != nil { - t.Errorf("Failed to get updated decision: %v", err) - return - } - - if updatedDecision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - - if updatedDecision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") - return - } - - if *updatedDecision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *updatedDecision.Status.Result.TargetHost) - } - } - }) - } -} - func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { controller := &FilterWeigherPipelineController{ Monitor: lib.FilterWeigherPipelineMonitor{}, @@ -437,9 +280,8 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { found := false for _, decision := range decisions.Items { - if decision.Spec.PodRef != nil && - decision.Spec.PodRef.Name == tt.pod.Name && - decision.Spec.PodRef.Namespace == tt.pod.Namespace { + if 
decision.Spec.ResourceID == tt.pod.Name && + decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainPods { found = true // Verify decision properties @@ -449,22 +291,11 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { if decision.Spec.ResourceID != tt.pod.Name { t.Errorf("expected resource ID %q, got %q", tt.pod.Name, decision.Spec.ResourceID) } - if decision.Spec.PipelineRef.Name != "pods-scheduler" { - t.Errorf("expected pipeline ref %q, got %q", "pods-scheduler", decision.Spec.PipelineRef.Name) - } // Check if result was set (only for successful cases) if !tt.expectError && tt.expectTargetHost != "" { - if decision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - if decision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") - return - } - if *decision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *decision.Status.Result.TargetHost) + if decision.Status.TargetHost != tt.expectTargetHost { + t.Errorf("expected target host %q, got %q", tt.expectTargetHost, decision.Status.TargetHost) } } break @@ -484,9 +315,8 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { } for _, decision := range decisions.Items { - if decision.Spec.PodRef != nil && - decision.Spec.PodRef.Name == tt.pod.Name && - decision.Spec.PodRef.Namespace == tt.pod.Namespace { + if decision.Spec.ResourceID == tt.pod.Name && + decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainPods { t.Error("expected no decision to be created but found one") break } @@ -523,14 +353,14 @@ func createMockPodPipeline() lib.FilterWeigherPipeline[pods.PodPipelineRequest] type mockPodPipeline struct{} -func (m *mockPodPipeline) Run(request pods.PodPipelineRequest) (v1alpha1.DecisionResult, error) { +func (m *mockPodPipeline) Run(request pods.PodPipelineRequest) (lib.FilterWeigherPipelineResult, error) { if len(request.Nodes) == 0 { - 
return v1alpha1.DecisionResult{}, nil + return lib.FilterWeigherPipelineResult{OrderedHosts: []string{}}, nil } // Return the first node as the target host targetHost := request.Nodes[0].Name - return v1alpha1.DecisionResult{ - TargetHost: &targetHost, + return lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{targetHost}, }, nil } From 8c9b59686e667a0523e0592d58def835c91bb5bf Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Thu, 19 Feb 2026 21:08:02 +0100 Subject: [PATCH 17/36] Fix type of ordered host --- api/v1alpha1/decision_types.go | 2 +- api/v1alpha1/zz_generated.deepcopy.go | 5 +++++ helm/library/cortex/files/crds/cortex.cloud_decisions.yaml | 4 +++- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go index 338334ab8..ec37126bd 100644 --- a/api/v1alpha1/decision_types.go +++ b/api/v1alpha1/decision_types.go @@ -29,7 +29,7 @@ const ( // SchedulingHistoryEntry represents a single entry in the scheduling history of a resource. type SchedulingHistoryEntry struct { // The hosts that were selected in this scheduling event, in order of preference. - OrderedHosts string `json:"orderedHosts"` + OrderedHosts []string `json:"orderedHosts"` // Timestamp of when the scheduling event occurred. Timestamp metav1.Time `json:"timestamp"` // A reference to the pipeline that was used for this decision. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 68256dfc3..92a6406c3 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1076,6 +1076,11 @@ func (in *ReservationStatus) DeepCopy() *ReservationStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *SchedulingHistoryEntry) DeepCopyInto(out *SchedulingHistoryEntry) { *out = *in + if in.OrderedHosts != nil { + in, out := &in.OrderedHosts, &out.OrderedHosts + *out = make([]string, len(*in)) + copy(*out, *in) + } in.Timestamp.DeepCopyInto(&out.Timestamp) out.PipelineRef = in.PipelineRef } diff --git a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml index 4d9cd701a..906d36617 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml @@ -139,7 +139,9 @@ spec: orderedHosts: description: The hosts that were selected in this scheduling event, in order of preference. - type: string + items: + type: string + type: array pipelineRef: description: |- A reference to the pipeline that was used for this decision. From 4ff6bfc5457ef7332c8eee30ebfbdc05dc6660df Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 08:41:03 +0100 Subject: [PATCH 18/36] Added back ignore preselection option --- .../nova/filter_weigher_pipeline_controller.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go index 68b309242..5020f3320 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -70,6 +70,16 @@ func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, re log.Error(nil, "pipeline config not found", "pipelineName", pipelineName) return nil, fmt.Errorf("pipeline config for %s not found", pipelineName) } + // If necessary gather all placement candidates before filtering. + // This will override the hosts and weights in the nova request. 
+ if pipelineConfig.Spec.IgnorePreselection { + log.Info("gathering all placement candidates before filtering") + if err := c.gatherer.MutateWithAllCandidates(ctx, &request); err != nil { + log.Error(err, "failed to gather all placement candidates") + return nil, err + } + log.Info("gathered all placement candidates", "numHosts", len(request.Hosts)) + } result, err := pipeline.Run(request) if err != nil { From 68d097cc92a9225579abee3f034d2066cd5a75e2 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 08:41:39 +0100 Subject: [PATCH 19/36] Fix manila and cinder tests for filter weigher controller --- ...filter_weigher_pipeline_controller_test.go | 224 +++------- ...filter_weigher_pipeline_controller_test.go | 412 +++--------------- 2 files changed, 124 insertions(+), 512 deletions(-) diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go index f99d244b2..3a16f2996 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go @@ -5,10 +5,8 @@ package cinder import ( "context" - "encoding/json" "testing" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -20,61 +18,39 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) } - cinderRequest := api.ExternalSchedulerRequest{ - Spec: map[string]any{ - "volume_id": "test-volume-id", - "size": 10, - }, - Context: api.CinderRequestContext{ - ProjectID: "test-project", - UserID: "test-user", - RequestID: "req-123", - 
GlobalRequestID: "global-req-123", - }, - Hosts: []api.ExternalSchedulerHost{ - {VolumeHost: "cinder-volume-1"}, - {VolumeHost: "cinder-volume-2"}, - }, - Weights: map[string]float64{"cinder-volume-1": 1.0, "cinder-volume-2": 0.5}, - Pipeline: "test-pipeline", - } - - cinderRaw, err := json.Marshal(cinderRequest) - if err != nil { - t.Fatalf("Failed to marshal cinder request: %v", err) - } - tests := []struct { - name string - decision *v1alpha1.Decision - pipelineConfig *v1alpha1.Pipeline - createDecisions bool - expectError bool - expectDecisionCreated bool - expectResult bool + name string + request api.ExternalSchedulerRequest + pipelineConfig *v1alpha1.Pipeline + expectError bool + expectResult bool }{ { - name: "successful decision processing with creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "test-decision-", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, - }, + name: "successful request processing", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "volume_id": "test-volume-id", + "size": 10, + }, + Context: api.CinderRequestContext{ + ProjectID: "test-project", + UserID: "test-user", + RequestID: "req-123", + GlobalRequestID: "global-req-123", + ResourceUUID: "test-volume-id", + }, + Hosts: []api.ExternalSchedulerHost{ + {VolumeHost: "cinder-volume-1"}, + {VolumeHost: "cinder-volume-2"}, + }, + Weights: map[string]float64{"cinder-volume-1": 1.0, "cinder-volume-2": 0.5}, + Pipeline: "test-pipeline", }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -88,81 +64,38 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: false, - expectDecisionCreated: true, - expectResult: 
true, - }, - { - name: "successful decision processing without creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-create", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, - }, - }, - pipelineConfig: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - CreateDecisions: false, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - createDecisions: false, - expectError: false, - expectDecisionCreated: false, - expectResult: true, + expectError: false, + expectResult: true, }, { name: "pipeline not configured", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "volume_id": "test-volume-id", }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, + Context: api.CinderRequestContext{ + ResourceUUID: "test-volume-id", }, + Hosts: []api.ExternalSchedulerHost{{VolumeHost: "cinder-volume-1"}}, + Weights: map[string]float64{"cinder-volume-1": 1.0}, + Pipeline: "nonexistent-pipeline", }, - pipelineConfig: nil, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + pipelineConfig: nil, + expectError: true, + expectResult: false, }, { - name: "decision without cinderRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw", - Namespace: "default", + name: "empty hosts", + 
request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "volume_id": "test-volume-id", }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: nil, + Context: api.CinderRequestContext{ + ResourceUUID: "test-volume-id", }, + Hosts: []api.ExternalSchedulerHost{}, + Weights: map[string]float64{}, + Pipeline: "test-pipeline", }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -171,15 +104,13 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - CreateDecisions: true, + CreateDecisions: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + expectError: false, + expectResult: true, }, } @@ -190,31 +121,31 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) objects = append(objects, tt.pipelineConfig) } - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). 
Build() controller := &FilterWeigherPipelineController{ BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, + Client: fakeClient, Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), + DecisionQueue: make(chan lib.DecisionUpdate, 10), }, Monitor: lib.FilterWeigherPipelineMonitor{}, } if tt.pipelineConfig != nil { controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig - initResult := controller.InitPipeline(t.Context(), *tt.pipelineConfig) + initResult := controller.InitPipeline(context.Background(), *tt.pipelineConfig) if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { t.Fatalf("Failed to init pipeline: %v", initResult) } controller.Pipelines[tt.pipelineConfig.Name] = initResult.Pipeline } - err := controller.ProcessNewDecisionFromAPI(context.Background(), tt.decision) + result, err := controller.ProcessRequest(context.Background(), tt.request) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -223,44 +154,15 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) t.Errorf("Expected no error but got: %v", err) } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainCinder { - found = true - - // Verify decision properties - if decision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("expected pipeline ref %q, got %q", "test-pipeline", decision.Spec.PipelineRef.Name) - } - - // Check if result was set - if tt.expectResult { - if decision.Status.Result == nil { - t.Error("expected decision 
result to be set") - return - } - } - break + if tt.expectResult { + if result == nil { + t.Error("Expected result but got nil") + } else { + // Verify result has ordered hosts + if len(result.OrderedHosts) == 0 && len(tt.request.Hosts) > 0 { + t.Error("Expected ordered hosts in result") } } - - if !found { - t.Error("expected decision to be created but was not found") - } - } else if !tt.expectError { - // For cases without creation, check that the decision has the right status - if tt.expectResult && tt.decision.Status.Result == nil { - t.Error("expected decision result to be set in original decision object") - } } }) } @@ -309,7 +211,7 @@ func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(context.Background(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", }, diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go index 0fe38eaa6..3114ca637 100644 --- a/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go @@ -5,13 +5,9 @@ package manila import ( "context" - "encoding/json" "testing" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -24,250 +20,38 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) } 
- manilaRequest := api.ExternalSchedulerRequest{ - Spec: map[string]any{ - "share_id": "test-share-id", - "size": 10, - }, - Context: api.ManilaRequestContext{ - ProjectID: "test-project", - UserID: "test-user", - RequestID: "req-123", - GlobalRequestID: "global-req-123", - }, - Hosts: []api.ExternalSchedulerHost{ - {ShareHost: "manila-share-1@backend1"}, - {ShareHost: "manila-share-2@backend2"}, - }, - Weights: map[string]float64{"manila-share-1@backend1": 1.0, "manila-share-2@backend2": 0.5}, - Pipeline: "test-pipeline", - } - - manilaRaw, err := json.Marshal(manilaRequest) - if err != nil { - t.Fatalf("Failed to marshal manila request: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - pipeline *v1alpha1.Pipeline - expectError bool - expectResult bool - }{ - { - name: "successful manila decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainManila, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: false, - expectResult: true, - }, - { - name: "decision without manilaRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: nil, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - 
}, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainManila, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: true, - expectResult: false, - }, - { - name: "pipeline not found", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, - }, - }, - pipeline: nil, - expectError: true, - expectResult: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []client.Object{tt.decision} - if tt.pipeline != nil { - objects = append(objects, tt.pipeline) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). 
- Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, - Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - - if tt.pipeline != nil { - initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: tt.pipeline.Name, - }, - Spec: tt.pipeline.Spec, - }) - if err != nil { - t.Fatalf("Failed to init pipeline: %v", err) - } - controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline - } - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - Namespace: tt.decision.Namespace, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("Expected error but got none") - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - if result.RequeueAfter > 0 { - t.Error("Expected no requeue") - } - - var updatedDecision v1alpha1.Decision - if err := client.Get(context.Background(), req.NamespacedName, &updatedDecision); err != nil { - t.Fatalf("Failed to get updated decision: %v", err) - } - - if tt.expectResult && updatedDecision.Status.Result == nil { - t.Error("Expected result to be set but was nil") - } - if !tt.expectResult && updatedDecision.Status.Result != nil { - t.Error("Expected result to be nil but was set") - } - }) - } -} - -func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - manilaRequest := api.ExternalSchedulerRequest{ - Spec: map[string]any{ - "share_id": "test-share-id", - "size": 10, - }, - Context: api.ManilaRequestContext{ - ProjectID: "test-project", - UserID: 
"test-user", - RequestID: "req-123", - GlobalRequestID: "global-req-123", - }, - Hosts: []api.ExternalSchedulerHost{ - {ShareHost: "manila-share-1@backend1"}, - {ShareHost: "manila-share-2@backend2"}, - }, - Weights: map[string]float64{"manila-share-1@backend1": 1.0, "manila-share-2@backend2": 0.5}, - Pipeline: "test-pipeline", - } - - manilaRaw, err := json.Marshal(manilaRequest) - if err != nil { - t.Fatalf("Failed to marshal manila request: %v", err) - } - tests := []struct { - name string - decision *v1alpha1.Decision - pipelineConfig *v1alpha1.Pipeline - createDecisions bool - expectError bool - expectDecisionCreated bool - expectResult bool + name string + request api.ExternalSchedulerRequest + pipelineConfig *v1alpha1.Pipeline + expectError bool + expectResult bool }{ { - name: "successful decision processing with creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "test-decision-", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, - }, + name: "successful request processing", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "share_id": "test-share-id", + "size": 10, + }, + Context: api.ManilaRequestContext{ + ProjectID: "test-project", + UserID: "test-user", + RequestID: "req-123", + GlobalRequestID: "global-req-123", + }, + Hosts: []api.ExternalSchedulerHost{ + {ShareHost: "manila-share-1@backend1"}, + {ShareHost: "manila-share-2@backend2"}, + }, + Weights: map[string]float64{"manila-share-1@backend1": 1.0, "manila-share-2@backend2": 0.5}, + Pipeline: "test-pipeline", }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -281,81 +65,38 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - 
expectError: false, - expectDecisionCreated: true, - expectResult: true, - }, - { - name: "successful decision processing without creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-create", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, - }, - }, - pipelineConfig: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainManila, - CreateDecisions: false, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - createDecisions: false, - expectError: false, - expectDecisionCreated: false, - expectResult: true, + expectError: false, + expectResult: true, }, { name: "pipeline not configured", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "share_id": "test-share-id", }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, + Context: api.ManilaRequestContext{ + RequestID: "req-123", }, + Hosts: []api.ExternalSchedulerHost{{ShareHost: "manila-share-1@backend1"}}, + Weights: map[string]float64{"manila-share-1@backend1": 1.0}, + Pipeline: "nonexistent-pipeline", }, - pipelineConfig: nil, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + pipelineConfig: nil, + expectError: true, + expectResult: false, }, { - name: "decision without manilaRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: 
"test-decision-no-raw", - Namespace: "default", + name: "empty hosts", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "share_id": "test-share-id", }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: nil, + Context: api.ManilaRequestContext{ + RequestID: "req-123", }, + Hosts: []api.ExternalSchedulerHost{}, + Weights: map[string]float64{}, + Pipeline: "test-pipeline", }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -364,15 +105,13 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - CreateDecisions: true, + CreateDecisions: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + expectError: false, + expectResult: true, }, } @@ -383,31 +122,31 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) objects = append(objects, tt.pipelineConfig) } - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). 
Build() controller := &FilterWeigherPipelineController{ BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, + Client: fakeClient, Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), + DecisionQueue: make(chan lib.DecisionUpdate, 10), }, Monitor: lib.FilterWeigherPipelineMonitor{}, } if tt.pipelineConfig != nil { controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig - initResult := controller.InitPipeline(t.Context(), *tt.pipelineConfig) + initResult := controller.InitPipeline(context.Background(), *tt.pipelineConfig) if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { t.Fatalf("Failed to init pipeline: %v", initResult) } controller.Pipelines[tt.pipelineConfig.Name] = initResult.Pipeline } - err := controller.ProcessNewDecisionFromAPI(context.Background(), tt.decision) + result, err := controller.ProcessRequest(context.Background(), tt.request) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -416,44 +155,15 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) t.Errorf("Expected no error but got: %v", err) } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainManila { - found = true - - // Verify decision properties - if decision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("expected pipeline ref %q, got %q", "test-pipeline", decision.Spec.PipelineRef.Name) - } - - // Check if result was set - if tt.expectResult { - if decision.Status.Result == nil { - t.Error("expected decision 
result to be set") - return - } - } - break + if tt.expectResult { + if result == nil { + t.Error("Expected result but got nil") + } else { + // Verify result has ordered hosts + if len(result.OrderedHosts) == 0 && len(tt.request.Hosts) > 0 { + t.Error("Expected ordered hosts in result") } } - - if !found { - t.Error("expected decision to be created but was not found") - } - } else if !tt.expectError { - // For cases without creation, check that the decision has the right status - if tt.expectResult && tt.decision.Status.Result == nil { - t.Error("expected decision result to be set in original decision object") - } } }) } From 50012fd42198f27ff2d7175554e7430be76e3e9b Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 08:47:53 +0100 Subject: [PATCH 20/36] Fix nova controller tests --- ...filter_weigher_pipeline_controller_test.go | 418 +----------------- 1 file changed, 19 insertions(+), 399 deletions(-) diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go index b28392589..4e04841cd 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go @@ -5,16 +5,12 @@ package nova import ( "context" - "encoding/json" "errors" "strings" "testing" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -46,238 +42,6 @@ func (m *mockCandidateGatherer) MutateWithAllCandidates(ctx context.Context, req return nil } -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - novaRequest := 
api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Name: "RequestSpec", - Namespace: "nova_object", - Version: "1.19", - Data: api.NovaSpec{ - ProjectID: "test-project", - UserID: "test-user", - InstanceUUID: "test-instance-uuid", - NumInstances: 1, - }, - }, - Context: api.NovaRequestContext{ - ProjectID: "test-project", - UserID: "test-user", - RequestID: "req-123", - GlobalRequestID: func() *string { s := "global-req-123"; return &s }(), - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "compute-1", HypervisorHostname: "hv-1"}, - {ComputeHost: "compute-2", HypervisorHostname: "hv-2"}, - }, - Weights: map[string]float64{"compute-1": 1.0, "compute-2": 0.5}, - Pipeline: "test-pipeline", - } - - novaRaw, err := json.Marshal(novaRequest) - if err != nil { - t.Fatalf("Failed to marshal nova request: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - pipeline *v1alpha1.Pipeline - expectError bool - expectResult bool - }{ - { - name: "successful nova decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: false, - expectResult: true, - }, - { - name: "decision without novaRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: 
corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: nil, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: true, - expectResult: false, - }, - { - name: "pipeline not found", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, - pipeline: nil, - expectError: true, - expectResult: false, - }, - { - name: "invalid novaRaw JSON", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-invalid-json", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: []byte("invalid json"), - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: true, - expectResult: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []client.Object{tt.decision} - if tt.pipeline != nil { - objects = append(objects, tt.pipeline) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). 
- Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, - Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - - if tt.pipeline != nil { - initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: tt.pipeline.Name, - }, - Spec: tt.pipeline.Spec, - }) - if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { - t.Fatalf("Failed to initialize pipeline: filter errors: %v, weigher errors: %v", initResult.FilterErrors, initResult.WeigherErrors) - } - controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline - controller.PipelineConfigs[tt.pipeline.Name] = *tt.pipeline - } - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - Namespace: tt.decision.Namespace, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("Expected error but got none") - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - if result.RequeueAfter > 0 { - t.Error("Expected no requeue") - } - - var updatedDecision v1alpha1.Decision - if err := client.Get(context.Background(), req.NamespacedName, &updatedDecision); err != nil { - if !tt.expectError { - t.Fatalf("Failed to get updated decision: %v", err) - } - return - } - - if tt.expectResult && updatedDecision.Status.Result == nil { - t.Error("Expected result to be set but was nil") - } - if !tt.expectResult && updatedDecision.Status.Result != nil { - t.Error("Expected result to be nil but was set") - } - }) - } -} - func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { controller := &FilterWeigherPipelineController{ Monitor: 
lib.FilterWeigherPipelineMonitor{}, @@ -373,7 +137,7 @@ func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { } } -func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ -405,14 +169,9 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Pipeline: "test-pipeline", } - novaRaw, err := json.Marshal(novaRequest) - if err != nil { - t.Fatalf("Failed to marshal nova request: %v", err) - } - tests := []struct { name string - decision *v1alpha1.Decision + request api.ExternalSchedulerRequest pipeline *v1alpha1.Pipeline pipelineConf *v1alpha1.Pipeline setupPipelineConfigs bool @@ -424,22 +183,8 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) errorContains string }{ { - name: "successful processing with decision creation enabled", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-api", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, + name: "successful processing with decision creation enabled", + request: novaRequest, pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", @@ -472,22 +217,8 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) expectUpdatedStatus: true, }, { - name: "successful processing with decision creation disabled", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-create", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: 
corev1.ObjectReference{ - Name: "test-pipeline-no-create", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, + name: "successful processing with decision creation disabled", + request: novaRequest, pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline-no-create", @@ -520,22 +251,8 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) expectUpdatedStatus: false, }, { - name: "pipeline not configured", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-config", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, + name: "pipeline not configured", + request: novaRequest, pipeline: nil, pipelineConf: nil, setupPipelineConfigs: false, @@ -546,69 +263,8 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) errorContains: "pipeline nonexistent-pipeline not configured", }, { - name: "decision without novaRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw-api", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: nil, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - pipelineConf: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: 
v1alpha1.SchedulingDomainNova, - CreateDecisions: true, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: true, - expectResult: false, - expectCreatedDecision: true, - expectUpdatedStatus: false, - errorContains: "no novaRaw spec defined", - }, - { - name: "processing fails after decision creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-process-fail", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, + name: "processing fails after decision creation", + request: novaRequest, pipeline: nil, // This will cause processing to fail after creation pipelineConf: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -631,22 +287,8 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) errorContains: "pipeline not found or not ready", }, { - name: "pipeline not found in runtime map", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-runtime-pipeline", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "missing-runtime-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, + name: "pipeline not found in runtime map", + request: novaRequest, pipeline: nil, pipelineConf: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -712,7 +354,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) } // Call the method under test - err := controller.ProcessNewDecisionFromAPI(context.Background(), tt.decision) + result, err := controller.ProcessRequest(context.Background(), tt.request) // Validate error 
expectations if tt.expectError && err == nil { @@ -726,7 +368,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) } // Check if decision was created in the cluster when expected - if tt.expectCreatedDecision { + /* TODO CHECK IF DECISION WAS CREATED if tt.expectCreatedDecision { var createdDecision v1alpha1.Decision key := types.NamespacedName{Name: tt.decision.Name, Namespace: tt.decision.Namespace} err := client.Get(context.Background(), key, &createdDecision) @@ -740,13 +382,13 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) if err == nil { t.Error("Expected decision not to be created but it was found") } - } + }*/ // Validate result and duration expectations - if tt.expectResult && tt.decision.Status.Result == nil { + if tt.expectResult && result == nil { t.Error("Expected result to be set but was nil") } - if !tt.expectResult && tt.decision.Status.Result != nil { + if !tt.expectResult && result != nil { t.Error("Expected result to be nil but was set") } }) @@ -786,11 +428,6 @@ func TestFilterWeigherPipelineController_IgnorePreselection(t *testing.T) { Pipeline: "test-pipeline", } - novaRaw, err := json.Marshal(novaRequest) - if err != nil { - t.Fatalf("Failed to marshal nova request: %v", err) - } - tests := []struct { name string ignorePreselection bool @@ -884,25 +521,8 @@ func TestFilterWeigherPipelineController_IgnorePreselection(t *testing.T) { } controller.Pipelines["test-pipeline"] = initResult.Pipeline - // Create decision - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-preselection", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - } - // Process the decision - err := controller.ProcessNewDecisionFromAPI(context.Background(), decision) + result, err := 
controller.ProcessRequest(context.Background(), novaRequest) // Verify gatherer was called (or not) as expected if tt.expectGathererCall && !mockGatherer.called { @@ -924,7 +544,7 @@ func TestFilterWeigherPipelineController_IgnorePreselection(t *testing.T) { } // Verify result is set when no error - if !tt.expectError && decision.Status.Result == nil { + if !tt.expectError && result == nil { t.Error("Expected result to be set but was nil") } }) From 0dbffc4d5eebce374ac011d726ae71706dd5270c Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 10:06:44 +0100 Subject: [PATCH 21/36] Add decision creation and event publishing, disabled explainer for now --- api/v1alpha1/decision_types.go | 4 +- .../filter_weigher_pipeline_controller.go | 1 + internal/scheduling/lib/explainer.go | 19 ++-- .../scheduling/lib/explainer_templates.go | 3 +- internal/scheduling/lib/explainer_test.go | 4 +- internal/scheduling/lib/explainer_types.go | 5 +- .../scheduling/lib/pipeline_controller.go | 98 +++++++++++++++++-- .../filter_weigher_pipeline_controller.go | 1 + .../filter_weigher_pipeline_controller.go | 1 + .../filter_weigher_pipeline_controller.go | 1 + .../filter_weigher_pipeline_controller.go | 1 + 11 files changed, 111 insertions(+), 27 deletions(-) diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go index ec37126bd..1fdb62e77 100644 --- a/api/v1alpha1/decision_types.go +++ b/api/v1alpha1/decision_types.go @@ -53,6 +53,8 @@ type DecisionSpec struct { const ( // The decision is ready and tracking the resource. DecisionConditionReady = "Ready" + // The decision has failed to make a placement decision for the resource. + DecisionConditionFailed = "Failed" ) type DecisionStatus struct { @@ -64,7 +66,7 @@ type DecisionStatus struct { // +kubebuilder:validation:Optional SchedulingHistory []SchedulingHistoryEntry `json:"schedulingHistory,omitempty"` - // A human-readable explanation of the current scheduling state. 
+ // A human-readable explanation of the current scheduling decision. // +kubebuilder:validation:Optional Explanation string `json:"explanation,omitempty"` diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go index 9c0dff770..481fd6c3c 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go @@ -104,6 +104,7 @@ func (c *FilterWeigherPipelineController) InitPipeline( func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainCinder + c.Recorder = mgr.GetEventRecorder("cortex-cinder-pipeline-controller") if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } diff --git a/internal/scheduling/lib/explainer.go b/internal/scheduling/lib/explainer.go index b32771719..02a4759ff 100644 --- a/internal/scheduling/lib/explainer.go +++ b/internal/scheduling/lib/explainer.go @@ -5,14 +5,8 @@ package lib import ( "context" - "fmt" - "sort" - "time" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/api/errors" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" ) // The explainer gets a scheduling decision and produces a human-readable @@ -21,12 +15,12 @@ type Explainer struct { // The kubernetes client to use for fetching related data. client.Client // The template manager to use for rendering explanations. - templateManager *TemplateManager + //templateManager *TemplateManager } // NewExplainer creates a new explainer with template support. 
func NewExplainer(client client.Client) (*Explainer, error) { - templateManager, err := NewTemplateManager() + /*templateManager, err := NewTemplateManager() if err != nil { return nil, fmt.Errorf("failed to create template manager: %w", err) } @@ -34,15 +28,18 @@ func NewExplainer(client client.Client) (*Explainer, error) { return &Explainer{ Client: client, templateManager: templateManager, + }, nil*/ + return &Explainer{ + Client: client, }, nil } // Explain the given decision and return a human-readable explanation. func (e *Explainer) Explain(ctx context.Context, decision DecisionUpdate) (string, error) { - return e.ExplainWithTemplates(ctx, decision) + return "Explanation generation not implemented yet", nil } -// getResourceType returns a human-readable resource type. +/*// getResourceType returns a human-readable resource type. func (e *Explainer) getResourceType(schedulingDomain v1alpha1.SchedulingDomain) string { switch schedulingDomain { case v1alpha1.SchedulingDomainNova: @@ -728,4 +725,4 @@ func (e *Explainer) ExplainWithTemplates(ctx context.Context, decision DecisionU // Render using templates return e.templateManager.RenderExplanation(explanationCtx) -} +}*/ diff --git a/internal/scheduling/lib/explainer_templates.go b/internal/scheduling/lib/explainer_templates.go index 4476b8b1a..aa67dd5d7 100644 --- a/internal/scheduling/lib/explainer_templates.go +++ b/internal/scheduling/lib/explainer_templates.go @@ -3,7 +3,7 @@ package lib -import ( +/*import ( "bytes" "fmt" "strings" @@ -139,3 +139,4 @@ const impactsTemplate = ` Step impacts: const chainTemplate = `{{if .HasLoop}}Chain (loop detected): {{else}}Chain: {{end}} {{- range $i, $segment := .Segments}}{{if gt $i 0}} -> {{end}}{{$segment.Host}} ({{formatDuration $segment.Duration}}{{if gt $segment.Decisions 1}}; {{$segment.Decisions}} decisions{{end}}){{end}}.` +*/ diff --git a/internal/scheduling/lib/explainer_test.go b/internal/scheduling/lib/explainer_test.go index 0afc8bd9f..ac21290f2 100644 --- 
a/internal/scheduling/lib/explainer_test.go +++ b/internal/scheduling/lib/explainer_test.go @@ -3,7 +3,7 @@ package lib -import ( +/*import ( "context" "sort" "testing" @@ -1473,4 +1473,4 @@ func TestExplainer_StepImpactAnalysis(t *testing.T) { } }) } -} +}*/ diff --git a/internal/scheduling/lib/explainer_types.go b/internal/scheduling/lib/explainer_types.go index 2d3c71be7..bd760ffc6 100644 --- a/internal/scheduling/lib/explainer_types.go +++ b/internal/scheduling/lib/explainer_types.go @@ -3,9 +3,7 @@ package lib -import "time" - -// ExplanationContext holds all data needed to render a complete explanation. +/*// ExplanationContext holds all data needed to render a complete explanation. type ExplanationContext struct { Context ContextData `json:"context"` History *HistoryData `json:"history,omitempty"` @@ -80,3 +78,4 @@ type ChainSegment struct { // number of decisions with this as the target host Decisions int `json:"decisions"` } +*/ diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index 6924de918..cc334c8a5 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -9,8 +9,11 @@ import ( "fmt" "github.com/cobaltcore-dev/cortex/api/v1alpha1" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/events" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/reconcile" @@ -28,6 +31,8 @@ type BasePipelineController[PipelineType any] struct { client.Client // The scheduling domain to scope resources to. SchedulingDomain v1alpha1.SchedulingDomain + // Event recorder for publishing events. 
+ Recorder events.EventRecorder DecisionQueue chan DecisionUpdate } @@ -63,27 +68,102 @@ func (c *BasePipelineController[PipelineType]) updateDecision(ctx context.Contex return fmt.Errorf("failed to create explainer: %w", err) } - explanationText, err := explainer.Explain(ctx, update.ResourceID, update.PipelineName, update.RequestContext, update.Reason, update.Result) + explanationText, err := explainer.Explain(ctx, update) if err != nil { return fmt.Errorf("failed to generate explanation: %w", err) } - // Update the decision with the explanation. + // Try to get existing decision decision := &v1alpha1.Decision{} - if err := c.Get(ctx, client.ObjectKey{Name: update.ResourceID}, decision); err != nil { - return fmt.Errorf("failed to get decision: %w", err) + if err = c.Get(ctx, client.ObjectKey{Name: update.ResourceID}, decision); err != nil { + if client.IgnoreNotFound(err) != nil { + return fmt.Errorf("failed to get decision: %w", err) + } + + // Decision doesn't exist - create new one + decision = &v1alpha1.Decision{ + ObjectMeta: metav1.ObjectMeta{ + Name: update.ResourceID, + }, + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: c.SchedulingDomain, + ResourceID: update.ResourceID, + }, + } + + if err := c.Create(ctx, decision); err != nil { + return fmt.Errorf("failed to create decision: %w", err) + } + log.Info("Created new decision", "resourceID", update.ResourceID) } - if decision.Status.Result == nil { - return errors.New("cannot update decision explanation: result is nil") + // Prepare the scheduling history entry + historyEntry := v1alpha1.SchedulingHistoryEntry{ + OrderedHosts: update.Result.OrderedHosts, + Timestamp: metav1.Now(), + PipelineRef: corev1.ObjectReference{ + Name: update.PipelineName, + }, + Reason: update.Reason, } - decision.Status.Explanation = explanationText - if err := c.Status().Update(ctx, decision); err != nil { + // Check if scheduling failed (no hosts available) + schedulingFailed := len(update.Result.OrderedHosts) == 0 + + // 
Update status with retry on conflict to handle concurrent updates + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + // Get the latest version before each retry attempt + if err := c.Get(ctx, client.ObjectKey{Name: update.ResourceID}, decision); err != nil { + return err + } + + // Apply status updates + decision.Status.Explanation = explanationText + + if schedulingFailed { + // No hosts available - set failed condition + decision.Status.TargetHost = "" + meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ + Type: v1alpha1.DecisionConditionFailed, + Status: metav1.ConditionTrue, + Reason: "NoValidHosts", + Message: "Cannot schedule: No valid hosts available after filtering", + }) + } else { + // Successful scheduling + decision.Status.TargetHost = update.Result.OrderedHosts[0] + meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ + Type: v1alpha1.DecisionConditionReady, + Status: metav1.ConditionTrue, + Reason: "Scheduled", + Message: "Scheduling decision made successfully", + }) + } + + decision.Status.SchedulingHistory = append(decision.Status.SchedulingHistory, historyEntry) + + return c.Status().Update(ctx, decision) + }) + + if err != nil { return fmt.Errorf("failed to update decision status: %w", err) } - log.Info("Successfully updated decision explanation", "resourceID", update.ResourceID) + // Publish event to the decision + if c.Recorder != nil { + if schedulingFailed { + // Warning event for failed scheduling + c.Recorder.Eventf(decision, nil, corev1.EventTypeWarning, "NoValidHosts", "Scheduling", "Cannot schedule: No valid hosts available. %s", explanationText) + log.Info("Published NoValidHosts event", "resourceID", update.ResourceID) + } else { + // Normal event for successful scheduling + reasonStr := string(update.Reason) + c.Recorder.Eventf(decision, nil, corev1.EventTypeNormal, reasonStr, "Scheduling", "Scheduled to %s. 
%s", decision.Status.TargetHost, explanationText) + log.Info("Published scheduling event", "resourceID", update.ResourceID, "targetHost", decision.Status.TargetHost, "reason", update.Reason) + } + } + + log.Info("Successfully updated decision", "resourceID", update.ResourceID, "targetHost", decision.Status.TargetHost, "schedulingFailed", schedulingFailed) return nil } diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller.go b/internal/scheduling/machines/filter_weigher_pipeline_controller.go index b2f1b9492..661c8e884 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller.go @@ -181,6 +181,7 @@ func (c *FilterWeigherPipelineController) handleMachine() handler.EventHandler { func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainMachines + c.Recorder = mgr.GetEventRecorder("cortex-machines-pipeline-controller") if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller.go b/internal/scheduling/manila/filter_weigher_pipeline_controller.go index 9f9e1c23a..0706038f6 100644 --- a/internal/scheduling/manila/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller.go @@ -103,6 +103,7 @@ func (c *FilterWeigherPipelineController) InitPipeline( func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainManila + c.Recorder = mgr.GetEventRecorder("cortex-manila-pipeline-controller") if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go 
b/internal/scheduling/nova/filter_weigher_pipeline_controller.go index 5020f3320..ea4bf3b0e 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -116,6 +116,7 @@ func (c *FilterWeigherPipelineController) InitPipeline( func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainNova + c.Recorder = mgr.GetEventRecorder("cortex-nova-pipeline-controller") c.gatherer = &candidateGatherer{Client: mcl} if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller.go b/internal/scheduling/pods/filter_weigher_pipeline_controller.go index 917af0664..5083a5062 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller.go @@ -186,6 +186,7 @@ func (c *FilterWeigherPipelineController) handlePod() handler.EventHandler { func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainPods + c.Recorder = mgr.GetEventRecorder("cortex-pods-pipeline-controller") if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } From 408349f8407c702529dadf97624abc994be9ca3e Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 10:11:38 +0100 Subject: [PATCH 22/36] Fix machines test --- ...filter_weigher_pipeline_controller_test.go | 220 ++---------------- 1 file changed, 18 insertions(+), 202 deletions(-) diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go index 42c2e1661..fa73eab3a 100644 --- 
a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go @@ -12,194 +12,12 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheduling scheme: %v", err) - } - if err := ironcorev1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add ironcore scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - machinePools []ironcorev1alpha1.MachinePool - machine *ironcorev1alpha1.Machine - expectError bool - expectDecision bool - expectTargetHost string - expectMachinePool string - }{ - { - name: "successful machine decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainMachines, - ResourceID: "test-machine", - PipelineRef: corev1.ObjectReference{ - Name: "machines-scheduler", - }, - MachineRef: &corev1.ObjectReference{ - Name: "test-machine", - Namespace: "default", - }, - }, - }, - machinePools: []ironcorev1alpha1.MachinePool{ - { - ObjectMeta: metav1.ObjectMeta{Name: "pool1"}, - }, - { - ObjectMeta: metav1.ObjectMeta{Name: "pool2"}, - }, - }, - machine: &ironcorev1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-machine", - Namespace: "default", - }, - Spec: ironcorev1alpha1.MachineSpec{ - Scheduler: "", - }, - }, - expectError: false, - expectDecision: true, - expectTargetHost: "pool1", // NoopFilter returns 
first pool - expectMachinePool: "pool1", - }, - { - name: "no machine pools available", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pools", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainMachines, - ResourceID: "test-machine", - PipelineRef: corev1.ObjectReference{ - Name: "machines-scheduler", - }, - MachineRef: &corev1.ObjectReference{ - Name: "test-machine", - Namespace: "default", - }, - }, - }, - machinePools: []ironcorev1alpha1.MachinePool{}, - expectError: true, - expectDecision: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.decision} - for i := range tt.machinePools { - objects = append(objects, &tt.machinePools[i]) - } - if tt.machine != nil { - objects = append(objects, tt.machine) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]]{ - Pipelines: map[string]lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]{ - "machines-scheduler": createMockPipeline(), - }, - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - controller.Client = client - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("expected error but got none") - return - } - - if !tt.expectError && err != nil { - t.Errorf("expected no error, got: %v", err) - return - } - - if result.RequeueAfter > 0 { - t.Errorf("unexpected requeue: %v", result.RequeueAfter) - } - - // Verify decision status if expected - if tt.expectDecision { - var updatedDecision v1alpha1.Decision - err := client.Get(context.Background(), 
req.NamespacedName, &updatedDecision) - if err != nil { - t.Errorf("Failed to get updated decision: %v", err) - return - } - - if updatedDecision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - - if updatedDecision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") - return - } - - if *updatedDecision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *updatedDecision.Status.Result.TargetHost) - } - - // Verify machine was updated with machine pool ref - if tt.machine != nil { - var updatedMachine ironcorev1alpha1.Machine - err := client.Get(context.Background(), types.NamespacedName{ - Name: tt.machine.Name, - Namespace: tt.machine.Namespace, - }, &updatedMachine) - if err != nil { - t.Errorf("Failed to get updated machine: %v", err) - return - } - - if updatedMachine.Spec.MachinePoolRef == nil { - t.Error("expected machine pool ref to be set") - return - } - - if updatedMachine.Spec.MachinePoolRef.Name != tt.expectMachinePool { - t.Errorf("expected machine pool %q, got %q", tt.expectMachinePool, updatedMachine.Spec.MachinePoolRef.Name) - } - } - } - }) - } -} - func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { controller := &FilterWeigherPipelineController{ Monitor: lib.FilterWeigherPipelineMonitor{}, @@ -459,9 +277,8 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { found := false for _, decision := range decisions.Items { - if decision.Spec.MachineRef != nil && - decision.Spec.MachineRef.Name == tt.machine.Name && - decision.Spec.MachineRef.Namespace == tt.machine.Namespace { + if decision.Spec.ResourceID == tt.machine.Name && + decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainMachines { found = true // Verify decision properties @@ -471,22 +288,20 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { if decision.Spec.ResourceID != tt.machine.Name { 
t.Errorf("expected resource ID %q, got %q", tt.machine.Name, decision.Spec.ResourceID) } - if decision.Spec.PipelineRef.Name != "machines-scheduler" { - t.Errorf("expected pipeline ref %q, got %q", "machines-scheduler", decision.Spec.PipelineRef.Name) - } // Check if result was set (only for successful cases) if !tt.expectError && tt.expectTargetHost != "" { - if decision.Status.Result == nil { - t.Error("expected decision result to be set") - return + entry := decision.Status.SchedulingHistory[len(decision.Status.SchedulingHistory)-1] + if entry.PipelineRef.Name != "machines-scheduler" { + t.Errorf("expected pipeline name %q in scheduling history, got %q", "machines-scheduler", entry.PipelineRef.Name) } - if decision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") + if len(entry.OrderedHosts) == 0 { + t.Error("expected scheduling history entry to have a target host") return } - if *decision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *decision.Status.Result.TargetHost) + targetHost := entry.OrderedHosts[0] + if targetHost != tt.expectTargetHost { + t.Errorf("expected target host %q in scheduling history, got %q", tt.expectTargetHost, targetHost) } } break @@ -506,9 +321,8 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { } for _, decision := range decisions.Items { - if decision.Spec.MachineRef != nil && - decision.Spec.MachineRef.Name == tt.machine.Name && - decision.Spec.MachineRef.Namespace == tt.machine.Namespace { + if decision.Spec.ResourceID == tt.machine.Name && + decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainMachines { t.Error("expected no decision to be created but found one") break } @@ -547,14 +361,16 @@ func createMockPipeline() lib.FilterWeigherPipeline[ironcore.MachinePipelineRequ type mockMachinePipeline struct{} -func (m *mockMachinePipeline) Run(request ironcore.MachinePipelineRequest) 
(v1alpha1.DecisionResult, error) { +func (m *mockMachinePipeline) Run(request ironcore.MachinePipelineRequest) (lib.FilterWeigherPipelineResult, error) { if len(request.Pools) == 0 { - return v1alpha1.DecisionResult{}, nil + return lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{}, + }, nil } // Return the first pool as the target host targetHost := request.Pools[0].Name - return v1alpha1.DecisionResult{ - TargetHost: &targetHost, + return lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{targetHost}, }, nil } From 0f583d6ccc10bb0b8324b558e102848ce38d3036 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 10:20:22 +0100 Subject: [PATCH 23/36] Fix pipeline deletion handling in Reconcile method and update test cases --- .../scheduling/lib/pipeline_controller.go | 2 +- .../lib/pipeline_controller_test.go | 588 ++++-------------- 2 files changed, 116 insertions(+), 474 deletions(-) diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index cc334c8a5..381d1a0b7 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -200,7 +200,7 @@ func (c *BasePipelineController[PipelineType]) Reconcile(ctx context.Context, re err := c.Get(ctx, req.NamespacedName, pipeline) if err != nil { - if client.IgnoreNotFound(err) != nil { + if client.IgnoreNotFound(err) == nil { // Pipeline was deleted log.Info("pipeline deleted, removing from cache", "pipelineName", req.Name) delete(c.Pipelines, req.Name) diff --git a/internal/scheduling/lib/pipeline_controller_test.go b/internal/scheduling/lib/pipeline_controller_test.go index 9166288fe..265d1d373 100644 --- a/internal/scheduling/lib/pipeline_controller_test.go +++ b/internal/scheduling/lib/pipeline_controller_test.go @@ -11,9 +11,9 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + ctrl 
"sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/controller-runtime/pkg/event" "github.com/cobaltcore-dev/cortex/api/v1alpha1" ) @@ -282,7 +282,7 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { PipelineConfigs: make(map[string]v1alpha1.Pipeline), } - controller.handlePipelineChange(context.Background(), tt.pipeline, nil) + controller.handlePipelineChange(context.Background(), tt.pipeline) // Check if pipeline is in map _, inMap := controller.Pipelines[tt.pipeline.Name] @@ -306,140 +306,7 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { } } -func TestBasePipelineController_HandlePipelineCreated(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(pipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}). 
- Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } - - evt := event.CreateEvent{ - Object: pipeline, - } - - controller.HandlePipelineCreated(context.Background(), evt, nil) - - if _, exists := controller.Pipelines[pipeline.Name]; !exists { - t.Error("Expected pipeline to be in map after creation") - } -} - -func TestBasePipelineController_HandlePipelineUpdated(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - oldPipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - } - - newPipeline := oldPipeline.DeepCopy() - newPipeline.Spec.Description = "Updated description" - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(newPipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}). 
- Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } - - evt := event.UpdateEvent{ - ObjectOld: oldPipeline, - ObjectNew: newPipeline, - } - - controller.HandlePipelineUpdated(context.Background(), evt, nil) - - if _, exists := controller.Pipelines[newPipeline.Name]; !exists { - t.Error("Expected pipeline to be in map after update") - } -} - -func TestBasePipelineController_HandlePipelineDeleted(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - }, - } - - controller := &BasePipelineController[mockPipeline]{ - Pipelines: map[string]mockPipeline{ - "test-pipeline": {name: "test-pipeline"}, - }, - PipelineConfigs: map[string]v1alpha1.Pipeline{ - "test-pipeline": *pipeline, - }, - } - - evt := event.DeleteEvent{ - Object: pipeline, - } - - controller.HandlePipelineDeleted(context.Background(), evt, nil) - - if _, exists := controller.Pipelines[pipeline.Name]; exists { - t.Error("Expected pipeline to be removed from map after deletion") - } - if _, exists := controller.PipelineConfigs[pipeline.Name]; exists { - t.Error("Expected pipeline config to be removed from map after deletion") - } -} - -func TestBasePipelineController_handleKnowledgeChange(t *testing.T) { +func TestBasePipelineController_Reconcile(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ 
-447,398 +314,173 @@ func TestBasePipelineController_handleKnowledgeChange(t *testing.T) { tests := []struct { name string - knowledge *v1alpha1.Knowledge - pipelines []v1alpha1.Pipeline + pipeline *v1alpha1.Pipeline + pipelineExists bool schedulingDomain v1alpha1.SchedulingDomain - expectReEvaluated []string + initPipelineError bool + expectInMap bool + expectReady bool }{ { - name: "knowledge change triggers pipeline re-evaluation", - knowledge: &v1alpha1.Knowledge{ + name: "reconcile new pipeline", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", + Name: "test-pipeline", }, - Spec: v1alpha1.KnowledgeSpec{ + Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - }, - }, - pipelines: []v1alpha1.Pipeline{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "pipeline-1", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "pipeline-2", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, + Type: v1alpha1.PipelineTypeFilterWeigher, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, - schedulingDomain: v1alpha1.SchedulingDomainNova, - expectReEvaluated: []string{"pipeline-1", "pipeline-2"}, + pipelineExists: true, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectInMap: true, + expectReady: true, }, { - name: "knowledge change in different scheduling domain", - knowledge: &v1alpha1.Knowledge{ + name: "reconcile updated pipeline", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: 
"default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - }, - }, - pipelines: []v1alpha1.Pipeline{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "nova-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, - }, - }, - schedulingDomain: v1alpha1.SchedulingDomainNova, - expectReEvaluated: []string{}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []client.Object{tt.knowledge} - for i := range tt.pipelines { - objects = append(objects, &tt.pipelines[i]) - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). - Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: tt.schedulingDomain, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, + Name: "test-pipeline", }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } - - controller.handleKnowledgeChange(context.Background(), tt.knowledge, nil) - - // Verify expected pipelines were re-evaluated by checking if they're in the map - for _, expectedName := range tt.expectReEvaluated { - if _, exists := controller.Pipelines[expectedName]; !exists { - t.Errorf("Expected pipeline %s to be re-evaluated", expectedName) - } - } - }) - } -} - -func TestBasePipelineController_HandleKnowledgeCreated(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - knowledge := &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: 
v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - }, - } - - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Description: "Updated description", + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, + pipelineExists: true, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectInMap: true, + expectReady: true, }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(knowledge, pipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). - Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } - - evt := event.CreateEvent{ - Object: knowledge, - } - - controller.HandleKnowledgeCreated(context.Background(), evt, nil) - - // Pipeline should be re-evaluated and added to map - if _, exists := controller.Pipelines[pipeline.Name]; !exists { - t.Error("Expected pipeline to be re-evaluated after knowledge creation") - } -} - -func TestBasePipelineController_HandleKnowledgeUpdated(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - tests := []struct { - name string - oldKnowledge *v1alpha1.Knowledge - newKnowledge *v1alpha1.Knowledge - expectReEvaluate bool - }{ { - name: "error state changed", - oldKnowledge: 
&v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - Conditions: []metav1.Condition{ - { - Type: v1alpha1.KnowledgeConditionReady, - Status: metav1.ConditionFalse, - }, - }, - }, - }, - newKnowledge: &v1alpha1.Knowledge{ + name: "reconcile deleted pipeline", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", + Name: "deleted-pipeline", }, - Spec: v1alpha1.KnowledgeSpec{ + Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, + Type: v1alpha1.PipelineTypeFilterWeigher, }, }, - expectReEvaluate: true, + pipelineExists: false, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectInMap: false, + expectReady: false, }, { - name: "data became available", - oldKnowledge: &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 0, - }, - }, - newKnowledge: &v1alpha1.Knowledge{ + name: "reconcile pipeline with different scheduling domain", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", + Name: "cinder-pipeline", }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainCinder, + Type: v1alpha1.PipelineTypeFilterWeigher, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, - expectReEvaluate: true, + pipelineExists: true, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectInMap: false, + expectReady: 
false, }, { - name: "no relevant change", - oldKnowledge: &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - }, - }, - newKnowledge: &v1alpha1.Knowledge{ + name: "reconcile pipeline with init error", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", + Name: "error-pipeline", }, - Spec: v1alpha1.KnowledgeSpec{ + Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 15, + Type: v1alpha1.PipelineTypeFilterWeigher, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, - expectReEvaluate: false, + pipelineExists: true, + schedulingDomain: v1alpha1.SchedulingDomainNova, + initPipelineError: true, + expectInMap: false, + expectReady: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, + var objects []client.Object + if tt.pipelineExists { + objects = append(objects, tt.pipeline) } fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(tt.newKnowledge, pipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). + WithObjects(objects...). + WithStatusSubresource(&v1alpha1.Pipeline{}). 
Build() + initializer := &mockPipelineInitializer{ + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + } + if tt.initPipelineError { + initializer.initPipelineFunc = func(ctx context.Context, p v1alpha1.Pipeline) PipelineInitResult[mockPipeline] { + return PipelineInitResult[mockPipeline]{ + FilterErrors: map[string]error{ + "test-filter": errors.New("filter initialization failed"), + }, + } + } + } + controller := &BasePipelineController[mockPipeline]{ Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), + SchedulingDomain: tt.schedulingDomain, + Initializer: initializer, + Pipelines: make(map[string]mockPipeline), + PipelineConfigs: make(map[string]v1alpha1.Pipeline), } - evt := event.UpdateEvent{ - ObjectOld: tt.oldKnowledge, - ObjectNew: tt.newKnowledge, + // For delete test, pre-populate the maps + if !tt.pipelineExists { + controller.Pipelines[tt.pipeline.Name] = mockPipeline{name: tt.pipeline.Name} + controller.PipelineConfigs[tt.pipeline.Name] = *tt.pipeline } - controller.HandleKnowledgeUpdated(context.Background(), evt, nil) - - _, exists := controller.Pipelines[pipeline.Name] - if tt.expectReEvaluate && !exists { - t.Error("Expected pipeline to be re-evaluated") + req := ctrl.Request{ + NamespacedName: client.ObjectKey{Name: tt.pipeline.Name}, } - if !tt.expectReEvaluate && exists { - t.Error("Expected pipeline not to be re-evaluated") - } - }) - } -} - -func TestBasePipelineController_HandleKnowledgeDeleted(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - knowledge := &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: 
v1alpha1.SchedulingDomainNova, - }, - } - - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(pipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}). - Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: map[string]mockPipeline{ - "test-pipeline": {name: "test-pipeline"}, - }, - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } + _, err := controller.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile failed: %v", err) + } - evt := event.DeleteEvent{ - Object: knowledge, - } + // Check if pipeline is in map + _, inMap := controller.Pipelines[tt.pipeline.Name] + if inMap != tt.expectInMap { + t.Errorf("Expected pipeline in map: %v, got: %v", tt.expectInMap, inMap) + } - controller.HandleKnowledgeDeleted(context.Background(), evt, nil) + // Check pipeline status if it exists + if tt.pipelineExists { + var updatedPipeline v1alpha1.Pipeline + err := fakeClient.Get(context.Background(), client.ObjectKey{Name: tt.pipeline.Name}, &updatedPipeline) + if err != nil { + t.Fatalf("Failed to get updated pipeline: %v", err) + } - // Check that the pipeline was re-evaluated and is still in the map - if _, exists := controller.Pipelines[pipeline.Name]; !exists { - t.Error("Expected pipeline to be re-evaluated after knowledge deletion") + ready := meta.IsStatusConditionTrue(updatedPipeline.Status.Conditions, v1alpha1.PipelineConditionReady) + if ready != tt.expectReady { + t.Errorf("Expected ready: %v, got: %v", tt.expectReady, 
ready) + } + } + }) } } From a97882233fe50ac356c25936c1000d11647b9e91 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 10:25:05 +0100 Subject: [PATCH 24/36] Fix duplicate lib import --- internal/scheduling/cinder/external_scheduler_api.go | 3 +-- internal/scheduling/manila/external_scheduler_api.go | 3 +-- internal/scheduling/nova/external_scheduler_api.go | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/internal/scheduling/cinder/external_scheduler_api.go b/internal/scheduling/cinder/external_scheduler_api.go index 4a327b8f7..fd78f3d27 100644 --- a/internal/scheduling/cinder/external_scheduler_api.go +++ b/internal/scheduling/cinder/external_scheduler_api.go @@ -15,14 +15,13 @@ import ( api "github.com/cobaltcore-dev/cortex/api/external/cinder" - "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "sigs.k8s.io/controller-runtime/pkg/metrics" ) type HTTPAPIDelegate interface { // Process the scheduling request from the API. - ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*scheduling.FilterWeigherPipelineResult, error) } type HTTPAPI interface { diff --git a/internal/scheduling/manila/external_scheduler_api.go b/internal/scheduling/manila/external_scheduler_api.go index ece984407..d7857faa2 100644 --- a/internal/scheduling/manila/external_scheduler_api.go +++ b/internal/scheduling/manila/external_scheduler_api.go @@ -15,14 +15,13 @@ import ( api "github.com/cobaltcore-dev/cortex/api/external/manila" - "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "sigs.k8s.io/controller-runtime/pkg/metrics" ) type HTTPAPIDelegate interface { // Process the scheduling request from the API. 
- ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*scheduling.FilterWeigherPipelineResult, error) } type HTTPAPI interface { diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index 12de74e62..0845249f9 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -16,7 +16,6 @@ import ( api "github.com/cobaltcore-dev/cortex/api/external/nova" - "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "sigs.k8s.io/controller-runtime/pkg/metrics" ) @@ -29,7 +28,7 @@ type HTTPAPIConfig struct { type HTTPAPIDelegate interface { // Process the scheduling request from the API. - ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*scheduling.FilterWeigherPipelineResult, error) } type HTTPAPI interface { From 45e256814cd7b286ed7383a7aeeb94bb599cfa98 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 10:39:55 +0100 Subject: [PATCH 25/36] Remove linting errors --- .../cinder/external_scheduler_api_test.go | 15 +- ...filter_weigher_pipeline_controller_test.go | 7 +- internal/scheduling/lib/explainer.go | 699 -------- .../scheduling/lib/explainer_templates.go | 142 -- internal/scheduling/lib/explainer_test.go | 1476 ----------------- internal/scheduling/lib/explainer_types.go | 81 - ...filter_weigher_pipeline_controller_test.go | 7 +- 7 files changed, 16 insertions(+), 2411 deletions(-) delete mode 100644 internal/scheduling/lib/explainer_templates.go delete mode 100644 internal/scheduling/lib/explainer_test.go delete mode 100644 internal/scheduling/lib/explainer_types.go diff 
--git a/internal/scheduling/cinder/external_scheduler_api_test.go b/internal/scheduling/cinder/external_scheduler_api_test.go index 35b29fb4a..ff9b85ef4 100644 --- a/internal/scheduling/cinder/external_scheduler_api_test.go +++ b/internal/scheduling/cinder/external_scheduler_api_test.go @@ -175,7 +175,10 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { }, Pipeline: "test-pipeline", } - data, _ := json.Marshal(req) + data, err := json.Marshal(req) + if err != nil { + t.Fatalf("Failed to marshal request data: %v", err) + } return string(data) }(), processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { @@ -199,7 +202,10 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { }, Pipeline: "test-pipeline", } - data, _ := json.Marshal(req) + data, err := json.Marshal(req) + if err != nil { + t.Fatalf("Failed to marshal request data: %v", err) + } return string(data) }(), processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { @@ -220,7 +226,10 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { }, Pipeline: "test-pipeline", } - data, _ := json.Marshal(req) + data, err := json.Marshal(req) + if err != nil { + t.Fatalf("Failed to marshal request data: %v", err) + } return string(data) }(), processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go index 3a16f2996..96fafa807 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go @@ -157,11 +157,8 @@ func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { if tt.expectResult { if result == nil { t.Error("Expected result but 
got nil") - } else { - // Verify result has ordered hosts - if len(result.OrderedHosts) == 0 && len(tt.request.Hosts) > 0 { - t.Error("Expected ordered hosts in result") - } + } else if len(result.OrderedHosts) == 0 && len(tt.request.Hosts) > 0 { + t.Error("Expected ordered hosts in result") } } }) diff --git a/internal/scheduling/lib/explainer.go b/internal/scheduling/lib/explainer.go index 02a4759ff..d25ca9177 100644 --- a/internal/scheduling/lib/explainer.go +++ b/internal/scheduling/lib/explainer.go @@ -14,21 +14,10 @@ import ( type Explainer struct { // The kubernetes client to use for fetching related data. client.Client - // The template manager to use for rendering explanations. - //templateManager *TemplateManager } // NewExplainer creates a new explainer with template support. func NewExplainer(client client.Client) (*Explainer, error) { - /*templateManager, err := NewTemplateManager() - if err != nil { - return nil, fmt.Errorf("failed to create template manager: %w", err) - } - - return &Explainer{ - Client: client, - templateManager: templateManager, - }, nil*/ return &Explainer{ Client: client, }, nil @@ -38,691 +27,3 @@ func NewExplainer(client client.Client) (*Explainer, error) { func (e *Explainer) Explain(ctx context.Context, decision DecisionUpdate) (string, error) { return "Explanation generation not implemented yet", nil } - -/*// getResourceType returns a human-readable resource type. -func (e *Explainer) getResourceType(schedulingDomain v1alpha1.SchedulingDomain) string { - switch schedulingDomain { - case v1alpha1.SchedulingDomainNova: - return "nova server" - case v1alpha1.SchedulingDomainManila: - return "manila share" - case v1alpha1.SchedulingDomainCinder: - return "cinder volume" - case v1alpha1.SchedulingDomainMachines: - return "ironcore machine" - case v1alpha1.SchedulingDomainPods: - return "pod" - default: - return "resource" - } -} - -// calculateScoreGap calculates the gap between first and second place. 
-func (e *Explainer) calculateScoreGap(weights map[string]float64) float64 { - if len(weights) < 2 { - return 0.0 - } - - scores := make([]float64, 0, len(weights)) - for _, score := range weights { - scores = append(scores, score) - } - - sort.Slice(scores, func(i, j int) bool { - return scores[i] > scores[j] - }) - - return scores[0] - scores[1] -} - -// fetchDecisionChain retrieves all decisions in the history chain. -func (e *Explainer) fetchDecisionChain(ctx context.Context, decision *v1alpha1.Decision) ([]*v1alpha1.Decision, error) { - var chainDecisions []*v1alpha1.Decision - logger := log.FromContext(ctx) - - // Add all historical decisions - if decision.Status.History != nil { - for _, ref := range *decision.Status.History { - histDecision := &v1alpha1.Decision{} - if err := e.Get(ctx, client.ObjectKey{ - Namespace: ref.Namespace, - Name: ref.Name, - }, histDecision); err != nil { - if errors.IsNotFound(err) { - logger.Info("History decision not found, skipping from chain analysis", - "decision", ref.Name, - "namespace", ref.Namespace, - "uid", ref.UID) - continue // Skip missing decisions instead of failing - } - // For other errors, still fail - return nil, err - } - chainDecisions = append(chainDecisions, histDecision) - } - } - - // Add current decision - chainDecisions = append(chainDecisions, decision) - - return chainDecisions, nil -} - -// HostSegment represents a segment in the host chain with duration and decision count. -type HostSegment struct { - host string - duration time.Duration // Full precision duration - decisions int -} - -// buildHostSegments creates host segments from decisions with durations. 
-func (e *Explainer) buildHostSegments(decisions []*v1alpha1.Decision) []HostSegment { - if len(decisions) < 2 { - return []HostSegment{} - } - - // Extract host chain - hostChain := make([]string, 0, len(decisions)) - for _, decision := range decisions { - host := "(n/a)" - if decision.Status.Result != nil && decision.Status.Result.TargetHost != nil { - host = *decision.Status.Result.TargetHost - } - hostChain = append(hostChain, host) - } - - // Build segments with durations - segments := make([]HostSegment, 0) - if len(hostChain) > 0 { - currentHost := hostChain[0] - segmentStart := 0 - - for i := 1; i <= len(hostChain); i++ { - // Check if we've reached the end or found a different host - if i == len(hostChain) || hostChain[i] != currentHost { - // Calculate duration for this segment - startTime := decisions[segmentStart].CreationTimestamp.Time - var endTime = startTime // Default to 0 duration for last segment - if i < len(hostChain) { - endTime = decisions[i].CreationTimestamp.Time - } - - duration := endTime.Sub(startTime) - - segments = append(segments, HostSegment{ - host: currentHost, - duration: duration, - decisions: i - segmentStart, - }) - - if i < len(hostChain) { - currentHost = hostChain[i] - segmentStart = i - } - } - } - } - - return segments -} - -// detectLoop checks if there are repeated hosts in the segments. -func (e *Explainer) detectLoop(segments []HostSegment) bool { - seenHosts := make(map[string]bool) - for _, segment := range segments { - if seenHosts[segment.host] { - return true - } - seenHosts[segment.host] = true - } - return false -} - -// findWinner returns the host with the highest score. -func (e *Explainer) findWinner(scores map[string]float64) string { - winner := "" - maxScore := -999999.0 - for host, score := range scores { - if score > maxScore { - maxScore = score - winner = host - } - } - return winner -} - -// ScoreCalculationResult holds both final scores and deleted host tracking information. 
-type ScoreCalculationResult struct { - FinalScores map[string]float64 - DeletedHosts map[string][]string // host -> list of steps that deleted it -} - -// StepImpact represents the impact of a single pipeline step on the winning host. -type StepImpact struct { - Step string - ScoreBefore float64 - ScoreAfter float64 - ScoreDelta float64 - CompetitorsRemoved int - PromotedToFirst bool -} - -// calculateScoresFromSteps processes step results sequentially to compute final scores and track deleted hosts. -func (e *Explainer) calculateScoresFromSteps(inputWeights map[string]float64, stepResults []v1alpha1.StepResult) ScoreCalculationResult { - if len(inputWeights) == 0 { - return ScoreCalculationResult{ - FinalScores: map[string]float64{}, - DeletedHosts: map[string][]string{}, - } - } - - // Start with input values as initial scores - currentScores := make(map[string]float64) - for hostName, inputValue := range inputWeights { - currentScores[hostName] = inputValue - } - - deletedHosts := make(map[string][]string) - - // Process each step sequentially - for _, stepResult := range stepResults { - // Check which hosts will be deleted in this step - for hostName := range currentScores { - if _, exists := stepResult.Activations[hostName]; !exists { - // Host not in this step's activations - will be deleted - deletedHosts[hostName] = append(deletedHosts[hostName], stepResult.StepName) - } - } - - // Apply activations and remove hosts not in this step - newScores := make(map[string]float64) - for hostName, score := range currentScores { - if activation, exists := stepResult.Activations[hostName]; exists { - // Add activation to current score - newScores[hostName] = score + activation - } - // Hosts not in activations are removed (don't copy to newScores) - } - currentScores = newScores - } - - return ScoreCalculationResult{ - FinalScores: currentScores, - DeletedHosts: deletedHosts, - } -} - -// calculateScoresWithoutStep processes step results while skipping one specific 
step. -func (e *Explainer) calculateScoresWithoutStep(inputWeights map[string]float64, stepResults []v1alpha1.StepResult, skipIndex int) ScoreCalculationResult { - if len(inputWeights) == 0 || skipIndex < 0 || skipIndex >= len(stepResults) { - return e.calculateScoresFromSteps(inputWeights, stepResults) - } - - // Create reduced step results without the skipped step - reducedSteps := make([]v1alpha1.StepResult, 0, len(stepResults)-1) - reducedSteps = append(reducedSteps, stepResults[:skipIndex]...) - reducedSteps = append(reducedSteps, stepResults[skipIndex+1:]...) - - return e.calculateScoresFromSteps(inputWeights, reducedSteps) -} - -// findCriticalSteps determines which steps change the winning host using backward elimination. -func (e *Explainer) findCriticalSteps(decision *v1alpha1.Decision) []string { - result := decision.Status.Result - if result == nil || len(result.StepResults) == 0 { - return []string{} - } - - // Get input weights (prefer raw, fall back to normalized) - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - default: - return []string{} - } - - // Calculate baseline scores with all steps - baselineResult := e.calculateScoresFromSteps(inputWeights, result.StepResults) - baselineWinner := e.findWinner(baselineResult.FinalScores) - - if baselineWinner == "" { - return []string{} - } - - criticalSteps := make([]string, 0) - - // Try removing each step one by one - for i, stepResult := range result.StepResults { - // Calculate scores without this step - reducedResult := e.calculateScoresWithoutStep(inputWeights, result.StepResults, i) - - // Find winner without this step - reducedWinner := e.findWinner(reducedResult.FinalScores) - - // If removing this step changes the winner, it's critical - if reducedWinner != baselineWinner { - criticalSteps = append(criticalSteps, stepResult.StepName) - 
} - } - - return criticalSteps -} - -func (e *Explainer) calculateStepImpacts(inputWeights map[string]float64, stepResults []v1alpha1.StepResult, targetHost string) []StepImpact { - if len(inputWeights) == 0 || len(stepResults) == 0 { - return []StepImpact{} - } - - impacts := make([]StepImpact, 0, len(stepResults)) - currentScores := make(map[string]float64) - - // Start with input values as initial scores - for hostName, inputValue := range inputWeights { - currentScores[hostName] = inputValue - } - - // Track target host's score before first step - scoreBefore := currentScores[targetHost] - - // Process each pipeline step and track the target host's evolution - for _, stepResult := range stepResults { - // Count how many competitors will be removed in this step - competitorsRemoved := 0 - for hostName := range currentScores { - if hostName != targetHost { - if _, exists := stepResult.Activations[hostName]; !exists { - competitorsRemoved++ - } - } - } - - // Check if target host was #1 before this step - wasFirst := true - targetScoreBefore := currentScores[targetHost] - for host, score := range currentScores { - if host != targetHost && score > targetScoreBefore { - wasFirst = false - break - } - } - - // Apply activations and remove hosts not in this step - newScores := make(map[string]float64) - for hostName, score := range currentScores { - if activation, exists := stepResult.Activations[hostName]; exists { - newScores[hostName] = score + activation - } - // Hosts not in activations are removed (don't copy to newScores) - } - - // Get target host's score after this step - scoreAfter := newScores[targetHost] - - // Check if target host became #1 after this step - isFirstAfter := true - for host, score := range newScores { - if host != targetHost && score > scoreAfter { - isFirstAfter = false - break - } - } - - promotedToFirst := !wasFirst && isFirstAfter - - impacts = append(impacts, StepImpact{ - Step: stepResult.StepName, - ScoreBefore: scoreBefore, - 
ScoreAfter: scoreAfter, - ScoreDelta: scoreAfter - scoreBefore, - CompetitorsRemoved: competitorsRemoved, - PromotedToFirst: promotedToFirst, - }) - - // Update for next iteration - currentScores = newScores - scoreBefore = scoreAfter - } - - return impacts -} - -// Template data building functions - these functions extract and structure -// decision data into formats suitable for template rendering. - -// buildContextData creates context data for template rendering. -func (e *Explainer) buildContextData(decision DecisionUpdate) ContextData { - resourceType := e.getResourceType(decision.Spec.SchedulingDomain) - - history := decision.Status.History - isInitial := history == nil || len(*history) == 0 - - decisionNumber := 1 - if !isInitial { - decisionNumber = len(*history) + 1 - if decision.Status.Precedence != nil { - decisionNumber = *decision.Status.Precedence + 1 - } - } - - return ContextData{ - ResourceType: resourceType, - DecisionNumber: decisionNumber, - IsInitial: isInitial, - } -} - -// buildHistoryData creates history comparison data for template rendering. 
-func (e *Explainer) buildHistoryData(ctx context.Context, decision *v1alpha1.Decision) (*HistoryData, error) { - history := decision.Status.History - if history == nil || len(*history) == 0 { - return nil, nil - } - - // Get the last decision - lastDecisionRef := (*history)[len(*history)-1] - lastDecision := &v1alpha1.Decision{} - if err := e.Get(ctx, client.ObjectKey{ - Namespace: lastDecisionRef.Namespace, - Name: lastDecisionRef.Name, - }, lastDecision); err != nil { - logger := log.FromContext(ctx) - if errors.IsNotFound(err) { - logger.Info("History decision not found, skipping history comparison", - "decision", lastDecisionRef.Name, - "namespace", lastDecisionRef.Namespace, - "uid", lastDecisionRef.UID) - return nil, nil // Skip history comparison instead of failing - } - // For other errors, still fail - return nil, err - } - - lastTarget := "(n/a)" - if lastDecision.Status.Result != nil && lastDecision.Status.Result.TargetHost != nil { - lastTarget = *lastDecision.Status.Result.TargetHost - } - - newTarget := "(n/a)" - if decision.Status.Result != nil && decision.Status.Result.TargetHost != nil { - newTarget = *decision.Status.Result.TargetHost - } - - return &HistoryData{ - PreviousTarget: lastTarget, - CurrentTarget: newTarget, - }, nil -} - -// buildWinnerData creates winner analysis data for template rendering. 
-func (e *Explainer) buildWinnerData(decision *v1alpha1.Decision) *WinnerData { - result := decision.Status.Result - if result == nil || result.TargetHost == nil { - return nil - } - - targetHost := *result.TargetHost - - // Get target host score - targetScore := 0.0 - if result.AggregatedOutWeights != nil { - if score, exists := result.AggregatedOutWeights[targetHost]; exists { - targetScore = score - } - } - - // Count hosts evaluated - hostsEvaluated := len(result.OrderedHosts) - if hostsEvaluated == 0 && result.AggregatedOutWeights != nil { - hostsEvaluated = len(result.AggregatedOutWeights) - } - - // Calculate score gap to second place - gap := e.calculateScoreGap(result.AggregatedOutWeights) - - return &WinnerData{ - HostName: targetHost, - Score: targetScore, - Gap: gap, - HostsEvaluated: hostsEvaluated, - HasGap: gap > 0, - } -} - -// buildInputData creates input comparison data for template rendering. -func (e *Explainer) buildInputData(decision *v1alpha1.Decision) *InputData { - result := decision.Status.Result - if result == nil || result.TargetHost == nil { - return nil - } - - targetHost := *result.TargetHost - - // Get input weights (prefer raw, fall back to normalized) - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - default: - return nil - } - - // Find input winner - inputWinner := "" - inputWinnerScore := -999999.0 - for host, score := range inputWeights { - if score > inputWinnerScore { - inputWinnerScore = score - inputWinner = host - } - } - - if inputWinner == "" { - return nil - } - - // Get target host's final score - targetFinalScore := 0.0 - if result.AggregatedOutWeights != nil { - if score, exists := result.AggregatedOutWeights[targetHost]; exists { - targetFinalScore = score - } - } - - return &InputData{ - InputWinner: inputWinner, - InputScore: inputWinnerScore, - 
FinalWinner: targetHost, - FinalScore: targetFinalScore, - FinalInputScore: inputWeights[targetHost], - InputConfirmed: inputWinner == targetHost, - } -} - -// buildCriticalStepsData creates critical steps data for template rendering. -func (e *Explainer) buildCriticalStepsData(decision *v1alpha1.Decision) *CriticalStepsData { - result := decision.Status.Result - if result == nil || result.TargetHost == nil || len(result.StepResults) == 0 { - return nil - } - - criticalSteps := e.findCriticalSteps(decision) - totalSteps := len(result.StepResults) - - return &CriticalStepsData{ - Steps: criticalSteps, - TotalSteps: totalSteps, - IsInputOnly: len(criticalSteps) == 0, - RequiresAll: len(criticalSteps) == totalSteps, - } -} - -// buildDeletedHostsData creates deleted hosts data for template rendering. -func (e *Explainer) buildDeletedHostsData(decision *v1alpha1.Decision) *DeletedHostsData { - result := decision.Status.Result - if result == nil || result.StepResults == nil || len(result.StepResults) == 0 { - return nil - } - - // Get input weights (prefer raw, fall back to normalized) - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - default: - return nil - } - - // Calculate scores and get deleted hosts information - scoreResult := e.calculateScoresFromSteps(inputWeights, result.StepResults) - - if len(scoreResult.DeletedHosts) == 0 { - return nil - } - - // Find input winner - inputWinner := "" - inputWinnerScore := -999999.0 - for host, score := range inputWeights { - if score > inputWinnerScore { - inputWinnerScore = score - inputWinner = host - } - } - - // Build list of deleted hosts - deletedHosts := make([]DeletedHostInfo, 0, len(scoreResult.DeletedHosts)) - for hostName, steps := range scoreResult.DeletedHosts { - deletedHosts = append(deletedHosts, DeletedHostInfo{ - Name: hostName, - Steps: steps, 
- IsInputWinner: hostName == inputWinner, - }) - } - - return &DeletedHostsData{ - DeletedHosts: deletedHosts, - } -} - -// buildChainData creates chain analysis data for template rendering. -func (e *Explainer) buildChainData(ctx context.Context, decision *v1alpha1.Decision) (*ChainData, error) { - history := decision.Status.History - if history == nil || len(*history) == 0 { - return nil, nil // No chain for initial decisions - } - - // Fetch all decisions in the chain - chainDecisions, err := e.fetchDecisionChain(ctx, decision) - if err != nil { - return nil, err - } - - if len(chainDecisions) < 2 { - return nil, nil // Need at least 2 decisions for a chain - } - - // Build segments - segments := e.buildHostSegments(chainDecisions) - if len(segments) == 0 { - return nil, nil - } - - // Convert to template data format - chainSegments := make([]ChainSegment, len(segments)) - for i, segment := range segments { - chainSegments[i] = ChainSegment{ - Host: segment.host, - Duration: segment.duration, - Decisions: segment.decisions, - } - } - - return &ChainData{ - Segments: chainSegments, - HasLoop: e.detectLoop(segments), - }, nil -} - -// ExplainWithTemplates renders an explanation using Go templates. 
-func (e *Explainer) ExplainWithTemplates(ctx context.Context, decision DecisionUpdate) (string, error) { - // Build explanation context - explanationCtx := ExplanationContext{ - Context: e.buildContextData(decision), - } - - // Build each component's data - if historyData, err := e.buildHistoryData(ctx, decision); err != nil { - return "", err - } else if historyData != nil { - explanationCtx.History = historyData - } - - if winnerData := e.buildWinnerData(decision); winnerData != nil { - explanationCtx.Winner = winnerData - } - - if inputData := e.buildInputData(decision); inputData != nil { - explanationCtx.Input = inputData - } - - if criticalStepsData := e.buildCriticalStepsData(decision); criticalStepsData != nil { - explanationCtx.CriticalSteps = criticalStepsData - } - - if deletedHostsData := e.buildDeletedHostsData(decision); deletedHostsData != nil { - explanationCtx.DeletedHosts = deletedHostsData - } - - // Build step impacts - if result := decision.Status.Result; result != nil && result.TargetHost != nil && len(result.StepResults) > 0 { - targetHost := *result.TargetHost - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - } - if inputWeights != nil { - impacts := e.calculateStepImpacts(inputWeights, result.StepResults, targetHost) - if len(impacts) > 0 { - // Sort impacts by absolute delta (highest first), with promotions taking priority - sort.Slice(impacts, func(i, j int) bool { - absI := impacts[i].ScoreDelta - if absI < 0 { - absI = -absI - } - absJ := impacts[j].ScoreDelta - if absJ < 0 { - absJ = -absJ - } - - if absI != absJ { - return absI > absJ - } - if impacts[i].PromotedToFirst != impacts[j].PromotedToFirst { - return impacts[i].PromotedToFirst - } - return impacts[i].Step < impacts[j].Step - }) - explanationCtx.StepImpacts = impacts - } - } - } - - if chainData, err := 
e.buildChainData(ctx, decision); err != nil { - return "", err - } else if chainData != nil { - explanationCtx.Chain = chainData - } - - // Render using templates - return e.templateManager.RenderExplanation(explanationCtx) -}*/ diff --git a/internal/scheduling/lib/explainer_templates.go b/internal/scheduling/lib/explainer_templates.go deleted file mode 100644 index aa67dd5d7..000000000 --- a/internal/scheduling/lib/explainer_templates.go +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -/*import ( - "bytes" - "fmt" - "strings" - "text/template" - "time" -) - -type TemplateManager struct { - templates *template.Template -} - -func NewTemplateManager() (*TemplateManager, error) { - tmpl := template.New("explanation").Funcs(template.FuncMap{ - "join": strings.Join, - "formatDuration": formatTemplateDuration, - "formatFloat": func(f float64) string { return fmt.Sprintf("%.2f", f) }, - "formatDelta": func(f float64) string { return fmt.Sprintf("%+.2f", f) }, - "add": func(a, b int) int { return a + b }, - "plural": func(n int, singular, plural string) string { - if n == 1 { - return singular - } - return plural - }, - }) - - tmpl, err := tmpl.Parse(mainTemplate) - if err != nil { - return nil, fmt.Errorf("failed to parse main template: %w", err) - } - - templates := map[string]string{ - "context": contextTemplate, - "history": historyTemplate, - "winner": winnerTemplate, - "input": inputTemplate, - "critical": criticalTemplate, - "deleted": deletedTemplate, - "impacts": impactsTemplate, - "chain": chainTemplate, - } - - for name, templateStr := range templates { - tmpl, err = tmpl.Parse(fmt.Sprintf(`{{define "%s"}}%s{{end}}`, name, templateStr)) - if err != nil { - return nil, fmt.Errorf("failed to parse %s template: %w", name, err) - } - } - - return &TemplateManager{templates: tmpl}, nil -} - -func (tm *TemplateManager) RenderExplanation(ctx ExplanationContext) (string, error) { - var buf bytes.Buffer - err 
:= tm.templates.Execute(&buf, ctx) - if err != nil { - return "", fmt.Errorf("failed to render explanation: %w", err) - } - return strings.TrimSpace(buf.String()), nil -} - -func formatTemplateDuration(d time.Duration) string { - if d == 0 { - return "0s" - } - - // Truncate to seconds to remove sub-second precision - d = d.Truncate(time.Second) - - // For durations >= 24 hours, convert to days format - if d >= 24*time.Hour { - days := int(d.Hours()) / 24 - remainder := d - time.Duration(days)*24*time.Hour - if remainder == 0 { - return fmt.Sprintf("%dd0h0m0s", days) - } - return fmt.Sprintf("%d%s", days, remainder.String()) - } - - // For shorter durations, use Go's built-in formatting - return d.String() -} - -const mainTemplate = `{{template "context" .Context}} -{{- if .History}} {{template "history" .History}}{{end}} -{{- if .Winner}} {{template "winner" .Winner}}{{end}} -{{- if .Input}} {{template "input" .Input}}{{end}} -{{- if .CriticalSteps}} {{template "critical" .CriticalSteps}}{{end}} -{{- if .DeletedHosts}} {{template "deleted" .DeletedHosts}}{{end}} -{{- if .StepImpacts}} {{template "impacts" .StepImpacts}}{{end}} -{{- if .Chain}} {{template "chain" .Chain}}{{end}}` - -const contextTemplate = `{{if .IsInitial -}} -Initial placement of the {{.ResourceType}}. -{{- else -}} -Decision #{{.DecisionNumber}} for this {{.ResourceType}}. -{{- end}}` - -const historyTemplate = `Previous target host was '{{.PreviousTarget}}', now it's '{{.CurrentTarget}}'.` - -const winnerTemplate = `Selected: {{.HostName}} (score: {{formatFloat .Score}}) -{{- if .HasGap}}, gap to 2nd: {{formatFloat .Gap}}{{end}}, {{.HostsEvaluated}} {{plural .HostsEvaluated "host" "hosts"}} evaluated.` - -const inputTemplate = `{{if .InputConfirmed -}} -Input choice confirmed: {{.FinalWinner}} ({{formatFloat .InputScore}}→{{formatFloat .FinalScore}}). 
-{{- else -}} -Input favored {{.InputWinner}} ({{formatFloat .InputScore}}), final winner: {{.FinalWinner}} ({{formatFloat .FinalInputScore}}→{{formatFloat .FinalScore}}). -{{- end}}` - -const criticalTemplate = `{{if .IsInputOnly -}} -Decision driven by input only (all {{.TotalSteps}} {{plural .TotalSteps "step is" "steps are"}} non-critical). -{{- else if .RequiresAll -}} -Decision requires all {{.TotalSteps}} pipeline {{plural .TotalSteps "step" "steps"}}. -{{- else if eq (len .Steps) 1 -}} -Decision driven by 1/{{.TotalSteps}} pipeline step: {{index .Steps 0}}. -{{- else -}} -Decision driven by {{len .Steps}}/{{.TotalSteps}} pipeline {{plural .TotalSteps "step" "steps"}}: {{join .Steps ", "}}. -{{- end}}` - -const deletedTemplate = `{{len .DeletedHosts}} {{plural (len .DeletedHosts) "host" "hosts"}} filtered: -{{- range .DeletedHosts}} - - {{.Name}}{{if .IsInputWinner}} (input choice){{end}} by {{join .Steps ", "}} -{{- end}}` - -const impactsTemplate = ` Step impacts: -{{- range $i, $impact := .}} -• {{$impact.Step}} -{{- if $impact.PromotedToFirst}} {{formatDelta $impact.ScoreDelta}}→#1 -{{- else if ne $impact.ScoreDelta 0.0}} {{formatDelta $impact.ScoreDelta}} -{{- else if gt $impact.CompetitorsRemoved 0}} +0.00 (removed {{$impact.CompetitorsRemoved}}) -{{- else}} +0.00{{end}} -{{- end}}` - -const chainTemplate = `{{if .HasLoop}}Chain (loop detected): {{else}}Chain: {{end}} -{{- range $i, $segment := .Segments}}{{if gt $i 0}} -> {{end}}{{$segment.Host}} ({{formatDuration $segment.Duration}}{{if gt $segment.Decisions 1}}; {{$segment.Decisions}} decisions{{end}}){{end}}.` -*/ diff --git a/internal/scheduling/lib/explainer_test.go b/internal/scheduling/lib/explainer_test.go deleted file mode 100644 index ac21290f2..000000000 --- a/internal/scheduling/lib/explainer_test.go +++ /dev/null @@ -1,1476 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -/*import ( - "context" - "sort" - "testing" - "time" - - 
"github.com/cobaltcore-dev/cortex/api/v1alpha1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestExplainer_Explain(t *testing.T) { - tests := []struct { - name string - decision *v1alpha1.Decision - historyDecisions []*v1alpha1.Decision - expectedContains []string - expectError bool - }{ - { - name: "initial nova server placement", - decision: WithResourceID(NewTestDecision("test-decision"), "test-resource-1"), - expectedContains: []string{"Initial placement of the nova server"}, - }, - { - name: "initial cinder volume placement", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-2"), v1alpha1.SchedulingDomainCinder), - expectedContains: []string{"Initial placement of the cinder volume"}, - }, - { - name: "initial manila share placement", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-3"), v1alpha1.SchedulingDomainManila), - expectedContains: []string{"Initial placement of the manila share"}, - }, - { - name: "initial ironcore machine placement", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-4"), v1alpha1.SchedulingDomainMachines), - expectedContains: []string{"Initial placement of the ironcore machine"}, - }, - { - name: "unknown resource type falls back to generic", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-5"), "unknown-type"), - expectedContains: []string{"Initial placement of the resource"}, - }, - { - name: "empty history array", - decision: WithResourceID(NewTestDecision("test-decision"), "test-resource-6"), - expectedContains: []string{"Initial placement of the nova server"}, - }, - { - name: "subsequent decision with history", - decision: WithHistoryRef( - 
WithTargetHost(WithResourceID(NewTestDecision("test-decision-2"), "test-resource-7"), "host-2"), - WithUID(WithTargetHost(WithResourceID(NewTestDecision("test-decision-1"), "test-resource-7"), "host-1"), "test-uid-1")), - historyDecisions: []*v1alpha1.Decision{ - WithUID(WithTargetHost(WithResourceID(NewTestDecision("test-decision-1"), "test-resource-7"), "host-1"), "test-uid-1"), - }, - expectedContains: []string{ - "Decision #2 for this nova server", - "Previous target host was 'host-1'", - "now it's 'host-2'", - }, - }, - { - name: "subsequent decision with nil target hosts", - decision: WithHistoryRef( - WithResourceID(NewTestDecision("test-decision-4"), "test-resource-8"), - WithUID(WithResourceID(NewTestDecision("test-decision-3"), "test-resource-8"), "test-uid-3")), - historyDecisions: []*v1alpha1.Decision{ - WithUID(WithResourceID(NewTestDecision("test-decision-3"), "test-resource-8"), "test-uid-3"), - }, - expectedContains: []string{ - "Decision #2 for this nova server", - "Previous target host was '(n/a)'", - "now it's '(n/a)'", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if len(tt.historyDecisions) > 0 { - RunExplanationTestWithHistory(t, tt.decision, tt.historyDecisions, tt.expectedContains) - } else { - RunExplanationTest(t, tt.decision, tt.expectedContains) - } - }) - } -} - -func TestExplainer_Explain_HistoryDecisionNotFound_GracefulHandling(t *testing.T) { - decision := NewDecision("test-decision"). - WithResourceID("test-resource"). - WithTargetHost("host-1"). - WithHistory([]corev1.ObjectReference{ - { - Kind: "Decision", - Namespace: "default", - Name: "non-existent-decision", - UID: "non-existent-uid", - }, - }). 
- Build() - - explainer := SetupExplainerTest(t, decision) - explanation, err := explainer.Explain(context.Background(), decision) - - // Should NOT error anymore - graceful handling - if err != nil { - t.Errorf("Expected no error with graceful handling, but got: %v", err) - } - - // Should contain context but not history comparison - if !contains(explanation, "Decision #2 for this nova server") { - t.Errorf("Expected explanation to contain context, but got: %s", explanation) - } - - if contains(explanation, "Previous target host") { - t.Errorf("Expected explanation to NOT contain history comparison when decision is missing, but got: %s", explanation) - } -} - -func TestExplainer_MissingHistoryDecisions_ChainAnalysis(t *testing.T) { - // Test that chain analysis works when some history decisions are missing - decision := NewDecision("current-decision"). - WithResourceID("test-resource"). - WithTargetHost("host-3"). - WithHistory([]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "missing-decision", UID: "missing-uid"}, - {Kind: "Decision", Namespace: "default", Name: "decision-3", UID: "uid-3"}, - }). - Build() - - // Only provide decision-1 and decision-3, missing decision-2 - availableDecision := NewDecision("decision-1"). - WithUID("uid-1"). - WithTargetHost("host-1"). - WithCreationTimestamp(time.Now().Add(-2 * time.Hour)). 
- Build() - - explainer := SetupExplainerTest(t, decision, availableDecision) - explanation, err := explainer.Explain(context.Background(), decision) - - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - // Should contain context with full history count - if !contains(explanation, "Decision #4 for this nova server") { - t.Errorf("Expected explanation to contain context, but got: %s", explanation) - } - - // Chain analysis should work with available decisions - if !contains(explanation, "Chain:") { - t.Errorf("Expected explanation to contain chain analysis, but got: %s", explanation) - } -} - -// Helper functions -func stringPtr(s string) *string { - return &s -} - -func contains(s, substr string) bool { - return len(s) >= len(substr) && (s == substr || substr == "" || findInString(s, substr)) -} - -func findInString(s, substr string) bool { - for i := 0; i <= len(s)-len(substr); i++ { - if s[i:i+len(substr)] == substr { - return true - } - } - return false -} - -// Generic Decision Helpers - Composable functions with smart defaults -func NewTestDecision(name string) *v1alpha1.Decision { - return &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: "default", // Sensible default - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, // Most common - ResourceID: "test-resource", // Generic default - }, - Status: v1alpha1.DecisionStatus{}, - } -} - -func WithTargetHost(decision *v1alpha1.Decision, host string) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.TargetHost = &host - return decision -} - -func WithInputWeights(decision *v1alpha1.Decision, weights map[string]float64) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.RawInWeights = weights - return decision -} - -func WithOutputWeights(decision 
*v1alpha1.Decision, weights map[string]float64) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.AggregatedOutWeights = weights - - // Auto-generate ordered hosts from weights - hosts := make([]string, 0, len(weights)) - for host := range weights { - hosts = append(hosts, host) - } - sort.Slice(hosts, func(i, j int) bool { - return weights[hosts[i]] > weights[hosts[j]] - }) - decision.Status.Result.OrderedHosts = hosts - - return decision -} - -func WithSteps(decision *v1alpha1.Decision, steps ...v1alpha1.StepResult) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.StepResults = steps - return decision -} - -func WithSchedulingDomain(decision *v1alpha1.Decision, schedulingDomain v1alpha1.SchedulingDomain) *v1alpha1.Decision { - decision.Spec.SchedulingDomain = schedulingDomain - return decision -} - -func WithResourceID(decision *v1alpha1.Decision, resourceID string) *v1alpha1.Decision { - decision.Spec.ResourceID = resourceID - return decision -} - -func WithUID(decision *v1alpha1.Decision, uid string) *v1alpha1.Decision { - decision.UID = types.UID(uid) - return decision -} - -func WithHistory(decision *v1alpha1.Decision, refs []corev1.ObjectReference) *v1alpha1.Decision { - decision.Status.History = &refs - return decision -} - -// Helper to create a decision with history reference to another decision -func WithHistoryRef(decision, historyDecision *v1alpha1.Decision) *v1alpha1.Decision { - refs := []corev1.ObjectReference{ - { - Kind: "Decision", - Namespace: historyDecision.Namespace, - Name: historyDecision.Name, - UID: historyDecision.UID, - }, - } - decision.Status.History = &refs - return decision -} - -// Generic step creator -func Step(name string, activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: name, - Activations: 
activations, - } -} - -// Common step names as constants -const ( - AvailabilityFilter = "availability-filter" - ResourceWeigher = "resource-weigher" - PlacementPolicy = "placement-policy" -) - -// Decision Builder Pattern - Fluent interface for creating test decisions -type DecisionBuilder struct { - decision *v1alpha1.Decision -} - -func NewDecision(name string) *DecisionBuilder { - return &DecisionBuilder{ - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{}, - }, - } -} - -func (b *DecisionBuilder) WithResourceID(resourceID string) *DecisionBuilder { - b.decision.Spec.ResourceID = resourceID - return b -} - -func (b *DecisionBuilder) WithSchedulingDomain(schedulingDomain v1alpha1.SchedulingDomain) *DecisionBuilder { - b.decision.Spec.SchedulingDomain = schedulingDomain - return b -} - -func (b *DecisionBuilder) WithTargetHost(host string) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.TargetHost = stringPtr(host) - return b -} - -func (b *DecisionBuilder) WithRawInputWeights(weights map[string]float64) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.RawInWeights = weights - return b -} - -func (b *DecisionBuilder) WithNormalizedInputWeights(weights map[string]float64) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.NormalizedInWeights = weights - return b -} - -func (b *DecisionBuilder) WithAggregatedOutputWeights(weights map[string]float64) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - 
b.decision.Status.Result.AggregatedOutWeights = weights - return b -} - -func (b *DecisionBuilder) WithOrderedHosts(hosts []string) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.OrderedHosts = hosts - return b -} - -func (b *DecisionBuilder) WithSteps(steps ...v1alpha1.StepResult) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.StepResults = steps - return b -} - -func (b *DecisionBuilder) WithHistory(refs []corev1.ObjectReference) *DecisionBuilder { - b.decision.Status.History = &refs - return b -} - -func (b *DecisionBuilder) WithHistoryDecisions(decisions ...*v1alpha1.Decision) *DecisionBuilder { - refs := make([]corev1.ObjectReference, len(decisions)) - for i, decision := range decisions { - refs[i] = corev1.ObjectReference{ - Kind: "Decision", - Namespace: decision.Namespace, - Name: decision.Name, - UID: decision.UID, - } - } - b.decision.Status.History = &refs - return b -} - -func (b *DecisionBuilder) WithPrecedence(precedence int) *DecisionBuilder { - b.decision.Status.Precedence = intPtr(precedence) - return b -} - -func (b *DecisionBuilder) WithUID(uid string) *DecisionBuilder { - b.decision.UID = types.UID(uid) - return b -} - -func (b *DecisionBuilder) WithCreationTimestamp(timestamp time.Time) *DecisionBuilder { - b.decision.CreationTimestamp = metav1.Time{Time: timestamp} - return b -} - -func (b *DecisionBuilder) Build() *v1alpha1.Decision { - return b.decision -} - -// Pre-built scenario helpers for common test patterns -func DecisionWithScoring(name, winner string, scores map[string]float64) *DecisionBuilder { - orderedHosts := make([]string, 0, len(scores)) - for host := range scores { - orderedHosts = append(orderedHosts, host) - } - // Sort by score descending - sort.Slice(orderedHosts, func(i, j int) bool { - return scores[orderedHosts[i]] > 
scores[orderedHosts[j]] - }) - - return NewDecision(name). - WithTargetHost(winner). - WithAggregatedOutputWeights(scores). - WithOrderedHosts(orderedHosts) -} - -func DecisionWithInputComparison(name, winner string, inputWeights, finalWeights map[string]float64) *DecisionBuilder { - return NewDecision(name). - WithTargetHost(winner). - WithRawInputWeights(inputWeights). - WithAggregatedOutputWeights(finalWeights) -} - -func DecisionWithCriticalSteps(name, winner string, inputWeights map[string]float64, steps ...v1alpha1.StepResult) *DecisionBuilder { - return NewDecision(name). - WithTargetHost(winner). - WithRawInputWeights(inputWeights). - WithSteps(steps...) -} - -func DecisionWithHistory(name, winner string) *DecisionBuilder { - return NewDecision(name). - WithTargetHost(winner) -} - -// Step result builders for common pipeline steps -func ResourceWeigherStep(activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: "resource-weigher", - Activations: activations, - } -} - -func AvailabilityFilterStep(activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: "availability-filter", - Activations: activations, - } -} - -func PlacementPolicyStep(activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: "placement-policy", - Activations: activations, - } -} - -func WeigherStep(name string, activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: name, - Activations: activations, - } -} - -// Test execution helpers -func SetupExplainerTest(t *testing.T, decisions ...*v1alpha1.Decision) *Explainer { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - objects := make([]runtime.Object, len(decisions)) - for i, decision := range decisions { - objects[i] = decision - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). 
- WithRuntimeObjects(objects...). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Fatalf("Failed to create explainer: %v", err) - } - return explainer -} - -func RunExplanationTest(t *testing.T, decision *v1alpha1.Decision, expectedContains []string) { - explainer := SetupExplainerTest(t, decision) - explanation, err := explainer.Explain(context.Background(), decision) - AssertNoError(t, err) - AssertExplanationContains(t, explanation, expectedContains...) -} - -func RunExplanationTestWithHistory(t *testing.T, decision *v1alpha1.Decision, historyDecisions []*v1alpha1.Decision, expectedContains []string) { - allDecisions := make([]*v1alpha1.Decision, len(historyDecisions)+1) - copy(allDecisions, historyDecisions) - allDecisions[len(historyDecisions)] = decision - explainer := SetupExplainerTest(t, allDecisions...) - explanation, err := explainer.Explain(context.Background(), decision) - AssertNoError(t, err) - AssertExplanationContains(t, explanation, expectedContains...) -} - -func AssertNoError(t *testing.T, err error) { - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } -} - -func AssertExplanationContains(t *testing.T, explanation string, expected ...string) { - for _, exp := range expected { - if !contains(explanation, exp) { - t.Errorf("Expected explanation to contain '%s', but got: %s", exp, explanation) - } - } -} - -func AssertExplanationNotContains(t *testing.T, explanation string, notExpected ...string) { - for _, notExp := range notExpected { - if contains(explanation, notExp) { - t.Errorf("Expected explanation to NOT contain '%s', but got: %s", notExp, explanation) - } - } -} - -func TestExplainer_WinnerAnalysis(t *testing.T) { - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "winner analysis with score gap", - decision: DecisionWithScoring("test-decision", "host-1", - map[string]float64{"host-1": 2.45, "host-2": 2.10, "host-3": 1.85}). 
- Build(), - expectedContains: []string{ - "Selected: host-1 (score: 2.45)", - "gap to 2nd: 0.35", - "3 hosts evaluated", - }, - }, - { - name: "winner analysis with single host", - decision: DecisionWithScoring("test-decision", "host-1", - map[string]float64{"host-1": 2.45}). - Build(), - expectedContains: []string{ - "Selected: host-1 (score: 2.45)", - "1 host evaluated", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - RunExplanationTest(t, tt.decision, tt.expectedContains) - }) - } -} - -func TestExplainer_InputComparison(t *testing.T) { - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "input choice confirmed", - decision: DecisionWithInputComparison("test-decision", "host-1", - map[string]float64{"host-1": 1.20, "host-2": 1.10, "host-3": 0.95}, - map[string]float64{"host-1": 2.45, "host-2": 2.10, "host-3": 1.85}). - Build(), - expectedContains: []string{ - "Input choice confirmed: host-1 (1.20→2.45)", - }, - }, - { - name: "input choice overridden", - decision: DecisionWithInputComparison("test-decision", "host-2", - map[string]float64{"host-1": 1.50, "host-2": 1.20, "host-3": 0.95}, - map[string]float64{"host-1": 1.85, "host-2": 2.45, "host-3": 2.10}). - Build(), - expectedContains: []string{ - "Input favored host-1 (1.50), final winner: host-2 (1.20→2.45)", - }, - }, - { - name: "raw weights preferred over normalized", - decision: NewDecision("test-decision"). - WithTargetHost("host-1"). - WithRawInputWeights(map[string]float64{"host-1": 100.0, "host-2": 90.0}). - WithNormalizedInputWeights(map[string]float64{"host-1": 1.0, "host-2": 0.9}). - WithAggregatedOutputWeights(map[string]float64{"host-1": 2.45, "host-2": 2.10}). 
- Build(), - expectedContains: []string{ - "Input choice confirmed: host-1 (100.00→2.45)", // Should now use raw weights (100.00) - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - RunExplanationTest(t, tt.decision, tt.expectedContains) - }) - } -} - -func TestExplainer_CriticalStepsAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "single critical step", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.5, "host-2": 0.2}), - Step("availability-filter", map[string]float64{"host-1": 0.0, "host-2": 0.0})), - expectedContains: []string{ - "Decision driven by 1/2 pipeline step: resource-weigher", - }, - }, - { - name: "multiple critical steps", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 3.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.0, "host-2": -0.5}), - Step("availability-filter", map[string]float64{"host-1": 1.0, "host-2": 0.0}), - Step("placement-policy", map[string]float64{"host-1": 0.05, "host-2": 0.05})), - expectedContains: []string{ - "Decision driven by 2/3 pipeline steps: resource-weigher, availability-filter", - }, - }, - { - name: "all steps non-critical", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 3.0, "host-2": 1.0}), - Step("step-1", map[string]float64{"host-1": 0.05, "host-2": 0.05}), - Step("step-2", map[string]float64{"host-1": 0.02, "host-2": 0.02})), - expectedContains: []string{ - "Decision driven by input only (all 2 
steps are non-critical)", - }, - }, - { - name: "all steps critical", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 3.0}), - Step("step-1", map[string]float64{"host-1": 1.0, "host-2": -0.5}), - Step("step-2", map[string]float64{"host-1": 1.0, "host-2": 0.0})), - expectedContains: []string{ - "Decision requires all 2 pipeline steps", - }, - }, - { - name: "three critical steps formatting", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 4.0}), - Step("step-a", map[string]float64{"host-1": 1.0, "host-2": -0.5}), - Step("step-b", map[string]float64{"host-1": 1.0, "host-2": 0.0}), - Step("step-c", map[string]float64{"host-1": 1.0, "host-2": 0.0}), - Step("step-d", map[string]float64{"host-1": 0.05, "host-2": 0.05})), - expectedContains: []string{ - "Decision driven by 3/4 pipeline steps: step-a, step-b, step-c", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Failed to create explainer: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - }) - } -} - -func TestExplainer_CompleteExplanation(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - previousDecision := WithUID(WithTargetHost(NewTestDecision("test-decision-1"), "host-1"), "test-uid-1") - - decision := WithSteps( - WithOutputWeights( - WithInputWeights( - WithHistoryRef( - WithTargetHost(NewTestDecision("test-decision-2"), "host-2"), - previousDecision), - map[string]float64{"host-1": 1.50, "host-2": 1.20, "host-3": 0.95}), - map[string]float64{"host-1": 1.85, "host-2": 2.45, "host-3": 2.10}), - Step("resource-weigher", map[string]float64{"host-1": 0.15, "host-2": 0.85, "host-3": 0.75}), - Step("availability-filter", map[string]float64{"host-1": 0.20, "host-2": 0.40, "host-3": 0.40})) - - // Set precedence manually since it's not commonly used - decision.Status.Precedence = intPtr(1) - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision, previousDecision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Failed to create explainer: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - expectedParts := []string{ - "Decision #2 for this nova server", - "Previous target host was 'host-1', now it's 'host-2'", - "Selected: host-2 (score: 2.45), gap to 2nd: 0.35, 3 hosts evaluated", - "Input favored host-1 (1.50), final winner: host-2 (1.20→2.45)", - "Decision driven by 1/2 pipeline step: resource-weigher", - } - - for _, expected := range expectedParts { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } -} - -func TestExplainer_DeletedHostsAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "single host filtered by single step", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("availability-filter", map[string]float64{"host-1": 0.5})), - expectedContains: []string{ - "1 host filtered:", - "- host-2 (input choice) by availability-filter", - }, - }, - { - name: "multiple hosts filtered", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 3.0, "host-2": 2.0, "host-3": 1.0}), - Step("availability-filter", map[string]float64{"host-1": 0.5})), - expectedContains: []string{ - "2 hosts filtered", - }, - }, - { - name: "multiple hosts filtered including input winner", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - 
map[string]float64{"host-1": 1.0, "host-2": 3.0, "host-3": 2.0}), - Step("availability-filter", map[string]float64{"host-1": 0.5})), - expectedContains: []string{ - "2 hosts filtered:", - "- host-2 (input choice) by availability-filter", - "- host-3 by availability-filter", - }, - }, - { - name: "no hosts filtered", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 0.5, "host-2": 0.3})), - expectedContains: []string{}, // No deleted hosts analysis should be present - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Failed to create explainer: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - - // For the "no hosts filtered" case, ensure no deleted hosts analysis is present - if len(tt.expectedContains) == 0 { - deletedHostsKeywords := []string{"filtered", "Input winner", "hosts filtered"} - for _, keyword := range deletedHostsKeywords { - if contains(explanation, keyword) { - t.Errorf("Expected explanation to NOT contain '%s' for no deleted hosts case, but got: %s", keyword, explanation) - } - } - } - }) - } -} - -func TestExplainer_GlobalChainAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - baseTime := metav1.Now() - time1 := metav1.Time{Time: baseTime.Add(-120 * time.Minute)} 
// 2 hours ago - time2 := metav1.Time{Time: baseTime.Add(-60 * time.Minute)} // 1 hour ago - time3 := metav1.Time{Time: baseTime.Time} // now - - tests := []struct { - name string - currentDecision *v1alpha1.Decision - historyDecisions []v1alpha1.Decision - expectedContains []string - expectedNotContain []string - }{ - { - name: "simple chain with durations", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-3", - Namespace: "default", - CreationTimestamp: time3, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "decision-2", UID: "uid-2"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-3"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: time1, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - UID: "uid-2", - CreationTimestamp: time2, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - }, - expectedContains: []string{ - "Chain: host-1 (1h0m0s) -> host-2 (1h0m0s) -> host-3 (0s).", - }, - }, - { - name: "chain with loop detection", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-3", - Namespace: "default", - CreationTimestamp: time3, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: 
"Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "decision-2", UID: "uid-2"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), // Back to host-1 - creates loop - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: time1, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - UID: "uid-2", - CreationTimestamp: time2, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - }, - expectedContains: []string{ - "Chain (loop detected): host-1 (1h0m0s) -> host-2 (1h0m0s) -> host-1 (0s).", - }, - }, - { - name: "chain with multiple decisions on same host", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-4", - Namespace: "default", - CreationTimestamp: time3, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "decision-2", UID: "uid-2"}, - {Kind: "Decision", Namespace: "default", Name: "decision-3", UID: "uid-3"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: time1, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - 
Name: "decision-2", - Namespace: "default", - UID: "uid-2", - CreationTimestamp: time1, // Same time as decision-1 - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), // Same host as decision-1 - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-3", - Namespace: "default", - UID: "uid-3", - CreationTimestamp: time2, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), // Still same host - }, - }, - }, - }, - expectedContains: []string{ - "Chain: host-1 (2h0m0s; 3 decisions) -> host-2 (0s).", - }, - }, - { - name: "chain with multi-day duration", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: baseTime.Time}, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: metav1.Time{Time: baseTime.Add(-72 * time.Hour)}, // 3 days ago - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - }, - expectedContains: []string{ - "Chain: host-1 (3d0h0m0s) -> host-2 (0s).", - }, - }, - { - name: "no chain for initial decision", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: 
nil, // No history - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{}, - expectedNotContain: []string{ - "Chain:", - "chain:", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.currentDecision} - for i := range tt.historyDecisions { - objects = append(objects, &tt.historyDecisions[i]) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.currentDecision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - - for _, notExpected := range tt.expectedNotContain { - if contains(explanation, notExpected) { - t.Errorf("Expected explanation to NOT contain '%s', but got: %s", notExpected, explanation) - } - } - }) - } -} - -func intPtr(i int) *int { - return &i -} - -func TestExplainer_RawWeightsPriorityBugFix(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - description string - }{ - { - name: "raw_weights_preserve_small_differences", - decision: func() *v1alpha1.Decision { - decision := WithOutputWeights( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-2"), - map[string]float64{"host-1": 1000.05, "host-2": 1000.10, "host-3": 1000.00}), - map[string]float64{"host-1": 1001.05, "host-2": 1002.10, "host-3": 1001.00}) - // Add normalized weights to show they 
would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0, "host-3": 1.0} - return decision - }(), - expectedContains: []string{ - "Input choice confirmed: host-2 (1000.10→1002.10)", // Should use raw weights (1000.10) - }, - description: "Raw weights preserve small differences that normalized weights would mask", - }, - { - name: "raw_weights_detect_correct_input_winner", - decision: func() *v1alpha1.Decision { - decision := WithOutputWeights( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-3"), - map[string]float64{"host-1": 2000.15, "host-2": 2000.10, "host-3": 2000.05}), - map[string]float64{"host-1": 2001.15, "host-2": 2001.10, "host-3": 2002.05}) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0, "host-3": 1.0} - return decision - }(), - expectedContains: []string{ - "Input favored host-1 (2000.15), final winner: host-3 (2000.05→2002.05)", // Should detect host-1 as input winner using raw weights - }, - description: "Raw weights correctly identify input winner that normalized weights would miss", - }, - { - name: "critical_steps_analysis_uses_raw_weights", - decision: func() *v1alpha1.Decision { - decision := WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1000.05, "host-2": 1000.00}), - Step("resource-weigher", map[string]float64{"host-1": 0.5, "host-2": 0.0})) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0} - return decision - }(), - expectedContains: []string{ - "Decision driven by input only (all 1 step is non-critical)", // With small raw weight advantage, step is non-critical - "Input choice confirmed: host-1 (1000.05→0.00)", // Shows raw weights are being used - }, 
- description: "Critical steps analysis uses raw weights - with small raw advantage, step becomes non-critical", - }, - { - name: "deleted_hosts_analysis_uses_raw_weights", - decision: func() *v1alpha1.Decision { - decision := WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1000.00, "host-2": 1000.05, "host-3": 999.95}), - Step("availability-filter", map[string]float64{"host-1": 0.0})) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0, "host-3": 1.0} - return decision - }(), - expectedContains: []string{ - "2 hosts filtered:", - "- host-2 (input choice) by availability-filter", - "Input favored host-2 (1000.05), final winner: host-1 (1000.00→0.00)", - }, - description: "Deleted hosts analysis uses raw weights to correctly identify input winner", - }, - { - name: "fallback_to_normalized_when_no_raw_weights", - decision: func() *v1alpha1.Decision { - decision := WithOutputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 2.5, "host-2": 2.0, "host-3": 1.8}) - // Set normalized weights and clear raw weights to test fallback - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.5, "host-2": 1.0, "host-3": 0.8} - decision.Status.Result.RawInWeights = nil - return decision - }(), - expectedContains: []string{ - "Input choice confirmed: host-1 (1.50→2.50)", // Should use normalized weights as fallback - }, - description: "Should fall back to normalized weights when raw weights are not available", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Fatalf("Failed to create explainer: %v", err) - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - }) - } -} - -// TestExplainer_RawVsNormalizedComparison demonstrates the impact of the bug fix -func TestExplainer_RawVsNormalizedComparison(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - RawInWeights: map[string]float64{ - "host-1": 1000.05, // Very small difference - "host-2": 1000.10, // Slightly higher - should be detected as input winner - "host-3": 1000.00, - }, - NormalizedInWeights: map[string]float64{ - "host-1": 1.0, // All normalized to same value - would mask the difference - "host-2": 1.0, - "host-3": 1.0, - }, - AggregatedOutWeights: map[string]float64{ - "host-1": 1001.05, - "host-2": 1002.10, // host-2 wins - "host-3": 1001.00, - }, - }, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - if !contains(explanation, "Input choice confirmed: host-2 (1000.10→1002.10)") { - t.Errorf("Expected explanation to show raw weight value (1000.10), but got: %s", explanation) - } - - if contains(explanation, "Input favored host-1") || contains(explanation, "Input favored host-3") { - t.Errorf("Expected explanation to NOT show input choice override, but got: %s", explanation) - } -} - -func TestExplainer_StepImpactAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "step with positive impact", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.5, "host-2": 0.2})), - expectedContains: []string{ - "Step impacts:", - "resource-weigher +1.50", - }, - }, - { - name: "step with promotion to first", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 2.0, "host-2": 0.5})), - expectedContains: []string{ - "Step impacts:", - "resource-weigher +2.00→#1", - }, - }, - { - name: "step with competitor removal", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 2.0, "host-2": 1.0, "host-3": 0.5}), - Step("availability-filter", map[string]float64{"host-1": 0.0})), - 
expectedContains: []string{ - "Step impacts:", - "availability-filter +0.00 (removed 2)", - }, - }, - { - name: "multiple steps sorted by impact", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.5, "host-2": 0.2}), - Step("availability-filter", map[string]float64{"host-1": 0.1, "host-2": 0.0})), - expectedContains: []string{ - "Step impacts:", - "resource-weigher +1.50", - "availability-filter +0.10", - }, - }, - { - name: "no step impacts for decision without steps", - decision: WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 2.0, "host-2": 1.0}), - expectedContains: []string{}, // No step impacts should be present - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - - // For the "no step impacts" case, ensure no step impacts analysis is present - if len(tt.expectedContains) == 0 { - stepImpactsKeywords := []string{"Step impacts:", "→#1", "removed"} - for _, keyword := range stepImpactsKeywords { - if contains(explanation, keyword) { - t.Errorf("Expected explanation to NOT contain '%s' for no step impacts case, but got: %s", keyword, explanation) - } - } - } - }) - } -}*/ diff --git a/internal/scheduling/lib/explainer_types.go b/internal/scheduling/lib/explainer_types.go deleted file mode 100644 index bd760ffc6..000000000 --- a/internal/scheduling/lib/explainer_types.go +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package lib - -/*// ExplanationContext holds all data needed to render a complete explanation. -type ExplanationContext struct { - Context ContextData `json:"context"` - History *HistoryData `json:"history,omitempty"` - Winner *WinnerData `json:"winner,omitempty"` - Input *InputData `json:"input,omitempty"` - CriticalSteps *CriticalStepsData `json:"criticalSteps,omitempty"` - DeletedHosts *DeletedHostsData `json:"deletedHosts,omitempty"` - StepImpacts []StepImpact `json:"stepImpacts,omitempty"` - Chain *ChainData `json:"chain,omitempty"` -} - -type ContextData struct { - ResourceType string `json:"resourceType"` - DecisionNumber int `json:"decisionNumber"` - IsInitial bool `json:"isInitial"` -} - -// HistoryData contains information about the previous decision in the chain. 
-type HistoryData struct { - PreviousTarget string `json:"previousTarget"` - CurrentTarget string `json:"currentTarget"` -} - -type WinnerData struct { - HostName string `json:"hostName"` - Score float64 `json:"score"` - Gap float64 `json:"gap"` - HostsEvaluated int `json:"hostsEvaluated"` - HasGap bool `json:"hasGap"` -} - -// InputData contains information about input vs final winner comparison. -type InputData struct { - InputWinner string `json:"inputWinner"` - InputScore float64 `json:"inputScore"` - FinalWinner string `json:"finalWinner"` - FinalScore float64 `json:"finalScore"` - FinalInputScore float64 `json:"finalInputScore"` // Final winner's input score - InputConfirmed bool `json:"inputConfirmed"` -} - -// CriticalStepsData contains information about which pipeline steps were critical. -type CriticalStepsData struct { - Steps []string `json:"steps"` - TotalSteps int `json:"totalSteps"` - IsInputOnly bool `json:"isInputOnly"` - RequiresAll bool `json:"requiresAll"` -} - -// DeletedHostsData contains information about hosts that were filtered out. -type DeletedHostsData struct { - DeletedHosts []DeletedHostInfo `json:"deletedHosts"` -} - -// DeletedHostInfo contains details about a single deleted host. -type DeletedHostInfo struct { - Name string `json:"name"` - Steps []string `json:"steps"` - IsInputWinner bool `json:"isInputWinner"` -} - -// ChainData contains information about the decision chain over time. -type ChainData struct { - Segments []ChainSegment `json:"segments"` - HasLoop bool `json:"hasLoop"` -} - -// ChainSegment represents a period where the resource was on a specific host. 
-type ChainSegment struct { - Host string `json:"host"` - Duration time.Duration `json:"duration"` - // number of decisions with this as the target host - Decisions int `json:"decisions"` -} -*/ diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go index 3114ca637..8ecd3d5af 100644 --- a/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go @@ -158,11 +158,8 @@ func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { if tt.expectResult { if result == nil { t.Error("Expected result but got nil") - } else { - // Verify result has ordered hosts - if len(result.OrderedHosts) == 0 && len(tt.request.Hosts) > 0 { - t.Error("Expected ordered hosts in result") - } + } else if len(result.OrderedHosts) == 0 && len(tt.request.Hosts) > 0 { + t.Error("Expected ordered hosts in result") } } }) From 6074f1555b4ec8f8daeffc4f5dae703ba80bb9b2 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 10:56:38 +0100 Subject: [PATCH 26/36] Update decision state KPI test cases and fix expected status in Cinder external scheduler tests --- .../plugins/deployment/decision_state_test.go | 15 --------------- .../cinder/external_scheduler_api_test.go | 4 ++-- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/internal/knowledge/kpis/plugins/deployment/decision_state_test.go b/internal/knowledge/kpis/plugins/deployment/decision_state_test.go index dc84a90af..b8a80622d 100644 --- a/internal/knowledge/kpis/plugins/deployment/decision_state_test.go +++ b/internal/knowledge/kpis/plugins/deployment/decision_state_test.go @@ -182,21 +182,6 @@ func TestDecisionStateKPI_Collect(t *testing.T) { expectedError: 2, expectedSuccess: 0, }, - { - name: "decision with no result", - decisions: []v1alpha1.Decision{ - { - ObjectMeta: v1.ObjectMeta{Name: "dec-no-result"}, - Spec: 
v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.DecisionStatus{}, - }, - }, - operator: "test-operator", - expectedCount: 3, - description: "should count decision with no result as success", - expectedError: 0, - expectedSuccess: 1, - }, { name: "error condition takes precedence", decisions: []v1alpha1.Decision{ diff --git a/internal/scheduling/cinder/external_scheduler_api_test.go b/internal/scheduling/cinder/external_scheduler_api_test.go index ff9b85ef4..8b6bd947d 100644 --- a/internal/scheduling/cinder/external_scheduler_api_test.go +++ b/internal/scheduling/cinder/external_scheduler_api_test.go @@ -237,7 +237,7 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { OrderedHosts: []string{}, }, nil }, - expectedStatus: http.StatusInternalServerError, + expectedStatus: http.StatusOK, }, } @@ -375,7 +375,7 @@ func TestHTTPAPI_CinderExternalScheduler_PipelineParameter(t *testing.T) { } // Verify the pipeline name was passed correctly - expectedPipeline := "cinder-external-scheduler" // Default pipeline from inferPipelineName + expectedPipeline := "test-pipeline" // Default pipeline from inferPipelineName if capturedPipeline != expectedPipeline { t.Errorf("Expected pipeline '%s', got '%s'", expectedPipeline, capturedPipeline) } From d0b2721f8b66dab2b5579b3d96bd78c51c86258d Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 10:58:44 +0100 Subject: [PATCH 27/36] Update expected hosts in Nova external scheduler test case --- internal/scheduling/nova/external_scheduler_api_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/scheduling/nova/external_scheduler_api_test.go b/internal/scheduling/nova/external_scheduler_api_test.go index dccf6d095..73251ecdd 100644 --- a/internal/scheduling/nova/external_scheduler_api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -192,7 +192,7 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { }, nil }, expectedStatus: 
http.StatusOK, - expectedHosts: []string{"host1"}, + expectedHosts: []string{"host1", "host2"}, }, { name: "processing error", From 56c4d6916725127dd871b99558b3f57448ce47aa Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 11:10:32 +0100 Subject: [PATCH 28/36] Refactor machine processing test cases to remove decision creation checks --- ...filter_weigher_pipeline_controller_test.go | 77 +------------------ 1 file changed, 3 insertions(+), 74 deletions(-) diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go index fa73eab3a..303d65d32 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go @@ -99,14 +99,12 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { machine *ironcorev1alpha1.Machine machinePools []ironcorev1alpha1.MachinePool pipelineConfig *v1alpha1.Pipeline - createDecisions bool expectError bool - expectDecisionCreated bool expectMachinePoolAssigned bool expectTargetHost string }{ { - name: "successful machine processing with decision creation", + name: "successful machine processing", machine: &ironcorev1alpha1.Machine{ ObjectMeta: metav1.ObjectMeta{ Name: "test-machine", @@ -131,14 +129,12 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainMachines, - CreateDecisions: true, + CreateDecisions: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, expectError: false, - expectDecisionCreated: true, expectMachinePoolAssigned: true, expectTargetHost: "pool1", }, @@ -170,9 +166,7 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: 
false, expectError: false, - expectDecisionCreated: false, expectMachinePoolAssigned: true, expectTargetHost: "pool1", }, @@ -190,7 +184,6 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { machinePools: []ironcorev1alpha1.MachinePool{}, pipelineConfig: nil, expectError: true, - expectDecisionCreated: false, expectMachinePoolAssigned: false, }, { @@ -217,9 +210,7 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, expectError: true, - expectDecisionCreated: true, // Decision is created but processing fails expectMachinePoolAssigned: false, }, } @@ -244,6 +235,7 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]]{ Pipelines: map[string]lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]{}, PipelineConfigs: map[string]v1alpha1.Pipeline{}, + DecisionQueue: make(chan lib.DecisionUpdate), }, Monitor: lib.FilterWeigherPipelineMonitor{}, } @@ -266,69 +258,6 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { return } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.ResourceID == tt.machine.Name && - decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainMachines { - found = true - - // Verify decision properties - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines { - t.Errorf("expected scheduling domain %q, got %q", v1alpha1.SchedulingDomainMachines, decision.Spec.SchedulingDomain) - } - if decision.Spec.ResourceID != tt.machine.Name { - t.Errorf("expected resource ID %q, got %q", 
tt.machine.Name, decision.Spec.ResourceID) - } - - // Check if result was set (only for successful cases) - if !tt.expectError && tt.expectTargetHost != "" { - entry := decision.Status.SchedulingHistory[len(decision.Status.SchedulingHistory)-1] - if entry.PipelineRef.Name != "machines-scheduler" { - t.Errorf("expected pipeline name %q in scheduling history, got %q", "machines-scheduler", entry.PipelineRef.Name) - } - if len(entry.OrderedHosts) == 0 { - t.Error("expected scheduling history entry to have a target host") - return - } - targetHost := entry.OrderedHosts[0] - if targetHost != tt.expectTargetHost { - t.Errorf("expected target host %q in scheduling history, got %q", tt.expectTargetHost, targetHost) - } - } - break - } - } - - if !found { - t.Error("expected decision to be created but was not found") - } - } else { - // Check that no decisions were created - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - for _, decision := range decisions.Items { - if decision.Spec.ResourceID == tt.machine.Name && - decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainMachines { - t.Error("expected no decision to be created but found one") - break - } - } - } - // Check if machine pool was assigned (if expected) if tt.expectMachinePoolAssigned { var updatedMachine ironcorev1alpha1.Machine From 1f27fd0fa3aa9f5a5dcde1e93913d12d71db8293 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 20 Feb 2026 11:16:25 +0100 Subject: [PATCH 29/36] Refactor test cases to remove decision creation checks in filter weigher pipeline controller tests --- ...filter_weigher_pipeline_controller_test.go | 32 ---- ...filter_weigher_pipeline_controller_test.go | 137 ++++-------------- ...filter_weigher_pipeline_controller_test.go | 129 +++-------------- 3 files changed, 45 insertions(+), 253 deletions(-) diff --git 
a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go index 303d65d32..42d47a66e 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go @@ -138,38 +138,6 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { expectMachinePoolAssigned: true, expectTargetHost: "pool1", }, - { - name: "successful machine processing without decision creation", - machine: &ironcorev1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-machine-no-decision", - Namespace: "default", - }, - Spec: ironcorev1alpha1.MachineSpec{ - Scheduler: "", - }, - }, - machinePools: []ironcorev1alpha1.MachinePool{ - { - ObjectMeta: metav1.ObjectMeta{Name: "pool1"}, - }, - }, - pipelineConfig: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machines-scheduler", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainMachines, - CreateDecisions: false, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: false, - expectMachinePoolAssigned: true, - expectTargetHost: "pool1", - }, { name: "pipeline not configured", machine: &ironcorev1alpha1.Machine{ diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go index 4e04841cd..8a8a6d3a3 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go @@ -170,17 +170,15 @@ func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { } tests := []struct { - name string - request api.ExternalSchedulerRequest - pipeline *v1alpha1.Pipeline - pipelineConf *v1alpha1.Pipeline - setupPipelineConfigs bool - createDecisions 
bool - expectError bool - expectResult bool - expectCreatedDecision bool - expectUpdatedStatus bool - errorContains string + name string + request api.ExternalSchedulerRequest + pipeline *v1alpha1.Pipeline + pipelineConf *v1alpha1.Pipeline + setupPipelineConfigs bool + expectError bool + expectResult bool + expectUpdatedStatus bool + errorContains string }{ { name: "successful processing with decision creation enabled", @@ -209,82 +207,21 @@ func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: false, - expectResult: true, - expectCreatedDecision: true, - expectUpdatedStatus: true, + setupPipelineConfigs: true, + expectError: false, + expectResult: true, + expectUpdatedStatus: true, }, { - name: "successful processing with decision creation disabled", - request: novaRequest, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline-no-create", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: false, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - pipelineConf: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline-no-create", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: false, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - setupPipelineConfigs: true, - createDecisions: false, - expectError: false, - expectResult: true, - expectCreatedDecision: false, - expectUpdatedStatus: false, - }, - { - name: "pipeline not configured", - request: novaRequest, - pipeline: nil, - pipelineConf: nil, - setupPipelineConfigs: false, - expectError: true, - expectResult: false, - expectCreatedDecision: false, - expectUpdatedStatus: 
false, - errorContains: "pipeline nonexistent-pipeline not configured", - }, - { - name: "processing fails after decision creation", - request: novaRequest, - pipeline: nil, // This will cause processing to fail after creation - pipelineConf: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: true, - expectResult: false, - expectCreatedDecision: true, - expectUpdatedStatus: false, - errorContains: "pipeline not found or not ready", + name: "pipeline not configured", + request: novaRequest, + pipeline: nil, + pipelineConf: nil, + setupPipelineConfigs: false, + expectError: true, + expectResult: false, + expectUpdatedStatus: false, + errorContains: "pipeline test-pipeline not found or not ready", }, { name: "pipeline not found in runtime map", @@ -302,13 +239,11 @@ func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: true, - expectResult: false, - expectCreatedDecision: true, - expectUpdatedStatus: false, - errorContains: "pipeline not found or not ready", + setupPipelineConfigs: true, + expectError: true, + expectResult: false, + expectUpdatedStatus: false, + errorContains: "pipeline not found or not ready", }, } @@ -330,6 +265,7 @@ func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { Client: client, Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), + DecisionQueue: make(chan lib.DecisionUpdate, 100), }, Monitor: lib.FilterWeigherPipelineMonitor{}, } @@ -367,23 +303,6 @@ func 
TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { t.Errorf("Expected error to contain %q, got: %v", tt.errorContains, err) } - // Check if decision was created in the cluster when expected - /* TODO CHECK IF DECISION WAS CREATED if tt.expectCreatedDecision { - var createdDecision v1alpha1.Decision - key := types.NamespacedName{Name: tt.decision.Name, Namespace: tt.decision.Namespace} - err := client.Get(context.Background(), key, &createdDecision) - if err != nil { - t.Errorf("Expected decision to be created but got error: %v", err) - } - } else { - var createdDecision v1alpha1.Decision - key := types.NamespacedName{Name: tt.decision.Name, Namespace: tt.decision.Namespace} - err := client.Get(context.Background(), key, &createdDecision) - if err == nil { - t.Error("Expected decision not to be created but it was found") - } - }*/ - // Validate result and duration expectations if tt.expectResult && result == nil { t.Error("Expected result to be set but was nil") diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go index d648b3857..9382b0534 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go @@ -98,18 +98,16 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { } tests := []struct { - name string - pod *corev1.Pod - nodes []corev1.Node - pipelineConfig *v1alpha1.Pipeline - createDecisions bool - expectError bool - expectDecisionCreated bool - expectNodeAssigned bool - expectTargetHost string + name string + pod *corev1.Pod + nodes []corev1.Node + pipelineConfig *v1alpha1.Pipeline + expectError bool + expectNodeAssigned bool + expectTargetHost string }{ { - name: "successful pod processing with decision creation", + name: "successful pod processing", pod: &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pod", @@ -127,40 +125,6 
@@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "node2"}, }, }, - pipelineConfig: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pods-scheduler", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainPods, - CreateDecisions: true, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - createDecisions: true, - expectError: false, - expectDecisionCreated: true, - expectNodeAssigned: true, - expectTargetHost: "node1", - }, - { - name: "successful pod processing without decision creation", - pod: &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pod-no-decision", - Namespace: "default", - }, - Spec: corev1.PodSpec{ - SchedulerName: "", - }, - }, - nodes: []corev1.Node{ - { - ObjectMeta: metav1.ObjectMeta{Name: "node1"}, - }, - }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "pods-scheduler", @@ -173,11 +137,9 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: false, - expectError: false, - expectDecisionCreated: false, - expectNodeAssigned: true, - expectTargetHost: "node1", + expectError: false, + expectNodeAssigned: true, + expectTargetHost: "node1", }, { name: "pipeline not configured", @@ -190,11 +152,10 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { SchedulerName: "", }, }, - nodes: []corev1.Node{}, - pipelineConfig: nil, - expectError: true, - expectDecisionCreated: false, - expectNodeAssigned: false, + nodes: []corev1.Node{}, + pipelineConfig: nil, + expectError: true, + expectNodeAssigned: false, }, { name: "no nodes available", @@ -220,10 +181,8 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: true, - expectDecisionCreated: true, // 
Decision is created but processing fails - expectNodeAssigned: false, + expectError: true, + expectNodeAssigned: false, }, } @@ -269,60 +228,6 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { return } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.ResourceID == tt.pod.Name && - decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainPods { - found = true - - // Verify decision properties - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { - t.Errorf("expected scheduling domain %q, got %q", v1alpha1.SchedulingDomainPods, decision.Spec.SchedulingDomain) - } - if decision.Spec.ResourceID != tt.pod.Name { - t.Errorf("expected resource ID %q, got %q", tt.pod.Name, decision.Spec.ResourceID) - } - - // Check if result was set (only for successful cases) - if !tt.expectError && tt.expectTargetHost != "" { - if decision.Status.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, decision.Status.TargetHost) - } - } - break - } - } - - if !found { - t.Error("expected decision to be created but was not found") - } - } else { - // Check that no decisions were created - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - for _, decision := range decisions.Items { - if decision.Spec.ResourceID == tt.pod.Name && - decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainPods { - t.Error("expected no decision to be created but found one") - break - } - } - } - // Check if node was assigned (if expected) if tt.expectNodeAssigned { var binding corev1.Binding From 
9b344e95929be19ce8aa71fa348610ab5ace78fb Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Mon, 23 Feb 2026 08:35:08 +0100 Subject: [PATCH 30/36] Rename pipeline controller to reflect descheduler functionality --- internal/scheduling/nova/detector_pipeline_controller.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/scheduling/nova/detector_pipeline_controller.go b/internal/scheduling/nova/detector_pipeline_controller.go index 65b92c38d..7920260a7 100644 --- a/internal/scheduling/nova/detector_pipeline_controller.go +++ b/internal/scheduling/nova/detector_pipeline_controller.go @@ -150,7 +150,7 @@ func (c *DetectorPipelineController) SetupWithManager(mgr ctrl.Manager, mcl *mul }), ). // Watch hypervisor changes so the cache gets updated. - Named("cortex-nova-pipelines"). + Named("cortex-nova-descheduler"). For( &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { From 3742f4143ef17a43831fe4e719a39410cf8e33df Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Tue, 24 Feb 2026 10:03:36 +0100 Subject: [PATCH 31/36] Rename reason to intent --- api/v1alpha1/decision_types.go | 32 +++++++++---------- .../files/crds/cortex.cloud_decisions.yaml | 10 +++--- .../filter_weigher_pipeline_controller.go | 2 +- .../scheduling/lib/pipeline_controller.go | 4 +-- .../filter_weigher_pipeline_controller.go | 2 +- .../filter_weigher_pipeline_controller.go | 2 +- .../filter_weigher_pipeline_controller.go | 2 +- .../filter_weigher_pipeline_controller.go | 2 +- 8 files changed, 28 insertions(+), 28 deletions(-) diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go index 1fdb62e77..9d6eeab5d 100644 --- a/api/v1alpha1/decision_types.go +++ b/api/v1alpha1/decision_types.go @@ -8,22 +8,22 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// SchedulingReasons represents the reason for a scheduling event. 
-type SchedulingReason string +// SchedulingIntents represents the Intent for a scheduling event. +type SchedulingIntent string const ( - // SchedulingReasonInitialPlacement indicates that this is the initial placement of a resource. - SchedulingReasonInitialPlacement SchedulingReason = "InitialPlacement" - // SchedulingReasonLiveMigration indicates that this scheduling event is triggered by a live migration operation. - SchedulingReasonLiveMigration SchedulingReason = "LiveMigration" - // SchedulingReasonResize indicates that this scheduling event is triggered by a resize operation. - SchedulingReasonResize SchedulingReason = "Resize" - // SchedulingReasonRebuild indicates that this scheduling event is triggered by a rebuild operation. - SchedulingReasonRebuild SchedulingReason = "Rebuild" - // SchedulingReasonEvacuate indicates that this scheduling event is triggered by an evacuate operation. - SchedulingReasonEvacuate SchedulingReason = "Evacuate" - // SchedulingReasonUnknown indicates that the reason for this scheduling event is unknown. - SchedulingReasonUnknown SchedulingReason = "Unknown" + // SchedulingIntentInitialPlacement indicates that this is the initial placement of a resource. + SchedulingIntentInitialPlacement SchedulingIntent = "InitialPlacement" + // SchedulingIntentLiveMigration indicates that this scheduling event is triggered by a live migration operation. + SchedulingIntentLiveMigration SchedulingIntent = "LiveMigration" + // SchedulingIntentResize indicates that this scheduling event is triggered by a resize operation. + SchedulingIntentResize SchedulingIntent = "Resize" + // SchedulingIntentRebuild indicates that this scheduling event is triggered by a rebuild operation. + SchedulingIntentRebuild SchedulingIntent = "Rebuild" + // SchedulingIntentEvacuate indicates that this scheduling event is triggered by an evacuate operation. 
+ SchedulingIntentEvacuate SchedulingIntent = "Evacuate" + // SchedulingIntentUnknown indicates that the Intent for this scheduling event is unknown. + SchedulingIntentUnknown SchedulingIntent = "Unknown" ) // SchedulingHistoryEntry represents a single entry in the scheduling history of a resource. @@ -36,8 +36,8 @@ type SchedulingHistoryEntry struct { // This reference can be used to look up the pipeline definition and its // scheduler step configuration for additional context. PipelineRef corev1.ObjectReference `json:"pipelineRef"` - // The reason for this scheduling event. - Reason SchedulingReason `json:"reason"` + // The Intent for this scheduling event. + Intent SchedulingIntent `json:"intent"` } type DecisionSpec struct { diff --git a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml index 906d36617..430ec9e7d 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml @@ -128,7 +128,7 @@ spec: type: array explanation: description: A human-readable explanation of the current scheduling - state. + decision. type: string schedulingHistory: description: The history of scheduling events for this resource. @@ -136,6 +136,9 @@ spec: description: SchedulingHistoryEntry represents a single entry in the scheduling history of a resource. properties: + intent: + description: The Intent for this scheduling event. + type: string orderedHosts: description: The hosts that were selected in this scheduling event, in order of preference. @@ -188,17 +191,14 @@ spec: type: string type: object x-kubernetes-map-type: atomic - reason: - description: The reason for this scheduling event. - type: string timestamp: description: Timestamp of when the scheduling event occurred. 
format: date-time type: string required: + - intent - orderedHosts - pipelineRef - - reason - timestamp type: object type: array diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go index 481fd6c3c..59dadecbe 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go @@ -81,7 +81,7 @@ func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, re ResourceID: request.Context.ResourceUUID, PipelineName: pipelineName, Result: result, - Reason: v1alpha1.SchedulingReasonUnknown, + Reason: v1alpha1.SchedulingIntentUnknown, } } return &result, nil diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index 381d1a0b7..154c402a1 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -41,7 +41,7 @@ type DecisionUpdate struct { ResourceID string PipelineName string Result FilterWeigherPipelineResult - Reason v1alpha1.SchedulingReason + Reason v1alpha1.SchedulingIntent } func (c *BasePipelineController[PipelineType]) StartExplainer(ctx context.Context) { @@ -104,7 +104,7 @@ func (c *BasePipelineController[PipelineType]) updateDecision(ctx context.Contex PipelineRef: corev1.ObjectReference{ Name: update.PipelineName, }, - Reason: update.Reason, + Intent: update.Reason, } // Check if scheduling failed (no hosts available) diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller.go b/internal/scheduling/machines/filter_weigher_pipeline_controller.go index 661c8e884..0dd7ed285 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller.go @@ -118,7 +118,7 @@ func (c *FilterWeigherPipelineController) ProcessNewMachine(ctx context.Context, PipelineName: 
pipelineName, Result: result, // TODO: Refine the reason - Reason: v1alpha1.SchedulingReasonUnknown, + Reason: v1alpha1.SchedulingIntentUnknown, } } return nil diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller.go b/internal/scheduling/manila/filter_weigher_pipeline_controller.go index 0706038f6..feb0d7165 100644 --- a/internal/scheduling/manila/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller.go @@ -80,7 +80,7 @@ func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, re ResourceID: "", PipelineName: pipelineName, Result: result, - Reason: v1alpha1.SchedulingReasonUnknown, + Reason: v1alpha1.SchedulingIntentUnknown, } } return &result, nil diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go index ea4bf3b0e..cb130f9bf 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -93,7 +93,7 @@ func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, re ResourceID: request.Spec.Data.InstanceUUID, PipelineName: pipelineName, Result: result, - Reason: v1alpha1.SchedulingReasonUnknown, + Reason: v1alpha1.SchedulingIntentUnknown, } } return &result, nil diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller.go b/internal/scheduling/pods/filter_weigher_pipeline_controller.go index 5083a5062..85d228459 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller.go @@ -123,7 +123,7 @@ func (c *FilterWeigherPipelineController) ProcessNewPod(ctx context.Context, pod PipelineName: pipelineName, Result: result, // TODO: Refine the reason - Reason: v1alpha1.SchedulingReasonUnknown, + Reason: v1alpha1.SchedulingIntentUnknown, } } return nil From 85dbe3f63cdf9b22aab6da585b50155921edae82 
Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Tue, 24 Feb 2026 10:05:13 +0100 Subject: [PATCH 32/36] Rename 'Reason' to 'Intent' in filter weigher pipeline controllers --- .../cinder/filter_weigher_pipeline_controller.go | 2 +- internal/scheduling/lib/pipeline_controller.go | 10 +++++----- .../machines/filter_weigher_pipeline_controller.go | 2 +- .../manila/filter_weigher_pipeline_controller.go | 2 +- .../nova/filter_weigher_pipeline_controller.go | 2 +- .../pods/filter_weigher_pipeline_controller.go | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go index 59dadecbe..affba0615 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go @@ -81,7 +81,7 @@ func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, re ResourceID: request.Context.ResourceUUID, PipelineName: pipelineName, Result: result, - Reason: v1alpha1.SchedulingIntentUnknown, + Intent: v1alpha1.SchedulingIntentUnknown, } } return &result, nil diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index 154c402a1..58a54e835 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -41,7 +41,7 @@ type DecisionUpdate struct { ResourceID string PipelineName string Result FilterWeigherPipelineResult - Reason v1alpha1.SchedulingIntent + Intent v1alpha1.SchedulingIntent } func (c *BasePipelineController[PipelineType]) StartExplainer(ctx context.Context) { @@ -104,7 +104,7 @@ func (c *BasePipelineController[PipelineType]) updateDecision(ctx context.Contex PipelineRef: corev1.ObjectReference{ Name: update.PipelineName, }, - Intent: update.Reason, + Intent: update.Intent, } // Check if scheduling failed (no hosts available) @@ -157,9 
+157,9 @@ func (c *BasePipelineController[PipelineType]) updateDecision(ctx context.Contex log.Info("Published NoValidHosts event", "resourceID", update.ResourceID) } else { // Normal event for successful scheduling - reasonStr := string(update.Reason) - c.Recorder.Eventf(decision, nil, corev1.EventTypeNormal, reasonStr, "Scheduling", "Scheduled to %s. %s", decision.Status.TargetHost, explanationText) - log.Info("Published scheduling event", "resourceID", update.ResourceID, "targetHost", decision.Status.TargetHost, "reason", update.Reason) + intentStr := string(update.Intent) + c.Recorder.Eventf(decision, nil, corev1.EventTypeNormal, intentStr, "Scheduling", "Scheduled to %s. %s", decision.Status.TargetHost, explanationText) + log.Info("Published scheduling event", "resourceID", update.ResourceID, "targetHost", decision.Status.TargetHost, "reason", update.Intent) } } diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller.go b/internal/scheduling/machines/filter_weigher_pipeline_controller.go index 0dd7ed285..7fdfa896a 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller.go @@ -118,7 +118,7 @@ func (c *FilterWeigherPipelineController) ProcessNewMachine(ctx context.Context, PipelineName: pipelineName, Result: result, // TODO: Refine the reason - Reason: v1alpha1.SchedulingIntentUnknown, + Intent: v1alpha1.SchedulingIntentUnknown, } } return nil diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller.go b/internal/scheduling/manila/filter_weigher_pipeline_controller.go index feb0d7165..ec106b636 100644 --- a/internal/scheduling/manila/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller.go @@ -80,7 +80,7 @@ func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, re ResourceID: "", PipelineName: pipelineName, Result: result, - Reason: 
v1alpha1.SchedulingIntentUnknown, + Intent: v1alpha1.SchedulingIntentUnknown, } } return &result, nil diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go index cb130f9bf..fd9e44d5e 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -93,7 +93,7 @@ func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, re ResourceID: request.Spec.Data.InstanceUUID, PipelineName: pipelineName, Result: result, - Reason: v1alpha1.SchedulingIntentUnknown, + Intent: v1alpha1.SchedulingIntentUnknown, } } return &result, nil diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller.go b/internal/scheduling/pods/filter_weigher_pipeline_controller.go index 85d228459..492c07a26 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller.go @@ -123,7 +123,7 @@ func (c *FilterWeigherPipelineController) ProcessNewPod(ctx context.Context, pod PipelineName: pipelineName, Result: result, // TODO: Refine the reason - Reason: v1alpha1.SchedulingIntentUnknown, + Intent: v1alpha1.SchedulingIntentUnknown, } } return nil From 1fb1e8a5cebcc83fbecd6b3a2e094bccb5616beb Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 27 Feb 2026 08:07:42 +0100 Subject: [PATCH 33/36] Removed visualizer tool --- tools/visualizer/Dockerfile | 9 - tools/visualizer/app.yaml | 54 ---- tools/visualizer/favicon.ico | Bin 15406 -> 0 bytes tools/visualizer/nginx.conf | 18 -- tools/visualizer/nova.html | 503 ----------------------------------- tools/visualizer/role.yaml | 29 -- tools/visualizer/shared.css | 289 -------------------- 7 files changed, 902 deletions(-) delete mode 100644 tools/visualizer/Dockerfile delete mode 100644 tools/visualizer/app.yaml delete mode 100644 tools/visualizer/favicon.ico delete mode 100644 
tools/visualizer/nginx.conf delete mode 100644 tools/visualizer/nova.html delete mode 100644 tools/visualizer/role.yaml delete mode 100644 tools/visualizer/shared.css diff --git a/tools/visualizer/Dockerfile b/tools/visualizer/Dockerfile deleted file mode 100644 index af7c859dd..000000000 --- a/tools/visualizer/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright SAP SE -# SPDX-License-Identifier: Apache-2.0 - -FROM nginx - -COPY nova.html /usr/share/nginx/html/nova.html -COPY shared.css /usr/share/nginx/html/shared.css -COPY favicon.ico /usr/share/nginx/html/favicon.ico -COPY nginx.conf /etc/nginx/conf.d/default.conf diff --git a/tools/visualizer/app.yaml b/tools/visualizer/app.yaml deleted file mode 100644 index 5697571e3..000000000 --- a/tools/visualizer/app.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright SAP SE -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: cortex-visualizer - labels: - app: cortex-visualizer -spec: - replicas: 1 - selector: - matchLabels: - app: cortex-visualizer - template: - metadata: - labels: - app: cortex-visualizer - spec: - serviceAccountName: cortex-visualizer - containers: - - name: cortex-visualizer - image: cortex-visualizer - ports: - - containerPort: 80 - - name: kubectl-proxy - image: alpine:latest - command: ["/bin/sh"] - args: - - -c - - | - apk add --no-cache curl - curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" - chmod +x kubectl - mv kubectl /usr/local/bin/ - kubectl proxy --port=8001 --address=0.0.0.0 --accept-hosts=.* - ports: - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: cortex-visualizer -spec: - selector: - app: cortex-visualizer - ports: - - name: http - port: 80 - targetPort: 80 - - name: kubectl-proxy - port: 8001 - targetPort: 8001 - type: ClusterIP \ No newline at end of file diff --git a/tools/visualizer/favicon.ico b/tools/visualizer/favicon.ico deleted file mode 
100644 index b4f9d5fb8c202c1528e8458073ffeffdb530c143..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 15406 zcmeI21&~xn6UP^KcR%by3kgo}0)l&RmjVi>un^o`fkfyMDL@cXti${k>o3 zo2z$wyLar}?g2%n>g{{`-bhbRPxrsO$KeR)h~S7D*J0iL9Bm>x9Qhp%ho9d+|1-yP zIA&Y#a^?E}_y0H?|4rm@WVY&9O{^CGyti&Hi-i2M->UBS((t_#M&mf^#P%g~`i<)@#1(tb{!Jn3K0)dRd5 zJb17?d-hB})1K?psZ&DRUcGt=JaSiR*RCyN$Bqru9$J`3xpL*CUAuPfnoHZZZIwU8 zix-!L3m59xbLY;L>eZ{u*|TTm!Gj0Ewx{#kw{I(*)2C0D(W6I8-@bjNVZ(;ne{kbN zf77N-<^1{cI+szSMyU)OKYm=`Pf+a{W81cEa{c;sog=(M4*T`%r#!iL@187Lw8-uJ znPZCiS>a!P``eCY1y)+@*O$azI}U8O#*#; z^X84LUcFlP!@GCy0)4l=cw~F_^zorfrQpF!^o*Z%m9Q@Kle=OOw$)SOyjb78((&RT zA?=F&a`oy}ZT|T2V>xr?j6UDJdsmP>zQ20)O7FC-q>u%n>sj~X_-+#-a zN1ltq?%lhE{a(3pWtH241q(`(CQW4Ez=66C(V_YC=T|zaRjVdfu3V7;0|sdQ@4ov^ zX3m_cdbw=bvi@{?!Uyy^Ivt(+`|rQ&UPa#X<$ zI>Wj=dGbV%v7SA9O8@@-)gFX|$DY!S{9*T!Cr>W1W5rYW9?88fDoFJE3Ufv5eutAFjeLb|RxIUTu3Wi-#5OKFw_Y+nZPEM4)QJ-(0#tQ3{?@HqRZh~T zO{)!DCvoG(mFCTxD;;M1i4!ME=FFL8)v8rOea1&G;ZvYDLp#2i zG`N@b$d@l)fbsdm-bCKmBj{;+@z(h80b7Gz$8W;sp!>iD%a<<~bYiJer6f+AIEtsi znd}2^jnACG$>8jC>C&l8Llf(fEn7C})Txv5AD;+1vS-gO_#L4gpZTFnx^?R&DO09Y z`}+L(bHzO98e|>0Krfs1KX&Yx;_LnU_lJ6X>^yTpw}Np@LFX}^-R_w2O&#u`o&Wv& z_tkI2ceWSC$M#UrHTY7gQl$!{Cy|q&d>eb8_Gj=Hyxna1-L8rmAD@Q3$@=35+RGn* z{2};f_}-ot~Rv-*x#)k(}rc4RY{o%uhr9_DmDi_Xt3sCU`{(zlo z)~qSfqD7PMzyDr&O*8xWJX64Cty{NN`^TOC-YQ_q9654GiWDg{?t%Sh?p$E)apT76 ze3>`%_eH_3fiTiwM4zLfXGNvsg9qp5WT6DR3rt<#f_xK<$@`mxop>t{;4nKN_vA_|xe zJOS=9x{W*kP5hZUU@+_f-*|6-HtncGUtH87R>egz?Mz*7Fc%t#hY%+Q{}PiyM`J7b z4_4(n@iS}(J`Hg!unRZ~oPu33cm@1HUtGN3yLYdQ88b$CK`w!Kmow%G%{=g*iD^xm zG)a~%Tc$B*Vqa;}q)}fIybl%y3xK)sO?&j{A$981QT~%-03R6p4=yLh3`WB*0>73n zU0OD7+^86x__rOuhK6?9!cY896SG84;8E7BS#`ZxdvF7I4f=~)F+4CPem-mGihg{5 z>XWCTzge?psULtZX2;{9rJeTVQoj1?EA1D0kYBJxmMmG6PsnkC1PK%aK|k?6Xit(P ziN-ix(a(AkgJcb$fAQkQI`_PJ^L|jy$&-1J$0}O1sM3WDk@vtahUSqYM@swl?IlBo z44Mmp{_fqoD}KU%A|Hah^UR&NA@e3TL|=0(jLW|H=9_OczF4thMUA_`CvO$_gTGO- zWJ!TP_}uvNaN)wK zu4S(q8x|J4{LnFw74o>mk0L~fpn8+I3i(Xv$Jgf~evBOoxZjK74q! 
zS;R-kzcUv@KRT6|8~NRY2@~pi5EB9`7~B(<^MT*UHF^-c3r!t5bWnPb@rzcj(3yVT zfeBa-bS7~R;!4;G;%eyVuqfav;(o|@#E20!22GwHn{0|Z`ptK8pV$Rti9XV&Pp|T4 z@59vf)*YOTjR&(K$Jkr)c<3AF5)}P*K9h3+Yk=+8H-!rq)^#OD=I!#=Uw>)Nl6Vf- z?b4-7Zifhpey}XDaQI;gSQ`ui9w9%$`i3MoZTfS*6K@0WfP(LpU`|))D8ef3;Grli4lsGBt0mk(u z{rD{C4`=|tGbX%5mvfH6oOQGFpR)txj<8)|Ap9-bnD{xm20z;5*L+RC+1tc*p#j>- zL)!DHcKXpH_$lam>@fZTzKo&Y^k?p2ML*-A3puw!?w$*qOtI6?e{3@OcFy`?ufcQZ zNLR8Kmh>Y_U}Z25SQ%Yt&hjB!U?b`fLxFziHGU`hjCDhY;**ijW=+GAet2sNa!l@+ zvpY$XCe>Uu_5zu&RH>5W%$ZYtWv~YFPizI<$vFh_9_SzPS|5XcXu>yUpK|sRAJ$+Q z&O@Rf$-Sbph#NsaJjbpOXUUi`qv9#f&ia~u4#3%o7^RKf5eCp8cQ*>aEFiRO3tS^`@F8`x3~2*{n$)=4t$S7 zg$e~ae`2Q}U&RzK6aF!0#L@rAnybRu1YhAFd~*tZBeb9^T@}NB=trlrPl*+>CSZJG zop$_e?*skHxomWow+iFqU!c?R6Aeyq*T0>9`o^!Svx|ox5A;!u%c=Qu{!CCH{ z>wEGayMwQV9%RpQCx*Z{=p*N1r=K;zHZu?B!aU#wXN{ - - - - Cortex Nova Visualizer - - - - - - - - -
-
- -
Cortex Nova Visualizer
-
- - - -
-
-
-
Loading...
- -
-
-
- - - - - - \ No newline at end of file diff --git a/tools/visualizer/role.yaml b/tools/visualizer/role.yaml deleted file mode 100644 index e497f3b88..000000000 --- a/tools/visualizer/role.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: cortex-visualizer - namespace: default - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: cortex-visualizer-decision-reader -rules: -- apiGroups: ["cortex.cloud"] - resources: ["decisions"] - verbs: ["get", "list", "watch"] - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: cortex-visualizer-decision-reader-binding -subjects: -- kind: ServiceAccount - name: cortex-visualizer - namespace: default -roleRef: - kind: ClusterRole - name: cortex-visualizer-decision-reader - apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/tools/visualizer/shared.css b/tools/visualizer/shared.css deleted file mode 100644 index b3a933375..000000000 --- a/tools/visualizer/shared.css +++ /dev/null @@ -1,289 +0,0 @@ -/* Copyright SAP SE */ -/* SPDX-License-Identifier: Apache-2.0 */ - -body { - font-family: Arial, Helvetica, sans-serif; - - --color-primary: rgb(255, 165, 2); - --color-on-primary: rgb(255, 255, 255); - --color-secondary: rgb(112, 161, 255); - --color-on-secondary: rgb(255, 255, 255); - --color-tertiary: rgb(255, 71, 87); - --color-on-tertiary: rgb(255, 255, 255); - --color-background: rgb(241, 242, 246); - --color-on-background: rgb(74, 74, 74); - --color-surface: rgb(255, 255, 255); - --color-on-surface: rgb(74, 74, 74); - - color: var(--color-text); - background: var(--color-background); - /* Remove the default margin and padding from the body. */ - margin: 0; -} - -/* Nice animated progress bar on top of the page. 
*/ -.progress { - position: fixed; - top: 0; - left: 0; - right: 0; - height: 0.5em; - background: var(--color-primary); - z-index: 1000; -} - -.progress::before { - content: ''; - position: absolute; - top: 0; - left: 0; - right: 0; - height: 0.5em; - background: var(--color-secondary); - animation: progress 2s infinite; -} - -@keyframes progress { - 0% { - left: -100%; - right: 100%; - } - - 100% { - left: 100%; - right: -100%; - } -} - -.progress-text { - position: fixed; - top: 2em; - left: 0; - right: 0; - text-align: center; - font-weight: bold; -} - -/* Navbar that shows information. */ -nav { - padding-left: 0.25em; - background: var(--color-surface); - box-shadow: 0 0 1em rgba(0, 0, 0, 0.1); - z-index: 1; -} - -nav div.element { - display: inline-block; - padding-top: 1em; - padding-bottom: 2em; - padding-left: 1em; - padding-right: 1em; - margin: 0; - background: var(--color-surface); - color: var(--color-on-surface); - border-right: 2px solid var(--color-background); - font-size: 1em; -} - -nav div.element p.highlight { - font-size: 1.25em; - font-weight: bold; -} - -table { - /* Revert the default spacing used by the browser. */ - border-spacing: 0; -} - -/* Table cell showing the weight during scheduling. */ -td.weight { - text-align: center; - position: relative; - animation: weightAnimation 0.25s ease-in-out; -} - -td.weight div { - border-radius: 0.5em; - padding: 0.5em; - margin: 0.5em; - border: 2px solid var(--color-surface); -} - -/* Backdrop white for the weight cells */ -td.weight::after { - content: ''; - position: absolute; - --m: 0.6em; - top: var(--m); - bottom: var(--m); - left: var(--m); - right: var(--m); - border-radius: 0.5em; - background: var(--color-surface); - z-index: -1; -} - -/* Animation for weights when they first appear */ -@keyframes weightAnimation { - 0% { - opacity: 0; - transform: scale(0.5); - } - - 100% { - opacity: 1; - transform: scale(1); - } -} - -/* Table cell showing the hostname/name. 
*/ -th.hostname { - text-align: center; - position: relative; -} - -th.hostname div { - position: relative; - padding: 0.1em; - padding-top: 0.5em; - padding-bottom: 0.5em; - margin: 0.1em; - width: 6em; - height: 6em; - overflow: hidden; -} - -/* Table cell showing additional information. */ -th.metainfo { - text-align: center; - position: relative; -} - -th.metainfo div p { - width: 6em; - overflow: hidden; -} - -th.metainfo div p.issue { - color: var(--color-tertiary); - border-radius: 0.5em; - font-size: 0.8em; -} - -/* Table row showing the name of a step in the pipeline. */ -th.stepkey { - text-align: left; - font-weight: bold; - padding-left: 0.75em; - padding-top: 0.5em; - padding-bottom: 0.25em; -} - -/* Highlighted rows in the table. */ -tr.highlight { - background: var(--color-surface); - /* tr doesn't support border-radius */ - clip-path: xywh(0 0 100% 100% round 0.75em); -} - -/* Chart showing usage statistics. */ -td.chart { - position: relative; - height: 24em; -} - -td.chart div.barsbefore, -td.chart div.barsafter, -td.chart div.backdrop, -td.chart div.stats { - position: absolute; - top: 0; - left: 0; - right: 0; - bottom: 0; - display: flex; - margin-top: 1.5em; - margin-bottom: 0.5em; - padding-left: 0.5em; - padding-right: 0.5em; - flex-direction: row; - justify-content: center; - align-items: flex-end; -} - -td.chart div.barsbefore p, -td.chart div.barsafter p, -td.chart div.backdrop p, -td.chart div.stats p { - margin-left: 0.1em; - margin-right: 0.1em; - display: flex; - border-radius: 0.2em; -} - -td.chart div.backdrop p { - height: 100%; - border-radius: 0.2em; - border: 1px solid rgba(0, 0, 0, 0.05); - background: white; -} - -td.chart div.stats { - text-align: center; - display: flex; - justify-content: center; - align-items: flex-start; -} - -td.chart div.stats p { - writing-mode: vertical-lr; - text-orientation: mixed; - display: flex; - font-size: 1em; - font-weight: bold; - margin-left: 0.1em; - margin-right: 0.1em; - justify-content: 
center; - align-items: center; -} - -/* Animation for chart bars */ -td.chart div.barsafter p, -td.chart div.barsbefore p { - animation: barAnim 0.25s ease-in-out; - overflow: hidden; -} - -@keyframes barAnim { - 0% { - transform: scaleY(0); - } - - 100% { - transform: scaleY(1); - } -} - -td.chart div.barsafter p.cpu { - background: var(--color-primary); -} - -td.chart div.barsafter p.mem { - background: var(--color-primary); -} - -td.chart div.barsafter p.disk { - background: var(--color-primary); -} - -td.chart div.barsbefore p.cpu { - background: var(--color-secondary); -} - -td.chart div.barsbefore p.mem { - background: var(--color-secondary); -} - -td.chart div.barsbefore p.disk { - background: var(--color-secondary); -} From 5cb83ac83b28d1e83a78879f8643dced08696179 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 27 Feb 2026 08:09:51 +0100 Subject: [PATCH 34/36] Fix variable name for ordered hosts in Nova external scheduler response --- internal/scheduling/nova/external_scheduler_api.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index e6a02b028..509a23538 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -222,7 +222,7 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req c.Respond(http.StatusInternalServerError, errors.New("pipeline didn't produce a result"), "failed to process scheduling request") return } - hosts := decision.Status.Result.OrderedHosts + hosts := result.OrderedHosts hosts = limitHostsToRequest(requestData, hosts) response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") From 0c31a0535ebff581dd020ec5048fdf6358d1685a Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 27 Feb 2026 08:25:48 +0100 Subject: [PATCH 35/36] Add additional compute host to Nova 
external scheduler test --- internal/scheduling/nova/external_scheduler_api_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/scheduling/nova/external_scheduler_api_test.go b/internal/scheduling/nova/external_scheduler_api_test.go index c6734801b..3db878939 100644 --- a/internal/scheduling/nova/external_scheduler_api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -174,6 +174,7 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { }, Hosts: []novaapi.ExternalSchedulerHost{ {ComputeHost: "host1"}, + {ComputeHost: "host2"}, }, Weights: map[string]float64{ "host1": 1.0, From d8d6833774843d1ed99c9aa6a484b91152dd36c5 Mon Sep 17 00:00:00 2001 From: Markus Wieland Date: Fri, 27 Feb 2026 08:29:16 +0100 Subject: [PATCH 36/36] Add weight for additional host in Nova external scheduler test --- internal/scheduling/nova/external_scheduler_api_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/scheduling/nova/external_scheduler_api_test.go b/internal/scheduling/nova/external_scheduler_api_test.go index 3db878939..9564eb0e0 100644 --- a/internal/scheduling/nova/external_scheduler_api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -178,6 +178,7 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { }, Weights: map[string]float64{ "host1": 1.0, + "host2": 2.0, }, Pipeline: "test-pipeline", }