diff --git a/api/v1alpha1/decision_types.go b/api/v1alpha1/decision_types.go index c3f02de1e..9d6eeab5d 100644 --- a/api/v1alpha1/decision_types.go +++ b/api/v1alpha1/decision_types.go @@ -6,90 +6,67 @@ package v1alpha1 import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - runtime "k8s.io/apimachinery/pkg/runtime" ) -type DecisionSpec struct { - // SchedulingDomain defines in which scheduling domain this decision - // was or is processed (e.g., nova, cinder, manila). - SchedulingDomain SchedulingDomain `json:"schedulingDomain"` +// SchedulingIntent represents the Intent for a scheduling event. +type SchedulingIntent string - // A reference to the pipeline that should be used for this decision. +const ( + // SchedulingIntentInitialPlacement indicates that this is the initial placement of a resource. + SchedulingIntentInitialPlacement SchedulingIntent = "InitialPlacement" + // SchedulingIntentLiveMigration indicates that this scheduling event is triggered by a live migration operation. + SchedulingIntentLiveMigration SchedulingIntent = "LiveMigration" + // SchedulingIntentResize indicates that this scheduling event is triggered by a resize operation. + SchedulingIntentResize SchedulingIntent = "Resize" + // SchedulingIntentRebuild indicates that this scheduling event is triggered by a rebuild operation. + SchedulingIntentRebuild SchedulingIntent = "Rebuild" + // SchedulingIntentEvacuate indicates that this scheduling event is triggered by an evacuate operation. + SchedulingIntentEvacuate SchedulingIntent = "Evacuate" + // SchedulingIntentUnknown indicates that the Intent for this scheduling event is unknown. + SchedulingIntentUnknown SchedulingIntent = "Unknown" +) + +// SchedulingHistoryEntry represents a single entry in the scheduling history of a resource. +type SchedulingHistoryEntry struct { + // The hosts that were selected in this scheduling event, in order of preference. 
+ OrderedHosts []string `json:"orderedHosts"` + // Timestamp of when the scheduling event occurred. + Timestamp metav1.Time `json:"timestamp"` + // A reference to the pipeline that was used for this decision. // This reference can be used to look up the pipeline definition and its // scheduler step configuration for additional context. PipelineRef corev1.ObjectReference `json:"pipelineRef"` + // The Intent for this scheduling event. + Intent SchedulingIntent `json:"intent"` +} + +type DecisionSpec struct { + // SchedulingDomain defines in which scheduling domain this decision + // was or is processed (e.g., nova, cinder, manila). + SchedulingDomain SchedulingDomain `json:"schedulingDomain"` // An identifier for the underlying resource to be scheduled. // For example, this can be the UUID of a nova instance or cinder volume. - // This can be used to correlate multiple decisions for the same resource. ResourceID string `json:"resourceID"` - - // If the type is "nova", this field contains the raw nova decision request. - // +kubebuilder:validation:Optional - NovaRaw *runtime.RawExtension `json:"novaRaw,omitempty"` - // If the type is "cinder", this field contains the raw cinder decision request. - // +kubebuilder:validation:Optional - CinderRaw *runtime.RawExtension `json:"cinderRaw,omitempty"` - // If the type is "manila", this field contains the raw manila decision request. - // +kubebuilder:validation:Optional - ManilaRaw *runtime.RawExtension `json:"manilaRaw,omitempty"` - // If the type is "machine", this field contains the machine reference. - // +kubebuilder:validation:Optional - MachineRef *corev1.ObjectReference `json:"machineRef,omitempty"` - // If the type is "pod", this field contains the pod reference. - // +kubebuilder:validation:Optional - PodRef *corev1.ObjectReference `json:"podRef,omitempty"` -} - -type StepResult struct { - // object reference to the scheduler step. - StepName string `json:"stepName"` - // Activations of the step for each host. 
- Activations map[string]float64 `json:"activations"` -} - -type DecisionResult struct { - // Raw input weights to the pipeline. - // +kubebuilder:validation:Optional - RawInWeights map[string]float64 `json:"rawInWeights"` - // Normalized input weights to the pipeline. - // +kubebuilder:validation:Optional - NormalizedInWeights map[string]float64 `json:"normalizedInWeights"` - // Outputs of the decision pipeline including the activations used - // to make the final ordering of compute hosts. - // +kubebuilder:validation:Optional - StepResults []StepResult `json:"stepResults,omitempty"` - // Aggregated output weights from the pipeline. - // +kubebuilder:validation:Optional - AggregatedOutWeights map[string]float64 `json:"aggregatedOutWeights"` - // Final ordered list of hosts from most preferred to least preferred. - // +kubebuilder:validation:Optional - OrderedHosts []string `json:"orderedHosts,omitempty"` - // The first element of the ordered hosts is considered the target host. - // +kubebuilder:validation:Optional - TargetHost *string `json:"targetHost,omitempty"` } const ( - // The decision was successfully processed. + // The decision is ready and tracking the resource. DecisionConditionReady = "Ready" + // The decision has failed to make a placement decision for the resource. + DecisionConditionFailed = "Failed" ) type DecisionStatus struct { - // The result of this decision. - // +kubebuilder:validation:Optional - Result *DecisionResult `json:"result,omitempty"` - - // If there were previous decisions for the underlying resource, they can - // be resolved here to provide historical context for the decision. + // The target host selected for the resource. Can be empty if no host could be determined. // +kubebuilder:validation:Optional - History *[]corev1.ObjectReference `json:"history,omitempty"` + TargetHost string `json:"targetHost,omitempty"` - // The number of decisions that preceded this one for the same resource. 
+ // The history of scheduling events for this resource. // +kubebuilder:validation:Optional - Precedence *int `json:"precedence,omitempty"` + SchedulingHistory []SchedulingHistoryEntry `json:"schedulingHistory,omitempty"` - // A human-readable explanation of the decision result. + // A human-readable explanation of the current scheduling decision. // +kubebuilder:validation:Optional Explanation string `json:"explanation,omitempty"` @@ -103,12 +80,8 @@ type DecisionStatus struct { // +kubebuilder:resource:scope=Cluster // +kubebuilder:printcolumn:name="Domain",type="string",JSONPath=".spec.schedulingDomain" // +kubebuilder:printcolumn:name="Resource ID",type="string",JSONPath=".spec.resourceID" -// +kubebuilder:printcolumn:name="#",type="string",JSONPath=".status.precedence" +// +kubebuilder:printcolumn:name="Target Host",type="string",JSONPath=".status.targetHost" // +kubebuilder:printcolumn:name="Created",type="date",JSONPath=".metadata.creationTimestamp" -// +kubebuilder:printcolumn:name="Pipeline",type="string",JSONPath=".spec.pipelineRef.name" -// +kubebuilder:printcolumn:name="TargetHost",type="string",JSONPath=".status.result.targetHost" -// +kubebuilder:printcolumn:name="Ready",type="string",JSONPath=".status.conditions[?(@.type=='Ready')].status" -// +kubebuilder:selectablefield:JSONPath=".spec.resourceID" // Decision is the Schema for the decisions API type Decision struct { diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 5a756e045..d448c2d9d 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -207,7 +207,7 @@ func (in *Decision) DeepCopyInto(out *Decision) { *out = *in out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) - in.Spec.DeepCopyInto(&out.Spec) + out.Spec = in.Spec in.Status.DeepCopyInto(&out.Status) } @@ -261,88 +261,9 @@ func (in *DecisionList) DeepCopyObject() runtime.Object { return nil } -// DeepCopyInto is an autogenerated 
deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *DecisionResult) DeepCopyInto(out *DecisionResult) { - *out = *in - if in.RawInWeights != nil { - in, out := &in.RawInWeights, &out.RawInWeights - *out = make(map[string]float64, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.NormalizedInWeights != nil { - in, out := &in.NormalizedInWeights, &out.NormalizedInWeights - *out = make(map[string]float64, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.StepResults != nil { - in, out := &in.StepResults, &out.StepResults - *out = make([]StepResult, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } - } - if in.AggregatedOutWeights != nil { - in, out := &in.AggregatedOutWeights, &out.AggregatedOutWeights - *out = make(map[string]float64, len(*in)) - for key, val := range *in { - (*out)[key] = val - } - } - if in.OrderedHosts != nil { - in, out := &in.OrderedHosts, &out.OrderedHosts - *out = make([]string, len(*in)) - copy(*out, *in) - } - if in.TargetHost != nil { - in, out := &in.TargetHost, &out.TargetHost - *out = new(string) - **out = **in - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DecisionResult. -func (in *DecisionResult) DeepCopy() *DecisionResult { - if in == nil { - return nil - } - out := new(DecisionResult) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *DecisionSpec) DeepCopyInto(out *DecisionSpec) { *out = *in - out.PipelineRef = in.PipelineRef - if in.NovaRaw != nil { - in, out := &in.NovaRaw, &out.NovaRaw - *out = new(runtime.RawExtension) - (*in).DeepCopyInto(*out) - } - if in.CinderRaw != nil { - in, out := &in.CinderRaw, &out.CinderRaw - *out = new(runtime.RawExtension) - (*in).DeepCopyInto(*out) - } - if in.ManilaRaw != nil { - in, out := &in.ManilaRaw, &out.ManilaRaw - *out = new(runtime.RawExtension) - (*in).DeepCopyInto(*out) - } - if in.MachineRef != nil { - in, out := &in.MachineRef, &out.MachineRef - *out = new(v1.ObjectReference) - **out = **in - } - if in.PodRef != nil { - in, out := &in.PodRef, &out.PodRef - *out = new(v1.ObjectReference) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DecisionSpec. @@ -358,25 +279,13 @@ func (in *DecisionSpec) DeepCopy() *DecisionSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *DecisionStatus) DeepCopyInto(out *DecisionStatus) { *out = *in - if in.Result != nil { - in, out := &in.Result, &out.Result - *out = new(DecisionResult) - (*in).DeepCopyInto(*out) - } - if in.History != nil { - in, out := &in.History, &out.History - *out = new([]v1.ObjectReference) - if **in != nil { - in, out := *in, *out - *out = make([]v1.ObjectReference, len(*in)) - copy(*out, *in) + if in.SchedulingHistory != nil { + in, out := &in.SchedulingHistory, &out.SchedulingHistory + *out = make([]SchedulingHistoryEntry, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) } } - if in.Precedence != nil { - in, out := &in.Precedence, &out.Precedence - *out = new(int) - **out = **in - } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions *out = make([]metav1.Condition, len(*in)) @@ -1285,23 +1194,23 @@ func (in *ReservationStatus) DeepCopy() *ReservationStatus { } // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *StepResult) DeepCopyInto(out *StepResult) { +func (in *SchedulingHistoryEntry) DeepCopyInto(out *SchedulingHistoryEntry) { *out = *in - if in.Activations != nil { - in, out := &in.Activations, &out.Activations - *out = make(map[string]float64, len(*in)) - for key, val := range *in { - (*out)[key] = val - } + if in.OrderedHosts != nil { + in, out := &in.OrderedHosts, &out.OrderedHosts + *out = make([]string, len(*in)) + copy(*out, *in) } + in.Timestamp.DeepCopyInto(&out.Timestamp) + out.PipelineRef = in.PipelineRef } -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new StepResult. -func (in *StepResult) DeepCopy() *StepResult { +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SchedulingHistoryEntry. 
+func (in *SchedulingHistoryEntry) DeepCopy() *SchedulingHistoryEntry { if in == nil { return nil } - out := new(StepResult) + out := new(SchedulingHistoryEntry) in.DeepCopyInto(out) return out } diff --git a/cmd/main.go b/cmd/main.go index 4e4865567..e8987ba35 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -41,7 +41,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/knowledge/extractor" "github.com/cobaltcore-dev/cortex/internal/knowledge/kpis" "github.com/cobaltcore-dev/cortex/internal/scheduling/cinder" - "github.com/cobaltcore-dev/cortex/internal/scheduling/explanation" schedulinglib "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/internal/scheduling/machines" "github.com/cobaltcore-dev/cortex/internal/scheduling/manila" @@ -439,19 +438,6 @@ func main() { os.Exit(1) } } - if slices.Contains(mainConfig.EnabledControllers, "explanation-controller") { - // Setup a controller which will reconcile the history and explanation for - // decision resources. 
- explanationControllerConfig := conf.GetConfigOrDie[explanation.ControllerConfig]() - explanationController := &explanation.Controller{ - Client: multiclusterClient, - Config: explanationControllerConfig, - } - if err := explanationController.SetupWithManager(mgr, multiclusterClient); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "ExplanationController") - os.Exit(1) - } - } if slices.Contains(mainConfig.EnabledControllers, "reservations-controller") { monitor := reservationscontroller.NewControllerMonitor(multiclusterClient) metrics.Registry.MustRegister(&monitor) diff --git a/helm/bundles/cortex-cinder/values.yaml b/helm/bundles/cortex-cinder/values.yaml index f002fc58b..0c1f44467 100644 --- a/helm/bundles/cortex-cinder/values.yaml +++ b/helm/bundles/cortex-cinder/values.yaml @@ -97,7 +97,6 @@ cortex-scheduling-controllers: component: cinder-scheduling enabledControllers: - cinder-decisions-pipeline-controller - - explanation-controller enabledTasks: - cinder-decisions-cleanup-task diff --git a/helm/bundles/cortex-ironcore/values.yaml b/helm/bundles/cortex-ironcore/values.yaml index 2f885c7a5..6a1e8acdb 100644 --- a/helm/bundles/cortex-ironcore/values.yaml +++ b/helm/bundles/cortex-ironcore/values.yaml @@ -32,7 +32,6 @@ cortex: schedulingDomain: machines enabledControllers: - ironcore-decisions-pipeline-controller - - explanation-controller monitoring: labels: github_org: cobaltcore-dev diff --git a/helm/bundles/cortex-manila/values.yaml b/helm/bundles/cortex-manila/values.yaml index cc341a112..faf645fc6 100644 --- a/helm/bundles/cortex-manila/values.yaml +++ b/helm/bundles/cortex-manila/values.yaml @@ -97,7 +97,6 @@ cortex-scheduling-controllers: component: manila-scheduling enabledControllers: - manila-decisions-pipeline-controller - - explanation-controller enabledTasks: - manila-decisions-cleanup-task diff --git a/helm/bundles/cortex-nova/values.yaml b/helm/bundles/cortex-nova/values.yaml index b2dbba788..aa00fcae8 100644 
--- a/helm/bundles/cortex-nova/values.yaml +++ b/helm/bundles/cortex-nova/values.yaml @@ -113,7 +113,6 @@ cortex-scheduling-controllers: enabledControllers: - nova-pipeline-controllers - nova-deschedulings-executor - - explanation-controller enabledTasks: - nova-decisions-cleanup-task diff --git a/helm/bundles/cortex-pods/values.yaml b/helm/bundles/cortex-pods/values.yaml index b7aab8a6d..8a8a4e231 100644 --- a/helm/bundles/cortex-pods/values.yaml +++ b/helm/bundles/cortex-pods/values.yaml @@ -32,7 +32,6 @@ cortex: schedulingDomain: pods enabledControllers: - pods-decisions-pipeline-controller - - explanation-controller monitoring: labels: github_org: cobaltcore-dev diff --git a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml index 3332a40e4..430ec9e7d 100644 --- a/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml +++ b/helm/library/cortex/files/crds/cortex.cloud_decisions.yaml @@ -21,21 +21,12 @@ spec: - jsonPath: .spec.resourceID name: Resource ID type: string - - jsonPath: .status.precedence - name: '#' + - jsonPath: .status.targetHost + name: Target Host type: string - jsonPath: .metadata.creationTimestamp name: Created type: date - - jsonPath: .spec.pipelineRef.name - name: Pipeline - type: string - - jsonPath: .status.result.targetHost - name: TargetHost - type: string - - jsonPath: .status.conditions[?(@.type=='Ready')].status - name: Ready - type: string name: v1alpha1 schema: openAPIV3Schema: @@ -61,159 +52,10 @@ spec: spec: description: spec defines the desired state of Decision properties: - cinderRaw: - description: If the type is "cinder", this field contains the raw - cinder decision request. - type: object - x-kubernetes-preserve-unknown-fields: true - machineRef: - description: If the type is "machine", this field contains the machine - reference. - properties: - apiVersion: - description: API version of the referent. 
- type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - manilaRaw: - description: If the type is "manila", this field contains the raw - manila decision request. - type: object - x-kubernetes-preserve-unknown-fields: true - novaRaw: - description: If the type is "nova", this field contains the raw nova - decision request. 
- type: object - x-kubernetes-preserve-unknown-fields: true - pipelineRef: - description: |- - A reference to the pipeline that should be used for this decision. - This reference can be used to look up the pipeline definition and its - scheduler step configuration for additional context. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. 
- More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - podRef: - description: If the type is "pod", this field contains the pod reference. - properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency - type: string - uid: - description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic resourceID: description: |- An identifier for the underlying resource to be scheduled. 
For example, this can be the UUID of a nova instance or cinder volume. - This can be used to correlate multiple decisions for the same resource. type: string schedulingDomain: description: |- @@ -221,7 +63,6 @@ spec: was or is processed (e.g., nova, cinder, manila). type: string required: - - pipelineRef - resourceID - schedulingDomain type: object @@ -286,115 +127,89 @@ spec: type: object type: array explanation: - description: A human-readable explanation of the decision result. + description: A human-readable explanation of the current scheduling + decision. type: string - history: - description: |- - If there were previous decisions for the underlying resource, they can - be resolved here to provide historical context for the decision. + schedulingHistory: + description: The history of scheduling events for this resource. items: - description: ObjectReference contains enough information to let - you inspect or modify the referred object. + description: SchedulingHistoryEntry represents a single entry in + the scheduling history of a resource. properties: - apiVersion: - description: API version of the referent. - type: string - fieldPath: - description: |- - If referring to a piece of an object instead of an entire object, this string - should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. - For example, if the object reference is to a container within a pod, this would take on a value like: - "spec.containers{name}" (where "name" refers to the name of the container that triggered - the event) or if no container name is specified "spec.containers[2]" (container with - index 2 in this pod). This syntax is chosen only to have some well-defined way of - referencing a part of an object. - type: string - kind: - description: |- - Kind of the referent. 
- More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds - type: string - name: - description: |- - Name of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names - type: string - namespace: - description: |- - Namespace of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ - type: string - resourceVersion: - description: |- - Specific resourceVersion to which this reference is made, if any. - More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + intent: + description: The Intent for this scheduling event. type: string - uid: + orderedHosts: + description: The hosts that were selected in this scheduling + event, in order of preference. + items: + type: string + type: array + pipelineRef: description: |- - UID of the referent. - More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids - type: string - type: object - x-kubernetes-map-type: atomic - type: array - precedence: - description: The number of decisions that preceded this one for the - same resource. - type: integer - result: - description: The result of this decision. - properties: - aggregatedOutWeights: - additionalProperties: - type: number - description: Aggregated output weights from the pipeline. - type: object - normalizedInWeights: - additionalProperties: - type: number - description: Normalized input weights to the pipeline. - type: object - orderedHosts: - description: Final ordered list of hosts from most preferred to - least preferred. - items: - type: string - type: array - rawInWeights: - additionalProperties: - type: number - description: Raw input weights to the pipeline. 
- type: object - stepResults: - description: |- - Outputs of the decision pipeline including the activations used - to make the final ordering of compute hosts. - items: + A reference to the pipeline that was used for this decision. + This reference can be used to look up the pipeline definition and its + scheduler step configuration for additional context. properties: - activations: - additionalProperties: - type: number - description: Activations of the step for each host. - type: object - stepName: - description: object reference to the scheduler step. + apiVersion: + description: API version of the referent. + type: string + fieldPath: + description: |- + If referring to a piece of an object instead of an entire object, this string + should contain a valid JSON/Go field access statement, such as desiredState.manifest.containers[2]. + For example, if the object reference is to a container within a pod, this would take on a value like: + "spec.containers{name}" (where "name" refers to the name of the container that triggered + the event) or if no container name is specified "spec.containers[2]" (container with + index 2 in this pod). This syntax is chosen only to have some well-defined way of + referencing a part of an object. + type: string + kind: + description: |- + Kind of the referent. + More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds + type: string + name: + description: |- + Name of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + namespace: + description: |- + Namespace of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ + type: string + resourceVersion: + description: |- + Specific resourceVersion to which this reference is made, if any. 
+ More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#concurrency-control-and-consistency + type: string + uid: + description: |- + UID of the referent. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#uids type: string - required: - - activations - - stepName type: object - type: array - targetHost: - description: The first element of the ordered hosts is considered - the target host. - type: string - type: object + x-kubernetes-map-type: atomic + timestamp: + description: Timestamp of when the scheduling event occurred. + format: date-time + type: string + required: + - intent + - orderedHosts + - pipelineRef + - timestamp + type: object + type: array + targetHost: + description: The target host selected for the resource. Can be empty + if no host could be determined. + type: string type: object required: - spec type: object - selectableFields: - - jsonPath: .spec.resourceID served: true storage: true subresources: diff --git a/helm/library/cortex/values.yaml b/helm/library/cortex/values.yaml index e7a475184..4c9202299 100644 --- a/helm/library/cortex/values.yaml +++ b/helm/library/cortex/values.yaml @@ -113,6 +113,4 @@ conf: schedulingDomain: cortex # Used to differentiate different cortex deployments in the same cluster (e.g. leader election ID) leaderElectionID: cortex-unknown - enabledControllers: - # The explanation controller is available for all decision resources. 
- - explanation-controller + enabledControllers: [] diff --git a/internal/knowledge/kpis/plugins/deployment/decision_state.go b/internal/knowledge/kpis/plugins/deployment/decision_state.go index f11e1f9a4..6620fafc6 100644 --- a/internal/knowledge/kpis/plugins/deployment/decision_state.go +++ b/internal/knowledge/kpis/plugins/deployment/decision_state.go @@ -63,15 +63,12 @@ func (k *DecisionStateKPI) Collect(ch chan<- prometheus.Metric) { decisions = append(decisions, d) } // For each decision, categorize by state: error, waiting, or success - var errorCount, waitingCount, successCount float64 + var errorCount, successCount float64 for _, d := range decisions { switch { // Error state: decision has a false Ready condition case meta.IsStatusConditionFalse(d.Status.Conditions, v1alpha1.DecisionConditionReady): errorCount++ - // Waiting state: decision has a target host set (waiting for migration/placement) - case d.Status.Result != nil && d.Status.Result.TargetHost != nil: - waitingCount++ // Success state: decision is complete (has result with ordered hosts or no result needed) default: successCount++ @@ -82,10 +79,6 @@ func (k *DecisionStateKPI) Collect(ch chan<- prometheus.Metric) { k.counter, prometheus.GaugeValue, errorCount, string(k.Options.DecisionSchedulingDomain), "error", ) - ch <- prometheus.MustNewConstMetric( - k.counter, prometheus.GaugeValue, waitingCount, - string(k.Options.DecisionSchedulingDomain), "waiting", - ) ch <- prometheus.MustNewConstMetric( k.counter, prometheus.GaugeValue, successCount, string(k.Options.DecisionSchedulingDomain), "success", diff --git a/internal/knowledge/kpis/plugins/deployment/decision_state_test.go b/internal/knowledge/kpis/plugins/deployment/decision_state_test.go index 0d6bd4568..b8a80622d 100644 --- a/internal/knowledge/kpis/plugins/deployment/decision_state_test.go +++ b/internal/knowledge/kpis/plugins/deployment/decision_state_test.go @@ -35,17 +35,15 @@ func TestDecisionStateKPI_Collect(t *testing.T) { 
expectedCount int description string expectedError int - expectedWaiting int expectedSuccess int }{ { name: "no decisions", decisions: []v1alpha1.Decision{}, operator: "test-operator", - expectedCount: 3, // always emits 3 metrics: error, waiting, success + expectedCount: 2, // always emits 2 metrics: error, success description: "should collect metrics with zero counts when no decisions exist", expectedError: 0, - expectedWaiting: 0, expectedSuccess: 0, }, { @@ -65,30 +63,9 @@ func TestDecisionStateKPI_Collect(t *testing.T) { }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should count decision in error state", expectedError: 1, - expectedWaiting: 0, - expectedSuccess: 0, - }, - { - name: "single decision in waiting state", - decisions: []v1alpha1.Decision{ - { - ObjectMeta: v1.ObjectMeta{Name: "dec2"}, - Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: &targetHost, - }, - }, - }, - }, - operator: "test-operator", - expectedCount: 3, - description: "should count decision with target host as waiting", - expectedError: 0, - expectedWaiting: 1, expectedSuccess: 0, }, { @@ -98,17 +75,14 @@ func TestDecisionStateKPI_Collect(t *testing.T) { ObjectMeta: v1.ObjectMeta{Name: "dec3"}, Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1", "host2"}, - }, + TargetHost: targetHost, }, }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should count decision without target host and no error as success", expectedError: 0, - expectedWaiting: 0, expectedSuccess: 1, }, { @@ -126,30 +100,18 @@ func TestDecisionStateKPI_Collect(t *testing.T) { }, }, }, - { - ObjectMeta: v1.ObjectMeta{Name: "dec-waiting"}, - Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.DecisionStatus{ 
- Result: &v1alpha1.DecisionResult{ - TargetHost: &targetHost, - }, - }, - }, { ObjectMeta: v1.ObjectMeta{Name: "dec-success"}, Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - }, + TargetHost: targetHost, }, }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should correctly count decisions across all states", expectedError: 1, - expectedWaiting: 1, expectedSuccess: 1, }, { @@ -181,10 +143,9 @@ func TestDecisionStateKPI_Collect(t *testing.T) { }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should only count decisions with matching operator", expectedError: 1, - expectedWaiting: 0, expectedSuccess: 0, }, { @@ -216,28 +177,11 @@ func TestDecisionStateKPI_Collect(t *testing.T) { }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should correctly aggregate multiple decisions in same state", expectedError: 2, - expectedWaiting: 0, expectedSuccess: 0, }, - { - name: "decision with no result", - decisions: []v1alpha1.Decision{ - { - ObjectMeta: v1.ObjectMeta{Name: "dec-no-result"}, - Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, - Status: v1alpha1.DecisionStatus{}, - }, - }, - operator: "test-operator", - expectedCount: 3, - description: "should count decision with no result as success", - expectedError: 0, - expectedWaiting: 0, - expectedSuccess: 1, - }, { name: "error condition takes precedence", decisions: []v1alpha1.Decision{ @@ -245,9 +189,7 @@ func TestDecisionStateKPI_Collect(t *testing.T) { ObjectMeta: v1.ObjectMeta{Name: "dec-error-with-target"}, Spec: v1alpha1.DecisionSpec{SchedulingDomain: "test-operator"}, Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: &targetHost, - }, + TargetHost: targetHost, Conditions: []v1.Condition{ { Type: v1alpha1.DecisionConditionReady, @@ -258,10 
+200,9 @@ func TestDecisionStateKPI_Collect(t *testing.T) { }, }, operator: "test-operator", - expectedCount: 3, + expectedCount: 2, description: "should count as error even if target host is present", expectedError: 1, - expectedWaiting: 0, expectedSuccess: 0, }, } diff --git a/internal/scheduling/cinder/external_scheduler_api.go b/internal/scheduling/cinder/external_scheduler_api.go index f40ec3bae..fd78f3d27 100644 --- a/internal/scheduling/cinder/external_scheduler_api.go +++ b/internal/scheduling/cinder/external_scheduler_api.go @@ -14,19 +14,14 @@ import ( "net/http" api "github.com/cobaltcore-dev/cortex/api/external/cinder" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/metrics" ) type HTTPAPIDelegate interface { - // Process the decision from the API. Should create and return the updated decision. - ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error + // Process the scheduling request from the API. + ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*scheduling.FilterWeigherPipelineResult, error) } type HTTPAPI interface { @@ -106,7 +101,6 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R c.Respond(http.StatusInternalServerError, err, "failed to read request body") return } - raw := runtime.RawExtension{Raw: body} var requestData api.ExternalSchedulerRequest // Copy the raw body to a io.Reader for json deserialization. cp := body @@ -137,40 +131,23 @@ func (httpAPI *httpAPI) CinderExternalScheduler(w http.ResponseWriter, r *http.R slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) } - // Create the decision object in kubernetes. 
- decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "cinder-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: requestData.Pipeline, - }, - ResourceID: "", // TODO - CinderRaw: &raw, - }, - } ctx := r.Context() - if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision") - return - } - // Check if the decision contains status conditions indicating an error. - if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) { - c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") + result, err := httpAPI.delegate.ProcessRequest(ctx, requestData) + if err != nil { + c.Respond(http.StatusInternalServerError, err, "failed to process scheduling request") return } - if decision.Status.Result == nil { - c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") + if result == nil { + c.Respond(http.StatusInternalServerError, errors.New("pipeline didn't produce a result"), "failed to process scheduling request") return } - hosts := decision.Status.Result.OrderedHosts + hosts := result.OrderedHosts response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { c.Respond(http.StatusInternalServerError, err, "failed to encode response") return } + c.Respond(http.StatusOK, nil, "Success") } diff --git a/internal/scheduling/cinder/external_scheduler_api_test.go b/internal/scheduling/cinder/external_scheduler_api_test.go index d5f7f394f..8b6bd947d 100644 --- a/internal/scheduling/cinder/external_scheduler_api_test.go +++ b/internal/scheduling/cinder/external_scheduler_api_test.go @@ -14,20 +14,20 @@ import ( "testing" 
cinderapi "github.com/cobaltcore-dev/cortex/api/external/cinder" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" ) type mockHTTPAPIDelegate struct { - processDecisionFunc func(ctx context.Context, decision *v1alpha1.Decision) error + processFunc func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } -func (m *mockHTTPAPIDelegate) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { - if m.processDecisionFunc != nil { - return m.processDecisionFunc(ctx, decision) +func (m *mockHTTPAPIDelegate) ProcessRequest(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + if m.processFunc != nil { + return m.processFunc(ctx, request) } - return nil + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1"}, + }, nil } func TestNewAPI(t *testing.T) { @@ -142,13 +142,12 @@ func TestHTTPAPI_canRunScheduler(t *testing.T) { func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { tests := []struct { - name string - method string - body string - processDecisionErr error - decisionResult *v1alpha1.Decision - expectedStatus int - expectedHosts []string + name string + method string + body string + processFunc func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + expectedStatus int + expectedHosts []string }{ { name: "invalid method", @@ -168,9 +167,11 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { req := cinderapi.ExternalSchedulerRequest{ Hosts: []cinderapi.ExternalSchedulerHost{ {VolumeHost: "host1"}, + {VolumeHost: "host2"}, }, Weights: map[string]float64{ "host1": 1.0, + "host2": 0.5, }, Pipeline: "test-pipeline", } @@ -180,15 +181,13 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { } return string(data) }(), - 
decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - }, - }, + processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1", "host2"}, + }, nil }, expectedStatus: http.StatusOK, - expectedHosts: []string{"host1"}, + expectedHosts: []string{"host1", "host2"}, }, { name: "processing error", @@ -209,11 +208,13 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { } return string(data) }(), - processDecisionErr: errors.New("processing failed"), - expectedStatus: http.StatusInternalServerError, + processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return nil, errors.New("processing failed") + }, + expectedStatus: http.StatusInternalServerError, }, { - name: "decision failed", + name: "empty result", method: http.MethodPost, body: func() string { req := cinderapi.ExternalSchedulerRequest{ @@ -231,36 +232,20 @@ func TestHTTPAPI_CinderExternalScheduler(t *testing.T) { } return string(data) }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Conditions: []metav1.Condition{ - { - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "SchedulingError", - }, - }, - }, + processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{}, + }, nil }, - expectedStatus: http.StatusInternalServerError, + expectedStatus: http.StatusOK, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - if tt.processDecisionErr != nil { - return tt.processDecisionErr - } 
- if tt.decisionResult != nil { - decision.Status = tt.decisionResult.Status - return nil - } - return nil - }, + processFunc: tt.processFunc, } - api := NewAPI(delegate).(*httpAPI) var body *strings.Reader @@ -347,16 +332,17 @@ func TestHTTPAPI_inferPipelineName(t *testing.T) { } } -func TestHTTPAPI_CinderExternalScheduler_DecisionCreation(t *testing.T) { - var capturedDecision *v1alpha1.Decision +func TestHTTPAPI_CinderExternalScheduler_PipelineParameter(t *testing.T) { + var capturedPipeline string + var capturedRequest cinderapi.ExternalSchedulerRequest + delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - capturedDecision = decision - // Set a successful result to avoid "decision didn't produce a result" error - decision.Status.Result = &v1alpha1.DecisionResult{ + processFunc: func(ctx context.Context, request cinderapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + capturedPipeline = request.Pipeline + capturedRequest = request + return &lib.FilterWeigherPipelineResult{ OrderedHosts: []string{"host1"}, - } - return nil + }, nil }, } @@ -370,6 +356,9 @@ func TestHTTPAPI_CinderExternalScheduler_DecisionCreation(t *testing.T) { "host1": 1.0, }, Pipeline: "test-pipeline", + Spec: map[string]any{ + "volume_id": "test-volume", + }, } body, err := json.Marshal(requestData) @@ -382,27 +371,20 @@ func TestHTTPAPI_CinderExternalScheduler_DecisionCreation(t *testing.T) { api.CinderExternalScheduler(w, req) if w.Code != http.StatusOK { - t.Errorf("Expected status %d, got %d", http.StatusOK, w.Code) + t.Errorf("Expected status %d, got %d. 
Body: %s", http.StatusOK, w.Code, w.Body.String()) } - if capturedDecision == nil { - t.Fatal("Decision was not captured") + // Verify the pipeline name was passed correctly + expectedPipeline := "test-pipeline" // Default pipeline from inferPipelineName + if capturedPipeline != expectedPipeline { + t.Errorf("Expected pipeline '%s', got '%s'", expectedPipeline, capturedPipeline) } - // Verify decision fields - if capturedDecision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { - t.Errorf("Expected scheduling domain %s, got %s", v1alpha1.SchedulingDomainCinder, capturedDecision.Spec.SchedulingDomain) + // Verify the request was passed correctly + if len(capturedRequest.Hosts) != 1 { + t.Errorf("Expected 1 host, got %d", len(capturedRequest.Hosts)) } - - if capturedDecision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("Expected pipeline 'test-pipeline', got %s", capturedDecision.Spec.PipelineRef.Name) - } - - if capturedDecision.GenerateName != "cinder-" { - t.Errorf("Expected generate name 'cinder-', got %s", capturedDecision.GenerateName) - } - - if capturedDecision.Spec.CinderRaw == nil { - t.Error("CinderRaw should not be nil") + if capturedRequest.Hosts[0].VolumeHost != "host1" { + t.Errorf("Expected host 'host1', got '%s'", capturedRequest.Hosts[0].VolumeHost) } } diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go index 0d8771081..affba0615 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller.go @@ -5,16 +5,12 @@ package cinder import ( "context" - "encoding/json" - "errors" "fmt" "sync" "time" api "github.com/cobaltcore-dev/cortex/api/external/cinder" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 
"github.com/cobaltcore-dev/cortex/internal/scheduling/cinder/plugins/filters" "github.com/cobaltcore-dev/cortex/internal/scheduling/cinder/plugins/weighers" @@ -26,6 +22,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // The decision pipeline controller takes decision resources containing a @@ -50,93 +47,44 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -// Callback executed when kubernetes asks to reconcile a decision resource. -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +// Process the request from the API. Returns the result of the pipeline execution. +func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { c.processMu.Lock() defer c.processMu.Unlock() - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} + log := ctrl.LoggerFrom(ctx) + startedAt := time.Now() -// Process the decision from the API. Should create and return the updated decision. 
-func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { - c.processMu.Lock() - defer c.processMu.Unlock() + pipelineName := request.Pipeline - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] + pipeline, ok := c.Pipelines[pipelineName] if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) - } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } + log.Error(nil, "pipeline not found or not ready", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline %s not found or not ready", pipelineName) } - return err -} - -func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. 
- pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipelineConfig, ok := c.PipelineConfigs[pipelineName] if !ok { - log.Error(nil, "pipeline not found or not ready", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline not found or not ready") - } - if decision.Spec.CinderRaw == nil { - log.Error(nil, "skipping decision, no cinderRaw spec defined") - return errors.New("no cinderRaw spec defined") - } - var request api.ExternalSchedulerRequest - if err := json.Unmarshal(decision.Spec.CinderRaw.Raw, &request); err != nil { - log.Error(err, "failed to unmarshal cinderRaw spec") - return err + log.Error(nil, "pipeline config not found", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline config for %s not found", pipelineName) } result, err := pipeline.Run(request) if err != nil { - log.Error(err, "failed to run pipeline") - return err + log.Error(err, "failed to run pipeline", "pipeline", pipelineName) + return nil, err } - decision.Status.Result = &result - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - return nil + log.Info("request processed successfully", "duration", time.Since(startedAt)) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + ResourceID: request.Context.ResourceUUID, + PipelineName: pipelineName, + Result: result, + Intent: v1alpha1.SchedulingIntentUnknown, + } + } + return &result, nil } // The base controller will delegate the pipeline creation down to this method. @@ -156,55 +104,39 @@ func (c *FilterWeigherPipelineController) InitPipeline( func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainCinder + c.Recorder = mgr.GetEventRecorder("cortex-cinder-pipeline-controller") if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } return multicluster.BuildController(mcl, mgr). 
// Watch pipeline changes so that we can reconfigure pipelines as needed. WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. - if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { - return false + &v1alpha1.Knowledge{}, + // Get all pipelines of the controller when knowledge changes and trigger reconciliation to update the candidates in the pipelines. + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { + knowledge := obj.(*v1alpha1.Knowledge) + if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { + return nil } - return pipeline.Spec.Type == c.PipelineType() + // When Knowledge changes, reconcile all pipelines + return c.GetAllPipelineReconcileRequests(ctx) }), - ). - // Watch knowledge changes so that we can reconfigure pipelines as needed. - WatchesMulticluster( - &v1alpha1.Knowledge{}, - handler.Funcs{ - CreateFunc: c.HandleKnowledgeCreated, - UpdateFunc: c.HandleKnowledgeUpdated, - DeleteFunc: c.HandleKnowledgeDeleted, - }, predicate.NewPredicateFuncs(func(obj client.Object) bool { knowledge := obj.(*v1alpha1.Knowledge) // Only react to knowledge matching the scheduling domain. return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainCinder }), ). + Named("cortex-cinder-pipelines"). For( - &v1alpha1.Decision{}, + &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { - return false - } - // Ignore already decided schedulings. 
- if decision.Status.Result != nil { + pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainCinder { return false } - return true + return pipeline.Spec.Type == c.PipelineType() })), ). - Named("cortex-cinder-decisions"). Complete(c) } diff --git a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go index dadd74e70..8b2ccc9d0 100644 --- a/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/cinder/filter_weigher_pipeline_controller_test.go @@ -5,13 +5,9 @@ package cinder import ( "context" - "encoding/json" "testing" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -22,255 +18,39 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) } - cinderRequest := api.ExternalSchedulerRequest{ - Spec: map[string]any{ - "volume_id": "test-volume-id", - "size": 10, - }, - Context: api.CinderRequestContext{ - ProjectID: "test-project", - UserID: "test-user", - RequestID: "req-123", - GlobalRequestID: "global-req-123", - }, - Hosts: []api.ExternalSchedulerHost{ - {VolumeHost: "cinder-volume-1"}, - {VolumeHost: "cinder-volume-2"}, - }, - Weights: map[string]float64{"cinder-volume-1": 1.0, "cinder-volume-2": 0.5}, - Pipeline: "test-pipeline", - } - - cinderRaw, err := json.Marshal(cinderRequest) - if err != nil { - t.Fatalf("Failed to marshal cinder request: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - 
pipeline *v1alpha1.Pipeline - expectError bool - expectResult bool - }{ - { - name: "successful cinder decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: false, - expectResult: true, - }, - { - name: "decision without cinderRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: nil, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: true, - expectResult: false, - }, - { - name: "pipeline not found", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, - }, - }, - pipeline: nil, - expectError: true, - expectResult: false, - }, - } - - 
for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []client.Object{tt.decision} - if tt.pipeline != nil { - objects = append(objects, tt.pipeline) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, - Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - - if tt.pipeline != nil { - initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }) - if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { - t.Fatalf("Failed to init pipeline: %v", initResult) - } - controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline - } - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - Namespace: tt.decision.Namespace, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("Expected error but got none") - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - if result.RequeueAfter > 0 { - t.Error("Expected no requeue") - } - - var updatedDecision v1alpha1.Decision - if err := client.Get(context.Background(), req.NamespacedName, &updatedDecision); err != nil { - t.Fatalf("Failed to get updated decision: %v", err) - } - - if tt.expectResult && updatedDecision.Status.Result == nil { - t.Error("Expected result to be set but was nil") - } - if 
!tt.expectResult && updatedDecision.Status.Result != nil { - t.Error("Expected result to be nil but was set") - } - }) - } -} - -func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - cinderRequest := api.ExternalSchedulerRequest{ - Spec: map[string]any{ - "volume_id": "test-volume-id", - "size": 10, - }, - Context: api.CinderRequestContext{ - ProjectID: "test-project", - UserID: "test-user", - RequestID: "req-123", - GlobalRequestID: "global-req-123", - }, - Hosts: []api.ExternalSchedulerHost{ - {VolumeHost: "cinder-volume-1"}, - {VolumeHost: "cinder-volume-2"}, - }, - Weights: map[string]float64{"cinder-volume-1": 1.0, "cinder-volume-2": 0.5}, - Pipeline: "test-pipeline", - } - - cinderRaw, err := json.Marshal(cinderRequest) - if err != nil { - t.Fatalf("Failed to marshal cinder request: %v", err) - } - tests := []struct { - name string - decision *v1alpha1.Decision - pipelineConfig *v1alpha1.Pipeline - createDecisions bool - expectError bool - expectDecisionCreated bool - expectResult bool + name string + request api.ExternalSchedulerRequest + pipelineConfig *v1alpha1.Pipeline + expectError bool + expectResult bool }{ { - name: "successful decision processing with creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "test-decision-", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, - }, + name: "successful request processing", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "volume_id": "test-volume-id", + "size": 10, + }, + Context: api.CinderRequestContext{ + ProjectID: "test-project", + UserID: "test-user", + RequestID: "req-123", + 
GlobalRequestID: "global-req-123", + ResourceUUID: "test-volume-id", + }, + Hosts: []api.ExternalSchedulerHost{ + {VolumeHost: "cinder-volume-1"}, + {VolumeHost: "cinder-volume-2"}, + }, + Weights: map[string]float64{"cinder-volume-1": 1.0, "cinder-volume-2": 0.5}, + Pipeline: "test-pipeline", }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -284,81 +64,38 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: false, - expectDecisionCreated: true, - expectResult: true, - }, - { - name: "successful decision processing without creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-create", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, - }, - }, - pipelineConfig: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - CreateDecisions: false, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - createDecisions: false, - expectError: false, - expectDecisionCreated: false, - expectResult: true, + expectError: false, + expectResult: true, }, { name: "pipeline not configured", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "volume_id": "test-volume-id", }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - CinderRaw: &runtime.RawExtension{ - Raw: cinderRaw, - }, + 
Context: api.CinderRequestContext{ + ResourceUUID: "test-volume-id", }, + Hosts: []api.ExternalSchedulerHost{{VolumeHost: "cinder-volume-1"}}, + Weights: map[string]float64{"cinder-volume-1": 1.0}, + Pipeline: "nonexistent-pipeline", }, - pipelineConfig: nil, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + pipelineConfig: nil, + expectError: true, + expectResult: false, }, { - name: "decision without cinderRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw", - Namespace: "default", + name: "empty hosts", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "volume_id": "test-volume-id", }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - CinderRaw: nil, + Context: api.CinderRequestContext{ + ResourceUUID: "test-volume-id", }, + Hosts: []api.ExternalSchedulerHost{}, + Weights: map[string]float64{}, + Pipeline: "test-pipeline", }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -367,15 +104,13 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainCinder, - CreateDecisions: true, + CreateDecisions: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + expectError: false, + expectResult: true, }, } @@ -386,31 +121,31 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) objects = append(objects, tt.pipelineConfig) } - client := fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). 
Build() controller := &FilterWeigherPipelineController{ BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, + Client: fakeClient, Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), + DecisionQueue: make(chan lib.DecisionUpdate, 10), }, Monitor: lib.FilterWeigherPipelineMonitor{}, } if tt.pipelineConfig != nil { controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig - initResult := controller.InitPipeline(t.Context(), *tt.pipelineConfig) + initResult := controller.InitPipeline(context.Background(), *tt.pipelineConfig) if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { t.Fatalf("Failed to init pipeline: %v", initResult) } controller.Pipelines[tt.pipelineConfig.Name] = initResult.Pipeline } - err := controller.ProcessNewDecisionFromAPI(context.Background(), tt.decision) + result, err := controller.ProcessRequest(context.Background(), tt.request) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -419,43 +154,11 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) t.Errorf("Expected no error but got: %v", err) } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainCinder { - found = true - - // Verify decision properties - if decision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("expected pipeline ref %q, got %q", "test-pipeline", decision.Spec.PipelineRef.Name) - } - - // Check if result was set - if tt.expectResult { - if decision.Status.Result == nil { - t.Error("expected decision 
result to be set") - return - } - } - break - } - } - - if !found { - t.Error("expected decision to be created but was not found") - } - } else if !tt.expectError { - // For cases without creation, check that the decision has the right status - if tt.expectResult && tt.decision.Status.Result == nil { - t.Error("expected decision result to be set in original decision object") + if tt.expectResult { + if result == nil { + t.Error("Expected result but got nil") + } else if len(result.OrderedHosts) == 0 && len(tt.request.Hosts) > 0 { + t.Error("Expected ordered hosts in result") } } }) @@ -511,7 +214,7 @@ func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ + initResult := controller.InitPipeline(context.Background(), v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", }, diff --git a/internal/scheduling/explanation/controller.go b/internal/scheduling/explanation/controller.go deleted file mode 100644 index 7b71e0c4a..000000000 --- a/internal/scheduling/explanation/controller.go +++ /dev/null @@ -1,221 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "sort" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "github.com/cobaltcore-dev/cortex/pkg/multicluster" - corev1 "k8s.io/api/core/v1" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/builder" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/manager" - "sigs.k8s.io/controller-runtime/pkg/predicate" -) - -type ControllerConfig struct { - // The controller will scope to objects using this scheduling domain name. - // This allows multiple controllers to coexist in the same cluster without - // interfering with each other's decisions. 
- SchedulingDomain v1alpha1.SchedulingDomain `json:"schedulingDomain"` -} - -// The explanation controller populates two fields of the decision status. -// -// First, it reconstructs the history of each decision. It will look for -// previous decisions for the same resource (based on ResourceID) and provide -// them through the decision history field. -// -// Second, it will use the available context for a decision to generate a -// human-readable explanation of why the decision was made the way it was. -// This explanation is intended to help operators understand the reasoning -// behind scheduling decisions. -type Controller struct { - // The kubernetes client to use for processing decisions. - client.Client - // Config for the controller. - Config ControllerConfig - // If the field indexing should be skipped (useful for testing). - SkipIndexFields bool -} - -// Check if a decision should be processed by this controller. -func (c *Controller) shouldReconcileDecision(decision *v1alpha1.Decision) bool { - // Ignore decisions not created by this operator. - if decision.Spec.SchedulingDomain != c.Config.SchedulingDomain { - return false - } - // Ignore decisions that already have an explanation. - if decision.Status.Explanation != "" { - return false - } - // Ignore decisions that have no result yet. - if decision.Status.Result == nil { - return false - } - return true -} - -// This loop will be called by the controller-runtime for each decision -// resource that needs to be reconciled. -func (c *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - log := ctrl.LoggerFrom(ctx) - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - log.Error(err, "failed to get decision", "name", req.NamespacedName) - return ctrl.Result{}, client.IgnoreNotFound(err) - } - // Reconcile the history. 
- if err := c.reconcileHistory(ctx, decision); err != nil { - return ctrl.Result{}, err - } - // Reconcile the explanation. - if err := c.reconcileExplanation(ctx, decision); err != nil { - return ctrl.Result{}, err - } - log.Info("successfully reconciled decision explanation", "name", req.NamespacedName) - return ctrl.Result{}, nil -} - -// Process the history for the given decision. -func (c *Controller) reconcileHistory(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - // Get all previous decisions for the same ResourceID. - var previousDecisions v1alpha1.DecisionList - if c.SkipIndexFields { - // When field indexing is skipped, list all decisions and filter manually - if err := c.List(ctx, &previousDecisions); err != nil { - log.Error(err, "failed to list all decisions", "resourceID", decision.Spec.ResourceID) - return err - } - // Filter to only decisions with matching ResourceID - var filteredDecisions []v1alpha1.Decision - for _, prevDecision := range previousDecisions.Items { - if prevDecision.Spec.ResourceID == decision.Spec.ResourceID { - filteredDecisions = append(filteredDecisions, prevDecision) - } - } - previousDecisions.Items = filteredDecisions - } else { - // Use field indexing for efficient lookup - if err := c.List(ctx, &previousDecisions, client.MatchingFields{"spec.resourceID": decision.Spec.ResourceID}); err != nil { - log.Error(err, "failed to list previous decisions", "resourceID", decision.Spec.ResourceID) - return err - } - } - history := []corev1.ObjectReference{} // Not var-init so we see the empty slice. - // Make sure the resulting history will be in chronological order. - sort.Slice(previousDecisions.Items, func(i, j int) bool { - t1 := previousDecisions.Items[i].CreationTimestamp - t2 := previousDecisions.Items[j].CreationTimestamp - return t1.Before(&t2) - }) - for _, prevDecision := range previousDecisions.Items { - // Skip the current decision. 
- if prevDecision.Name == decision.Name && prevDecision.Namespace == decision.Namespace { - continue - } - // Skip decisions that were made after the current one. - if prevDecision.CreationTimestamp.After(decision.CreationTimestamp.Time) { - continue - } - history = append(history, corev1.ObjectReference{ - Kind: "Decision", - Namespace: prevDecision.Namespace, - Name: prevDecision.Name, - UID: prevDecision.UID, - }) - } - old := decision.DeepCopy() - decision.Status.History = &history - precedence := len(history) - decision.Status.Precedence = &precedence - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - log.Error(err, "failed to patch decision status with history", "name", decision.Name) - return err - } - log.Info("successfully reconciled decision history", "name", decision.Name) - return nil -} - -// Process the explanation for the given decision. -func (c *Controller) reconcileExplanation(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - explainer, err := NewExplainer(c.Client) - if err != nil { - log.Error(err, "failed to create explainer", "name", decision.Name) - return err - } - explanationText, err := explainer.Explain(ctx, decision) - if err != nil { - log.Error(err, "failed to explain decision", "name", decision.Name) - return err - } - old := decision.DeepCopy() - decision.Status.Explanation = explanationText - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - log.Error(err, "failed to patch decision status with explanation", "name", decision.Name) - return err - } - log.Info("successfully reconciled decision explanation", "name", decision.Name) - return nil -} - -// This function will be called when the manager starts up. Must block. -func (c *Controller) StartupCallback(ctx context.Context) error { - // Reprocess all existing decisions that need an explanation. 
- var decisions v1alpha1.DecisionList - if err := c.List(ctx, &decisions); err != nil { - return err - } - for _, decision := range decisions.Items { - if !c.shouldReconcileDecision(&decision) { - continue - } - if _, err := c.Reconcile(ctx, ctrl.Request{ - NamespacedName: client.ObjectKey{ - Namespace: decision.Namespace, - Name: decision.Name, - }, - }); err != nil { - return err - } - } - return nil -} - -// This function sets up the controller with the provided manager. -func (c *Controller) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { - if !c.SkipIndexFields { - ctx := context.Background() - obj := &v1alpha1.Decision{} - lst := &v1alpha1.DecisionList{} - idx := "spec.resourceID" - fnc := func(obj client.Object) []string { - decision := obj.(*v1alpha1.Decision) - return []string{decision.Spec.ResourceID} - } - if err := mcl.IndexField(ctx, obj, lst, idx, fnc); err != nil { - return err - } - } - if err := mgr.Add(manager.RunnableFunc(c.StartupCallback)); err != nil { - return err - } - return multicluster.BuildController(mcl, mgr). - Named("explanation-controller"). - For( - &v1alpha1.Decision{}, - builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - return c.shouldReconcileDecision(decision) - })), - ). 
- Complete(c) -} diff --git a/internal/scheduling/explanation/controller_test.go b/internal/scheduling/explanation/controller_test.go deleted file mode 100644 index f287b4995..000000000 --- a/internal/scheduling/explanation/controller_test.go +++ /dev/null @@ -1,589 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "testing" - "time" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestController_shouldReconcileDecision(t *testing.T) { - controller := &Controller{ - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expected bool - }{ - { - name: "should reconcile nova decision without explanation", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - expected: true, - }, - { - name: "should not reconcile decision from different operator", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: "different-operator", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - }, - }, - expected: false, - }, - { - name: "should not reconcile decision with existing explanation", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "Already has explanation", - }, - }, - expected: false, - }, - { - name: "should not reconcile non-nova decision", - decision: &v1alpha1.Decision{ - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: 
v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", - }, - }, - expected: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - result := controller.shouldReconcileDecision(tt.decision) - if result != tt.expected { - t.Errorf("shouldReconcileDecision() = %v, expected %v", result, tt.expected) - } - }) - } -} - -func TestController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - existingDecisions []v1alpha1.Decision - expectError bool - expectRequeue bool - expectedExplanation string - expectedHistoryLength int - }{ - { - name: "decision not found", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "nonexistent-decision", - Namespace: "default", - }, - }, - expectError: false, // controller-runtime ignores not found errors - }, - { - name: "reconcile decision without history", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-1", - }, - Status: v1alpha1.DecisionStatus{}, - }, - expectedExplanation: "Initial placement of the nova server", - expectedHistoryLength: 0, - }, - { - name: "reconcile decision with history", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-2", - }, - Status: v1alpha1.DecisionStatus{}, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: 
metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-2", - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - }, - expectedHistoryLength: 1, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - var objects []runtime.Object - if tt.name != "decision not found" { - objects = append(objects, tt.decision) - } - for i := range tt.existingDecisions { - objects = append(objects, &tt.existingDecisions[i]) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - SkipIndexFields: true, // Skip field indexing for testing - } - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - Namespace: tt.decision.Namespace, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Errorf("Expected error but got none") - return - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - if tt.expectRequeue && result.RequeueAfter == 0 { - t.Errorf("Expected requeue but got none") - } - if !tt.expectRequeue && result.RequeueAfter > 0 { - t.Errorf("Expected no requeue but got %v", result.RequeueAfter) - } - - // Only check results if we expect the decision to exist - if tt.name != "decision not found" { - // Verify the decision was updated - var updated v1alpha1.Decision - err = client.Get(context.Background(), req.NamespacedName, &updated) - if err != nil { - t.Errorf("Failed to get updated decision: %v", err) - return - } - - if tt.expectedExplanation != "" && !contains(updated.Status.Explanation, tt.expectedExplanation) { - 
t.Errorf("Expected explanation to contain '%s', but got: %s", tt.expectedExplanation, updated.Status.Explanation) - } - - if updated.Status.History != nil && len(*updated.Status.History) != tt.expectedHistoryLength { - t.Errorf("Expected history length %d, got %d", tt.expectedHistoryLength, len(*updated.Status.History)) - } - } - }) - } -} - -func TestController_reconcileHistory(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - existingDecisions []v1alpha1.Decision - expectedHistory int - expectError bool - }{ - { - name: "no previous decisions", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-1", - }, - }, - expectedHistory: 0, - }, - { - name: "one previous decision", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-2", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-2", - }, - }, - }, - expectedHistory: 1, - }, - { - name: "multiple previous decisions in correct order", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-3", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(2 * time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-3", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - 
CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-3", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-3", - }, - }, - }, - expectedHistory: 2, - }, - { - name: "exclude future decisions", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-4", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-4", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-3", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(2 * time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "test-resource-4", - }, - }, - }, - expectedHistory: 1, // Only test-decision-1 should be included - }, - { - name: "exclude decisions with different ResourceID", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-target", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now().Add(time.Hour)}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "target-resource", - }, - }, - existingDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-same", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "target-resource", - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-different", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: 
time.Now()}, - }, - Spec: v1alpha1.DecisionSpec{ - ResourceID: "different-resource", - }, - }, - }, - expectedHistory: 1, // Only same ResourceID should be included - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.decision} - for i := range tt.existingDecisions { - objects = append(objects, &tt.existingDecisions[i]) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - SkipIndexFields: true, // Skip field indexing for testing - } - - err := controller.reconcileHistory(context.Background(), tt.decision) - - if tt.expectError && err == nil { - t.Errorf("Expected error but got none") - return - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - if tt.decision.Status.History == nil { - if tt.expectedHistory != 0 { - t.Errorf("Expected history length %d, got nil", tt.expectedHistory) - } - } else if len(*tt.decision.Status.History) != tt.expectedHistory { - t.Errorf("Expected history length %d, got %d", tt.expectedHistory, len(*tt.decision.Status.History)) - } - }) - } -} - -func TestController_reconcileExplanation(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: nil, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision). - WithStatusSubresource(&v1alpha1.Decision{}). 
- Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - } - - err := controller.reconcileExplanation(context.Background(), decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - if decision.Status.Explanation == "" { - t.Error("Expected explanation to be set but it was empty") - } - - if !contains(decision.Status.Explanation, "Initial placement of the nova server") { - t.Errorf("Expected explanation to contain nova server text, got: %s", decision.Status.Explanation) - } -} - -func TestController_StartupCallback(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - // Create a decision that should be reconciled - decision1 := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-1", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-1", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "", // Empty explanation means it should be reconciled - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - } - - // Create a decision that should not be reconciled (already has explanation) - decision2 := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-2", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource-2", - }, - Status: v1alpha1.DecisionStatus{ - Explanation: "Already has explanation", - }, - } - - // Create a decision from different operator that should not be reconciled - decision3 := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-3", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: "different-operator", - ResourceID: "test-resource-3", - }, - Status: 
v1alpha1.DecisionStatus{ - Explanation: "", - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision1, decision2, decision3). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &Controller{ - Client: client, - Config: ControllerConfig{SchedulingDomain: v1alpha1.SchedulingDomainNova}, - SkipIndexFields: true, // Skip field indexing for testing - } - - err := controller.StartupCallback(context.Background()) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - // Verify that decision1 now has an explanation - var updated1 v1alpha1.Decision - err = client.Get(context.Background(), types.NamespacedName{Name: "test-decision-1", Namespace: "default"}, &updated1) - if err != nil { - t.Errorf("Failed to get updated decision1: %v", err) - } - - if updated1.Status.Explanation == "" { - t.Error("Expected decision1 to have explanation after startup callback") - } - - // Verify that decision2 explanation remains unchanged - var updated2 v1alpha1.Decision - err = client.Get(context.Background(), types.NamespacedName{Name: "test-decision-2", Namespace: "default"}, &updated2) - if err != nil { - t.Errorf("Failed to get updated decision2: %v", err) - } - - if updated2.Status.Explanation != "Already has explanation" { - t.Errorf("Expected decision2 explanation to remain unchanged, got: %s", updated2.Status.Explanation) - } - - // Verify that decision3 explanation remains empty (different operator) - var updated3 v1alpha1.Decision - err = client.Get(context.Background(), types.NamespacedName{Name: "test-decision-3", Namespace: "default"}, &updated3) - if err != nil { - t.Errorf("Failed to get updated decision3: %v", err) - } - - if updated3.Status.Explanation != "" { - t.Errorf("Expected decision3 explanation to remain empty, got: %s", updated3.Status.Explanation) - } -} diff --git a/internal/scheduling/explanation/explainer.go b/internal/scheduling/explanation/explainer.go deleted file mode 
100644 index a5f199fae..000000000 --- a/internal/scheduling/explanation/explainer.go +++ /dev/null @@ -1,729 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "fmt" - "sort" - "time" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/api/errors" - "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" -) - -// The explainer gets a scheduling decision and produces a human-readable -// explanation of why the decision was made the way it was. -type Explainer struct { - // The kubernetes client to use for fetching related data. - client.Client - // The template manager to use for rendering explanations. - templateManager *TemplateManager -} - -// NewExplainer creates a new explainer with template support. -func NewExplainer(client client.Client) (*Explainer, error) { - templateManager, err := NewTemplateManager() - if err != nil { - return nil, fmt.Errorf("failed to create template manager: %w", err) - } - - return &Explainer{ - Client: client, - templateManager: templateManager, - }, nil -} - -// Explain the given decision and return a human-readable explanation. -func (e *Explainer) Explain(ctx context.Context, decision *v1alpha1.Decision) (string, error) { - return e.ExplainWithTemplates(ctx, decision) -} - -// getResourceType returns a human-readable resource type. -func (e *Explainer) getResourceType(schedulingDomain v1alpha1.SchedulingDomain) string { - switch schedulingDomain { - case v1alpha1.SchedulingDomainNova: - return "nova server" - case v1alpha1.SchedulingDomainManila: - return "manila share" - case v1alpha1.SchedulingDomainCinder: - return "cinder volume" - case v1alpha1.SchedulingDomainMachines: - return "ironcore machine" - default: - return "resource" - } -} - -// calculateScoreGap calculates the gap between first and second place. 
-func (e *Explainer) calculateScoreGap(weights map[string]float64) float64 { - if len(weights) < 2 { - return 0.0 - } - - scores := make([]float64, 0, len(weights)) - for _, score := range weights { - scores = append(scores, score) - } - - sort.Slice(scores, func(i, j int) bool { - return scores[i] > scores[j] - }) - - return scores[0] - scores[1] -} - -// fetchDecisionChain retrieves all decisions in the history chain. -func (e *Explainer) fetchDecisionChain(ctx context.Context, decision *v1alpha1.Decision) ([]*v1alpha1.Decision, error) { - var chainDecisions []*v1alpha1.Decision - logger := log.FromContext(ctx) - - // Add all historical decisions - if decision.Status.History != nil { - for _, ref := range *decision.Status.History { - histDecision := &v1alpha1.Decision{} - if err := e.Get(ctx, client.ObjectKey{ - Namespace: ref.Namespace, - Name: ref.Name, - }, histDecision); err != nil { - if errors.IsNotFound(err) { - logger.Info("History decision not found, skipping from chain analysis", - "decision", ref.Name, - "namespace", ref.Namespace, - "uid", ref.UID) - continue // Skip missing decisions instead of failing - } - // For other errors, still fail - return nil, err - } - chainDecisions = append(chainDecisions, histDecision) - } - } - - // Add current decision - chainDecisions = append(chainDecisions, decision) - - return chainDecisions, nil -} - -// HostSegment represents a segment in the host chain with duration and decision count. -type HostSegment struct { - host string - duration time.Duration // Full precision duration - decisions int -} - -// buildHostSegments creates host segments from decisions with durations. 
-func (e *Explainer) buildHostSegments(decisions []*v1alpha1.Decision) []HostSegment { - if len(decisions) < 2 { - return []HostSegment{} - } - - // Extract host chain - hostChain := make([]string, 0, len(decisions)) - for _, decision := range decisions { - host := "(n/a)" - if decision.Status.Result != nil && decision.Status.Result.TargetHost != nil { - host = *decision.Status.Result.TargetHost - } - hostChain = append(hostChain, host) - } - - // Build segments with durations - segments := make([]HostSegment, 0) - if len(hostChain) > 0 { - currentHost := hostChain[0] - segmentStart := 0 - - for i := 1; i <= len(hostChain); i++ { - // Check if we've reached the end or found a different host - if i == len(hostChain) || hostChain[i] != currentHost { - // Calculate duration for this segment - startTime := decisions[segmentStart].CreationTimestamp.Time - var endTime = startTime // Default to 0 duration for last segment - if i < len(hostChain) { - endTime = decisions[i].CreationTimestamp.Time - } - - duration := endTime.Sub(startTime) - - segments = append(segments, HostSegment{ - host: currentHost, - duration: duration, - decisions: i - segmentStart, - }) - - if i < len(hostChain) { - currentHost = hostChain[i] - segmentStart = i - } - } - } - } - - return segments -} - -// detectLoop checks if there are repeated hosts in the segments. -func (e *Explainer) detectLoop(segments []HostSegment) bool { - seenHosts := make(map[string]bool) - for _, segment := range segments { - if seenHosts[segment.host] { - return true - } - seenHosts[segment.host] = true - } - return false -} - -// findWinner returns the host with the highest score. -func (e *Explainer) findWinner(scores map[string]float64) string { - winner := "" - maxScore := -999999.0 - for host, score := range scores { - if score > maxScore { - maxScore = score - winner = host - } - } - return winner -} - -// ScoreCalculationResult holds both final scores and deleted host tracking information. 
-type ScoreCalculationResult struct { - FinalScores map[string]float64 - DeletedHosts map[string][]string // host -> list of steps that deleted it -} - -// StepImpact represents the impact of a single pipeline step on the winning host. -type StepImpact struct { - Step string - ScoreBefore float64 - ScoreAfter float64 - ScoreDelta float64 - CompetitorsRemoved int - PromotedToFirst bool -} - -// calculateScoresFromSteps processes step results sequentially to compute final scores and track deleted hosts. -func (e *Explainer) calculateScoresFromSteps(inputWeights map[string]float64, stepResults []v1alpha1.StepResult) ScoreCalculationResult { - if len(inputWeights) == 0 { - return ScoreCalculationResult{ - FinalScores: map[string]float64{}, - DeletedHosts: map[string][]string{}, - } - } - - // Start with input values as initial scores - currentScores := make(map[string]float64) - for hostName, inputValue := range inputWeights { - currentScores[hostName] = inputValue - } - - deletedHosts := make(map[string][]string) - - // Process each step sequentially - for _, stepResult := range stepResults { - // Check which hosts will be deleted in this step - for hostName := range currentScores { - if _, exists := stepResult.Activations[hostName]; !exists { - // Host not in this step's activations - will be deleted - deletedHosts[hostName] = append(deletedHosts[hostName], stepResult.StepName) - } - } - - // Apply activations and remove hosts not in this step - newScores := make(map[string]float64) - for hostName, score := range currentScores { - if activation, exists := stepResult.Activations[hostName]; exists { - // Add activation to current score - newScores[hostName] = score + activation - } - // Hosts not in activations are removed (don't copy to newScores) - } - currentScores = newScores - } - - return ScoreCalculationResult{ - FinalScores: currentScores, - DeletedHosts: deletedHosts, - } -} - -// calculateScoresWithoutStep processes step results while skipping one specific 
step. -func (e *Explainer) calculateScoresWithoutStep(inputWeights map[string]float64, stepResults []v1alpha1.StepResult, skipIndex int) ScoreCalculationResult { - if len(inputWeights) == 0 || skipIndex < 0 || skipIndex >= len(stepResults) { - return e.calculateScoresFromSteps(inputWeights, stepResults) - } - - // Create reduced step results without the skipped step - reducedSteps := make([]v1alpha1.StepResult, 0, len(stepResults)-1) - reducedSteps = append(reducedSteps, stepResults[:skipIndex]...) - reducedSteps = append(reducedSteps, stepResults[skipIndex+1:]...) - - return e.calculateScoresFromSteps(inputWeights, reducedSteps) -} - -// findCriticalSteps determines which steps change the winning host using backward elimination. -func (e *Explainer) findCriticalSteps(decision *v1alpha1.Decision) []string { - result := decision.Status.Result - if result == nil || len(result.StepResults) == 0 { - return []string{} - } - - // Get input weights (prefer raw, fall back to normalized) - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - default: - return []string{} - } - - // Calculate baseline scores with all steps - baselineResult := e.calculateScoresFromSteps(inputWeights, result.StepResults) - baselineWinner := e.findWinner(baselineResult.FinalScores) - - if baselineWinner == "" { - return []string{} - } - - criticalSteps := make([]string, 0) - - // Try removing each step one by one - for i, stepResult := range result.StepResults { - // Calculate scores without this step - reducedResult := e.calculateScoresWithoutStep(inputWeights, result.StepResults, i) - - // Find winner without this step - reducedWinner := e.findWinner(reducedResult.FinalScores) - - // If removing this step changes the winner, it's critical - if reducedWinner != baselineWinner { - criticalSteps = append(criticalSteps, stepResult.StepName) - 
} - } - - return criticalSteps -} - -func (e *Explainer) calculateStepImpacts(inputWeights map[string]float64, stepResults []v1alpha1.StepResult, targetHost string) []StepImpact { - if len(inputWeights) == 0 || len(stepResults) == 0 { - return []StepImpact{} - } - - impacts := make([]StepImpact, 0, len(stepResults)) - currentScores := make(map[string]float64) - - // Start with input values as initial scores - for hostName, inputValue := range inputWeights { - currentScores[hostName] = inputValue - } - - // Track target host's score before first step - scoreBefore := currentScores[targetHost] - - // Process each pipeline step and track the target host's evolution - for _, stepResult := range stepResults { - // Count how many competitors will be removed in this step - competitorsRemoved := 0 - for hostName := range currentScores { - if hostName != targetHost { - if _, exists := stepResult.Activations[hostName]; !exists { - competitorsRemoved++ - } - } - } - - // Check if target host was #1 before this step - wasFirst := true - targetScoreBefore := currentScores[targetHost] - for host, score := range currentScores { - if host != targetHost && score > targetScoreBefore { - wasFirst = false - break - } - } - - // Apply activations and remove hosts not in this step - newScores := make(map[string]float64) - for hostName, score := range currentScores { - if activation, exists := stepResult.Activations[hostName]; exists { - newScores[hostName] = score + activation - } - // Hosts not in activations are removed (don't copy to newScores) - } - - // Get target host's score after this step - scoreAfter := newScores[targetHost] - - // Check if target host became #1 after this step - isFirstAfter := true - for host, score := range newScores { - if host != targetHost && score > scoreAfter { - isFirstAfter = false - break - } - } - - promotedToFirst := !wasFirst && isFirstAfter - - impacts = append(impacts, StepImpact{ - Step: stepResult.StepName, - ScoreBefore: scoreBefore, - 
ScoreAfter: scoreAfter, - ScoreDelta: scoreAfter - scoreBefore, - CompetitorsRemoved: competitorsRemoved, - PromotedToFirst: promotedToFirst, - }) - - // Update for next iteration - currentScores = newScores - scoreBefore = scoreAfter - } - - return impacts -} - -// Template data building functions - these functions extract and structure -// decision data into formats suitable for template rendering. - -// buildContextData creates context data for template rendering. -func (e *Explainer) buildContextData(decision *v1alpha1.Decision) ContextData { - resourceType := e.getResourceType(decision.Spec.SchedulingDomain) - - history := decision.Status.History - isInitial := history == nil || len(*history) == 0 - - decisionNumber := 1 - if !isInitial { - decisionNumber = len(*history) + 1 - if decision.Status.Precedence != nil { - decisionNumber = *decision.Status.Precedence + 1 - } - } - - return ContextData{ - ResourceType: resourceType, - DecisionNumber: decisionNumber, - IsInitial: isInitial, - } -} - -// buildHistoryData creates history comparison data for template rendering. 
-func (e *Explainer) buildHistoryData(ctx context.Context, decision *v1alpha1.Decision) (*HistoryData, error) { - history := decision.Status.History - if history == nil || len(*history) == 0 { - return nil, nil - } - - // Get the last decision - lastDecisionRef := (*history)[len(*history)-1] - lastDecision := &v1alpha1.Decision{} - if err := e.Get(ctx, client.ObjectKey{ - Namespace: lastDecisionRef.Namespace, - Name: lastDecisionRef.Name, - }, lastDecision); err != nil { - logger := log.FromContext(ctx) - if errors.IsNotFound(err) { - logger.Info("History decision not found, skipping history comparison", - "decision", lastDecisionRef.Name, - "namespace", lastDecisionRef.Namespace, - "uid", lastDecisionRef.UID) - return nil, nil // Skip history comparison instead of failing - } - // For other errors, still fail - return nil, err - } - - lastTarget := "(n/a)" - if lastDecision.Status.Result != nil && lastDecision.Status.Result.TargetHost != nil { - lastTarget = *lastDecision.Status.Result.TargetHost - } - - newTarget := "(n/a)" - if decision.Status.Result != nil && decision.Status.Result.TargetHost != nil { - newTarget = *decision.Status.Result.TargetHost - } - - return &HistoryData{ - PreviousTarget: lastTarget, - CurrentTarget: newTarget, - }, nil -} - -// buildWinnerData creates winner analysis data for template rendering. 
-func (e *Explainer) buildWinnerData(decision *v1alpha1.Decision) *WinnerData { - result := decision.Status.Result - if result == nil || result.TargetHost == nil { - return nil - } - - targetHost := *result.TargetHost - - // Get target host score - targetScore := 0.0 - if result.AggregatedOutWeights != nil { - if score, exists := result.AggregatedOutWeights[targetHost]; exists { - targetScore = score - } - } - - // Count hosts evaluated - hostsEvaluated := len(result.OrderedHosts) - if hostsEvaluated == 0 && result.AggregatedOutWeights != nil { - hostsEvaluated = len(result.AggregatedOutWeights) - } - - // Calculate score gap to second place - gap := e.calculateScoreGap(result.AggregatedOutWeights) - - return &WinnerData{ - HostName: targetHost, - Score: targetScore, - Gap: gap, - HostsEvaluated: hostsEvaluated, - HasGap: gap > 0, - } -} - -// buildInputData creates input comparison data for template rendering. -func (e *Explainer) buildInputData(decision *v1alpha1.Decision) *InputData { - result := decision.Status.Result - if result == nil || result.TargetHost == nil { - return nil - } - - targetHost := *result.TargetHost - - // Get input weights (prefer raw, fall back to normalized) - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - default: - return nil - } - - // Find input winner - inputWinner := "" - inputWinnerScore := -999999.0 - for host, score := range inputWeights { - if score > inputWinnerScore { - inputWinnerScore = score - inputWinner = host - } - } - - if inputWinner == "" { - return nil - } - - // Get target host's final score - targetFinalScore := 0.0 - if result.AggregatedOutWeights != nil { - if score, exists := result.AggregatedOutWeights[targetHost]; exists { - targetFinalScore = score - } - } - - return &InputData{ - InputWinner: inputWinner, - InputScore: inputWinnerScore, - 
FinalWinner: targetHost, - FinalScore: targetFinalScore, - FinalInputScore: inputWeights[targetHost], - InputConfirmed: inputWinner == targetHost, - } -} - -// buildCriticalStepsData creates critical steps data for template rendering. -func (e *Explainer) buildCriticalStepsData(decision *v1alpha1.Decision) *CriticalStepsData { - result := decision.Status.Result - if result == nil || result.TargetHost == nil || len(result.StepResults) == 0 { - return nil - } - - criticalSteps := e.findCriticalSteps(decision) - totalSteps := len(result.StepResults) - - return &CriticalStepsData{ - Steps: criticalSteps, - TotalSteps: totalSteps, - IsInputOnly: len(criticalSteps) == 0, - RequiresAll: len(criticalSteps) == totalSteps, - } -} - -// buildDeletedHostsData creates deleted hosts data for template rendering. -func (e *Explainer) buildDeletedHostsData(decision *v1alpha1.Decision) *DeletedHostsData { - result := decision.Status.Result - if result == nil || result.StepResults == nil || len(result.StepResults) == 0 { - return nil - } - - // Get input weights (prefer raw, fall back to normalized) - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - default: - return nil - } - - // Calculate scores and get deleted hosts information - scoreResult := e.calculateScoresFromSteps(inputWeights, result.StepResults) - - if len(scoreResult.DeletedHosts) == 0 { - return nil - } - - // Find input winner - inputWinner := "" - inputWinnerScore := -999999.0 - for host, score := range inputWeights { - if score > inputWinnerScore { - inputWinnerScore = score - inputWinner = host - } - } - - // Build list of deleted hosts - deletedHosts := make([]DeletedHostInfo, 0, len(scoreResult.DeletedHosts)) - for hostName, steps := range scoreResult.DeletedHosts { - deletedHosts = append(deletedHosts, DeletedHostInfo{ - Name: hostName, - Steps: steps, 
- IsInputWinner: hostName == inputWinner, - }) - } - - return &DeletedHostsData{ - DeletedHosts: deletedHosts, - } -} - -// buildChainData creates chain analysis data for template rendering. -func (e *Explainer) buildChainData(ctx context.Context, decision *v1alpha1.Decision) (*ChainData, error) { - history := decision.Status.History - if history == nil || len(*history) == 0 { - return nil, nil // No chain for initial decisions - } - - // Fetch all decisions in the chain - chainDecisions, err := e.fetchDecisionChain(ctx, decision) - if err != nil { - return nil, err - } - - if len(chainDecisions) < 2 { - return nil, nil // Need at least 2 decisions for a chain - } - - // Build segments - segments := e.buildHostSegments(chainDecisions) - if len(segments) == 0 { - return nil, nil - } - - // Convert to template data format - chainSegments := make([]ChainSegment, len(segments)) - for i, segment := range segments { - chainSegments[i] = ChainSegment{ - Host: segment.host, - Duration: segment.duration, - Decisions: segment.decisions, - } - } - - return &ChainData{ - Segments: chainSegments, - HasLoop: e.detectLoop(segments), - }, nil -} - -// ExplainWithTemplates renders an explanation using Go templates. 
-func (e *Explainer) ExplainWithTemplates(ctx context.Context, decision *v1alpha1.Decision) (string, error) { - // Build explanation context - explanationCtx := ExplanationContext{ - Context: e.buildContextData(decision), - } - - // Build each component's data - if historyData, err := e.buildHistoryData(ctx, decision); err != nil { - return "", err - } else if historyData != nil { - explanationCtx.History = historyData - } - - if winnerData := e.buildWinnerData(decision); winnerData != nil { - explanationCtx.Winner = winnerData - } - - if inputData := e.buildInputData(decision); inputData != nil { - explanationCtx.Input = inputData - } - - if criticalStepsData := e.buildCriticalStepsData(decision); criticalStepsData != nil { - explanationCtx.CriticalSteps = criticalStepsData - } - - if deletedHostsData := e.buildDeletedHostsData(decision); deletedHostsData != nil { - explanationCtx.DeletedHosts = deletedHostsData - } - - // Build step impacts - if result := decision.Status.Result; result != nil && result.TargetHost != nil && len(result.StepResults) > 0 { - targetHost := *result.TargetHost - var inputWeights map[string]float64 - switch { - case len(result.RawInWeights) > 0: - inputWeights = result.RawInWeights - case len(result.NormalizedInWeights) > 0: - inputWeights = result.NormalizedInWeights - } - if inputWeights != nil { - impacts := e.calculateStepImpacts(inputWeights, result.StepResults, targetHost) - if len(impacts) > 0 { - // Sort impacts by absolute delta (highest first), with promotions taking priority - sort.Slice(impacts, func(i, j int) bool { - absI := impacts[i].ScoreDelta - if absI < 0 { - absI = -absI - } - absJ := impacts[j].ScoreDelta - if absJ < 0 { - absJ = -absJ - } - - if absI != absJ { - return absI > absJ - } - if impacts[i].PromotedToFirst != impacts[j].PromotedToFirst { - return impacts[i].PromotedToFirst - } - return impacts[i].Step < impacts[j].Step - }) - explanationCtx.StepImpacts = impacts - } - } - } - - if chainData, err := 
e.buildChainData(ctx, decision); err != nil { - return "", err - } else if chainData != nil { - explanationCtx.Chain = chainData - } - - // Render using templates - return e.templateManager.RenderExplanation(explanationCtx) -} diff --git a/internal/scheduling/explanation/explainer_test.go b/internal/scheduling/explanation/explainer_test.go deleted file mode 100644 index ed1d52e13..000000000 --- a/internal/scheduling/explanation/explainer_test.go +++ /dev/null @@ -1,1476 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "context" - "sort" - "testing" - "time" - - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - "sigs.k8s.io/controller-runtime/pkg/client/fake" -) - -func TestExplainer_Explain(t *testing.T) { - tests := []struct { - name string - decision *v1alpha1.Decision - historyDecisions []*v1alpha1.Decision - expectedContains []string - expectError bool - }{ - { - name: "initial nova server placement", - decision: WithResourceID(NewTestDecision("test-decision"), "test-resource-1"), - expectedContains: []string{"Initial placement of the nova server"}, - }, - { - name: "initial cinder volume placement", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-2"), v1alpha1.SchedulingDomainCinder), - expectedContains: []string{"Initial placement of the cinder volume"}, - }, - { - name: "initial manila share placement", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-3"), v1alpha1.SchedulingDomainManila), - expectedContains: []string{"Initial placement of the manila share"}, - }, - { - name: "initial ironcore machine placement", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-4"), v1alpha1.SchedulingDomainMachines), - 
expectedContains: []string{"Initial placement of the ironcore machine"}, - }, - { - name: "unknown resource type falls back to generic", - decision: WithSchedulingDomain(WithResourceID(NewTestDecision("test-decision"), "test-resource-5"), "unknown-type"), - expectedContains: []string{"Initial placement of the resource"}, - }, - { - name: "empty history array", - decision: WithResourceID(NewTestDecision("test-decision"), "test-resource-6"), - expectedContains: []string{"Initial placement of the nova server"}, - }, - { - name: "subsequent decision with history", - decision: WithHistoryRef( - WithTargetHost(WithResourceID(NewTestDecision("test-decision-2"), "test-resource-7"), "host-2"), - WithUID(WithTargetHost(WithResourceID(NewTestDecision("test-decision-1"), "test-resource-7"), "host-1"), "test-uid-1")), - historyDecisions: []*v1alpha1.Decision{ - WithUID(WithTargetHost(WithResourceID(NewTestDecision("test-decision-1"), "test-resource-7"), "host-1"), "test-uid-1"), - }, - expectedContains: []string{ - "Decision #2 for this nova server", - "Previous target host was 'host-1'", - "now it's 'host-2'", - }, - }, - { - name: "subsequent decision with nil target hosts", - decision: WithHistoryRef( - WithResourceID(NewTestDecision("test-decision-4"), "test-resource-8"), - WithUID(WithResourceID(NewTestDecision("test-decision-3"), "test-resource-8"), "test-uid-3")), - historyDecisions: []*v1alpha1.Decision{ - WithUID(WithResourceID(NewTestDecision("test-decision-3"), "test-resource-8"), "test-uid-3"), - }, - expectedContains: []string{ - "Decision #2 for this nova server", - "Previous target host was '(n/a)'", - "now it's '(n/a)'", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - if len(tt.historyDecisions) > 0 { - RunExplanationTestWithHistory(t, tt.decision, tt.historyDecisions, tt.expectedContains) - } else { - RunExplanationTest(t, tt.decision, tt.expectedContains) - } - }) - } -} - -func 
TestExplainer_Explain_HistoryDecisionNotFound_GracefulHandling(t *testing.T) { - decision := NewDecision("test-decision"). - WithResourceID("test-resource"). - WithTargetHost("host-1"). - WithHistory([]corev1.ObjectReference{ - { - Kind: "Decision", - Namespace: "default", - Name: "non-existent-decision", - UID: "non-existent-uid", - }, - }). - Build() - - explainer := SetupExplainerTest(t, decision) - explanation, err := explainer.Explain(context.Background(), decision) - - // Should NOT error anymore - graceful handling - if err != nil { - t.Errorf("Expected no error with graceful handling, but got: %v", err) - } - - // Should contain context but not history comparison - if !contains(explanation, "Decision #2 for this nova server") { - t.Errorf("Expected explanation to contain context, but got: %s", explanation) - } - - if contains(explanation, "Previous target host") { - t.Errorf("Expected explanation to NOT contain history comparison when decision is missing, but got: %s", explanation) - } -} - -func TestExplainer_MissingHistoryDecisions_ChainAnalysis(t *testing.T) { - // Test that chain analysis works when some history decisions are missing - decision := NewDecision("current-decision"). - WithResourceID("test-resource"). - WithTargetHost("host-3"). - WithHistory([]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "missing-decision", UID: "missing-uid"}, - {Kind: "Decision", Namespace: "default", Name: "decision-3", UID: "uid-3"}, - }). - Build() - - // Only provide decision-1 and decision-3, missing decision-2 - availableDecision := NewDecision("decision-1"). - WithUID("uid-1"). - WithTargetHost("host-1"). - WithCreationTimestamp(time.Now().Add(-2 * time.Hour)). 
- Build() - - explainer := SetupExplainerTest(t, decision, availableDecision) - explanation, err := explainer.Explain(context.Background(), decision) - - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - // Should contain context with full history count - if !contains(explanation, "Decision #4 for this nova server") { - t.Errorf("Expected explanation to contain context, but got: %s", explanation) - } - - // Chain analysis should work with available decisions - if !contains(explanation, "Chain:") { - t.Errorf("Expected explanation to contain chain analysis, but got: %s", explanation) - } -} - -// Helper functions -func stringPtr(s string) *string { - return &s -} - -func contains(s, substr string) bool { - return len(s) >= len(substr) && (s == substr || substr == "" || findInString(s, substr)) -} - -func findInString(s, substr string) bool { - for i := 0; i <= len(s)-len(substr); i++ { - if s[i:i+len(substr)] == substr { - return true - } - } - return false -} - -// Generic Decision Helpers - Composable functions with smart defaults -func NewTestDecision(name string) *v1alpha1.Decision { - return &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: "default", // Sensible default - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, // Most common - ResourceID: "test-resource", // Generic default - }, - Status: v1alpha1.DecisionStatus{}, - } -} - -func WithTargetHost(decision *v1alpha1.Decision, host string) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.TargetHost = &host - return decision -} - -func WithInputWeights(decision *v1alpha1.Decision, weights map[string]float64) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.RawInWeights = weights - return decision -} - -func WithOutputWeights(decision 
*v1alpha1.Decision, weights map[string]float64) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.AggregatedOutWeights = weights - - // Auto-generate ordered hosts from weights - hosts := make([]string, 0, len(weights)) - for host := range weights { - hosts = append(hosts, host) - } - sort.Slice(hosts, func(i, j int) bool { - return weights[hosts[i]] > weights[hosts[j]] - }) - decision.Status.Result.OrderedHosts = hosts - - return decision -} - -func WithSteps(decision *v1alpha1.Decision, steps ...v1alpha1.StepResult) *v1alpha1.Decision { - if decision.Status.Result == nil { - decision.Status.Result = &v1alpha1.DecisionResult{} - } - decision.Status.Result.StepResults = steps - return decision -} - -func WithSchedulingDomain(decision *v1alpha1.Decision, schedulingDomain v1alpha1.SchedulingDomain) *v1alpha1.Decision { - decision.Spec.SchedulingDomain = schedulingDomain - return decision -} - -func WithResourceID(decision *v1alpha1.Decision, resourceID string) *v1alpha1.Decision { - decision.Spec.ResourceID = resourceID - return decision -} - -func WithUID(decision *v1alpha1.Decision, uid string) *v1alpha1.Decision { - decision.UID = types.UID(uid) - return decision -} - -func WithHistory(decision *v1alpha1.Decision, refs []corev1.ObjectReference) *v1alpha1.Decision { - decision.Status.History = &refs - return decision -} - -// Helper to create a decision with history reference to another decision -func WithHistoryRef(decision, historyDecision *v1alpha1.Decision) *v1alpha1.Decision { - refs := []corev1.ObjectReference{ - { - Kind: "Decision", - Namespace: historyDecision.Namespace, - Name: historyDecision.Name, - UID: historyDecision.UID, - }, - } - decision.Status.History = &refs - return decision -} - -// Generic step creator -func Step(name string, activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: name, - Activations: 
activations, - } -} - -// Common step names as constants -const ( - AvailabilityFilter = "availability-filter" - ResourceWeigher = "resource-weigher" - PlacementPolicy = "placement-policy" -) - -// Decision Builder Pattern - Fluent interface for creating test decisions -type DecisionBuilder struct { - decision *v1alpha1.Decision -} - -func NewDecision(name string) *DecisionBuilder { - return &DecisionBuilder{ - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: name, - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{}, - }, - } -} - -func (b *DecisionBuilder) WithResourceID(resourceID string) *DecisionBuilder { - b.decision.Spec.ResourceID = resourceID - return b -} - -func (b *DecisionBuilder) WithSchedulingDomain(schedulingDomain v1alpha1.SchedulingDomain) *DecisionBuilder { - b.decision.Spec.SchedulingDomain = schedulingDomain - return b -} - -func (b *DecisionBuilder) WithTargetHost(host string) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.TargetHost = stringPtr(host) - return b -} - -func (b *DecisionBuilder) WithRawInputWeights(weights map[string]float64) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.RawInWeights = weights - return b -} - -func (b *DecisionBuilder) WithNormalizedInputWeights(weights map[string]float64) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.NormalizedInWeights = weights - return b -} - -func (b *DecisionBuilder) WithAggregatedOutputWeights(weights map[string]float64) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - 
b.decision.Status.Result.AggregatedOutWeights = weights - return b -} - -func (b *DecisionBuilder) WithOrderedHosts(hosts []string) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.OrderedHosts = hosts - return b -} - -func (b *DecisionBuilder) WithSteps(steps ...v1alpha1.StepResult) *DecisionBuilder { - if b.decision.Status.Result == nil { - b.decision.Status.Result = &v1alpha1.DecisionResult{} - } - b.decision.Status.Result.StepResults = steps - return b -} - -func (b *DecisionBuilder) WithHistory(refs []corev1.ObjectReference) *DecisionBuilder { - b.decision.Status.History = &refs - return b -} - -func (b *DecisionBuilder) WithHistoryDecisions(decisions ...*v1alpha1.Decision) *DecisionBuilder { - refs := make([]corev1.ObjectReference, len(decisions)) - for i, decision := range decisions { - refs[i] = corev1.ObjectReference{ - Kind: "Decision", - Namespace: decision.Namespace, - Name: decision.Name, - UID: decision.UID, - } - } - b.decision.Status.History = &refs - return b -} - -func (b *DecisionBuilder) WithPrecedence(precedence int) *DecisionBuilder { - b.decision.Status.Precedence = intPtr(precedence) - return b -} - -func (b *DecisionBuilder) WithUID(uid string) *DecisionBuilder { - b.decision.UID = types.UID(uid) - return b -} - -func (b *DecisionBuilder) WithCreationTimestamp(timestamp time.Time) *DecisionBuilder { - b.decision.CreationTimestamp = metav1.Time{Time: timestamp} - return b -} - -func (b *DecisionBuilder) Build() *v1alpha1.Decision { - return b.decision -} - -// Pre-built scenario helpers for common test patterns -func DecisionWithScoring(name, winner string, scores map[string]float64) *DecisionBuilder { - orderedHosts := make([]string, 0, len(scores)) - for host := range scores { - orderedHosts = append(orderedHosts, host) - } - // Sort by score descending - sort.Slice(orderedHosts, func(i, j int) bool { - return scores[orderedHosts[i]] > 
scores[orderedHosts[j]] - }) - - return NewDecision(name). - WithTargetHost(winner). - WithAggregatedOutputWeights(scores). - WithOrderedHosts(orderedHosts) -} - -func DecisionWithInputComparison(name, winner string, inputWeights, finalWeights map[string]float64) *DecisionBuilder { - return NewDecision(name). - WithTargetHost(winner). - WithRawInputWeights(inputWeights). - WithAggregatedOutputWeights(finalWeights) -} - -func DecisionWithCriticalSteps(name, winner string, inputWeights map[string]float64, steps ...v1alpha1.StepResult) *DecisionBuilder { - return NewDecision(name). - WithTargetHost(winner). - WithRawInputWeights(inputWeights). - WithSteps(steps...) -} - -func DecisionWithHistory(name, winner string) *DecisionBuilder { - return NewDecision(name). - WithTargetHost(winner) -} - -// Step result builders for common pipeline steps -func ResourceWeigherStep(activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: "resource-weigher", - Activations: activations, - } -} - -func AvailabilityFilterStep(activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: "availability-filter", - Activations: activations, - } -} - -func PlacementPolicyStep(activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: "placement-policy", - Activations: activations, - } -} - -func WeigherStep(name string, activations map[string]float64) v1alpha1.StepResult { - return v1alpha1.StepResult{ - StepName: name, - Activations: activations, - } -} - -// Test execution helpers -func SetupExplainerTest(t *testing.T, decisions ...*v1alpha1.Decision) *Explainer { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - objects := make([]runtime.Object, len(decisions)) - for i, decision := range decisions { - objects[i] = decision - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). 
- WithRuntimeObjects(objects...). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Fatalf("Failed to create explainer: %v", err) - } - return explainer -} - -func RunExplanationTest(t *testing.T, decision *v1alpha1.Decision, expectedContains []string) { - explainer := SetupExplainerTest(t, decision) - explanation, err := explainer.Explain(context.Background(), decision) - AssertNoError(t, err) - AssertExplanationContains(t, explanation, expectedContains...) -} - -func RunExplanationTestWithHistory(t *testing.T, decision *v1alpha1.Decision, historyDecisions []*v1alpha1.Decision, expectedContains []string) { - allDecisions := make([]*v1alpha1.Decision, len(historyDecisions)+1) - copy(allDecisions, historyDecisions) - allDecisions[len(historyDecisions)] = decision - explainer := SetupExplainerTest(t, allDecisions...) - explanation, err := explainer.Explain(context.Background(), decision) - AssertNoError(t, err) - AssertExplanationContains(t, explanation, expectedContains...) -} - -func AssertNoError(t *testing.T, err error) { - if err != nil { - t.Errorf("Expected no error but got: %v", err) - } -} - -func AssertExplanationContains(t *testing.T, explanation string, expected ...string) { - for _, exp := range expected { - if !contains(explanation, exp) { - t.Errorf("Expected explanation to contain '%s', but got: %s", exp, explanation) - } - } -} - -func AssertExplanationNotContains(t *testing.T, explanation string, notExpected ...string) { - for _, notExp := range notExpected { - if contains(explanation, notExp) { - t.Errorf("Expected explanation to NOT contain '%s', but got: %s", notExp, explanation) - } - } -} - -func TestExplainer_WinnerAnalysis(t *testing.T) { - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "winner analysis with score gap", - decision: DecisionWithScoring("test-decision", "host-1", - map[string]float64{"host-1": 2.45, "host-2": 2.10, "host-3": 1.85}). 
- Build(), - expectedContains: []string{ - "Selected: host-1 (score: 2.45)", - "gap to 2nd: 0.35", - "3 hosts evaluated", - }, - }, - { - name: "winner analysis with single host", - decision: DecisionWithScoring("test-decision", "host-1", - map[string]float64{"host-1": 2.45}). - Build(), - expectedContains: []string{ - "Selected: host-1 (score: 2.45)", - "1 host evaluated", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - RunExplanationTest(t, tt.decision, tt.expectedContains) - }) - } -} - -func TestExplainer_InputComparison(t *testing.T) { - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "input choice confirmed", - decision: DecisionWithInputComparison("test-decision", "host-1", - map[string]float64{"host-1": 1.20, "host-2": 1.10, "host-3": 0.95}, - map[string]float64{"host-1": 2.45, "host-2": 2.10, "host-3": 1.85}). - Build(), - expectedContains: []string{ - "Input choice confirmed: host-1 (1.20→2.45)", - }, - }, - { - name: "input choice overridden", - decision: DecisionWithInputComparison("test-decision", "host-2", - map[string]float64{"host-1": 1.50, "host-2": 1.20, "host-3": 0.95}, - map[string]float64{"host-1": 1.85, "host-2": 2.45, "host-3": 2.10}). - Build(), - expectedContains: []string{ - "Input favored host-1 (1.50), final winner: host-2 (1.20→2.45)", - }, - }, - { - name: "raw weights preferred over normalized", - decision: NewDecision("test-decision"). - WithTargetHost("host-1"). - WithRawInputWeights(map[string]float64{"host-1": 100.0, "host-2": 90.0}). - WithNormalizedInputWeights(map[string]float64{"host-1": 1.0, "host-2": 0.9}). - WithAggregatedOutputWeights(map[string]float64{"host-1": 2.45, "host-2": 2.10}). 
- Build(), - expectedContains: []string{ - "Input choice confirmed: host-1 (100.00→2.45)", // Should now use raw weights (100.00) - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - RunExplanationTest(t, tt.decision, tt.expectedContains) - }) - } -} - -func TestExplainer_CriticalStepsAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "single critical step", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.5, "host-2": 0.2}), - Step("availability-filter", map[string]float64{"host-1": 0.0, "host-2": 0.0})), - expectedContains: []string{ - "Decision driven by 1/2 pipeline step: resource-weigher", - }, - }, - { - name: "multiple critical steps", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 3.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.0, "host-2": -0.5}), - Step("availability-filter", map[string]float64{"host-1": 1.0, "host-2": 0.0}), - Step("placement-policy", map[string]float64{"host-1": 0.05, "host-2": 0.05})), - expectedContains: []string{ - "Decision driven by 2/3 pipeline steps: resource-weigher, availability-filter", - }, - }, - { - name: "all steps non-critical", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 3.0, "host-2": 1.0}), - Step("step-1", map[string]float64{"host-1": 0.05, "host-2": 0.05}), - Step("step-2", map[string]float64{"host-1": 0.02, "host-2": 0.02})), - expectedContains: []string{ - "Decision driven by input only (all 2 
steps are non-critical)", - }, - }, - { - name: "all steps critical", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 3.0}), - Step("step-1", map[string]float64{"host-1": 1.0, "host-2": -0.5}), - Step("step-2", map[string]float64{"host-1": 1.0, "host-2": 0.0})), - expectedContains: []string{ - "Decision requires all 2 pipeline steps", - }, - }, - { - name: "three critical steps formatting", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 4.0}), - Step("step-a", map[string]float64{"host-1": 1.0, "host-2": -0.5}), - Step("step-b", map[string]float64{"host-1": 1.0, "host-2": 0.0}), - Step("step-c", map[string]float64{"host-1": 1.0, "host-2": 0.0}), - Step("step-d", map[string]float64{"host-1": 0.05, "host-2": 0.05})), - expectedContains: []string{ - "Decision driven by 3/4 pipeline steps: step-a, step-b, step-c", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Failed to create explainer: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - }) - } -} - -func TestExplainer_CompleteExplanation(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - previousDecision := WithUID(WithTargetHost(NewTestDecision("test-decision-1"), "host-1"), "test-uid-1") - - decision := WithSteps( - WithOutputWeights( - WithInputWeights( - WithHistoryRef( - WithTargetHost(NewTestDecision("test-decision-2"), "host-2"), - previousDecision), - map[string]float64{"host-1": 1.50, "host-2": 1.20, "host-3": 0.95}), - map[string]float64{"host-1": 1.85, "host-2": 2.45, "host-3": 2.10}), - Step("resource-weigher", map[string]float64{"host-1": 0.15, "host-2": 0.85, "host-3": 0.75}), - Step("availability-filter", map[string]float64{"host-1": 0.20, "host-2": 0.40, "host-3": 0.40})) - - // Set precedence manually since it's not commonly used - decision.Status.Precedence = intPtr(1) - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision, previousDecision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Failed to create explainer: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - expectedParts := []string{ - "Decision #2 for this nova server", - "Previous target host was 'host-1', now it's 'host-2'", - "Selected: host-2 (score: 2.45), gap to 2nd: 0.35, 3 hosts evaluated", - "Input favored host-1 (1.50), final winner: host-2 (1.20→2.45)", - "Decision driven by 1/2 pipeline step: resource-weigher", - } - - for _, expected := range expectedParts { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } -} - -func TestExplainer_DeletedHostsAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "single host filtered by single step", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("availability-filter", map[string]float64{"host-1": 0.5})), - expectedContains: []string{ - "1 host filtered:", - "- host-2 (input choice) by availability-filter", - }, - }, - { - name: "multiple hosts filtered", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 3.0, "host-2": 2.0, "host-3": 1.0}), - Step("availability-filter", map[string]float64{"host-1": 0.5})), - expectedContains: []string{ - "2 hosts filtered", - }, - }, - { - name: "multiple hosts filtered including input winner", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - 
map[string]float64{"host-1": 1.0, "host-2": 3.0, "host-3": 2.0}), - Step("availability-filter", map[string]float64{"host-1": 0.5})), - expectedContains: []string{ - "2 hosts filtered:", - "- host-2 (input choice) by availability-filter", - "- host-3 by availability-filter", - }, - }, - { - name: "no hosts filtered", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 0.5, "host-2": 0.3})), - expectedContains: []string{}, // No deleted hosts analysis should be present - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Failed to create explainer: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - - // For the "no hosts filtered" case, ensure no deleted hosts analysis is present - if len(tt.expectedContains) == 0 { - deletedHostsKeywords := []string{"filtered", "Input winner", "hosts filtered"} - for _, keyword := range deletedHostsKeywords { - if contains(explanation, keyword) { - t.Errorf("Expected explanation to NOT contain '%s' for no deleted hosts case, but got: %s", keyword, explanation) - } - } - } - }) - } -} - -func TestExplainer_GlobalChainAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - baseTime := metav1.Now() - time1 := metav1.Time{Time: baseTime.Add(-120 * time.Minute)} 
// 2 hours ago - time2 := metav1.Time{Time: baseTime.Add(-60 * time.Minute)} // 1 hour ago - time3 := metav1.Time{Time: baseTime.Time} // now - - tests := []struct { - name string - currentDecision *v1alpha1.Decision - historyDecisions []v1alpha1.Decision - expectedContains []string - expectedNotContain []string - }{ - { - name: "simple chain with durations", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-3", - Namespace: "default", - CreationTimestamp: time3, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "decision-2", UID: "uid-2"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-3"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: time1, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - UID: "uid-2", - CreationTimestamp: time2, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - }, - expectedContains: []string{ - "Chain: host-1 (1h0m0s) -> host-2 (1h0m0s) -> host-3 (0s).", - }, - }, - { - name: "chain with loop detection", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-3", - Namespace: "default", - CreationTimestamp: time3, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: 
"Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "decision-2", UID: "uid-2"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), // Back to host-1 - creates loop - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: time1, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - UID: "uid-2", - CreationTimestamp: time2, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - }, - expectedContains: []string{ - "Chain (loop detected): host-1 (1h0m0s) -> host-2 (1h0m0s) -> host-1 (0s).", - }, - }, - { - name: "chain with multiple decisions on same host", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-4", - Namespace: "default", - CreationTimestamp: time3, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - {Kind: "Decision", Namespace: "default", Name: "decision-2", UID: "uid-2"}, - {Kind: "Decision", Namespace: "default", Name: "decision-3", UID: "uid-3"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: time1, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - 
Name: "decision-2", - Namespace: "default", - UID: "uid-2", - CreationTimestamp: time1, // Same time as decision-1 - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), // Same host as decision-1 - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-3", - Namespace: "default", - UID: "uid-3", - CreationTimestamp: time2, - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), // Still same host - }, - }, - }, - }, - expectedContains: []string{ - "Chain: host-1 (2h0m0s; 3 decisions) -> host-2 (0s).", - }, - }, - { - name: "chain with multi-day duration", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-2", - Namespace: "default", - CreationTimestamp: metav1.Time{Time: baseTime.Time}, - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: &[]corev1.ObjectReference{ - {Kind: "Decision", Namespace: "default", Name: "decision-1", UID: "uid-1"}, - }, - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - UID: "uid-1", - CreationTimestamp: metav1.Time{Time: baseTime.Add(-72 * time.Hour)}, // 3 days ago - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - }, - expectedContains: []string{ - "Chain: host-1 (3d0h0m0s) -> host-2 (0s).", - }, - }, - { - name: "no chain for initial decision", - currentDecision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "decision-1", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - History: 
nil, // No history - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-1"), - }, - }, - }, - historyDecisions: []v1alpha1.Decision{}, - expectedNotContain: []string{ - "Chain:", - "chain:", - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.currentDecision} - for i := range tt.historyDecisions { - objects = append(objects, &tt.historyDecisions[i]) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.currentDecision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - - for _, notExpected := range tt.expectedNotContain { - if contains(explanation, notExpected) { - t.Errorf("Expected explanation to NOT contain '%s', but got: %s", notExpected, explanation) - } - } - }) - } -} - -func intPtr(i int) *int { - return &i -} - -func TestExplainer_RawWeightsPriorityBugFix(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - description string - }{ - { - name: "raw_weights_preserve_small_differences", - decision: func() *v1alpha1.Decision { - decision := WithOutputWeights( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-2"), - map[string]float64{"host-1": 1000.05, "host-2": 1000.10, "host-3": 1000.00}), - map[string]float64{"host-1": 1001.05, "host-2": 1002.10, "host-3": 1001.00}) - // Add normalized weights to show they 
would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0, "host-3": 1.0} - return decision - }(), - expectedContains: []string{ - "Input choice confirmed: host-2 (1000.10→1002.10)", // Should use raw weights (1000.10) - }, - description: "Raw weights preserve small differences that normalized weights would mask", - }, - { - name: "raw_weights_detect_correct_input_winner", - decision: func() *v1alpha1.Decision { - decision := WithOutputWeights( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-3"), - map[string]float64{"host-1": 2000.15, "host-2": 2000.10, "host-3": 2000.05}), - map[string]float64{"host-1": 2001.15, "host-2": 2001.10, "host-3": 2002.05}) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0, "host-3": 1.0} - return decision - }(), - expectedContains: []string{ - "Input favored host-1 (2000.15), final winner: host-3 (2000.05→2002.05)", // Should detect host-1 as input winner using raw weights - }, - description: "Raw weights correctly identify input winner that normalized weights would miss", - }, - { - name: "critical_steps_analysis_uses_raw_weights", - decision: func() *v1alpha1.Decision { - decision := WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1000.05, "host-2": 1000.00}), - Step("resource-weigher", map[string]float64{"host-1": 0.5, "host-2": 0.0})) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0} - return decision - }(), - expectedContains: []string{ - "Decision driven by input only (all 1 step is non-critical)", // With small raw weight advantage, step is non-critical - "Input choice confirmed: host-1 (1000.05→0.00)", // Shows raw weights are being used - }, 
- description: "Critical steps analysis uses raw weights - with small raw advantage, step becomes non-critical", - }, - { - name: "deleted_hosts_analysis_uses_raw_weights", - decision: func() *v1alpha1.Decision { - decision := WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1000.00, "host-2": 1000.05, "host-3": 999.95}), - Step("availability-filter", map[string]float64{"host-1": 0.0})) - // Add normalized weights to show they would mask the difference - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.0, "host-2": 1.0, "host-3": 1.0} - return decision - }(), - expectedContains: []string{ - "2 hosts filtered:", - "- host-2 (input choice) by availability-filter", - "Input favored host-2 (1000.05), final winner: host-1 (1000.00→0.00)", - }, - description: "Deleted hosts analysis uses raw weights to correctly identify input winner", - }, - { - name: "fallback_to_normalized_when_no_raw_weights", - decision: func() *v1alpha1.Decision { - decision := WithOutputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 2.5, "host-2": 2.0, "host-3": 1.8}) - // Set normalized weights and clear raw weights to test fallback - decision.Status.Result.NormalizedInWeights = map[string]float64{"host-1": 1.5, "host-2": 1.0, "host-3": 0.8} - decision.Status.Result.RawInWeights = nil - return decision - }(), - expectedContains: []string{ - "Input choice confirmed: host-1 (1.50→2.50)", // Should use normalized weights as fallback - }, - description: "Should fall back to normalized weights when raw weights are not available", - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Fatalf("Failed to create explainer: %v", err) - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - }) - } -} - -// TestExplainer_RawVsNormalizedComparison demonstrates the impact of the bug fix -func TestExplainer_RawVsNormalizedComparison(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - ResourceID: "test-resource", - }, - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - TargetHost: stringPtr("host-2"), - RawInWeights: map[string]float64{ - "host-1": 1000.05, // Very small difference - "host-2": 1000.10, // Slightly higher - should be detected as input winner - "host-3": 1000.00, - }, - NormalizedInWeights: map[string]float64{ - "host-1": 1.0, // All normalized to same value - would mask the difference - "host-2": 1.0, - "host-3": 1.0, - }, - AggregatedOutWeights: map[string]float64{ - "host-1": 1001.05, - "host-2": 1002.10, // host-2 wins - "host-3": 1001.00, - }, - }, - }, - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(decision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - if !contains(explanation, "Input choice confirmed: host-2 (1000.10→1002.10)") { - t.Errorf("Expected explanation to show raw weight value (1000.10), but got: %s", explanation) - } - - if contains(explanation, "Input favored host-1") || contains(explanation, "Input favored host-3") { - t.Errorf("Expected explanation to NOT show input choice override, but got: %s", explanation) - } -} - -func TestExplainer_StepImpactAnalysis(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - expectedContains []string - }{ - { - name: "step with positive impact", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.5, "host-2": 0.2})), - expectedContains: []string{ - "Step impacts:", - "resource-weigher +1.50", - }, - }, - { - name: "step with promotion to first", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 2.0, "host-2": 0.5})), - expectedContains: []string{ - "Step impacts:", - "resource-weigher +2.00→#1", - }, - }, - { - name: "step with competitor removal", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 2.0, "host-2": 1.0, "host-3": 0.5}), - Step("availability-filter", map[string]float64{"host-1": 0.0})), - 
expectedContains: []string{ - "Step impacts:", - "availability-filter +0.00 (removed 2)", - }, - }, - { - name: "multiple steps sorted by impact", - decision: WithSteps( - WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 1.0, "host-2": 2.0}), - Step("resource-weigher", map[string]float64{"host-1": 1.5, "host-2": 0.2}), - Step("availability-filter", map[string]float64{"host-1": 0.1, "host-2": 0.0})), - expectedContains: []string{ - "Step impacts:", - "resource-weigher +1.50", - "availability-filter +0.10", - }, - }, - { - name: "no step impacts for decision without steps", - decision: WithInputWeights( - WithTargetHost(NewTestDecision("test-decision"), "host-1"), - map[string]float64{"host-1": 2.0, "host-2": 1.0}), - expectedContains: []string{}, // No step impacts should be present - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(tt.decision). 
- Build() - - explainer, err := NewExplainer(client) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - explanation, err := explainer.Explain(context.Background(), tt.decision) - if err != nil { - t.Errorf("Expected no error but got: %v", err) - return - } - - for _, expected := range tt.expectedContains { - if !contains(explanation, expected) { - t.Errorf("Expected explanation to contain '%s', but got: %s", expected, explanation) - } - } - - // For the "no step impacts" case, ensure no step impacts analysis is present - if len(tt.expectedContains) == 0 { - stepImpactsKeywords := []string{"Step impacts:", "→#1", "removed"} - for _, keyword := range stepImpactsKeywords { - if contains(explanation, keyword) { - t.Errorf("Expected explanation to NOT contain '%s' for no step impacts case, but got: %s", keyword, explanation) - } - } - } - }) - } -} diff --git a/internal/scheduling/explanation/templates.go b/internal/scheduling/explanation/templates.go deleted file mode 100644 index dc7160c07..000000000 --- a/internal/scheduling/explanation/templates.go +++ /dev/null @@ -1,141 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import ( - "bytes" - "fmt" - "strings" - "text/template" - "time" -) - -type TemplateManager struct { - templates *template.Template -} - -func NewTemplateManager() (*TemplateManager, error) { - tmpl := template.New("explanation").Funcs(template.FuncMap{ - "join": strings.Join, - "formatDuration": formatTemplateDuration, - "formatFloat": func(f float64) string { return fmt.Sprintf("%.2f", f) }, - "formatDelta": func(f float64) string { return fmt.Sprintf("%+.2f", f) }, - "add": func(a, b int) int { return a + b }, - "plural": func(n int, singular, plural string) string { - if n == 1 { - return singular - } - return plural - }, - }) - - tmpl, err := tmpl.Parse(mainTemplate) - if err != nil { - return nil, fmt.Errorf("failed to parse main template: %w", err) - } - - 
templates := map[string]string{ - "context": contextTemplate, - "history": historyTemplate, - "winner": winnerTemplate, - "input": inputTemplate, - "critical": criticalTemplate, - "deleted": deletedTemplate, - "impacts": impactsTemplate, - "chain": chainTemplate, - } - - for name, templateStr := range templates { - tmpl, err = tmpl.Parse(fmt.Sprintf(`{{define "%s"}}%s{{end}}`, name, templateStr)) - if err != nil { - return nil, fmt.Errorf("failed to parse %s template: %w", name, err) - } - } - - return &TemplateManager{templates: tmpl}, nil -} - -func (tm *TemplateManager) RenderExplanation(ctx ExplanationContext) (string, error) { - var buf bytes.Buffer - err := tm.templates.Execute(&buf, ctx) - if err != nil { - return "", fmt.Errorf("failed to render explanation: %w", err) - } - return strings.TrimSpace(buf.String()), nil -} - -func formatTemplateDuration(d time.Duration) string { - if d == 0 { - return "0s" - } - - // Truncate to seconds to remove sub-second precision - d = d.Truncate(time.Second) - - // For durations >= 24 hours, convert to days format - if d >= 24*time.Hour { - days := int(d.Hours()) / 24 - remainder := d - time.Duration(days)*24*time.Hour - if remainder == 0 { - return fmt.Sprintf("%dd0h0m0s", days) - } - return fmt.Sprintf("%d%s", days, remainder.String()) - } - - // For shorter durations, use Go's built-in formatting - return d.String() -} - -const mainTemplate = `{{template "context" .Context}} -{{- if .History}} {{template "history" .History}}{{end}} -{{- if .Winner}} {{template "winner" .Winner}}{{end}} -{{- if .Input}} {{template "input" .Input}}{{end}} -{{- if .CriticalSteps}} {{template "critical" .CriticalSteps}}{{end}} -{{- if .DeletedHosts}} {{template "deleted" .DeletedHosts}}{{end}} -{{- if .StepImpacts}} {{template "impacts" .StepImpacts}}{{end}} -{{- if .Chain}} {{template "chain" .Chain}}{{end}}` - -const contextTemplate = `{{if .IsInitial -}} -Initial placement of the {{.ResourceType}}. 
-{{- else -}} -Decision #{{.DecisionNumber}} for this {{.ResourceType}}. -{{- end}}` - -const historyTemplate = `Previous target host was '{{.PreviousTarget}}', now it's '{{.CurrentTarget}}'.` - -const winnerTemplate = `Selected: {{.HostName}} (score: {{formatFloat .Score}}) -{{- if .HasGap}}, gap to 2nd: {{formatFloat .Gap}}{{end}}, {{.HostsEvaluated}} {{plural .HostsEvaluated "host" "hosts"}} evaluated.` - -const inputTemplate = `{{if .InputConfirmed -}} -Input choice confirmed: {{.FinalWinner}} ({{formatFloat .InputScore}}→{{formatFloat .FinalScore}}). -{{- else -}} -Input favored {{.InputWinner}} ({{formatFloat .InputScore}}), final winner: {{.FinalWinner}} ({{formatFloat .FinalInputScore}}→{{formatFloat .FinalScore}}). -{{- end}}` - -const criticalTemplate = `{{if .IsInputOnly -}} -Decision driven by input only (all {{.TotalSteps}} {{plural .TotalSteps "step is" "steps are"}} non-critical). -{{- else if .RequiresAll -}} -Decision requires all {{.TotalSteps}} pipeline {{plural .TotalSteps "step" "steps"}}. -{{- else if eq (len .Steps) 1 -}} -Decision driven by 1/{{.TotalSteps}} pipeline step: {{index .Steps 0}}. -{{- else -}} -Decision driven by {{len .Steps}}/{{.TotalSteps}} pipeline {{plural .TotalSteps "step" "steps"}}: {{join .Steps ", "}}. 
-{{- end}}` - -const deletedTemplate = `{{len .DeletedHosts}} {{plural (len .DeletedHosts) "host" "hosts"}} filtered: -{{- range .DeletedHosts}} - - {{.Name}}{{if .IsInputWinner}} (input choice){{end}} by {{join .Steps ", "}} -{{- end}}` - -const impactsTemplate = ` Step impacts: -{{- range $i, $impact := .}} -• {{$impact.Step}} -{{- if $impact.PromotedToFirst}} {{formatDelta $impact.ScoreDelta}}→#1 -{{- else if ne $impact.ScoreDelta 0.0}} {{formatDelta $impact.ScoreDelta}} -{{- else if gt $impact.CompetitorsRemoved 0}} +0.00 (removed {{$impact.CompetitorsRemoved}}) -{{- else}} +0.00{{end}} -{{- end}}` - -const chainTemplate = `{{if .HasLoop}}Chain (loop detected): {{else}}Chain: {{end}} -{{- range $i, $segment := .Segments}}{{if gt $i 0}} -> {{end}}{{$segment.Host}} ({{formatDuration $segment.Duration}}{{if gt $segment.Decisions 1}}; {{$segment.Decisions}} decisions{{end}}){{end}}.` diff --git a/internal/scheduling/explanation/types.go b/internal/scheduling/explanation/types.go deleted file mode 100644 index 31f6d0aa1..000000000 --- a/internal/scheduling/explanation/types.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright SAP SE -// SPDX-License-Identifier: Apache-2.0 - -package explanation - -import "time" - -// ExplanationContext holds all data needed to render a complete explanation. -type ExplanationContext struct { - Context ContextData `json:"context"` - History *HistoryData `json:"history,omitempty"` - Winner *WinnerData `json:"winner,omitempty"` - Input *InputData `json:"input,omitempty"` - CriticalSteps *CriticalStepsData `json:"criticalSteps,omitempty"` - DeletedHosts *DeletedHostsData `json:"deletedHosts,omitempty"` - StepImpacts []StepImpact `json:"stepImpacts,omitempty"` - Chain *ChainData `json:"chain,omitempty"` -} - -type ContextData struct { - ResourceType string `json:"resourceType"` - DecisionNumber int `json:"decisionNumber"` - IsInitial bool `json:"isInitial"` -} - -// HistoryData contains information about the previous decision in the chain. 
-type HistoryData struct { - PreviousTarget string `json:"previousTarget"` - CurrentTarget string `json:"currentTarget"` -} - -type WinnerData struct { - HostName string `json:"hostName"` - Score float64 `json:"score"` - Gap float64 `json:"gap"` - HostsEvaluated int `json:"hostsEvaluated"` - HasGap bool `json:"hasGap"` -} - -// InputData contains information about input vs final winner comparison. -type InputData struct { - InputWinner string `json:"inputWinner"` - InputScore float64 `json:"inputScore"` - FinalWinner string `json:"finalWinner"` - FinalScore float64 `json:"finalScore"` - FinalInputScore float64 `json:"finalInputScore"` // Final winner's input score - InputConfirmed bool `json:"inputConfirmed"` -} - -// CriticalStepsData contains information about which pipeline steps were critical. -type CriticalStepsData struct { - Steps []string `json:"steps"` - TotalSteps int `json:"totalSteps"` - IsInputOnly bool `json:"isInputOnly"` - RequiresAll bool `json:"requiresAll"` -} - -// DeletedHostsData contains information about hosts that were filtered out. -type DeletedHostsData struct { - DeletedHosts []DeletedHostInfo `json:"deletedHosts"` -} - -// DeletedHostInfo contains details about a single deleted host. -type DeletedHostInfo struct { - Name string `json:"name"` - Steps []string `json:"steps"` - IsInputWinner bool `json:"isInputWinner"` -} - -// ChainData contains information about the decision chain over time. -type ChainData struct { - Segments []ChainSegment `json:"segments"` - HasLoop bool `json:"hasLoop"` -} - -// ChainSegment represents a period where the resource was on a specific host. 
-type ChainSegment struct { - Host string `json:"host"` - Duration time.Duration `json:"duration"` - // number of decisions with this as the target host - Decisions int `json:"decisions"` -} diff --git a/internal/scheduling/lib/explainer.go b/internal/scheduling/lib/explainer.go new file mode 100644 index 000000000..d25ca9177 --- /dev/null +++ b/internal/scheduling/lib/explainer.go @@ -0,0 +1,29 @@ +// Copyright SAP SE +// SPDX-License-Identifier: Apache-2.0 + +package lib + +import ( + "context" + + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// The explainer gets a scheduling decision and produces a human-readable +// explanation of why the decision was made the way it was. +type Explainer struct { + // The kubernetes client to use for fetching related data. + client.Client +} + +// NewExplainer creates a new explainer with template support. +func NewExplainer(client client.Client) (*Explainer, error) { + return &Explainer{ + Client: client, + }, nil +} + +// Explain the given decision and return a human-readable explanation. +func (e *Explainer) Explain(ctx context.Context, decision DecisionUpdate) (string, error) { + return "Explanation generation not implemented yet", nil +} diff --git a/internal/scheduling/lib/filter_weigher_pipeline.go b/internal/scheduling/lib/filter_weigher_pipeline.go index f362c07af..253b38b04 100644 --- a/internal/scheduling/lib/filter_weigher_pipeline.go +++ b/internal/scheduling/lib/filter_weigher_pipeline.go @@ -17,9 +17,20 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) +type FilterWeigherPipelineResult struct { + // The original weights provided as input to the pipeline, from the request that cortex received. + RawInWeights map[string]float64 + // The normalized input weights after applying the normalization function. + NormalizedInWeights map[string]float64 + // The output weights after applying the weigher step activations and multipliers. 
+ AggregatedOutWeights map[string]float64 + // The hosts in order of preference, with the most preferred host first. + OrderedHosts []string +} + type FilterWeigherPipeline[RequestType FilterWeigherPipelineRequest] interface { // Run the scheduling pipeline with the given request. - Run(request RequestType) (v1alpha1.DecisionResult, error) + Run(request RequestType) (FilterWeigherPipelineResult, error) } // Pipeline of scheduler steps. @@ -258,7 +269,7 @@ func (s *filterWeigherPipeline[RequestType]) sortHostsByWeights(weights map[stri } // Evaluate the pipeline and return a list of hosts in order of preference. -func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (v1alpha1.DecisionResult, error) { +func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (FilterWeigherPipelineResult, error) { slogArgs := request.GetTraceLogArgs() slogArgsAny := make([]any, 0, len(slogArgs)) for _, arg := range slogArgs { @@ -296,14 +307,11 @@ func (p *filterWeigherPipeline[RequestType]) Run(request RequestType) (v1alpha1. // Collect some metrics about the pipeline execution. 
go p.monitor.observePipelineResult(request, hosts) - result := v1alpha1.DecisionResult{ + result := FilterWeigherPipelineResult{ RawInWeights: request.GetWeights(), NormalizedInWeights: inWeights, AggregatedOutWeights: outWeights, OrderedHosts: hosts, } - if len(hosts) > 0 { - result.TargetHost = &hosts[0] - } return result, nil } diff --git a/internal/scheduling/lib/pipeline_controller.go b/internal/scheduling/lib/pipeline_controller.go index a435133e4..814f9b5f2 100644 --- a/internal/scheduling/lib/pipeline_controller.go +++ b/internal/scheduling/lib/pipeline_controller.go @@ -9,12 +9,13 @@ import ( "fmt" "github.com/cobaltcore-dev/cortex/api/v1alpha1" + corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/util/workqueue" + "k8s.io/client-go/tools/events" + "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/event" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -30,6 +31,140 @@ type BasePipelineController[PipelineType any] struct { client.Client // The scheduling domain to scope resources to. SchedulingDomain v1alpha1.SchedulingDomain + // Event recorder for publishing events. 
+ Recorder events.EventRecorder + + DecisionQueue chan DecisionUpdate +} + +type DecisionUpdate struct { + ResourceID string + PipelineName string + Result FilterWeigherPipelineResult + Intent v1alpha1.SchedulingIntent +} + +func (c *BasePipelineController[PipelineType]) StartExplainer(ctx context.Context) { + c.DecisionQueue = make(chan DecisionUpdate, 100) + log := ctrl.LoggerFrom(ctx) + for { + select { + case <-ctx.Done(): + return + case update := <-c.DecisionQueue: + if err := c.updateDecision(ctx, update); err != nil { + log.Error(err, "failed to update decision", "resourceID", update.ResourceID) + } + } + } +} + +func (c *BasePipelineController[PipelineType]) updateDecision(ctx context.Context, update DecisionUpdate) error { + log := ctrl.LoggerFrom(ctx) + log.Info("Explaining decision for resource", "resourceID", update.ResourceID, "pipelineName", update.PipelineName) + + explainer, err := NewExplainer(c.Client) + if err != nil { + return fmt.Errorf("failed to create explainer: %w", err) + } + + explanationText, err := explainer.Explain(ctx, update) + if err != nil { + return fmt.Errorf("failed to generate explanation: %w", err) + } + + // Try to get existing decision + decision := &v1alpha1.Decision{} + if err = c.Get(ctx, client.ObjectKey{Name: update.ResourceID}, decision); err != nil { + if client.IgnoreNotFound(err) != nil { + return fmt.Errorf("failed to get decision: %w", err) + } + + // Decision doesn't exist - create new one + decision = &v1alpha1.Decision{ + ObjectMeta: metav1.ObjectMeta{ + Name: update.ResourceID, + }, + Spec: v1alpha1.DecisionSpec{ + SchedulingDomain: c.SchedulingDomain, + ResourceID: update.ResourceID, + }, + } + + if err := c.Create(ctx, decision); err != nil { + return fmt.Errorf("failed to create decision: %w", err) + } + log.Info("Created new decision", "resourceID", update.ResourceID) + } + + // Prepare the scheduling history entry + historyEntry := v1alpha1.SchedulingHistoryEntry{ + OrderedHosts: 
update.Result.OrderedHosts, + Timestamp: metav1.Now(), + PipelineRef: corev1.ObjectReference{ + Name: update.PipelineName, + }, + Intent: update.Intent, + } + + // Check if scheduling failed (no hosts available) + schedulingFailed := len(update.Result.OrderedHosts) == 0 + + // Update status with retry on conflict to handle concurrent updates + err = retry.RetryOnConflict(retry.DefaultRetry, func() error { + // Get the latest version before each retry attempt + if err := c.Get(ctx, client.ObjectKey{Name: update.ResourceID}, decision); err != nil { + return err + } + + // Apply status updates + decision.Status.Explanation = explanationText + + if schedulingFailed { + // No hosts available - set failed condition + decision.Status.TargetHost = "" + meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ + Type: v1alpha1.DecisionConditionFailed, + Status: metav1.ConditionTrue, + Reason: "NoValidHosts", + Message: "Cannot schedule: No valid hosts available after filtering", + }) + } else { + // Successful scheduling + decision.Status.TargetHost = update.Result.OrderedHosts[0] + meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ + Type: v1alpha1.DecisionConditionReady, + Status: metav1.ConditionTrue, + Reason: "Scheduled", + Message: "Scheduling decision made successfully", + }) + } + + decision.Status.SchedulingHistory = append(decision.Status.SchedulingHistory, historyEntry) + + return c.Status().Update(ctx, decision) + }) + + if err != nil { + return fmt.Errorf("failed to update decision status: %w", err) + } + + // Publish event to the decision + if c.Recorder != nil { + if schedulingFailed { + // Warning event for failed scheduling + c.Recorder.Eventf(decision, nil, corev1.EventTypeWarning, "NoValidHosts", "Scheduling", "Cannot schedule: No valid hosts available. 
%s", explanationText) + log.Info("Published NoValidHosts event", "resourceID", update.ResourceID) + } else { + // Normal event for successful scheduling + intentStr := string(update.Intent) + c.Recorder.Eventf(decision, nil, corev1.EventTypeNormal, intentStr, "Scheduling", "Scheduled to %s. %s", decision.Status.TargetHost, explanationText) + log.Info("Published scheduling event", "resourceID", update.ResourceID, "targetHost", decision.Status.TargetHost, "reason", update.Intent) + } + } + + log.Info("Successfully updated decision", "resourceID", update.ResourceID, "targetHost", decision.Status.TargetHost, "schedulingFailed", schedulingFailed) + return nil } // Handle the startup of the manager by initializing the pipeline map. @@ -51,17 +186,40 @@ func (c *BasePipelineController[PipelineType]) InitAllPipelines(ctx context.Cont continue } log.Info("initializing existing pipeline", "pipelineName", pipelineConf.Name) - c.handlePipelineChange(ctx, &pipelineConf, nil) + c.handlePipelineChange(ctx, &pipelineConf) c.PipelineConfigs[pipelineConf.Name] = pipelineConf } return nil } +func (c *BasePipelineController[PipelineType]) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrl.LoggerFrom(ctx) + log.Info("reconcile called for pipeline", "pipelineName", req.NamespacedName) + + pipeline := &v1alpha1.Pipeline{} + err := c.Get(ctx, req.NamespacedName, pipeline) + + if err != nil { + if client.IgnoreNotFound(err) == nil { + // Pipeline was deleted + log.Info("pipeline deleted, removing from cache", "pipelineName", req.Name) + delete(c.Pipelines, req.Name) + delete(c.PipelineConfigs, req.Name) + return ctrl.Result{}, nil + } + log.Error(err, "failed to get pipeline", "pipelineName", req.NamespacedName) + return ctrl.Result{}, fmt.Errorf("failed to get pipeline: %w", err) + } + + c.handlePipelineChange(ctx, pipeline) + + return ctrl.Result{}, nil +} + // Handle a pipeline creation or update event from watching pipeline resources. 
func (c *BasePipelineController[PipelineType]) handlePipelineChange( ctx context.Context, obj *v1alpha1.Pipeline, - _ workqueue.TypedRateLimitingInterface[reconcile.Request], ) { if obj.Spec.SchedulingDomain != c.SchedulingDomain { @@ -167,123 +325,14 @@ func (c *BasePipelineController[PipelineType]) handlePipelineChange( } } -// Handler bound to a pipeline watch to handle created pipelines. -// -// This handler will initialize new pipelines as needed and put them into the -// pipeline map. -func (c *BasePipelineController[PipelineType]) HandlePipelineCreated( - ctx context.Context, - evt event.CreateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - pipelineConf := evt.Object.(*v1alpha1.Pipeline) - c.handlePipelineChange(ctx, pipelineConf, queue) -} - -// Handler bound to a pipeline watch to handle updated pipelines. -// -// This handler will initialize new pipelines as needed and put them into the -// pipeline map. -func (c *BasePipelineController[PipelineType]) HandlePipelineUpdated( - ctx context.Context, - evt event.UpdateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - pipelineConf := evt.ObjectNew.(*v1alpha1.Pipeline) - c.handlePipelineChange(ctx, pipelineConf, queue) -} - -// Handler bound to a pipeline watch to handle deleted pipelines. -// -// This handler will remove pipelines from the pipeline map. -func (c *BasePipelineController[PipelineType]) HandlePipelineDeleted( - ctx context.Context, - evt event.DeleteEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - pipelineConf := evt.Object.(*v1alpha1.Pipeline) - delete(c.Pipelines, pipelineConf.Name) - delete(c.PipelineConfigs, pipelineConf.Name) -} - -// Handle a knowledge creation, readiness update, or delete event from watching knowledge resources. 
-func (c *BasePipelineController[PipelineType]) handleKnowledgeChange( - ctx context.Context, - obj *v1alpha1.Knowledge, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - if obj.Spec.SchedulingDomain != c.SchedulingDomain { - return - } - log := ctrl.LoggerFrom(ctx) - log.Info("knowledge changed, re-evaluating all pipelines", "knowledgeName", obj.Name) - // Find all pipelines depending on this knowledge and re-evaluate them. - var pipelines v1alpha1.PipelineList - if err := c.List(ctx, &pipelines); err != nil { - log.Error(err, "failed to list pipelines for knowledge change", "knowledgeName", obj.Name) - return - } - for _, pipeline := range pipelines.Items { - // TODO: Not all pipelines may depend on this knowledge. At the moment - // we re-evaluate all pipelines matching this controller. - if pipeline.Spec.SchedulingDomain != c.SchedulingDomain { - continue - } - if pipeline.Spec.Type != c.Initializer.PipelineType() { - continue - } - c.handlePipelineChange(ctx, &pipeline, queue) - } -} - -// Handler bound to a knowledge watch to handle created knowledges. -// -// This handler will re-evaluate all pipelines depending on the knowledge. -func (c *BasePipelineController[PipelineType]) HandleKnowledgeCreated( - ctx context.Context, - evt event.CreateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - knowledgeConf := evt.Object.(*v1alpha1.Knowledge) - c.handleKnowledgeChange(ctx, knowledgeConf, queue) -} - -// Handler bound to a knowledge watch to handle updated knowledges. -// -// This handler will re-evaluate all pipelines depending on the knowledge. 
-func (c *BasePipelineController[PipelineType]) HandleKnowledgeUpdated( - ctx context.Context, - evt event.UpdateEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - before := evt.ObjectOld.(*v1alpha1.Knowledge) - after := evt.ObjectNew.(*v1alpha1.Knowledge) - errorBefore := meta.IsStatusConditionFalse(before.Status.Conditions, v1alpha1.KnowledgeConditionReady) - errorAfter := meta.IsStatusConditionFalse(after.Status.Conditions, v1alpha1.KnowledgeConditionReady) - errorChanged := errorBefore != errorAfter - dataBecameAvailable := before.Status.RawLength == 0 && after.Status.RawLength > 0 - if !errorChanged && !dataBecameAvailable { - // No relevant change, skip re-evaluation. - return +// GetAllPipelineReconcileRequests returns reconcile requests for all pipelines +// managed by this controller. Used when Knowledge changes require pipeline re-evaluation. +func (c *BasePipelineController[PipelineType]) GetAllPipelineReconcileRequests(ctx context.Context) []reconcile.Request { + var requests []reconcile.Request + for name := range c.Pipelines { + requests = append(requests, reconcile.Request{ + NamespacedName: client.ObjectKey{Name: name}, + }) } - c.handleKnowledgeChange(ctx, after, queue) -} - -// Handler bound to a knowledge watch to handle deleted knowledges. -// -// This handler will re-evaluate all pipelines depending on the knowledge. 
-func (c *BasePipelineController[PipelineType]) HandleKnowledgeDeleted( - ctx context.Context, - evt event.DeleteEvent, - queue workqueue.TypedRateLimitingInterface[reconcile.Request], -) { - - knowledgeConf := evt.Object.(*v1alpha1.Knowledge) - c.handleKnowledgeChange(ctx, knowledgeConf, queue) + return requests } diff --git a/internal/scheduling/lib/pipeline_controller_test.go b/internal/scheduling/lib/pipeline_controller_test.go index e876ec5e6..570f5b0a7 100644 --- a/internal/scheduling/lib/pipeline_controller_test.go +++ b/internal/scheduling/lib/pipeline_controller_test.go @@ -13,9 +13,9 @@ import ( "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" - "sigs.k8s.io/controller-runtime/pkg/event" "github.com/cobaltcore-dev/cortex/api/v1alpha1" testlib "github.com/cobaltcore-dev/cortex/pkg/testing" @@ -404,7 +404,7 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { PipelineConfigs: make(map[string]v1alpha1.Pipeline), } - controller.handlePipelineChange(context.Background(), tt.pipeline, nil) + controller.handlePipelineChange(context.Background(), tt.pipeline) // Check if pipeline is in map _, inMap := controller.Pipelines[tt.pipeline.Name] @@ -461,140 +461,7 @@ func TestBasePipelineController_handlePipelineChange(t *testing.T) { } } -func TestBasePipelineController_HandlePipelineCreated(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - } - - 
fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(pipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}). - Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } - - evt := event.CreateEvent{ - Object: pipeline, - } - - controller.HandlePipelineCreated(context.Background(), evt, nil) - - if _, exists := controller.Pipelines[pipeline.Name]; !exists { - t.Error("Expected pipeline to be in map after creation") - } -} - -func TestBasePipelineController_HandlePipelineUpdated(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - oldPipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - } - - newPipeline := oldPipeline.DeepCopy() - newPipeline.Spec.Description = "Updated description" - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(newPipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}). 
- Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } - - evt := event.UpdateEvent{ - ObjectOld: oldPipeline, - ObjectNew: newPipeline, - } - - controller.HandlePipelineUpdated(context.Background(), evt, nil) - - if _, exists := controller.Pipelines[newPipeline.Name]; !exists { - t.Error("Expected pipeline to be in map after update") - } -} - -func TestBasePipelineController_HandlePipelineDeleted(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - }, - } - - controller := &BasePipelineController[mockPipeline]{ - Pipelines: map[string]mockPipeline{ - "test-pipeline": {name: "test-pipeline"}, - }, - PipelineConfigs: map[string]v1alpha1.Pipeline{ - "test-pipeline": *pipeline, - }, - } - - evt := event.DeleteEvent{ - Object: pipeline, - } - - controller.HandlePipelineDeleted(context.Background(), evt, nil) - - if _, exists := controller.Pipelines[pipeline.Name]; exists { - t.Error("Expected pipeline to be removed from map after deletion") - } - if _, exists := controller.PipelineConfigs[pipeline.Name]; exists { - t.Error("Expected pipeline config to be removed from map after deletion") - } -} - -func TestBasePipelineController_handleKnowledgeChange(t *testing.T) { +func TestBasePipelineController_Reconcile(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ 
-602,398 +469,173 @@ func TestBasePipelineController_handleKnowledgeChange(t *testing.T) { tests := []struct { name string - knowledge *v1alpha1.Knowledge - pipelines []v1alpha1.Pipeline + pipeline *v1alpha1.Pipeline + pipelineExists bool schedulingDomain v1alpha1.SchedulingDomain - expectReEvaluated []string + initPipelineError bool + expectInMap bool + expectReady bool }{ { - name: "knowledge change triggers pipeline re-evaluation", - knowledge: &v1alpha1.Knowledge{ + name: "reconcile new pipeline", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", + Name: "test-pipeline", }, - Spec: v1alpha1.KnowledgeSpec{ + Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - }, - }, - pipelines: []v1alpha1.Pipeline{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "pipeline-1", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, - }, - { - ObjectMeta: metav1.ObjectMeta{ - Name: "pipeline-2", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, + Type: v1alpha1.PipelineTypeFilterWeigher, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, - schedulingDomain: v1alpha1.SchedulingDomainNova, - expectReEvaluated: []string{"pipeline-1", "pipeline-2"}, + pipelineExists: true, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectInMap: true, + expectReady: true, }, { - name: "knowledge change in different scheduling domain", - knowledge: &v1alpha1.Knowledge{ + name: "reconcile updated pipeline", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: 
"default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainCinder, - }, - }, - pipelines: []v1alpha1.Pipeline{ - { - ObjectMeta: metav1.ObjectMeta{ - Name: "nova-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, - }, - }, - schedulingDomain: v1alpha1.SchedulingDomainNova, - expectReEvaluated: []string{}, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []client.Object{tt.knowledge} - for i := range tt.pipelines { - objects = append(objects, &tt.pipelines[i]) - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). - Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: tt.schedulingDomain, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, + Name: "test-pipeline", }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } - - controller.handleKnowledgeChange(context.Background(), tt.knowledge, nil) - - // Verify expected pipelines were re-evaluated by checking if they're in the map - for _, expectedName := range tt.expectReEvaluated { - if _, exists := controller.Pipelines[expectedName]; !exists { - t.Errorf("Expected pipeline %s to be re-evaluated", expectedName) - } - } - }) - } -} - -func TestBasePipelineController_HandleKnowledgeCreated(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - knowledge := &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: 
v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - }, - } - - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainNova, + Type: v1alpha1.PipelineTypeFilterWeigher, + Description: "Updated description", + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, + pipelineExists: true, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectInMap: true, + expectReady: true, }, - } - - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(knowledge, pipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). - Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } - - evt := event.CreateEvent{ - Object: knowledge, - } - - controller.HandleKnowledgeCreated(context.Background(), evt, nil) - - // Pipeline should be re-evaluated and added to map - if _, exists := controller.Pipelines[pipeline.Name]; !exists { - t.Error("Expected pipeline to be re-evaluated after knowledge creation") - } -} - -func TestBasePipelineController_HandleKnowledgeUpdated(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - tests := []struct { - name string - oldKnowledge *v1alpha1.Knowledge - newKnowledge *v1alpha1.Knowledge - expectReEvaluate bool - }{ { - name: "error state changed", - oldKnowledge: 
&v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - Conditions: []metav1.Condition{ - { - Type: v1alpha1.KnowledgeConditionReady, - Status: metav1.ConditionFalse, - }, - }, - }, - }, - newKnowledge: &v1alpha1.Knowledge{ + name: "reconcile deleted pipeline", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", + Name: "deleted-pipeline", }, - Spec: v1alpha1.KnowledgeSpec{ + Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, + Type: v1alpha1.PipelineTypeFilterWeigher, }, }, - expectReEvaluate: true, + pipelineExists: false, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectInMap: false, + expectReady: false, }, { - name: "data became available", - oldKnowledge: &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 0, - }, - }, - newKnowledge: &v1alpha1.Knowledge{ + name: "reconcile pipeline with different scheduling domain", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", + Name: "cinder-pipeline", }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, + Spec: v1alpha1.PipelineSpec{ + SchedulingDomain: v1alpha1.SchedulingDomainCinder, + Type: v1alpha1.PipelineTypeFilterWeigher, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, - expectReEvaluate: true, + pipelineExists: true, + schedulingDomain: v1alpha1.SchedulingDomainNova, + expectInMap: false, + expectReady: 
false, }, { - name: "no relevant change", - oldKnowledge: &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 10, - }, - }, - newKnowledge: &v1alpha1.Knowledge{ + name: "reconcile pipeline with init error", + pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", + Name: "error-pipeline", }, - Spec: v1alpha1.KnowledgeSpec{ + Spec: v1alpha1.PipelineSpec{ SchedulingDomain: v1alpha1.SchedulingDomainNova, - }, - Status: v1alpha1.KnowledgeStatus{ - RawLength: 15, + Type: v1alpha1.PipelineTypeFilterWeigher, + Filters: []v1alpha1.FilterSpec{}, + Weighers: []v1alpha1.WeigherSpec{}, }, }, - expectReEvaluate: false, + pipelineExists: true, + schedulingDomain: v1alpha1.SchedulingDomainNova, + initPipelineError: true, + expectInMap: false, + expectReady: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, + var objects []client.Object + if tt.pipelineExists { + objects = append(objects, tt.pipeline) } fakeClient := fake.NewClientBuilder(). WithScheme(scheme). - WithObjects(tt.newKnowledge, pipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}, &v1alpha1.Knowledge{}). + WithObjects(objects...). + WithStatusSubresource(&v1alpha1.Pipeline{}). 
Build() + initializer := &mockPipelineInitializer{ + pipelineType: v1alpha1.PipelineTypeFilterWeigher, + } + if tt.initPipelineError { + initializer.initPipelineFunc = func(ctx context.Context, p v1alpha1.Pipeline) PipelineInitResult[mockPipeline] { + return PipelineInitResult[mockPipeline]{ + FilterErrors: map[string]error{ + "test-filter": errors.New("filter initialization failed"), + }, + } + } + } + controller := &BasePipelineController[mockPipeline]{ Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: make(map[string]mockPipeline), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), + SchedulingDomain: tt.schedulingDomain, + Initializer: initializer, + Pipelines: make(map[string]mockPipeline), + PipelineConfigs: make(map[string]v1alpha1.Pipeline), } - evt := event.UpdateEvent{ - ObjectOld: tt.oldKnowledge, - ObjectNew: tt.newKnowledge, + // For delete test, pre-populate the maps + if !tt.pipelineExists { + controller.Pipelines[tt.pipeline.Name] = mockPipeline{name: tt.pipeline.Name} + controller.PipelineConfigs[tt.pipeline.Name] = *tt.pipeline } - controller.HandleKnowledgeUpdated(context.Background(), evt, nil) - - _, exists := controller.Pipelines[pipeline.Name] - if tt.expectReEvaluate && !exists { - t.Error("Expected pipeline to be re-evaluated") - } - if !tt.expectReEvaluate && exists { - t.Error("Expected pipeline not to be re-evaluated") + req := ctrl.Request{ + NamespacedName: client.ObjectKey{Name: tt.pipeline.Name}, } - }) - } -} - -func TestBasePipelineController_HandleKnowledgeDeleted(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - knowledge := &v1alpha1.Knowledge{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-knowledge", - Namespace: "default", - }, - Spec: v1alpha1.KnowledgeSpec{ - SchedulingDomain: 
v1alpha1.SchedulingDomainNova, - }, - } - - pipeline := &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Type: v1alpha1.PipelineTypeFilterWeigher, - Weighers: []v1alpha1.WeigherSpec{ - { - Name: "test-weigher", - }, - }, - }, - } - fakeClient := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(pipeline). - WithStatusSubresource(&v1alpha1.Pipeline{}). - Build() - - controller := &BasePipelineController[mockPipeline]{ - Client: fakeClient, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Initializer: &mockPipelineInitializer{ - pipelineType: v1alpha1.PipelineTypeFilterWeigher, - }, - Pipelines: map[string]mockPipeline{ - "test-pipeline": {name: "test-pipeline"}, - }, - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - } + _, err := controller.Reconcile(context.Background(), req) + if err != nil { + t.Fatalf("Reconcile failed: %v", err) + } - evt := event.DeleteEvent{ - Object: knowledge, - } + // Check if pipeline is in map + _, inMap := controller.Pipelines[tt.pipeline.Name] + if inMap != tt.expectInMap { + t.Errorf("Expected pipeline in map: %v, got: %v", tt.expectInMap, inMap) + } - controller.HandleKnowledgeDeleted(context.Background(), evt, nil) + // Check pipeline status if it exists + if tt.pipelineExists { + var updatedPipeline v1alpha1.Pipeline + err := fakeClient.Get(context.Background(), client.ObjectKey{Name: tt.pipeline.Name}, &updatedPipeline) + if err != nil { + t.Fatalf("Failed to get updated pipeline: %v", err) + } - // Check that the pipeline was re-evaluated and is still in the map - if _, exists := controller.Pipelines[pipeline.Name]; !exists { - t.Error("Expected pipeline to be re-evaluated after knowledge deletion") + ready := meta.IsStatusConditionTrue(updatedPipeline.Status.Conditions, v1alpha1.PipelineConditionReady) + if ready != tt.expectReady { + t.Errorf("Expected ready: %v, got: %v", tt.expectReady, 
ready) + } + } + }) } } diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller.go b/internal/scheduling/machines/filter_weigher_pipeline_controller.go index 2b0c44f64..7fdfa896a 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller.go @@ -19,8 +19,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/machines/plugins/filters" "github.com/cobaltcore-dev/cortex/internal/scheduling/machines/plugins/weighers" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" @@ -54,91 +52,25 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - c.processMu.Lock() - defer c.processMu.Unlock() - - // Determine if this is a decision or machine reconciliation. - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} - func (c *FilterWeigherPipelineController) ProcessNewMachine(ctx context.Context, machine *ironcorev1alpha1.Machine) error { c.processMu.Lock() defer c.processMu.Unlock() - // Create a decision resource to schedule the machine. 
- decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "machine-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainMachines, - ResourceID: machine.Name, - PipelineRef: corev1.ObjectReference{ - Name: "machines-scheduler", - }, - MachineRef: &corev1.ObjectReference{ - Name: machine.Name, - Namespace: machine.Namespace, - }, - }, - } + log := ctrl.LoggerFrom(ctx) + startedAt := time.Now() - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] + pipelineName := "machines-scheduler" + + pipeline, ok := c.Pipelines[pipelineName] if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) - } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } + log.Error(nil, "pipeline not found or not ready", "pipelineName", pipelineName) + return errors.New("pipeline not found or not ready") } - return err -} -func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. 
- - pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipelineConfig, ok := c.PipelineConfigs[pipelineName] if !ok { - log.Error(nil, "pipeline not found or not ready", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline not found or not ready") + log.Error(nil, "pipeline not configured", "pipelineName", pipelineName) + return fmt.Errorf("pipeline %s not configured", pipelineName) } // Find all available machine pools. @@ -157,27 +89,38 @@ func (c *FilterWeigherPipelineController) process(ctx context.Context, decision log.V(1).Error(err, "failed to run scheduler pipeline") return errors.New("failed to run scheduler pipeline") } - decision.Status.Result = &result - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - // Set the machine pool ref on the machine. - machine := &ironcorev1alpha1.Machine{} - if err := c.Get(ctx, client.ObjectKey{ - Name: decision.Spec.MachineRef.Name, - Namespace: decision.Spec.MachineRef.Namespace, - }, machine); err != nil { - log.Error(err, "failed to fetch machine for decision") - return err + log.Info("machine processed successfully", "duration", time.Since(startedAt)) + + hosts := result.OrderedHosts + if len(hosts) == 0 { + log.Info("no suitable machine pools found by pipeline") + return errors.New("no suitable machine pools found") } + + targetHost := hosts[0] + + // Set the machine pool ref on the machine. + // Assign the first machine pool returned by the pipeline. 
old := machine.DeepCopy() - machine.Spec.MachinePoolRef = &corev1.LocalObjectReference{Name: *result.TargetHost} + machine.Spec.MachinePoolRef = &corev1.LocalObjectReference{Name: targetHost} patch := client.MergeFrom(old) if err := c.Patch(ctx, machine, patch); err != nil { log.V(1).Error(err, "failed to assign machine pool to instance") return err } - log.V(1).Info("assigned machine pool to instance", "machinePool", *result.TargetHost) + log.V(1).Info("assigned machine pool to instance", "machinePool", targetHost) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + ResourceID: machine.Name, + PipelineName: pipelineName, + Result: result, + // TODO: Refine the reason + Intent: v1alpha1.SchedulingIntentUnknown, + } + } return nil } @@ -225,7 +168,7 @@ func (c *FilterWeigherPipelineController) handleMachine() handler.EventHandler { return } for _, decision := range decisions.Items { - if decision.Spec.MachineRef.Name == machine.Name && decision.Spec.MachineRef.Namespace == machine.Namespace { + if decision.Spec.ResourceID == machine.Name && decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainMachines { if err := c.Delete(ctx, &decision); err != nil { log.Error(err, "failed to delete decision for deleted machine") } @@ -238,6 +181,7 @@ func (c *FilterWeigherPipelineController) handleMachine() handler.EventHandler { func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainMachines + c.Recorder = mgr.GetEventRecorder("cortex-machines-pipeline-controller") if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } @@ -259,37 +203,16 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, return machine.Spec.Scheduler == "" }), ). - // Watch pipeline changes so that we can reconfigure pipelines as needed. 
- WatchesMulticluster( + For( &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { + builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines { return false } return pipeline.Spec.Type == c.PipelineType() - }), - ). - Named("cortex-machine-scheduler"). - For( - &v1alpha1.Decision{}, - builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines { - return false - } - // Ignore already decided schedulings. - if decision.Status.Result != nil { - return false - } - return true })), ). + Named("cortex-machine-scheduler"). 
Complete(c) } diff --git a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go index 58c574f45..eb2613807 100644 --- a/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/machines/filter_weigher_pipeline_controller_test.go @@ -12,194 +12,12 @@ import ( "github.com/cobaltcore-dev/cortex/api/v1alpha1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheduling scheme: %v", err) - } - if err := ironcorev1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add ironcore scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - machinePools []ironcorev1alpha1.MachinePool - machine *ironcorev1alpha1.Machine - expectError bool - expectDecision bool - expectTargetHost string - expectMachinePool string - }{ - { - name: "successful machine decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainMachines, - ResourceID: "test-machine", - PipelineRef: corev1.ObjectReference{ - Name: "machines-scheduler", - }, - MachineRef: &corev1.ObjectReference{ - Name: "test-machine", - Namespace: "default", - }, - }, - }, - machinePools: []ironcorev1alpha1.MachinePool{ - { - ObjectMeta: metav1.ObjectMeta{Name: "pool1"}, - }, - { - ObjectMeta: metav1.ObjectMeta{Name: "pool2"}, - }, - }, - machine: &ironcorev1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - 
Name: "test-machine", - Namespace: "default", - }, - Spec: ironcorev1alpha1.MachineSpec{ - Scheduler: "", - }, - }, - expectError: false, - expectDecision: true, - expectTargetHost: "pool1", // NoopFilter returns first pool - expectMachinePool: "pool1", - }, - { - name: "no machine pools available", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pools", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainMachines, - ResourceID: "test-machine", - PipelineRef: corev1.ObjectReference{ - Name: "machines-scheduler", - }, - MachineRef: &corev1.ObjectReference{ - Name: "test-machine", - Namespace: "default", - }, - }, - }, - machinePools: []ironcorev1alpha1.MachinePool{}, - expectError: true, - expectDecision: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.decision} - for i := range tt.machinePools { - objects = append(objects, &tt.machinePools[i]) - } - if tt.machine != nil { - objects = append(objects, tt.machine) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). 
- Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]]{ - Pipelines: map[string]lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]{ - "machines-scheduler": createMockPipeline(), - }, - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - controller.Client = client - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("expected error but got none") - return - } - - if !tt.expectError && err != nil { - t.Errorf("expected no error, got: %v", err) - return - } - - if result.RequeueAfter > 0 { - t.Errorf("unexpected requeue: %v", result.RequeueAfter) - } - - // Verify decision status if expected - if tt.expectDecision { - var updatedDecision v1alpha1.Decision - err := client.Get(context.Background(), req.NamespacedName, &updatedDecision) - if err != nil { - t.Errorf("Failed to get updated decision: %v", err) - return - } - - if updatedDecision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - - if updatedDecision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") - return - } - - if *updatedDecision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *updatedDecision.Status.Result.TargetHost) - } - - // Verify machine was updated with machine pool ref - if tt.machine != nil { - var updatedMachine ironcorev1alpha1.Machine - err := client.Get(context.Background(), types.NamespacedName{ - Name: tt.machine.Name, - Namespace: tt.machine.Namespace, - }, &updatedMachine) - if err != nil { - t.Errorf("Failed to get updated machine: %v", err) - return - } - - if updatedMachine.Spec.MachinePoolRef == nil { - t.Error("expected machine pool ref to be set") - return - } 
- - if updatedMachine.Spec.MachinePoolRef.Name != tt.expectMachinePool { - t.Errorf("expected machine pool %q, got %q", tt.expectMachinePool, updatedMachine.Spec.MachinePoolRef.Name) - } - } - } - }) - } -} - func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { controller := &FilterWeigherPipelineController{ Monitor: lib.FilterWeigherPipelineMonitor{}, @@ -289,14 +107,12 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { machine *ironcorev1alpha1.Machine machinePools []ironcorev1alpha1.MachinePool pipelineConfig *v1alpha1.Pipeline - createDecisions bool expectError bool - expectDecisionCreated bool expectMachinePoolAssigned bool expectTargetHost string }{ { - name: "successful machine processing with decision creation", + name: "successful machine processing", machine: &ironcorev1alpha1.Machine{ ObjectMeta: metav1.ObjectMeta{ Name: "test-machine", @@ -314,40 +130,6 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "pool2"}, }, }, - pipelineConfig: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "machines-scheduler", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainMachines, - CreateDecisions: true, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - createDecisions: true, - expectError: false, - expectDecisionCreated: true, - expectMachinePoolAssigned: true, - expectTargetHost: "pool1", - }, - { - name: "successful machine processing without decision creation", - machine: &ironcorev1alpha1.Machine{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-machine-no-decision", - Namespace: "default", - }, - Spec: ironcorev1alpha1.MachineSpec{ - Scheduler: "", - }, - }, - machinePools: []ironcorev1alpha1.MachinePool{ - { - ObjectMeta: metav1.ObjectMeta{Name: "pool1"}, - }, - }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: 
"machines-scheduler", @@ -360,9 +142,7 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: false, expectError: false, - expectDecisionCreated: false, expectMachinePoolAssigned: true, expectTargetHost: "pool1", }, @@ -380,7 +160,6 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { machinePools: []ironcorev1alpha1.MachinePool{}, pipelineConfig: nil, expectError: true, - expectDecisionCreated: false, expectMachinePoolAssigned: false, }, { @@ -407,9 +186,7 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, expectError: true, - expectDecisionCreated: true, // Decision is created but processing fails expectMachinePoolAssigned: false, }, } @@ -434,6 +211,7 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]]{ Pipelines: map[string]lib.FilterWeigherPipeline[ironcore.MachinePipelineRequest]{}, PipelineConfigs: map[string]v1alpha1.Pipeline{}, + DecisionQueue: make(chan lib.DecisionUpdate), }, Monitor: lib.FilterWeigherPipelineMonitor{}, } @@ -456,73 +234,6 @@ func TestFilterWeigherPipelineController_ProcessNewMachine(t *testing.T) { return } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.MachineRef != nil && - decision.Spec.MachineRef.Name == tt.machine.Name && - decision.Spec.MachineRef.Namespace == tt.machine.Namespace { - found = true - - // Verify decision properties - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainMachines { - 
t.Errorf("expected scheduling domain %q, got %q", v1alpha1.SchedulingDomainMachines, decision.Spec.SchedulingDomain) - } - if decision.Spec.ResourceID != tt.machine.Name { - t.Errorf("expected resource ID %q, got %q", tt.machine.Name, decision.Spec.ResourceID) - } - if decision.Spec.PipelineRef.Name != "machines-scheduler" { - t.Errorf("expected pipeline ref %q, got %q", "machines-scheduler", decision.Spec.PipelineRef.Name) - } - - // Check if result was set (only for successful cases) - if !tt.expectError && tt.expectTargetHost != "" { - if decision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - if decision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") - return - } - if *decision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *decision.Status.Result.TargetHost) - } - } - break - } - } - - if !found { - t.Error("expected decision to be created but was not found") - } - } else { - // Check that no decisions were created - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - for _, decision := range decisions.Items { - if decision.Spec.MachineRef != nil && - decision.Spec.MachineRef.Name == tt.machine.Name && - decision.Spec.MachineRef.Namespace == tt.machine.Namespace { - t.Error("expected no decision to be created but found one") - break - } - } - } - // Check if machine pool was assigned (if expected) if tt.expectMachinePoolAssigned { var updatedMachine ironcorev1alpha1.Machine @@ -555,14 +266,16 @@ func createMockPipeline() lib.FilterWeigherPipeline[ironcore.MachinePipelineRequ type mockMachinePipeline struct{} -func (m *mockMachinePipeline) Run(request ironcore.MachinePipelineRequest) (v1alpha1.DecisionResult, error) { +func (m *mockMachinePipeline) Run(request ironcore.MachinePipelineRequest) 
(lib.FilterWeigherPipelineResult, error) { if len(request.Pools) == 0 { - return v1alpha1.DecisionResult{}, nil + return lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{}, + }, nil } // Return the first pool as the target host targetHost := request.Pools[0].Name - return v1alpha1.DecisionResult{ - TargetHost: &targetHost, + return lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{targetHost}, }, nil } diff --git a/internal/scheduling/manila/external_scheduler_api.go b/internal/scheduling/manila/external_scheduler_api.go index 2ad5a9265..d7857faa2 100644 --- a/internal/scheduling/manila/external_scheduler_api.go +++ b/internal/scheduling/manila/external_scheduler_api.go @@ -14,19 +14,14 @@ import ( "net/http" api "github.com/cobaltcore-dev/cortex/api/external/manila" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/metrics" ) type HTTPAPIDelegate interface { - // Process the decision from the API. Should create and return the updated decision. - ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error + // Process the scheduling request from the API. + ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*scheduling.FilterWeigherPipelineResult, error) } type HTTPAPI interface { @@ -106,7 +101,6 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R c.Respond(http.StatusInternalServerError, err, "failed to read request body") return } - raw := runtime.RawExtension{Raw: body} var requestData api.ExternalSchedulerRequest // Copy the raw body to a io.Reader for json deserialization. 
cp := body @@ -137,35 +131,19 @@ func (httpAPI *httpAPI) ManilaExternalScheduler(w http.ResponseWriter, r *http.R slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) } - // Create the decision object in kubernetes. - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "manila-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: requestData.Pipeline, - }, - ResourceID: "", // TODO model out the spec. - ManilaRaw: &raw, - }, - } ctx := r.Context() - if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision") - return - } - // Check if the decision contains status conditions indicating an error. - if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) { - c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") + + result, err := httpAPI.delegate.ProcessRequest(ctx, requestData) + if err != nil { + c.Respond(http.StatusInternalServerError, err, "failed to process scheduling request") return } - if decision.Status.Result == nil { - c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") + if result == nil { + c.Respond(http.StatusInternalServerError, errors.New("pipeline didn't produce a result"), "failed to process scheduling request") return } - hosts := decision.Status.Result.OrderedHosts + + hosts := result.OrderedHosts response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") if err = json.NewEncoder(w).Encode(response); err != nil { diff --git a/internal/scheduling/manila/external_scheduler_api_test.go b/internal/scheduling/manila/external_scheduler_api_test.go index dfdb3f534..871de4ff1 100644 --- 
a/internal/scheduling/manila/external_scheduler_api_test.go +++ b/internal/scheduling/manila/external_scheduler_api_test.go @@ -4,7 +4,6 @@ package manila import ( - "bytes" "context" "encoding/json" "errors" @@ -14,20 +13,20 @@ import ( "testing" manilaapi "github.com/cobaltcore-dev/cortex/api/external/manila" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" ) type mockHTTPAPIDelegate struct { - processDecisionFunc func(ctx context.Context, decision *v1alpha1.Decision) error + processFunc func(ctx context.Context, request manilaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } -func (m *mockHTTPAPIDelegate) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { - if m.processDecisionFunc != nil { - return m.processDecisionFunc(ctx, decision) +func (m *mockHTTPAPIDelegate) ProcessRequest(ctx context.Context, request manilaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + if m.processFunc != nil { + return m.processFunc(ctx, request) } - return nil + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1"}, + }, nil } func TestNewAPI(t *testing.T) { @@ -142,13 +141,12 @@ func TestHTTPAPI_canRunScheduler(t *testing.T) { func TestHTTPAPI_ManilaExternalScheduler(t *testing.T) { tests := []struct { - name string - method string - body string - processDecisionErr error - decisionResult *v1alpha1.Decision - expectedStatus int - expectedHosts []string + name string + method string + body string + processFunc func(ctx context.Context, request manilaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + expectedStatus int + expectedHosts []string }{ { name: "invalid method", @@ -180,13 +178,6 @@ func TestHTTPAPI_ManilaExternalScheduler(t *testing.T) { } return string(data) }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - 
Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - }, - }, - }, expectedStatus: http.StatusOK, expectedHosts: []string{"host1"}, }, @@ -209,38 +200,8 @@ func TestHTTPAPI_ManilaExternalScheduler(t *testing.T) { } return string(data) }(), - processDecisionErr: errors.New("processing failed"), - expectedStatus: http.StatusInternalServerError, - }, - { - name: "decision failed", - method: http.MethodPost, - body: func() string { - req := manilaapi.ExternalSchedulerRequest{ - Hosts: []manilaapi.ExternalSchedulerHost{ - {ShareHost: "host1"}, - }, - Weights: map[string]float64{ - "host1": 1.0, - }, - Pipeline: "test-pipeline", - } - data, err := json.Marshal(req) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } - return string(data) - }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Conditions: []metav1.Condition{ - { - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "SchedulingError", - }, - }, - }, + processFunc: func(ctx context.Context, request manilaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return nil, errors.New("processing failed") }, expectedStatus: http.StatusInternalServerError, }, @@ -249,16 +210,7 @@ func TestHTTPAPI_ManilaExternalScheduler(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - if tt.processDecisionErr != nil { - return tt.processDecisionErr - } - if tt.decisionResult != nil { - decision.Status = tt.decisionResult.Status - return nil - } - return nil - }, + processFunc: tt.processFunc, } api := NewAPI(delegate).(*httpAPI) @@ -346,63 +298,3 @@ func TestHTTPAPI_inferPipelineName(t *testing.T) { }) } } - -func TestHTTPAPI_ManilaExternalScheduler_DecisionCreation(t *testing.T) { - var capturedDecision *v1alpha1.Decision - delegate := &mockHTTPAPIDelegate{ - 
processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - capturedDecision = decision - // Set a successful result to avoid "decision didn't produce a result" error - decision.Status.Result = &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - } - return nil - }, - } - - api := NewAPI(delegate).(*httpAPI) - - requestData := manilaapi.ExternalSchedulerRequest{ - Hosts: []manilaapi.ExternalSchedulerHost{ - {ShareHost: "host1"}, - }, - Weights: map[string]float64{ - "host1": 1.0, - }, - Pipeline: "test-pipeline", - } - - body, err := json.Marshal(requestData) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } - req := httptest.NewRequest(http.MethodPost, "/scheduler/manila/external", bytes.NewReader(body)) - w := httptest.NewRecorder() - - api.ManilaExternalScheduler(w, req) - - if w.Code != http.StatusOK { - t.Errorf("Expected status %d, got %d", http.StatusOK, w.Code) - } - - if capturedDecision == nil { - t.Fatal("Decision was not captured") - } - - // Verify decision fields - if capturedDecision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { - t.Errorf("Expected scheduling domain %s, got %s", v1alpha1.SchedulingDomainManila, capturedDecision.Spec.SchedulingDomain) - } - - if capturedDecision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("Expected pipeline 'test-pipeline', got %s", capturedDecision.Spec.PipelineRef.Name) - } - - if capturedDecision.GenerateName != "manila-" { - t.Errorf("Expected generate name 'manila-', got %s", capturedDecision.GenerateName) - } - - if capturedDecision.Spec.ManilaRaw == nil { - t.Error("ManilaRaw should not be nil") - } -} diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller.go b/internal/scheduling/manila/filter_weigher_pipeline_controller.go index 3b63d64e0..ec106b636 100644 --- a/internal/scheduling/manila/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller.go @@ -5,16 
+5,12 @@ package manila import ( "context" - "encoding/json" - "errors" "fmt" "sync" "time" api "github.com/cobaltcore-dev/cortex/api/external/manila" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/internal/scheduling/manila/plugins/filters" @@ -26,6 +22,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // The decision pipeline controller takes decision resources containing a @@ -50,93 +47,43 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -// Callback executed when kubernetes asks to reconcile a decision resource. -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - c.processMu.Lock() - defer c.processMu.Unlock() - - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} - // Process the decision from the API. Should create and return the updated decision. 
-func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { +func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { c.processMu.Lock() defer c.processMu.Unlock() - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] - if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) - } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } - } - return err -} - -func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. 
+ startedAt := time.Now() + + pipelineName := request.Pipeline - pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipeline, ok := c.Pipelines[pipelineName] if !ok { - log.Error(nil, "skipping decision, pipeline not found or not ready") - return errors.New("pipeline not found or not ready") - } - if decision.Spec.ManilaRaw == nil { - log.Error(nil, "skipping decision, no manilaRaw spec defined") - return errors.New("no manilaRaw spec defined") + return nil, fmt.Errorf("pipeline %s not found or not ready", pipelineName) } - var request api.ExternalSchedulerRequest - if err := json.Unmarshal(decision.Spec.ManilaRaw.Raw, &request); err != nil { - log.Error(err, "failed to unmarshal manilaRaw spec") - return err + pipelineConfig, ok := c.PipelineConfigs[pipelineName] + if !ok { + log.Error(nil, "pipeline config not found", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline config for %s not found", pipelineName) } result, err := pipeline.Run(request) if err != nil { - log.Error(err, "failed to run pipeline") - return err + log.Error(err, "failed to run pipeline", "pipeline", pipelineName) + return nil, err + } + log.Info("request processed successfully", "duration", time.Since(startedAt)) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + // TODO model out the spec. + ResourceID: "", + PipelineName: pipelineName, + Result: result, + Intent: v1alpha1.SchedulingIntentUnknown, + } } - decision.Status.Result = &result - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - return nil + return &result, nil } // The base controller will delegate the pipeline creation down to this method. 
@@ -156,54 +103,38 @@ func (c *FilterWeigherPipelineController) InitPipeline( func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainManila + c.Recorder = mgr.GetEventRecorder("cortex-manila-pipeline-controller") if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } return multicluster.BuildController(mcl, mgr). - // Watch pipeline changes so that we can reconfigure pipelines as needed. - WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. - if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { - return false - } - return pipeline.Spec.Type == c.PipelineType() - }), - ). // Watch knowledge changes so that we can reconfigure pipelines as needed. WatchesMulticluster( &v1alpha1.Knowledge{}, - handler.Funcs{ - CreateFunc: c.HandleKnowledgeCreated, - UpdateFunc: c.HandleKnowledgeUpdated, - DeleteFunc: c.HandleKnowledgeDeleted, - }, + // Get all pipelines of the controller when knowledge changes and trigger reconciliation to update the candidates in the pipelines. + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { + knowledge := obj.(*v1alpha1.Knowledge) + if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { + return nil + } + // When Knowledge changes, reconcile all pipelines + return c.GetAllPipelineReconcileRequests(ctx) + }), predicate.NewPredicateFuncs(func(obj client.Object) bool { knowledge := obj.(*v1alpha1.Knowledge) // Only react to knowledge matching the scheduling domain. 
return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainManila }), ). - Named("cortex-manila-decisions"). + Named("cortex-manila-pipelines"). For( - &v1alpha1.Decision{}, + &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { - return false - } - // Ignore already decided schedulings. - if decision.Status.Result != nil { + pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainManila { return false } - return true + return pipeline.Spec.Type == c.PipelineType() })), ). Complete(c) diff --git a/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go index a9fc2df1d..8d9578943 100644 --- a/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/manila/filter_weigher_pipeline_controller_test.go @@ -5,13 +5,9 @@ package manila import ( "context" - "encoding/json" "testing" - corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -25,250 +21,38 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) } - manilaRequest := api.ExternalSchedulerRequest{ - Spec: map[string]any{ - "share_id": "test-share-id", - "size": 10, - }, - Context: api.ManilaRequestContext{ - ProjectID: "test-project", - UserID: "test-user", - RequestID: "req-123", - GlobalRequestID: "global-req-123", - }, - Hosts: 
[]api.ExternalSchedulerHost{ - {ShareHost: "manila-share-1@backend1"}, - {ShareHost: "manila-share-2@backend2"}, - }, - Weights: map[string]float64{"manila-share-1@backend1": 1.0, "manila-share-2@backend2": 0.5}, - Pipeline: "test-pipeline", - } - - manilaRaw, err := json.Marshal(manilaRequest) - if err != nil { - t.Fatalf("Failed to marshal manila request: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - pipeline *v1alpha1.Pipeline - expectError bool - expectResult bool - }{ - { - name: "successful manila decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainManila, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: false, - expectResult: true, - }, - { - name: "decision without manilaRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: nil, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainManila, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: true, - expectResult: false, - }, - { - name: "pipeline 
not found", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, - }, - }, - pipeline: nil, - expectError: true, - expectResult: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []client.Object{tt.decision} - if tt.pipeline != nil { - objects = append(objects, tt.pipeline) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, - Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - - if tt.pipeline != nil { - initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: tt.pipeline.Name, - }, - Spec: tt.pipeline.Spec, - }) - if err != nil { - t.Fatalf("Failed to init pipeline: %v", err) - } - controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline - } - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - Namespace: tt.decision.Namespace, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("Expected error but got none") - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - if result.RequeueAfter > 0 { - t.Error("Expected no requeue") - } - - var updatedDecision v1alpha1.Decision - if err := client.Get(context.Background(), req.NamespacedName, &updatedDecision); err 
!= nil { - t.Fatalf("Failed to get updated decision: %v", err) - } - - if tt.expectResult && updatedDecision.Status.Result == nil { - t.Error("Expected result to be set but was nil") - } - if !tt.expectResult && updatedDecision.Status.Result != nil { - t.Error("Expected result to be nil but was set") - } - }) - } -} - -func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - manilaRequest := api.ExternalSchedulerRequest{ - Spec: map[string]any{ - "share_id": "test-share-id", - "size": 10, - }, - Context: api.ManilaRequestContext{ - ProjectID: "test-project", - UserID: "test-user", - RequestID: "req-123", - GlobalRequestID: "global-req-123", - }, - Hosts: []api.ExternalSchedulerHost{ - {ShareHost: "manila-share-1@backend1"}, - {ShareHost: "manila-share-2@backend2"}, - }, - Weights: map[string]float64{"manila-share-1@backend1": 1.0, "manila-share-2@backend2": 0.5}, - Pipeline: "test-pipeline", - } - - manilaRaw, err := json.Marshal(manilaRequest) - if err != nil { - t.Fatalf("Failed to marshal manila request: %v", err) - } - tests := []struct { - name string - decision *v1alpha1.Decision - pipelineConfig *v1alpha1.Pipeline - createDecisions bool - expectError bool - expectDecisionCreated bool - expectResult bool + name string + request api.ExternalSchedulerRequest + pipelineConfig *v1alpha1.Pipeline + expectError bool + expectResult bool }{ { - name: "successful decision processing with creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "test-decision-", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, - }, + name: "successful request processing", + request: 
api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "share_id": "test-share-id", + "size": 10, + }, + Context: api.ManilaRequestContext{ + ProjectID: "test-project", + UserID: "test-user", + RequestID: "req-123", + GlobalRequestID: "global-req-123", + }, + Hosts: []api.ExternalSchedulerHost{ + {ShareHost: "manila-share-1@backend1"}, + {ShareHost: "manila-share-2@backend2"}, + }, + Weights: map[string]float64{"manila-share-1@backend1": 1.0, "manila-share-2@backend2": 0.5}, + Pipeline: "test-pipeline", }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -282,81 +66,38 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: false, - expectDecisionCreated: true, - expectResult: true, - }, - { - name: "successful decision processing without creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-create", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, - }, - }, - pipelineConfig: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainManila, - CreateDecisions: false, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - createDecisions: false, - expectError: false, - expectDecisionCreated: false, - expectResult: true, + expectError: false, + expectResult: true, }, { name: "pipeline not configured", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "share_id": "test-share-id", }, - Spec: 
v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - ManilaRaw: &runtime.RawExtension{ - Raw: manilaRaw, - }, + Context: api.ManilaRequestContext{ + RequestID: "req-123", }, + Hosts: []api.ExternalSchedulerHost{{ShareHost: "manila-share-1@backend1"}}, + Weights: map[string]float64{"manila-share-1@backend1": 1.0}, + Pipeline: "nonexistent-pipeline", }, - pipelineConfig: nil, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + pipelineConfig: nil, + expectError: true, + expectResult: false, }, { - name: "decision without manilaRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw", - Namespace: "default", + name: "empty hosts", + request: api.ExternalSchedulerRequest{ + Spec: map[string]any{ + "share_id": "test-share-id", }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainManila, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - ManilaRaw: nil, + Context: api.ManilaRequestContext{ + RequestID: "req-123", }, + Hosts: []api.ExternalSchedulerHost{}, + Weights: map[string]float64{}, + Pipeline: "test-pipeline", }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -365,15 +106,13 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Spec: v1alpha1.PipelineSpec{ Type: v1alpha1.PipelineTypeFilterWeigher, SchedulingDomain: v1alpha1.SchedulingDomainManila, - CreateDecisions: true, + CreateDecisions: false, Filters: []v1alpha1.FilterSpec{}, Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: true, - expectDecisionCreated: false, - expectResult: false, + expectError: false, + expectResult: true, }, } @@ -384,31 +123,31 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) objects = append(objects, tt.pipelineConfig) } - client := 
fake.NewClientBuilder(). + fakeClient := fake.NewClientBuilder(). WithScheme(scheme). WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). Build() controller := &FilterWeigherPipelineController{ BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, + Client: fakeClient, Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), + DecisionQueue: make(chan lib.DecisionUpdate, 10), }, Monitor: lib.FilterWeigherPipelineMonitor{}, } if tt.pipelineConfig != nil { controller.PipelineConfigs[tt.pipelineConfig.Name] = *tt.pipelineConfig - initResult := controller.InitPipeline(t.Context(), *tt.pipelineConfig) + initResult := controller.InitPipeline(context.Background(), *tt.pipelineConfig) if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { t.Fatalf("Failed to init pipeline: %v", initResult) } controller.Pipelines[tt.pipelineConfig.Name] = initResult.Pipeline } - err := controller.ProcessNewDecisionFromAPI(context.Background(), tt.decision) + result, err := controller.ProcessRequest(context.Background(), tt.request) if tt.expectError && err == nil { t.Error("Expected error but got none") @@ -417,43 +156,11 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) t.Errorf("Expected no error but got: %v", err) } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainManila { - found = true - - // Verify decision properties - if decision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("expected pipeline ref %q, got %q", 
"test-pipeline", decision.Spec.PipelineRef.Name) - } - - // Check if result was set - if tt.expectResult { - if decision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - } - break - } - } - - if !found { - t.Error("expected decision to be created but was not found") - } - } else if !tt.expectError { - // For cases without creation, check that the decision has the right status - if tt.expectResult && tt.decision.Status.Result == nil { - t.Error("expected decision result to be set in original decision object") + if tt.expectResult { + if result == nil { + t.Error("Expected result but got nil") + } else if len(result.OrderedHosts) == 0 && len(tt.request.Hosts) > 0 { + t.Error("Expected ordered hosts in result") } } }) diff --git a/internal/scheduling/nova/detector_pipeline_controller.go b/internal/scheduling/nova/detector_pipeline_controller.go index 68d7d1ed5..a968eb6f1 100644 --- a/internal/scheduling/nova/detector_pipeline_controller.go +++ b/internal/scheduling/nova/detector_pipeline_controller.go @@ -21,6 +21,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // The deschedulings pipeline controller is responsible for periodically running @@ -124,11 +125,6 @@ func (c *DetectorPipelineController) CreateDeschedulingsPeriodically(ctx context } } -func (c *DetectorPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - // This controller does not reconcile any resources directly. - return ctrl.Result{}, nil -} - func (c *DetectorPipelineController) SetupWithManager(mgr ctrl.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainNova @@ -138,40 +134,32 @@ func (c *DetectorPipelineController) SetupWithManager(mgr ctrl.Manager, mcl *mul return multicluster.BuildController(mcl, mgr). 
// Watch pipeline changes so that we can reconfigure pipelines as needed. WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. - if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - return false + &v1alpha1.Knowledge{}, + // Get all pipelines of the controller when knowledge changes and trigger reconciliation to update the candidates in the pipelines. + handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { + knowledge := obj.(*v1alpha1.Knowledge) + if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { + return nil } - return pipeline.Spec.Type == c.PipelineType() + // When Knowledge changes, reconcile all pipelines + return c.GetAllPipelineReconcileRequests(ctx) }), - ). - // Watch knowledge changes so that we can reconfigure pipelines as needed. - WatchesMulticluster( - &v1alpha1.Knowledge{}, - handler.Funcs{ - CreateFunc: c.HandleKnowledgeCreated, - UpdateFunc: c.HandleKnowledgeUpdated, - DeleteFunc: c.HandleKnowledgeDeleted, - }, predicate.NewPredicateFuncs(func(obj client.Object) bool { knowledge := obj.(*v1alpha1.Knowledge) // Only react to knowledge matching the scheduling domain. return knowledge.Spec.SchedulingDomain == v1alpha1.SchedulingDomainNova }), ). - Named("cortex-nova-deschedulings"). + // Watch hypervisor changes so the cache gets updated. + Named("cortex-nova-descheduler"). For( - &v1alpha1.Descheduling{}, + &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - return false // This controller does not reconcile Descheduling resources directly. 
+ pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { + return false + } + return pipeline.Spec.Type == c.PipelineType() })), ). Complete(c) diff --git a/internal/scheduling/nova/external_scheduler_api.go b/internal/scheduling/nova/external_scheduler_api.go index c3a5de071..509a23538 100644 --- a/internal/scheduling/nova/external_scheduler_api.go +++ b/internal/scheduling/nova/external_scheduler_api.go @@ -15,13 +15,8 @@ import ( "slices" api "github.com/cobaltcore-dev/cortex/api/external/nova" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" scheduling "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/runtime" "sigs.k8s.io/controller-runtime/pkg/metrics" ) @@ -32,8 +27,8 @@ type HTTPAPIConfig struct { } type HTTPAPIDelegate interface { - // Process the decision from the API. Should create and return the updated decision. - ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error + // Process the scheduling request from the API. + ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*scheduling.FilterWeigherPipelineResult, error) } type HTTPAPI interface { @@ -187,7 +182,6 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req c.Respond(http.StatusInternalServerError, err, "failed to read request body") return } - raw := runtime.RawExtension{Raw: body} var requestData api.ExternalSchedulerRequest // Copy the raw body to a io.Reader for json deserialization. cp := body @@ -207,7 +201,7 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req return } - // If the pipeline name is not set, infer it from the request data. + // If the pipeline name is not set, set it to a default value. 
if requestData.Pipeline == "" { var err error requestData.Pipeline, err = httpAPI.inferPipelineName(requestData) @@ -218,38 +212,17 @@ func (httpAPI *httpAPI) NovaExternalScheduler(w http.ResponseWriter, r *http.Req slog.Info("inferred pipeline name", "pipeline", requestData.Pipeline) } - decision := &v1alpha1.Decision{ - TypeMeta: metav1.TypeMeta{ - Kind: "Decision", - APIVersion: "cortex.cloud/v1alpha1", - }, - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "nova-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: requestData.Pipeline, - }, - ResourceID: requestData.Spec.Data.InstanceUUID, - NovaRaw: &raw, - }, - } ctx := r.Context() - if err := httpAPI.delegate.ProcessNewDecisionFromAPI(ctx, decision); err != nil { - c.Respond(http.StatusInternalServerError, err, "failed to process scheduling decision") - return - } - // Check if the decision contains status conditions indicating an error. - if meta.IsStatusConditionFalse(decision.Status.Conditions, v1alpha1.DecisionConditionReady) { - c.Respond(http.StatusInternalServerError, errors.New("decision contains error condition"), "decision failed") + result, err := httpAPI.delegate.ProcessRequest(ctx, requestData) + if err != nil { + c.Respond(http.StatusInternalServerError, err, "failed to process scheduling request") return } - if decision.Status.Result == nil { - c.Respond(http.StatusInternalServerError, errors.New("decision didn't produce a result"), "decision failed") + if result == nil { + c.Respond(http.StatusInternalServerError, errors.New("pipeline didn't produce a result"), "failed to process scheduling request") return } - hosts := decision.Status.Result.OrderedHosts + hosts := result.OrderedHosts hosts = limitHostsToRequest(requestData, hosts) response := api.ExternalSchedulerResponse{Hosts: hosts} w.Header().Set("Content-Type", "application/json") diff --git a/internal/scheduling/nova/external_scheduler_api_test.go 
b/internal/scheduling/nova/external_scheduler_api_test.go index 78b2a84b1..9564eb0e0 100644 --- a/internal/scheduling/nova/external_scheduler_api_test.go +++ b/internal/scheduling/nova/external_scheduler_api_test.go @@ -4,7 +4,6 @@ package nova import ( - "bytes" "context" "encoding/json" "errors" @@ -14,20 +13,20 @@ import ( "testing" novaapi "github.com/cobaltcore-dev/cortex/api/external/nova" - "github.com/cobaltcore-dev/cortex/api/v1alpha1" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" ) type mockHTTPAPIDelegate struct { - processDecisionFunc func(ctx context.Context, decision *v1alpha1.Decision) error + processFunc func(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) } -func (m *mockHTTPAPIDelegate) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { - if m.processDecisionFunc != nil { - return m.processDecisionFunc(ctx, decision) +func (m *mockHTTPAPIDelegate) ProcessRequest(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + if m.processFunc != nil { + return m.processFunc(ctx, request) } - return nil + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1"}, + }, nil } func TestNewAPI(t *testing.T) { @@ -145,13 +144,12 @@ func TestHTTPAPI_canRunScheduler(t *testing.T) { func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { tests := []struct { - name string - method string - body string - processDecisionErr error - decisionResult *v1alpha1.Decision - expectedStatus int - expectedHosts []string + name string + method string + body string + processFunc func(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) + expectedStatus int + expectedHosts []string }{ { name: "invalid method", @@ -176,9 +174,11 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { }, Hosts: 
[]novaapi.ExternalSchedulerHost{ {ComputeHost: "host1"}, + {ComputeHost: "host2"}, }, Weights: map[string]float64{ "host1": 1.0, + "host2": 2.0, }, Pipeline: "test-pipeline", } @@ -188,15 +188,13 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { } return string(data) }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Result: &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - }, - }, + processFunc: func(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return &lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{"host1", "host2"}, + }, nil }, expectedStatus: http.StatusOK, - expectedHosts: []string{"host1"}, + expectedHosts: []string{"host1", "host2"}, }, { name: "processing error", @@ -222,43 +220,8 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { } return string(data) }(), - processDecisionErr: errors.New("processing failed"), - expectedStatus: http.StatusInternalServerError, - }, - { - name: "decision failed", - method: http.MethodPost, - body: func() string { - req := novaapi.ExternalSchedulerRequest{ - Spec: novaapi.NovaObject[novaapi.NovaSpec]{ - Data: novaapi.NovaSpec{ - InstanceUUID: "test-uuid", - }, - }, - Hosts: []novaapi.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - }, - Weights: map[string]float64{ - "host1": 1.0, - }, - Pipeline: "test-pipeline", - } - data, err := json.Marshal(req) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } - return string(data) - }(), - decisionResult: &v1alpha1.Decision{ - Status: v1alpha1.DecisionStatus{ - Conditions: []metav1.Condition{ - { - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "SchedulingError", - }, - }, - }, + processFunc: func(ctx context.Context, request novaapi.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { + return nil, errors.New("processing failed") }, expectedStatus: 
http.StatusInternalServerError, }, @@ -267,16 +230,7 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - if tt.processDecisionErr != nil { - return tt.processDecisionErr - } - if tt.decisionResult != nil { - decision.Status = tt.decisionResult.Status - return nil - } - return nil - }, + processFunc: tt.processFunc, } config := HTTPAPIConfig{} @@ -318,76 +272,6 @@ func TestHTTPAPI_NovaExternalScheduler(t *testing.T) { } } -func TestHTTPAPI_NovaExternalScheduler_DecisionCreation(t *testing.T) { - var capturedDecision *v1alpha1.Decision - delegate := &mockHTTPAPIDelegate{ - processDecisionFunc: func(ctx context.Context, decision *v1alpha1.Decision) error { - capturedDecision = decision - // Set a successful result to avoid "decision didn't produce a result" error - decision.Status.Result = &v1alpha1.DecisionResult{ - OrderedHosts: []string{"host1"}, - } - return nil - }, - } - - config := HTTPAPIConfig{} - api := NewAPI(config, delegate).(*httpAPI) - - requestData := novaapi.ExternalSchedulerRequest{ - Spec: novaapi.NovaObject[novaapi.NovaSpec]{ - Data: novaapi.NovaSpec{ - InstanceUUID: "test-uuid-123", - }, - }, - Hosts: []novaapi.ExternalSchedulerHost{ - {ComputeHost: "host1"}, - }, - Weights: map[string]float64{ - "host1": 1.0, - }, - Pipeline: "test-pipeline", - } - - body, err := json.Marshal(requestData) - if err != nil { - t.Fatalf("Failed to marshal request data: %v", err) - } - req := httptest.NewRequest(http.MethodPost, "/scheduler/nova/external", bytes.NewReader(body)) - w := httptest.NewRecorder() - - api.NovaExternalScheduler(w, req) - - if w.Code != http.StatusOK { - t.Errorf("Expected status %d, got %d", http.StatusOK, w.Code) - } - - if capturedDecision == nil { - t.Fatal("Decision was not captured") - } - - // Verify decision fields - if 
capturedDecision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - t.Errorf("Expected scheduling domain %s, got %s", v1alpha1.SchedulingDomainNova, capturedDecision.Spec.SchedulingDomain) - } - - if capturedDecision.Spec.PipelineRef.Name != "test-pipeline" { - t.Errorf("Expected pipeline 'test-pipeline', got %s", capturedDecision.Spec.PipelineRef.Name) - } - - if capturedDecision.Spec.ResourceID != "test-uuid-123" { - t.Errorf("Expected resource ID 'test-uuid-123', got %s", capturedDecision.Spec.ResourceID) - } - - if capturedDecision.GenerateName != "nova-" { - t.Errorf("Expected generate name 'nova-', got %s", capturedDecision.GenerateName) - } - - if capturedDecision.Spec.NovaRaw == nil { - t.Error("NovaRaw should not be nil") - } -} - func TestLimitHostsToRequest(t *testing.T) { tests := []struct { name string diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller.go b/internal/scheduling/nova/filter_weigher_pipeline_controller.go index 301e6076a..fd9e44d5e 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller.go @@ -5,16 +5,12 @@ package nova import ( "context" - "encoding/json" - "errors" "fmt" "sync" "time" api "github.com/cobaltcore-dev/cortex/api/external/nova" "github.com/cobaltcore-dev/cortex/api/v1alpha1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "github.com/cobaltcore-dev/cortex/internal/scheduling/lib" "github.com/cobaltcore-dev/cortex/internal/scheduling/nova/plugins/filters" @@ -27,6 +23,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/handler" "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // The decision pipeline controller takes decision resources containing a @@ -53,115 +50,53 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return 
v1alpha1.PipelineTypeFilterWeigher } -// Callback executed when kubernetes asks to reconcile a decision resource. -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +// Process the request from the API. Returns the result of the pipeline execution. +func (c *FilterWeigherPipelineController) ProcessRequest(ctx context.Context, request api.ExternalSchedulerRequest) (*lib.FilterWeigherPipelineResult, error) { c.processMu.Lock() defer c.processMu.Unlock() - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} + log := ctrl.LoggerFrom(ctx) + startedAt := time.Now() -// Process the decision from the API. Should create and return the updated decision. 
-func (c *FilterWeigherPipelineController) ProcessNewDecisionFromAPI(ctx context.Context, decision *v1alpha1.Decision) error { - c.processMu.Lock() - defer c.processMu.Unlock() + pipelineName := request.Pipeline - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] + pipeline, ok := c.Pipelines[pipelineName] if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } + log.Error(nil, "pipeline not found or not ready", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline %s not found or not ready", pipelineName) } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) - } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } - } - return err -} - -func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. 
- - pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipelineConfig, ok := c.PipelineConfigs[pipelineName] if !ok { - log.Error(nil, "pipeline not found or not ready", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline not found or not ready") - } - if decision.Spec.NovaRaw == nil { - log.Error(nil, "skipping decision, no novaRaw spec defined") - return errors.New("no novaRaw spec defined") + log.Error(nil, "pipeline config not found", "pipelineName", pipelineName) + return nil, fmt.Errorf("pipeline config for %s not found", pipelineName) } - var request api.ExternalSchedulerRequest - if err := json.Unmarshal(decision.Spec.NovaRaw.Raw, &request); err != nil { - log.Error(err, "failed to unmarshal novaRaw spec") - return err - } - // If necessary gather all placement candidates before filtering. // This will override the hosts and weights in the nova request. - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] - if !ok { - log.Error(nil, "pipeline config not found", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline config not found") - } - if pipelineConf.Spec.IgnorePreselection { + if pipelineConfig.Spec.IgnorePreselection { log.Info("gathering all placement candidates before filtering") if err := c.gatherer.MutateWithAllCandidates(ctx, &request); err != nil { log.Error(err, "failed to gather all placement candidates") - return err + return nil, err } log.Info("gathered all placement candidates", "numHosts", len(request.Hosts)) } result, err := pipeline.Run(request) if err != nil { - log.Error(err, "failed to run pipeline") - return err + log.Error(err, "failed to run pipeline", "pipeline", pipelineName) + return nil, err + } + log.Info("request processed successfully", "duration", time.Since(startedAt)) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + ResourceID: request.Spec.Data.InstanceUUID, + PipelineName: pipelineName, + Result: 
result, + Intent: v1alpha1.SchedulingIntentUnknown, + } } - decision.Status.Result = &result - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - return nil + return &result, nil } // The base controller will delegate the pipeline creation down to this method. @@ -181,36 +116,24 @@ func (c *FilterWeigherPipelineController) InitPipeline( func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainNova + c.Recorder = mgr.GetEventRecorder("cortex-nova-pipeline-controller") c.gatherer = &candidateGatherer{Client: mcl} if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } return multicluster.BuildController(mcl, mgr). - // Watch pipeline changes so that we can reconfigure pipelines as needed. - WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. - if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - return false - } - return pipeline.Spec.Type == c.PipelineType() - }), - ). // Watch knowledge changes so that we can reconfigure pipelines as needed. WatchesMulticluster( &v1alpha1.Knowledge{}, - handler.Funcs{ - CreateFunc: c.HandleKnowledgeCreated, - UpdateFunc: c.HandleKnowledgeUpdated, - DeleteFunc: c.HandleKnowledgeDeleted, - }, + // Get all pipelines of the controller when knowledge changes and trigger reconciliation to update the candidates in the pipelines. 
+ handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request { + knowledge := obj.(*v1alpha1.Knowledge) + if knowledge.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { + return nil + } + // When Knowledge changes, reconcile all pipelines + return c.GetAllPipelineReconcileRequests(ctx) + }), predicate.NewPredicateFuncs(func(obj client.Object) bool { knowledge := obj.(*v1alpha1.Knowledge) // Only react to knowledge matching the scheduling domain. @@ -219,21 +142,16 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, ). // Watch hypervisor changes so the cache gets updated. WatchesMulticluster(&hv1.Hypervisor{}, handler.Funcs{}). - // Watch reservation changes so the cache gets updated. WatchesMulticluster(&v1alpha1.Reservation{}, handler.Funcs{}). - Named("cortex-nova-decisions"). + Named("cortex-nova-pipelines"). For( - &v1alpha1.Decision{}, + &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { - return false - } - // Ignore already decided schedulings. - if decision.Status.Result != nil { + pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainNova { return false } - return true + return pipeline.Spec.Type == c.PipelineType() })), ). 
Complete(c) diff --git a/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go index 287b488d9..c4f5b26a2 100644 --- a/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/nova/filter_weigher_pipeline_controller_test.go @@ -5,16 +5,12 @@ package nova import ( "context" - "encoding/json" "errors" "strings" "testing" - corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/fake" @@ -46,238 +42,6 @@ func (m *mockCandidateGatherer) MutateWithAllCandidates(ctx context.Context, req return nil } -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add v1alpha1 scheme: %v", err) - } - - novaRequest := api.ExternalSchedulerRequest{ - Spec: api.NovaObject[api.NovaSpec]{ - Name: "RequestSpec", - Namespace: "nova_object", - Version: "1.19", - Data: api.NovaSpec{ - ProjectID: "test-project", - UserID: "test-user", - InstanceUUID: "test-instance-uuid", - NumInstances: 1, - }, - }, - Context: api.NovaRequestContext{ - ProjectID: "test-project", - UserID: "test-user", - RequestID: "req-123", - GlobalRequestID: func() *string { s := "global-req-123"; return &s }(), - }, - Hosts: []api.ExternalSchedulerHost{ - {ComputeHost: "compute-1", HypervisorHostname: "hv-1"}, - {ComputeHost: "compute-2", HypervisorHostname: "hv-2"}, - }, - Weights: map[string]float64{"compute-1": 1.0, "compute-2": 0.5}, - Pipeline: "test-pipeline", - } - - novaRaw, err := json.Marshal(novaRequest) - if err != nil { - t.Fatalf("Failed to marshal nova request: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - pipeline 
*v1alpha1.Pipeline - expectError bool - expectResult bool - }{ - { - name: "successful nova decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: false, - expectResult: true, - }, - { - name: "decision without novaRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: nil, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: true, - expectResult: false, - }, - { - name: "pipeline not found", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-pipeline", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, - pipeline: nil, - expectError: true, - expectResult: false, - }, - { - name: "invalid novaRaw JSON", - 
decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-invalid-json", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: []byte("invalid json"), - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - expectError: true, - expectResult: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []client.Object{tt.decision} - if tt.pipeline != nil { - objects = append(objects, tt.pipeline) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). 
- Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]]{ - Client: client, - Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), - PipelineConfigs: make(map[string]v1alpha1.Pipeline), - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - - if tt.pipeline != nil { - initResult := controller.InitPipeline(t.Context(), v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: tt.pipeline.Name, - }, - Spec: tt.pipeline.Spec, - }) - if len(initResult.FilterErrors) > 0 || len(initResult.WeigherErrors) > 0 { - t.Fatalf("Failed to initialize pipeline: filter errors: %v, weigher errors: %v", initResult.FilterErrors, initResult.WeigherErrors) - } - controller.Pipelines[tt.pipeline.Name] = initResult.Pipeline - controller.PipelineConfigs[tt.pipeline.Name] = *tt.pipeline - } - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - Namespace: tt.decision.Namespace, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("Expected error but got none") - } - if !tt.expectError && err != nil { - t.Errorf("Expected no error but got: %v", err) - } - - if result.RequeueAfter > 0 { - t.Error("Expected no requeue") - } - - var updatedDecision v1alpha1.Decision - if err := client.Get(context.Background(), req.NamespacedName, &updatedDecision); err != nil { - if !tt.expectError { - t.Fatalf("Failed to get updated decision: %v", err) - } - return - } - - if tt.expectResult && updatedDecision.Status.Result == nil { - t.Error("Expected result to be set but was nil") - } - if !tt.expectResult && updatedDecision.Status.Result != nil { - t.Error("Expected result to be nil but was set") - } - }) - } -} - func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { controller := &FilterWeigherPipelineController{ Monitor: 
lib.FilterWeigherPipelineMonitor{}, @@ -355,7 +119,7 @@ func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { } } -func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) { +func TestFilterWeigherPipelineController_ProcessRequest(t *testing.T) { scheme := runtime.NewScheme() if err := v1alpha1.AddToScheme(scheme); err != nil { t.Fatalf("Failed to add v1alpha1 scheme: %v", err) @@ -387,161 +151,20 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Pipeline: "test-pipeline", } - novaRaw, err := json.Marshal(novaRequest) - if err != nil { - t.Fatalf("Failed to marshal nova request: %v", err) - } - tests := []struct { - name string - decision *v1alpha1.Decision - pipeline *v1alpha1.Pipeline - pipelineConf *v1alpha1.Pipeline - setupPipelineConfigs bool - createDecisions bool - expectError bool - expectResult bool - expectCreatedDecision bool - expectUpdatedStatus bool - errorContains string + name string + request api.ExternalSchedulerRequest + pipeline *v1alpha1.Pipeline + pipelineConf *v1alpha1.Pipeline + setupPipelineConfigs bool + expectError bool + expectResult bool + expectUpdatedStatus bool + errorContains string }{ { - name: "successful processing with decision creation enabled", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-api", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - pipelineConf: &v1alpha1.Pipeline{ - ObjectMeta: 
metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: false, - expectResult: true, - expectCreatedDecision: true, - expectUpdatedStatus: true, - }, - { - name: "successful processing with decision creation disabled", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-create", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline-no-create", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, - pipeline: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline-no-create", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: false, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - pipelineConf: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline-no-create", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: false, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - setupPipelineConfigs: true, - createDecisions: false, - expectError: false, - expectResult: true, - expectCreatedDecision: false, - expectUpdatedStatus: false, - }, - { - name: "pipeline not configured", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-config", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: 
corev1.ObjectReference{ - Name: "nonexistent-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, - pipeline: nil, - pipelineConf: nil, - setupPipelineConfigs: false, - expectError: true, - expectResult: false, - expectCreatedDecision: false, - expectUpdatedStatus: false, - errorContains: "pipeline nonexistent-pipeline not configured", - }, - { - name: "decision without novaRaw spec", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-raw-api", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: nil, - }, - }, + name: "successful processing with decision creation enabled", + request: novaRequest, pipeline: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pipeline", @@ -566,69 +189,25 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Weighers: []v1alpha1.WeigherSpec{}, }, }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: true, - expectResult: false, - expectCreatedDecision: true, - expectUpdatedStatus: false, - errorContains: "no novaRaw spec defined", + setupPipelineConfigs: true, + expectError: false, + expectResult: true, + expectUpdatedStatus: true, }, { - name: "processing fails after decision creation", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-process-fail", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, - pipeline: nil, // This will cause processing to fail after creation - pipelineConf: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pipeline", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - 
SchedulingDomain: v1alpha1.SchedulingDomainNova, - CreateDecisions: true, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: true, - expectResult: false, - expectCreatedDecision: true, - expectUpdatedStatus: false, - errorContains: "pipeline not found or not ready", + name: "pipeline not configured", + request: novaRequest, + pipeline: nil, + pipelineConf: nil, + setupPipelineConfigs: false, + expectError: true, + expectResult: false, + expectUpdatedStatus: false, + errorContains: "pipeline test-pipeline not found or not ready", }, { - name: "pipeline not found in runtime map", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-no-runtime-pipeline", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "missing-runtime-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - }, + name: "pipeline not found in runtime map", + request: novaRequest, pipeline: nil, pipelineConf: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ @@ -642,13 +221,11 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Weighers: []v1alpha1.WeigherSpec{}, }, }, - setupPipelineConfigs: true, - createDecisions: true, - expectError: true, - expectResult: false, - expectCreatedDecision: true, - expectUpdatedStatus: false, - errorContains: "pipeline not found or not ready", + setupPipelineConfigs: true, + expectError: true, + expectResult: false, + expectUpdatedStatus: false, + errorContains: "pipeline not found or not ready", }, } @@ -670,6 +247,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) Client: client, Pipelines: make(map[string]lib.FilterWeigherPipeline[api.ExternalSchedulerRequest]), PipelineConfigs: make(map[string]v1alpha1.Pipeline), + DecisionQueue: 
make(chan lib.DecisionUpdate, 100), }, Monitor: lib.FilterWeigherPipelineMonitor{}, } @@ -694,7 +272,7 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) } // Call the method under test - err := controller.ProcessNewDecisionFromAPI(context.Background(), tt.decision) + result, err := controller.ProcessRequest(context.Background(), tt.request) // Validate error expectations if tt.expectError && err == nil { @@ -707,28 +285,11 @@ func TestFilterWeigherPipelineController_ProcessNewDecisionFromAPI(t *testing.T) t.Errorf("Expected error to contain %q, got: %v", tt.errorContains, err) } - // Check if decision was created in the cluster when expected - if tt.expectCreatedDecision { - var createdDecision v1alpha1.Decision - key := types.NamespacedName{Name: tt.decision.Name, Namespace: tt.decision.Namespace} - err := client.Get(context.Background(), key, &createdDecision) - if err != nil { - t.Errorf("Expected decision to be created but got error: %v", err) - } - } else { - var createdDecision v1alpha1.Decision - key := types.NamespacedName{Name: tt.decision.Name, Namespace: tt.decision.Namespace} - err := client.Get(context.Background(), key, &createdDecision) - if err == nil { - t.Error("Expected decision not to be created but it was found") - } - } - // Validate result and duration expectations - if tt.expectResult && tt.decision.Status.Result == nil { + if tt.expectResult && result == nil { t.Error("Expected result to be set but was nil") } - if !tt.expectResult && tt.decision.Status.Result != nil { + if !tt.expectResult && result != nil { t.Error("Expected result to be nil but was set") } }) @@ -768,11 +329,6 @@ func TestFilterWeigherPipelineController_IgnorePreselection(t *testing.T) { Pipeline: "test-pipeline", } - novaRaw, err := json.Marshal(novaRequest) - if err != nil { - t.Fatalf("Failed to marshal nova request: %v", err) - } - tests := []struct { name string ignorePreselection bool @@ -866,25 +422,8 @@ func 
TestFilterWeigherPipelineController_IgnorePreselection(t *testing.T) { } controller.Pipelines["test-pipeline"] = initResult.Pipeline - // Create decision - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision-preselection", - Namespace: "default", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainNova, - PipelineRef: corev1.ObjectReference{ - Name: "test-pipeline", - }, - NovaRaw: &runtime.RawExtension{ - Raw: novaRaw, - }, - }, - } - // Process the decision - err := controller.ProcessNewDecisionFromAPI(context.Background(), decision) + result, err := controller.ProcessRequest(context.Background(), novaRequest) // Verify gatherer was called (or not) as expected if tt.expectGathererCall && !mockGatherer.called { @@ -906,7 +445,7 @@ func TestFilterWeigherPipelineController_IgnorePreselection(t *testing.T) { } // Verify result is set when no error - if !tt.expectError && decision.Status.Result == nil { + if !tt.expectError && result == nil { t.Error("Expected result to be set but was nil") } }) diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller.go b/internal/scheduling/pods/filter_weigher_pipeline_controller.go index 28e10ff88..492c07a26 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller.go @@ -18,7 +18,6 @@ import ( "github.com/cobaltcore-dev/cortex/internal/scheduling/pods/plugins/filters" "github.com/cobaltcore-dev/cortex/internal/scheduling/pods/plugins/weighers" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/util/workqueue" ctrl "sigs.k8s.io/controller-runtime" @@ -53,108 +52,30 @@ func (c *FilterWeigherPipelineController) PipelineType() v1alpha1.PipelineType { return v1alpha1.PipelineTypeFilterWeigher } -func (c *FilterWeigherPipelineController) Reconcile(ctx context.Context, req ctrl.Request) 
(ctrl.Result, error) { - c.processMu.Lock() - defer c.processMu.Unlock() - - // Determine if this is a decision or pod reconciliation. - decision := &v1alpha1.Decision{} - if err := c.Get(ctx, req.NamespacedName, decision); err != nil { - return ctrl.Result{}, client.IgnoreNotFound(err) - } - old := decision.DeepCopy() - if err := c.process(ctx, decision); err != nil { - return ctrl.Result{}, err - } - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return ctrl.Result{}, err - } - return ctrl.Result{}, nil -} - func (c *FilterWeigherPipelineController) ProcessNewPod(ctx context.Context, pod *corev1.Pod) error { c.processMu.Lock() defer c.processMu.Unlock() - // Create a decision resource to schedule the pod. - decision := &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - GenerateName: "pod-", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainPods, - ResourceID: pod.Name, - PipelineRef: corev1.ObjectReference{ - Name: "pods-scheduler", - }, - PodRef: &corev1.ObjectReference{ - Name: pod.Name, - Namespace: pod.Namespace, - }, - }, - } + log := ctrl.LoggerFrom(ctx) + startedAt := time.Now() + + pipelineName := "pods-scheduler" - pipelineConf, ok := c.PipelineConfigs[decision.Spec.PipelineRef.Name] + pipeline, ok := c.Pipelines[pipelineName] if !ok { - return fmt.Errorf("pipeline %s not configured", decision.Spec.PipelineRef.Name) - } - if pipelineConf.Spec.CreateDecisions { - if err := c.Create(ctx, decision); err != nil { - return err - } - } - old := decision.DeepCopy() - err := c.process(ctx, decision) - if err != nil { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: metav1.ConditionFalse, - Reason: "PipelineRunFailed", - Message: "pipeline run failed: " + err.Error(), - }) - } else { - meta.SetStatusCondition(&decision.Status.Conditions, metav1.Condition{ - Type: v1alpha1.DecisionConditionReady, - Status: 
metav1.ConditionTrue, - Reason: "PipelineRunSucceeded", - Message: "pipeline run succeeded", - }) + return fmt.Errorf("pipeline %s not found or not ready", pipelineName) } - if pipelineConf.Spec.CreateDecisions { - patch := client.MergeFrom(old) - if err := c.Status().Patch(ctx, decision, patch); err != nil { - return err - } - } - return err -} -func (c *FilterWeigherPipelineController) process(ctx context.Context, decision *v1alpha1.Decision) error { - log := ctrl.LoggerFrom(ctx) - startedAt := time.Now() // So we can measure sync duration. - - pipeline, ok := c.Pipelines[decision.Spec.PipelineRef.Name] + pipelineConfig, ok := c.PipelineConfigs[pipelineName] if !ok { - log.Error(nil, "pipeline not found or not ready", "pipelineName", decision.Spec.PipelineRef.Name) - return errors.New("pipeline not found or not ready") + return fmt.Errorf("pipeline %s not configured", pipelineName) } - // Check if the pod is already assigned to a node. - pod := &corev1.Pod{} - if err := c.Get(ctx, client.ObjectKey{ - Name: decision.Spec.PodRef.Name, - Namespace: decision.Spec.PodRef.Namespace, - }, pod); err != nil { - log.Error(err, "failed to fetch pod for decision") - return err - } if pod.Spec.NodeName != "" { log.Info("pod is already assigned to a node", "node", pod.Spec.NodeName) return nil } - // Find all available nodes. nodes := &corev1.NodeList{} if err := c.List(ctx, nodes); err != nil { return err @@ -163,32 +84,48 @@ func (c *FilterWeigherPipelineController) process(ctx context.Context, decision return errors.New("no nodes available for scheduling") } - // Execute the scheduling pipeline. 
request := pods.PodPipelineRequest{Nodes: nodes.Items, Pod: *pod} result, err := pipeline.Run(request) if err != nil { log.V(1).Error(err, "failed to run scheduler pipeline") return errors.New("failed to run scheduler pipeline") } - decision.Status.Result = &result - log.Info("decision processed successfully", "duration", time.Since(startedAt)) - // Assign the first node returned by the pipeline using a Binding. + log.Info("pod processed successfully", "duration", time.Since(startedAt)) + + hosts := result.OrderedHosts + if len(hosts) == 0 { + log.Info("no suitable nodes found for pod") + return nil + } + + targetHost := hosts[0] + binding := &corev1.Binding{ ObjectMeta: metav1.ObjectMeta{ - Name: decision.Spec.PodRef.Name, - Namespace: decision.Spec.PodRef.Namespace, + Name: pod.Name, + Namespace: pod.Namespace, }, Target: corev1.ObjectReference{ Kind: "Node", - Name: *result.TargetHost, + Name: targetHost, }, } if err := c.Create(ctx, binding); err != nil { log.V(1).Error(err, "failed to assign node to pod via binding") return err } - log.V(1).Info("assigned node to pod", "node", *result.TargetHost) + log.V(1).Info("assigned node to pod", "node", targetHost) + + if pipelineConfig.Spec.CreateDecisions { + c.DecisionQueue <- lib.DecisionUpdate{ + ResourceID: pod.Name, + PipelineName: pipelineName, + Result: result, + // TODO: Refine the reason + Intent: v1alpha1.SchedulingIntentUnknown, + } + } return nil } @@ -236,7 +173,7 @@ func (c *FilterWeigherPipelineController) handlePod() handler.EventHandler { return } for _, decision := range decisions.Items { - if decision.Spec.PodRef.Name == pod.Name && decision.Spec.PodRef.Namespace == pod.Namespace { + if decision.Spec.ResourceID == pod.Name && decision.Spec.SchedulingDomain == v1alpha1.SchedulingDomainPods { if err := c.Delete(ctx, &decision); err != nil { log.Error(err, "failed to delete decision for deleted pod") } @@ -249,6 +186,7 @@ func (c *FilterWeigherPipelineController) handlePod() handler.EventHandler { func 
(c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, mcl *multicluster.Client) error { c.Initializer = c c.SchedulingDomain = v1alpha1.SchedulingDomainPods + c.Recorder = mgr.GetEventRecorder("cortex-pods-pipeline-controller") if err := mgr.Add(manager.RunnableFunc(c.InitAllPipelines)); err != nil { return err } @@ -266,36 +204,15 @@ func (c *FilterWeigherPipelineController) SetupWithManager(mgr manager.Manager, return pod.Spec.SchedulerName == string(v1alpha1.SchedulingDomainPods) }), ). - // Watch pipeline changes so that we can reconfigure pipelines as needed. - WatchesMulticluster( - &v1alpha1.Pipeline{}, - handler.Funcs{ - CreateFunc: c.HandlePipelineCreated, - UpdateFunc: c.HandlePipelineUpdated, - DeleteFunc: c.HandlePipelineDeleted, - }, - predicate.NewPredicateFuncs(func(obj client.Object) bool { - pipeline := obj.(*v1alpha1.Pipeline) - // Only react to pipelines matching the scheduling domain. - if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { - return false - } - return pipeline.Spec.Type == v1alpha1.PipelineTypeFilterWeigher - }), - ). Named("cortex-pod-scheduler"). For( - &v1alpha1.Decision{}, + &v1alpha1.Pipeline{}, builder.WithPredicates(predicate.NewPredicateFuncs(func(obj client.Object) bool { - decision := obj.(*v1alpha1.Decision) - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { - return false - } - // Ignore already decided schedulings. - if decision.Status.Result != nil { + pipeline := obj.(*v1alpha1.Pipeline) + if pipeline.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { return false } - return true + return pipeline.Spec.Type == c.PipelineType() })), ). 
Complete(c) diff --git a/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go index 3d523873b..0d2665cba 100644 --- a/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go +++ b/internal/scheduling/pods/filter_weigher_pipeline_controller_test.go @@ -15,166 +15,9 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client/fake" ) -func TestFilterWeigherPipelineController_Reconcile(t *testing.T) { - scheme := runtime.NewScheme() - if err := v1alpha1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add scheduling scheme: %v", err) - } - if err := corev1.AddToScheme(scheme); err != nil { - t.Fatalf("Failed to add corev1 scheme: %v", err) - } - - tests := []struct { - name string - decision *v1alpha1.Decision - nodes []corev1.Node - pod *corev1.Pod - expectError bool - expectDecision bool - expectTargetHost string - }{ - { - name: "successful pod decision processing", - decision: &v1alpha1.Decision{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-decision", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainPods, - ResourceID: "test-pod", - PipelineRef: corev1.ObjectReference{ - Name: "pods-scheduler", - }, - PodRef: &corev1.ObjectReference{ - Name: "test-pod", - Namespace: "default", - }, - }, - }, - nodes: []corev1.Node{ - { - ObjectMeta: metav1.ObjectMeta{Name: "node1"}, - }, - { - ObjectMeta: metav1.ObjectMeta{Name: "node2"}, - }, - }, - pod: &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pod", - Namespace: "default", - }, - Spec: corev1.PodSpec{ - SchedulerName: "", - }, - }, - expectError: false, - expectDecision: true, - expectTargetHost: "node1", // NoopFilter returns first node - }, - { - name: "no nodes available", - decision: &v1alpha1.Decision{ - ObjectMeta: 
metav1.ObjectMeta{ - Name: "test-decision-no-nodes", - }, - Spec: v1alpha1.DecisionSpec{ - SchedulingDomain: v1alpha1.SchedulingDomainPods, - ResourceID: "test-pod", - PipelineRef: corev1.ObjectReference{ - Name: "pods-scheduler", - }, - PodRef: &corev1.ObjectReference{ - Name: "test-pod", - Namespace: "default", - }, - }, - }, - nodes: []corev1.Node{}, - expectError: true, - expectDecision: false, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - objects := []runtime.Object{tt.decision} - for i := range tt.nodes { - objects = append(objects, &tt.nodes[i]) - } - if tt.pod != nil { - objects = append(objects, tt.pod) - } - - client := fake.NewClientBuilder(). - WithScheme(scheme). - WithRuntimeObjects(objects...). - WithStatusSubresource(&v1alpha1.Decision{}). - Build() - - controller := &FilterWeigherPipelineController{ - BasePipelineController: lib.BasePipelineController[lib.FilterWeigherPipeline[pods.PodPipelineRequest]]{ - Pipelines: map[string]lib.FilterWeigherPipeline[pods.PodPipelineRequest]{ - "pods-scheduler": createMockPodPipeline(), - }, - }, - Monitor: lib.FilterWeigherPipelineMonitor{}, - } - controller.Client = client - - req := ctrl.Request{ - NamespacedName: types.NamespacedName{ - Name: tt.decision.Name, - }, - } - - result, err := controller.Reconcile(context.Background(), req) - - if tt.expectError && err == nil { - t.Error("expected error but got none") - return - } - - if !tt.expectError && err != nil { - t.Errorf("expected no error, got: %v", err) - return - } - - if result.RequeueAfter > 0 { - t.Errorf("unexpected requeue: %v", result.RequeueAfter) - } - - // Verify decision status if expected - if tt.expectDecision { - var updatedDecision v1alpha1.Decision - err := client.Get(context.Background(), req.NamespacedName, &updatedDecision) - if err != nil { - t.Errorf("Failed to get updated decision: %v", err) - return - } - - if updatedDecision.Status.Result == nil { - t.Error("expected decision result to be set") - 
return - } - - if updatedDecision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") - return - } - - if *updatedDecision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *updatedDecision.Status.Result.TargetHost) - } - } - }) - } -} - func TestFilterWeigherPipelineController_InitPipeline(t *testing.T) { controller := &FilterWeigherPipelineController{ Monitor: lib.FilterWeigherPipelineMonitor{}, @@ -263,18 +106,16 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { } tests := []struct { - name string - pod *corev1.Pod - nodes []corev1.Node - pipelineConfig *v1alpha1.Pipeline - createDecisions bool - expectError bool - expectDecisionCreated bool - expectNodeAssigned bool - expectTargetHost string + name string + pod *corev1.Pod + nodes []corev1.Node + pipelineConfig *v1alpha1.Pipeline + expectError bool + expectNodeAssigned bool + expectTargetHost string }{ { - name: "successful pod processing with decision creation", + name: "successful pod processing", pod: &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: "test-pod", @@ -292,40 +133,6 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { ObjectMeta: metav1.ObjectMeta{Name: "node2"}, }, }, - pipelineConfig: &v1alpha1.Pipeline{ - ObjectMeta: metav1.ObjectMeta{ - Name: "pods-scheduler", - }, - Spec: v1alpha1.PipelineSpec{ - Type: v1alpha1.PipelineTypeFilterWeigher, - SchedulingDomain: v1alpha1.SchedulingDomainPods, - CreateDecisions: true, - Filters: []v1alpha1.FilterSpec{}, - Weighers: []v1alpha1.WeigherSpec{}, - }, - }, - createDecisions: true, - expectError: false, - expectDecisionCreated: true, - expectNodeAssigned: true, - expectTargetHost: "node1", - }, - { - name: "successful pod processing without decision creation", - pod: &corev1.Pod{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-pod-no-decision", - Namespace: "default", - }, - Spec: corev1.PodSpec{ - SchedulerName: "", 
- }, - }, - nodes: []corev1.Node{ - { - ObjectMeta: metav1.ObjectMeta{Name: "node1"}, - }, - }, pipelineConfig: &v1alpha1.Pipeline{ ObjectMeta: metav1.ObjectMeta{ Name: "pods-scheduler", @@ -338,11 +145,9 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: false, - expectError: false, - expectDecisionCreated: false, - expectNodeAssigned: true, - expectTargetHost: "node1", + expectError: false, + expectNodeAssigned: true, + expectTargetHost: "node1", }, { name: "pipeline not configured", @@ -355,11 +160,10 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { SchedulerName: "", }, }, - nodes: []corev1.Node{}, - pipelineConfig: nil, - expectError: true, - expectDecisionCreated: false, - expectNodeAssigned: false, + nodes: []corev1.Node{}, + pipelineConfig: nil, + expectError: true, + expectNodeAssigned: false, }, { name: "no nodes available", @@ -385,10 +189,8 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { Weighers: []v1alpha1.WeigherSpec{}, }, }, - createDecisions: true, - expectError: true, - expectDecisionCreated: true, // Decision is created but processing fails - expectNodeAssigned: false, + expectError: true, + expectNodeAssigned: false, }, } @@ -434,73 +236,6 @@ func TestFilterWeigherPipelineController_ProcessNewPod(t *testing.T) { return } - // Check if decision was created (if expected) - if tt.expectDecisionCreated { - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - found := false - for _, decision := range decisions.Items { - if decision.Spec.PodRef != nil && - decision.Spec.PodRef.Name == tt.pod.Name && - decision.Spec.PodRef.Namespace == tt.pod.Namespace { - found = true - - // Verify decision properties - if decision.Spec.SchedulingDomain != v1alpha1.SchedulingDomainPods { - t.Errorf("expected 
scheduling domain %q, got %q", v1alpha1.SchedulingDomainPods, decision.Spec.SchedulingDomain) - } - if decision.Spec.ResourceID != tt.pod.Name { - t.Errorf("expected resource ID %q, got %q", tt.pod.Name, decision.Spec.ResourceID) - } - if decision.Spec.PipelineRef.Name != "pods-scheduler" { - t.Errorf("expected pipeline ref %q, got %q", "pods-scheduler", decision.Spec.PipelineRef.Name) - } - - // Check if result was set (only for successful cases) - if !tt.expectError && tt.expectTargetHost != "" { - if decision.Status.Result == nil { - t.Error("expected decision result to be set") - return - } - if decision.Status.Result.TargetHost == nil { - t.Error("expected target host to be set") - return - } - if *decision.Status.Result.TargetHost != tt.expectTargetHost { - t.Errorf("expected target host %q, got %q", tt.expectTargetHost, *decision.Status.Result.TargetHost) - } - } - break - } - } - - if !found { - t.Error("expected decision to be created but was not found") - } - } else { - // Check that no decisions were created - var decisions v1alpha1.DecisionList - err := client.List(context.Background(), &decisions) - if err != nil { - t.Errorf("Failed to list decisions: %v", err) - return - } - - for _, decision := range decisions.Items { - if decision.Spec.PodRef != nil && - decision.Spec.PodRef.Name == tt.pod.Name && - decision.Spec.PodRef.Namespace == tt.pod.Namespace { - t.Error("expected no decision to be created but found one") - break - } - } - } - // Check if node was assigned (if expected) if tt.expectNodeAssigned { var binding corev1.Binding @@ -531,14 +266,14 @@ func createMockPodPipeline() lib.FilterWeigherPipeline[pods.PodPipelineRequest] type mockPodPipeline struct{} -func (m *mockPodPipeline) Run(request pods.PodPipelineRequest) (v1alpha1.DecisionResult, error) { +func (m *mockPodPipeline) Run(request pods.PodPipelineRequest) (lib.FilterWeigherPipelineResult, error) { if len(request.Nodes) == 0 { - return v1alpha1.DecisionResult{}, nil + return 
lib.FilterWeigherPipelineResult{OrderedHosts: []string{}}, nil } // Return the first node as the target host targetHost := request.Nodes[0].Name - return v1alpha1.DecisionResult{ - TargetHost: &targetHost, + return lib.FilterWeigherPipelineResult{ + OrderedHosts: []string{targetHost}, }, nil } diff --git a/tools/visualizer/Dockerfile b/tools/visualizer/Dockerfile deleted file mode 100644 index af7c859dd..000000000 --- a/tools/visualizer/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -# Copyright SAP SE -# SPDX-License-Identifier: Apache-2.0 - -FROM nginx - -COPY nova.html /usr/share/nginx/html/nova.html -COPY shared.css /usr/share/nginx/html/shared.css -COPY favicon.ico /usr/share/nginx/html/favicon.ico -COPY nginx.conf /etc/nginx/conf.d/default.conf diff --git a/tools/visualizer/app.yaml b/tools/visualizer/app.yaml deleted file mode 100644 index 5697571e3..000000000 --- a/tools/visualizer/app.yaml +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright SAP SE -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: cortex-visualizer - labels: - app: cortex-visualizer -spec: - replicas: 1 - selector: - matchLabels: - app: cortex-visualizer - template: - metadata: - labels: - app: cortex-visualizer - spec: - serviceAccountName: cortex-visualizer - containers: - - name: cortex-visualizer - image: cortex-visualizer - ports: - - containerPort: 80 - - name: kubectl-proxy - image: alpine:latest - command: ["/bin/sh"] - args: - - -c - - | - apk add --no-cache curl - curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" - chmod +x kubectl - mv kubectl /usr/local/bin/ - kubectl proxy --port=8001 --address=0.0.0.0 --accept-hosts=.* - ports: - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: cortex-visualizer -spec: - selector: - app: cortex-visualizer - ports: - - name: http - port: 80 - targetPort: 80 - - name: kubectl-proxy - port: 8001 - targetPort: 8001 - type: 
ClusterIP \ No newline at end of file diff --git a/tools/visualizer/favicon.ico b/tools/visualizer/favicon.ico deleted file mode 100644 index b4f9d5fb8..000000000 Binary files a/tools/visualizer/favicon.ico and /dev/null differ diff --git a/tools/visualizer/nginx.conf b/tools/visualizer/nginx.conf deleted file mode 100644 index 7be253e28..000000000 --- a/tools/visualizer/nginx.conf +++ /dev/null @@ -1,18 +0,0 @@ -server { - listen 80; - - # Serve static files - location / { - root /usr/share/nginx/html; - index nova.html; - } - - # Proxy API requests to kubectl proxy - location /k8s/ { - proxy_pass http://127.0.0.1:8001/; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_http_version 1.1; - proxy_set_header Connection ""; - } -} \ No newline at end of file diff --git a/tools/visualizer/nova.html b/tools/visualizer/nova.html deleted file mode 100644 index 455b8a00c..000000000 --- a/tools/visualizer/nova.html +++ /dev/null @@ -1,503 +0,0 @@ - - - - - Cortex Nova Visualizer - - - - - - - - -
-
- -
Cortex Nova Visualizer
-
- - - -
-
-
-
Loading...
- -
-
-
- - - - - - \ No newline at end of file diff --git a/tools/visualizer/role.yaml b/tools/visualizer/role.yaml deleted file mode 100644 index e497f3b88..000000000 --- a/tools/visualizer/role.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: v1 -kind: ServiceAccount -metadata: - name: cortex-visualizer - namespace: default - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: cortex-visualizer-decision-reader -rules: -- apiGroups: ["cortex.cloud"] - resources: ["decisions"] - verbs: ["get", "list", "watch"] - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: cortex-visualizer-decision-reader-binding -subjects: -- kind: ServiceAccount - name: cortex-visualizer - namespace: default -roleRef: - kind: ClusterRole - name: cortex-visualizer-decision-reader - apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/tools/visualizer/shared.css b/tools/visualizer/shared.css deleted file mode 100644 index b3a933375..000000000 --- a/tools/visualizer/shared.css +++ /dev/null @@ -1,289 +0,0 @@ -/* Copyright SAP SE */ -/* SPDX-License-Identifier: Apache-2.0 */ - -body { - font-family: Arial, Helvetica, sans-serif; - - --color-primary: rgb(255, 165, 2); - --color-on-primary: rgb(255, 255, 255); - --color-secondary: rgb(112, 161, 255); - --color-on-secondary: rgb(255, 255, 255); - --color-tertiary: rgb(255, 71, 87); - --color-on-tertiary: rgb(255, 255, 255); - --color-background: rgb(241, 242, 246); - --color-on-background: rgb(74, 74, 74); - --color-surface: rgb(255, 255, 255); - --color-on-surface: rgb(74, 74, 74); - - color: var(--color-text); - background: var(--color-background); - /* Remove the default margin and padding from the body. */ - margin: 0; -} - -/* Nice animated progress bar on top of the page. 
*/ -.progress { - position: fixed; - top: 0; - left: 0; - right: 0; - height: 0.5em; - background: var(--color-primary); - z-index: 1000; -} - -.progress::before { - content: ''; - position: absolute; - top: 0; - left: 0; - right: 0; - height: 0.5em; - background: var(--color-secondary); - animation: progress 2s infinite; -} - -@keyframes progress { - 0% { - left: -100%; - right: 100%; - } - - 100% { - left: 100%; - right: -100%; - } -} - -.progress-text { - position: fixed; - top: 2em; - left: 0; - right: 0; - text-align: center; - font-weight: bold; -} - -/* Navbar that shows information. */ -nav { - padding-left: 0.25em; - background: var(--color-surface); - box-shadow: 0 0 1em rgba(0, 0, 0, 0.1); - z-index: 1; -} - -nav div.element { - display: inline-block; - padding-top: 1em; - padding-bottom: 2em; - padding-left: 1em; - padding-right: 1em; - margin: 0; - background: var(--color-surface); - color: var(--color-on-surface); - border-right: 2px solid var(--color-background); - font-size: 1em; -} - -nav div.element p.highlight { - font-size: 1.25em; - font-weight: bold; -} - -table { - /* Revert the default spacing used by the browser. */ - border-spacing: 0; -} - -/* Table cell showing the weight during scheduling. */ -td.weight { - text-align: center; - position: relative; - animation: weightAnimation 0.25s ease-in-out; -} - -td.weight div { - border-radius: 0.5em; - padding: 0.5em; - margin: 0.5em; - border: 2px solid var(--color-surface); -} - -/* Backdrop white for the weight cells */ -td.weight::after { - content: ''; - position: absolute; - --m: 0.6em; - top: var(--m); - bottom: var(--m); - left: var(--m); - right: var(--m); - border-radius: 0.5em; - background: var(--color-surface); - z-index: -1; -} - -/* Animation for weights when they first appear */ -@keyframes weightAnimation { - 0% { - opacity: 0; - transform: scale(0.5); - } - - 100% { - opacity: 1; - transform: scale(1); - } -} - -/* Table cell showing the hostname/name. 
*/ -th.hostname { - text-align: center; - position: relative; -} - -th.hostname div { - position: relative; - padding: 0.1em; - padding-top: 0.5em; - padding-bottom: 0.5em; - margin: 0.1em; - width: 6em; - height: 6em; - overflow: hidden; -} - -/* Table cell showing additional information. */ -th.metainfo { - text-align: center; - position: relative; -} - -th.metainfo div p { - width: 6em; - overflow: hidden; -} - -th.metainfo div p.issue { - color: var(--color-tertiary); - border-radius: 0.5em; - font-size: 0.8em; -} - -/* Table row showing the name of a step in the pipeline. */ -th.stepkey { - text-align: left; - font-weight: bold; - padding-left: 0.75em; - padding-top: 0.5em; - padding-bottom: 0.25em; -} - -/* Highlighted rows in the table. */ -tr.highlight { - background: var(--color-surface); - /* tr doesn't support border-radius */ - clip-path: xywh(0 0 100% 100% round 0.75em); -} - -/* Chart showing usage statistics. */ -td.chart { - position: relative; - height: 24em; -} - -td.chart div.barsbefore, -td.chart div.barsafter, -td.chart div.backdrop, -td.chart div.stats { - position: absolute; - top: 0; - left: 0; - right: 0; - bottom: 0; - display: flex; - margin-top: 1.5em; - margin-bottom: 0.5em; - padding-left: 0.5em; - padding-right: 0.5em; - flex-direction: row; - justify-content: center; - align-items: flex-end; -} - -td.chart div.barsbefore p, -td.chart div.barsafter p, -td.chart div.backdrop p, -td.chart div.stats p { - margin-left: 0.1em; - margin-right: 0.1em; - display: flex; - border-radius: 0.2em; -} - -td.chart div.backdrop p { - height: 100%; - border-radius: 0.2em; - border: 1px solid rgba(0, 0, 0, 0.05); - background: white; -} - -td.chart div.stats { - text-align: center; - display: flex; - justify-content: center; - align-items: flex-start; -} - -td.chart div.stats p { - writing-mode: vertical-lr; - text-orientation: mixed; - display: flex; - font-size: 1em; - font-weight: bold; - margin-left: 0.1em; - margin-right: 0.1em; - justify-content: 
center; - align-items: center; -} - -/* Animation for chart bars */ -td.chart div.barsafter p, -td.chart div.barsbefore p { - animation: barAnim 0.25s ease-in-out; - overflow: hidden; -} - -@keyframes barAnim { - 0% { - transform: scaleY(0); - } - - 100% { - transform: scaleY(1); - } -} - -td.chart div.barsafter p.cpu { - background: var(--color-primary); -} - -td.chart div.barsafter p.mem { - background: var(--color-primary); -} - -td.chart div.barsafter p.disk { - background: var(--color-primary); -} - -td.chart div.barsbefore p.cpu { - background: var(--color-secondary); -} - -td.chart div.barsbefore p.mem { - background: var(--color-secondary); -} - -td.chart div.barsbefore p.disk { - background: var(--color-secondary); -}