diff --git a/charts/bootchain-operator/templates/grafana-dashboard.yaml b/charts/bootchain-operator/templates/grafana-dashboard.yaml index e77f9dd..b47fb41 100644 --- a/charts/bootchain-operator/templates/grafana-dashboard.yaml +++ b/charts/bootchain-operator/templates/grafana-dashboard.yaml @@ -62,13 +62,12 @@ data: "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { "mode": "fixed", "fixedColor": "green" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 1 } + { "color": "green", "value": null } ] }, "unit": "short" @@ -89,7 +88,7 @@ data: "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "count(bootchain_dependencies_total{namespace=~\"$namespace\"})", + "expr": "count(bootchain_dependencies_total{namespace=~\"$namespace\"}) or vector(0)", "instant": true, "legendFormat": "", "refId": "A" @@ -107,8 +106,7 @@ data: "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "green", "value": 1 } + { "color": "green", "value": null } ] }, "unit": "short" @@ -129,7 +127,7 @@ data: "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(bootchain_dependencies_ready{namespace=~\"$namespace\"})", + "expr": "sum(bootchain_dependencies_ready{namespace=~\"$namespace\"}) or vector(0)", "instant": true, "legendFormat": "", "refId": "A" @@ -148,8 +146,7 @@ data: "mode": "absolute", "steps": [ { "color": "green", "value": null }, - { "color": "yellow", "value": 1 }, - { "color": "red", "value": 5 } + { "color": "red", "value": 1 } ] }, "unit": "short" @@ -170,7 +167,7 @@ data: "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(bootchain_dependencies_total{namespace=~\"$namespace\"}) - 
sum(bootchain_dependencies_ready{namespace=~\"$namespace\"})", + "expr": "(sum(bootchain_dependencies_total{namespace=~\"$namespace\"}) or vector(0)) - (sum(bootchain_dependencies_ready{namespace=~\"$namespace\"}) or vector(0))", "instant": true, "legendFormat": "", "refId": "A" @@ -184,7 +181,9 @@ data: "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "mappings": [], + "mappings": [ + { "options": { "match": "null", "result": { "color": "green", "index": 0, "text": "0%" } }, "type": "special" } + ], "thresholds": { "mode": "absolute", "steps": [ @@ -201,7 +200,7 @@ data: "id": 4, "options": { "colorMode": "background", - "graphMode": "area", + "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -211,8 +210,8 @@ data: "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "rate(bootchain_reconcile_total{result=\"error\"}[$__rate_interval]) / rate(bootchain_reconcile_total[$__rate_interval])", - "instant": false, + "expr": "(sum(rate(bootchain_reconcile_total{result=\"error\"}[5m])) or vector(0)) / ((sum(rate(bootchain_reconcile_total[5m])) or vector(0)) > 0)", + "instant": true, "legendFormat": "", "refId": "A" } @@ -388,10 +387,36 @@ data: } }, "overrides": [ - { "matcher": { "id": "byName", "options": "namespace" }, "properties": [{ "id": "custom.width", "value": 120 }] }, - { "matcher": { "id": "byName", "options": "name" }, "properties": [{ "id": "custom.width", "value": 200 }] }, - { "matcher": { "id": "byName", "options": "ready" }, "properties": [{ "id": "custom.width", "value": 80 }] }, - { "matcher": { "id": "byName", "options": "total" }, "properties": [{ "id": "custom.width", "value": 70 }] } + { "matcher": { "id": "byName", "options": "namespace" }, "properties": [{ "id": "custom.width", "value": 160 }] }, + { "matcher": { "id": "byName", "options": "name" }, "properties": [{ "id": "custom.width", 
"value": 220 }] }, + { + "matcher": { "id": "byName", "options": "ready" }, + "properties": [ + { "id": "custom.width", "value": 80 }, + { "id": "custom.cellOptions", "value": { "type": "auto" } } + ] + }, + { + "matcher": { "id": "byName", "options": "total" }, + "properties": [ + { "id": "custom.width", "value": 80 }, + { "id": "custom.cellOptions", "value": { "type": "auto" } } + ] + }, + { + "matcher": { "id": "byName", "options": "Value #A" }, + "properties": [ + { "id": "displayName", "value": "ready" }, + { "id": "custom.width", "value": 80 } + ] + }, + { + "matcher": { "id": "byName", "options": "Value #B" }, + "properties": [ + { "id": "displayName", "value": "total" }, + { "id": "custom.width", "value": 80 } + ] + } ] }, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 14 }, @@ -404,21 +429,13 @@ data: }, "pluginVersion": "10.0.0", "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "label_replace(\n bootchain_dependencies_ready{namespace=~\"$namespace\", name=~\"$bootdependency\"}\n == bootchain_dependencies_total{namespace=~\"$namespace\", name=~\"$bootdependency\"},\n \"status\", \"1\", \"\", \"\"\n) or label_replace(\n bootchain_dependencies_ready{namespace=~\"$namespace\", name=~\"$bootdependency\"}\n != bootchain_dependencies_total{namespace=~\"$namespace\", name=~\"$bootdependency\"},\n \"status\", \"0\", \"\", \"\"\n)", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "bootchain_dependencies_ready{namespace=~\"$namespace\", name=~\"$bootdependency\"}", "format": "table", "instant": true, "legendFormat": "", - "refId": "B" + "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, @@ -426,31 +443,43 @@ data: "format": "table", "instant": true, "legendFormat": "", - "refId": "C" + "refId": "B" } ], "title": "BootDependency Status Table", "transformations": [ - { "id": 
"joinByField", "options": { "byField": "name", "mode": "outer" } }, + { + "id": "joinByLabels", + "options": { "labels": ["namespace", "name"] } + }, { "id": "organize", "options": { - "excludeByName": { - "Time": true, "Time 1": true, "Time 2": true, "Time 3": true, - "namespace 2": true, "namespace 3": true, - "__name__ 1": true, "__name__ 2": true, "__name__ 3": true, - "job 1": true, "job 2": true, "job 3": true, - "instance 1": true, "instance 2": true, "instance 3": true, - "status": true + "excludeByName": {}, + "indexByName": { + "namespace": 0, + "name": 1, + "Value #A": 2, + "Value #B": 3 }, "renameByName": { - "namespace 1": "namespace", - "name": "name", - "Value #B": "ready", - "Value #C": "total", - "Value #A": "all_ready" + "Value #A": "ready", + "Value #B": "total" } } + }, + { + "id": "calculateField", + "options": { + "alias": "status", + "binary": { + "left": "ready", + "operator": "==", + "right": "total" + }, + "mode": "reduceRow", + "reduce": { "reducer": "sum" } + } } ], "type": "table" @@ -751,8 +780,8 @@ data: "time": { "from": "now-1h", "to": "now" }, "timepicker": {}, "timezone": "browser", - "title": "bootchain-operator", + "title": "Bootchain Operator", "uid": "bootchain-operator-v1", - "version": 1 + "version": 3 } {{- end }} diff --git a/internal/controller/bootdependency_controller.go b/internal/controller/bootdependency_controller.go index 416e286..72ae6d2 100644 --- a/internal/controller/bootdependency_controller.go +++ b/internal/controller/bootdependency_controller.go @@ -85,7 +85,7 @@ func (r *BootDependencyReconciler) Reconcile(ctx context.Context, req ctrl.Reque if scheme == "" { scheme = "http" } - url := fmt.Sprintf("%s://%s:%d%s", scheme, depLabel(dep), dep.Port, dep.HTTPPath) + url := fmt.Sprintf("%s://%s:%d%s", scheme, depHost(dep, bd.Namespace), dep.Port, dep.HTTPPath) httpClient := secureClient if dep.Insecure { httpClient = insecureClient @@ -177,13 +177,21 @@ func (r *BootDependencyReconciler) Reconcile(ctx 
context.Context, req ctrl.Reque return ctrl.Result{RequeueAfter: requeueAfterNotReady}, nil } -// depAddress returns the dial address for a dependency. +// depAddress returns the dial address (host:port) for a dependency. // For in-cluster services it resolves to the FQDN; for external hosts it uses the host directly. func depAddress(dep corev1alpha1.ServiceDependency, namespace string) string { + return fmt.Sprintf("%s:%d", depHost(dep, namespace), dep.Port) +} + +// depHost returns the hostname for a dependency. +// For in-cluster services it builds the FQDN <service>.<namespace>.svc.cluster.local so that +// the controller — which runs in a different namespace — can always resolve the service correctly. +// For external dependencies (host field set) it returns the host directly. +func depHost(dep corev1alpha1.ServiceDependency, namespace string) string { if dep.Host != "" { - return fmt.Sprintf("%s:%d", dep.Host, dep.Port) + return dep.Host } - return fmt.Sprintf("%s.%s.svc.cluster.local:%d", dep.Service, namespace, dep.Port) + return fmt.Sprintf("%s.%s.svc.cluster.local", dep.Service, namespace) } // statusAccepted returns true when code is in the accepted list. diff --git a/internal/controller/bootdependency_controller_test.go b/internal/controller/bootdependency_controller_test.go index 402b1d1..ced091f 100644 --- a/internal/controller/bootdependency_controller_test.go +++ b/internal/controller/bootdependency_controller_test.go @@ -194,6 +194,19 @@ var _ = Describe("BootDependency Controller", func() { Expect(<-probed).To(Equal("/ready")) }) + It("should build FQDN from service name and BootDependency namespace for HTTP probes", func() { + // Unit-test depHost directly to guard against the regression where service-based + // HTTP probes used the bare service name instead of the FQDN, causing DNS lookup + // failures when the controller runs in a different namespace than the target service. 
+ dep := corev1alpha1.ServiceDependency{Service: "my-svc", Port: 8080} + Expect(depHost(dep, "my-namespace")).To(Equal("my-svc.my-namespace.svc.cluster.local")) + }) + + It("should use the host field directly when set, not build a FQDN", func() { + dep := corev1alpha1.ServiceDependency{Host: "external.example.com", Port: 443} + Expect(depHost(dep, "any-namespace")).To(Equal("external.example.com")) + }) + It("should resolve an HTTPS dependency when insecure=true and server has a self-signed cert", func() { srv := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK)