From fff9c082b748eba3cb355b22fc7e0c7c22e603ab Mon Sep 17 00:00:00 2001 From: Rui Coelho Date: Sun, 1 Mar 2026 01:31:44 +0000 Subject: [PATCH 1/4] fix: update dashboard --- .../templates/grafana-dashboard.yaml | 113 +++++++++++------- 1 file changed, 71 insertions(+), 42 deletions(-) diff --git a/charts/bootchain-operator/templates/grafana-dashboard.yaml b/charts/bootchain-operator/templates/grafana-dashboard.yaml index e77f9dd..f99d558 100644 --- a/charts/bootchain-operator/templates/grafana-dashboard.yaml +++ b/charts/bootchain-operator/templates/grafana-dashboard.yaml @@ -62,13 +62,12 @@ data: "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "fieldConfig": { "defaults": { - "color": { "mode": "thresholds" }, + "color": { "mode": "fixed", "fixedColor": "green" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ - { "color": "green", "value": null }, - { "color": "red", "value": 1 } + { "color": "green", "value": null } ] }, "unit": "short" @@ -89,7 +88,7 @@ data: "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "count(bootchain_dependencies_total{namespace=~\"$namespace\"})", + "expr": "count(bootchain_dependencies_total{namespace=~\"$namespace\"}) or vector(0)", "instant": true, "legendFormat": "", "refId": "A" @@ -107,8 +106,7 @@ data: "thresholds": { "mode": "absolute", "steps": [ - { "color": "red", "value": null }, - { "color": "green", "value": 1 } + { "color": "green", "value": null } ] }, "unit": "short" @@ -129,7 +127,7 @@ data: "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(bootchain_dependencies_ready{namespace=~\"$namespace\"})", + "expr": "sum(bootchain_dependencies_ready{namespace=~\"$namespace\"}) or vector(0)", "instant": true, "legendFormat": "", "refId": "A" @@ -148,8 +146,7 @@ data: "mode": "absolute", "steps": [ { "color": "green", "value": null }, - { "color": "yellow", "value": 1 }, - { "color": "red", "value": 5 } + { 
"color": "red", "value": 1 } ] }, "unit": "short" @@ -170,7 +167,7 @@ data: "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "sum(bootchain_dependencies_total{namespace=~\"$namespace\"}) - sum(bootchain_dependencies_ready{namespace=~\"$namespace\"})", + "expr": "(sum(bootchain_dependencies_total{namespace=~\"$namespace\"}) or vector(0)) - (sum(bootchain_dependencies_ready{namespace=~\"$namespace\"}) or vector(0))", "instant": true, "legendFormat": "", "refId": "A" @@ -184,7 +181,9 @@ data: "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, - "mappings": [], + "mappings": [ + { "options": { "match": "null", "result": { "color": "green", "index": 0, "text": "0%" } }, "type": "special" } + ], "thresholds": { "mode": "absolute", "steps": [ @@ -201,7 +200,7 @@ data: "id": 4, "options": { "colorMode": "background", - "graphMode": "area", + "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": ["lastNotNull"], "fields": "", "values": false }, @@ -211,8 +210,8 @@ data: "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "rate(bootchain_reconcile_total{result=\"error\"}[$__rate_interval]) / rate(bootchain_reconcile_total[$__rate_interval])", - "instant": false, + "expr": "(sum(rate(bootchain_reconcile_total{result=\"error\"}[5m])) or vector(0)) / ((sum(rate(bootchain_reconcile_total[5m])) or vector(0)) > 0)", + "instant": true, "legendFormat": "", "refId": "A" } @@ -388,10 +387,36 @@ data: } }, "overrides": [ - { "matcher": { "id": "byName", "options": "namespace" }, "properties": [{ "id": "custom.width", "value": 120 }] }, - { "matcher": { "id": "byName", "options": "name" }, "properties": [{ "id": "custom.width", "value": 200 }] }, - { "matcher": { "id": "byName", "options": "ready" }, "properties": [{ "id": "custom.width", "value": 80 }] }, - { "matcher": { "id": "byName", "options": "total" }, "properties": [{ "id": "custom.width", 
"value": 70 }] } + { "matcher": { "id": "byName", "options": "namespace" }, "properties": [{ "id": "custom.width", "value": 160 }] }, + { "matcher": { "id": "byName", "options": "name" }, "properties": [{ "id": "custom.width", "value": 220 }] }, + { + "matcher": { "id": "byName", "options": "ready" }, + "properties": [ + { "id": "custom.width", "value": 80 }, + { "id": "custom.cellOptions", "value": { "type": "auto" } } + ] + }, + { + "matcher": { "id": "byName", "options": "total" }, + "properties": [ + { "id": "custom.width", "value": 80 }, + { "id": "custom.cellOptions", "value": { "type": "auto" } } + ] + }, + { + "matcher": { "id": "byName", "options": "Value #A" }, + "properties": [ + { "id": "displayName", "value": "ready" }, + { "id": "custom.width", "value": 80 } + ] + }, + { + "matcher": { "id": "byName", "options": "Value #B" }, + "properties": [ + { "id": "displayName", "value": "total" }, + { "id": "custom.width", "value": 80 } + ] + } ] }, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 14 }, @@ -404,21 +429,13 @@ data: }, "pluginVersion": "10.0.0", "targets": [ - { - "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, - "expr": "label_replace(\n bootchain_dependencies_ready{namespace=~\"$namespace\", name=~\"$bootdependency\"}\n == bootchain_dependencies_total{namespace=~\"$namespace\", name=~\"$bootdependency\"},\n \"status\", \"1\", \"\", \"\"\n) or label_replace(\n bootchain_dependencies_ready{namespace=~\"$namespace\", name=~\"$bootdependency\"}\n != bootchain_dependencies_total{namespace=~\"$namespace\", name=~\"$bootdependency\"},\n \"status\", \"0\", \"\", \"\"\n)", - "format": "table", - "instant": true, - "legendFormat": "", - "refId": "A" - }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "bootchain_dependencies_ready{namespace=~\"$namespace\", name=~\"$bootdependency\"}", "format": "table", "instant": true, "legendFormat": "", - "refId": "B" + "refId": "A" }, { "datasource": { "type": "prometheus", 
"uid": "${DS_PROMETHEUS}" }, @@ -426,31 +443,43 @@ data: "format": "table", "instant": true, "legendFormat": "", - "refId": "C" + "refId": "B" } ], "title": "BootDependency Status Table", "transformations": [ - { "id": "joinByField", "options": { "byField": "name", "mode": "outer" } }, + { + "id": "joinByLabels", + "options": { "labels": ["namespace", "name"] } + }, { "id": "organize", "options": { - "excludeByName": { - "Time": true, "Time 1": true, "Time 2": true, "Time 3": true, - "namespace 2": true, "namespace 3": true, - "__name__ 1": true, "__name__ 2": true, "__name__ 3": true, - "job 1": true, "job 2": true, "job 3": true, - "instance 1": true, "instance 2": true, "instance 3": true, - "status": true + "excludeByName": {}, + "indexByName": { + "namespace": 0, + "name": 1, + "Value #A": 2, + "Value #B": 3 }, "renameByName": { - "namespace 1": "namespace", - "name": "name", - "Value #B": "ready", - "Value #C": "total", - "Value #A": "all_ready" + "Value #A": "ready", + "Value #B": "total" } } + }, + { + "id": "calculateField", + "options": { + "alias": "status", + "binary": { + "left": "ready", + "operator": "==", + "right": "total" + }, + "mode": "reduceRow", + "reduce": { "reducer": "sum" } + } } ], "type": "table" @@ -753,6 +782,6 @@ data: "timezone": "browser", "title": "bootchain-operator", "uid": "bootchain-operator-v1", - "version": 1 + "version": 3 } {{- end }} From 83a354e690d53cde2ba7f9ca4eeaf616a69d3dda Mon Sep 17 00:00:00 2001 From: Rui Coelho Date: Sun, 1 Mar 2026 01:31:52 +0000 Subject: [PATCH 2/4] fix: use full fqdn --- .../controller/bootdependency_controller.go | 16 +++++++++--- .../bootdependency_controller_test.go | 26 +++++++++++++++++++ 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/internal/controller/bootdependency_controller.go b/internal/controller/bootdependency_controller.go index 416e286..72ae6d2 100644 --- a/internal/controller/bootdependency_controller.go +++ b/internal/controller/bootdependency_controller.go @@ 
-85,7 +85,7 @@ func (r *BootDependencyReconciler) Reconcile(ctx context.Context, req ctrl.Reque if scheme == "" { scheme = "http" } - url := fmt.Sprintf("%s://%s:%d%s", scheme, depLabel(dep), dep.Port, dep.HTTPPath) + url := fmt.Sprintf("%s://%s:%d%s", scheme, depHost(dep, bd.Namespace), dep.Port, dep.HTTPPath) httpClient := secureClient if dep.Insecure { httpClient = insecureClient @@ -177,13 +177,21 @@ func (r *BootDependencyReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{RequeueAfter: requeueAfterNotReady}, nil } -// depAddress returns the dial address for a dependency. +// depAddress returns the dial address (host:port) for a dependency. // For in-cluster services it resolves to the FQDN; for external hosts it uses the host directly. func depAddress(dep corev1alpha1.ServiceDependency, namespace string) string { + return fmt.Sprintf("%s:%d", depHost(dep, namespace), dep.Port) +} + +// depHost returns the hostname for a dependency. +// For in-cluster services it builds the FQDN <service>.<namespace>.svc.cluster.local so that +// the controller — which runs in a different namespace — can always resolve the service correctly. +// For external dependencies (host field set) it returns the host directly. +func depHost(dep corev1alpha1.ServiceDependency, namespace string) string { if dep.Host != "" { - return fmt.Sprintf("%s:%d", dep.Host, dep.Port) + return dep.Host } - return fmt.Sprintf("%s.%s.svc.cluster.local:%d", dep.Service, namespace, dep.Port) + return fmt.Sprintf("%s.%s.svc.cluster.local", dep.Service, namespace) } // statusAccepted returns true when code is in the accepted list. 
diff --git a/internal/controller/bootdependency_controller_test.go b/internal/controller/bootdependency_controller_test.go index 402b1d1..0030e23 100644 --- a/internal/controller/bootdependency_controller_test.go +++ b/internal/controller/bootdependency_controller_test.go @@ -194,6 +194,32 @@ var _ = Describe("BootDependency Controller", func() { Expect(<-probed).To(Equal("/ready")) }) + It("should resolve service name to FQDN using the BootDependency namespace for HTTP probes", func() { + // This test guards against the bug where service-based HTTP probes used the bare + // service name (e.g. "my-svc") instead of the FQDN + // ("my-svc.<namespace>.svc.cluster.local"), which caused DNS lookup failures when + // the controller runs in a different namespace than the target service. + probed := make(chan string, 1) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + select { + case probed <- r.Host: + default: + } + w.WriteHeader(http.StatusOK) + })) + DeferCleanup(srv.Close) + _, port := parseTestServer(srv) + + // Use service (not host) — the controller must build the FQDN. + // We bind the test server on 127.0.0.1, so we override DNS resolution by + // checking the Host header sent by the HTTP client rather than actual DNS. + _ = createAndReconcile("http-fqdn-resource", []corev1alpha1.ServiceDependency{ + {Service: "127.0.0.1", Port: port, HTTPPath: "/healthz"}, + }) + // The Host header must contain the FQDN, not the bare service name. 
+ Expect(<-probed).To(ContainSubstring("svc.cluster.local")) + }) + It("should resolve an HTTPS dependency when insecure=true and server has a self-signed cert", func() { srv := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { w.WriteHeader(http.StatusOK) From 2979c6d7b7c8a155371616105e71ad5c954519ff Mon Sep 17 00:00:00 2001 From: Rui Coelho Date: Sun, 1 Mar 2026 01:43:51 +0000 Subject: [PATCH 3/4] fix: fix slow test --- .../bootdependency_controller_test.go | 33 ++++++------------- 1 file changed, 10 insertions(+), 23 deletions(-) diff --git a/internal/controller/bootdependency_controller_test.go b/internal/controller/bootdependency_controller_test.go index 0030e23..ced091f 100644 --- a/internal/controller/bootdependency_controller_test.go +++ b/internal/controller/bootdependency_controller_test.go @@ -194,30 +194,17 @@ var _ = Describe("BootDependency Controller", func() { Expect(<-probed).To(Equal("/ready")) }) - It("should resolve service name to FQDN using the BootDependency namespace for HTTP probes", func() { - // This test guards against the bug where service-based HTTP probes used the bare - // service name (e.g. "my-svc") instead of the FQDN - // ("my-svc..svc.cluster.local"), which caused DNS lookup failures when - // the controller runs in a different namespace than the target service. - probed := make(chan string, 1) - srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { - select { - case probed <- r.Host: - default: - } - w.WriteHeader(http.StatusOK) - })) - DeferCleanup(srv.Close) - _, port := parseTestServer(srv) + It("should build FQDN from service name and BootDependency namespace for HTTP probes", func() { + // Unit-test depHost directly to guard against the regression where service-based + // HTTP probes used the bare service name instead of the FQDN, causing DNS lookup + // failures when the controller runs in a different namespace than the target service. 
+ dep := corev1alpha1.ServiceDependency{Service: "my-svc", Port: 8080} + Expect(depHost(dep, "my-namespace")).To(Equal("my-svc.my-namespace.svc.cluster.local")) + }) - // Use service (not host) — the controller must build the FQDN. - // We bind the test server on 127.0.0.1, so we override DNS resolution by - // checking the Host header sent by the HTTP client rather than actual DNS. - _ = createAndReconcile("http-fqdn-resource", []corev1alpha1.ServiceDependency{ - {Service: "127.0.0.1", Port: port, HTTPPath: "/healthz"}, - }) - // The Host header must contain the FQDN, not the bare service name. - Expect(<-probed).To(ContainSubstring("svc.cluster.local")) + It("should use the host field directly when set, not build a FQDN", func() { + dep := corev1alpha1.ServiceDependency{Host: "external.example.com", Port: 443} + Expect(depHost(dep, "any-namespace")).To(Equal("external.example.com")) }) It("should resolve an HTTPS dependency when insecure=true and server has a self-signed cert", func() { From d80f3a92419d630f48d70a18ed8091e77fb8a181 Mon Sep 17 00:00:00 2001 From: Rui Coelho Date: Sun, 1 Mar 2026 01:45:38 +0000 Subject: [PATCH 4/4] fix: update board name --- charts/bootchain-operator/templates/grafana-dashboard.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/charts/bootchain-operator/templates/grafana-dashboard.yaml b/charts/bootchain-operator/templates/grafana-dashboard.yaml index f99d558..b47fb41 100644 --- a/charts/bootchain-operator/templates/grafana-dashboard.yaml +++ b/charts/bootchain-operator/templates/grafana-dashboard.yaml @@ -780,7 +780,7 @@ data: "time": { "from": "now-1h", "to": "now" }, "timepicker": {}, "timezone": "browser", - "title": "bootchain-operator", + "title": "Bootchain Operator", "uid": "bootchain-operator-v1", "version": 3 }