From 01c514803bb89885096acd872f7c283bb123253f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Thu, 9 Apr 2026 11:50:54 +0200 Subject: [PATCH 1/2] Require maintenance mode for bare-metal checks --- README.md | 4 ++ internal/cmd/createhostyaml.go | 1 + internal/createhostyaml/createhostyaml.go | 2 +- .../createhostyaml/createhostyaml_test.go | 2 +- internal/provisioncheck/provisioncheck.go | 35 ++++++---- .../provisioncheck/provisioncheck_test.go | 66 +++++++++++++++++++ 6 files changed, 96 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index e0b4c27..0de9381 100644 --- a/README.md +++ b/README.md @@ -28,12 +28,16 @@ go run github.com/syself/caphcli@latest create-host-yaml 1234567 1234567.yaml This will create a HetznerBareMetalHost YAML file: `1234567.yaml` +The generated host starts with `spec.maintenanceMode: true`, and the command prints a hint to run `check-bm-servers` next. + After that you can check if the rescue system is reachable reliably: ```console go run github.com/syself/caphcli@latest check-bm-servers 1234567.yaml ``` +`check-bm-servers` refuses to run unless `spec.maintenanceMode` is `true`. After a successful check it prints a hint to disable maintenance mode again. + ## CLI Help diff --git a/internal/cmd/createhostyaml.go b/internal/cmd/createhostyaml.go index 8c5c7fc..62d83c9 100644 --- a/internal/cmd/createhostyaml.go +++ b/internal/cmd/createhostyaml.go @@ -56,6 +56,7 @@ YAML file to the requested output path. Progress and confirmation prompts go to } f = nil _, _ = fmt.Fprintf(cfg.LogOutput, "✓ created %s\n", outputFile) + _, _ = fmt.Fprintf(cfg.LogOutput, "Hint: run `caphcli check-bm-servers %s` next.\n", outputFile) return nil }, diff --git a/internal/createhostyaml/createhostyaml.go b/internal/createhostyaml/createhostyaml.go index ddf7038..c9213c6 100644 --- a/internal/createhostyaml/createhostyaml.go +++ b/internal/createhostyaml/createhostyaml.go @@ -586,7 +586,7 @@ func renderTemplate(server *models.Server, name string, disks []disk) string { } fmt.Fprintf(&b, " # wwn: %q\n", disk.WWN) } - b.WriteString(" maintenanceMode: false\n") + b.WriteString(" maintenanceMode: true\n") fmt.Fprintf(&b, " description: %q\n", defaultDescription(server)) return b.String() } diff --git a/internal/createhostyaml/createhostyaml_test.go b/internal/createhostyaml/createhostyaml_test.go index 66129cf..17c26ee 100644 --- a/internal/createhostyaml/createhostyaml_test.go +++ b/internal/createhostyaml/createhostyaml_test.go @@ -72,7 +72,7 @@ func TestRenderTemplate(t *testing.T) { `serverID: 1751550 # Robot name: ci-box-1751550, IP: 144.76.74.13`, `wwn: "0x0001"`, `# wwn: "0x0002"`, - `maintenanceMode: false`, + `maintenanceMode: true`, `description: "ci-box-1751550"`, } diff --git a/internal/provisioncheck/provisioncheck.go b/internal/provisioncheck/provisioncheck.go index e9bd3a3..b96abda 100644 --- a/internal/provisioncheck/provisioncheck.go +++ b/internal/provisioncheck/provisioncheck.go @@ -241,14 +241,6 @@ func Run(ctx context.Context, cfg Config) error { r := newRunner(cfg) - // Load all local inputs first so parse and credential errors fail before any - // Robot API call or reboot on the target machine. - creds, err := loadEnvCredentials() - if err != nil { - return err - } - r.creds = creds - hosts, err := loadHostsFromHBMHYAMLFile(cfg.HbmhYAMLFile) if err != nil { return err @@ -265,6 +257,14 @@ func Run(ctx context.Context, cfg Config) error { } r.host = host + // Load credentials only after the selected host has passed local manifest + // validation, including the maintenance-mode safety gate. + creds, err := loadEnvCredentials() + if err != nil { + return err + } + r.creds = creds + // Ask for confirmation only after we know the exact host and WWNs that will // be wiped by the provisioning loop. if err := r.confirmDestructiveAction(); err != nil { @@ -363,6 +363,7 @@ func (r *runner) run(ctx context.Context) error { _, _ = fmt.Fprintln(r.out) r.logf("all checks passed: machine %q (serverID=%d) completed two rescue+install+boot cycles", r.host.Name, r.host.Spec.ServerID) + _, _ = fmt.Fprintf(r.out, "Hint: set spec.maintenanceMode back to false in %s now.\n", r.cfg.HbmhYAMLFile) return nil } @@ -792,8 +793,8 @@ func selectHost(hosts []infrav1.HetznerBareMetalHost, name string) (infrav1.Hetz if name != "" { for _, host := range hosts { if host.Name == name { - if host.Spec.RootDeviceHints == nil { - return infrav1.HetznerBareMetalHost{}, fmt.Errorf("host %q has no spec.rootDeviceHints", host.Name) + if err := validateHostForProvisionCheck(host); err != nil { + return infrav1.HetznerBareMetalHost{}, err } return host, nil } @@ -816,12 +817,22 @@ func selectHost(hosts []infrav1.HetznerBareMetalHost, name string) (infrav1.Hetz } host := hosts[0] - if host.Spec.RootDeviceHints == nil { - return infrav1.HetznerBareMetalHost{}, fmt.Errorf("host %q has no spec.rootDeviceHints", host.Name) + if err := validateHostForProvisionCheck(host); err != nil { + return infrav1.HetznerBareMetalHost{}, err } return host, nil } +func validateHostForProvisionCheck(host infrav1.HetznerBareMetalHost) error { + if host.Spec.RootDeviceHints == nil { + return fmt.Errorf("host %q has no spec.rootDeviceHints", host.Name) + } + if host.Spec.MaintenanceMode == nil || !*host.Spec.MaintenanceMode { + return fmt.Errorf("host %q must set spec.maintenanceMode: true before running check-bm-servers", host.Name) + } + return nil +} + func listHostNames(hosts []infrav1.HetznerBareMetalHost) []string { names := make([]string, 0, len(hosts)) for _, host := range hosts { diff --git a/internal/provisioncheck/provisioncheck_test.go b/internal/provisioncheck/provisioncheck_test.go index 04f5627..4106113 100644 --- a/internal/provisioncheck/provisioncheck_test.go +++ b/internal/provisioncheck/provisioncheck_test.go @@ -19,7 +19,10 @@ package provisioncheck import ( "os" "path/filepath" + "strings" "testing" + + infrav1 "github.com/syself/cluster-api-provider-hetzner/api/v1beta1" ) func TestLoadHostsFromHBMHYAMLFile(t *testing.T) { @@ -102,3 +105,66 @@ items: }) } } + +func TestSelectHostRequiresMaintenanceMode(t *testing.T) { + t.Parallel() + + trueValue := true + falseValue := false + + tests := []struct { + name string + host infrav1.HetznerBareMetalHost + wantErr string + }{ + { + name: "maintenance mode unset", + host: infrav1.HetznerBareMetalHost{ + Spec: infrav1.HetznerBareMetalHostSpec{ + RootDeviceHints: &infrav1.RootDeviceHints{WWN: "0x1"}, + }, + }, + wantErr: `must set spec.maintenanceMode: true`, + }, + { + name: "maintenance mode false", + host: infrav1.HetznerBareMetalHost{ + Spec: infrav1.HetznerBareMetalHostSpec{ + RootDeviceHints: &infrav1.RootDeviceHints{WWN: "0x1"}, + MaintenanceMode: &falseValue, + }, + }, + wantErr: `must set spec.maintenanceMode: true`, + }, + { + name: "maintenance mode true", + host: infrav1.HetznerBareMetalHost{ + Spec: infrav1.HetznerBareMetalHostSpec{ + RootDeviceHints: &infrav1.RootDeviceHints{WWN: "0x1"}, + MaintenanceMode: &trueValue, + }, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + + tt.host.Name = tt.name + _, err := selectHost([]infrav1.HetznerBareMetalHost{tt.host}, "") + if tt.wantErr == "" { + if err != nil { + t.Fatalf("selectHost() error = %v", err) + } + return + } + if err == nil { + t.Fatalf("selectHost() error = nil, want substring %q", tt.wantErr) + } + if !strings.Contains(err.Error(), tt.wantErr) { + t.Fatalf("selectHost() error = %q, want substring %q", err.Error(), tt.wantErr) + } + }) + } +} From f2c90707914e9ffa8077ecdcb8644b0000599bb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20G=C3=BCttler?= Date: Thu, 9 Apr 2026 12:02:14 +0200 Subject: [PATCH 2/2] rename check-bm-server command --- README.md | 16 ++++++++-------- .../{check_bm_servers.go => check_bm_server.go} | 8 ++++---- internal/cmd/createhostyaml.go | 2 +- internal/cmd/root.go | 2 +- internal/provisioncheck/provisioncheck.go | 2 +- internal/tools/readmegen/main.go | 4 ++-- 6 files changed, 17 insertions(+), 17 deletions(-) rename internal/cmd/{check_bm_servers.go => check_bm_server.go} (93%) diff --git a/README.md b/README.md index 0de9381..5e06288 100644 --- a/README.md +++ b/README.md @@ -28,15 +28,15 @@ go run github.com/syself/caphcli@latest create-host-yaml 1234567 1234567.yaml This will create a HetznerBareMetalHost YAML file: `1234567.yaml` -The generated host starts with `spec.maintenanceMode: true`, and the command prints a hint to run `check-bm-servers` next. +The generated host starts with `spec.maintenanceMode: true`, and the command prints a hint to run `check-bm-server` next. After that you can check if the rescue system is reachable reliably: ```console -go run github.com/syself/caphcli@latest check-bm-servers 1234567.yaml +go run github.com/syself/caphcli@latest check-bm-server 1234567.yaml ``` -`check-bm-servers` refuses to run unless `spec.maintenanceMode` is `true`. After a successful check it prints a hint to disable maintenance mode again. +`check-bm-server` refuses to run unless `spec.maintenanceMode` is `true`. After a successful check it prints a hint to disable maintenance mode again. @@ -51,7 +51,7 @@ Usage: caphcli [command] Available Commands: - check-bm-servers Validate rescue and provisioning reliability for one bare-metal server + check-bm-server Validate rescue and provisioning reliability for one bare-metal server completion Generate the autocompletion script for the specified shell create-host-yaml Generate a HetznerBareMetalHost YAML file for one Robot server help Help about any command @@ -62,7 +62,7 @@ Flags: Use "caphcli [command] --help" for more information about a command. ``` -### `caphcli check-bm-servers --help` +### `caphcli check-bm-server --help` ```text Validate rescue and provisioning reliability for one HetznerBareMetalHost from a local YAML file. @@ -72,16 +72,16 @@ HetznerBareMetalHost objects and then talks directly to Hetzner Robot plus the target server. Usage: - caphcli check-bm-servers FILE [flags] + caphcli check-bm-server FILE [flags] Examples: - caphcli check-bm-servers \ + caphcli check-bm-server \ test/e2e/data/infrastructure-hetzner/v1beta1/bases/hetznerbaremetalhosts.yaml \ --name bm-e2e-1731561 Flags: --force Skip the destructive-action confirmation prompt - -h, --help help for check-bm-servers + -h, --help help for check-bm-server --image-path string Installimage IMAGE path for operating system inside the Hetzner rescue system (default "/root/.oldroot/nfs/images/Ubuntu-2404-noble-amd64-base.tar.gz") --name string HetznerBareMetalHost metadata.name. Optional if YAML contains exactly one host --poll-interval duration Polling interval for wait steps (default 10s) diff --git a/internal/cmd/check_bm_servers.go b/internal/cmd/check_bm_server.go similarity index 93% rename from internal/cmd/check_bm_servers.go rename to internal/cmd/check_bm_server.go index 8da4b12..c8a9a54 100644 --- a/internal/cmd/check_bm_servers.go +++ b/internal/cmd/check_bm_server.go @@ -10,20 +10,20 @@ import ( "github.com/syself/caphcli/internal/provisioncheck" ) -func newCheckBMServersCommand() *cobra.Command { +func newCheckBMServerCommand() *cobra.Command { cfg := provisioncheck.DefaultConfig() cfg.Input = os.Stdin cfg.Output = os.Stdout cmd := &cobra.Command{ - Use: "check-bm-servers FILE", + Use: "check-bm-server FILE", Short: "Validate rescue and provisioning reliability for one bare-metal server", Long: `Validate rescue and provisioning reliability for one HetznerBareMetalHost from a local YAML file. The command does not talk to Kubernetes. It reads one local YAML file containing HetznerBareMetalHost objects and then talks directly to Hetzner Robot plus the target server.`, - Example: ` caphcli check-bm-servers \ + Example: ` caphcli check-bm-server \ test/e2e/data/infrastructure-hetzner/v1beta1/bases/hetznerbaremetalhosts.yaml \ --name bm-e2e-1731561`, Args: cobra.ExactArgs(1), @@ -35,7 +35,7 @@ target server.`, } if err := provisioncheck.Run(context.Background(), cfg); err != nil { - return fmt.Errorf("caphcli check-bm-servers failed for %q: %w", cfg.Name, err) + return fmt.Errorf("caphcli check-bm-server failed for %q: %w", cfg.Name, err) } return nil diff --git a/internal/cmd/createhostyaml.go b/internal/cmd/createhostyaml.go index 62d83c9..24d2aee 100644 --- a/internal/cmd/createhostyaml.go +++ b/internal/cmd/createhostyaml.go @@ -56,7 +56,7 @@ YAML file to the requested output path. Progress and confirmation prompts go to } f = nil _, _ = fmt.Fprintf(cfg.LogOutput, "✓ created %s\n", outputFile) - _, _ = fmt.Fprintf(cfg.LogOutput, "Hint: run `caphcli check-bm-servers %s` next.\n", outputFile) + _, _ = fmt.Fprintf(cfg.LogOutput, "Hint: run `caphcli check-bm-server %s` next.\n", outputFile) return nil }, diff --git a/internal/cmd/root.go b/internal/cmd/root.go index 07944b6..4cc8aa3 100644 --- a/internal/cmd/root.go +++ b/internal/cmd/root.go @@ -20,7 +20,7 @@ func NewRootCommand() *cobra.Command { SilenceErrors: false, } - rootCmd.AddCommand(newCheckBMServersCommand()) + rootCmd.AddCommand(newCheckBMServerCommand()) rootCmd.AddCommand(newCreateHostYAMLCommand()) return rootCmd diff --git a/internal/provisioncheck/provisioncheck.go b/internal/provisioncheck/provisioncheck.go index b96abda..c94f5c2 100644 --- a/internal/provisioncheck/provisioncheck.go +++ b/internal/provisioncheck/provisioncheck.go @@ -828,7 +828,7 @@ func validateHostForProvisionCheck(host infrav1.HetznerBareMetalHost) error { return fmt.Errorf("host %q has no spec.rootDeviceHints", host.Name) } if host.Spec.MaintenanceMode == nil || !*host.Spec.MaintenanceMode { - return fmt.Errorf("host %q must set spec.maintenanceMode: true before running check-bm-servers", host.Name) + return fmt.Errorf("host %q must set spec.maintenanceMode: true before running check-bm-server", host.Name) } return nil } diff --git a/internal/tools/readmegen/main.go b/internal/tools/readmegen/main.go index 4ce3c21..c235791 100644 --- a/internal/tools/readmegen/main.go +++ b/internal/tools/readmegen/main.go @@ -23,7 +23,7 @@ const generatedSectionTemplate = `## CLI Help {{ROOT_HELP}} ` + "```" + ` -### ` + "`caphcli check-bm-servers --help`" + ` +### ` + "`caphcli check-bm-server --help`" + ` ` + "```text" + ` {{CHECK_HELP}} @@ -42,7 +42,7 @@ func main() { fail(err) } - checkHelp, err := renderHelp("check-bm-servers") + checkHelp, err := renderHelp("check-bm-server") if err != nil { fail(err) }