From 80536a9b79d498953bb5fbaa2257c05d43c48914 Mon Sep 17 00:00:00 2001 From: Robert Gregor <348865+bobbravo2@users.noreply.github.com> Date: Sun, 29 Mar 2026 21:33:41 -0400 Subject: [PATCH 1/2] feat: add component benchmark harness with SIGINT handling Adds a build-time benchmark harness that measures cold-install and warm-rebuild times for every component, compares baseline vs candidate refs, and emits human/TSV/JSON reports. Key implementation details: - Proper signal handling: Ctrl+C recursively kills process trees (bench_kill_tree via pgrep), cleans up git worktrees and temp dirs, then re-raises SIGINT so make sees exit code 130. - All cd calls in bench-manifest.sh guarded with || return 1 (benchmark functions) or || return 0 (cleanup functions) per shellcheck SC2164. - CI workflow (.github/workflows/component-benchmarks.yml) runs self-tests and full benchmarks on workflow_dispatch or 'benchmark' label. - Self-test suite (tests/bench-test.sh) validates syntax, function coverage, report generation, and ANSI suppression (8 tests). - CLAUDE.md, dev-cluster SKILL, and .env.local.example updated with benchmark usage guidance. 
Made-with: Cursor --- .claude/skills/dev-cluster/SKILL.md | 40 + .env.local.example | 17 + .github/workflows/component-benchmarks.yml | 120 ++ .gitignore | 1 + CLAUDE.md | 21 + Makefile | 294 ++++- e2e/scripts/setup-kind.sh | 5 +- scripts/benchmarks/README.md | 131 ++ scripts/benchmarks/bench-manifest.sh | 352 +++++ scripts/benchmarks/component-bench.sh | 1386 ++++++++++++++++++++ tests/bench-test.sh | 187 +++ tests/local-dev-test.sh | 26 +- 12 files changed, 2560 insertions(+), 20 deletions(-) create mode 100644 .github/workflows/component-benchmarks.yml create mode 100644 scripts/benchmarks/README.md create mode 100644 scripts/benchmarks/bench-manifest.sh create mode 100644 scripts/benchmarks/component-bench.sh create mode 100644 tests/bench-test.sh diff --git a/.claude/skills/dev-cluster/SKILL.md b/.claude/skills/dev-cluster/SKILL.md index bf0b2bd38..e2aad799e 100644 --- a/.claude/skills/dev-cluster/SKILL.md +++ b/.claude/skills/dev-cluster/SKILL.md @@ -382,6 +382,46 @@ npm run dev - Backend, operator, or runner changes (those still need image rebuild + load) - Testing changes to container configuration or deployment manifests +## Benchmarking Developer Loops + +Use the benchmark harness when the user wants measured cold-start or rebuild timing rather than ad hoc impressions. + +### Commands + +```bash +# Human-friendly local summary +make benchmark + +# Agent / automation friendly output +make benchmark FORMAT=tsv + +# Single component +make benchmark COMPONENT=frontend MODE=cold +make benchmark COMPONENT=backend MODE=warm +``` + +### Agent Guidance + +- Prefer `FORMAT=tsv` when another agent, script, or evaluation harness will consume the output. +- Prefer the default `human` format for interactive local use in a terminal. +- `frontend` benchmarking requires **Node.js 20+**. +- `warm` currently measures **rebuild proxies**, not browser-observed hot reload latency. 
+- If `reports/benchmarks/` is not writable in the current environment, the harness will fall back to a temp directory and print a warning.
+- Session benchmarking is **contract-only** in v1 (`bench_session_*` stubs in `scripts/benchmarks/bench-manifest.sh`).
+- Start with the **smallest relevant benchmark**:
+  - backend/operator/public-api change -> `MODE=warm COMPONENT=<component> REPEATS=1`
+  - frontend contributor setup -> `MODE=cold COMPONENT=frontend REPEATS=1`
+  - only run all components when you explicitly need the whole matrix
+- Treat preflight failures as useful environment signals; do not work around them unless the user asks.
+- Use full-sweep benchmarking sparingly because each component still performs untimed setup before the measured warm rebuild.
+
+### Interpreting Results
+
+- `cold`: approximates first-contributor setup/install cost with isolated caches
+- `warm`: approximates incremental rebuild cost after setup has already completed
+- `budget_ok=false` on cold runs means the component exceeded the 60-second contributor budget
+- Large deltas on a single repeat should be treated cautiously; use more repeats before drawing conclusions
+
 ## Best Practices
 
 1. **Use local dev server for frontend**: Fastest feedback loop, no image rebuilds needed
diff --git a/.env.local.example b/.env.local.example
index 901a763b4..1c84926db 100644
--- a/.env.local.example
+++ b/.env.local.example
@@ -1 +1,18 @@
+# Root `.env.local` (optional, loaded by Makefile via `-include .env.local`)
+# Used for kind-on-remote-host and other repo-wide overrides.
+#
+# Example: point tools at a remote machine running kind (Tailscale, etc.)
 KIND_HOST=100.x.x.x
+
+# --- Frontend local dev (`components/frontend/.env.local`) ---
+# Prefer generating this file with `make dev-env` after the cluster is up; it sets:
+# BACKEND_URL=http://localhost:<port>/api
+# OC_TOKEN=<token>
+# ENABLE_OC_WHOAMI=0
+#
+# `make dev COMPONENT=frontend` writes/updates that file and runs `npm run dev`.
+# `make dev COMPONENT=frontend,backend` uses BACKEND_URL=http://localhost:8080/api (local go run). +# +# Makefile variables for the dev workflow (pass on the command line): +# COMPONENT=frontend|backend|frontend,backend +# AUTO_CLUSTER=true # run kind-up without prompting if cluster is missing diff --git a/.github/workflows/component-benchmarks.yml b/.github/workflows/component-benchmarks.yml new file mode 100644 index 000000000..4a41e996d --- /dev/null +++ b/.github/workflows/component-benchmarks.yml @@ -0,0 +1,120 @@ +name: Component Benchmarks + +on: + workflow_dispatch: + inputs: + components: + description: Components to benchmark (comma-separated or all) + required: false + default: all + type: string + mode: + description: cold, warm, or both + required: false + default: both + type: string + baseline_ref: + description: Optional baseline git ref + required: false + default: "" + type: string + pull_request: + types: [labeled] + +jobs: + benchmark: + if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'benchmark' + runs-on: ubuntu-latest + timeout-minutes: 90 + + steps: + - name: Checkout code + uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v6 + with: + go-version-file: components/backend/go.mod + cache: false + + - name: Set up Node.js + uses: actions/setup-node@v6 + with: + node-version: "20" + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + + - name: Run benchmark harness self-tests + run: | + bash tests/bench-test.sh + + - name: Run component benchmarks + env: + COMPONENTS: ${{ inputs.components }} + MODE: ${{ inputs.mode }} + BASELINE_REF_INPUT: ${{ inputs.baseline_ref }} + run: | + set -euo pipefail + + ARGS=() + if [[ -n "${COMPONENTS:-}" && "${COMPONENTS}" != "all" ]]; then + ARGS+=(--components "${COMPONENTS}") + fi + if [[ -n "${MODE:-}" ]]; then + ARGS+=(--mode "${MODE}") + fi + if [[ -n "${BASELINE_REF_INPUT:-}" ]]; then + 
ARGS+=(--baseline-ref "${BASELINE_REF_INPUT}") + fi + + bash scripts/benchmarks/component-bench.sh --ci "${ARGS[@]}" >/dev/null + + - name: Publish benchmark summary + if: always() + run: | + { + echo "### Component Benchmarks" + echo + + if [[ -f reports/benchmarks/results.tsv ]]; then + OVER_BUDGET=$(awk -F'\t' 'NR > 1 && $2 == "cold" && $8 == "false" { print $1 " (" $4 "s)" }' reports/benchmarks/results.tsv) + REGRESSIONS=$(awk -F'\t' 'NR > 1 && ($6 + 0) > 10.0 { print $1 " " $2 " (" $6 "%)" }' reports/benchmarks/results.tsv) + + if [[ -n "${OVER_BUDGET}" ]]; then + echo "**Over 60s budget:**" + while IFS= read -r line; do + [[ -n "$line" ]] && echo "- $line" + done <<<"${OVER_BUDGET}" + echo + fi + + if [[ -n "${REGRESSIONS}" ]]; then + echo "**Regressions over 10%:**" + while IFS= read -r line; do + [[ -n "$line" ]] && echo "- $line" + done <<<"${REGRESSIONS}" + echo + fi + fi + + echo '```text' + if [[ -f reports/benchmarks/results.human.txt ]]; then + cat reports/benchmarks/results.human.txt + else + echo "No human-readable benchmark report was generated." + fi + echo '```' + } >> "$GITHUB_STEP_SUMMARY" + + - name: Upload benchmark artifacts + if: always() + uses: actions/upload-artifact@v6 + with: + name: component-benchmarks-${{ github.run_id }} + path: reports/benchmarks/ + retention-days: 7 diff --git a/.gitignore b/.gitignore index 408320472..d200b9908 100644 --- a/.gitignore +++ b/.gitignore @@ -60,6 +60,7 @@ venv.bak/ # Environment files .env .env.local +components/frontend/.env.local .env.uat .dev-bootstrap.env diff --git a/CLAUDE.md b/CLAUDE.md index 4ace5667f..231d23d0f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -42,6 +42,7 @@ make test # Run tests make lint # Lint code make kind-up # Start local Kind cluster make test-e2e-local # Run E2E tests against Kind +make benchmark # Run component benchmark harness ``` ### Per-Component @@ -61,6 +62,26 @@ cd components/runners/ambient-runner && uv venv && uv pip install -e . 
cd docs && npm run dev # http://localhost:4321 ``` +### Benchmarking + +```shell +# Human-friendly summary +make benchmark + +# Agent / automation friendly output +make benchmark FORMAT=tsv + +# Single component +make benchmark COMPONENT=frontend MODE=cold +``` + +Benchmark notes: + +- `frontend` requires **Node.js 20+** +- `FORMAT=tsv` is preferred for agents to minimize token usage +- `warm` measures rebuild proxies, not browser-observed hot reload latency +- See `scripts/benchmarks/README.md` for semantics and caveats + ## Critical Context - **User token auth required**: All user-facing API ops use `GetK8sClientsForRequest(c)`, never the backend service account diff --git a/Makefile b/Makefile index 580cc02d4..dcd5721de 100644 --- a/Makefile +++ b/Makefile @@ -3,11 +3,12 @@ .PHONY: local-dev-token .PHONY: local-logs local-logs-backend local-logs-frontend local-logs-operator local-shell local-shell-frontend .PHONY: local-test local-test-dev local-test-quick test-all local-troubleshoot local-port-forward local-stop-port-forward -.PHONY: push-all registry-login setup-hooks remove-hooks lint check-kind check-kubectl check-local-context dev-bootstrap kind-rebuild kind-reload-backend kind-reload-frontend kind-reload-operator kind-status kind-login +.PHONY: push-all registry-login setup-hooks remove-hooks lint check-minikube check-kind check-kubectl check-local-context dev-bootstrap kind-rebuild kind-reload-backend kind-reload-frontend kind-reload-operator kind-status kind-login +.PHONY: preflight-cluster preflight dev-env dev .PHONY: e2e-test e2e-setup e2e-clean deploy-langfuse-openshift .PHONY: unleash-port-forward unleash-status .PHONY: setup-minio minio-console minio-logs minio-status -.PHONY: validate-makefile lint-makefile check-shell makefile-health +.PHONY: validate-makefile lint-makefile check-shell makefile-health benchmark benchmark-ci .PHONY: _create-operator-config _auto-port-forward _show-access-info _kind-load-images # Default target @@ -129,10 +130,11 
@@ help: ## Display this help message @echo '$(COLOR_BOLD)Ambient Code Platform - Development Makefile$(COLOR_RESET)' @echo '' @echo '$(COLOR_BOLD)Quick Start:$(COLOR_RESET)' - @echo ' $(COLOR_GREEN)make kind-up$(COLOR_RESET) Start local development environment' - @echo ' $(COLOR_GREEN)make local-status$(COLOR_RESET) Check status of local environment' - @echo ' $(COLOR_GREEN)make local-logs$(COLOR_RESET) View logs from all components' - @echo ' $(COLOR_GREEN)make local-down$(COLOR_RESET) Stop local environment' + @echo ' $(COLOR_GREEN)make dev$(COLOR_RESET) Start local dev environment (interactive)' + @echo ' $(COLOR_GREEN)make dev COMPONENT=frontend$(COLOR_RESET) Hot-reload frontend against kind cluster' + @echo ' $(COLOR_GREEN)make kind-up$(COLOR_RESET) Full cluster deploy (no hot-reload)' + @echo ' $(COLOR_GREEN)make kind-status$(COLOR_RESET) Check kind cluster status' + @echo ' $(COLOR_GREEN)make kind-down$(COLOR_RESET) Stop and delete the kind cluster' @echo '' @echo '$(COLOR_BOLD)Quality Assurance:$(COLOR_RESET)' @echo ' $(COLOR_GREEN)make validate-makefile$(COLOR_RESET) Validate Makefile quality (runs in CI)' @@ -151,7 +153,7 @@ help: ## Display this help message @echo ' Ports: frontend=$(KIND_FWD_FRONTEND_PORT) backend=$(KIND_FWD_BACKEND_PORT) http=$(KIND_HTTP_PORT) https=$(KIND_HTTPS_PORT)' @echo '' @echo '$(COLOR_BOLD)Examples:$(COLOR_RESET)' - @echo ' make kind-up LOCAL_IMAGES=true Build from source and deploy to kind (requires podman)' + @echo ' make kind-up LOCAL_IMAGES=true Build from source and deploy to kind' @echo ' make kind-rebuild Rebuild and reload all components in kind' @echo ' make kind-status Show all kind clusters and their ports' @echo ' make kind-up CONTAINER_ENGINE=docker' @@ -536,7 +538,246 @@ clean: ## Clean up Kubernetes resources ##@ Kind Local Development -kind-up: check-kind check-kubectl ## Start kind cluster (LOCAL_IMAGES=true to build from source, requires podman) +# COMPONENT for dev/preflight: comma-separated frontend, 
backend (e.g. frontend,backend). Empty = port-forward only. +COMPONENT ?= +# When true, `make dev` runs `kind-up` without prompting if the cluster is missing. +AUTO_CLUSTER ?= false +# Backend URL for dev-env: use local go run (8080) vs port-forwarded cluster port. +DEV_BACKEND_LOCAL ?= false + +preflight-cluster: ## Validate kind, kubectl, and container engine (daemon running) + @echo "$(COLOR_BOLD)Preflight (cluster tools)$(COLOR_RESET)" + @FAILED=0; \ + OS=$$(uname -s); \ + printf '%s\n' "---"; \ + if command -v kind >/dev/null 2>&1; then \ + KVER=$$(kind version -q 2>/dev/null || kind version 2>/dev/null | head -1); \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) kind $$KVER"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) kind not found"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install kind"; else echo " Install: go install sigs.k8s.io/kind@latest"; fi; \ + echo " https://kind.sigs.k8s.io/docs/user/quick-start/"; \ + FAILED=1; \ + fi; \ + if command -v kubectl >/dev/null 2>&1; then \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) kubectl $$(kubectl version --client -o yaml 2>/dev/null | grep gitVersion | head -1 | sed 's/.*: //' || kubectl version --client 2>/dev/null | head -1)"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) kubectl not found"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install kubectl"; else echo " Install: https://kubernetes.io/docs/tasks/tools/"; fi; \ + FAILED=1; \ + fi; \ + CE="$(CONTAINER_ENGINE)"; \ + if [ "$$CE" = "podman" ]; then \ + if command -v podman >/dev/null 2>&1 && podman info >/dev/null 2>&1; then \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) podman $$(podman --version 2>/dev/null | head -1) (daemon running)"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) podman missing or daemon not running"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install podman && podman machine start"; else echo " Install: https://podman.io/getting-started/installation"; fi; \ + FAILED=1; \ + fi; \ + else \ + if command -v docker 
>/dev/null 2>&1 && docker info >/dev/null 2>&1; then \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) docker $$(docker --version 2>/dev/null) (daemon running)"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) docker missing or daemon not running"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: https://docs.docker.com/desktop/install/mac-install/"; else echo " Install: https://docs.docker.com/engine/install/"; fi; \ + FAILED=1; \ + fi; \ + fi; \ + printf '%s\n' "---"; \ + if [ "$$FAILED" -ne 0 ]; then \ + echo "$(COLOR_RED)Preflight failed: fix the issues above.$(COLOR_RESET)"; \ + exit 1; \ + fi; \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) Cluster tool checks passed." + +preflight: preflight-cluster ## Validate dev environment (cluster tools + optional Node/Go by COMPONENT) + @echo "$(COLOR_BOLD)Preflight (language tools)$(COLOR_RESET)" + @FAILED=0; \ + OS=$$(uname -s); \ + NEED_NODE=0; NEED_GO=0; \ + COMP="$(COMPONENT)"; \ + if [ -z "$$COMP" ]; then NEED_NODE=1; NEED_GO=1; \ + else \ + for piece in $$(echo "$$COMP" | tr ',' ' '); do \ + p=$$(echo "$$piece" | sed 's/^[[:space:]]*//;s/[[:space:]]*$$//'); \ + [ -z "$$p" ] && continue; \ + case "$$p" in \ + frontend) NEED_NODE=1 ;; \ + backend) NEED_GO=1 ;; \ + *) echo "$(COLOR_RED)✗$(COLOR_RESET) Unknown COMPONENT: $$p (use frontend, backend, or frontend,backend)"; FAILED=1 ;; \ + esac; \ + done; \ + fi; \ + if [ "$$NEED_NODE" -eq 1 ]; then \ + if command -v node >/dev/null 2>&1; then \ + NVER=$$(node -v 2>/dev/null | sed 's/^v//'); \ + NMAJ=$$(echo "$$NVER" | cut -d. 
-f1); \ + if [ "$${NMAJ:-0}" -ge 20 ] 2>/dev/null; then \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) node v$$NVER"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) node $$NVER (need >= 20)"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install node@20"; else echo " Install: https://nodejs.org/ (LTS)"; fi; \ + FAILED=1; \ + fi; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) node not found (need >= 20)"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install node@20"; else echo " Install: https://nodejs.org/"; fi; \ + FAILED=1; \ + fi; \ + if command -v npm >/dev/null 2>&1; then \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) npm $$(npm -v)"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) npm not found"; \ + FAILED=1; \ + fi; \ + fi; \ + if [ "$$NEED_GO" -eq 1 ]; then \ + if command -v go >/dev/null 2>&1; then \ + GVER=$$(go env GOVERSION 2>/dev/null | sed 's/^go//'); \ + GMAJ=$$(echo "$$GVER" | cut -d. -f1); \ + GMIN=$$(echo "$$GVER" | cut -d. -f2); \ + if [ "$${GMAJ:-0}" -gt 1 ] || { [ "$${GMAJ:-0}" -eq 1 ] && [ "$${GMIN:-0}" -ge 21 ]; }; then \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) go $$GVER"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) go $$GVER (need >= 1.21)"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install go"; else echo " Install: https://go.dev/dl/"; fi; \ + FAILED=1; \ + fi; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) go not found (need >= 1.21)"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install go"; else echo " Install: https://go.dev/dl/"; fi; \ + FAILED=1; \ + fi; \ + fi; \ + if [ "$$FAILED" -ne 0 ]; then \ + echo "$(COLOR_RED)Preflight failed: fix the issues above.$(COLOR_RESET)"; \ + exit 1; \ + fi; \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) Language tool checks passed." 
+ +dev-env: check-kubectl check-local-context ## Generate components/frontend/.env.local from cluster state (DEV_BACKEND_LOCAL=true for local backend on :8080) + @set -e; \ + BACKEND_URL="http://localhost:$(KIND_FWD_BACKEND_PORT)/api"; \ + if [ "$(DEV_BACKEND_LOCAL)" = "true" ]; then BACKEND_URL="http://localhost:8080/api"; fi; \ + TOKEN=$$(kubectl get secret test-user-token -n $(NAMESPACE) -o jsonpath='{.data.token}' 2>/dev/null | base64 -d 2>/dev/null || true); \ + if [ -z "$$TOKEN" ]; then \ + echo "$(COLOR_YELLOW)⚠$(COLOR_RESET) test-user-token not found — OC_TOKEN left empty (run kind-up if cluster is new)"; \ + fi; \ + ENV_FILE="components/frontend/.env.local"; \ + { \ + echo "# Generated by make dev-env — do not commit"; \ + echo "BACKEND_URL=$$BACKEND_URL"; \ + echo "ENABLE_OC_WHOAMI=0"; \ + if [ -n "$$TOKEN" ]; then echo "OC_TOKEN=$$TOKEN"; else echo "OC_TOKEN="; fi; \ + } > "$$ENV_FILE.tmp"; \ + if [ -f "$$ENV_FILE" ] && cmp -s "$$ENV_FILE.tmp" "$$ENV_FILE"; then \ + rm -f "$$ENV_FILE.tmp"; \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) $$ENV_FILE unchanged"; \ + else \ + mv "$$ENV_FILE.tmp" "$$ENV_FILE"; \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) Wrote $$ENV_FILE"; \ + fi + +dev: ## Local dev: preflight, cluster, dev-env, port-forwards; COMPONENT=frontend|backend|frontend,backend for hot-reload + @if [ -z "$(COMPONENT)" ]; then $(MAKE) --no-print-directory preflight-cluster; else $(MAKE) --no-print-directory preflight; fi + @set -e; \ + if [ "$(CONTAINER_ENGINE)" = "podman" ]; then export KIND_EXPERIMENTAL_PROVIDER=podman; fi; \ + CLUSTER_RUNNING=0; \ + if kind get clusters 2>/dev/null | grep -q "^$(KIND_CLUSTER_NAME)$$"; then CLUSTER_RUNNING=1; fi; \ + if [ "$$CLUSTER_RUNNING" -eq 0 ]; then \ + if [ "$(AUTO_CLUSTER)" = "true" ]; then \ + echo "$(COLOR_BLUE)▶$(COLOR_RESET) AUTO_CLUSTER=true — running kind-up..."; \ + $(MAKE) kind-up CONTAINER_ENGINE=$(CONTAINER_ENGINE); \ + elif [ -t 0 ]; then \ + printf "Kind cluster '$(KIND_CLUSTER_NAME)' is not running. 
Run 'make kind-up' now? [y/N] "; \ + read -r _ans; \ + case "$$_ans" in y|Y|yes|YES) $(MAKE) kind-up CONTAINER_ENGINE=$(CONTAINER_ENGINE) ;; \ + *) echo "$(COLOR_RED)✗$(COLOR_RESET) Start the cluster first: $(COLOR_BOLD)make kind-up$(COLOR_RESET)"; exit 1 ;; esac; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) Kind cluster '$(KIND_CLUSTER_NAME)' is not running."; \ + echo " Run: $(COLOR_BOLD)make kind-up$(COLOR_RESET) or $(COLOR_BOLD)make dev AUTO_CLUSTER=true$(COLOR_RESET)"; \ + exit 1; \ + fi; \ + fi; \ + if [ "$(CONTAINER_ENGINE)" = "podman" ]; then \ + KIND_EXPERIMENTAL_PROVIDER=podman kubectl config use-context kind-$(KIND_CLUSTER_NAME) 2>/dev/null || \ + kubectl config use-context kind-$(KIND_CLUSTER_NAME); \ + else \ + kubectl config use-context kind-$(KIND_CLUSTER_NAME); \ + fi; \ + COMP="$(COMPONENT)"; \ + HAS_FRONT=0; HAS_BACK=0; \ + for piece in $$(echo "$$COMP" | tr ',' ' '); do \ + p=$$(echo "$$piece" | sed 's/^[[:space:]]*//;s/[[:space:]]*$$//'); \ + case "$$p" in frontend) HAS_FRONT=1 ;; backend) HAS_BACK=1 ;; esac; \ + done; \ + DEV_LOCAL=0; \ + if [ "$$HAS_FRONT" -eq 1 ] && [ "$$HAS_BACK" -eq 1 ]; then DEV_LOCAL=1; \ + elif [ "$$HAS_BACK" -eq 1 ] && [ "$$HAS_FRONT" -eq 0 ]; then DEV_LOCAL=1; \ + fi; \ + if [ -z "$$COMP" ]; then \ + $(MAKE) dev-env DEV_BACKEND_LOCAL=false; \ + else \ + $(MAKE) dev-env DEV_BACKEND_LOCAL=$$( [ "$$DEV_LOCAL" -eq 1 ] && echo true || echo false ); \ + fi; \ + echo ""; \ + echo "$(COLOR_BOLD)Access:$(COLOR_RESET)"; \ + echo " Frontend: http://localhost:$(KIND_FWD_FRONTEND_PORT)"; \ + echo " Backend: http://localhost:$(KIND_FWD_BACKEND_PORT)"; \ + echo ""; \ + PF_PIDS=""; \ + cleanup() { \ + for pid in $$PF_PIDS; do kill "$$pid" 2>/dev/null || true; done; \ + echo ""; echo "$(COLOR_GREEN)✓$(COLOR_RESET) Stopped port-forward(s)."; \ + }; \ + trap cleanup INT TERM; \ + if [ -z "$$COMP" ]; then \ + echo "$(COLOR_BLUE)▶$(COLOR_RESET) Port-forwarding frontend + backend (Ctrl+C to stop)..."; \ + kubectl port-forward -n 
$(NAMESPACE) svc/frontend-service $(KIND_FWD_FRONTEND_PORT):3000 >/tmp/acp-dev-pf-frontend.log 2>&1 & PF_PIDS="$$PF_PIDS $$!"; \ + kubectl port-forward -n $(NAMESPACE) svc/backend-service $(KIND_FWD_BACKEND_PORT):8080 >/tmp/acp-dev-pf-backend.log 2>&1 & PF_PIDS="$$PF_PIDS $$!"; \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) Port-forwards running."; \ + echo "$(COLOR_YELLOW)Press Ctrl+C to stop.$(COLOR_RESET)"; \ + wait; \ + elif [ "$$HAS_FRONT" -eq 1 ] && [ "$$HAS_BACK" -eq 0 ]; then \ + echo "$(COLOR_BLUE)▶$(COLOR_RESET) Port-forward backend only; starting frontend dev..."; \ + kubectl port-forward -n $(NAMESPACE) svc/backend-service $(KIND_FWD_BACKEND_PORT):8080 >/tmp/acp-dev-pf-backend.log 2>&1 & PF_PIDS=$$!; \ + sleep 1; \ + cd components/frontend && npm run dev; \ + elif [ "$$HAS_BACK" -eq 1 ] && [ "$$HAS_FRONT" -eq 0 ]; then \ + echo "$(COLOR_BLUE)▶$(COLOR_RESET) Port-forward frontend only; starting backend dev..."; \ + kubectl port-forward -n $(NAMESPACE) svc/frontend-service $(KIND_FWD_FRONTEND_PORT):3000 >/tmp/acp-dev-pf-frontend.log 2>&1 & PF_PIDS=$$!; \ + sleep 1; \ + cd components/backend && go run .; \ + elif [ "$$HAS_FRONT" -eq 1 ] && [ "$$HAS_BACK" -eq 1 ]; then \ + echo "$(COLOR_BLUE)▶$(COLOR_RESET) Hot-reload: backend + frontend (local)..."; \ + (cd components/backend && go run .) 
& GO_PID=$$!; \ + (cd components/frontend && npm run dev) & NPM_PID=$$!; \ + trap 'kill $$GO_PID $$NPM_PID 2>/dev/null; cleanup' INT TERM; \ + wait $$GO_PID $$NPM_PID; \ + fi + +##@ Benchmarking + +benchmark: ## Run component benchmarks (COMPONENT=frontend MODE=cold|warm|both REPEATS=3) + @bash scripts/benchmarks/component-bench.sh \ + $(if $(COMPONENT),--components $(COMPONENT)) \ + $(if $(MODE),--mode $(MODE)) \ + $(if $(REPEATS),--repeats $(REPEATS)) \ + $(if $(BASELINE),--baseline-ref $(BASELINE)) \ + $(if $(CANDIDATE),--candidate-ref $(CANDIDATE)) \ + $(if $(FORMAT),--format $(FORMAT)) + +benchmark-ci: ## Run component benchmarks in CI mode + @bash scripts/benchmarks/component-bench.sh --ci \ + $(if $(COMPONENT),--components $(COMPONENT)) \ + $(if $(MODE),--mode $(MODE)) \ + $(if $(REPEATS),--repeats $(REPEATS)) \ + $(if $(BASELINE),--baseline-ref $(BASELINE)) \ + $(if $(CANDIDATE),--candidate-ref $(CANDIDATE)) \ + $(if $(FORMAT),--format $(FORMAT)) + +kind-up: preflight-cluster ## Start kind cluster and deploy the platform (LOCAL_IMAGES=true builds from source) @echo "$(COLOR_BLUE)▶$(COLOR_RESET) Starting kind cluster '$(KIND_CLUSTER_NAME)'..." 
@cd e2e && KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND_HTTP_PORT=$(KIND_HTTP_PORT) KIND_HTTPS_PORT=$(KIND_HTTPS_PORT) KIND_HOST=$(KIND_HOST) CONTAINER_ENGINE=$(CONTAINER_ENGINE) ./scripts/setup-kind.sh @if [ -n "$(KIND_HOST)" ]; then \ @@ -749,7 +990,7 @@ kind-reload-operator: check-kind check-kubectl check-local-context ## Rebuild an @kubectl rollout status deployment/agentic-operator -n $(NAMESPACE) --timeout=60s @echo "$(COLOR_GREEN)✓$(COLOR_RESET) Operator reloaded" -kind-status: ## Show all kind clusters and their port assignments +kind-status: check-kind ## Show all kind clusters and their port assignments @echo "$(COLOR_BOLD)Kind Cluster Status$(COLOR_RESET)" @echo "" @echo "$(COLOR_BOLD)Current worktree:$(COLOR_RESET)" @@ -834,13 +1075,38 @@ local-reload-operator: ## Deprecated: use kind-reload-operator ##@ Internal Helpers (do not call directly) +check-minikube: ## Check if minikube is installed + @OS=$$(uname -s); \ + if command -v minikube >/dev/null 2>&1; then \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) minikube $$(minikube version 2>/dev/null | head -1)"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) minikube not found"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install minikube"; fi; \ + echo " Install: https://minikube.sigs.k8s.io/docs/start/"; \ + exit 1; \ + fi + check-kind: ## Check if kind is installed - @command -v kind >/dev/null 2>&1 || \ - (echo "$(COLOR_RED)✗$(COLOR_RESET) kind not found. 
Install: https://kind.sigs.k8s.io/docs/user/quick-start/" && exit 1) + @OS=$$(uname -s); \ + if command -v kind >/dev/null 2>&1; then \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) kind $$(kind version -q 2>/dev/null || kind version 2>/dev/null | head -1)"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) kind not found"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install kind"; else echo " Install: go install sigs.k8s.io/kind@latest"; fi; \ + echo " https://kind.sigs.k8s.io/docs/user/quick-start/"; \ + exit 1; \ + fi check-kubectl: ## Check if kubectl is installed - @command -v kubectl >/dev/null 2>&1 || \ - (echo "$(COLOR_RED)✗$(COLOR_RESET) kubectl not found. Install: https://kubernetes.io/docs/tasks/tools/" && exit 1) + @OS=$$(uname -s); \ + if command -v kubectl >/dev/null 2>&1; then \ + echo "$(COLOR_GREEN)✓$(COLOR_RESET) kubectl $$(kubectl version --client -o yaml 2>/dev/null | grep gitVersion | head -1 | sed 's/.*: //' || kubectl version --client 2>/dev/null | head -1)"; \ + else \ + echo "$(COLOR_RED)✗$(COLOR_RESET) kubectl not found"; \ + if [ "$$OS" = "Darwin" ]; then echo " Install: brew install kubectl"; fi; \ + echo " Install: https://kubernetes.io/docs/tasks/tools/"; \ + exit 1; \ + fi check-local-context: ## Verify kubectl context points to a local kind cluster ifneq ($(SKIP_CONTEXT_CHECK),true) @@ -966,7 +1232,7 @@ _auto-port-forward: ## Internal: Auto-start port forwarding on macOS with Podman echo ""; \ echo "$(COLOR_BLUE)▶$(COLOR_RESET) Starting port forwarding in background..."; \ echo " Waiting for services to be ready..."; \ - kubectl wait --for=condition=ready pod -l app=backend -n $(NAMESPACE) --timeout=60s 2>/dev/null || true; \ + kubectl wait --for=condition=ready pod -l app=backend-api -n $(NAMESPACE) --timeout=60s 2>/dev/null || true; \ kubectl wait --for=condition=ready pod -l app=frontend -n $(NAMESPACE) --timeout=60s 2>/dev/null || true; \ mkdir -p /tmp/ambient-code; \ kubectl port-forward -n $(NAMESPACE) svc/backend-service 
8080:8080 > /tmp/ambient-code/port-forward-backend.log 2>&1 & \ diff --git a/e2e/scripts/setup-kind.sh b/e2e/scripts/setup-kind.sh index a277f51fd..9ea2c26ad 100755 --- a/e2e/scripts/setup-kind.sh +++ b/e2e/scripts/setup-kind.sh @@ -99,7 +99,4 @@ echo " Cluster: ${KIND_CLUSTER_NAME}" echo " Kubernetes: v1.35.0" echo " NodePort: 30080 -> host port ${HTTP_PORT}" echo "" -echo "Next steps:" -echo " 1. Deploy the platform: make kind-up (continues deployment)" -echo " 2. Access services: make kind-port-forward (in another terminal)" -echo " 3. Frontend: http://localhost:${HTTP_PORT}" +echo "Returning control to the Makefile for platform deployment..." diff --git a/scripts/benchmarks/README.md b/scripts/benchmarks/README.md new file mode 100644 index 000000000..7fd5c503d --- /dev/null +++ b/scripts/benchmarks/README.md @@ -0,0 +1,131 @@ +## Component Benchmark Harness + +Shell-based benchmark harness for developer inner-loop timing. + +### Goals + +- Measure **truly cold** setup/install time per component +- Measure **warm** rebuild time per component +- Compare current branch (`HEAD`) against a baseline ref +- Emit output that works well for both humans and agents + +### Commands + +```bash +# Human-friendly local summary +make benchmark + +# Agent-friendly / pipe-friendly output +make benchmark FORMAT=tsv + +# Single component +make benchmark COMPONENT=frontend MODE=cold + +# Explicit refs +make benchmark BASELINE=origin/main CANDIDATE=HEAD + +# CI mode +make benchmark-ci +``` + +### Agent Fast Path + +Use these defaults unless you have a specific reason not to: + +```bash +# Cheapest useful signal for agents +make benchmark FORMAT=tsv COMPONENT=backend MODE=warm REPEATS=1 + +# Frontend contributor setup budget check +make benchmark FORMAT=tsv COMPONENT=frontend MODE=cold REPEATS=1 + +# Multi-component comparison after a broad refactor +make benchmark FORMAT=tsv COMPONENT=backend,operator,public-api MODE=warm REPEATS=1 +``` + +Decision guide: + +- **Need one quick 
benchmark datapoint?** Start with a single component and `REPEATS=1`
+- **Need contributor setup budget?** Run `MODE=cold`
+- **Need incremental compiler/build signal?** Run `MODE=warm`
+- **Need machine/agent consumption?** Use `FORMAT=tsv`
+- **Need human-readable scannability?** Use default `human`
+
+### Output Modes
+
+- `human`: default for TTY; uses repo conventions (`▶`, `✓`, `✗`, section dividers)
+- `tsv`: default when piped; preferred for agents and automation
+- `json`: machine-readable full result object
+
+Guidance:
+
+- **Humans**: use `make benchmark`
+- **Agents / scripts**: use `make benchmark FORMAT=tsv`
+- **Downstream tools**: consume `results.json`
+
+### Cost-Aware Benchmark Strategy
+
+To keep agent runs efficient:
+
+1. Benchmark the **smallest relevant scope first**
+2. Use **single-component warm** runs before full sweeps
+3. Only increase `REPEATS` after a suspicious or decision-relevant result
+4. Use full all-component sweeps sparingly; they are intentionally expensive
+
+Good examples:
+
+- Backend change: `make benchmark FORMAT=tsv COMPONENT=backend MODE=warm REPEATS=1`
+- Frontend setup UX: `make benchmark FORMAT=tsv COMPONENT=frontend MODE=cold REPEATS=1`
+- SDK generator change: `make benchmark FORMAT=tsv COMPONENT=sdk MODE=warm REPEATS=1`
+
+Avoid by default:
+
+- `make benchmark MODE=both REPEATS=3` on all components during exploratory work
+- Interpreting `warm` as real browser-observed HMR latency
+- Using `human` output when an agent or script will parse the result
+
+### Cold vs Warm Semantics
+
+Cold:
+
+- Uses isolated worktrees
+- Uses isolated caches (`GOMODCACHE`, `GOCACHE`, `npm_config_cache`, `UV_CACHE_DIR`, `PIP_CACHE_DIR`)
+- Removes repo-local build/install artifacts
+- Intended to approximate a first contributor setup experience
+
+Warm:
+
+- Reuses the same isolated cache root prepared by the harness
+- Measures the timed rebuild **after** untimed setup is complete
+- Intended to approximate a follow-up
incremental compile/build + +Important: + +- Current `warm` numbers are **build/rebuild proxies**, not true “save file -> browser refreshed” or “save file -> process restarted” hot-reload latency +- For frontend, `warm` currently uses `npm run build`, not a browser-observed HMR measurement + +### Component Prerequisites + +- `frontend`: Node.js 20+ and npm +- `backend`, `operator`, `public-api`, `api-server`, `cli`, `sdk`: Go 1.21+ +- `runner`: Python 3.11+ plus `uv` or `python3 -m venv` +- `api-server`, `cli`, `sdk`: `make` + +The harness now preflights these before worktree setup so failures happen fast. + +### Known Efficiency Lessons + +- Frontend benchmarking is highly sensitive to Node version; use Node 20+ or it will fail fast +- Use `FORMAT=tsv` for agent consumption to minimize context-token cost +- If `reports/benchmarks/` is not writable in the current environment, the harness falls back to a temp directory and prints a warning +- Warm benchmarks only stay warm if the setup phase and timed phase share the same isolated cache env; the harness now does that explicitly +- Session benchmarking is **contract-only** in v1 (`bench_session_*` stubs in `bench-manifest.sh`) +- Full warm sweeps across all components are slow because each component still performs untimed setup before the measured rebuild; use them intentionally, not as the default first move +- A failing preflight is a useful result; treat it as an environment readiness signal rather than forcing the benchmark to continue + +### Files + +- `scripts/benchmarks/component-bench.sh` - main harness +- `scripts/benchmarks/bench-manifest.sh` - component definitions and session stubs +- `tests/bench-test.sh` - harness self-tests + diff --git a/scripts/benchmarks/bench-manifest.sh b/scripts/benchmarks/bench-manifest.sh new file mode 100644 index 000000000..2ae2e933b --- /dev/null +++ b/scripts/benchmarks/bench-manifest.sh @@ -0,0 +1,352 @@ +#!/usr/bin/env bash + +# shellcheck shell=bash + +BENCH_COMPONENTS=( 
+ frontend + backend + operator + public-api + api-server + cli + sdk + runner +) + +bench_env_frontend() { + local cache_root=$1 + bench_setup_frontend_env "$cache_root" +} + +bench_env_backend() { + local cache_root=$1 + bench_setup_go_env "$cache_root" +} + +bench_env_operator() { + local cache_root=$1 + bench_setup_go_env "$cache_root" +} + +bench_env_public_api() { + local cache_root=$1 + bench_setup_go_env "$cache_root" +} + +bench_env_api_server() { + local cache_root=$1 + bench_setup_go_env "$cache_root" +} + +bench_env_cli() { + local cache_root=$1 + bench_setup_go_env "$cache_root" +} + +bench_env_sdk() { + local cache_root=$1 + bench_setup_go_env "$cache_root" +} + +bench_env_runner() { + local cache_root=$1 + bench_setup_runner_env "$cache_root" +} + +bench_preflight_frontend() { + bench_require_command npm + bench_require_command node + bench_require_node_version 20 0 +} + +bench_preflight_backend() { + bench_require_command go + bench_require_go_version 1 21 +} + +bench_preflight_operator() { + bench_preflight_backend +} + +bench_preflight_public_api() { + bench_preflight_backend +} + +bench_preflight_api_server() { + bench_preflight_backend + bench_require_command make +} + +bench_preflight_cli() { + bench_preflight_backend + bench_require_command make +} + +bench_preflight_sdk() { + bench_preflight_backend + bench_require_command make +} + +bench_preflight_runner() { + bench_require_python3 + bench_require_python_version 3 11 + if ! command -v uv >/dev/null 2>&1 && ! 
python3 -m venv --help >/dev/null 2>&1; then + printf '%s\n' "uv or python3 -m venv support is required" + return 1 + fi +} + +bench_setup_go_env() { + local cache_root=$1 + export GOMODCACHE="$cache_root/go-mod" + export GOPATH="$cache_root/go-path" + export GOCACHE="$cache_root/go-build" + mkdir -p "$GOMODCACHE" "$GOPATH" "$GOCACHE" +} + +bench_setup_frontend_env() { + local cache_root=$1 + export npm_config_cache="$cache_root/npm-cache" + mkdir -p "$npm_config_cache" +} + +bench_setup_runner_env() { + local cache_root=$1 + export UV_CACHE_DIR="$cache_root/uv-cache" + export PIP_CACHE_DIR="$cache_root/pip-cache" + mkdir -p "$UV_CACHE_DIR" "$PIP_CACHE_DIR" +} + +bench_create_runner_venv() { + if command -v uv >/dev/null 2>&1; then + uv venv .venv >/dev/null + else + python3 -m venv .venv + fi + ./.venv/bin/python -m pip install --upgrade pip >/dev/null + ./.venv/bin/python -m pip install -e '.[all]' +} + +bench_cold_frontend() { + local worktree_dir=$1 + local cache_root=$2 + local run_id=$3 + local ref_name=$4 + local port + + port=$(bench_pick_port frontend "$run_id" "$ref_name") + + cd "$worktree_dir/components/frontend" || return 1 + rm -rf node_modules .next + npm ci + + local log_file="$cache_root/frontend-dev.log" + : >"$log_file" + + PORT="$port" npm run dev >"$log_file" 2>&1 & + local dev_pid=$! + + if ! bench_wait_for_pattern "$log_file" 'Ready in|ready in|Local:|localhost:' 120 "$dev_pid"; then + bench_kill_pid "$dev_pid" + return 1 + fi + + bench_kill_pid "$dev_pid" +} + +bench_warm_frontend() { + local worktree_dir=$1 + + cd "$worktree_dir/components/frontend" || return 1 + touch src/app/projects/page.tsx + npm run build +} + +bench_cleanup_frontend() { + local worktree_dir=$1 + + cd "$worktree_dir/components/frontend" || return 0 + rm -rf node_modules .next +} + +bench_cold_backend() { + local worktree_dir=$1 + + cd "$worktree_dir/components/backend" || return 1 + go mod download + go build . 
+} + +bench_warm_backend() { + local worktree_dir=$1 + + cd "$worktree_dir/components/backend" || return 1 + touch main.go + go build . +} + +bench_cleanup_backend() { + local worktree_dir=$1 + + cd "$worktree_dir/components/backend" || return 0 + rm -f backend +} + +bench_cold_operator() { + local worktree_dir=$1 + + cd "$worktree_dir/components/operator" || return 1 + go mod download + go build ./... +} + +bench_warm_operator() { + local worktree_dir=$1 + + cd "$worktree_dir/components/operator" || return 1 + touch main.go + go build ./... +} + +bench_cleanup_operator() { + : +} + +bench_cold_public_api() { + local worktree_dir=$1 + + cd "$worktree_dir/components/public-api" || return 1 + go mod download + go build ./... +} + +bench_warm_public_api() { + local worktree_dir=$1 + + cd "$worktree_dir/components/public-api" || return 1 + touch main.go + go build ./... +} + +bench_cleanup_public_api() { + : +} + +bench_cold_api_server() { + local worktree_dir=$1 + + cd "$worktree_dir/components/ambient-api-server" || return 1 + go mod download + make binary +} + +bench_warm_api_server() { + local worktree_dir=$1 + + cd "$worktree_dir/components/ambient-api-server" || return 1 + touch cmd/ambient-api-server/main.go + make binary +} + +bench_cleanup_api_server() { + local worktree_dir=$1 + + cd "$worktree_dir/components/ambient-api-server" || return 0 + rm -f ambient-api-server +} + +bench_cold_cli() { + local worktree_dir=$1 + + cd "$worktree_dir/components/ambient-cli" || return 1 + go mod download + make build +} + +bench_warm_cli() { + local worktree_dir=$1 + + cd "$worktree_dir/components/ambient-cli" || return 1 + touch cmd/acpctl/main.go + make build +} + +bench_cleanup_cli() { + local worktree_dir=$1 + + cd "$worktree_dir/components/ambient-cli" || return 0 + rm -f acpctl +} + +bench_cold_sdk() { + local worktree_dir=$1 + + cd "$worktree_dir/components/ambient-sdk" || return 1 + make build-generator +} + +bench_warm_sdk() { + local worktree_dir=$1 + + cd 
"$worktree_dir/components/ambient-sdk" || return 1 + touch generator/main.go + make build-generator +} + +bench_cleanup_sdk() { + local worktree_dir=$1 + + cd "$worktree_dir/components/ambient-sdk" || return 0 + rm -f bin/ambient-sdk-generator +} + +bench_cold_runner() { + local worktree_dir=$1 + + cd "$worktree_dir/components/runners/ambient-runner" || return 1 + rm -rf .venv + bench_create_runner_venv +} + +bench_warm_runner() { + local worktree_dir=$1 + + cd "$worktree_dir/components/runners/ambient-runner" || return 1 + touch ambient_runner/__init__.py + ./.venv/bin/python -c "from ambient_runner import *" +} + +bench_cleanup_runner() { + local worktree_dir=$1 + + cd "$worktree_dir/components/runners/ambient-runner" || return 0 + rm -rf .venv +} + +# --- Session benchmark interface (v1: contract only, v2: implementation) --- +# +# bench_session_create NAME PROMPT RUNNER_TYPE +# Create an agentic session via acpctl or SDK. +# Returns: session ID (stdout), exit code 0 on success. +# +# bench_session_wait_phase SESSION_ID TARGET_PHASE TIMEOUT_S +# Poll session status until TARGET_PHASE is reached or TIMEOUT_S expires. +# Returns: elapsed seconds (stdout), exit code 0 on success, 1 on timeout. +# +# bench_session_collect SESSION_ID +# Collect session metrics after completion. +# Emits JSON to stdout with fields: +# startup_s -- Pending to Running +# total_s -- startTime to completionTime +# image_pull_s -- pod creation to container running +# token_provision_s -- secret creation to mount +# final_phase -- Completed | Failed | Stopped +# exit_code -- runner container exit code +# +# bench_session_cleanup SESSION_ID +# Delete the session CR and any associated resources. 
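+#
+# Illustrative call sequence (a hedged sketch only; the session name,
+# prompt, and runner type below are hypothetical, and in v1 these
+# functions are stubs that print NOT_IMPLEMENTED and return 2):
+#
+#   session_id=$(bench_session_create bench-demo "say hello" claude) || exit 1
+#   startup_s=$(bench_session_wait_phase "$session_id" Running 300)
+#   bench_session_collect "$session_id" >session-metrics.json
+#   bench_session_cleanup "$session_id"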
+ +bench_session_create() { echo "NOT_IMPLEMENTED"; return 2; } +bench_session_wait_phase() { echo "NOT_IMPLEMENTED"; return 2; } +bench_session_collect() { echo "NOT_IMPLEMENTED"; return 2; } +bench_session_cleanup() { echo "NOT_IMPLEMENTED"; return 2; } diff --git a/scripts/benchmarks/component-bench.sh b/scripts/benchmarks/component-bench.sh new file mode 100644 index 000000000..682271a67 --- /dev/null +++ b/scripts/benchmarks/component-bench.sh @@ -0,0 +1,1386 @@ +#!/usr/bin/env bash + +# shellcheck shell=bash + +set -euo pipefail + +SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +REPO_ROOT=$(cd "$SCRIPT_DIR/../.." && pwd) + +COLOR_RESET="" +COLOR_BOLD="" +COLOR_GREEN="" +COLOR_YELLOW="" +COLOR_BLUE="" +COLOR_RED="" + +CI_MODE=false +BENCH_FORMAT="" +BENCH_MODE=both +BENCH_REPEATS="" +BENCH_REPORT_DIR="" +BASELINE_REF="" +CANDIDATE_REF="HEAD" +BASELINE_LABEL="baseline" +CANDIDATE_LABEL="candidate" +BENCH_TMPDIR="" +SELECTED_COMPONENTS=() +READY_COMPONENTS=() +WORKTREE_PATHS=() +BENCH_BG_PIDS=() +BENCH_INTERRUPTED=false + +bench_supports_color() { + [[ -t 1 ]] && [[ "$CI_MODE" != true ]] +} + +bench_init_colors() { + if bench_supports_color; then + COLOR_RESET=$(tput sgr0 2>/dev/null || printf '\033[0m') + COLOR_BOLD=$(tput bold 2>/dev/null || printf '\033[1m') + COLOR_GREEN=$(tput setaf 2 2>/dev/null || printf '\033[32m') + COLOR_YELLOW=$(tput setaf 3 2>/dev/null || printf '\033[33m') + COLOR_BLUE=$(tput setaf 4 2>/dev/null || printf '\033[34m') + COLOR_RED=$(tput setaf 1 2>/dev/null || printf '\033[31m') + fi +} + +bench_timestamp() { + date -u +%Y-%m-%dT%H:%M:%SZ +} + +bench_now_seconds() { + perl -MTime::HiRes=time -e 'printf "%.6f\n", time' 2>/dev/null || \ + python3 -c 'import time; print(f"{time.time():.6f}")' 2>/dev/null || \ + date +%s +} + +bench_elapsed_seconds() { + local start=$1 + local end=$2 + awk -v start="$start" -v end="$end" 'BEGIN { printf "%.3f", (end - start) }' +} + +bench_format_seconds() { + local value=${1:-0} + awk -v 
value="$value" 'BEGIN { printf "%.1f", value }' +} + +bench_log_info() { + if [[ "$CI_MODE" == true ]]; then + printf '[%s] %sℹ%s %s\n' "$(bench_timestamp)" "$COLOR_BLUE" "$COLOR_RESET" "$*" >&2 + else + printf '%sℹ%s %s\n' "$COLOR_BLUE" "$COLOR_RESET" "$*" >&2 + fi +} + +bench_log_start() { + local component=$1 + local scenario=$2 + local run_index=$3 + local total_runs=$4 + local message=$5 + + if [[ "$CI_MODE" == true ]]; then + printf '[%s] %s▶%s %-14s %s run %s/%s %s\n' \ + "$(bench_timestamp)" "$COLOR_BLUE" "$COLOR_RESET" \ + "$component" "$scenario" "$run_index" "$total_runs" "$message" >&2 + else + printf '%s▶%s %-14s %s run %s/%s' \ + "$COLOR_BLUE" "$COLOR_RESET" \ + "$component" "$scenario" "$run_index" "$total_runs" >&2 + fi +} + +bench_log_dot() { + if [[ "$CI_MODE" != true ]]; then + printf '.' >&2 + fi +} + +bench_log_success() { + local component=$1 + local scenario=$2 + local run_index=$3 + local total_runs=$4 + local elapsed=$5 + + if [[ "$CI_MODE" == true ]]; then + printf '[%s] %s✓%s %-14s %s run %s/%s %ss\n' \ + "$(bench_timestamp)" "$COLOR_GREEN" "$COLOR_RESET" \ + "$component" "$scenario" "$run_index" "$total_runs" "$(bench_format_seconds "$elapsed")" >&2 + else + printf ' %s✓%s %ss\n' \ + "$COLOR_GREEN" "$COLOR_RESET" \ + "$(bench_format_seconds "$elapsed")" >&2 + fi +} + +bench_log_error() { + local message=$1 + + if [[ "$CI_MODE" == true ]]; then + printf '[%s] %s✗%s %s\n' "$(bench_timestamp)" "$COLOR_RED" "$COLOR_RESET" "$message" >&2 + else + printf '%s✗%s %s\n' "$COLOR_RED" "$COLOR_RESET" "$message" >&2 + fi +} + +bench_log_error_inline() { + local message=$1 + + if [[ "$CI_MODE" == true ]]; then + printf '[%s] %s✗%s %s\n' "$(bench_timestamp)" "$COLOR_RED" "$COLOR_RESET" "$message" >&2 + else + printf ' %s✗%s %s\n' "$COLOR_RED" "$COLOR_RESET" "$message" >&2 + fi +} + +bench_log_warning() { + local message=$1 + + if [[ "$CI_MODE" == true ]]; then + printf '[%s] %s⚠%s %s\n' "$(bench_timestamp)" "$COLOR_YELLOW" "$COLOR_RESET" "$message" >&2 
+ else + printf '%s⚠%s %s\n' "$COLOR_YELLOW" "$COLOR_RESET" "$message" >&2 + fi +} + +bench_component_key() { + echo "${1//-/_}" +} + +bench_pick_port() { + local component=$1 + local run_id=$2 + local ref_name=$3 + local index=0 + local current + local ref_offset=0 + + for current in "${BENCH_COMPONENTS[@]}"; do + if [[ "$current" == "$component" ]]; then + break + fi + index=$((index + 1)) + done + + if [[ "$ref_name" == "candidate" ]]; then + ref_offset=100 + fi + + echo $((43000 + (index * 20) + ref_offset + run_id)) +} + +bench_wait_for_pattern() { + local log_file=$1 + local pattern=$2 + local timeout_seconds=$3 + local pid=$4 + local start + local now + local elapsed + + start=$(bench_now_seconds) + + while true; do + if [[ -f "$log_file" ]] && grep -Eq "$pattern" "$log_file"; then + return 0 + fi + + if ! kill -0 "$pid" >/dev/null 2>&1; then + break + fi + + now=$(bench_now_seconds) + elapsed=$(bench_elapsed_seconds "$start" "$now") + if awk -v elapsed="$elapsed" -v timeout="$timeout_seconds" 'BEGIN { exit !(elapsed >= timeout) }'; then + return 1 + fi + + sleep 1 + done + + [[ -f "$log_file" ]] && grep -Eq "$pattern" "$log_file" +} + +bench_kill_pid() { + local pid=$1 + + if kill -0 "$pid" >/dev/null 2>&1; then + kill "$pid" >/dev/null 2>&1 || true + wait "$pid" >/dev/null 2>&1 || true + fi +} + +bench_usage() { + cat <<'EOF' +scripts/benchmarks/component-bench.sh [OPTIONS] + + --baseline-ref REF Git ref for baseline (default: merge-base with origin/main) + --candidate-ref REF Git ref for candidate (default: HEAD) + --components LIST Comma-separated component list (default: all) + --mode cold|warm|both Which scenarios to benchmark (default: both) + --repeats N Runs per scenario (default: 3 locally, 5 in CI) + --format human|tsv|json Output format (default: human if TTY, tsv if piped) + --report-dir DIR Output directory (default: reports/benchmarks) + --ci CI mode: plain progress logs, 5 repeats by default + --help Show usage + +Recommended usage: + + Human 
local summary: + make benchmark + + Agent / automation fast path: + make benchmark FORMAT=tsv COMPONENT=backend MODE=warm REPEATS=1 + + First-pass exploratory run: + benchmark one component first; avoid full-suite warm or cold sweeps + unless you explicitly need the entire matrix + + Output guidance: + human -> interactive terminal + tsv -> agents, pipes, automation + json -> downstream tooling / archival +EOF +} + +bench_default_baseline_ref() { + local merge_base="" + + if git -C "$REPO_ROOT" rev-parse --verify origin/main >/dev/null 2>&1; then + merge_base=$(git -C "$REPO_ROOT" merge-base HEAD origin/main 2>/dev/null || true) + if [[ -n "$merge_base" ]]; then + echo "$merge_base" + else + echo "origin/main" + fi + return + fi + + echo "main" +} + +bench_validate_format() { + case "$BENCH_FORMAT" in + human|tsv|json) ;; + *) + bench_log_error "Invalid format '$BENCH_FORMAT' (use human, tsv, or json)" + exit 1 + ;; + esac +} + +bench_validate_mode() { + case "$BENCH_MODE" in + cold|warm|both) ;; + *) + bench_log_error "Invalid mode '$BENCH_MODE' (use cold, warm, or both)" + exit 1 + ;; + esac +} + +bench_validate_ref() { + local ref=$1 + + if ! git -C "$REPO_ROOT" rev-parse --verify "$ref^{commit}" >/dev/null 2>&1; then + bench_log_error "Git ref '$ref' does not resolve to a commit" + exit 1 + fi +} + +bench_component_exists() { + local wanted=$1 + local component + + for component in "${BENCH_COMPONENTS[@]}"; do + if [[ "$component" == "$wanted" ]]; then + return 0 + fi + done + + return 1 +} + +bench_require_command() { + local command_name=$1 + + if ! command -v "$command_name" >/dev/null 2>&1; then + printf '%s\n' "required command '$command_name' is not installed" + return 1 + fi +} + +bench_require_node_version() { + local min_major=$1 + local min_minor=$2 + local version major minor + + version=$(node -v 2>/dev/null | sed 's/^v//') + major=$(printf '%s' "$version" | cut -d. -f1) + minor=$(printf '%s' "$version" | cut -d. -f2) + + if ! 
awk -v major="$major" -v minor="$minor" -v min_major="$min_major" -v min_minor="$min_minor" \ + 'BEGIN { exit !((major > min_major) || (major == min_major && minor >= min_minor)) }'; then + printf '%s\n' "node v$version found; need >= ${min_major}.${min_minor}" + return 1 + fi +} + +bench_require_go_version() { + local min_major=$1 + local min_minor=$2 + local version major minor + + version=$(go env GOVERSION 2>/dev/null | sed 's/^go//') + major=$(printf '%s' "$version" | cut -d. -f1) + minor=$(printf '%s' "$version" | cut -d. -f2) + + if ! awk -v major="$major" -v minor="$minor" -v min_major="$min_major" -v min_minor="$min_minor" \ + 'BEGIN { exit !((major > min_major) || (major == min_major && minor >= min_minor)) }'; then + printf '%s\n' "go $version found; need >= ${min_major}.${min_minor}" + return 1 + fi +} + +bench_require_python3() { + bench_require_command python3 +} + +bench_require_python_version() { + local min_major=$1 + local min_minor=$2 + local version major minor + + version=$(python3 - <<'EOF' +import sys +print(f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}") +EOF +) + major=$(printf '%s' "$version" | cut -d. -f1) + minor=$(printf '%s' "$version" | cut -d. -f2) + + if ! 
awk -v major="$major" -v minor="$minor" -v min_major="$min_major" -v min_minor="$min_minor" \ + 'BEGIN { exit !((major > min_major) || (major == min_major && minor >= min_minor)) }'; then + printf '%s\n' "python $version found; need >= ${min_major}.${min_minor}" + return 1 + fi +} + +bench_prepare_component_env() { + local component=$1 + local cache_root=$2 + local function_name="bench_env_$(bench_component_key "$component")" + + if declare -f "$function_name" >/dev/null 2>&1; then + "$function_name" "$cache_root" + fi +} + +bench_preflight_component() { + local component=$1 + local function_name="bench_preflight_$(bench_component_key "$component")" + + if declare -f "$function_name" >/dev/null 2>&1; then + "$function_name" + fi +} + +bench_parse_components() { + local raw=${1:-all} + local item="" + local trimmed="" + + SELECTED_COMPONENTS=() + + if [[ "$raw" == "all" || -z "$raw" ]]; then + for item in "${BENCH_COMPONENTS[@]}"; do + SELECTED_COMPONENTS+=("$item") + done + return + fi + + OLD_IFS=$IFS + IFS=, + for item in $raw; do + trimmed=$(printf '%s' "$item" | sed 's/^[[:space:]]*//;s/[[:space:]]*$//') + [[ -z "$trimmed" ]] && continue + if ! 
bench_component_exists "$trimmed"; then + bench_log_error "Unknown component '$trimmed' (valid: ${BENCH_COMPONENTS[*]})" + exit 1 + fi + SELECTED_COMPONENTS+=("$trimmed") + done + IFS=$OLD_IFS + + if [[ ${#SELECTED_COMPONENTS[@]} -eq 0 ]]; then + bench_log_error "No components selected" + exit 1 + fi +} + +bench_scenarios_for_mode() { + case "$BENCH_MODE" in + cold) echo "cold" ;; + warm) echo "warm" ;; + both) + echo "cold" + echo "warm" + ;; + esac +} + +bench_has_scenario() { + local wanted=$1 + local scenario + + while IFS= read -r scenario; do + if [[ "$scenario" == "$wanted" ]]; then + return 0 + fi + done < <(bench_scenarios_for_mode) + + return 1 +} + +bench_result_file_for_component() { + local component=$1 + echo "$BENCH_REPORT_DIR/raw/${component}.tsv" +} + +bench_combined_raw_file() { + echo "$BENCH_REPORT_DIR/results.raw.tsv" +} + +bench_human_report_file() { + echo "$BENCH_REPORT_DIR/results.human.txt" +} + +bench_tsv_report_file() { + echo "$BENCH_REPORT_DIR/results.tsv" +} + +bench_json_report_file() { + echo "$BENCH_REPORT_DIR/results.json" +} + +bench_safe_append() { + local file=$1 + shift + + if ! printf '%s\n' "$*" >>"$file" 2>/dev/null; then + bench_log_warning "Could not write to $file" + fi +} + +bench_safe_redirect() { + local file=$1 + + if ! 
touch "$file" 2>/dev/null; then + file="$BENCH_TMPDIR/fallback-$(basename "$file")" + touch "$file" 2>/dev/null || file="/dev/null" + bench_log_warning "Log redirect failed; using $file" + fi + echo "$file" +} + +bench_record_result() { + local component=$1 + local ref_name=$2 + local scenario=$3 + local run_index=$4 + local status=$5 + local elapsed=$6 + local message=${7:-} + local file + + message=$(printf '%s' "$message" | tr '\t\r\n' ' ') + file=$(bench_result_file_for_component "$component") + bench_safe_append "$file" "$(printf '%s\t%s\t%s\t%s\t%s\t%s\t%s' \ + "$component" "$ref_name" "$scenario" "$run_index" "$status" "$elapsed" "$message")" +} + +bench_invoke_function() { + local component=$1 + local stage=$2 + local worktree_dir=$3 + local cache_root=$4 + local run_index=$5 + local ref_name=$6 + local function_name + + function_name="bench_${stage}_$(bench_component_key "$component")" + if ! declare -f "$function_name" >/dev/null 2>&1; then + bench_log_error "Missing function '$function_name'" + return 1 + fi + + "$function_name" "$worktree_dir" "$cache_root" "$run_index" "$ref_name" +} + +bench_prepare_cache_root() { + local component=$1 + local ref_name=$2 + local run_index=$3 + local cache_root="$BENCH_TMPDIR/cache/$(bench_component_key "$component")/$ref_name/run-$run_index" + + rm -rf "$cache_root" + mkdir -p "$cache_root" + echo "$cache_root" +} + +bench_run_timed_step() { + local component=$1 + local ref_name=$2 + local scenario=$3 + local run_index=$4 + local total_runs=$5 + local worktree_dir=$6 + local cache_root=$7 + local log_file=$8 + local start end elapsed + + local dot_pid="" + + bench_prepare_component_env "$component" "$cache_root" + log_file=$(bench_safe_redirect "$log_file") + bench_log_start "$component" "$scenario/$ref_name" "$run_index" "$total_runs" "" + + if [[ "$CI_MODE" != true ]]; then + ( while true; do sleep 3; bench_log_dot; done ) & + dot_pid=$! 
+ fi + + start=$(bench_now_seconds) + if bench_invoke_function "$component" "$scenario" "$worktree_dir" "$cache_root" "$run_index" "$ref_name" >"$log_file" 2>&1; then + end=$(bench_now_seconds) + elapsed=$(bench_elapsed_seconds "$start" "$end") + [[ -n "$dot_pid" ]] && kill "$dot_pid" 2>/dev/null; wait "$dot_pid" 2>/dev/null || true + bench_log_success "$component" "$scenario/$ref_name" "$run_index" "$total_runs" "$elapsed" + printf '%s\n' "$elapsed" + return 0 + fi + + [[ -n "$dot_pid" ]] && kill "$dot_pid" 2>/dev/null; wait "$dot_pid" 2>/dev/null || true + bench_log_error_inline "$component $scenario/$ref_name run $run_index/$total_runs failed" + return 1 +} + +bench_run_component_ref() { + local component=$1 + local ref_name=$2 + local worktree_dir=$3 + local repeat_index + local cache_root + local log_file + local elapsed + + for repeat_index in $(seq 1 "$BENCH_REPEATS"); do + cache_root=$(bench_prepare_cache_root "$component" "$ref_name" "$repeat_index") + + bench_invoke_function "$component" cleanup "$worktree_dir" "$cache_root" "$repeat_index" "$ref_name" >/dev/null 2>&1 || true + + if [[ "$BENCH_MODE" == "warm" ]]; then + log_file=$(bench_safe_redirect "$BENCH_REPORT_DIR/logs/${component}-${ref_name}-setup-${repeat_index}.log") + bench_prepare_component_env "$component" "$cache_root" + if ! 
bench_invoke_function "$component" cold "$worktree_dir" "$cache_root" "$repeat_index" "$ref_name" >"$log_file" 2>&1; then + bench_record_result "$component" "$ref_name" "warm" "$repeat_index" "error" "0" "warm setup failed ($(basename "$log_file"))" + return 1 + fi + fi + + if bench_has_scenario cold; then + log_file="$BENCH_REPORT_DIR/logs/${component}-${ref_name}-cold-${repeat_index}.log" + if elapsed=$(bench_run_timed_step "$component" "$ref_name" "cold" "$repeat_index" "$BENCH_REPEATS" "$worktree_dir" "$cache_root" "$log_file"); then + bench_record_result "$component" "$ref_name" "cold" "$repeat_index" "success" "$elapsed" "" + else + bench_record_result "$component" "$ref_name" "cold" "$repeat_index" "error" "0" "cold failed ($(basename "$log_file"))" + return 1 + fi + fi + + if bench_has_scenario warm; then + log_file="$BENCH_REPORT_DIR/logs/${component}-${ref_name}-warm-${repeat_index}.log" + if elapsed=$(bench_run_timed_step "$component" "$ref_name" "warm" "$repeat_index" "$BENCH_REPEATS" "$worktree_dir" "$cache_root" "$log_file"); then + bench_record_result "$component" "$ref_name" "warm" "$repeat_index" "success" "$elapsed" "" + else + bench_record_result "$component" "$ref_name" "warm" "$repeat_index" "error" "0" "warm failed ($(basename "$log_file"))" + return 1 + fi + fi + done + + return 0 +} + +bench_component_worktree_dir() { + local component=$1 + local ref_name=$2 + + echo "$BENCH_TMPDIR/worktrees/$(bench_component_key "$component")-$ref_name" +} + +bench_ensure_report_dir() { + if mkdir -p "$BENCH_REPORT_DIR/raw" "$BENCH_REPORT_DIR/logs" >/dev/null 2>&1; then + return + fi + + local fallback_dir + fallback_dir=$(mktemp -d) + bench_log_warning "Could not write to '$BENCH_REPORT_DIR'; using '$fallback_dir' instead" + BENCH_REPORT_DIR="$fallback_dir" + mkdir -p "$BENCH_REPORT_DIR/raw" "$BENCH_REPORT_DIR/logs" +} + +bench_setup_component_worktrees() { + local component=$1 + local baseline_dir + local candidate_dir + + 
baseline_dir=$(bench_component_worktree_dir "$component" "$BASELINE_LABEL") + candidate_dir=$(bench_component_worktree_dir "$component" "$CANDIDATE_LABEL") + + mkdir -p "$BENCH_TMPDIR/worktrees" + + git -C "$REPO_ROOT" worktree add --detach "$baseline_dir" "$BASELINE_REF" >/dev/null 2>&1 + git -C "$REPO_ROOT" worktree add --detach "$candidate_dir" "$CANDIDATE_REF" >/dev/null 2>&1 + + WORKTREE_PATHS+=("$baseline_dir" "$candidate_dir") +} + +bench_run_component_job() { + local component=$1 + local baseline_dir + local candidate_dir + + baseline_dir=$(bench_component_worktree_dir "$component" "$BASELINE_LABEL") + candidate_dir=$(bench_component_worktree_dir "$component" "$CANDIDATE_LABEL") + + bench_run_component_ref "$component" "$BASELINE_LABEL" "$baseline_dir" + bench_run_component_ref "$component" "$CANDIDATE_LABEL" "$candidate_dir" +} + +bench_record_component_preflight_error() { + local component=$1 + local message=$2 + + bench_log_error "$component preflight failed: $message" + if bench_has_scenario cold; then + bench_record_result "$component" "$BASELINE_LABEL" "cold" "1" "error" "0" "$message" + bench_record_result "$component" "$CANDIDATE_LABEL" "cold" "1" "error" "0" "$message" + fi + if bench_has_scenario warm; then + bench_record_result "$component" "$BASELINE_LABEL" "warm" "1" "error" "0" "$message" + bench_record_result "$component" "$CANDIDATE_LABEL" "warm" "1" "error" "0" "$message" + fi +} + +bench_preflight_selected_components() { + local component + local message + local status=0 + + READY_COMPONENTS=() + + for component in "${SELECTED_COMPONENTS[@]}"; do + : >"$(bench_result_file_for_component "$component")" + + if bench_preflight_component "$component" >"$BENCH_REPORT_DIR/logs/${component}-preflight.log" 2>&1; then + READY_COMPONENTS+=("$component") + else + message=$(tr '\n' ' ' <"$BENCH_REPORT_DIR/logs/${component}-preflight.log" | sed 's/[[:space:]]*$//') + bench_record_component_preflight_error "$component" "$message" + status=1 + fi + done + 
+ return "$status" +} + +bench_gather_runs() { + local raw_file=$1 + local component=$2 + local ref_name=$3 + local scenario=$4 + + awk -F '\t' -v component="$component" -v ref_name="$ref_name" -v scenario="$scenario" \ + '$1 == component && $2 == ref_name && $3 == scenario && $5 == "success" { print $6 }' "$raw_file" +} + +bench_has_errors() { + local raw_file=$1 + local component=$2 + local scenario=$3 + + awk -F '\t' -v component="$component" -v scenario="$scenario" \ + '$1 == component && $3 == scenario && $5 != "success" { found = 1 } END { exit !found }' "$raw_file" +} + +bench_success_count() { + local raw_file=$1 + local component=$2 + local ref_name=$3 + local scenario=$4 + + awk -F '\t' -v component="$component" -v ref_name="$ref_name" -v scenario="$scenario" \ + '$1 == component && $2 == ref_name && $3 == scenario && $5 == "success" { count++ } END { print count + 0 }' "$raw_file" +} + +bench_calc_median() { + if [[ $# -eq 0 ]]; then + echo "" + return + fi + + printf '%s\n' "$@" | sort -n | awk ' + { + values[NR] = $1 + } + END { + if (NR == 0) { + exit + } + if (NR % 2 == 1) { + printf "%.1f", values[(NR + 1) / 2] + } else { + printf "%.1f", (values[NR / 2] + values[(NR / 2) + 1]) / 2 + } + }' +} + +bench_calc_stddev() { + if [[ $# -eq 0 ]]; then + echo "" + return + fi + + printf '%s\n' "$@" | awk ' + { + values[NR] = $1 + sum += $1 + } + END { + if (NR == 0) { + exit + } + mean = sum / NR + for (i = 1; i <= NR; i++) { + delta = values[i] - mean + variance += delta * delta + } + printf "%.1f", sqrt(variance / NR) + }' +} + +bench_join_json_runs() { + local first=true + local value + + for value in "$@"; do + if [[ "$first" == true ]]; then + first=false + else + printf ', ' + fi + printf '%s' "$value" + done +} + +bench_component_complete() { + local raw_file=$1 + local component=$2 + local scenario + local ref_name + local count + + while IFS= read -r scenario; do + for ref_name in "$BASELINE_LABEL" "$CANDIDATE_LABEL"; do + 
count=$(bench_success_count "$raw_file" "$component" "$ref_name" "$scenario") + if [[ "$count" -ne "$BENCH_REPEATS" ]]; then + return 1 + fi + done + done < <(bench_scenarios_for_mode) + + return 0 +} + +bench_generate_human_report() { + local raw_file=$1 + local human_file + local scenario + local component + local baseline_runs=() + local candidate_runs=() + local line + local baseline_median + local candidate_median + local candidate_stddev + local delta_s + local delta_pct + local budget_ok + local passed=0 + local failed=0 + + human_file=$(bench_human_report_file) + : >"$human_file" + + { + printf '═══════════════════════════════════════════\n' + printf ' Component Benchmark Summary\n' + printf '═══════════════════════════════════════════\n' + printf '\n' + printf 'Baseline: %s Candidate: %s\n' "$BASELINE_REF" "$CANDIDATE_REF" + printf 'Platform: %s/%s Repeats: %s Date: %s\n' "$(uname -s | tr '[:upper:]' '[:lower:]')" "$(uname -m)" "$BENCH_REPEATS" "$(bench_timestamp)" + printf '\n' + } >>"$human_file" + + while IFS= read -r scenario; do + if [[ "$scenario" == "cold" ]]; then + printf 'Cold Install (new contributor path)\n' >>"$human_file" + else + printf 'Warm Rebuild (incremental, after source touch)\n' >>"$human_file" + fi + + for component in "${SELECTED_COMPONENTS[@]}"; do + if bench_has_errors "$raw_file" "$component" "$scenario"; then + printf ' ✗ %-14s failed\n' "$component" >>"$human_file" + continue + fi + + baseline_runs=() + candidate_runs=() + while IFS= read -r line; do + [[ -n "$line" ]] && baseline_runs+=("$line") + done < <(bench_gather_runs "$raw_file" "$component" "$BASELINE_LABEL" "$scenario") + while IFS= read -r line; do + [[ -n "$line" ]] && candidate_runs+=("$line") + done < <(bench_gather_runs "$raw_file" "$component" "$CANDIDATE_LABEL" "$scenario") + + if [[ ${#baseline_runs[@]} -eq 0 || ${#candidate_runs[@]} -eq 0 ]]; then + printf ' ✗ %-14s incomplete\n' "$component" >>"$human_file" + continue + fi + + 
baseline_median=$(bench_calc_median "${baseline_runs[@]}") + candidate_median=$(bench_calc_median "${candidate_runs[@]}") + candidate_stddev=$(bench_calc_stddev "${candidate_runs[@]}") + delta_s=$(awk -v base="$baseline_median" -v cand="$candidate_median" 'BEGIN { printf "%.1f", cand - base }') + delta_pct=$(awk -v base="$baseline_median" -v cand="$candidate_median" 'BEGIN { if (base == 0) printf "0.0"; else printf "%.1f", ((cand - base) / base) * 100 }') + printf ' ✓ %-14s %5ss → %5ss %5ss (%5s%%) stddev %ss\n' \ + "$component" "$baseline_median" "$candidate_median" "$delta_s" "$delta_pct" "$candidate_stddev" >>"$human_file" + done + + printf '\n' >>"$human_file" + done < <(bench_scenarios_for_mode) + + if bench_has_scenario cold; then + printf '60s Budget (cold install)\n' >>"$human_file" + for component in "${SELECTED_COMPONENTS[@]}"; do + baseline_runs=() + candidate_runs=() + while IFS= read -r line; do + [[ -n "$line" ]] && candidate_runs+=("$line") + done < <(bench_gather_runs "$raw_file" "$component" "$CANDIDATE_LABEL" "cold") + if [[ ${#candidate_runs[@]} -eq 0 ]]; then + printf ' ✗ %-9s unavailable\n' "$component" >>"$human_file" + continue + fi + + candidate_median=$(bench_calc_median "${candidate_runs[@]}") + budget_ok=$(awk -v value="$candidate_median" 'BEGIN { if (value <= 60.0) print "true"; else print "false" }') + if [[ "$budget_ok" == "true" ]]; then + printf ' ✓ %-9s %5ss (%ss headroom)\n' \ + "$component" "$candidate_median" "$(awk -v value="$candidate_median" 'BEGIN { printf "%.1f", 60.0 - value }')" >>"$human_file" + else + printf ' ✗ %-9s %5ss (%ss over budget)\n' \ + "$component" "$candidate_median" "$(awk -v value="$candidate_median" 'BEGIN { printf "%.1f", value - 60.0 }')" >>"$human_file" + fi + done + printf '\n' >>"$human_file" + fi + + if bench_has_scenario cold; then + printf 'Cold Install (visual, 60s budget)\n' >>"$human_file" + for component in "${SELECTED_COMPONENTS[@]}"; do + candidate_runs=() + while IFS= read -r line; do + [[ 
-n "$line" ]] && candidate_runs+=("$line") + done < <(bench_gather_runs "$raw_file" "$component" "$CANDIDATE_LABEL" "cold") + if [[ ${#candidate_runs[@]} -eq 0 ]]; then + printf ' %-14s --\n' "$component" >>"$human_file" + continue + fi + candidate_median=$(bench_calc_median "${candidate_runs[@]}") + local bar_len + bar_len=$(awk -v value="$candidate_median" 'BEGIN { v = int(value / 2); if (v < 1) v = 1; if (v > 30) v = 30; printf "%d", v }') + local bar="" + local i + for (( i=0; i < bar_len; i++ )); do + bar+="█" + done + local marker="" + if awk -v value="$candidate_median" 'BEGIN { exit !(value > 60.0) }'; then + marker=" OVER" + fi + printf ' %-14s %s %ss%s\n' "$component" "$bar" "$candidate_median" "$marker" >>"$human_file" + done + printf ' %s\n' "|----|----|----|----|----|----| 60s" >>"$human_file" + printf '\n' >>"$human_file" + fi + + for component in "${SELECTED_COMPONENTS[@]}"; do + if bench_component_complete "$raw_file" "$component"; then + passed=$((passed + 1)) + else + failed=$((failed + 1)) + fi + done + + { + printf 'Results:\n' + printf ' Passed: %s\n' "$passed" + printf ' Failed: %s\n' "$failed" + printf ' Total: %s\n' $((passed + failed)) + } >>"$human_file" +} + +bench_generate_tsv_report() { + local raw_file=$1 + local tsv_file + local scenario + local component + local baseline_runs=() + local candidate_runs=() + local line + local baseline_median + local candidate_median + local candidate_stddev + local delta_s + local delta_pct + local budget_ok="-" + + tsv_file=$(bench_tsv_report_file) + printf 'component\tscenario\tbaseline_s\tcandidate_s\tdelta_s\tdelta_pct\tstddev_s\tbudget_ok\n' >"$tsv_file" + + while IFS= read -r scenario; do + for component in "${SELECTED_COMPONENTS[@]}"; do + baseline_runs=() + candidate_runs=() + while IFS= read -r line; do + [[ -n "$line" ]] && baseline_runs+=("$line") + done < <(bench_gather_runs "$raw_file" "$component" "$BASELINE_LABEL" "$scenario") + while IFS= read -r line; do + [[ -n "$line" ]] && candidate_runs+=("$line") + done < <(bench_gather_runs "$raw_file" "$component" "$CANDIDATE_LABEL" "$scenario") + + if [[ 
${#baseline_runs[@]} -eq 0 || ${#candidate_runs[@]} -eq 0 ]]; then + continue + fi + + baseline_median=$(bench_calc_median "${baseline_runs[@]}") + candidate_median=$(bench_calc_median "${candidate_runs[@]}") + candidate_stddev=$(bench_calc_stddev "${candidate_runs[@]}") + delta_s=$(awk -v base="$baseline_median" -v cand="$candidate_median" 'BEGIN { printf "%.1f", cand - base }') + delta_pct=$(awk -v base="$baseline_median" -v cand="$candidate_median" 'BEGIN { if (base == 0) printf "0.0"; else printf "%.1f", ((cand - base) / base) * 100 }') + + if [[ "$scenario" == "cold" ]]; then + budget_ok=$(awk -v value="$candidate_median" 'BEGIN { if (value <= 60.0) print "true"; else print "false" }') + else + budget_ok="-" + fi + + printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n' \ + "$component" "$scenario" "$baseline_median" "$candidate_median" "$delta_s" "$delta_pct" "$candidate_stddev" "$budget_ok" >>"$tsv_file" + done + done < <(bench_scenarios_for_mode) +} + +bench_generate_json_report() { + local raw_file=$1 + local json_file + local component + local scenario + local first_component=true + local first_scenario + local baseline_runs=() + local candidate_runs=() + local line + local baseline_median + local candidate_median + local baseline_stddev + local candidate_stddev + local delta_s + local delta_pct + local budget_ok + + json_file=$(bench_json_report_file) + : >"$json_file" + + { + printf '{\n' + printf ' "metadata": {\n' + printf ' "baseline_ref": "%s",\n' "$BASELINE_REF" + printf ' "candidate_ref": "%s",\n' "$CANDIDATE_REF" + printf ' "platform": "%s/%s",\n' "$(uname -s | tr '[:upper:]' '[:lower:]')" "$(uname -m)" + printf ' "repeats": %s,\n' "$BENCH_REPEATS" + printf ' "date": "%s"\n' "$(bench_timestamp)" + printf ' },\n' + printf ' "components": {\n' + } >>"$json_file" + + for component in "${SELECTED_COMPONENTS[@]}"; do + if [[ "$first_component" == true ]]; then + first_component=false + else + printf ',\n' >>"$json_file" + fi + + printf ' "%s": {' "$component" 
>>"$json_file" + first_scenario=true + + while IFS= read -r scenario; do + baseline_runs=() + candidate_runs=() + while IFS= read -r line; do + [[ -n "$line" ]] && baseline_runs+=("$line") + done < <(bench_gather_runs "$raw_file" "$component" "$BASELINE_LABEL" "$scenario") + while IFS= read -r line; do + [[ -n "$line" ]] && candidate_runs+=("$line") + done < <(bench_gather_runs "$raw_file" "$component" "$CANDIDATE_LABEL" "$scenario") + + if [[ ${#baseline_runs[@]} -eq 0 || ${#candidate_runs[@]} -eq 0 ]]; then + continue + fi + + baseline_median=$(bench_calc_median "${baseline_runs[@]}") + candidate_median=$(bench_calc_median "${candidate_runs[@]}") + baseline_stddev=$(bench_calc_stddev "${baseline_runs[@]}") + candidate_stddev=$(bench_calc_stddev "${candidate_runs[@]}") + delta_s=$(awk -v base="$baseline_median" -v cand="$candidate_median" 'BEGIN { printf "%.1f", cand - base }') + delta_pct=$(awk -v base="$baseline_median" -v cand="$candidate_median" 'BEGIN { if (base == 0) printf "0.0"; else printf "%.1f", ((cand - base) / base) * 100 }') + budget_ok="null" + if [[ "$scenario" == "cold" ]]; then + if awk -v value="$candidate_median" 'BEGIN { exit !(value <= 60.0) }'; then + budget_ok="true" + else + budget_ok="false" + fi + fi + + if [[ "$first_scenario" == true ]]; then + printf '\n' >>"$json_file" + first_scenario=false + else + printf ',\n' >>"$json_file" + fi + + { + printf ' "%s": {\n' "$scenario" + printf ' "baseline": { "median": %s, "stddev": %s, "runs": [%s] },\n' \ + "$baseline_median" "$baseline_stddev" "$(bench_join_json_runs "${baseline_runs[@]}")" + printf ' "candidate": { "median": %s, "stddev": %s, "runs": [%s] },\n' \ + "$candidate_median" "$candidate_stddev" "$(bench_join_json_runs "${candidate_runs[@]}")" + printf ' "delta_s": %s,\n' "$delta_s" + printf ' "delta_pct": %s' "$delta_pct" + if [[ "$budget_ok" != "null" ]]; then + printf ',\n "budget_ok": %s\n' "$budget_ok" + else + printf '\n' + fi + printf ' }' + } >>"$json_file" + done < 
<(bench_scenarios_for_mode) + + if [[ "$first_scenario" == true ]]; then + printf ' }' >>"$json_file" + else + printf '\n }' >>"$json_file" + fi + done + + { + printf '\n' + printf ' }\n' + printf '}\n' + } >>"$json_file" +} + +bench_colorize_human_stream() { + while IFS= read -r line; do + case "$line" in + "═══════════════════════════════════════════") + printf '%s%s%s\n' "$COLOR_BOLD" "$line" "$COLOR_RESET" + ;; + " Component Benchmark Summary") + printf '%s%s%s\n' "$COLOR_BOLD" "$line" "$COLOR_RESET" + ;; + "Cold Install"*|"Warm Rebuild"*|"60s Budget"*|"Results:") + printf '%s%s%s\n' "$COLOR_BOLD" "$line" "$COLOR_RESET" + ;; + " ✓ "*) + printf ' %s✓%s%s\n' "$COLOR_GREEN" "$COLOR_RESET" "${line# ✓}" + ;; + " ✗ "*) + printf ' %s✗%s%s\n' "$COLOR_RED" "$COLOR_RESET" "${line# ✗}" + ;; + *"OVER"*) + printf '%s%s%s\n' "$COLOR_RED" "$line" "$COLOR_RESET" + ;; + " |"*) + printf '%s%s%s\n' "$COLOR_YELLOW" "$line" "$COLOR_RESET" + ;; + *) + printf '%s\n' "$line" + ;; + esac + done <"$(bench_human_report_file)" +} + +bench_generate_reports() { + local raw_file=$1 + + bench_generate_human_report "$raw_file" + bench_generate_tsv_report "$raw_file" + bench_generate_json_report "$raw_file" +} + +bench_emit_selected_format() { + case "$BENCH_FORMAT" in + human) + if bench_supports_color; then + bench_colorize_human_stream + else + cat "$(bench_human_report_file)" + fi + ;; + tsv) + cat "$(bench_tsv_report_file)" + ;; + json) + cat "$(bench_json_report_file)" + ;; + esac +} + +bench_kill_tree() { + local pid=$1 + local children + children=$(pgrep -P "$pid" 2>/dev/null) || true + local child + for child in $children; do + bench_kill_tree "$child" + done + kill -TERM "$pid" 2>/dev/null || true +} + +bench_kill_children() { + local pid + for pid in "${BENCH_BG_PIDS[@]}"; do + if kill -0 "$pid" 2>/dev/null; then + bench_kill_tree "$pid" + fi + done + for pid in "${BENCH_BG_PIDS[@]}"; do + wait "$pid" 2>/dev/null || true + done + BENCH_BG_PIDS=() +} + +bench_cleanup() { + 
bench_kill_children + + local path + + if [[ ${#WORKTREE_PATHS[@]} -gt 0 ]]; then + for path in "${WORKTREE_PATHS[@]}"; do + if [[ -d "$path" ]]; then + git -C "$REPO_ROOT" worktree remove --force "$path" >/dev/null 2>&1 || true + fi + done + fi + + if [[ -n "$BENCH_TMPDIR" && -d "$BENCH_TMPDIR" ]]; then + chmod -R u+w "$BENCH_TMPDIR" >/dev/null 2>&1 || true + rm -rf "$BENCH_TMPDIR" + fi +} + +bench_handle_signal() { + if [[ "$BENCH_INTERRUPTED" == true ]]; then + return + fi + BENCH_INTERRUPTED=true + printf '\n' >&2 + bench_log_error "Interrupted — cleaning up…" + bench_cleanup + trap - EXIT INT TERM + kill -INT $$ +} + +bench_parse_args() { + local components_arg="all" + + while [[ $# -gt 0 ]]; do + case "$1" in + --baseline-ref) + BASELINE_REF=$2 + shift 2 + ;; + --candidate-ref) + CANDIDATE_REF=$2 + shift 2 + ;; + --components) + components_arg=$2 + shift 2 + ;; + --mode) + BENCH_MODE=$2 + shift 2 + ;; + --repeats) + BENCH_REPEATS=$2 + shift 2 + ;; + --format) + BENCH_FORMAT=$2 + shift 2 + ;; + --report-dir) + BENCH_REPORT_DIR=$2 + shift 2 + ;; + --ci) + CI_MODE=true + shift + ;; + --help) + bench_usage + exit 0 + ;; + *) + bench_log_error "Unknown argument: $1" + bench_usage + exit 1 + ;; + esac + done + + if [[ "${CI:-false}" == "true" ]]; then + CI_MODE=true + fi + + if [[ -z "$BENCH_REPEATS" ]]; then + if [[ "$CI_MODE" == true ]]; then + BENCH_REPEATS=5 + else + BENCH_REPEATS=3 + fi + fi + + if [[ -z "$BASELINE_REF" ]]; then + BASELINE_REF=$(bench_default_baseline_ref) + fi + + if [[ -z "$BENCH_REPORT_DIR" ]]; then + BENCH_REPORT_DIR="$REPO_ROOT/reports/benchmarks" + fi + + if [[ -z "$BENCH_FORMAT" ]]; then + if [[ -t 1 && "$CI_MODE" != true ]]; then + BENCH_FORMAT=human + else + BENCH_FORMAT=tsv + fi + fi + + bench_validate_mode + bench_validate_format + bench_validate_ref "$BASELINE_REF" + bench_validate_ref "$CANDIDATE_REF" + bench_parse_components "$components_arg" +} + +bench_print_intro() { + local baseline_short candidate_short component_count 
scenario_list + + baseline_short=$(git -C "$REPO_ROOT" rev-parse --short "$BASELINE_REF" 2>/dev/null || echo "$BASELINE_REF") + candidate_short=$(git -C "$REPO_ROOT" rev-parse --short "$CANDIDATE_REF" 2>/dev/null || echo "$CANDIDATE_REF") + component_count=${#SELECTED_COMPONENTS[@]} + scenario_list=$(bench_scenarios_for_mode | tr '\n' '+' | sed 's/+$//') + + { + printf '%s═══════════════════════════════════════════%s\n' "$COLOR_BOLD" "$COLOR_RESET" + printf '%s Ambient Code Platform — Component Bench%s\n' "$COLOR_BOLD" "$COLOR_RESET" + printf '%s═══════════════════════════════════════════%s\n' "$COLOR_BOLD" "$COLOR_RESET" + printf ' Baseline: %s\n' "$baseline_short" + printf ' Candidate: %s\n' "$candidate_short" + printf ' Components: %s (%s)\n' "$component_count" "${SELECTED_COMPONENTS[*]}" + printf ' Mode: %s Repeats: %s\n' "$scenario_list" "$BENCH_REPEATS" + printf ' Platform: %s/%s\n' "$(uname -s)" "$(uname -m)" + printf '\n' + } >&2 +} + +bench_run() { + local component + local pid + local status=0 + local raw_file + + bench_ensure_report_dir + BENCH_TMPDIR=$(mktemp -d) + trap bench_cleanup EXIT + trap bench_handle_signal INT TERM + + bench_print_intro + + if ! bench_preflight_selected_components; then + status=1 + fi + + if [[ ${#READY_COMPONENTS[@]} -gt 0 ]]; then + for component in "${READY_COMPONENTS[@]}"; do + bench_setup_component_worktrees "$component" + done + + for component in "${READY_COMPONENTS[@]}"; do + bench_run_component_job "$component" & + BENCH_BG_PIDS+=($!) + done + fi + + for pid in "${BENCH_BG_PIDS[@]}"; do + if ! 
wait "$pid" 2>/dev/null; then + status=1 + fi + if [[ "$BENCH_INTERRUPTED" == true ]]; then + status=1 + break + fi + done + + raw_file=$(bench_combined_raw_file) + : >"$raw_file" + for component in "${SELECTED_COMPONENTS[@]}"; do + cat "$(bench_result_file_for_component "$component")" >>"$raw_file" + done + + bench_generate_reports "$raw_file" + bench_emit_selected_format + return "$status" +} + +source "$SCRIPT_DIR/bench-manifest.sh" + +main() { + cd "$REPO_ROOT" + bench_parse_args "$@" + bench_init_colors + bench_run +} + +if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then + main "$@" +fi diff --git a/tests/bench-test.sh b/tests/bench-test.sh new file mode 100644 index 000000000..2c779e44b --- /dev/null +++ b/tests/bench-test.sh @@ -0,0 +1,187 @@ +#!/usr/bin/env bash + +# shellcheck shell=bash + +set -euo pipefail + +REPO_ROOT=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +cd "$REPO_ROOT" + +source "$REPO_ROOT/scripts/benchmarks/component-bench.sh" + +TESTS_PASSED=0 +TESTS_FAILED=0 + +pass() { + printf '✓ %s\n' "$1" + TESTS_PASSED=$((TESTS_PASSED + 1)) +} + +fail() { + printf '✗ %s\n' "$1" >&2 + TESTS_FAILED=$((TESTS_FAILED + 1)) +} + +run_test() { + local name=$1 + shift + + if "$@"; then + pass "$name" + else + fail "$name" + fi +} + +test_bench_script_syntax() { + bash -n scripts/benchmarks/component-bench.sh +} + +test_bench_manifest_syntax() { + bash -n scripts/benchmarks/bench-manifest.sh +} + +test_bench_all_components_defined() { + local component="" + local key="" + + for component in "${BENCH_COMPONENTS[@]}"; do + key=$(bench_component_key "$component") + declare -f "bench_env_${key}" >/dev/null 2>&1 + declare -f "bench_preflight_${key}" >/dev/null 2>&1 + declare -f "bench_cold_${key}" >/dev/null 2>&1 + declare -f "bench_warm_${key}" >/dev/null 2>&1 + declare -f "bench_cleanup_${key}" >/dev/null 2>&1 + done +} + +test_bench_dry_run() { + bash scripts/benchmarks/component-bench.sh --help >/dev/null +} + +make_mock_raw_file() { + local raw_file=$1 + + cat 
>"$raw_file" <<'EOF' +frontend baseline cold 1 success 42.1 +frontend baseline cold 2 success 42.0 +frontend baseline cold 3 success 42.2 +frontend candidate cold 1 success 41.8 +frontend candidate cold 2 success 41.9 +frontend candidate cold 3 success 41.7 +frontend baseline warm 1 success 8.2 +frontend baseline warm 2 success 8.1 +frontend baseline warm 3 success 8.3 +frontend candidate warm 1 success 8.1 +frontend candidate warm 2 success 8.0 +frontend candidate warm 3 success 8.2 +backend baseline cold 1 success 6.2 +backend baseline cold 2 success 6.1 +backend baseline cold 3 success 6.3 +backend candidate cold 1 success 6.1 +backend candidate cold 2 success 6.0 +backend candidate cold 3 success 6.2 +backend baseline warm 1 success 1.1 +backend baseline warm 2 success 1.0 +backend baseline warm 3 success 1.2 +backend candidate warm 1 success 1.1 +backend candidate warm 2 success 1.1 +backend candidate warm 3 success 1.0 +EOF +} + +test_bench_report_outputs() { + local temp_dir + local raw_file + + temp_dir=$(mktemp -d) + BENCH_REPORT_DIR="$temp_dir/reports" + mkdir -p "$BENCH_REPORT_DIR/raw" "$BENCH_REPORT_DIR/logs" + BENCH_REPEATS=3 + BENCH_MODE=both + BASELINE_REF=abc1234 + CANDIDATE_REF=def5678 + SELECTED_COMPONENTS=(frontend backend) + + raw_file="$temp_dir/raw.tsv" + make_mock_raw_file "$raw_file" + bench_generate_reports "$raw_file" + + grep -q '^Results:$' "$(bench_human_report_file)" + grep -q '✓ frontend' "$(bench_human_report_file)" + head -n 1 "$(bench_tsv_report_file)" | grep -q $'^component\tscenario\tbaseline_s\tcandidate_s\tdelta_s\tdelta_pct\tstddev_s\tbudget_ok$' + awk -F '\t' 'NR == 1 || NF == 8 { next } { exit 1 }' "$(bench_tsv_report_file)" + + if command -v python3 >/dev/null 2>&1; then + python3 -m json.tool "$(bench_json_report_file)" >/dev/null + elif command -v jq >/dev/null 2>&1; then + jq . 
"$(bench_json_report_file)" >/dev/null + else + return 1 + fi + + rm -rf "$temp_dir" +} + +test_bench_no_ansi_when_piped() { + local temp_dir + local raw_file + + temp_dir=$(mktemp -d) + BENCH_REPORT_DIR="$temp_dir/reports" + mkdir -p "$BENCH_REPORT_DIR/raw" "$BENCH_REPORT_DIR/logs" + BENCH_REPEATS=3 + BENCH_MODE=cold + BASELINE_REF=abc1234 + CANDIDATE_REF=def5678 + SELECTED_COMPONENTS=(frontend) + CI_MODE=false + BENCH_FORMAT=human + + raw_file="$temp_dir/raw.tsv" + make_mock_raw_file "$raw_file" + bench_generate_reports "$raw_file" + + if bench_emit_selected_format | perl -ne 'exit 1 if /\e\[/'; then + rm -rf "$temp_dir" + return 0 + fi + + rm -rf "$temp_dir" + return 1 +} + +test_bench_report_dir_fallback() { + local original_report_dir=$BENCH_REPORT_DIR + + BENCH_REPORT_DIR="/dev/null/benchmarks" + bench_ensure_report_dir + [[ -d "$BENCH_REPORT_DIR" ]] + + rm -rf "$BENCH_REPORT_DIR" + BENCH_REPORT_DIR=$original_report_dir +} + +test_bench_makefile_target() { + make -n benchmark >/dev/null 2>&1 +} + +main() { + run_test "benchmark script syntax" test_bench_script_syntax + run_test "benchmark manifest syntax" test_bench_manifest_syntax + run_test "benchmark component function coverage" test_bench_all_components_defined + run_test "benchmark help output" test_bench_dry_run + run_test "benchmark report outputs" test_bench_report_outputs + run_test "benchmark no ANSI when piped" test_bench_no_ansi_when_piped + run_test "benchmark report dir fallback" test_bench_report_dir_fallback + run_test "benchmark make target syntax" test_bench_makefile_target + + printf '\nResults:\n' + printf ' Passed: %s\n' "$TESTS_PASSED" + printf ' Failed: %s\n' "$TESTS_FAILED" + printf ' Total: %s\n' $((TESTS_PASSED + TESTS_FAILED)) + + [[ "$TESTS_FAILED" -eq 0 ]] +} + +main "$@" diff --git a/tests/local-dev-test.sh b/tests/local-dev-test.sh index d8114a3af..2382d4906 100755 --- a/tests/local-dev-test.sh +++ b/tests/local-dev-test.sh @@ -426,9 +426,30 @@ test_reload_commands() { fi } +# 
Test: Benchmark Harness Syntax +test_benchmark_syntax() { + log_section "Test 15: Benchmark Harness Syntax" + + if bash -n scripts/benchmarks/component-bench.sh 2>/dev/null; then + log_success "component-bench.sh syntax is valid" + ((PASSED_TESTS++)) + else + log_error "component-bench.sh has syntax errors" + ((FAILED_TESTS++)) + fi + + if make -n benchmark >/dev/null 2>&1; then + log_success "make benchmark syntax is valid" + ((PASSED_TESTS++)) + else + log_error "make benchmark has syntax errors" + ((FAILED_TESTS++)) + fi +} + # Test: Logging Commands test_logging_commands() { - log_section "Test 15: Logging Commands" + log_section "Test 16: Logging Commands" # Test that we can get logs from each component local components=("backend-api" "frontend" "agentic-operator") @@ -445,7 +466,7 @@ test_logging_commands() { # Test: Storage Configuration test_storage() { - log_section "Test 16: Storage Configuration" + log_section "Test 17: Storage Configuration" # Check if workspace PVC exists if kubectl get pvc workspace-pvc -n "$NAMESPACE" >/dev/null 2>&1; then @@ -918,6 +939,7 @@ main() { test_rbac test_build_command test_reload_commands + test_benchmark_syntax test_logging_commands test_storage test_environment_variables From d3ccb1b417cbe09d2c14606ea838ad65aea9c89c Mon Sep 17 00:00:00 2001 From: Bob Gregor Date: Mon, 30 Mar 2026 12:42:28 -0400 Subject: [PATCH 2/2] Potential fix for code scanning alert no. 
157: Workflow does not contain permissions Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com> --- .github/workflows/component-benchmarks.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/component-benchmarks.yml b/.github/workflows/component-benchmarks.yml index 4a41e996d..c31139ff6 100644 --- a/.github/workflows/component-benchmarks.yml +++ b/.github/workflows/component-benchmarks.yml @@ -1,5 +1,8 @@ name: Component Benchmarks +permissions: + contents: read + on: workflow_dispatch: inputs: