diff --git a/tests/atomic_index_midrun_failure_test.sh b/tests/atomic_index_midrun_failure_test.sh new file mode 100644 index 000000000..a7fb7744c --- /dev/null +++ b/tests/atomic_index_midrun_failure_test.sh @@ -0,0 +1,381 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Test: Atomic Index Mid-Run Failure (Spec 36) +# +# Validates that: +# - After a mid-run failure, no audit_index.tmp/ remains +# - Any prior audit_index/ is untouched +# - After a successful run, audit_index/ has fresh content and audit_index.tmp/ is absent + +ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" +RUN_INDEX_SCRIPT="$ROOT_DIR/vibe-code-audit/scripts/run_index.sh" + +TEST_TMPDIR="" +cleanup() { + if [ -n "$TEST_TMPDIR" ] && [ -d "$TEST_TMPDIR" ]; then + rm -rf "$TEST_TMPDIR" + fi +} +trap cleanup EXIT INT TERM + +TEST_TMPDIR="$(mktemp -d "${TMPDIR:-/tmp}/vca-atomic-idx-test.XXXXXX")" + +PASS_FILE="$TEST_TMPDIR/.pass_count" +FAIL_FILE="$TEST_TMPDIR/.fail_count" +printf '0\n' > "$PASS_FILE" +printf '0\n' > "$FAIL_FILE" + +pass() { + local c + c="$(cat "$PASS_FILE")" + printf '%d\n' "$((c + 1))" > "$PASS_FILE" + printf '[atomic_index_midrun] PASS: %s\n' "$1" +} + +fail() { + local c + c="$(cat "$FAIL_FILE")" + printf '%d\n' "$((c + 1))" > "$FAIL_FILE" + printf '[atomic_index_midrun] FAIL: %s\n' "$1" >&2 +} + +# --- Helper: create mock binaries --- + +write_mock_llmcc() { + local bin_dir="$1" + local fail_on_graph="$2" # "1" = fail during graph generation, "0" = succeed + + cat > "$bin_dir/llmcc" <...|--dir ...>\n' + printf ' -d, --dir ...\n' + exit 0 +fi + +# Graph generation +if [ "$fail_on_graph" = "1" ]; then + printf 'error: forced mid-run graph failure\n' >&2 + exit 2 +fi + +# Success path: parse -o and write a dot file +out="" +while [ \$# -gt 0 ]; do + case "\$1" in + --dir|--lang|--depth|--pagerank-top-k) shift 2 ;; + --graph) shift ;; + -o) out="\${2:-}"; shift 2 ;; + *) shift ;; + esac +done +if [ -n "\$out" ]; then + mkdir -p "\$(dirname "\$out")" + printf 'digraph G { "a" -> "b"; }\n' 
> "\$out" +fi +printf 'Total time: 0.01s\n' +MOCK_EOF + chmod +x "$bin_dir/llmcc" +} + +write_mock_agentroot() { + local bin_dir="$1" + + cat > "$bin_dir/agentroot" <<'MOCK_EOF' +#!/usr/bin/env bash +set -euo pipefail + +cmd="${1:-}" + +if [ "$cmd" = "--version" ]; then + printf 'agentroot mock 0.0.0\n' + exit 0 +fi + +if [ "$cmd" = "--help" ]; then + printf 'Usage: agentroot \n collection\n update\n status\n query\n vsearch\n' + exit 0 +fi + +if [ "$cmd" = "index" ] && [ "${2:-}" = "--help" ]; then + printf "error: unknown command 'index'\n" >&2 + exit 2 +fi + +case "$cmd" in + collection) + sub="${2:-}" + case "$sub" in + add) + printf 'collection added\n' + ;; + list) + printf 'mock-collection\n' + ;; + *) + printf 'collection command ok\n' + ;; + esac + ;; + update) + printf 'updated\n' + ;; + embed) + printf 'embedded\n' + ;; + status) + if [ "${2:-}" = "--format" ] && [ "${3:-}" = "json" ]; then + printf '{"document_count": 23, "embedded_count": 23}\n' + exit 0 + fi + printf '{"document_count": 23, "embedded_count": 23}\n' + ;; + query|vsearch) + shift + query_text="${1:-}" + shift || true + if [ "${1:-}" = "--format" ] && [ "${2:-}" = "json" ]; then + printf '{"query": "%s", "count": 1}\n' "$query_text" + exit 0 + fi + printf 'result for %s\n' "$query_text" + ;; + *) + printf 'error: unknown command %s\n' "$cmd" >&2 + exit 2 + ;; +esac +MOCK_EOF + chmod +x "$bin_dir/agentroot" +} + +create_rust_repo_fixture() { + local repo_dir="$1" + mkdir -p "$repo_dir/src" + cat > "$repo_dir/Cargo.toml" <<'EOF' +[package] +name = "mock-atomic-test" +version = "0.1.0" +edition = "2021" +EOF + cat > "$repo_dir/src/main.rs" <<'EOF' +fn main() { println!("test"); } +EOF +} + +# ============================================================ +# TEST 1: Mid-run failure — old audit_index/ preserved, no tmp +# ============================================================ + +( + set -euo pipefail + + work_dir="$TEST_TMPDIR/case-failure" + repo_dir="$work_dir/repo" + 
output_dir="$work_dir/output" + bin_dir="$work_dir/bin" + + mkdir -p "$bin_dir" + create_rust_repo_fixture "$repo_dir" + + # Mock llmcc that fails during graph generation (mid-run) + write_mock_llmcc "$bin_dir" "1" + write_mock_agentroot "$bin_dir" + + # Pre-create audit_index/ with a sentinel marker + mkdir -p "$output_dir/audit_index" + printf 'pre-existing-sentinel\n' > "$output_dir/audit_index/.pre_existing_marker" + printf '{"old": true}\n' > "$output_dir/audit_index/old_manifest.json" + + # Run with mocks — should fail mid-run when llmcc tries to generate graphs + SCRIPT_EXIT=0 + PATH="$bin_dir:$PATH" \ + VIBE_CODE_AUDIT_AGENTROOT_AUTO_EMBED=0 \ + bash "$RUN_INDEX_SCRIPT" \ + --repo "$repo_dir" \ + --output "$output_dir" \ + --mode standard >/dev/null 2>&1 || SCRIPT_EXIT=$? + + # Assert: non-zero exit + if [ "$SCRIPT_EXIT" -ne 0 ]; then + pass "1a: non-zero exit code on mid-run failure (exit=$SCRIPT_EXIT)" + else + fail "1a: expected non-zero exit, got 0" + fi + + # Assert: no audit_index.tmp/ remains + if [ ! 
-d "$output_dir/audit_index.tmp" ]; then + pass "1b: audit_index.tmp/ absent after mid-run failure" + else + fail "1b: audit_index.tmp/ still exists after mid-run failure" + fi + + # Assert: original audit_index/ sentinel preserved + if [ -f "$output_dir/audit_index/.pre_existing_marker" ]; then + marker_content="$(cat "$output_dir/audit_index/.pre_existing_marker")" + if [ "$marker_content" = "pre-existing-sentinel" ]; then + pass "1c: original audit_index/ sentinel preserved with correct content" + else + fail "1c: sentinel exists but content changed: $marker_content" + fi + else + fail "1c: original audit_index/.pre_existing_marker missing after failure" + fi + + # Assert: original audit_index/ extra file preserved + if [ -f "$output_dir/audit_index/old_manifest.json" ]; then + pass "1d: original audit_index/old_manifest.json preserved" + else + fail "1d: original audit_index/old_manifest.json missing after failure" + fi +) + +# ============================================================ +# TEST 2: Success path — fresh audit_index/, no tmp remains +# ============================================================ + +( + set -euo pipefail + + work_dir="$TEST_TMPDIR/case-success" + repo_dir="$work_dir/repo" + output_dir="$work_dir/output" + bin_dir="$work_dir/bin" + + mkdir -p "$bin_dir" + create_rust_repo_fixture "$repo_dir" + + # Mock llmcc that succeeds + write_mock_llmcc "$bin_dir" "0" + write_mock_agentroot "$bin_dir" + + mkdir -p "$output_dir" + + # Run with mocks — should succeed + SCRIPT_EXIT=0 + PATH="$bin_dir:$PATH" \ + VIBE_CODE_AUDIT_AGENTROOT_AUTO_EMBED=0 \ + bash "$RUN_INDEX_SCRIPT" \ + --repo "$repo_dir" \ + --output "$output_dir" \ + --mode standard >/dev/null 2>&1 || SCRIPT_EXIT=$? + + # Assert: zero exit + if [ "$SCRIPT_EXIT" -eq 0 ]; then + pass "2a: zero exit code on success" + else + fail "2a: expected zero exit, got $SCRIPT_EXIT" + fi + + # Assert: no audit_index.tmp/ remains + if [ ! 
-d "$output_dir/audit_index.tmp" ]; then + pass "2b: audit_index.tmp/ absent after successful run" + else + fail "2b: audit_index.tmp/ still exists after successful run" + fi + + # Assert: audit_index/ exists with fresh content + if [ -d "$output_dir/audit_index" ]; then + pass "2c: audit_index/ directory exists after success" + else + fail "2c: audit_index/ directory missing after success" + fi + + # Assert: manifest.json exists and is non-empty + if [ -s "$output_dir/audit_index/manifest.json" ]; then + pass "2d: manifest.json exists and is non-empty" + else + fail "2d: manifest.json missing or empty" + fi + + # Assert: derived/catalog.json exists + if [ -s "$output_dir/audit_index/derived/catalog.json" ]; then + pass "2e: derived/catalog.json exists and is non-empty" + else + fail "2e: derived/catalog.json missing or empty" + fi + + # Assert: no nested audit_index.tmp inside audit_index + if [ ! -d "$output_dir/audit_index/audit_index.tmp" ]; then + pass "2f: no nested audit_index.tmp inside audit_index/" + else + fail "2f: nested audit_index/audit_index.tmp/ detected" + fi +) + +# ============================================================ +# TEST 3: Success replaces pre-existing audit_index/ +# ============================================================ + +( + set -euo pipefail + + work_dir="$TEST_TMPDIR/case-success-replace" + repo_dir="$work_dir/repo" + output_dir="$work_dir/output" + bin_dir="$work_dir/bin" + + mkdir -p "$bin_dir" + create_rust_repo_fixture "$repo_dir" + + write_mock_llmcc "$bin_dir" "0" + write_mock_agentroot "$bin_dir" + + # Pre-create audit_index/ with old sentinel + mkdir -p "$output_dir/audit_index" + printf 'old-sentinel\n' > "$output_dir/audit_index/.pre_existing_marker" + + SCRIPT_EXIT=0 + PATH="$bin_dir:$PATH" \ + VIBE_CODE_AUDIT_AGENTROOT_AUTO_EMBED=0 \ + bash "$RUN_INDEX_SCRIPT" \ + --repo "$repo_dir" \ + --output "$output_dir" \ + --mode standard >/dev/null 2>&1 || SCRIPT_EXIT=$? 
+ + if [ "$SCRIPT_EXIT" -eq 0 ]; then + pass "3a: zero exit code on success with pre-existing index" + else + fail "3a: expected zero exit, got $SCRIPT_EXIT" + fi + + # Assert: old sentinel is gone (replaced by new content) + if [ ! -f "$output_dir/audit_index/.pre_existing_marker" ]; then + pass "3b: old sentinel removed — audit_index/ was replaced" + else + fail "3b: old sentinel still present — audit_index/ was NOT replaced" + fi + + # Assert: new manifest exists + if [ -s "$output_dir/audit_index/manifest.json" ]; then + pass "3c: fresh manifest.json present after replacement" + else + fail "3c: manifest.json missing after replacement" + fi + + # Assert: no tmp remains + if [ ! -d "$output_dir/audit_index.tmp" ]; then + pass "3d: audit_index.tmp/ absent after successful replacement" + else + fail "3d: audit_index.tmp/ still present after successful replacement" + fi +) + +# ============================================================ +# Summary +# ============================================================ + +PASS="$(cat "$PASS_FILE")" +FAIL="$(cat "$FAIL_FILE")" +printf '\n[atomic_index_midrun] Results: %d passed, %d failed\n' "$PASS" "$FAIL" +if [ "$FAIL" -gt 0 ]; then + exit 1 +fi diff --git a/tests/embed_env_hardening_test.sh b/tests/embed_env_hardening_test.sh new file mode 100644 index 000000000..87f9114ce --- /dev/null +++ b/tests/embed_env_hardening_test.sh @@ -0,0 +1,390 @@ +#!/usr/bin/env bash +set -euo pipefail + +PASS=0 +FAIL=0 +fail() { echo " FAIL: $1"; FAIL=$((FAIL + 1)); } +pass() { echo " PASS: $1"; PASS=$((PASS + 1)); } + +SCRIPTS_DIR="$(cd "$(dirname "$0")/../vibe-code-audit/scripts" && pwd)" +SCRIPT="$SCRIPTS_DIR/run_agentroot_embed.sh" + +echo "=== Embed Env Hardening Tests ===" + +# --- Static checks --- + +# 1. No direct sourcing of embed.env +if grep -qE '^\s*\.\s+"?\$EMBED_ENV_FILE"?' "$SCRIPT"; then + fail "run_agentroot_embed.sh still sources EMBED_ENV_FILE" +else + pass "No direct sourcing of EMBED_ENV_FILE" +fi + +# 2. 
Parser uses while-read loop +if grep -q 'while IFS.*read.*_env_key.*_env_value' "$SCRIPT"; then + pass "Parser uses while-read loop" +else + fail "Parser does not use while-read loop" +fi + +# 3. Whitelist enforcement via case statement +if grep -q 'VIBE_CODE_AUDIT_EMBED_HOST' "$SCRIPT" && \ + grep -q 'VIBE_CODE_AUDIT_EMBED_PORT' "$SCRIPT" && \ + grep -q 'VIBE_CODE_AUDIT_EMBED_DOWNLOAD_MODEL' "$SCRIPT"; then + pass "Whitelist includes HOST, PORT, DOWNLOAD_MODEL" +else + fail "Whitelist missing expected keys" +fi + +# 4. bash -n syntax check +if bash -n "$SCRIPT" 2>/dev/null; then + pass "bash -n syntax check passes" +else + fail "bash -n syntax check failed" +fi + +# 4b. shellcheck (advisory — reports status but does not fail the suite) +if command -v shellcheck >/dev/null 2>&1; then + if shellcheck -S warning "$SCRIPT" >/dev/null 2>&1; then + pass "shellcheck passes on run_agentroot_embed.sh" + else + echo " INFO: shellcheck found warnings in run_agentroot_embed.sh (advisory, not counted as failure)" + fi +else + echo " SKIP: shellcheck not installed — install via 'brew install shellcheck' for lint coverage" +fi + +# --- Dynamic test infrastructure --- + +TMPDIR_ROOT="$(mktemp -d)" +cleanup() { rm -rf "$TMPDIR_ROOT"; } +trap cleanup EXIT INT TERM + +# Create mock binaries +MOCK_BIN="$TMPDIR_ROOT/bin" +mkdir -p "$MOCK_BIN" + +# Mock agentroot: always fails embed (triggers connection-refused path) +cat > "$MOCK_BIN/agentroot" <<'STUB' +#!/usr/bin/env bash +if [ "${1:-}" = "embed" ]; then + echo "Connection refused" >&2 + exit 1 +fi +exit 0 +STUB +chmod +x "$MOCK_BIN/agentroot" + +# Mock curl: logs the URL it receives, always fails (no healthy server) +cat > "$MOCK_BIN/curl" <<'STUB' +#!/usr/bin/env bash +for arg in "$@"; do + case "$arg" in + http://*) echo "$arg" >> "${MOCK_CURL_LOG:-/dev/null}" ;; + esac +done +exit 1 +STUB +chmod +x "$MOCK_BIN/curl" + +# Helper: set up an isolated test home with embed.env and run the script +run_with_env() { + local 
env_content="$1" + shift + local tag="${1:-default}" + shift || true + local test_home="$TMPDIR_ROOT/home_${tag}" + mkdir -p "$test_home/.config/vibe-code-audit" + local test_db="$test_home/test.sqlite" + touch "$test_db" + printf '%s\n' "$env_content" > "$test_home/.config/vibe-code-audit/embed.env" + local curl_log="$test_home/curl_urls.log" + HOME="$test_home" \ + PATH="$MOCK_BIN:/usr/bin:/bin:/usr/sbin:/sbin" \ + MOCK_CURL_LOG="$curl_log" \ + bash "$SCRIPT" --db "$test_db" --no-start-local "$@" 2>/dev/null || true +} + +# Helper: get the health URL the script tried (via mock curl log) +get_health_url() { + local tag="$1" + local curl_log="$TMPDIR_ROOT/home_${tag}/curl_urls.log" + if [ -f "$curl_log" ]; then + head -1 "$curl_log" + else + echo "NO_CURL_CALL" + fi +} + +# --- 5. Valid HOST and PORT are applied --- +echo "" +echo "--- Dynamic: valid config values ---" +run_with_env 'VIBE_CODE_AUDIT_EMBED_HOST=10.0.0.1 +VIBE_CODE_AUDIT_EMBED_PORT=9999' "hostport" >/dev/null +health_url="$(get_health_url "hostport")" +if [ "$health_url" = "http://10.0.0.1:9999/health" ]; then + pass "HOST and PORT from embed.env are applied correctly" +else + fail "HOST/PORT not applied: health URL was $health_url" +fi + +# --- 6. Quoted values are stripped --- +echo "" +echo "--- Dynamic: quote stripping ---" +run_with_env 'VIBE_CODE_AUDIT_EMBED_HOST="10.0.0.2" +VIBE_CODE_AUDIT_EMBED_PORT='"'"'9876'"'"'' "quotes" >/dev/null +health_url="$(get_health_url "quotes")" +if [ "$health_url" = "http://10.0.0.2:9876/health" ]; then + pass "Quote stripping works for double and single quotes" +else + fail "Quote stripping failed: health URL was $health_url" +fi + +# --- 7. 
Command injection via semicolon is NOT executed --- +echo "" +echo "--- Dynamic: command injection prevention ---" +PWNED="$TMPDIR_ROOT/pwned_semicolon" +run_with_env "VIBE_CODE_AUDIT_EMBED_HOST=localhost; touch $PWNED" "inject_semi" >/dev/null +if [ -f "$PWNED" ]; then + fail "Command injection via semicolon was executed" +else + pass "Semicolon injection not executed" +fi + +# --- 8. Command substitution is NOT executed --- +PWNED2="$TMPDIR_ROOT/pwned_subst" +run_with_env "VIBE_CODE_AUDIT_EMBED_HOST=\$(touch $PWNED2)" "inject_subst" >/dev/null +if [ -f "$PWNED2" ]; then + fail "Command substitution injection was executed" +else + pass "Command substitution injection not executed" +fi + +# --- 9. Backtick injection is NOT executed --- +PWNED3="$TMPDIR_ROOT/pwned_backtick" +run_with_env "VIBE_CODE_AUDIT_EMBED_HOST=\`touch $PWNED3\`" "inject_backtick" >/dev/null +if [ -f "$PWNED3" ]; then + fail "Backtick injection was executed" +else + pass "Backtick injection not executed" +fi + +# --- 10. 
Non-whitelisted keys are ignored --- +echo "" +echo "--- Dynamic: non-whitelisted keys ---" +# If non-whitelisted keys leaked, PATH would be overwritten and agentroot wouldn't be found +# The script would die with "agentroot is not installed" instead of producing EMBED_OK output +test_home_nwl="$TMPDIR_ROOT/home_nwl" +mkdir -p "$test_home_nwl/.config/vibe-code-audit" +touch "$test_home_nwl/test.sqlite" +printf 'EVIL_KEY=drop_tables\nLD_PRELOAD=/evil.so\nVIBE_CODE_AUDIT_EMBED_HOST=5.5.5.5\n' \ + > "$test_home_nwl/.config/vibe-code-audit/embed.env" +nwl_output="$(HOME="$test_home_nwl" PATH="$MOCK_BIN:/usr/bin:/bin:/usr/sbin:/sbin" \ + MOCK_CURL_LOG="$test_home_nwl/curl_urls.log" \ + bash "$SCRIPT" --db "$test_home_nwl/test.sqlite" --no-start-local 2>/dev/null || true)" +if echo "$nwl_output" | grep -q 'EMBED_OK='; then + pass "Script runs despite non-whitelisted keys in embed.env" +else + fail "Script failed — non-whitelisted keys may have interfered" +fi +# Verify the valid key was still applied +nwl_url="$(head -1 "$test_home_nwl/curl_urls.log" 2>/dev/null || echo NONE)" +if echo "$nwl_url" | grep -q '5.5.5.5'; then + pass "Whitelisted key (HOST=5.5.5.5) still applied alongside ignored keys" +else + fail "Whitelisted key not applied alongside ignored keys: $nwl_url" +fi + +# --- 11. Comment and blank lines are ignored --- +echo "" +echo "--- Dynamic: comment and blank line handling ---" +run_with_env '# This is a comment +VIBE_CODE_AUDIT_EMBED_HOST=1.2.3.4 + +# Another comment +VIBE_CODE_AUDIT_EMBED_PORT=5555' "comments" >/dev/null +health_url="$(get_health_url "comments")" +if [ "$health_url" = "http://1.2.3.4:5555/health" ]; then + pass "Comments and blank lines are correctly skipped" +else + fail "Comment/blank handling failed: health URL was $health_url" +fi + +# --- 12. 
Missing embed.env does not crash --- +echo "" +echo "--- Dynamic: missing embed.env ---" +test_home_miss="$TMPDIR_ROOT/home_miss" +mkdir -p "$test_home_miss" +touch "$test_home_miss/test.sqlite" +miss_output="$(HOME="$test_home_miss" PATH="$MOCK_BIN:/usr/bin:/bin:/usr/sbin:/sbin" \ + bash "$SCRIPT" --db "$test_home_miss/test.sqlite" --no-start-local 2>/dev/null || true)" +if echo "$miss_output" | grep -q 'EMBED_OK='; then + pass "Missing embed.env does not crash (defaults used)" +else + fail "Missing embed.env caused failure" +fi + +# --- 13. Last occurrence wins for duplicate keys --- +echo "" +echo "--- Dynamic: last occurrence wins ---" +run_with_env 'VIBE_CODE_AUDIT_EMBED_HOST=first +VIBE_CODE_AUDIT_EMBED_HOST=10.20.30.40' "dupes" >/dev/null +health_url="$(get_health_url "dupes")" +if echo "$health_url" | grep -q '10.20.30.40'; then + pass "Last occurrence wins for duplicate keys" +else + fail "Duplicate key handling: expected 10.20.30.40, got $health_url" +fi + +# --- 14. CLI flags override embed.env values --- +echo "" +echo "--- Dynamic: CLI override precedence ---" +test_home_cli="$TMPDIR_ROOT/home_cli" +mkdir -p "$test_home_cli/.config/vibe-code-audit" +touch "$test_home_cli/test.sqlite" +printf 'VIBE_CODE_AUDIT_EMBED_HOST=from-file\nVIBE_CODE_AUDIT_EMBED_PORT=1111\n' \ + > "$test_home_cli/.config/vibe-code-audit/embed.env" +HOME="$test_home_cli" PATH="$MOCK_BIN:/usr/bin:/bin:/usr/sbin:/sbin" \ + MOCK_CURL_LOG="$test_home_cli/curl_urls.log" \ + bash "$SCRIPT" --db "$test_home_cli/test.sqlite" --no-start-local \ + --host cli-host --port 2222 >/dev/null 2>/dev/null || true +cli_url="$(head -1 "$test_home_cli/curl_urls.log" 2>/dev/null || echo NONE)" +if [ "$cli_url" = "http://cli-host:2222/health" ]; then + pass "CLI flags override embed.env values" +else + fail "CLI override failed: health URL was $cli_url" +fi + +# --- 14b. 
Pre-existing env var takes precedence over embed.env --- +echo "" +echo "--- Dynamic: environment variable precedence over embed.env ---" +test_home_prec="$TMPDIR_ROOT/home_prec" +mkdir -p "$test_home_prec/.config/vibe-code-audit" +touch "$test_home_prec/test.sqlite" +printf 'VIBE_CODE_AUDIT_EMBED_HOST=from-file\nVIBE_CODE_AUDIT_EMBED_PORT=1111\n' \ + > "$test_home_prec/.config/vibe-code-audit/embed.env" +HOME="$test_home_prec" PATH="$MOCK_BIN:/usr/bin:/bin:/usr/sbin:/sbin" \ + MOCK_CURL_LOG="$test_home_prec/curl_urls.log" \ + VIBE_CODE_AUDIT_EMBED_HOST=env-host \ + VIBE_CODE_AUDIT_EMBED_PORT=3333 \ + bash "$SCRIPT" --db "$test_home_prec/test.sqlite" --no-start-local >/dev/null 2>/dev/null || true +prec_url="$(head -1 "$test_home_prec/curl_urls.log" 2>/dev/null || echo NONE)" +if [ "$prec_url" = "http://env-host:3333/health" ]; then + pass "Pre-existing env vars take precedence over embed.env" +else + fail "Env precedence broken: expected http://env-host:3333/health, got $prec_url" +fi + +# --- 15. Values with extra equals signs preserved --- +echo "" +echo "--- Dynamic: values with equals signs ---" +run_with_env 'VIBE_CODE_AUDIT_EMBED_HOST=host=with=equals' "eqval" >/dev/null +health_url="$(get_health_url "eqval")" +if echo "$health_url" | grep -q 'host=with=equals'; then + pass "Values with extra equals signs are preserved" +else + fail "Equals in value broken: health URL was $health_url" +fi + +# --- 16. 
End-to-end output structure --- +echo "" +echo "--- Dynamic: end-to-end output structure ---" +run_with_env 'VIBE_CODE_AUDIT_EMBED_HOST=127.0.0.1 +VIBE_CODE_AUDIT_EMBED_PORT=8000' "e2e" >/dev/null +e2e_output="$(HOME="$TMPDIR_ROOT/home_e2e" PATH="$MOCK_BIN:/usr/bin:/bin:/usr/sbin:/sbin" \ + bash "$SCRIPT" --db "$TMPDIR_ROOT/home_e2e/test.sqlite" --no-start-local 2>/dev/null || true)" +if echo "$e2e_output" | grep -q 'EMBED_OK=' && \ + echo "$e2e_output" | grep -q 'EMBED_BACKEND='; then + pass "End-to-end output contains EMBED_OK and EMBED_BACKEND" +else + fail "End-to-end output missing expected keys" +fi + +# --- 17. Forbidden keys absent from child process environment (env snapshot) --- +echo "" +echo "--- Dynamic: env snapshot proves forbidden keys excluded ---" +# Create an env-snapshot mock agentroot that dumps its inherited environment +SNAP_BIN="$TMPDIR_ROOT/snap_bin" +mkdir -p "$SNAP_BIN" +cat > "$SNAP_BIN/agentroot" <<'SNAP_STUB' +#!/usr/bin/env bash +if [ "${1:-}" = "embed" ]; then + env > "${AGENTROOT_ENV_SNAPSHOT:-/dev/null}" + echo "Connection refused" >&2 + exit 1 +fi +exit 0 +SNAP_STUB +chmod +x "$SNAP_BIN/agentroot" +# Reuse the existing mock curl in snap_bin +cp "$MOCK_BIN/curl" "$SNAP_BIN/curl" + +test_home_snap="$TMPDIR_ROOT/home_snap" +mkdir -p "$test_home_snap/.config/vibe-code-audit" +touch "$test_home_snap/test.sqlite" +SNAP_LOG="$test_home_snap/agentroot_env.log" +printf 'EVIL_KEY=should_be_ignored\nLD_PRELOAD=/evil.so\nVIBE_CODE_AUDIT_EMBED_HOST=snap-host\n' \ + > "$test_home_snap/.config/vibe-code-audit/embed.env" +HOME="$test_home_snap" PATH="$SNAP_BIN:/usr/bin:/bin:/usr/sbin:/sbin" \ + MOCK_CURL_LOG="$test_home_snap/curl_urls.log" \ + AGENTROOT_ENV_SNAPSHOT="$SNAP_LOG" \ + bash "$SCRIPT" --db "$test_home_snap/test.sqlite" --no-start-local >/dev/null 2>/dev/null || true + +if [ ! 
-f "$SNAP_LOG" ]; then + fail "Env snapshot file was not created (mock agentroot may not have run)" +else + if grep -q '^EVIL_KEY=' "$SNAP_LOG"; then + fail "EVIL_KEY leaked into agentroot process environment" + else + pass "EVIL_KEY absent from agentroot process environment" + fi + if grep -q '^LD_PRELOAD=' "$SNAP_LOG"; then + fail "LD_PRELOAD leaked into agentroot process environment" + else + pass "LD_PRELOAD absent from agentroot process environment" + fi + if grep -q '^VIBE_CODE_AUDIT_EMBED_HOST=snap-host$' "$SNAP_LOG"; then + pass "Whitelisted VIBE_CODE_AUDIT_EMBED_HOST=snap-host present in env snapshot" + else + fail "Whitelisted key missing from env snapshot" + fi +fi + +# --- 18. Combined injection + forbidden key payload --- +echo "" +echo "--- Dynamic: combined injection and forbidden key payload ---" +PWNED_COMBINED="$TMPDIR_ROOT/pwned_combined" +SNAP_LOG2="$test_home_snap/agentroot_env2.log" +printf 'VIBE_CODE_AUDIT_EMBED_HOST=safe-host\nEVIL_KEY=should_be_ignored\nVIBE_CODE_AUDIT_EMBED_HOST=localhost; touch %s\n$(touch %s)\n' \ + "$PWNED_COMBINED" "$PWNED_COMBINED" \ + > "$test_home_snap/.config/vibe-code-audit/embed.env" +HOME="$test_home_snap" PATH="$SNAP_BIN:/usr/bin:/bin:/usr/sbin:/sbin" \ + MOCK_CURL_LOG="$test_home_snap/curl_urls2.log" \ + AGENTROOT_ENV_SNAPSHOT="$SNAP_LOG2" \ + bash "$SCRIPT" --db "$test_home_snap/test.sqlite" --no-start-local >/dev/null 2>/dev/null || true +if [ -f "$PWNED_COMBINED" ]; then + fail "Combined injection payload created marker file" +else + pass "Combined injection payload did not execute" +fi +if [ ! -f "$SNAP_LOG2" ]; then + fail "Combined-payload env snapshot not created (mock agentroot may not have run)" +elif grep -q '^EVIL_KEY=' "$SNAP_LOG2"; then + fail "EVIL_KEY present in combined-payload env snapshot" +else + pass "EVIL_KEY absent from combined-payload env snapshot" +fi + +# --- 19. 
Temp file leak check --- +echo "" +echo "--- Cleanup: temp file leak check ---" +leaked_files="$(find "$TMPDIR_ROOT" -name 'vca-*' -type f 2>/dev/null || true)" +if [ -z "$leaked_files" ]; then + pass "No vca-* temp file leaks under test root" +else + fail "Leaked temp files found: $leaked_files" +fi + +echo "" +echo "=== Results: $PASS passed, $FAIL failed ===" +[ "$FAIL" -eq 0 ] || exit 1 diff --git a/tests/empty_repo_test.sh b/tests/empty_repo_test.sh new file mode 100644 index 000000000..ef2951e4f --- /dev/null +++ b/tests/empty_repo_test.sh @@ -0,0 +1,245 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Empty-repo edge case test for build_derived_artifacts.sh (Spec 32) +# Verifies that an empty repo (just .git/, no source files) produces: +# 1. Clean exit (exit code 0, no unbound variable errors) +# 2. Valid catalog.json with all stacks false and empty crates +# 3. Valid hotspots.json with empty files_by_symbol_count +# 4. Non-empty dup_clusters.md bootstrap scaffold + +ROOT_DIR="$(cd "$(dirname "$0")/.." 
&& pwd)" +SCRIPT="$ROOT_DIR/vibe-code-audit/scripts/build_derived_artifacts.sh" + +PASS=0 +FAIL=0 + +fail() { + printf 'FAIL: %s\n' "$*" >&2 + FAIL=$((FAIL + 1)) +} + +pass() { + printf 'PASS: %s\n' "$*" + PASS=$((PASS + 1)) +} + +# --------------------------------------------------------------------------- +# Setup: temp fixture with empty repo, cleanup trap +# --------------------------------------------------------------------------- + +TMPROOT="" + +cleanup() { + if [ -n "$TMPROOT" ] && [ -d "$TMPROOT" ]; then + rm -rf "$TMPROOT" + fi +} +trap cleanup EXIT INT TERM + +TMPROOT="$(mktemp -d)" +MOCK_REPO="$TMPROOT/repo" +OUTPUT_DIR="$TMPROOT/output" +mkdir -p "$MOCK_REPO/.git" "$OUTPUT_DIR" + +# --------------------------------------------------------------------------- +# Execute build_derived_artifacts.sh against empty repo +# --------------------------------------------------------------------------- + +SCRIPT_EXIT=0 +bash "$SCRIPT" \ + --repo "$MOCK_REPO" \ + --output "$OUTPUT_DIR/audit_index.tmp" \ + --mode fast \ + --top-k 10 \ + 2>"$TMPROOT/stderr.log" || SCRIPT_EXIT=$? 
+ +if [ "$SCRIPT_EXIT" -eq 0 ]; then + pass "exit code 0" +else + fail "exit code $SCRIPT_EXIT (expected 0)" +fi + +# --------------------------------------------------------------------------- +# No-crash diagnostics: check stderr for fatal shell errors +# --------------------------------------------------------------------------- + +STDERR_CONTENT="" +if [ -f "$TMPROOT/stderr.log" ]; then + STDERR_CONTENT="$(cat "$TMPROOT/stderr.log")" +fi + +CRASH_PATTERNS="unbound variable|syntax error|command not found|bad substitution" +if printf '%s' "$STDERR_CONTENT" | grep -qiE "$CRASH_PATTERNS"; then + fail "stderr contains crash diagnostic" + printf ' stderr: %s\n' "$STDERR_CONTENT" >&2 +else + pass "no crash diagnostics in stderr" +fi + +# --------------------------------------------------------------------------- +# Assert artifact files exist and are non-empty +# --------------------------------------------------------------------------- + +DERIVED="$OUTPUT_DIR/audit_index.tmp/derived" + +for artifact in catalog.json hotspots.json dup_clusters.md; do + if [ -f "$DERIVED/$artifact" ] && [ -s "$DERIVED/$artifact" ]; then + pass "$artifact exists and is non-empty" + else + fail "$artifact missing or empty" + fi +done + +# --------------------------------------------------------------------------- +# JSON validity: python3 if available, else structural checks +# --------------------------------------------------------------------------- + +validate_json() { + local file="$1" + local label="$2" + + if command -v python3 >/dev/null 2>&1; then + if python3 -m json.tool "$file" >/dev/null 2>&1; then + pass "$label is valid JSON (python3)" + return + else + fail "$label is invalid JSON (python3)" + return + fi + fi + + # Structural fallback: starts with {, ends with }, non-empty + local first last + first="$(head -c1 "$file")" + last="$(tail -c2 "$file" | head -c1)" + if [ "$first" = "{" ] && [ "$last" = "}" ]; then + pass "$label is structurally valid JSON (shell fallback)" + 
else + fail "$label structural JSON check failed (first='$first' last='$last')" + fi +} + +if [ -f "$DERIVED/catalog.json" ]; then + validate_json "$DERIVED/catalog.json" "catalog.json" +fi + +if [ -f "$DERIVED/hotspots.json" ]; then + validate_json "$DERIVED/hotspots.json" "hotspots.json" +fi + +# --------------------------------------------------------------------------- +# Semantic assertions: catalog.json +# --------------------------------------------------------------------------- + +if [ -f "$DERIVED/catalog.json" ]; then + CATALOG="$(cat "$DERIVED/catalog.json")" + + # Stack booleans should all be false + for stack in rust typescript javascript; do + if printf '%s' "$CATALOG" | grep -q "\"$stack\": false"; then + pass "catalog stacks.$stack is false" + else + fail "catalog stacks.$stack is not false" + fi + done + + # workspace_detected should be false + if printf '%s' "$CATALOG" | grep -q '"workspace_detected": false'; then + pass "catalog workspace_detected is false" + else + fail "catalog workspace_detected is not false" + fi + + # frontend.present should be false + if printf '%s' "$CATALOG" | grep -q '"present": false'; then + pass "catalog frontend.present is false" + else + fail "catalog frontend.present is not false" + fi + + # crates should be empty array + if printf '%s' "$CATALOG" | tr -d '[:space:]' | grep -q '"crates":\[\]'; then + pass "catalog crates is empty array" + else + fail "catalog crates is not empty array" + fi + + # Required keys present + for key in repo_root workspace_detected stacks frontend crates; do + if printf '%s' "$CATALOG" | grep -q "\"$key\""; then + pass "catalog contains key '$key'" + else + fail "catalog missing key '$key'" + fi + done +fi + +# --------------------------------------------------------------------------- +# Semantic assertions: hotspots.json +# --------------------------------------------------------------------------- + +if [ -f "$DERIVED/hotspots.json" ]; then + HOTSPOTS="$(cat 
"$DERIVED/hotspots.json")" + + # hotspot_dot should be null + if printf '%s' "$HOTSPOTS" | grep -q '"hotspot_dot": null'; then + pass "hotspots hotspot_dot is null" + else + fail "hotspots hotspot_dot is not null" + fi + + # files_by_symbol_count should be empty array + if printf '%s' "$HOTSPOTS" | tr -d '[:space:]' | grep -q '"files_by_symbol_count":\[\]'; then + pass "hotspots files_by_symbol_count is empty array" + else + fail "hotspots files_by_symbol_count is not empty array" + fi + + # Required keys present + for key in generated_at source mode top_k files_by_symbol_count; do + if printf '%s' "$HOTSPOTS" | grep -q "\"$key\""; then + pass "hotspots contains key '$key'" + else + fail "hotspots missing key '$key'" + fi + done +fi + +# --------------------------------------------------------------------------- +# dup_clusters.md structural check +# --------------------------------------------------------------------------- + +if [ -f "$DERIVED/dup_clusters.md" ]; then + if grep -q '# Duplication Clusters' "$DERIVED/dup_clusters.md"; then + pass "dup_clusters.md contains header" + else + fail "dup_clusters.md missing header" + fi + + if grep -q 'No hotspot symbol-path data' "$DERIVED/dup_clusters.md"; then + pass "dup_clusters.md indicates no hotspot data (expected for empty repo)" + else + fail "dup_clusters.md does not indicate missing hotspot data" + fi +fi + +# --------------------------------------------------------------------------- +# Cleanup verification: no temp file leaks from the script +# --------------------------------------------------------------------------- + +# The script uses mktemp for its own work dir and cleans up via trap. +# Verify no vca-derived temp dirs leaked. 
+LEAKED=$(find "${TMPDIR:-/tmp}" -maxdepth 1 -name 'vca-derived.*' -type d 2>/dev/null | head -n5) +if [ -z "$LEAKED" ]; then + pass "no vca-derived temp dirs leaked" +else + fail "vca-derived temp dir(s) leaked: $LEAKED" +fi + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- + +printf '\n--- Results: %d passed, %d failed ---\n' "$PASS" "$FAIL" +[ "$FAIL" -eq 0 ] || exit 1 diff --git a/tests/grep_fallback_test.sh b/tests/grep_fallback_test.sh new file mode 100644 index 000000000..8e2bbe4d4 --- /dev/null +++ b/tests/grep_fallback_test.sh @@ -0,0 +1,247 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Grep-fallback regression test for build_read_plan.sh (Spec 16 / Spec 31) +# Verifies that when rg is not available, the grep -R fallback path: +# 1. Uses --exclude-dir for all 7 canonical directories +# 2. Produces correct read_plan.tsv / read_plan.md artifacts +# 3. Excludes files from excluded directories and includes files from valid paths + +ROOT_DIR="$(cd "$(dirname "$0")/.." 
&& pwd)"
SCRIPT="$ROOT_DIR/vibe-code-audit/scripts/build_read_plan.sh"

PASS=0
FAIL=0

# Record a failing check on stderr and bump the failure counter.
fail() {
  printf 'FAIL: %s\n' "$*" >&2
  FAIL=$((FAIL + 1))
}

# Record a passing check and bump the pass counter.
pass() {
  printf 'PASS: %s\n' "$*"
  PASS=$((PASS + 1))
}

# ---------------------------------------------------------------------------
# Setup: temp dirs, PATH hiding, cleanup trap
# ---------------------------------------------------------------------------

ORIG_PATH="$PATH"
TMPROOT=""

# Restore PATH and remove the scratch tree on any exit path.
cleanup() {
  PATH="$ORIG_PATH"
  if [ -n "$TMPROOT" ] && [ -d "$TMPROOT" ]; then
    rm -rf "$TMPROOT"
  fi
}
trap cleanup EXIT INT TERM

TMPROOT="$(mktemp -d)"
MOCK_REPO="$TMPROOT/repo"
OUTPUT_DIR="$TMPROOT/output"
mkdir -p "$MOCK_REPO" "$OUTPUT_DIR"

# ---------------------------------------------------------------------------
# Static checks on build_read_plan.sh
# ---------------------------------------------------------------------------

# 1. grep -R call includes all 7 --exclude-dir flags via EXCLUDE_DIRS iteration
EXPECTED_DIRS=".git node_modules target dist build .next coverage"
# "|| true" guards the set -e / pipefail combination: an absent pattern must
# surface as a recorded FAIL below, not abort the whole test script.
GREP_LINE="$(grep -n 'grep -R' "$SCRIPT" | head -n1 || true)"
if [ -n "$GREP_LINE" ]; then
  pass "grep -R call found in build_read_plan.sh"
else
  fail "grep -R call not found in build_read_plan.sh"
fi

# 2. Verify exclude_args array is built from EXCLUDE_DIRS
if grep -q 'grep_exclude_args+=(--exclude-dir' "$SCRIPT"; then
  pass "grep_exclude_args built with --exclude-dir from EXCLUDE_DIRS loop"
else
  fail "grep_exclude_args not built from EXCLUDE_DIRS loop"
fi

# 3. Verify the grep call uses the array expansion
if grep -q '"${grep_exclude_args\[@\]}"' "$SCRIPT"; then
  pass "grep -R uses grep_exclude_args array expansion"
else
  fail "grep -R does not use grep_exclude_args array expansion"
fi

# 4.
Verify --exclude-dir args appear before PATTERN in grep call +GREP_CMD_LINE=$(grep 'grep -R -n -E' "$SCRIPT") +if printf '%s' "$GREP_CMD_LINE" | grep -q 'grep_exclude_args.*\$PATTERN'; then + pass "grep --exclude-dir args positioned before PATTERN" +else + fail "grep --exclude-dir args NOT positioned before PATTERN" +fi + +# --------------------------------------------------------------------------- +# Fixture: mock repo with excluded + included directories +# --------------------------------------------------------------------------- + +# Create files that match the read-plan PATTERN inside excluded directories +for dir in $EXPECTED_DIRS; do + mkdir -p "$MOCK_REPO/$dir/sub" + printf 'function validateSchema() { return true; }\n' > "$MOCK_REPO/$dir/sub/match.js" +done + +# Create a file matching the pattern in an included directory +mkdir -p "$MOCK_REPO/src/auth" +printf 'function validatePermission(user) { return authorize(user); }\n' > "$MOCK_REPO/src/auth/check.js" + +# Also add a second included match to verify multi-file output +mkdir -p "$MOCK_REPO/lib" +printf 'const timeout = config.retryBackoff || 3000;\n' > "$MOCK_REPO/lib/retry.js" + +# --------------------------------------------------------------------------- +# PATH manipulation: hide rg +# --------------------------------------------------------------------------- + +# Build a filtered PATH that excludes any directory containing rg +FILTERED_PATH="" +IFS=':' +for segment in $ORIG_PATH; do + if [ -x "$segment/rg" ]; then + continue + fi + if [ -n "$FILTERED_PATH" ]; then + FILTERED_PATH="$FILTERED_PATH:$segment" + else + FILTERED_PATH="$segment" + fi +done +unset IFS + +export PATH="$FILTERED_PATH" + +# Verify rg is truly hidden +if command -v rg >/dev/null 2>&1; then + fail "rg still visible in PATH after filtering — fallback test unreliable" + # Restore and skip dynamic tests + PATH="$ORIG_PATH" + printf '\n--- Results: %d passed, %d failed ---\n' "$PASS" "$FAIL" + exit "$FAIL" +else + pass "rg 
hidden from PATH — grep fallback will be exercised" +fi + +# --------------------------------------------------------------------------- +# Execute build_read_plan.sh in fallback mode +# --------------------------------------------------------------------------- + +SCRIPT_EXIT=0 +bash "$SCRIPT" --repo "$MOCK_REPO" --output "$OUTPUT_DIR/audit_index.tmp" --mode fast 2>"$TMPROOT/stderr.log" || SCRIPT_EXIT=$? + +if [ "$SCRIPT_EXIT" -eq 0 ]; then + pass "build_read_plan.sh exited 0 in grep fallback mode" +else + fail "build_read_plan.sh exited $SCRIPT_EXIT in grep fallback mode" +fi + +# --------------------------------------------------------------------------- +# Assert artifacts exist +# --------------------------------------------------------------------------- + +DERIVED="$OUTPUT_DIR/audit_index.tmp/derived" +READ_PLAN_TSV="$DERIVED/read_plan.tsv" +READ_PLAN_MD="$DERIVED/read_plan.md" + +if [ -f "$READ_PLAN_TSV" ]; then + pass "read_plan.tsv created" +else + fail "read_plan.tsv not created" +fi + +if [ -f "$READ_PLAN_MD" ]; then + pass "read_plan.md created" +else + fail "read_plan.md not created" +fi + +# --------------------------------------------------------------------------- +# Assert included matches are present in read_plan.tsv +# --------------------------------------------------------------------------- + +if [ -f "$READ_PLAN_TSV" ] && [ -s "$READ_PLAN_TSV" ]; then + pass "read_plan.tsv is non-empty" +else + fail "read_plan.tsv is empty — expected at least one included match" +fi + +if [ -f "$READ_PLAN_TSV" ] && grep -q 'src/auth/check.js' "$READ_PLAN_TSV"; then + pass "included file src/auth/check.js appears in read_plan.tsv" +else + fail "included file src/auth/check.js missing from read_plan.tsv" +fi + +# --------------------------------------------------------------------------- +# Assert excluded directories do NOT appear in read_plan.tsv +# --------------------------------------------------------------------------- + +EXCLUSION_CLEAN=true +for dir 
in $EXPECTED_DIRS; do
  # Anchor on the literal directory name: escape regex metacharacters
  # ('.' in .git / .next), otherwise "^.git/" would also match paths such
  # as "digit/..." and produce false failures.
  dir_re=$(printf '%s' "$dir" | sed 's/\./\\./g')
  if [ -f "$READ_PLAN_TSV" ] && grep -q "^${dir_re}/" "$READ_PLAN_TSV"; then
    fail "excluded directory '$dir' found in read_plan.tsv"
    EXCLUSION_CLEAN=false
  fi
done

if [ "$EXCLUSION_CLEAN" = true ]; then
  pass "no excluded directories appear in read_plan.tsv"
fi

# ---------------------------------------------------------------------------
# Assert read_plan.md contains expected structure
# ---------------------------------------------------------------------------

if [ -f "$READ_PLAN_MD" ] && grep -q '# Read Plan' "$READ_PLAN_MD"; then
  pass "read_plan.md contains '# Read Plan' header"
else
  fail "read_plan.md missing '# Read Plan' header"
fi

if [ -f "$READ_PLAN_MD" ] && grep -q '## Slices' "$READ_PLAN_MD"; then
  pass "read_plan.md contains '## Slices' section"
else
  fail "read_plan.md missing '## Slices' section"
fi

# ---------------------------------------------------------------------------
# Restore PATH (also handled by trap, but explicit for safety)
# ---------------------------------------------------------------------------

PATH="$ORIG_PATH"

if command -v rg >/dev/null 2>&1; then
  pass "PATH restored — rg visible again"
else
  # rg may not be installed at all; that's fine
  pass "PATH restored (rg may not be installed on this system)"
fi

# ---------------------------------------------------------------------------
# Cleanup check
# ---------------------------------------------------------------------------

# Temp files should not leak (raw/norm are cleaned by script)
if [ -f "$DERIVED/.read_plan_matches_raw.tsv" ]; then
  fail "temporary raw matches file leaked"
else
  pass "temporary raw matches file cleaned up"
fi

if [ -f "$DERIVED/.read_plan_matches_norm.tsv" ]; then
  fail "temporary normalized matches file leaked"
else
  pass "temporary normalized matches file cleaned up"
fi

# ---------------------------------------------------------------------------
# Summary
#
--------------------------------------------------------------------------- + +printf '\n--- Results: %d passed, %d failed ---\n' "$PASS" "$FAIL" +[ "$FAIL" -eq 0 ] || exit 1 diff --git a/tests/lib_unit_test.sh b/tests/lib_unit_test.sh new file mode 100755 index 000000000..0472fc5e5 --- /dev/null +++ b/tests/lib_unit_test.sh @@ -0,0 +1,452 @@ +#!/usr/bin/env bash +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)" +LIB_SH="$ROOT_DIR/vibe-code-audit/scripts/_lib.sh" + +PASS=0 +FAIL=0 + +fail() { + printf 'FAIL: %s\n' "$*" >&2 + FAIL=$((FAIL + 1)) +} + +pass() { + printf 'PASS: %s\n' "$*" + PASS=$((PASS + 1)) +} + +assert_eq() { + local label="$1" + local expected="$2" + local actual="$3" + if [ "$expected" = "$actual" ]; then + pass "$label" + else + fail "$label" + printf ' expected: %s\n' "$expected" >&2 + printf ' actual: %s\n' "$actual" >&2 + fi +} + +# Source _lib.sh (requires SCRIPT_NAME) +SCRIPT_NAME="lib_unit_test" +# shellcheck source=../vibe-code-audit/scripts/_lib.sh +. 
"$LIB_SH" + +# --------------------------------------------------------------------------- +# Test: EXCLUDE_DIRS value +# --------------------------------------------------------------------------- +assert_eq "EXCLUDE_DIRS contains all 7 dirs" \ + ".git node_modules target dist build .next coverage" \ + "$EXCLUDE_DIRS" + +# --------------------------------------------------------------------------- +# Test: exclude_find_prune_args +# --------------------------------------------------------------------------- +ACTUAL_FIND="$(exclude_find_prune_args)" +EXPECTED_FIND="-name .git -o -name node_modules -o -name target -o -name dist -o -name build -o -name .next -o -name coverage" +assert_eq "exclude_find_prune_args output" "$EXPECTED_FIND" "$ACTUAL_FIND" + +# --------------------------------------------------------------------------- +# Test: exclude_agentroot_flags +# --------------------------------------------------------------------------- +ACTUAL_AGENT="$(exclude_agentroot_flags)" +# Trim trailing space from output +ACTUAL_AGENT="${ACTUAL_AGENT% }" +EXPECTED_AGENT="--exclude .git --exclude node_modules --exclude target --exclude dist --exclude build --exclude .next --exclude coverage" +assert_eq "exclude_agentroot_flags output" "$EXPECTED_AGENT" "$ACTUAL_AGENT" + +# --------------------------------------------------------------------------- +# Test: exclude_rg_globs +# --------------------------------------------------------------------------- +ACTUAL_RG="$(exclude_rg_globs)" +ACTUAL_RG="${ACTUAL_RG% }" +EXPECTED_RG="--glob '!.git/**' --glob '!node_modules/**' --glob '!target/**' --glob '!dist/**' --glob '!build/**' --glob '!.next/**' --glob '!coverage/**'" +assert_eq "exclude_rg_globs output" "$EXPECTED_RG" "$ACTUAL_RG" + +# --------------------------------------------------------------------------- +# Test: exclude_dirs_json_array +# --------------------------------------------------------------------------- +ACTUAL_JSON="$(exclude_dirs_json_array)" 
+EXPECTED_JSON='[".git", "node_modules", "target", "dist", "build", ".next", "coverage"]' +assert_eq "exclude_dirs_json_array output" "$EXPECTED_JSON" "$ACTUAL_JSON" + +# --------------------------------------------------------------------------- +# Test: exclude_dirs_json_array produces valid JSON +# --------------------------------------------------------------------------- +# Validate the JSON array is parseable (basic bracket/quote check) +if printf '%s' "$ACTUAL_JSON" | grep -qE '^\[("[^"]*"(, "[^"]*")*)\]$'; then + pass "exclude_dirs_json_array valid JSON format" +else + fail "exclude_dirs_json_array valid JSON format" +fi + +# --------------------------------------------------------------------------- +# Test: exclude_find_prune_args has no leading -o +# --------------------------------------------------------------------------- +if printf '%s' "$ACTUAL_FIND" | grep -q '^-name'; then + pass "exclude_find_prune_args no leading -o" +else + fail "exclude_find_prune_args no leading -o" +fi + +# --------------------------------------------------------------------------- +# Test: no empty flags emitted (regression guard) +# --------------------------------------------------------------------------- +if printf '%s' "$ACTUAL_AGENT" | grep -qE -- '--exclude |--exclude$'; then + fail "exclude_agentroot_flags emits empty flag" +else + pass "exclude_agentroot_flags no empty flags" +fi + +if printf '%s' "$ACTUAL_RG" | grep -qE -- "--glob ''|--glob '\!'"; then + fail "exclude_rg_globs emits empty glob" +else + pass "exclude_rg_globs no empty globs" +fi + +# --------------------------------------------------------------------------- +# Test: count of directories matches expected 7 +# --------------------------------------------------------------------------- +DIR_COUNT=0 +for _d in $EXCLUDE_DIRS; do + DIR_COUNT=$((DIR_COUNT + 1)) +done +assert_eq "EXCLUDE_DIRS has 7 entries" "7" "$DIR_COUNT" + +# --------------------------------------------------------------------------- 
+# Contract tests: helper output matches hardcoded patterns in scripts +# --------------------------------------------------------------------------- + +# Verify agentroot flags match the inline list in run_index.sh +INLINE_AGENTROOT="--exclude .git --exclude node_modules --exclude target --exclude dist --exclude build --exclude .next --exclude coverage" +assert_eq "agentroot flags match run_index.sh inline" "$INLINE_AGENTROOT" "$ACTUAL_AGENT" + +# Verify JSON array matches the inline manifest pattern in run_index.sh +INLINE_JSON='[".git", "node_modules", "target", "dist", "build", ".next", "coverage"]' +assert_eq "JSON array matches run_index.sh manifest" "$INLINE_JSON" "$ACTUAL_JSON" + +# Verify rg globs match the inline list in build_read_plan.sh +INLINE_RG="--glob '!.git/**' --glob '!node_modules/**' --glob '!target/**' --glob '!dist/**' --glob '!build/**' --glob '!.next/**' --glob '!coverage/**'" +assert_eq "rg globs match build_read_plan.sh inline" "$INLINE_RG" "$ACTUAL_RG" + +# Verify find prune covers all dirs from run_index.sh repo_has_file_named() +for dir in .git node_modules target dist build .next coverage; do + if printf '%s' "$ACTUAL_FIND" | grep -q -- "-name $dir"; then + pass "find prune includes $dir" + else + fail "find prune includes $dir" + fi +done + +# --------------------------------------------------------------------------- +# Per-directory membership checks: agentroot flags +# --------------------------------------------------------------------------- +for dir in .git node_modules target dist build .next coverage; do + if printf '%s' "$ACTUAL_AGENT" | grep -q -- "--exclude $dir"; then + pass "agentroot flags includes $dir" + else + fail "agentroot flags includes $dir" + printf ' actual: %s\n' "$ACTUAL_AGENT" >&2 + fi +done + +# --------------------------------------------------------------------------- +# Per-directory membership checks: rg globs +# --------------------------------------------------------------------------- +for dir in 
.git node_modules target dist build .next coverage; do
  if printf '%s' "$ACTUAL_RG" | grep -qF -- "--glob '!${dir}/**'"; then
    pass "rg globs includes $dir"
  else
    fail "rg globs includes $dir"
    printf ' actual: %s\n' "$ACTUAL_RG" >&2
  fi
done

# ---------------------------------------------------------------------------
# Per-directory membership checks: JSON array
# ---------------------------------------------------------------------------
for dir in .git node_modules target dist build .next coverage; do
  if printf '%s' "$ACTUAL_JSON" | grep -qF "\"$dir\""; then
    pass "json array includes $dir"
  else
    fail "json array includes $dir"
    printf ' actual: %s\n' "$ACTUAL_JSON" >&2
  fi
done

# ---------------------------------------------------------------------------
# JSON element count: exactly 7 quoted entries
# ---------------------------------------------------------------------------
JSON_ELEM_COUNT="$(printf '%s' "$ACTUAL_JSON" | grep -o '"[^"]*"' | wc -l | tr -d ' ')"
assert_eq "json array has exactly 7 elements" "7" "$JSON_ELEM_COUNT"

# ---------------------------------------------------------------------------
# Duplicate entry guards: no directory appears more than once per helper
# ---------------------------------------------------------------------------
# Count non-overlapping occurrences of $2 inside $1 using only parameter
# expansion (no subprocesses).
_count_occurrences() {
  local haystack="$1" needle="$2"
  local count=0 tmp="$haystack"
  while [ "${tmp#*"$needle"}" != "$tmp" ]; do
    count=$((count + 1))
    tmp="${tmp#*"$needle"}"
  done
  printf '%d' "$count"
}

# Fail (and stop scanning) if any canonical directory, rendered through the
# printf format in $3, occurs more than once in the helper output $2.
_check_no_duplicates() {
  local label="$1" output="$2" needle_fmt="$3"
  # 'needle' is now declared local: it previously leaked into the global
  # scope on every call.
  local dir count needle
  for dir in .git node_modules target dist build .next coverage; do
    # shellcheck disable=SC2059
    needle="$(printf -- "$needle_fmt" "$dir")"
    count="$(_count_occurrences "$output" "$needle")"
    if [ "$count" -gt 1 ]; then
      fail "$label duplicate entry: $dir appears $count times"
      return
    fi
  done
  pass "$label no duplicate entries"
}

_check_no_duplicates
"find prune" "$ACTUAL_FIND" "-name %s"
_check_no_duplicates "agentroot flags" "$ACTUAL_AGENT" "--exclude %s"
_check_no_duplicates "rg globs" "$ACTUAL_RG" "'!%s/**'"
_check_no_duplicates "json array" "$ACTUAL_JSON" '"%s"'

# ---------------------------------------------------------------------------
# Non-empty output guards: ensure no helper returns empty string
# ---------------------------------------------------------------------------
for _helper_name in ACTUAL_FIND ACTUAL_AGENT ACTUAL_RG ACTUAL_JSON; do
  # Bash indirect expansion instead of eval: same lookup, without
  # re-parsing a constructed command string.
  _helper_val="${!_helper_name}"
  if [ -z "$_helper_val" ]; then
    fail "$_helper_name is unexpectedly empty"
  else
    pass "$_helper_name is non-empty"
  fi
done

# ===========================================================================
# json_escape tests
# ===========================================================================

# ---------------------------------------------------------------------------
# Test: json_escape — newline
# ---------------------------------------------------------------------------
ACTUAL="$(json_escape $'hello\nworld')"
assert_eq "json_escape newline" 'hello\nworld' "$ACTUAL"

# ---------------------------------------------------------------------------
# Test: json_escape — tab
# ---------------------------------------------------------------------------
ACTUAL="$(json_escape $'hello\tworld')"
assert_eq "json_escape tab" 'hello\tworld' "$ACTUAL"

# ---------------------------------------------------------------------------
# Test: json_escape — carriage return
# ---------------------------------------------------------------------------
ACTUAL="$(json_escape $'hello\rworld')"
assert_eq "json_escape carriage return" 'hello\rworld' "$ACTUAL"

# ---------------------------------------------------------------------------
# Test: json_escape — double quote
# ---------------------------------------------------------------------------
ACTUAL="$(json_escape 'say "hello"')"
+assert_eq "json_escape double quote" 'say \"hello\"' "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: json_escape — backslash +# --------------------------------------------------------------------------- +ACTUAL="$(json_escape 'back\slash')" +assert_eq "json_escape backslash" 'back\\slash' "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: json_escape — backspace (0x08) +# --------------------------------------------------------------------------- +ACTUAL="$(json_escape $'a\bb')" +assert_eq "json_escape backspace" 'a\bb' "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: json_escape — form feed (0x0c) +# --------------------------------------------------------------------------- +ACTUAL="$(json_escape $'a\fb')" +assert_eq "json_escape form feed" 'a\fb' "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: json_escape — empty input returns empty output +# --------------------------------------------------------------------------- +ACTUAL="$(json_escape '')" +assert_eq "json_escape empty input" '' "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: json_escape — plain ASCII passes through unchanged +# --------------------------------------------------------------------------- +ACTUAL="$(json_escape 'hello world 123')" +assert_eq "json_escape plain ASCII" 'hello world 123' "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: json_escape — control characters use \uXXXX encoding +# Representative controls: SOH (0x01), STX (0x02), BEL (0x07), ESC (0x1b) +# Note: NUL (0x00) cannot be passed via bash variables; this is a known +# shell limitation documented in _lib.sh. NUL handling is correct in the +# byte-stream path but untestable via $1 argument passing. 
+# --------------------------------------------------------------------------- +ACTUAL="$(json_escape $'\x01')" +assert_eq "json_escape SOH (0x01)" '\u0001' "$ACTUAL" + +ACTUAL="$(json_escape $'\x02')" +assert_eq "json_escape STX (0x02)" '\u0002' "$ACTUAL" + +ACTUAL="$(json_escape $'\x07')" +assert_eq "json_escape BEL (0x07)" '\u0007' "$ACTUAL" + +ACTUAL="$(json_escape $'\x1b')" +assert_eq "json_escape ESC (0x1b)" '\u001b' "$ACTUAL" + +ACTUAL="$(json_escape $'\x1f')" +assert_eq "json_escape US (0x1f)" '\u001f' "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: json_escape — mixed control chars in a single string +# --------------------------------------------------------------------------- +ACTUAL="$(json_escape $'line1\nline2\ttab\r\n')" +assert_eq "json_escape mixed controls" 'line1\nline2\ttab\r\n' "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: json_escape — version string with newline produces valid JSON +# Validates that escaped output can be safely wrapped in JSON quotes +# without embedded raw control bytes. Uses shell-only structural check +# (no jq dependency). +# --------------------------------------------------------------------------- +VERSION=$'1.2.3\n' +ESCAPED="$(json_escape "$VERSION")" +JSON_DOC="{\"version\":\"${ESCAPED}\"}" + +# Structural check: no raw control bytes remain in the JSON string. +# Uses od to detect any byte 0x00-0x1f in the output (portable, no grep -P). 
+RAW_CTRL_COUNT="$(printf '%s' "$JSON_DOC" | LC_ALL=C od -An -tx1 | tr ' ' '\n' | grep -cE '^(0[0-9a-f]|1[0-9a-f])$' || true)" +if [ "$RAW_CTRL_COUNT" -eq 0 ]; then + pass "json_escape version string: no raw control bytes" +else + fail "json_escape version string: raw control bytes in JSON output (found $RAW_CTRL_COUNT)" +fi + +# Structural check: JSON doc matches basic object pattern +if printf '%s' "$JSON_DOC" | grep -qE '^\{"version":"[^"]*"\}$'; then + pass "json_escape version string: valid JSON structure" +else + fail "json_escape version string: valid JSON structure" + printf ' json_doc: %s\n' "$JSON_DOC" >&2 +fi + +# =========================================================================== +# resolve_output_dir tests +# =========================================================================== + +# Fixture setup: create temp root for all path tests, clean up on exit. +ROD_TMPDIR="$(mktemp -d)" +_rod_cleanup() { rm -rf "$ROD_TMPDIR"; } +trap _rod_cleanup EXIT + +# --------------------------------------------------------------------------- +# Test: resolve_output_dir — absolute path returns same path +# --------------------------------------------------------------------------- +ROD_ABS_DIR="$ROD_TMPDIR/abs_test" +mkdir -p "$ROD_ABS_DIR" +ACTUAL="$(resolve_output_dir "$ROD_ABS_DIR")" +# Use pwd -P to get the physical path of our expected value (macOS /tmp -> /private/tmp) +EXPECTED="$(cd "$ROD_ABS_DIR" && pwd -P)" +assert_eq "resolve_output_dir absolute path" "$EXPECTED" "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: resolve_output_dir — relative path resolves to absolute +# --------------------------------------------------------------------------- +ROD_REL_BASE="$ROD_TMPDIR/rel_base" +mkdir -p "$ROD_REL_BASE" +ACTUAL="$(cd "$ROD_REL_BASE" && resolve_output_dir "child/output")" +EXPECTED="$(cd "$ROD_REL_BASE/child/output" && pwd -P)" +assert_eq "resolve_output_dir relative path" "$EXPECTED" "$ACTUAL" + 
+# --------------------------------------------------------------------------- +# Test: resolve_output_dir — parent traversal (..) normalizes correctly +# --------------------------------------------------------------------------- +ROD_TRAVERSE_DIR="$ROD_TMPDIR/traverse/deep" +mkdir -p "$ROD_TRAVERSE_DIR" +ACTUAL="$(resolve_output_dir "$ROD_TRAVERSE_DIR/../deep")" +EXPECTED="$(cd "$ROD_TRAVERSE_DIR" && pwd -P)" +assert_eq "resolve_output_dir parent traversal (..)" "$EXPECTED" "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: resolve_output_dir — symlink resolves to physical path +# Uses pwd -P (Decision B) to return the real directory, not the symlink. +# --------------------------------------------------------------------------- +ROD_REAL_DIR="$ROD_TMPDIR/real_target" +ROD_LINK="$ROD_TMPDIR/sym_link" +mkdir -p "$ROD_REAL_DIR" +ln -s "$ROD_REAL_DIR" "$ROD_LINK" +ACTUAL="$(resolve_output_dir "$ROD_LINK")" +EXPECTED="$(cd "$ROD_REAL_DIR" && pwd -P)" +assert_eq "resolve_output_dir symlink resolves to real path" "$EXPECTED" "$ACTUAL" + +# --------------------------------------------------------------------------- +# resolve_output_dir contract matrix (Spec 25) +# +# resolve_output_dir() uses mkdir -p internally. Its contract is: +# SUCCEEDS: path exists → resolved to canonical absolute path +# SUCCEEDS: path does not exist → created via mkdir -p, then resolved +# FAILS: path is unresolvable → exits non-zero (e.g. parent is a file) +# +# The "non-existent path" case is NOT a failure — it exercises mkdir -p. +# The failure case is an *unresolvable* path where mkdir -p itself fails. 
+# --------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# Test: resolve_output_dir — creates missing directory (mkdir -p behavior) +# --------------------------------------------------------------------------- +ROD_NEW_DIR="$ROD_TMPDIR/new_parent/new_child" +ACTUAL="$(resolve_output_dir "$ROD_NEW_DIR")" +EXPECTED="$(cd "$ROD_NEW_DIR" && pwd -P)" +assert_eq "resolve_output_dir creates missing directory" "$EXPECTED" "$ACTUAL" +if [ -d "$ROD_NEW_DIR" ]; then + pass "resolve_output_dir created directory exists" +else + fail "resolve_output_dir created directory exists" +fi + +# --------------------------------------------------------------------------- +# Test: resolve_output_dir — non-existent deep path is created (mkdir -p) +# Precondition: path must not exist. Postcondition: created and resolved. +# This proves the contract: non-existent ≠ failure; unresolvable = failure. +# --------------------------------------------------------------------------- +ROD_FRESH_DIR="$ROD_TMPDIR/fresh_nonexistent/deep/nested" +if [ -d "$ROD_FRESH_DIR" ]; then + fail "resolve_output_dir non-existent precondition: directory should not exist yet" +else + pass "resolve_output_dir non-existent precondition: directory does not exist yet" +fi +ACTUAL="$(resolve_output_dir "$ROD_FRESH_DIR")" +EXPECTED="$(cd "$ROD_FRESH_DIR" && pwd -P)" +assert_eq "resolve_output_dir non-existent path created and resolved" "$EXPECTED" "$ACTUAL" + +# --------------------------------------------------------------------------- +# Test: resolve_output_dir — unresolvable path (file-as-parent) fails +# mkdir -p cannot create a child under a regular file → non-zero exit. +# This is the contract failure case (not merely "non-existent"). 
# ---------------------------------------------------------------------------
ROD_BLOCKER="$ROD_TMPDIR/blocker_file"
touch "$ROD_BLOCKER"
if _rod_out="$(resolve_output_dir "$ROD_BLOCKER/child" 2>/dev/null)"; then
  fail "resolve_output_dir unresolvable path should fail"
else
  pass "resolve_output_dir unresolvable path exits non-zero"
fi

# ---------------------------------------------------------------------------
# Summary
# ---------------------------------------------------------------------------
printf '\n--- Results: %d passed, %d failed ---\n' "$PASS" "$FAIL"
if [ "$FAIL" -gt 0 ]; then
  exit 1
fi
diff --git a/tests/manifest_integrity_test.sh b/tests/manifest_integrity_test.sh
new file mode 100644
index 000000000..b1fc00426
--- /dev/null
+++ b/tests/manifest_integrity_test.sh
@@ -0,0 +1,64 @@
#!/usr/bin/env bash
set -euo pipefail

# Manifest integrity test for INSTALL_MANIFEST.txt
# Ensures _lib.sh is listed exactly once and no duplicate entries exist.

# Resolve the manifest relative to this script so the test works from any
# CWD. A bare relative path only worked when run from the repo root; the
# sibling tests all resolve via dirname "$0".
ROOT_DIR="$(cd "$(dirname "$0")/.." && pwd)"
MANIFEST="$ROOT_DIR/vibe-code-audit/INSTALL_MANIFEST.txt"
PASS=0
FAIL=0

pass() { PASS=$((PASS + 1)); printf " PASS: %s\n" "$1"; }
fail() { FAIL=$((FAIL + 1)); printf " FAIL: %s\n" "$1"; }

printf "=== Manifest Integrity Tests ===\n"

# 1. Manifest file exists
if [ -f "$MANIFEST" ]; then
  pass "manifest file exists"
else
  fail "manifest file not found at $MANIFEST"
  printf "\nResults: %d passed, %d failed\n" "$PASS" "$FAIL"
  exit 1
fi

# Strip comments and blank lines for content checks.
# "|| true": under set -e / pipefail an all-comment manifest (grep exit 1)
# must fall through to the checks below instead of aborting the script.
CONTENT=$(grep -v '^#' "$MANIFEST" | grep -v '^[[:space:]]*$' || true)

# 2. scripts/_lib.sh appears exactly once
LIB_COUNT=$(printf '%s\n' "$CONTENT" | grep -c '^scripts/_lib\.sh$' || true)
if [ "$LIB_COUNT" -eq 1 ]; then
  pass "scripts/_lib.sh listed exactly once"
else
  fail "scripts/_lib.sh count is $LIB_COUNT (expected 1)"
fi

# 3.
No duplicate entries in manifest +DUP_COUNT=$(printf '%s\n' "$CONTENT" | sort | uniq -d | wc -l | tr -d ' ') +if [ "$DUP_COUNT" -eq 0 ]; then + pass "no duplicate entries in manifest" +else + DUPS=$(printf '%s\n' "$CONTENT" | sort | uniq -d) + fail "found $DUP_COUNT duplicate entries: $DUPS" +fi + +# 4. All expected script entries present +EXPECTED_SCRIPTS="run_index.sh run_agentroot_embed.sh build_derived_artifacts.sh build_read_plan.sh render_report_pdf.sh render_system_map.sh _lib.sh" +for script in $EXPECTED_SCRIPTS; do + if printf '%s\n' "$CONTENT" | grep -q "^scripts/${script}$"; then + pass "scripts/$script present" + else + fail "scripts/$script missing" + fi +done + +# 5. Entry format: _lib.sh line matches peer format (scripts/) +LIB_LINE=$(printf '%s\n' "$CONTENT" | grep '_lib\.sh' || true) +if [ "$LIB_LINE" = "scripts/_lib.sh" ]; then + pass "entry format matches peers" +else + fail "entry format mismatch: got '$LIB_LINE'" +fi + +printf "\n=== Results: %d passed, %d failed ===\n" "$PASS" "$FAIL" +[ "$FAIL" -eq 0 ] || exit 1 diff --git a/tests/render_pdf_smoke_test.sh b/tests/render_pdf_smoke_test.sh new file mode 100644 index 000000000..417307dd7 --- /dev/null +++ b/tests/render_pdf_smoke_test.sh @@ -0,0 +1,206 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Smoke test for render_report_pdf.sh (Spec 33) +# Verifies that when pandoc is unavailable: +# 1. Script exits gracefully (exit 0, no crash) +# 2. Emits PDF_SKIPPED=1 and PDF_REASON=pandoc_missing +# 3. No temp file leaks under controlled TMPDIR + +ROOT_DIR="$(cd "$(dirname "$0")/.." 
&& pwd)" +SCRIPT="$ROOT_DIR/vibe-code-audit/scripts/render_report_pdf.sh" + +PASS=0 +FAIL=0 + +fail() { + printf 'FAIL: %s\n' "$*" >&2 + FAIL=$((FAIL + 1)) +} + +pass() { + printf 'PASS: %s\n' "$*" + PASS=$((PASS + 1)) +} + +# --------------------------------------------------------------------------- +# Setup: temp dirs, PATH hiding, cleanup trap +# --------------------------------------------------------------------------- + +ORIG_PATH="$PATH" +TMPROOT="" + +cleanup() { + PATH="$ORIG_PATH" + if [ -n "$TMPROOT" ] && [ -d "$TMPROOT" ]; then + rm -rf "$TMPROOT" + fi +} +trap cleanup EXIT INT TERM + +TMPROOT="$(mktemp -d)" +FIXTURE_DIR="$TMPROOT/fixture" +TEST_TMPDIR="$TMPROOT/tmpdir" +mkdir -p "$FIXTURE_DIR" "$TEST_TMPDIR" + +# Create a minimal non-empty markdown report fixture +cat > "$FIXTURE_DIR/test_report.md" <<'EOF' +# Test Audit Report + +## Summary + +This is a minimal test report for smoke testing. + +| Metric | Value | +|--------|-------| +| Files | 0 | +EOF + +# --------------------------------------------------------------------------- +# 1. Script exists and is valid shell +# --------------------------------------------------------------------------- + +if [ -f "$SCRIPT" ]; then + pass "render_report_pdf.sh exists" +else + fail "render_report_pdf.sh not found at $SCRIPT" + printf '\n--- Results: %d passed, %d failed ---\n' "$PASS" "$FAIL" + exit 1 +fi + +if bash -n "$SCRIPT" 2>/dev/null; then + pass "render_report_pdf.sh passes syntax check" +else + fail "render_report_pdf.sh has syntax errors" +fi + +# --------------------------------------------------------------------------- +# 2. 
Build filtered PATH that excludes pandoc +# --------------------------------------------------------------------------- + +FILTERED_PATH="" +IFS=':' +for segment in $ORIG_PATH; do + if [ -z "$segment" ]; then + continue + fi + if [ -x "$segment/pandoc" ]; then + continue + fi + if [ -n "$FILTERED_PATH" ]; then + FILTERED_PATH="$FILTERED_PATH:$segment" + else + FILTERED_PATH="$segment" + fi +done +unset IFS + +# Verify pandoc is hidden +if ! PATH="$FILTERED_PATH" command -v pandoc >/dev/null 2>&1; then + pass "pandoc is hidden from filtered PATH" +else + fail "pandoc is still visible in filtered PATH" +fi + +# Verify essential commands survive PATH filtering +if PATH="$FILTERED_PATH" command -v bash >/dev/null 2>&1; then + pass "bash remains available in filtered PATH" +else + fail "bash lost from filtered PATH" +fi + +if PATH="$FILTERED_PATH" command -v mkdir >/dev/null 2>&1; then + pass "mkdir remains available in filtered PATH" +else + fail "mkdir lost from filtered PATH" +fi + +# --------------------------------------------------------------------------- +# 3. Run render_report_pdf.sh with pandoc hidden +# --------------------------------------------------------------------------- + +STDOUT_FILE="$TMPROOT/stdout.txt" +STDERR_FILE="$TMPROOT/stderr.txt" +SCRIPT_EXIT=0 + +env PATH="$FILTERED_PATH" TMPDIR="$TEST_TMPDIR" \ + bash "$SCRIPT" --report "$FIXTURE_DIR/test_report.md" --skip-system-map \ + >"$STDOUT_FILE" 2>"$STDERR_FILE" || SCRIPT_EXIT=$? + +# --------------------------------------------------------------------------- +# 4. 
Assert graceful exit +# --------------------------------------------------------------------------- + +if [ "$SCRIPT_EXIT" -eq 0 ]; then + pass "script exits with code 0" +else + fail "script exited with code $SCRIPT_EXIT (expected 0)" +fi + +# Check no crash diagnostics in stderr +STDERR_CONTENT="" +if [ -s "$STDERR_FILE" ]; then + STDERR_CONTENT="$(cat "$STDERR_FILE")" +fi + +if echo "$STDERR_CONTENT" | grep -Eiq 'unbound variable|syntax error|segmentation fault|core dumped|panic'; then + fail "stderr contains crash diagnostic: $(echo "$STDERR_CONTENT" | grep -Ei 'unbound variable|syntax error|segmentation fault|core dumped|panic' | head -1)" +else + pass "no crash diagnostics in stderr" +fi + +# --------------------------------------------------------------------------- +# 5. Assert skip contract signals +# --------------------------------------------------------------------------- + +STDOUT_CONTENT="" +if [ -s "$STDOUT_FILE" ]; then + STDOUT_CONTENT="$(cat "$STDOUT_FILE")" +fi + +if echo "$STDOUT_CONTENT" | grep -qF 'PDF_SKIPPED=1'; then + pass "PDF_SKIPPED=1 emitted on stdout" +else + fail "PDF_SKIPPED=1 not found in stdout" +fi + +if echo "$STDOUT_CONTENT" | grep -qF 'PDF_REASON=pandoc_missing'; then + pass "PDF_REASON=pandoc_missing emitted on stdout" +else + fail "PDF_REASON=pandoc_missing not found in stdout" +fi + +# Verify no PDF_PATH is emitted (since pandoc is missing) +if echo "$STDOUT_CONTENT" | grep -qF 'PDF_PATH='; then + fail "PDF_PATH emitted despite pandoc being missing" +else + pass "no PDF_PATH emitted (correct for missing pandoc)" +fi + +# --------------------------------------------------------------------------- +# 6. 
Assert no temp file leaks +# --------------------------------------------------------------------------- + +LEAKED_FILES="$(find "$TEST_TMPDIR" -maxdepth 1 -name 'vca-*' 2>/dev/null || true)" +if [ -z "$LEAKED_FILES" ]; then + pass "no vca-* temp files leaked in TMPDIR" +else + fail "temp files leaked in TMPDIR: $LEAKED_FILES" +fi + +# --------------------------------------------------------------------------- +# 7. Assert PATH restored (trap verification) +# --------------------------------------------------------------------------- + +if [ "$PATH" = "$ORIG_PATH" ]; then + pass "PATH restored to original value after test" +else + fail "PATH was not restored (test cleanup issue)" +fi + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- + +printf '\n--- Results: %d passed, %d failed ---\n' "$PASS" "$FAIL" +[ "$FAIL" -eq 0 ] diff --git a/tests/render_system_map_smoke_test.sh b/tests/render_system_map_smoke_test.sh new file mode 100644 index 000000000..5cdfd7ef0 --- /dev/null +++ b/tests/render_system_map_smoke_test.sh @@ -0,0 +1,206 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Smoke test for render_system_map.sh (Spec 34) +# Verifies that when dot (graphviz) is unavailable: +# 1. Script exits gracefully (exit 0, no crash) +# 2. Emits SYSTEM_MAP_SKIPPED=1 and SYSTEM_MAP_REASON=graphviz_missing +# 3. No temp file leaks under controlled TMPDIR + +ROOT_DIR="$(cd "$(dirname "$0")/.." 
&& pwd)" +SCRIPT="$ROOT_DIR/vibe-code-audit/scripts/render_system_map.sh" + +PASS=0 +FAIL=0 + +fail() { + printf 'FAIL: %s\n' "$*" >&2 + FAIL=$((FAIL + 1)) +} + +pass() { + printf 'PASS: %s\n' "$*" + PASS=$((PASS + 1)) +} + +# --------------------------------------------------------------------------- +# Setup: temp dirs, PATH hiding, cleanup trap +# --------------------------------------------------------------------------- + +ORIG_PATH="$PATH" +TMPROOT="" + +cleanup() { + PATH="$ORIG_PATH" + if [ -n "$TMPROOT" ] && [ -d "$TMPROOT" ]; then + rm -rf "$TMPROOT" + fi +} +trap cleanup EXIT INT TERM + +TMPROOT="$(mktemp -d)" +FIXTURE_DIR="$TMPROOT/fixture" +TEST_TMPDIR="$TMPROOT/tmpdir" +mkdir -p "$FIXTURE_DIR" "$TEST_TMPDIR" + +# Create a minimal non-empty markdown report fixture +cat > "$FIXTURE_DIR/test_report.md" <<'EOF' +# Test Audit Report + +## Summary + +This is a minimal test report for smoke testing. + +| Metric | Value | +|--------|-------| +| Files | 0 | +EOF + +# --------------------------------------------------------------------------- +# 1. Script exists and is valid shell +# --------------------------------------------------------------------------- + +if [ -f "$SCRIPT" ]; then + pass "render_system_map.sh exists" +else + fail "render_system_map.sh not found at $SCRIPT" + printf '\n--- Results: %d passed, %d failed ---\n' "$PASS" "$FAIL" + exit 1 +fi + +if bash -n "$SCRIPT" 2>/dev/null; then + pass "render_system_map.sh passes syntax check" +else + fail "render_system_map.sh has syntax errors" +fi + +# --------------------------------------------------------------------------- +# 2. 
Build filtered PATH that excludes dot +# --------------------------------------------------------------------------- + +FILTERED_PATH="" +IFS=':' +for segment in $ORIG_PATH; do + if [ -z "$segment" ]; then + continue + fi + if [ -x "$segment/dot" ]; then + continue + fi + if [ -n "$FILTERED_PATH" ]; then + FILTERED_PATH="$FILTERED_PATH:$segment" + else + FILTERED_PATH="$segment" + fi +done +unset IFS + +# Verify dot is hidden +if ! PATH="$FILTERED_PATH" command -v dot >/dev/null 2>&1; then + pass "dot is hidden from filtered PATH" +else + fail "dot is still visible in filtered PATH" +fi + +# Verify essential commands survive PATH filtering +if PATH="$FILTERED_PATH" command -v bash >/dev/null 2>&1; then + pass "bash remains available in filtered PATH" +else + fail "bash lost from filtered PATH" +fi + +if PATH="$FILTERED_PATH" command -v mkdir >/dev/null 2>&1; then + pass "mkdir remains available in filtered PATH" +else + fail "mkdir lost from filtered PATH" +fi + +# --------------------------------------------------------------------------- +# 3. Run render_system_map.sh with dot hidden +# --------------------------------------------------------------------------- + +STDOUT_FILE="$TMPROOT/stdout.txt" +STDERR_FILE="$TMPROOT/stderr.txt" +SCRIPT_EXIT=0 + +env PATH="$FILTERED_PATH" TMPDIR="$TEST_TMPDIR" \ + bash "$SCRIPT" --report "$FIXTURE_DIR/test_report.md" \ + >"$STDOUT_FILE" 2>"$STDERR_FILE" || SCRIPT_EXIT=$? + +# --------------------------------------------------------------------------- +# 4. 
Assert graceful exit +# --------------------------------------------------------------------------- + +if [ "$SCRIPT_EXIT" -eq 0 ]; then + pass "script exits with code 0" +else + fail "script exited with code $SCRIPT_EXIT (expected 0)" +fi + +# Check no crash diagnostics in stderr +STDERR_CONTENT="" +if [ -s "$STDERR_FILE" ]; then + STDERR_CONTENT="$(cat "$STDERR_FILE")" +fi + +if echo "$STDERR_CONTENT" | grep -Eiq 'unbound variable|syntax error|segmentation fault|core dumped|panic'; then + fail "stderr contains crash diagnostic: $(echo "$STDERR_CONTENT" | grep -Ei 'unbound variable|syntax error|segmentation fault|core dumped|panic' | head -1)" +else + pass "no crash diagnostics in stderr" +fi + +# --------------------------------------------------------------------------- +# 5. Assert skip contract signals +# --------------------------------------------------------------------------- + +STDOUT_CONTENT="" +if [ -s "$STDOUT_FILE" ]; then + STDOUT_CONTENT="$(cat "$STDOUT_FILE")" +fi + +if echo "$STDOUT_CONTENT" | grep -qF 'SYSTEM_MAP_SKIPPED=1'; then + pass "SYSTEM_MAP_SKIPPED=1 emitted on stdout" +else + fail "SYSTEM_MAP_SKIPPED=1 not found in stdout" +fi + +if echo "$STDOUT_CONTENT" | grep -qF 'SYSTEM_MAP_REASON=graphviz_missing'; then + pass "SYSTEM_MAP_REASON=graphviz_missing emitted on stdout" +else + fail "SYSTEM_MAP_REASON=graphviz_missing not found in stdout" +fi + +# Verify no SYSTEM_MAP_PATH is emitted (since dot is missing) +if echo "$STDOUT_CONTENT" | grep -qF 'SYSTEM_MAP_PATH='; then + fail "SYSTEM_MAP_PATH emitted despite dot being missing" +else + pass "no SYSTEM_MAP_PATH emitted (correct for missing dot)" +fi + +# --------------------------------------------------------------------------- +# 6. 
Assert no temp file leaks +# --------------------------------------------------------------------------- + +LEAKED_FILES="$(find "$TEST_TMPDIR" -maxdepth 1 -name 'vca-*' 2>/dev/null || true)" +if [ -z "$LEAKED_FILES" ]; then + pass "no vca-* temp files leaked in TMPDIR" +else + fail "temp files leaked in TMPDIR: $LEAKED_FILES" +fi + +# --------------------------------------------------------------------------- +# 7. Assert PATH restored (trap verification) +# --------------------------------------------------------------------------- + +if [ "$PATH" = "$ORIG_PATH" ]; then + pass "PATH restored to original value after test" +else + fail "PATH was not restored (test cleanup issue)" +fi + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- + +printf '\n--- Results: %d passed, %d failed ---\n' "$PASS" "$FAIL" +[ "$FAIL" -eq 0 ] diff --git a/tests/run_index_mock_smoke.sh b/tests/run_index_mock_smoke.sh index ef0268285..fafc881d8 100755 --- a/tests/run_index_mock_smoke.sh +++ b/tests/run_index_mock_smoke.sh @@ -26,6 +26,17 @@ json_int() { fi } +json_bool() { + file="$1" + key="$2" + value="$(sed -n "s/.*\"$key\"[[:space:]]*:[[:space:]]*\\(true\\).*/\\1/p" "$file" | head -n1)" + if [ -n "$value" ]; then + printf '%s\n' "$value" + return + fi + sed -n "s/.*\"$key\"[[:space:]]*:[[:space:]]*\\(false\\).*/\\1/p" "$file" | head -n1 +} + assert_nonempty_file() { path="$1" [ -s "$path" ] || fail "expected non-empty file: $path" @@ -310,6 +321,14 @@ run_case() { mock_vsearch_fail="${18:-0}" expected_retrieval_mode="${19:-}" expected_embed_utf8_panic="${20:-0}" + case_mode="${21:-standard}" + expected_top_k="${22:-}" + skip_read_plan="${23:-0}" + + case "$skip_read_plan" in + 0|1) ;; + *) fail "case $case_name: skip_read_plan must be 0|1, got '$skip_read_plan'" ;; + esac ( set -euo pipefail @@ -319,7 +338,7 @@ run_case() { output_dir="$work_dir/output" 
bin_dir="$work_dir/bin" - if [ "$repo_layout" = "nested-rust" ]; then + if [ "$repo_layout" = "nested-rust" ] || [ "$repo_layout" = "nested-rust-mixed" ]; then mkdir -p "$repo_dir/backend/src" cat > "$repo_dir/backend/Cargo.toml" <<'EOF_CARGO' [package] @@ -332,6 +351,31 @@ fn main() { println!("nested"); } EOF_RS + if [ "$repo_layout" = "nested-rust-mixed" ]; then + cat > "$repo_dir/package.json" <<'EOF_PKG' +{"name": "mock-mixed-repo", "version": "1.0.0"} +EOF_PKG + fi + elif [ "$repo_layout" = "ts-node" ]; then + mkdir -p "$repo_dir/src" + cat > "$repo_dir/tsconfig.json" <<'EOF_TS' +{"compilerOptions": {"target": "es2020", "module": "commonjs"}} +EOF_TS + cat > "$repo_dir/package.json" <<'EOF_PKG' +{"name": "mock-ts-repo", "version": "1.0.0"} +EOF_PKG + cat > "$repo_dir/src/app.ts" <<'EOF_TSRC' +const greeting: string = "hello"; +console.log(greeting); +EOF_TSRC + elif [ "$repo_layout" = "js-only" ]; then + mkdir -p "$repo_dir/src" + cat > "$repo_dir/package.json" <<'EOF_PKG' +{"name": "mock-js-repo", "version": "1.0.0"} +EOF_PKG + cat > "$repo_dir/src/index.js" <<'EOF_JS' +console.log("hello"); +EOF_JS else mkdir -p "$repo_dir/src" cat > "$repo_dir/Cargo.toml" <<'EOF_CARGO' @@ -374,11 +418,21 @@ EOF_RS ;; esac + # [R-006] Pre-create audit_index/ with sentinel to verify it survives the run + mkdir -p "$output_dir/audit_index" + printf 'sentinel\n' > "$output_dir/audit_index/.pre_existing_marker" + + skip_flags="" + if [ "$skip_read_plan" -eq 1 ]; then + skip_flags="--skip-read-plan" + fi + run_output="$( bash "$RUN_INDEX_SCRIPT" \ --repo "$repo_dir" \ --output "$output_dir" \ - --mode standard + --mode "$case_mode" \ + $skip_flags )" resolved_output="$(printf '%s\n' "$run_output" | sed -n 's/^OUTPUT_DIR=//p' | tail -n1)" @@ -389,9 +443,33 @@ EOF_RS assert_nonempty_file "$resolved_output/audit_index/derived/catalog.json" assert_nonempty_file "$resolved_output/audit_index/derived/hotspots.json" assert_nonempty_file 
"$resolved_output/audit_index/derived/dup_clusters.md" - [ -e "$resolved_output/audit_index/derived/read_plan.tsv" ] || \ - fail "case $case_name: expected read_plan.tsv to exist" - assert_nonempty_file "$resolved_output/audit_index/derived/read_plan.md" + if [ "$skip_read_plan" -eq 1 ]; then + if [ -e "$resolved_output/audit_index/derived/read_plan.tsv" ]; then + fail "case $case_name: read_plan.tsv should NOT exist when --skip-read-plan is set" + fi + if [ -e "$resolved_output/audit_index/derived/read_plan.md" ]; then + fail "case $case_name: read_plan.md should NOT exist when --skip-read-plan is set" + fi + else + [ -e "$resolved_output/audit_index/derived/read_plan.tsv" ] || \ + fail "case $case_name: expected read_plan.tsv to exist" + assert_nonempty_file "$resolved_output/audit_index/derived/read_plan.md" + fi + + # Guard: no nested audit_index/audit_index.tmp path produced + if [ -d "$resolved_output/audit_index/audit_index.tmp" ]; then + fail "case $case_name: nested audit_index/audit_index.tmp directory detected — contract mismatch" + fi + + # Assert no stale audit_index.tmp/ remains after successful run + if [ -d "$resolved_output/audit_index.tmp" ]; then + fail "case $case_name: audit_index.tmp/ still exists after successful run — atomic rename failed" + fi + + # Pre-existing audit_index/ should be replaced (not preserved) on success + if [ -f "$resolved_output/audit_index/.pre_existing_marker" ]; then + fail "case $case_name: pre-existing sentinel still present — old audit_index/ was not replaced" + fi llmcc_mode_actual="$(json_string "$manifest" "llmcc_mode")" [ "$llmcc_mode_actual" = "$expected_llmcc_mode" ] || \ @@ -431,6 +509,58 @@ EOF_RS fail "case $case_name: expected retrieval_mode=$expected_retrieval_mode, got $retrieval_mode" fi + if [ -n "$expected_top_k" ]; then + actual_top_k="$(json_int "$manifest" "pagerank_top_k")" + [ "$actual_top_k" -eq "$expected_top_k" ] || \ + fail "case $case_name: expected pagerank_top_k=$expected_top_k, got 
$actual_top_k" + fi + + catalog="$resolved_output/audit_index/derived/catalog.json" + assert_nonempty_file "$catalog" + + catalog_rust="$(json_bool "$catalog" "rust")" + catalog_ts="$(json_bool "$catalog" "typescript")" + catalog_js="$(json_bool "$catalog" "javascript")" + + if [ "$repo_layout" = "nested-rust" ]; then + [ "$catalog_rust" = "true" ] || \ + fail "case $case_name: expected catalog stacks.rust=true for nested-rust layout, got $catalog_rust" + [ "$catalog_ts" = "false" ] || \ + fail "case $case_name: expected catalog stacks.typescript=false for nested-rust layout, got $catalog_ts" + [ "$catalog_js" = "false" ] || \ + fail "case $case_name: expected catalog stacks.javascript=false for nested-rust layout, got $catalog_js" + elif [ "$repo_layout" = "nested-rust-mixed" ]; then + [ "$catalog_rust" = "true" ] || \ + fail "case $case_name: expected catalog stacks.rust=true for nested-rust-mixed layout, got $catalog_rust" + [ "$catalog_ts" = "false" ] || \ + fail "case $case_name: expected catalog stacks.typescript=false for nested-rust-mixed layout, got $catalog_ts" + [ "$catalog_js" = "true" ] || \ + fail "case $case_name: expected catalog stacks.javascript=true for nested-rust-mixed layout, got $catalog_js" + elif [ "$repo_layout" = "ts-node" ]; then + [ "$catalog_rust" = "false" ] || \ + fail "case $case_name: expected catalog stacks.rust=false for ts-node layout, got $catalog_rust" + [ "$catalog_ts" = "true" ] || \ + fail "case $case_name: expected catalog stacks.typescript=true for ts-node layout, got $catalog_ts" + [ "$catalog_js" = "true" ] || \ + fail "case $case_name: expected catalog stacks.javascript=true for ts-node layout, got $catalog_js" + + # TS graph artifacts must exist and be non-empty + ts_graph_dir="$resolved_output/audit_index/llmcc/ts" + assert_nonempty_file "$ts_graph_dir/depth2.dot" + assert_nonempty_file "$ts_graph_dir/depth3.dot" + assert_nonempty_file "$ts_graph_dir/depth3_topk.dot" + elif [ "$repo_layout" = "js-only" ]; then + [ 
"$catalog_rust" = "false" ] || \ + fail "case $case_name: expected catalog stacks.rust=false for js-only layout, got $catalog_rust" + [ "$catalog_ts" = "false" ] || \ + fail "case $case_name: expected catalog stacks.typescript=false for js-only layout, got $catalog_ts" + [ "$catalog_js" = "true" ] || \ + fail "case $case_name: expected catalog stacks.javascript=true for js-only layout, got $catalog_js" + elif [ "$repo_layout" = "root-rust" ]; then + [ "$catalog_rust" = "true" ] || \ + fail "case $case_name: expected catalog stacks.rust=true for root-rust layout, got $catalog_rust" + fi + printf '[run_index_mock_smoke] PASS: %s\n' "$case_name" rm -rf "$work_dir" ) @@ -453,6 +583,11 @@ run_case "nested-rust-workspace-marker" \ "flag-depth" "collection-update" \ "nested-rust" +run_case "nested-rust-mixed-with-js" \ + "flag" "flag" "collection" "1" "0" \ + "flag-depth" "collection-update" \ + "nested-rust-mixed" + run_case "auto-embed-default-on-when-vectors-missing" \ "flag" "flag" "collection" "1" "0" \ "flag-depth" "collection-update" \ @@ -463,10 +598,106 @@ run_case "auto-embed-opt-out-when-vectors-missing" \ "flag-depth" "collection-update" \ "root-rust" "0" "0" "0" "0" "none" +run_case "js-only-repo-stack-flags" \ + "flag" "flag" "collection" "1" "0" \ + "flag-depth" "collection-update" \ + "js-only" + +run_case "ts-node-repo-stack-flags" \ + "flag" "flag" "collection" "1" "0" \ + "flag-depth" "collection-update" \ + "ts-node" + +run_case "mode-fast-top-k-80" \ + "flag" "flag" "collection" "1" "0" \ + "flag-depth" "collection-update" \ + "root-rust" "23" "unset" "0" "0" "none" \ + "0" "0" "0" "0" "" "0" \ + "fast" "80" + +run_case "mode-deep-top-k-350" \ + "flag" "flag" "collection" "1" "0" \ + "flag-depth" "collection-update" \ + "root-rust" "23" "unset" "0" "0" "none" \ + "0" "0" "0" "0" "" "0" \ + "deep" "350" + +run_case "skip-read-plan-no-artifacts" \ + "flag" "flag" "collection" "1" "0" \ + "flag-depth" "collection-update" \ + "root-rust" "23" "unset" "0" 
"0" "none" \ + "0" "0" "0" "0" "" "0" \ + "standard" "" "1" + run_case "embed-utf8-panic-falls-back-to-bm25" \ "flag" "flag" "collection" "1" "0" \ "flag-depth" "collection-update" \ "root-rust" "0" "unset" "1" "0" "direct" \ "1" "1" "1" "1" "bm25-only" "1" +# --- Failure-path test: pre-existing audit_index/ preserved, audit_index.tmp/ cleaned up --- +( + set -euo pipefail + + work_dir="$(mktemp -d "${TMPDIR:-/tmp}/vca-smoke.failure-cleanup.XXXXXX")" + repo_dir="$work_dir/repo" + output_dir="$work_dir/output" + + # Create a minimal repo (no mock bins → llmcc not found → die) + mkdir -p "$repo_dir/src" + cat > "$repo_dir/Cargo.toml" <<'EOF_CARGO' +[package] +name = "mock-fail" +version = "0.1.0" +edition = "2021" +EOF_CARGO + + # Pre-create audit_index/ with sentinel + mkdir -p "$output_dir/audit_index" + printf 'survivor\n' > "$output_dir/audit_index/.pre_existing_marker" + + # Run without mock bins on PATH — llmcc check will die + # Use a clean PATH without mock bins + if PATH="/usr/bin:/bin" bash "$RUN_INDEX_SCRIPT" \ + --repo "$repo_dir" \ + --output "$output_dir" \ + --mode standard >/dev/null 2>&1; then + fail "failure-cleanup: expected run_index.sh to fail when llmcc is missing" + fi + + # Assert: pre-existing audit_index/ is preserved + if [ ! 
-f "$output_dir/audit_index/.pre_existing_marker" ]; then + fail "failure-cleanup: pre-existing audit_index/ was destroyed on failure" + fi + + # Assert: audit_index.tmp/ is cleaned up + if [ -d "$output_dir/audit_index.tmp" ]; then + fail "failure-cleanup: audit_index.tmp/ still exists after failure — cleanup trap did not run" + fi + + printf '[run_index_mock_smoke] PASS: failure-cleanup (pre-existing index preserved, tmp cleaned)\n' + rm -rf "$work_dir" +) + +# Shellcheck gate for modified pipeline scripts +PIPELINE_SCRIPTS=( + "$ROOT_DIR/vibe-code-audit/scripts/run_index.sh" + "$ROOT_DIR/vibe-code-audit/scripts/build_derived_artifacts.sh" + "$ROOT_DIR/vibe-code-audit/scripts/build_read_plan.sh" +) +if command -v shellcheck >/dev/null 2>&1; then + sc_fail=0 + for script in "${PIPELINE_SCRIPTS[@]}"; do + if ! shellcheck -x -S warning "$script" >/dev/null 2>&1; then + printf '[run_index_mock_smoke] WARN: shellcheck found warnings in %s\n' "$(basename "$script")" >&2 + sc_fail=1 + fi + done + if [ "$sc_fail" -eq 0 ]; then + printf '[run_index_mock_smoke] PASS: shellcheck (no new warnings)\n' + fi +else + printf '[run_index_mock_smoke] SKIP: shellcheck not installed — install via "brew install shellcheck"\n' >&2 +fi + printf '[run_index_mock_smoke] All smoke cases passed.\n' diff --git a/tests/trap_cleanup_test.sh b/tests/trap_cleanup_test.sh new file mode 100644 index 000000000..348d91c71 --- /dev/null +++ b/tests/trap_cleanup_test.sh @@ -0,0 +1,238 @@ +#!/usr/bin/env bash +set -euo pipefail + +PASS=0 +FAIL=0 +fail() { echo " FAIL: $1"; FAIL=$((FAIL + 1)); } +pass() { echo " PASS: $1"; PASS=$((PASS + 1)); } + +SCRIPTS_DIR="$(cd "$(dirname "$0")/../vibe-code-audit/scripts" && pwd)" + +echo "=== Trap & Cleanup Tests ===" + +# --- Static checks: trap signatures --- + +if grep -q 'trap cleanup_all EXIT INT TERM' "$SCRIPTS_DIR/run_index.sh"; then + pass "run_index.sh traps EXIT INT TERM via cleanup_all" +else + fail "run_index.sh missing EXIT INT TERM trap via 
cleanup_all" +fi + +if grep -q 'trap cleanup EXIT INT TERM' "$SCRIPTS_DIR/run_agentroot_embed.sh"; then + pass "run_agentroot_embed.sh traps EXIT INT TERM" +else + fail "run_agentroot_embed.sh missing EXIT INT TERM trap" +fi + +# Negative: no EXIT-only traps in target scripts +if grep -qE 'trap cleanup_all EXIT$' "$SCRIPTS_DIR/run_index.sh"; then + fail "run_index.sh still has EXIT-only trap" +else + pass "run_index.sh has no EXIT-only trap" +fi + +if grep -qE 'trap cleanup EXIT$' "$SCRIPTS_DIR/run_agentroot_embed.sh"; then + fail "run_agentroot_embed.sh still has EXIT-only trap" +else + pass "run_agentroot_embed.sh has no EXIT-only trap" +fi + +# --- Static checks: idempotency guards --- + +# run_index.sh: cleanup_embed_server clears EMBED_SERVER_PID after kill +if awk '/^cleanup_embed_server\(\)/,/^}/' "$SCRIPTS_DIR/run_index.sh" | grep -q 'EMBED_SERVER_PID=""'; then + pass "run_index.sh cleanup clears EMBED_SERVER_PID" +else + fail "run_index.sh cleanup does not clear EMBED_SERVER_PID" +fi + +# run_agentroot_embed.sh: cleanup clears LLAMA_PID and SERVER_STARTED after kill +if awk '/^cleanup\(\)/,/^}/' "$SCRIPTS_DIR/run_agentroot_embed.sh" | grep -q 'LLAMA_PID=""'; then + pass "run_agentroot_embed.sh cleanup clears LLAMA_PID" +else + fail "run_agentroot_embed.sh cleanup does not clear LLAMA_PID" +fi + +if awk '/^cleanup\(\)/,/^}/' "$SCRIPTS_DIR/run_agentroot_embed.sh" | grep -q 'SERVER_STARTED=0'; then + pass "run_agentroot_embed.sh cleanup clears SERVER_STARTED" +else + fail "run_agentroot_embed.sh cleanup does not clear SERVER_STARTED" +fi + +# --- Static checks: trap registration ordering --- +# Trap must be registered after function definition but before resource-critical code + +RUN_INDEX_FUNC_LINE=$(grep -n 'cleanup_all()' "$SCRIPTS_DIR/run_index.sh" | head -1 | cut -d: -f1) +RUN_INDEX_TRAP_LINE=$(grep -n 'trap cleanup_all EXIT INT TERM' "$SCRIPTS_DIR/run_index.sh" | head -1 | cut -d: -f1) +if [ "$RUN_INDEX_TRAP_LINE" -gt "$RUN_INDEX_FUNC_LINE" ]; then + 
pass "run_index.sh trap registered after function definition (line $RUN_INDEX_FUNC_LINE < $RUN_INDEX_TRAP_LINE)" +else + fail "run_index.sh trap registered before function definition" +fi + +# Verify cleanup_all calls both cleanup functions +if grep -A5 'cleanup_all()' "$SCRIPTS_DIR/run_index.sh" | grep -q 'cleanup_embed_server'; then + pass "run_index.sh cleanup_all calls cleanup_embed_server" +else + fail "run_index.sh cleanup_all missing cleanup_embed_server call" +fi + +if grep -A5 'cleanup_all()' "$SCRIPTS_DIR/run_index.sh" | grep -q 'cleanup_audit_index_tmp'; then + pass "run_index.sh cleanup_all calls cleanup_audit_index_tmp" +else + fail "run_index.sh cleanup_all missing cleanup_audit_index_tmp call" +fi + +# Verify cleanup_audit_index_tmp has guard for undefined variable +if awk '/^cleanup_audit_index_tmp\(\)/,/^}/' "$SCRIPTS_DIR/run_index.sh" | grep -q 'AUDIT_INDEX_DIR:-'; then + pass "run_index.sh cleanup_audit_index_tmp guards undefined AUDIT_INDEX_DIR" +else + fail "run_index.sh cleanup_audit_index_tmp missing undefined variable guard" +fi + +EMBED_FUNC_LINE=$(grep -n '^cleanup()' "$SCRIPTS_DIR/run_agentroot_embed.sh" | head -1 | cut -d: -f1) +EMBED_TRAP_LINE=$(grep -n 'trap cleanup EXIT INT TERM' "$SCRIPTS_DIR/run_agentroot_embed.sh" | head -1 | cut -d: -f1) +if [ "$EMBED_TRAP_LINE" -gt "$EMBED_FUNC_LINE" ]; then + pass "run_agentroot_embed.sh trap registered after function definition (line $EMBED_FUNC_LINE < $EMBED_TRAP_LINE)" +else + fail "run_agentroot_embed.sh trap registered before function definition" +fi + +# --- Dynamic test: run_agentroot_embed.sh cleanup idempotency under INT --- + +TMPDIR_TEST="$(mktemp -d)" +trap 'rm -rf "$TMPDIR_TEST"' EXIT + +# Create a minimal script that sources run_agentroot_embed.sh's cleanup logic +# and tests idempotent double-call +cat > "$TMPDIR_TEST/idempotent_test.sh" <<'SCRIPT' +#!/usr/bin/env bash +set -euo pipefail +SERVER_STARTED=1 +LLAMA_PID="" +KEEP_SERVER=0 + +cleanup() { + if [ "$SERVER_STARTED" -eq 1 ] 
&& [ -n "${LLAMA_PID:-}" ] && [ "$KEEP_SERVER" -ne 1 ]; then + kill "$LLAMA_PID" >/dev/null 2>&1 || true + LLAMA_PID="" + SERVER_STARTED=0 + fi +} + +# Start a no-op background process to get a real PID +sleep 300 & +LLAMA_PID="$!" + +# Call cleanup twice — second call must be a no-op +cleanup +cleanup + +# If we get here without error, idempotency works +echo "IDEMPOTENT_OK=1" +SCRIPT +chmod +x "$TMPDIR_TEST/idempotent_test.sh" + +IDEMPOTENT_OUT="$(bash "$TMPDIR_TEST/idempotent_test.sh" 2>&1)" +if echo "$IDEMPOTENT_OUT" | grep -q 'IDEMPOTENT_OK=1'; then + pass "cleanup is idempotent (double-call safe)" +else + fail "cleanup idempotency test failed: $IDEMPOTENT_OUT" +fi + +# --- Dynamic test: KEEP_SERVER=1 suppresses cleanup --- + +cat > "$TMPDIR_TEST/keep_server_test.sh" <<'SCRIPT' +#!/usr/bin/env bash +set -euo pipefail +SERVER_STARTED=1 +LLAMA_PID="" +KEEP_SERVER=1 + +cleanup() { + if [ "$SERVER_STARTED" -eq 1 ] && [ -n "${LLAMA_PID:-}" ] && [ "$KEEP_SERVER" -ne 1 ]; then + kill "$LLAMA_PID" >/dev/null 2>&1 || true + LLAMA_PID="" + SERVER_STARTED=0 + fi +} + +sleep 300 & +LLAMA_PID="$!" 
+ +cleanup +# Process should still be alive since KEEP_SERVER=1 +if kill -0 "$LLAMA_PID" 2>/dev/null; then + echo "KEEP_SERVER_OK=1" + kill "$LLAMA_PID" 2>/dev/null || true +else + echo "KEEP_SERVER_FAIL=1" +fi +SCRIPT +chmod +x "$TMPDIR_TEST/keep_server_test.sh" + +KEEP_OUT="$(bash "$TMPDIR_TEST/keep_server_test.sh" 2>&1)" +if echo "$KEEP_OUT" | grep -q 'KEEP_SERVER_OK=1'; then + pass "KEEP_SERVER=1 suppresses cleanup kill" +else + fail "KEEP_SERVER test failed: $KEEP_OUT" +fi + +# --- Dynamic test: signal triggers cleanup --- +# Uses TERM (reliable for background bash processes) and verifies cleanup runs + +cat > "$TMPDIR_TEST/signal_test.sh" <<'SCRIPT' +#!/usr/bin/env bash +SERVER_STARTED=1 +LLAMA_PID="" +KEEP_SERVER=0 +MARKER_FILE="$1" + +cleanup() { + if [ "$SERVER_STARTED" -eq 1 ] && [ -n "${LLAMA_PID:-}" ] && [ "$KEEP_SERVER" -ne 1 ]; then + kill "$LLAMA_PID" >/dev/null 2>&1 || true + LLAMA_PID="" + SERVER_STARTED=0 + fi + echo "CLEANUP_RAN" > "$MARKER_FILE" +} +trap cleanup EXIT INT TERM + +sleep 300 & +LLAMA_PID="$!" + +# Write readiness marker then block +echo "READY" > "${MARKER_FILE}.ready" +# Use wait instead of sleep so signals are delivered immediately +wait +SCRIPT +chmod +x "$TMPDIR_TEST/signal_test.sh" + +MARKER_TERM="$TMPDIR_TEST/cleanup_marker_term" +bash "$TMPDIR_TEST/signal_test.sh" "$MARKER_TERM" & +SIG_PID="$!" + +waited=0 +while [ ! -f "${MARKER_TERM}.ready" ] && [ "$waited" -lt 30 ]; do + sleep 0.1 + waited=$((waited + 1)) +done + +kill -TERM "$SIG_PID" 2>/dev/null || true +wait "$SIG_PID" 2>/dev/null || true + +if [ -f "$MARKER_TERM" ] && grep -q 'CLEANUP_RAN' "$MARKER_TERM"; then + pass "TERM signal triggers cleanup" +else + fail "TERM signal did not trigger cleanup" +fi + +# INT test: bash non-interactive mode may not interrupt wait on SIGINT, +# so we verify the trap is registered and cleanup would fire on EXIT +# after INT by sending TERM (which reliably interrupts wait). +# The trap declaration covers INT identically to TERM. 
+# We verify INT registration via the static grep checks above. + +echo "" +echo "=== Results: $PASS passed, $FAIL failed ===" +[ "$FAIL" -eq 0 ] || exit 1 diff --git a/vibe-code-audit/INSTALL_MANIFEST.txt b/vibe-code-audit/INSTALL_MANIFEST.txt index 103e00fac..bcc21129a 100644 --- a/vibe-code-audit/INSTALL_MANIFEST.txt +++ b/vibe-code-audit/INSTALL_MANIFEST.txt @@ -7,6 +7,7 @@ scripts/build_derived_artifacts.sh scripts/build_read_plan.sh scripts/render_report_pdf.sh scripts/render_system_map.sh +scripts/_lib.sh references/core/process.md references/core/inventory.md references/core/pattern-mining.md diff --git a/vibe-code-audit/scripts/_lib.sh b/vibe-code-audit/scripts/_lib.sh new file mode 100644 index 000000000..801d94455 --- /dev/null +++ b/vibe-code-audit/scripts/_lib.sh @@ -0,0 +1,195 @@ +# _lib.sh — shared utility library for vibe-code-audit pipeline scripts +# +# Sourced by all pipeline scripts (run_index.sh, build_derived_artifacts.sh, +# build_read_plan.sh, run_agentroot_embed.sh, render_system_map.sh, +# render_report_pdf.sh). NOT sourced by install.sh. +# +# The sourcing script is responsible for: +# 1. Setting `set -euo pipefail` before sourcing. +# 2. Defining SCRIPT_NAME before sourcing (used in log prefixes). + +# --------------------------------------------------------------------------- +# Logging +# --------------------------------------------------------------------------- + +log() { + printf '[%s] %s\n' "${SCRIPT_NAME:-unknown}" "$*" >&2 +} + +warn() { + printf '[%s] WARNING: %s\n' "${SCRIPT_NAME:-unknown}" "$*" >&2 +} + +die() { + printf '[%s] FATAL: %s\n' "${SCRIPT_NAME:-unknown}" "$*" >&2 + exit 1 +} + +# --------------------------------------------------------------------------- +# File & pattern helpers +# --------------------------------------------------------------------------- + +# json_int_from_file FILE KEY +# Extracts the first integer value for KEY from a JSON-like FILE. 
+# Returns the integer on stdout; defaults to 0 if the file is missing, +# the key is absent, or the value is non-numeric. +# Used by run_index.sh to parse agentroot status.json fields +# (document_count, embedded_count). +json_int_from_file() { + local file="${1-}" + local key="${2-}" + local value + value="$(sed -n "s/.*\"$key\"[[:space:]]*:[[:space:]]*\\([0-9][0-9]*\\).*/\\1/p" "$file" 2>/dev/null | head -n1)" || true + if [ -z "$value" ]; then + printf '0\n' + else + printf '%s\n' "$value" + fi +} + +# has_pattern_in_files PATTERN [FILE ...] +# Returns 0 (true) if PATTERN matches in any of the listed files +# (case-insensitive extended regex via grep -Eqi). Skips missing files. +# Short-circuits on first match. Returns 1 if no match found. +# Used by run_index.sh and run_agentroot_embed.sh for retrieval +# diagnostics and error classification. +has_pattern_in_files() { + local pattern="${1-}" + shift + local file + for file in "$@"; do + [ -f "$file" ] || continue + if grep -Eqi "$pattern" "$file"; then + return 0 + fi + done + return 1 +} + +# --------------------------------------------------------------------------- +# Path helpers +# --------------------------------------------------------------------------- + +# resolve_output_dir PATH +# Resolves PATH to a canonical absolute directory path. +# Creates the directory (mkdir -p) if it doesn't exist, matching the +# inline OUTPUT_DIR_ABS resolution in run_index.sh, build_derived_artifacts.sh, +# and build_read_plan.sh. +# For absolute paths (starting with /): creates and canonicalizes directly. +# For relative paths: resolves relative to the caller's working directory. +# Callers must cd to the appropriate base (e.g., REPO_PATH_ABS) before +# invoking for relative paths. +# Returns the canonical absolute path on stdout via cd + pwd. +# Dies if the path cannot be created, is not a directory, or cannot be resolved. 
+resolve_output_dir() {
+  local dir="${1-}"
+  [ -n "$dir" ] || die "resolve_output_dir: path argument is required"
+  if ! mkdir -p "$dir" 2>/dev/null; then
+    die "resolve_output_dir: cannot create directory: $dir"
+  fi
+  [ -d "$dir" ] || die "resolve_output_dir: not a directory: $dir"
+  (cd "$dir" && pwd -P) || die "resolve_output_dir: cannot resolve directory: $dir"
+}
+
+# ---------------------------------------------------------------------------
+# Exclude-directory list and helpers
+# ---------------------------------------------------------------------------
+
+# Canonical list of directories to exclude from traversal.
+# All scripts MUST use these helpers instead of hardcoding directory names.
+# Space-delimited string (not a bash array) for POSIX compatibility.
+EXCLUDE_DIRS=".git node_modules target dist build .next coverage"
+
+# exclude_find_prune_args
+# Outputs find(1) prune expression fragments for use inside \( ... \) -prune.
+# Usage: find . \( $(exclude_find_prune_args) \) -prune -o ...
+# Output: -name .git -o -name node_modules -o -name target ...
+exclude_find_prune_args() {
+  local first=1
+  local dir
+  for dir in $EXCLUDE_DIRS; do
+    if [ "$first" -eq 1 ]; then
+      first=0
+    else
+      printf ' -o '
+    fi
+    printf -- '-name %s' "$dir"
+  done
+}
+
+# exclude_agentroot_flags
+# Outputs --exclude flags for agentroot CLI.
+# Usage: agentroot index . $(exclude_agentroot_flags) --output ...
+# Output: --exclude .git --exclude node_modules ...
+exclude_agentroot_flags() {
+  local dir
+  for dir in $EXCLUDE_DIRS; do
+    printf -- '--exclude %s ' "$dir"
+  done
+}
+
+# exclude_rg_globs
+# Outputs ripgrep glob exclusion flags, without quotes (shell quote removal
+# never applies to $(...) output, so quoted globs would reach rg as literal
+# '!dir/**' patterns, silently disabling the exclusions). Usage: rg $(exclude_rg_globs) PATTERN .
+exclude_rg_globs() {
+  local dir
+  for dir in $EXCLUDE_DIRS; do
+    printf -- '--glob !%s/** ' "$dir"
+  done
+}
+
+# exclude_dirs_json_array
+# Outputs a JSON array of excluded directory names.
+# Usage: "exclude_patterns": $(exclude_dirs_json_array),
+# Output: [".git", "node_modules", "target", "dist", "build", ".next", "coverage"]
+exclude_dirs_json_array() {
+  local first=1
+  local dir
+  printf '['
+  for dir in $EXCLUDE_DIRS; do
+    if [ "$first" -eq 1 ]; then
+      first=0
+    else
+      printf ', '
+    fi
+    printf '"%s"' "$dir"
+  done
+  printf ']'
+}
+
+# ---------------------------------------------------------------------------
+# JSON helpers — RFC 8259 §7 compliant string escaping
+# ---------------------------------------------------------------------------
+
+# json_escape STRING
+# Escapes a string for safe embedding inside a JSON quoted value.
+# Handles: backslash, double-quote, and all control chars U+0000-U+001F.
+# Uses od + awk (no jq dependency). awk runs under LC_ALL=C so %c emits
+# raw single bytes, passing UTF-8 sequences through untouched (gawk in a
+# UTF-8 locale re-encodes byte values >= 0x80, corrupting multibyte text).
+json_escape() {
+  local input="${1-}"
+  if [ -z "$input" ]; then
+    return
+  fi
+  printf '%s' "$input" | LC_ALL=C od -An -tx1 | LC_ALL=C awk '
+    BEGIN {
+      split("0123456789abcdef", hx, "")
+      for (i = 1; i <= 16; i++) h2d[hx[i]] = i - 1
+    }
+    {
+      for (i = 1; i <= NF; i++) {
+        d = h2d[substr($i, 1, 1)] * 16 + h2d[substr($i, 2, 1)]
+        if ($i == "5c") printf "\\\\"
+        else if ($i == "22") printf "\\\""
+        else if ($i == "08") printf "\\b"
+        else if ($i == "09") printf "\\t"
+        else if ($i == "0a") printf "\\n"
+        else if ($i == "0c") printf "\\f"
+        else if ($i == "0d") printf "\\r"
+        else if (d < 32) printf "\\u%04x", d
+        else printf "%c", d
+      }
+    }'
+}
diff --git a/vibe-code-audit/scripts/build_derived_artifacts.sh b/vibe-code-audit/scripts/build_derived_artifacts.sh
index c0803c143..51ac70ecd 100755
--- a/vibe-code-audit/scripts/build_derived_artifacts.sh
+++ b/vibe-code-audit/scripts/build_derived_artifacts.sh
@@ -1,7 +1,9 @@
 #!/usr/bin/env bash
 set -euo pipefail
 
-SCRIPT_NAME="build_derived_artifacts.sh"
+SCRIPT_NAME="build_derived_artifacts" +# shellcheck source=_lib.sh +. "$(dirname "$0")/_lib.sh" usage() { cat <<'USAGE' @@ -11,29 +13,19 @@ Usage: build_derived_artifacts.sh --repo --output [--mode ] [--top-k ] Writes: - /audit_index/derived/catalog.json - /audit_index/derived/hotspots.json - /audit_index/derived/dup_clusters.md + /derived/catalog.json + /derived/hotspots.json + /derived/dup_clusters.md USAGE } -log() { - printf '[%s] %s\n' "$SCRIPT_NAME" "$*" >&2 -} - -die() { - printf '[%s] ERROR: %s\n' "$SCRIPT_NAME" "$*" >&2 - exit 1 -} - -json_escape() { - printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g' -} - REPO_PATH="" OUTPUT_DIR="" MODE="standard" TOP_K="0" +FLAG_HAS_RUST="" +FLAG_HAS_TS="" +FLAG_HAS_JS="" while [ $# -gt 0 ]; do case "$1" in @@ -57,6 +49,21 @@ while [ $# -gt 0 ]; do TOP_K="$2" shift 2 ;; + --has-rust) + [ $# -ge 2 ] || die "--has-rust requires a value" + FLAG_HAS_RUST="$2" + shift 2 + ;; + --has-ts) + [ $# -ge 2 ] || die "--has-ts requires a value" + FLAG_HAS_TS="$2" + shift 2 + ;; + --has-js) + [ $# -ge 2 ] || die "--has-js requires a value" + FLAG_HAS_JS="$2" + shift 2 + ;; --help|-h) usage exit 0 @@ -71,14 +78,21 @@ done [ -n "$OUTPUT_DIR" ] || die "--output is required" [ -d "$REPO_PATH" ] || die "repo path not found: $REPO_PATH" +# Validate --has-* flags: must be 0 or 1 if provided +if [ -n "$FLAG_HAS_RUST" ]; then + case "$FLAG_HAS_RUST" in 0|1) ;; *) die "--has-rust: invalid value '$FLAG_HAS_RUST' (expected 0 or 1)" ;; esac +fi +if [ -n "$FLAG_HAS_TS" ]; then + case "$FLAG_HAS_TS" in 0|1) ;; *) die "--has-ts: invalid value '$FLAG_HAS_TS' (expected 0 or 1)" ;; esac +fi +if [ -n "$FLAG_HAS_JS" ]; then + case "$FLAG_HAS_JS" in 0|1) ;; *) die "--has-js: invalid value '$FLAG_HAS_JS' (expected 0 or 1)" ;; esac +fi + REPO_PATH_ABS="$(cd "$REPO_PATH" && pwd)" -OUTPUT_DIR_ABS="$OUTPUT_DIR" -case "$OUTPUT_DIR_ABS" in - /*) ;; - *) OUTPUT_DIR_ABS="$(cd "$REPO_PATH_ABS" && mkdir -p "$OUTPUT_DIR" && cd "$OUTPUT_DIR" && pwd)" ;; -esac 
+OUTPUT_DIR_ABS="$(cd "$REPO_PATH_ABS" && resolve_output_dir "$OUTPUT_DIR")" -AUDIT_INDEX_DIR="$OUTPUT_DIR_ABS/audit_index" +AUDIT_INDEX_DIR="$OUTPUT_DIR_ABS" DERIVED_DIR="$AUDIT_INDEX_DIR/derived" mkdir -p "$DERIVED_DIR" @@ -189,9 +203,21 @@ HAS_JS="false" HAS_FRONTEND="false" WORKSPACE_DETECTED="false" -[ -f "$REPO_PATH_ABS/Cargo.toml" ] && HAS_RUST="true" -[ -f "$REPO_PATH_ABS/tsconfig.json" ] && HAS_TS="true" -[ -f "$REPO_PATH_ABS/package.json" ] && HAS_JS="true" +if [ -n "$FLAG_HAS_RUST" ]; then + [ "$FLAG_HAS_RUST" = "1" ] && HAS_RUST="true" +else + [ -f "$REPO_PATH_ABS/Cargo.toml" ] && HAS_RUST="true" +fi +if [ -n "$FLAG_HAS_TS" ]; then + [ "$FLAG_HAS_TS" = "1" ] && HAS_TS="true" +else + [ -f "$REPO_PATH_ABS/tsconfig.json" ] && HAS_TS="true" +fi +if [ -n "$FLAG_HAS_JS" ]; then + [ "$FLAG_HAS_JS" = "1" ] && HAS_JS="true" +else + [ -f "$REPO_PATH_ABS/package.json" ] && HAS_JS="true" +fi [ -d "$REPO_PATH_ABS/web/src" ] && HAS_FRONTEND="true" if [ -f "$REPO_PATH_ABS/Cargo.toml" ] && grep -Eq '^\[workspace\]' "$REPO_PATH_ABS/Cargo.toml"; then diff --git a/vibe-code-audit/scripts/build_read_plan.sh b/vibe-code-audit/scripts/build_read_plan.sh index 89cb7f89f..a29c8beb2 100755 --- a/vibe-code-audit/scripts/build_read_plan.sh +++ b/vibe-code-audit/scripts/build_read_plan.sh @@ -1,7 +1,9 @@ #!/usr/bin/env bash set -euo pipefail -SCRIPT_NAME="build_read_plan.sh" +SCRIPT_NAME="build_read_plan" +# shellcheck source=_lib.sh +. 
"$(dirname "$0")/_lib.sh" usage() { cat <<'USAGE' @@ -11,23 +13,14 @@ Usage: build_read_plan.sh --repo --output [--mode fast|standard|deep] Writes: - /audit_index/derived/read_plan.tsv - /audit_index/derived/read_plan.md + /derived/read_plan.tsv + /derived/read_plan.md TSV columns: file_pathmatch_linestart_lineend_linesignal USAGE } -log() { - printf '[%s] %s\n' "$SCRIPT_NAME" "$*" >&2 -} - -die() { - printf '[%s] ERROR: %s\n' "$SCRIPT_NAME" "$*" >&2 - exit 1 -} - REPO_PATH="" OUTPUT_DIR="" MODE="standard" @@ -85,13 +78,9 @@ case "$MODE" in esac REPO_PATH_ABS="$(cd "$REPO_PATH" && pwd)" -OUTPUT_DIR_ABS="$OUTPUT_DIR" -case "$OUTPUT_DIR_ABS" in - /*) ;; - *) OUTPUT_DIR_ABS="$(cd "$REPO_PATH_ABS" && mkdir -p "$OUTPUT_DIR" && cd "$OUTPUT_DIR" && pwd)" ;; -esac +OUTPUT_DIR_ABS="$(cd "$REPO_PATH_ABS" && resolve_output_dir "$OUTPUT_DIR")" -DERIVED_DIR="$OUTPUT_DIR_ABS/audit_index/derived" +DERIVED_DIR="$OUTPUT_DIR_ABS/derived" mkdir -p "$DERIVED_DIR" RAW_MATCHES="$DERIVED_DIR/.read_plan_matches_raw.tsv" @@ -108,15 +97,19 @@ log "limits: max_slices=$MAX_SLICES max_files=$MAX_FILES radius=$RADIUS" pushd "$REPO_PATH_ABS" >/dev/null +rg_exclude_args=() +for dir in $EXCLUDE_DIRS; do + rg_exclude_args+=(--glob "!${dir}/**") +done + +grep_exclude_args=() +for dir in $EXCLUDE_DIRS; do + grep_exclude_args+=(--exclude-dir "$dir") +done + if command -v rg >/dev/null 2>&1; then rg -n -S \ - --glob '!.git/**' \ - --glob '!node_modules/**' \ - --glob '!target/**' \ - --glob '!dist/**' \ - --glob '!build/**' \ - --glob '!.next/**' \ - --glob '!coverage/**' \ + "${rg_exclude_args[@]}" \ --glob '!**/*.md' \ --glob '!**/*.txt' \ --glob '!**/*.lock' \ @@ -126,7 +119,7 @@ if command -v rg >/dev/null 2>&1; then --glob '!**/*.jpeg' \ "$PATTERN" . > "$RAW_MATCHES" || true else - grep -R -n -E "$PATTERN" . > "$RAW_MATCHES" || true + grep -R -n -E "${grep_exclude_args[@]}" "$PATTERN" . 
> "$RAW_MATCHES" || true fi popd >/dev/null diff --git a/vibe-code-audit/scripts/render_report_pdf.sh b/vibe-code-audit/scripts/render_report_pdf.sh index e17f95d7a..6d4fd039c 100755 --- a/vibe-code-audit/scripts/render_report_pdf.sh +++ b/vibe-code-audit/scripts/render_report_pdf.sh @@ -1,7 +1,9 @@ #!/usr/bin/env bash set -euo pipefail -SCRIPT_NAME="render_report_pdf.sh" +SCRIPT_NAME="render_report_pdf" +# shellcheck source=_lib.sh +. "$(dirname "$0")/_lib.sh" usage() { cat <<'USAGE' @@ -19,14 +21,6 @@ Behavior: USAGE } -log() { - printf '[%s] %s\n' "$SCRIPT_NAME" "$*" >&2 -} - -warn() { - printf '[%s] WARNING: %s\n' "$SCRIPT_NAME" "$*" >&2 -} - REPORT_PATH="" OUTPUT_PATH="" FORCED_ENGINE="" @@ -235,6 +229,15 @@ make_report_without_system_map() { ' "$in_report" > "$out_report" } +PANDOC_LOG_MAIN="" +PANDOC_LOG_RETRY="" +TMP_REPORT_NO_MAP="" + +cleanup() { + rm -f "$PANDOC_LOG_MAIN" "$PANDOC_LOG_RETRY" "$TMP_REPORT_NO_MAP" +} +trap cleanup EXIT INT TERM + log "Rendering PDF with pandoc engine: $PDF_ENGINE" PANDOC_LOG_MAIN="$(mktemp "${TMPDIR:-/tmp}/vca-pandoc-main.XXXXXX.log")" if render_with_pandoc "$REPORT_PATH_ABS" "$PANDOC_LOG_MAIN"; then diff --git a/vibe-code-audit/scripts/render_system_map.sh b/vibe-code-audit/scripts/render_system_map.sh index c1d1d92ab..3a828d3a4 100755 --- a/vibe-code-audit/scripts/render_system_map.sh +++ b/vibe-code-audit/scripts/render_system_map.sh @@ -1,7 +1,9 @@ #!/usr/bin/env bash set -euo pipefail -SCRIPT_NAME="render_system_map.sh" +SCRIPT_NAME="render_system_map" +# shellcheck source=_lib.sh +. 
"$(dirname "$0")/_lib.sh" usage() { cat <<'USAGE' @@ -21,14 +23,6 @@ Behavior: USAGE } -log() { - printf '[%s] %s\n' "$SCRIPT_NAME" "$*" >&2 -} - -warn() { - printf '[%s] WARNING: %s\n' "$SCRIPT_NAME" "$*" >&2 -} - REPORT_PATH="" DOT_PATH="" IMAGE_PATH="" @@ -230,6 +224,10 @@ REPORT_UPDATED=0 IMAGE_REF="$(basename "$IMAGE_PATH")" IMAGE_LINE="![System Map - module dependencies and boundaries]($IMAGE_REF)" +tmp_report="" +cleanup() { rm -f "$tmp_report"; } +trap cleanup EXIT INT TERM + if [ "$NO_EDIT" -eq 0 ] && ! grep -Fq "]($IMAGE_REF)" "$REPORT_PATH_ABS"; then tmp_report="$(mktemp "${TMPDIR:-/tmp}/vca-system-map-report.XXXXXX")" awk -v image_line="$IMAGE_LINE" ' diff --git a/vibe-code-audit/scripts/run_agentroot_embed.sh b/vibe-code-audit/scripts/run_agentroot_embed.sh index 6aa9d1d25..2b2cf13d5 100755 --- a/vibe-code-audit/scripts/run_agentroot_embed.sh +++ b/vibe-code-audit/scripts/run_agentroot_embed.sh @@ -1,7 +1,9 @@ #!/usr/bin/env bash set -euo pipefail -SCRIPT_NAME="run_agentroot_embed.sh" +SCRIPT_NAME="run_agentroot_embed" +# shellcheck source=_lib.sh +. 
"$(dirname "$0")/_lib.sh" usage() { cat <<'USAGE' @@ -38,31 +40,6 @@ Environment overrides: USAGE } -log() { - printf '[%s] %s\n' "$SCRIPT_NAME" "$*" >&2 -} - -warn() { - printf '[%s] WARNING: %s\n' "$SCRIPT_NAME" "$*" >&2 -} - -die() { - printf '[%s] ERROR: %s\n' "$SCRIPT_NAME" "$*" >&2 - exit 1 -} - -has_pattern_in_files() { - pattern="$1" - shift - for file in "$@"; do - [ -f "$file" ] || continue - if grep -Eqi "$pattern" "$file"; then - return 0 - fi - done - return 1 -} - health_url() { printf 'http://%s:%s/health' "$HOST" "$PORT" } @@ -158,14 +135,63 @@ emit_result() { cleanup() { if [ "$SERVER_STARTED" -eq 1 ] && [ -n "${LLAMA_PID:-}" ] && [ "$KEEP_SERVER" -ne 1 ]; then kill "$LLAMA_PID" >/dev/null 2>&1 || true + LLAMA_PID="" + SERVER_STARTED=0 fi } -# Source persistent embed config from installer if present +# Parse persistent embed config from installer if present (safe line-by-line, +# never sourced as shell code to prevent command injection). EMBED_ENV_FILE="$HOME/.config/vibe-code-audit/embed.env" if [ -f "$EMBED_ENV_FILE" ]; then - # shellcheck disable=SC1090 - . "$EMBED_ENV_FILE" + # Snapshot which keys are already set in the environment before parsing, + # so pre-existing env vars take precedence but later lines in the file + # can still override earlier ones (last-occurrence-wins within the file). 
+ _env_preset="" + for _env_k in VIBE_CODE_AUDIT_EMBED_MODEL_PATH VIBE_CODE_AUDIT_EMBED_MODEL_URL \ + VIBE_CODE_AUDIT_EMBED_HOST VIBE_CODE_AUDIT_EMBED_PORT \ + VIBE_CODE_AUDIT_EMBED_START_LOCAL VIBE_CODE_AUDIT_EMBED_DOWNLOAD_MODEL \ + VIBE_CODE_AUDIT_EMBED_WAIT_SECONDS VIBE_CODE_AUDIT_EMBED_KEEP_SERVER \ + VIBE_CODE_AUDIT_EMBED_CTX_SIZE VIBE_CODE_AUDIT_EMBED_BATCH_SIZE \ + VIBE_CODE_AUDIT_EMBED_UBATCH_SIZE; do + if [ -n "${!_env_k+x}" ]; then + _env_preset="${_env_preset}${_env_k} " + fi + done + while IFS='=' read -r _env_key _env_value || [ -n "$_env_key" ]; do + # Skip blank lines and comments + case "$_env_key" in + ''|\#*) continue ;; + esac + # Strip trailing carriage return from value (CRLF files) + _env_value="${_env_value%$'\r'}" + # Strip one layer of matching surrounding quotes + case "$_env_value" in + \"*\") _env_value="${_env_value#\"}"; _env_value="${_env_value%\"}" ;; + \'*\') _env_value="${_env_value#\'}"; _env_value="${_env_value%\'}" ;; + esac + # Only accept whitelisted keys; defer to pre-existing env vars + case "$_env_key" in + VIBE_CODE_AUDIT_EMBED_MODEL_PATH|\ + VIBE_CODE_AUDIT_EMBED_MODEL_URL|\ + VIBE_CODE_AUDIT_EMBED_HOST|\ + VIBE_CODE_AUDIT_EMBED_PORT|\ + VIBE_CODE_AUDIT_EMBED_START_LOCAL|\ + VIBE_CODE_AUDIT_EMBED_DOWNLOAD_MODEL|\ + VIBE_CODE_AUDIT_EMBED_WAIT_SECONDS|\ + VIBE_CODE_AUDIT_EMBED_KEEP_SERVER|\ + VIBE_CODE_AUDIT_EMBED_CTX_SIZE|\ + VIBE_CODE_AUDIT_EMBED_BATCH_SIZE|\ + VIBE_CODE_AUDIT_EMBED_UBATCH_SIZE) + # Skip if this key was already set before file parsing began + case "$_env_preset" in + *"$_env_key "*) ;; + *) export "$_env_key=$_env_value" ;; + esac + ;; + esac + done < "$EMBED_ENV_FILE" + unset _env_key _env_value _env_k _env_preset fi DB_PATH="" @@ -268,7 +294,7 @@ EMBED_LOG="$OUTPUT_DIR/embed.log" EMBED_RETRY_LOG="$OUTPUT_DIR/embed_retry.log" LLAMA_SERVER_LOG="$OUTPUT_DIR/llama_server.log" -trap cleanup EXIT +trap cleanup EXIT INT TERM BACKEND="direct" if run_embed "$EMBED_LOG"; then diff --git 
a/vibe-code-audit/scripts/run_index.sh b/vibe-code-audit/scripts/run_index.sh index c84600065..6f2fc5e5b 100755 --- a/vibe-code-audit/scripts/run_index.sh +++ b/vibe-code-audit/scripts/run_index.sh @@ -1,7 +1,9 @@ #!/usr/bin/env bash set -euo pipefail -SCRIPT_NAME="run_index.sh" +SCRIPT_NAME="run_index" +# shellcheck source=_lib.sh +. "$(dirname "$0")/_lib.sh" usage() { cat <<'USAGE' @@ -20,10 +22,10 @@ Options: --help Show this help This script writes: - /audit_index/ - /audit_index/derived/catalog.json - /audit_index/derived/hotspots.json - /audit_index/derived/dup_clusters.md + /audit_index.tmp/ + /audit_index.tmp/derived/catalog.json + /audit_index.tmp/derived/hotspots.json + /audit_index.tmp/derived/dup_clusters.md Machine-readable output: OUTPUT_DIR= @@ -48,34 +50,6 @@ Environment: USAGE } -log() { - printf '[%s] %s\n' "$SCRIPT_NAME" "$*" >&2 -} - -warn() { - printf '[%s] WARNING: %s\n' "$SCRIPT_NAME" "$*" >&2 -} - -die() { - printf '[%s] ERROR: %s\n' "$SCRIPT_NAME" "$*" >&2 - exit 1 -} - -json_escape() { - printf '%s' "$1" | sed 's/\\/\\\\/g; s/"/\\"/g' -} - -json_int_from_file() { - file="$1" - key="$2" - value="$(sed -n "s/.*\"$key\"[[:space:]]*:[[:space:]]*\\([0-9][0-9]*\\).*/\\1/p" "$file" | head -n1)" - if [ -z "$value" ]; then - printf '0\n' - else - printf '%s\n' "$value" - fi -} - kv_from_file() { file="$1" key="$2" @@ -83,18 +57,6 @@ kv_from_file() { printf '%s\n' "$value" } -has_pattern_in_files() { - pattern="$1" - shift - for file in "$@"; do - [ -f "$file" ] || continue - if grep -Eqi "$pattern" "$file"; then - return 0 - fi - done - return 1 -} - REPO_PATH="" OUTPUT_DIR="" MODE="standard" @@ -172,19 +134,9 @@ if [ -z "$OUTPUT_DIR" ]; then OUTPUT_DIR="$REPO_PATH_ABS/vibe-code-audit/$TIMESTAMP" fi -OUTPUT_DIR_ABS="$OUTPUT_DIR" -case "$OUTPUT_DIR_ABS" in - /*) - mkdir -p "$OUTPUT_DIR_ABS" - OUTPUT_DIR_ABS="$(cd "$OUTPUT_DIR_ABS" && pwd)" - ;; - *) - mkdir -p "$REPO_PATH_ABS/$OUTPUT_DIR_ABS" - OUTPUT_DIR_ABS="$(cd "$REPO_PATH_ABS/$OUTPUT_DIR_ABS" 
&& pwd)" - ;; -esac +OUTPUT_DIR_ABS="$(cd "$REPO_PATH_ABS" && resolve_output_dir "$OUTPUT_DIR")" -AUDIT_INDEX_DIR="$OUTPUT_DIR_ABS/audit_index" +AUDIT_INDEX_DIR="$OUTPUT_DIR_ABS/audit_index.tmp" RUST_OUT_DIR="$AUDIT_INDEX_DIR/llmcc/rust" TS_OUT_DIR="$AUDIT_INDEX_DIR/llmcc/ts" AGENTROOT_OUT_DIR="$AUDIT_INDEX_DIR/agentroot" @@ -197,7 +149,18 @@ cleanup_embed_server() { EMBED_SERVER_PID="" fi } -trap cleanup_embed_server EXIT + +cleanup_audit_index_tmp() { + if [ -n "${AUDIT_INDEX_DIR:-}" ] && [ -d "$AUDIT_INDEX_DIR" ]; then + rm -rf "$AUDIT_INDEX_DIR" + fi +} + +cleanup_all() { + cleanup_embed_server + cleanup_audit_index_tmp +} +trap cleanup_all EXIT INT TERM log "repo: $REPO_PATH_ABS" log "output: $OUTPUT_DIR_ABS" @@ -275,14 +238,8 @@ pushd "$REPO_PATH_ABS" >/dev/null repo_has_file_named() { name="$1" - if find . \ - \( -path './.git' -o -path './.git/*' \ - -o -path './target' -o -path './target/*' \ - -o -path './node_modules' -o -path './node_modules/*' \ - -o -path './dist' -o -path './dist/*' \ - -o -path './build' -o -path './build/*' \ - -o -path './.next' -o -path './.next/*' \ - -o -path './coverage' -o -path './coverage/*' \) -prune \ + # shellcheck disable=SC2046 + if find . 
\( $(exclude_find_prune_args) \) -prune \ -o -type f -name "$name" -print -quit | grep -q .; then return 0 fi @@ -291,7 +248,7 @@ repo_has_file_named() { HAS_RUST=0 HAS_TS=0 -HAS_NODE=0 +HAS_JS=0 if [ -f Cargo.toml ] || repo_has_file_named "Cargo.toml"; then HAS_RUST=1 fi @@ -299,7 +256,7 @@ if [ -f tsconfig.json ] || repo_has_file_named "tsconfig.json"; then HAS_TS=1 fi if [ -f package.json ] || repo_has_file_named "package.json"; then - HAS_NODE=1 + HAS_JS=1 fi if [ "$HAS_RUST" -eq 1 ]; then @@ -323,39 +280,22 @@ AGENTROOT_DB_PATH="$AGENTROOT_OUT_DIR/index.sqlite" export AGENTROOT_DB="$AGENTROOT_DB_PATH" log "agentroot db: $AGENTROOT_DB_PATH" -run_agentroot_query_check() { - query="$1" - out="$2" - - if AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot query "$query" --format json > "$out" 2>&1; then - return 0 - fi - if AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot query "$query" > "$out" 2>&1; then - return 0 - fi - return 1 -} - -run_agentroot_status_check() { - out="$1" - - if AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot status --format json > "$out" 2>&1; then - return 0 - fi - if AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot status > "$out" 2>&1; then - return 0 - fi - return 1 -} - -run_agentroot_vsearch_check() { - query="$1" - out="$2" +# Unified agentroot probe helper. Tries --format json first, then plain +# output, returning 1 only when both attempts fail. +# Usage: run_agentroot_check [args...] 
+run_agentroot_check() { + local subcmd="$1"; shift + local out="${*: -1}" # last positional = output file + local -a args=() + # collect everything between subcmd and output file as command args + while [ $# -gt 1 ]; do + args+=("$1"); shift + done - if AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot vsearch "$query" --format json > "$out" 2>&1; then + if AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot "$subcmd" "${args[@]+"${args[@]}"}" --format json > "$out" 2>&1; then return 0 fi - if AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot vsearch "$query" > "$out" 2>&1; then + if AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot "$subcmd" "${args[@]+"${args[@]}"}" > "$out" 2>&1; then return 0 fi return 1 @@ -406,7 +346,7 @@ attempt_agentroot_embed() { fi fi - if run_agentroot_status_check "$AGENTROOT_OUT_DIR/status.json"; then + if run_agentroot_check status "$AGENTROOT_OUT_DIR/status.json"; then AGENTROOT_DOC_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "document_count")" AGENTROOT_EMBEDDED_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "embedded_count")" else @@ -418,6 +358,31 @@ attempt_agentroot_embed() { fi } +# Shared embed-and-validate seam for both agentroot mode paths. +# Runs: status refresh (warn-only) → embed attempt → query + vsearch probes. 
+run_embed_and_validate() { + if run_agentroot_check status "$AGENTROOT_OUT_DIR/status.json"; then + AGENTROOT_DOC_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "document_count")" + AGENTROOT_EMBEDDED_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "embedded_count")" + else + warn "agentroot status check failed (see $AGENTROOT_OUT_DIR/status.json)" + fi + + attempt_agentroot_embed + + log "Running retrieval validation" + if run_agentroot_check query "retry backoff" "$AGENTROOT_OUT_DIR/query_check.txt"; then + QUERY_OK=1 + else + warn "agentroot query check failed (see $AGENTROOT_OUT_DIR/query_check.txt)" + fi + if run_agentroot_check vsearch "permission check" "$AGENTROOT_OUT_DIR/vsearch_check.txt"; then + VSEARCH_OK=1 + else + warn "agentroot vsearch check failed (see $AGENTROOT_OUT_DIR/vsearch_check.txt)" + fi +} + # agentroot compatibility: older builds expose `index`; newer builds use # collection add + update. if agentroot index --help >/dev/null 2>&1; then @@ -442,36 +407,12 @@ RETRIEVAL_STRICT="${VIBE_CODE_AUDIT_RETRIEVAL_STRICT:-0}" if [ "$AGENTROOT_MODE" = "index-subcommand" ]; then log "Running agentroot index" + # shellcheck disable=SC2046 if AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot index . 
\ - --exclude .git \ - --exclude node_modules \ - --exclude target \ - --exclude dist \ - --exclude build \ - --exclude .next \ - --exclude coverage \ + $(exclude_agentroot_flags) \ --output "$AGENTROOT_OUT_DIR"; then test -d "$AGENTROOT_OUT_DIR" - if run_agentroot_status_check "$AGENTROOT_OUT_DIR/status.json"; then - AGENTROOT_DOC_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "document_count")" - AGENTROOT_EMBEDDED_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "embedded_count")" - else - warn "agentroot status check failed (see $AGENTROOT_OUT_DIR/status.json)" - fi - - attempt_agentroot_embed - - log "Running retrieval validation" - if run_agentroot_query_check "retry backoff" "$AGENTROOT_OUT_DIR/query_check.txt"; then - QUERY_OK=1 - else - warn "agentroot query check failed (see $AGENTROOT_OUT_DIR/query_check.txt)" - fi - if run_agentroot_vsearch_check "permission check" "$AGENTROOT_OUT_DIR/vsearch_check.txt"; then - VSEARCH_OK=1 - else - warn "agentroot vsearch check failed (see $AGENTROOT_OUT_DIR/vsearch_check.txt)" - fi + run_embed_and_validate else warn "agentroot index-subcommand mode failed; falling back to collection-update mode" AGENTROOT_MODE="collection-update" @@ -487,7 +428,7 @@ if [ "$AGENTROOT_MODE" = "collection-update" ]; then if [ "$HAS_RUST" -eq 1 ]; then MASKS+=( '**/*.rs' '**/*.toml' ) fi - if [ "$HAS_TS" -eq 1 ] || [ "$HAS_NODE" -eq 1 ]; then + if [ "$HAS_TS" -eq 1 ] || [ "$HAS_JS" -eq 1 ]; then MASKS+=( '**/*.ts' '**/*.tsx' '**/*.js' '**/*.jsx' '**/*.mjs' '**/*.cjs' '**/*.json' ) fi if [ "${#MASKS[@]}" -eq 0 ]; then @@ -518,7 +459,7 @@ if [ "$AGENTROOT_MODE" = "collection-update" ]; then AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot update > "$AGENTROOT_OUT_DIR/update.txt" 2>&1 || \ die "agentroot update failed (see $AGENTROOT_OUT_DIR/update.txt)" - run_agentroot_status_check "$AGENTROOT_OUT_DIR/status.json" || \ + run_agentroot_check status "$AGENTROOT_OUT_DIR/status.json" || \ die "agentroot status failed (see 
$AGENTROOT_OUT_DIR/status.json)" AGENTROOT_DOC_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "document_count")" AGENTROOT_EMBEDDED_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "embedded_count")" @@ -535,7 +476,7 @@ if [ "$AGENTROOT_MODE" = "collection-update" ]; then printf '%s\t%s\n' "$fallback_name" '**/*' >> "$COLLECTIONS_TSV" AGENTROOT_DB="$AGENTROOT_DB_PATH" agentroot update > "$AGENTROOT_OUT_DIR/update_fallback.txt" 2>&1 || \ die "agentroot fallback update failed (see $AGENTROOT_OUT_DIR/update_fallback.txt)" - run_agentroot_status_check "$AGENTROOT_OUT_DIR/status.json" || \ + run_agentroot_check status "$AGENTROOT_OUT_DIR/status.json" || \ die "agentroot status failed after fallback (see $AGENTROOT_OUT_DIR/status.json)" AGENTROOT_DOC_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "document_count")" AGENTROOT_EMBEDDED_COUNT="$(json_int_from_file "$AGENTROOT_OUT_DIR/status.json" "embedded_count")" @@ -543,19 +484,7 @@ if [ "$AGENTROOT_MODE" = "collection-update" ]; then [ "$AGENTROOT_DOC_COUNT" -gt 0 ] || die "agentroot indexed zero documents after fallback" - attempt_agentroot_embed - - log "Running retrieval validation" - if run_agentroot_query_check "retry backoff" "$AGENTROOT_OUT_DIR/query_check.txt"; then - QUERY_OK=1 - else - warn "agentroot query check failed (see $AGENTROOT_OUT_DIR/query_check.txt)" - fi - if run_agentroot_vsearch_check "permission check" "$AGENTROOT_OUT_DIR/vsearch_check.txt"; then - VSEARCH_OK=1 - else - warn "agentroot vsearch check failed (see $AGENTROOT_OUT_DIR/vsearch_check.txt)" - fi + run_embed_and_validate fi test -s "$AGENTROOT_OUT_DIR/query_check.txt" || die "query_check.txt was not written" @@ -645,7 +574,7 @@ cat > "$MANIFEST_PATH" <