# Patch notes (free text placed ahead of the first "diff --git" header).
#
# What the hunks below change:
#
#   src/cli/commands/check.ts
#     - The `check` command's URL argument becomes optional (`check [url]`) and a
#       `--config` option is added.
#     - The commander-level default values are removed from --max-concurrency,
#       --request-delay, --max-links, --sampling, --pass-threshold and
#       --fail-threshold. The action now resolves each one as
#       CLI flag ?? config value ?? literal default (3, 200, 50, 'random',
#       50000, 100000) before calling runChecks.
#     - The action awaits findConfig(opts.config). If that rejects, the error
#       message is written to stderr, process.exitCode is set to 1 and the
#       action returns.
#     - The URL is `rawUrl ?? config?.url`; when neither is present the action
#       writes a "No URL provided" error to stderr and sets exitCode to 1.
#     - checkIds come from --checks when given, otherwise from config?.checks.
#
#   src/cli/formatters/text.ts
#     - Nine entries are added to DETAIL_FORMATTERS: rendering-strategy,
#       redirect-behavior, auth-gate-detection, llms-txt-directive,
#       tabbed-content-serialization, markdown-content-parity,
#       http-status-codes, cache-header-hygiene, section-header-quality.
#       Each one casts a field of `details` to an array, returns [] when it is
#       missing, filters the rows and maps the rest through formatDetailLine.
#
#   src/helpers/config.ts
#     - Adds findConfig(explicitPath?, startDir?). With an explicit path the
#       file is read directly and a readFile rejection propagates. Otherwise it
#       tries each CONFIG_FILENAMES entry in startDir (default: cwd) and then in
#       each parent directory, skipping ENOENT and rethrowing any other error,
#       and returns null once dirname() stops changing.
#
#   src/helpers/index.ts
#     - Re-exports findConfig next to loadConfig.
#
#   test/unit/cli/check-command.test.ts, test/unit/cli/formatters.test.ts,
#   test/unit/helpers/config.test.ts
#     - Add cases for the config-driven CLI behaviour, for the nine new detail
#       formatters, and for findConfig.
#
# NOTE(review): the text this patch was restored from had no usable line
#   structure. Line breaks, indentation and blank context lines below are a
#   reconstruction that agrees with the @@ hunk counts; they are not the
#   original bytes. Whitespace inside string literals (for example the YAML
#   fixtures such as 'checks:\n - llms-txt-exists\n') is kept as found.
# NOTE(review): several spans look like they lost angle-bracketed text, e.g.
#   `.command('check ')`, `.option('--config ', ...)`, `opts: Record`,
#   `Promise {` and `Record = {`. Confirm against the original commit before
#   applying.
# NOTE(review): no NaN check on the parseInt(...) results is visible in these
#   hunks, so a non-numeric flag or config value appears to reach runChecks as
#   NaN. Verify whether validation happens elsewhere.
# NOTE(review): findConfig casts whatever parseYaml returns to AgentDocsConfig
#   without a shape check. Confirm what parseYaml yields for an empty file.
# NOTE(review): in 'errors when no URL is provided and no config exists' the
#   rm(tmpDir) call is the last statement of the test and is not in a
#   try/finally, so it looks like a failed assertion would leave the fixture
#   directory behind.
#
diff --git a/src/cli/commands/check.ts b/src/cli/commands/check.ts
index 70439b9..199c4f1 100644
--- a/src/cli/commands/check.ts
+++ b/src/cli/commands/check.ts
@@ -4,6 +4,7 @@ import { formatText } from '../formatters/text.js';
 import { formatJson } from '../formatters/json.js';
 import { formatScorecard } from '../formatters/scorecard.js';
 import type { SamplingStrategy } from '../../types.js';
+import { findConfig } from '../../helpers/config.js';
 
 // Ensure all checks are registered
 import '../../checks/index.js';
@@ -13,28 +14,48 @@ const FORMAT_OPTIONS = ['text', 'json', 'scorecard'] as const;
 
 export function registerCheckCommand(program: Command): void {
   program
-    .command('check ')
+    .command('check [url]')
     .description('Run agent-friendly documentation checks against a URL')
+    .option('--config ', 'Path to config file (default: auto-discover agent-docs.config.yml)')
     .option('-f, --format ', 'Output format: text, json, or scorecard', 'text')
     .option('-c, --checks ', 'Comma-separated list of check IDs to run')
-    .option('--max-concurrency ', 'Maximum concurrent requests', '3')
-    .option('--request-delay ', 'Delay between requests in ms', '200')
-    .option('--max-links ', 'Maximum links to test', '50')
-    .option(
-      '--sampling ',
-      'URL sampling strategy: random, deterministic, or none',
-      'random',
-    )
-    .option('--pass-threshold ', 'Pass threshold in characters', '50000')
-    .option('--fail-threshold ', 'Fail threshold in characters', '100000')
+    .option('--max-concurrency ', 'Maximum concurrent requests')
+    .option('--request-delay ', 'Delay between requests in ms')
+    .option('--max-links ', 'Maximum links to test')
+    .option('--sampling ', 'URL sampling strategy: random, deterministic, or none')
+    .option('--pass-threshold ', 'Pass threshold in characters')
+    .option('--fail-threshold ', 'Fail threshold in characters')
     .option('-v, --verbose', 'Show per-page details for checks with issues')
     .option('--fixes', 'Show fix suggestions for warn/fail checks')
     .option('--score', 'Include scoring data in JSON output')
-    .action(async (rawUrl: string, opts: Record) => {
-      const url = normalizeUrl(rawUrl);
-      const checkIds = opts.checks ? opts.checks.split(',').map((s) => s.trim()) : undefined;
-      const format = opts.format as string;
+    .action(async (rawUrl: string | undefined, opts: Record) => {
+      // Load config: explicit path or auto-discover
+      let config;
+      try {
+        config = await findConfig(opts.config as string | undefined);
+      } catch (err) {
+        process.stderr.write(`Error: ${(err as Error).message}\n`);
+        process.exitCode = 1;
+        return;
+      }
 
+      // Resolve URL: CLI arg > config url > error
+      const resolvedUrl = rawUrl ?? config?.url;
+      if (!resolvedUrl) {
+        process.stderr.write(
+          'Error: No URL provided. Pass a URL as an argument or set "url" in agent-docs.config.yml\n',
+        );
+        process.exitCode = 1;
+        return;
+      }
+      const url = normalizeUrl(resolvedUrl);
+
+      // Resolve options: CLI flags > config > hardcoded defaults
+      const checkIds = opts.checks
+        ? (opts.checks as string).split(',').map((s) => s.trim())
+        : config?.checks;
+
+      const format = opts.format as string;
       if (!FORMAT_OPTIONS.includes(format as (typeof FORMAT_OPTIONS)[number])) {
         process.stderr.write(
           `Error: Invalid format "${format}". Must be one of: ${FORMAT_OPTIONS.join(', ')}\n`,
@@ -43,7 +64,9 @@ export function registerCheckCommand(program: Command): void {
         return;
       }
 
-      const sampling = opts.sampling as SamplingStrategy;
+      const samplingRaw =
+        (opts.sampling as string | undefined) ?? config?.options?.samplingStrategy ?? 'random';
+      const sampling = samplingRaw as SamplingStrategy;
       if (!SAMPLING_STRATEGIES.includes(sampling)) {
         process.stderr.write(
           `Error: Invalid sampling strategy "${sampling}". Must be one of: ${SAMPLING_STRATEGIES.join(', ')}\n`,
@@ -52,6 +75,31 @@ export function registerCheckCommand(program: Command): void {
         return;
       }
 
+      const maxConcurrency = parseInt(
+        String((opts.maxConcurrency as string | undefined) ?? config?.options?.maxConcurrency ?? 3),
+        10,
+      );
+      const requestDelay = parseInt(
+        String((opts.requestDelay as string | undefined) ?? config?.options?.requestDelay ?? 200),
+        10,
+      );
+      const maxLinksToTest = parseInt(
+        String((opts.maxLinks as string | undefined) ?? config?.options?.maxLinksToTest ?? 50),
+        10,
+      );
+      const passThreshold = parseInt(
+        String(
+          (opts.passThreshold as string | undefined) ?? config?.options?.thresholds?.pass ?? 50000,
+        ),
+        10,
+      );
+      const failThreshold = parseInt(
+        String(
+          (opts.failThreshold as string | undefined) ?? config?.options?.thresholds?.fail ?? 100000,
+        ),
+        10,
+      );
+
       if (format !== 'json') {
         const parsed = new URL(url);
         const target =
@@ -63,13 +111,13 @@ export function registerCheckCommand(program: Command): void {
 
       const report = await runChecks(url, {
         checkIds,
-        maxConcurrency: parseInt(opts.maxConcurrency, 10),
-        requestDelay: parseInt(opts.requestDelay, 10),
-        maxLinksToTest: parseInt(opts.maxLinks, 10),
+        maxConcurrency,
+        requestDelay,
+        maxLinksToTest,
         samplingStrategy: sampling,
         thresholds: {
-          pass: parseInt(opts.passThreshold, 10),
-          fail: parseInt(opts.failThreshold, 10),
+          pass: passThreshold,
+          fail: failThreshold,
         },
       });
 
diff --git a/src/cli/formatters/text.ts b/src/cli/formatters/text.ts
index f63bf49..d78d31c 100644
--- a/src/cli/formatters/text.ts
+++ b/src/cli/formatters/text.ts
@@ -123,6 +123,203 @@ const DETAIL_FORMATTERS: Record = {
       return formatDetailLine('fail', b.url, info);
     });
   },
+
+  'rendering-strategy': (details) => {
+    const pages = details.pageResults as
+      | Array<{
+          url: string;
+          status: string;
+          analysis?: { spaMarker?: string | null; visibleTextLength?: number };
+          error?: string;
+        }>
+      | undefined;
+    if (!pages) return [];
+    return pages
+      .filter((p) => p.status !== 'pass')
+      .map((p) => {
+        if (p.error) return formatDetailLine('fail', p.url, p.error);
+        const marker = p.analysis?.spaMarker;
+        const textLen = p.analysis?.visibleTextLength ?? 0;
+        const info = marker
+          ? `SPA shell (${marker}, ${textLen} chars visible)`
+          : `sparse content (${textLen} chars visible)`;
+        return formatDetailLine(p.status, p.url, info);
+      });
+  },
+
+  'redirect-behavior': (details) => {
+    const pages = details.pageResults as
+      | Array<{ url: string; classification: string; redirectTarget?: string; error?: string }>
+      | undefined;
+    if (!pages) return [];
+    return pages
+      .filter(
+        (p) =>
+          p.classification === 'cross-host' ||
+          p.classification === 'js-redirect' ||
+          p.classification === 'fetch-error',
+      )
+      .map((p) => {
+        if (p.error) return formatDetailLine('fail', p.url, p.error);
+        const target = p.redirectTarget ? ` → ${p.redirectTarget}` : '';
+        return formatDetailLine(
+          p.classification === 'cross-host' ? 'warn' : 'fail',
+          p.url,
+          `${p.classification}${target}`,
+        );
+      });
+  },
+
+  'auth-gate-detection': (details) => {
+    const pages = details.pageResults as
+      | Array<{
+          url: string;
+          classification: string;
+          status?: number | null;
+          hint?: string;
+          ssoDomain?: string;
+          error?: string;
+        }>
+      | undefined;
+    if (!pages) return [];
+    return pages
+      .filter((p) => p.classification !== 'accessible')
+      .map((p) => {
+        if (p.error) return formatDetailLine('fail', p.url, p.error);
+        let info = p.classification;
+        if (p.ssoDomain) info += ` (${p.ssoDomain})`;
+        else if (p.hint) info += ` (${p.hint})`;
+        else if (p.status) info += ` (HTTP ${p.status})`;
+        return formatDetailLine('fail', p.url, info);
+      });
+  },
+
+  'llms-txt-directive': (details) => {
+    const pages = details.pageResults as
+      | Array<{ url: string; found: boolean; positionPercent?: number; error?: string }>
+      | undefined;
+    if (!pages) return [];
+    return pages
+      .filter((p) => !p.found || (p.positionPercent != null && p.positionPercent > 10))
+      .map((p) => {
+        if (p.error) return formatDetailLine('fail', p.url, p.error);
+        if (!p.found) return formatDetailLine('fail', p.url, 'no directive found');
+        return formatDetailLine('warn', p.url, `directive at ${p.positionPercent}% of page`);
+      });
+  },
+
+  'tabbed-content-serialization': (details) => {
+    const pages = details.tabbedPages as
+      | Array<{
+          url: string;
+          status: string;
+          tabGroups?: unknown[];
+          totalTabbedChars?: number;
+          error?: string;
+        }>
+      | undefined;
+    if (!pages) return [];
+    return pages
+      .filter((p) => p.status !== 'pass')
+      .map((p) => {
+        if (p.error) return formatDetailLine('fail', p.url, p.error);
+        const groups = p.tabGroups?.length ?? 0;
+        const size = formatSize(p.totalTabbedChars ?? 0);
+        return formatDetailLine(p.status, p.url, `${groups} tab groups, ${size} serialized`);
+      });
+  },
+
+  'markdown-content-parity': (details) => {
+    const pages = details.pageResults as
+      | Array<{
+          url: string;
+          status: string;
+          missingPercent?: number;
+          sampleDiffs?: string[];
+          error?: string;
+        }>
+      | undefined;
+    if (!pages) return [];
+    return pages
+      .filter((p) => p.status !== 'pass')
+      .map((p) => {
+        if (p.error) return formatDetailLine('fail', p.url, p.error);
+        const pct =
+          p.missingPercent != null ? `${Math.round(p.missingPercent)}% missing` : 'content differs';
+        return formatDetailLine(p.status, p.url, pct);
+      });
+  },
+
+  'http-status-codes': (details) => {
+    const pages = details.pageResults as
+      | Array<{
+          url: string;
+          testUrl?: string;
+          classification: string;
+          status?: number | null;
+          bodyHint?: string;
+          error?: string;
+        }>
+      | undefined;
+    if (!pages) return [];
+    return pages
+      .filter((p) => p.classification !== 'correct-error')
+      .map((p) => {
+        if (p.error) return formatDetailLine('fail', p.testUrl ?? p.url, p.error);
+        const info = p.bodyHint
+          ? `HTTP ${p.status} (${p.bodyHint})`
+          : `HTTP ${p.status} instead of 404`;
+        return formatDetailLine('fail', p.testUrl ?? p.url, info);
+      });
+  },
+
+  'cache-header-hygiene': (details) => {
+    const endpoints = details.endpointResults as
+      | Array<{
+          url: string;
+          status: string;
+          effectiveMaxAge?: number | null;
+          noStore?: boolean;
+          error?: string;
+        }>
+      | undefined;
+    if (!endpoints) return [];
+    return endpoints
+      .filter((e) => e.status !== 'pass')
+      .map((e) => {
+        if (e.error) return formatDetailLine('fail', e.url, e.error);
+        if (e.noStore) return formatDetailLine(e.status, e.url, 'no-store');
+        if (e.effectiveMaxAge == null) return formatDetailLine(e.status, e.url, 'no cache headers');
+        const age = e.effectiveMaxAge;
+        const human =
+          age >= 86400
+            ? `${Math.round(age / 86400)}d`
+            : age >= 3600
+              ? `${Math.round(age / 3600)}h`
+              : `${age}s`;
+        return formatDetailLine(e.status, e.url, `max-age ${human}`);
+      });
+  },
+
+  'section-header-quality': (details) => {
+    const analyses = details.analyses as
+      | Array<{
+          url: string;
+          framework?: string;
+          genericHeaders?: number;
+          totalHeaders?: number;
+          hasGenericMajority?: boolean;
+        }>
+      | undefined;
+    if (!analyses) return [];
+    return analyses
+      .filter((a) => a.hasGenericMajority)
+      .map((a) => {
+        const ratio = `${a.genericHeaders}/${a.totalHeaders} generic`;
+        const fw = a.framework ? ` (${a.framework})` : '';
+        return formatDetailLine('warn', a.url, `${ratio}${fw}`);
+      });
+  },
 };
 
 function formatDetailLine(status: string, url: string, metric: string): string {
diff --git a/src/helpers/config.ts b/src/helpers/config.ts
index aa83cd8..a013943 100644
--- a/src/helpers/config.ts
+++ b/src/helpers/config.ts
@@ -38,3 +38,40 @@ export async function loadConfig(dir?: string): Promise {
     `No agent-docs config file found. Create ${CONFIG_FILENAMES[0]} with at least a "url" field.`,
   );
 }
+
+/**
+ * CLI-oriented config loader:
+ * - If explicitPath is given, reads that file directly and throws if not found.
+ * - Otherwise, auto-discovers by walking up from startDir (default: cwd).
+ * - Returns null if no config file is found (instead of throwing).
+ * - Does not require the "url" field — the CLI can supply it via argument.
+ */
+export async function findConfig(
+  explicitPath?: string,
+  startDir?: string,
+): Promise {
+  if (explicitPath) {
+    const filepath = resolve(process.cwd(), explicitPath);
+    const content = await readFile(filepath, 'utf-8');
+    return parseYaml(content) as AgentDocsConfig;
+  }
+
+  let searchDir = resolve(startDir ?? process.cwd());
+  while (true) {
+    for (const filename of CONFIG_FILENAMES) {
+      const filepath = resolve(searchDir, filename);
+      try {
+        const content = await readFile(filepath, 'utf-8');
+        return parseYaml(content) as AgentDocsConfig;
+      } catch (err) {
+        if ((err as NodeJS.ErrnoException).code === 'ENOENT') continue;
+        throw err;
+      }
+    }
+    const parent = dirname(searchDir);
+    if (parent === searchDir) break;
+    searchDir = parent;
+  }
+
+  return null;
+}
diff --git a/src/helpers/index.ts b/src/helpers/index.ts
index 8e5533f..acdbc2d 100644
--- a/src/helpers/index.ts
+++ b/src/helpers/index.ts
@@ -1,4 +1,4 @@
-export { loadConfig } from './config.js';
+export { loadConfig, findConfig } from './config.js';
 export { describeAgentDocs, describeAgentDocsPerCheck } from './vitest-runner.js';
 export { looksLikeMarkdown, looksLikeHtml } from './detect-markdown.js';
 export {
diff --git a/test/unit/cli/check-command.test.ts b/test/unit/cli/check-command.test.ts
index 555c632..dee50f8 100644
--- a/test/unit/cli/check-command.test.ts
+++ b/test/unit/cli/check-command.test.ts
@@ -1,6 +1,8 @@
-import { describe, it, expect, vi, beforeAll, afterEach } from 'vitest';
+import { describe, it, expect, vi, beforeAll, beforeEach, afterEach } from 'vitest';
 import { http, HttpResponse } from 'msw';
 import { setupServer } from 'msw/node';
+import { writeFile, mkdir, rm } from 'node:fs/promises';
+import { resolve } from 'node:path';
 
 const VALID_LLMS_TXT = `# Test
 
@@ -218,6 +220,28 @@ describe('check command', () => {
     stderrSpy.mockRestore();
   });
 
+  it('errors when no URL is provided and no config exists', async () => {
+    const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
+
+    // Point to a config with no url field so auto-discovery doesn't find a real one
+    const tmpDir = resolve(import.meta.dirname, '../../fixtures/cli-no-url-test');
+    await mkdir(tmpDir, { recursive: true });
+    const configPath = resolve(tmpDir, 'no-url.yml');
+    await writeFile(configPath, 'checks:\n - llms-txt-exists\n');
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run(['node', 'afdocs', 'check', '--config', configPath, '--request-delay', '0']);
+
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = stderrSpy.mock.calls.map((c) => c[0]).join('');
+    expect(output).toContain('No URL provided');
+    expect(process.exitCode).toBe(1);
+
+    stderrSpy.mockRestore();
+    await rm(tmpDir, { recursive: true, force: true });
+  });
+
   it('does not set exit code 1 when all pass', async () => {
     server.use(
       http.get('http://cmd-pass.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
@@ -248,3 +272,138 @@ describe('check command', () => {
     writeSpy.mockRestore();
   });
 });
+
+const CONFIG_TMP = resolve(import.meta.dirname, '../../fixtures/cli-config-test');
+
+describe('check command config integration', () => {
+  beforeEach(async () => {
+    await mkdir(CONFIG_TMP, { recursive: true });
+  });
+
+  afterEach(async () => {
+    await rm(CONFIG_TMP, { recursive: true, force: true });
+    process.exitCode = undefined;
+  });
+
+  it('uses URL from config when no CLI arg is passed', async () => {
+    server.use(
+      http.get('http://cfg-url.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
+      http.get('http://cfg-url.local/docs/llms.txt', () => new HttpResponse(null, { status: 404 })),
+    );
+
+    const configPath = resolve(CONFIG_TMP, 'agent-docs.config.yml');
+    await writeFile(configPath, 'url: http://cfg-url.local\nchecks:\n - llms-txt-exists\n');
+
+    const writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run(['node', 'afdocs', 'check', '--config', configPath, '--request-delay', '0']);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = writeSpy.mock.calls.map((c) => c[0]).join('');
+    expect(output).toContain('llms-txt-exists');
+    expect(output).toContain('pass');
+
+    writeSpy.mockRestore();
+  });
+
+  it('CLI URL arg overrides config URL', async () => {
+    server.use(
+      http.get('http://cfg-override.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
+      http.get(
+        'http://cfg-override.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+    );
+
+    const configPath = resolve(CONFIG_TMP, 'agent-docs.config.yml');
+    await writeFile(configPath, 'url: http://cfg-url.local\nchecks:\n - llms-txt-exists\n');
+
+    const writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run([
+      'node',
+      'afdocs',
+      'check',
+      'http://cfg-override.local',
+      '--config',
+      configPath,
+      '--request-delay',
+      '0',
+    ]);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = writeSpy.mock.calls.map((c) => c[0]).join('');
+    expect(output).toContain('cfg-override.local');
+
+    writeSpy.mockRestore();
+  });
+
+  it('uses checks from config when no --checks flag', async () => {
+    server.use(
+      http.get('http://cfg-checks.local/llms.txt', () => HttpResponse.text(VALID_LLMS_TXT)),
+      http.get(
+        'http://cfg-checks.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+    );
+
+    const configPath = resolve(CONFIG_TMP, 'agent-docs.config.yml');
+    await writeFile(configPath, 'url: http://cfg-checks.local\nchecks:\n - llms-txt-exists\n');
+
+    const writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run(['node', 'afdocs', 'check', '--config', configPath, '--request-delay', '0']);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = writeSpy.mock.calls.map((c) => c[0]).join('');
+    // Only llms-txt-exists ran — output should not contain checks outside the config list
+    expect(output).toContain('llms-txt-exists');
+    expect(output).not.toContain('rendering-strategy');
+
+    writeSpy.mockRestore();
+  });
+
+  it('--checks flag overrides config checks', async () => {
+    server.use(
+      http.get('http://cfg-checks-override.local/llms.txt', () =>
+        HttpResponse.text(VALID_LLMS_TXT),
+      ),
+      http.get(
+        'http://cfg-checks-override.local/docs/llms.txt',
+        () => new HttpResponse(null, { status: 404 }),
+      ),
+    );
+
+    const configPath = resolve(CONFIG_TMP, 'agent-docs.config.yml');
+    // Config lists llms-txt-exists; CLI overrides with llms-txt-valid
+    await writeFile(
+      configPath,
+      'url: http://cfg-checks-override.local\nchecks:\n - llms-txt-exists\n',
+    );
+
+    const writeSpy = vi.spyOn(process.stdout, 'write').mockImplementation(() => true);
+
+    const { run } = await import('../../../src/cli/index.js');
+    await run([
+      'node',
+      'afdocs',
+      'check',
+      '--config',
+      configPath,
+      '--checks',
+      'llms-txt-valid',
+      '--request-delay',
+      '0',
+    ]);
+    await new Promise((r) => setTimeout(r, 100));
+
+    const output = writeSpy.mock.calls.map((c) => c[0]).join('');
+    expect(output).toContain('llms-txt-valid');
+    expect(output).not.toContain('llms-txt-exists');
+
+    writeSpy.mockRestore();
+  });
+});
diff --git a/test/unit/cli/formatters.test.ts b/test/unit/cli/formatters.test.ts
index 6a7de0f..6617b80 100644
--- a/test/unit/cli/formatters.test.ts
+++ b/test/unit/cli/formatters.test.ts
@@ -495,6 +495,341 @@ describe('formatText', () => {
     expect(output).not.toContain('rendering-strategy detected SPA shells');
   });
 
+  it('shows SPA shell details for rendering-strategy', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'rendering-strategy',
+          category: 'page-size',
+          status: 'fail',
+          message: 'SPA shells detected',
+          details: {
+            pageResults: [
+              {
+                url: 'https://example.com/spa',
+                status: 'fail',
+                analysis: { spaMarker: '__NEXT_DATA__', visibleTextLength: 42 },
+              },
+              {
+                url: 'https://example.com/ok',
+                status: 'pass',
+                analysis: { spaMarker: null, visibleTextLength: 5000 },
+              },
+              {
+                url: 'https://example.com/sparse',
+                status: 'warn',
+                analysis: { spaMarker: null, visibleTextLength: 120 },
+              },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/spa');
+    expect(output).toContain('SPA shell');
+    expect(output).toContain('__NEXT_DATA__');
+    expect(output).toContain('42 chars visible');
+    expect(output).toContain('https://example.com/sparse');
+    expect(output).toContain('sparse content');
+    expect(output).not.toContain('https://example.com/ok');
+  });
+
+  it('shows redirect details for redirect-behavior', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'redirect-behavior',
+          category: 'url-stability',
+          status: 'warn',
+          message: 'Cross-host redirects found',
+          details: {
+            pageResults: [
+              {
+                url: 'https://example.com/old',
+                classification: 'cross-host',
+                redirectTarget: 'https://docs.example.com/old',
+              },
+              {
+                url: 'https://example.com/fine',
+                classification: 'same-host',
+                redirectTarget: 'https://example.com/new',
+              },
+              {
+                url: 'https://example.com/js',
+                classification: 'js-redirect',
+              },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/old');
+    expect(output).toContain('cross-host');
+    expect(output).toContain('https://docs.example.com/old');
+    expect(output).toContain('https://example.com/js');
+    expect(output).toContain('js-redirect');
+    expect(output).not.toContain('https://example.com/fine');
+  });
+
+  it('shows auth gating details for auth-gate-detection', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'auth-gate-detection',
+          category: 'authentication',
+          status: 'fail',
+          message: 'Auth gates detected',
+          details: {
+            pageResults: [
+              {
+                url: 'https://example.com/docs/private',
+                classification: 'auth-required',
+                status: 401,
+              },
+              {
+                url: 'https://example.com/docs/sso',
+                classification: 'auth-redirect',
+                ssoDomain: 'login.okta.com',
+              },
+              {
+                url: 'https://example.com/docs/login',
+                classification: 'soft-auth-gate',
+                hint: 'Contains password input field',
+              },
+              {
+                url: 'https://example.com/docs/public',
+                classification: 'accessible',
+              },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/docs/private');
+    expect(output).toContain('HTTP 401');
+    expect(output).toContain('https://example.com/docs/sso');
+    expect(output).toContain('login.okta.com');
+    expect(output).toContain('https://example.com/docs/login');
+    expect(output).toContain('Contains password input field');
+    expect(output).not.toContain('https://example.com/docs/public');
+  });
+
+  it('shows missing directives for llms-txt-directive', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'llms-txt-directive',
+          category: 'content-discoverability',
+          status: 'fail',
+          message: 'No directives found',
+          details: {
+            pageResults: [
+              { url: 'https://example.com/page1', found: false },
+              { url: 'https://example.com/page2', found: true, positionPercent: 2 },
+              { url: 'https://example.com/page3', found: true, positionPercent: 67 },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/page1');
+    expect(output).toContain('no directive found');
+    // page2 has directive near top (2%), should not appear
+    expect(output).not.toContain('https://example.com/page2');
+    // page3 has buried directive
+    expect(output).toContain('https://example.com/page3');
+    expect(output).toContain('directive at 67% of page');
+  });
+
+  it('shows tab serialization details for tabbed-content-serialization', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'tabbed-content-serialization',
+          category: 'content-structure',
+          status: 'warn',
+          message: 'Oversized tabs found',
+          details: {
+            tabbedPages: [
+              {
+                url: 'https://example.com/tabs',
+                status: 'warn',
+                tabGroups: [{}, {}, {}],
+                totalTabbedChars: 25000,
+              },
+              {
+                url: 'https://example.com/small-tabs',
+                status: 'pass',
+                tabGroups: [{}],
+                totalTabbedChars: 500,
+              },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/tabs');
+    expect(output).toContain('3 tab groups');
+    expect(output).toContain('25K chars');
+    expect(output).not.toContain('https://example.com/small-tabs');
+  });
+
+  it('shows content parity details for markdown-content-parity', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'markdown-content-parity',
+          category: 'observability',
+          status: 'warn',
+          message: 'Content drift detected',
+          details: {
+            pageResults: [
+              {
+                url: 'https://example.com/drift',
+                status: 'warn',
+                missingPercent: 23.4,
+              },
+              {
+                url: 'https://example.com/ok',
+                status: 'pass',
+                missingPercent: 2,
+              },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/drift');
+    expect(output).toContain('23% missing');
+    expect(output).not.toContain('https://example.com/ok');
+  });
+
+  it('shows soft-404 details for http-status-codes', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'http-status-codes',
+          category: 'url-stability',
+          status: 'fail',
+          message: 'Soft 404s detected',
+          details: {
+            pageResults: [
+              {
+                url: 'https://example.com/page1',
+                testUrl: 'https://example.com/page1/nonexistent-abc123',
+                classification: 'soft-404',
+                status: 200,
+                bodyHint: 'Body contains "not found" text',
+              },
+              {
+                url: 'https://example.com/page2',
+                testUrl: 'https://example.com/page2/nonexistent-abc123',
+                classification: 'correct-error',
+                status: 404,
+              },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 0, fail: 1, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/page1/nonexistent-abc123');
+    expect(output).toContain('HTTP 200');
+    expect(output).not.toContain('https://example.com/page2');
+  });
+
+  it('shows cache header details for cache-header-hygiene', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'cache-header-hygiene',
+          category: 'observability',
+          status: 'warn',
+          message: 'Long cache times detected',
+          details: {
+            endpointResults: [
+              {
+                url: 'https://example.com/llms.txt',
+                status: 'warn',
+                effectiveMaxAge: 604800,
+                noStore: false,
+              },
+              {
+                url: 'https://example.com/page.md',
+                status: 'fail',
+                effectiveMaxAge: null,
+                noStore: false,
+              },
+              {
+                url: 'https://example.com/ok.md',
+                status: 'pass',
+                effectiveMaxAge: 3600,
+                noStore: false,
+              },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/llms.txt');
+    expect(output).toContain('max-age 7d');
+    expect(output).toContain('https://example.com/page.md');
+    expect(output).toContain('no cache headers');
+    expect(output).not.toContain('https://example.com/ok.md');
+  });
+
+  it('shows generic header details for section-header-quality', () => {
+    const report = makeReport({
+      results: [
+        {
+          id: 'section-header-quality',
+          category: 'content-structure',
+          status: 'warn',
+          message: 'Generic headers found',
+          details: {
+            analyses: [
+              {
+                url: 'https://example.com/tabs',
+                framework: 'Docusaurus',
+                genericHeaders: 8,
+                totalHeaders: 10,
+                hasGenericMajority: true,
+              },
+              {
+                url: 'https://example.com/good',
+                framework: 'Docusaurus',
+                genericHeaders: 1,
+                totalHeaders: 10,
+                hasGenericMajority: false,
+              },
+            ],
+          },
+        },
+      ],
+      summary: { total: 1, pass: 0, warn: 1, fail: 0, skip: 0, error: 0 },
+    });
+    const output = formatText(report, { verbose: true });
+    expect(output).toContain('https://example.com/tabs');
+    expect(output).toContain('8/10 generic');
+    expect(output).toContain('Docusaurus');
+    expect(output).not.toContain('https://example.com/good');
+  });
+
   it('does not show details without verbose flag', () => {
     const report = makeReport({
       results: [
diff --git a/test/unit/helpers/config.test.ts b/test/unit/helpers/config.test.ts
index 0adb518..161d8c3 100644
--- a/test/unit/helpers/config.test.ts
+++ b/test/unit/helpers/config.test.ts
@@ -1,7 +1,7 @@
 import { describe, it, expect, afterEach } from 'vitest';
 import { writeFile, mkdir, rm } from 'node:fs/promises';
 import { resolve } from 'node:path';
-import { loadConfig } from '../../../src/helpers/config.js';
+import { loadConfig, findConfig } from '../../../src/helpers/config.js';
 
 const TMP_DIR = resolve(import.meta.dirname, '../../fixtures/config-test');
 
@@ -79,3 +79,70 @@ describe('loadConfig', () => {
     expect(config.url).toBe('https://child.example.com');
   });
 });
+
+describe('findConfig', () => {
+  it('returns null when no config file found', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+
+    const result = await findConfig(undefined, TMP_DIR);
+    expect(result).toBeNull();
+  });
+
+  it('loads config from an explicit path', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    const configPath = resolve(TMP_DIR, 'custom.yml');
+    await writeFile(configPath, 'url: https://custom.example.com\nchecks:\n - llms-txt-exists\n');
+
+    const config = await findConfig(configPath);
+    expect(config?.url).toBe('https://custom.example.com');
+    expect(config?.checks).toEqual(['llms-txt-exists']);
+  });
+
+  it('throws when explicit path does not exist', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    const missing = resolve(TMP_DIR, 'nonexistent.yml');
+
+    await expect(findConfig(missing)).rejects.toThrow();
+  });
+
+  it('does not require the url field', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    const configPath = resolve(TMP_DIR, 'no-url.yml');
+    await writeFile(configPath, 'checks:\n - llms-txt-exists\n');
+
+    const config = await findConfig(configPath);
+    expect(config?.checks).toEqual(['llms-txt-exists']);
+    expect(config?.url).toBeUndefined();
+  });
+
+  it('loads options from explicit path', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    const configPath = resolve(TMP_DIR, 'with-options.yml');
+    await writeFile(
+      configPath,
+      'url: https://example.com\noptions:\n samplingStrategy: deterministic\n maxLinksToTest: 10\n',
+    );
+
+    const config = await findConfig(configPath);
+    expect(config?.options?.samplingStrategy).toBe('deterministic');
+    expect(config?.options?.maxLinksToTest).toBe(10);
+  });
+
+  it('walks up directories to find config', async () => {
+    const childDir = resolve(TMP_DIR, 'sub/nested');
+    await mkdir(childDir, { recursive: true });
+    await writeFile(resolve(TMP_DIR, 'agent-docs.config.yml'), 'url: https://parent.example.com\n');
+
+    const config = await findConfig(undefined, childDir);
+    expect(config?.url).toBe('https://parent.example.com');
+  });
+
+  it('prefers .yml over .yaml', async () => {
+    await mkdir(TMP_DIR, { recursive: true });
+    await writeFile(resolve(TMP_DIR, 'agent-docs.config.yml'), 'url: https://yml.example.com\n');
+    await writeFile(resolve(TMP_DIR, 'agent-docs.config.yaml'), 'url: https://yaml.example.com\n');
+
+    const config = await findConfig(undefined, TMP_DIR);
+    expect(config?.url).toBe('https://yml.example.com');
+  });
+});