diff --git a/tests/INDEX.md b/tests/INDEX.md new file mode 100644 index 0000000..7effb9a --- /dev/null +++ b/tests/INDEX.md @@ -0,0 +1,192 @@ +# Test Registry + +This index maps each SDK function/module to its **single test category**. Before adding a test, check this registry — if the function is already covered in a higher category, add your assertion there instead of creating a new file in a lower category. + +## Category priority (highest wins) + +When a function could belong to multiple categories, place it in the **highest applicable** one: + +``` +pipelines > dispatch > integration > contracts > composition > boundaries > behavior +``` + +**Rule: each function gets ONE category.** If `stepCountIs` has meaningful peer boundaries, it goes in `contracts/` — not `behavior/` AND `contracts/`. Functions with no peer comparisons stay in `behavior/`. + +--- + +## Registry + +### stop-conditions.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `stepCountIs` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria | +| `hasToolCall` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria | +| `maxTokensUsed` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria | +| `maxCost` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria | +| `finishReasonIs` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria | +| stop condition evaluation (combined) | behavior | `behavior/stop-conditions-evaluation.test.ts` | Tests `evaluateStopConditions` orchestration logic (no peer comparison) | + +### stream-type-guards.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `isOutputTextDeltaEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards | +| 
`isReasoningDeltaEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards | +| `isFunctionCallArgumentsDeltaEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards | +| `isOutputItemAddedEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards | +| `isOutputItemDoneEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards | +| `isResponseCompletedEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards | +| `isResponseFailedEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards | +| `isResponseIncompleteEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards | +| `isFunctionCallArgumentsDoneEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards | +| `isOutputMessage` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards | +| `isFunctionCallItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards | +| `isReasoningOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards | +| `isWebSearchCallOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards | +| `isFileSearchCallOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards | +| `isImageGenerationCallOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards | +| `isOutputTextPart` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards | +| `isRefusalPart` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards | +| `isFileCitationAnnotation` | 
boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards | +| `isURLCitationAnnotation` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards | +| `isFilePathAnnotation` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards | +| `hasTypeProperty` | behavior | `behavior/stream-type-guards-negative.test.ts` | Utility function, no peers | +| stream vs output item cross-domain | boundaries | `boundaries/domain-separation.test.ts` | Guards reject events from wrong domain | +| response stream event guards | boundaries | `boundaries/response-stream-event-guards.test.ts` | Mutual exclusion with peer guards | + +### tool-types.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `hasExecuteFunction` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types | +| `isRegularExecuteTool` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types | +| `isGeneratorTool` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types | +| `isManualTool` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types | +| `toolRequiresApproval` | behavior | `behavior/tool-approval.test.ts` | No peer comparison, isolated behavior | +| `ToolEventBroadcaster` | behavior | `behavior/tool-event-broadcaster.test.ts` | No peer comparison, isolated behavior | +| tool type events (combined) | behavior | `behavior/tool-types-events.test.ts` | Event shape verification, isolated | + +### tool.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `tool()` factory | behavior | `behavior/tool-creation.test.ts` | Isolated factory behavior | +| tool factory shapes (regular vs generator vs manual) | boundaries | `boundaries/tool-factory-shapes.test.ts` | Structural distinction between tool 
types | + +### tool-executor.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `executeRegularTool` | behavior | `behavior/tool-execution.test.ts` | Isolated execution behavior | +| `executeRegularTool` vs `executeGeneratorTool` | contracts | `contracts/execute-tool-boundary.test.ts` | Each handles its type AND rejects the other | +| `executeTool` dispatch | dispatch | `dispatch/execute-tool-dispatch.test.ts` | Routes via type guard to correct executor | + +### tool-context.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `resolveContext`, `ToolContextStore` | behavior | `behavior/tool-context.test.ts` | Isolated context resolution | +| `buildToolExecuteContext` | integration | `integration/tool-context-execution.test.ts` | Correct output AND feeds tool execute | + +### turn-context.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `buildTurnContext`, `normalizeInputToArray` | behavior | `behavior/turn-context.test.ts` | Isolated shape verification | +| turn context -> async params | integration | `integration/turn-context-async-params.test.ts` | Correct output AND feeds resolveAsyncFunctions | + +### async-params.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `resolveAsyncFunctions` | contracts | `contracts/async-params.test.ts` | Static vs function vs client-only handled distinctly | + +### conversation-state.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `createInitialState`, `updateState`, `appendToMessages` | behavior | `behavior/conversation-state.test.ts` | Isolated state management | +| `appendToMessages` + format compat | integration | `integration/conversation-state-format.test.ts` | Output feeds format conversion | +| `partitionToolCalls` | dispatch | `dispatch/approval-partition-dispatch.test.ts` | Routes via approval checks | +| `createUnsentResult` 
vs `createRejectedResult` | boundaries | `boundaries/conversation-state-results.test.ts` | Structural distinction | + +### next-turn-params.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `executeNextTurnParamsFunctions` | behavior | `behavior/next-turn-params.test.ts` | Isolated param computation | +| next-turn params -> request | integration | `integration/next-turn-params-request.test.ts` | Output feeds applyNextTurnParamsToRequest | + +### reusable-stream.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `ReusableReadableStream` | behavior | `behavior/reusable-stream.test.ts` | Isolated stream behavior | +| multi-consumer correctness | integration | `integration/reusable-stream-consumers.test.ts` | Multiple consumers both get correct data | + +### stream-transformers.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `extractTextDeltas`, `extractReasoningDeltas`, `extractToolDeltas` | contracts | `contracts/delta-extractors.test.ts` | Each yields its type AND skips peers | +| `buildMessageStream`, `buildResponsesMessageStream` | contracts | `contracts/message-stream-builders.test.ts` | Each produces distinct format | +| `buildItemsStream` | contracts | `contracts/items-stream.test.ts` | Produces items format distinctly | +| `buildItemsStream` dispatch | dispatch | `dispatch/items-stream-dispatch.test.ts` | Routes events via guards | +| `consumeStreamForCompletion` | contracts | `contracts/consume-stream-completion.test.ts` | Consumes correct terminal event | +| `getResponseObject`, `getTextContent` | contracts | `contracts/response-extractors.test.ts` | Each extracts distinct data | +| `convertToClaudeMessage` | dispatch | `dispatch/claude-conversion-dispatch.test.ts` | Routes items via output guards | +| `consumeStreamForCompletion` + guards | integration | `integration/stream-completion-guards.test.ts` | Correct result AND guard identified 
event | + +### anthropic-compat.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `fromClaudeMessages` | contracts | `contracts/from-claude-messages.test.ts` | Maps each block type distinctly | +| `fromClaudeMessages` dispatch | dispatch | `dispatch/from-claude-dispatch.test.ts` | Routes mixed block types | +| `toClaudeMessage` + `fromClaudeMessages` round-trip | pipelines | `pipelines/format-round-trip.test.ts` | Full conversion pipeline | + +### chat-compat.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `toChatMessage` + `fromChatMessages` round-trip | pipelines | `pipelines/format-round-trip.test.ts` | Full conversion pipeline | + +### tool-orchestrator.ts + +| Function | Category | File | Reason | +|----------|----------|------|--------| +| `mapToolResults`, `summarizeStepUsage`, etc. | behavior | `behavior/tool-orchestrator.test.ts` | Isolated utility functions | + +--- + +## Pipeline tests (cross-cutting) + +These tests exercise multiple modules end-to-end and don't map to a single function: + +| Pipeline | File | +|----------|------| +| Streaming: events -> guards -> transformers -> consumer | `pipelines/streaming-pipeline.test.ts` | +| Tool execution: create -> dispatch -> validate -> execute -> format | `pipelines/tool-execution-pipeline.test.ts` | +| Context: build -> resolve -> store -> execute | `pipelines/context-pipeline.test.ts` | +| Stop conditions: results -> evaluate -> decision | `pipelines/stop-condition-pipeline.test.ts` | +| Dual-format output: same response -> chat + Claude + items | `pipelines/dual-format-output.test.ts` | +| Claude conversion deep: multi-item -> per-item routing -> blocks | `pipelines/claude-conversion-deep.test.ts` | +| Next-turn params: tool results -> compute -> apply to request | `pipelines/next-turn-params-pipeline.test.ts` | +| Async resolution: resolve -> apply -> evaluate stop | `pipelines/async-resolution-pipeline.test.ts` | +| 
Orchestrator chain: execute -> map -> summarize -> check errors | `pipelines/orchestrator-utility-chain.test.ts` | +| Approval -> execution -> state: partition -> execute -> format | `pipelines/approval-execution-state.test.ts` | +| Format round-trip: Claude and Chat bidirectional conversion | `pipelines/format-round-trip.test.ts` | + +## Composition tests (two-module connection) + +| Connection | File | +|------------|------| +| tool() -> type guards / convertToolsToAPIFormat | `composition/tool-lifecycle.test.ts` | +| ReusableReadableStream -> multiple consumers | `composition/stream-data-pipeline.test.ts` | +| executeNextTurnParamsFunctions -> applyNextTurnParamsToRequest | `composition/next-turn-params-flow.test.ts` | +| toChatMessage -> fromChatMessages (format round-trip) | `composition/format-compatibility.test.ts` | +| buildToolExecuteContext -> tool execute | `composition/context-flow.test.ts` | +| appendToMessages -> state update | `composition/input-normalization.test.ts` | +| createInitialState -> updateState | `composition/state-machine.test.ts` | +| orchestrator utilities -> executor results | `composition/orchestrator-executor.test.ts` | diff --git a/tests/behavior/README.md b/tests/behavior/README.md new file mode 100644 index 0000000..7c4c9e6 --- /dev/null +++ b/tests/behavior/README.md @@ -0,0 +1,26 @@ +# Behavior Tests + +Tests in this folder verify that each SDK capability works as promised **in isolation**. No comparison to similar capabilities, no cross-module composition — just: does this function do what its contract says? 
+ +## What belongs here + +- Happy-path execution of individual functions +- Error cases and edge cases for a single function +- Return shape and type verification +- Input validation (valid and invalid) +- Default values and optional parameter handling +- Isolated behavior tests for newly added SDK capabilities (this folder grows with the SDK) + +## Examples + +- `tool()` factory produces the correct structure for each tool type +- `validateToolInput` accepts valid data and rejects invalid data +- `ReusableReadableStream` delivers items in order to a single consumer +- `createInitialState()` returns the expected shape with timestamps +- `resolveContext` handles static objects, functions, async functions, and undefined + +## What does NOT belong here + +- Tests comparing two similar functions (→ `boundaries/` or `contracts/`) +- Tests where one module's output feeds another's input (→ `composition/` or `integration/`) +- End-to-end workflows (→ `pipelines/`) diff --git a/tests/behavior/async-params.test.ts b/tests/behavior/async-params.test.ts new file mode 100644 index 0000000..b7720fb --- /dev/null +++ b/tests/behavior/async-params.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, it } from 'vitest'; +import { hasAsyncFunctions, resolveAsyncFunctions } from '../../src/lib/async-params.js'; +import type { TurnContext } from '../../src/lib/tool-types.js'; +import { makeCallModelInput, TEST_MODEL } from '../test-constants.js'; + +const turnCtx: TurnContext = { + numberOfTurns: 2, +}; + +describe('async params - resolveAsyncFunctions', () => { + it('passes through static values unchanged', async () => { + const input = makeCallModelInput({ + model: TEST_MODEL, + temperature: 0.7, + input: 'hi', + }); + const result = await resolveAsyncFunctions(input, turnCtx); + expect(result.model).toBe(TEST_MODEL); + expect(result.temperature).toBe(0.7); + }); + + it('resolves sync function fields with turnContext', async () => { + const input = makeCallModelInput({ + model: TEST_MODEL, + temperature: (ctx: 
TurnContext) => ctx.numberOfTurns * 0.1, + input: 'test', + }); + const result = await resolveAsyncFunctions(input, turnCtx); + expect(result.temperature).toBeCloseTo(0.2); + }); + + it('resolves async function fields with turnContext', async () => { + const input = makeCallModelInput({ + model: TEST_MODEL, + temperature: async (ctx: TurnContext) => ctx.numberOfTurns * 0.15, + input: 'test', + }); + const result = await resolveAsyncFunctions(input, turnCtx); + expect(result.temperature).toBeCloseTo(0.3); + }); + + it('strips client-only fields (stopWhen, state, requireApproval, context, etc.)', async () => { + const input = makeCallModelInput({ + model: TEST_MODEL, + input: 'test', + stopWhen: () => true, + state: {}, + requireApproval: () => false, + context: {}, + }); + const result = await resolveAsyncFunctions(input, turnCtx); + expect(result).not.toHaveProperty('stopWhen'); + expect(result).not.toHaveProperty('state'); + expect(result).not.toHaveProperty('requireApproval'); + expect(result).not.toHaveProperty('context'); + }); + + it('wraps field resolution errors with field name', async () => { + const input = makeCallModelInput({ + model: TEST_MODEL, + temperature: () => { + throw new Error('compute failed'); + }, + input: 'test', + }); + await expect(resolveAsyncFunctions(input, turnCtx)).rejects.toThrow(/temperature/); + }); +}); + +describe('async params - hasAsyncFunctions', () => { + it('returns true when any field is a function', () => { + expect( + hasAsyncFunctions({ + model: TEST_MODEL, + temperature: () => 0.5, + }), + ).toBe(true); + }); + + it('returns false when all fields are static values', () => { + expect( + hasAsyncFunctions({ + model: TEST_MODEL, + temperature: 0.5, + }), + ).toBe(false); + }); + + it('returns false for null input', () => { + expect(hasAsyncFunctions(null)).toBe(false); + }); + + it('returns false for undefined input', () => { + expect(hasAsyncFunctions(undefined)).toBe(false); + }); + + it('returns false for non-object 
input', () => { + expect(hasAsyncFunctions('string')).toBe(false); + }); + + it('returns true when a function field sits beside static fields', () => { + expect( + hasAsyncFunctions({ + a: 1, + b: () => 2, + }), + ).toBe(true); + }); + + it('returns false for empty object', () => { + expect(hasAsyncFunctions({})).toBe(false); + }); +}); diff --git a/tests/behavior/async-resolution-pipeline.test.ts b/tests/behavior/async-resolution-pipeline.test.ts new file mode 100644 index 0000000..7a77636 --- /dev/null +++ b/tests/behavior/async-resolution-pipeline.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from 'vitest'; + +import { resolveAsyncFunctions } from '../../src/lib/async-params.js'; +import { stepCountIs } from '../../src/lib/stop-conditions.js'; +import { makeCallModelInput, makeTurnContext, TEST_MODEL } from '../test-constants.js'; + +describe('Async resolution + clean API request', () => { + it('mixed input: static model, function temperature, client-only stopWhen -> three paths verified in one call', async () => { + const turnCtx = makeTurnContext({ + numberOfTurns: 2, + }); + + const result = await resolveAsyncFunctions( + makeCallModelInput({ + model: TEST_MODEL, + temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1, + stopWhen: stepCountIs(5), + input: 'hello', + }), + turnCtx, + ); + + // Static: preserved + expect(result.model).toBe(TEST_MODEL); + // Function: resolved + expect(result.temperature).toBeCloseTo(0.2); + // Client-only: stripped + expect(result).not.toHaveProperty('stopWhen'); + // Static: preserved + expect(result.input).toBe('hello'); + }); +}); diff --git a/tests/behavior/claude-conversion-annotations.test.ts b/tests/behavior/claude-conversion-annotations.test.ts new file mode 100644 index 0000000..854c7c4 --- /dev/null +++ b/tests/behavior/claude-conversion-annotations.test.ts @@ -0,0 +1,68 @@ +import { describe, expect, it } from 'vitest'; + +import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js'; +import { 
TEST_MODEL } from '../test-constants.js'; + +describe('convertToClaudeMessage annotation handling', () => { + it('annotations: text with file_citation + url_citation + file_path -> each produces its distinct citation', () => { + const response = { + id: 'r1', + output: [ + { + type: 'message' as const, + id: 'msg_1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'output_text' as const, + text: 'Here is the answer', + annotations: [ + { + type: 'file_citation', + fileId: 'f1', + filename: 'doc.pdf', + index: 0, + }, + { + type: 'url_citation', + url: 'https://example.com', + title: 'Example', + startIndex: 0, + endIndex: 10, + }, + { + type: 'file_path', + fileId: 'f2', + filePath: '/tmp/out.txt', + }, + ], + }, + ], + }, + ], + status: 'completed' as const, + outputText: 'Here is the answer', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + + const claude = convertToClaudeMessage(response); + const textBlock = claude.content.find((b: { type: string }) => b.type === 'text') as + | { + type: string; + text: string; + citations?: unknown[]; + } + | undefined; + expect(textBlock).toBeDefined(); + // Should have citations: assert unconditionally so that a missing or + // empty `citations` array fails the test instead of passing vacuously + expect(textBlock?.citations).toBeDefined(); + expect(textBlock?.citations?.length).toBeGreaterThan(0); + }); +}); diff --git a/tests/behavior/consume-stream-completion.test.ts b/tests/behavior/consume-stream-completion.test.ts new file mode 100644 index 0000000..dc32204 --- /dev/null +++ b/tests/behavior/consume-stream-completion.test.ts @@ -0,0 +1,80 @@ +import { describe, expect, it } from 'vitest'; + +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; +import { consumeStreamForCompletion } from '../../src/lib/stream-transformers.js'; + +function makeStream(events: StreamEvents[]): ReusableReadableStream { + const source = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(event); + } + 
controller.close(); + }, + }); + return new ReusableReadableStream(source); +} + +describe('consumeStreamForCompletion - completion vs failure distinction', () => { + it('response.completed event -> returns the response', async () => { + const response = { + id: 'r1', + status: 'completed', + output: [], + }; + const stream = makeStream([ + { + type: 'response.output_text.delta', + delta: 'hello', + }, + { + type: 'response.completed', + response, + }, + ]); + const result = await consumeStreamForCompletion(stream); + expect(result).toEqual(response); + }); + + it('response.incomplete event -> returns the incomplete response', async () => { + const response = { + id: 'r1', + status: 'incomplete', + output: [], + }; + const stream = makeStream([ + { + type: 'response.incomplete', + response, + }, + ]); + const result = await consumeStreamForCompletion(stream); + expect(result).toEqual(response); + }); + + it('response.failed event -> throws', async () => { + const stream = makeStream([ + { + type: 'response.failed', + response: { + error: { + message: 'rate limited', + }, + }, + }, + ]); + await expect(consumeStreamForCompletion(stream)).rejects.toThrow('Response failed'); + }); + + it('stream ends without completion event -> throws', async () => { + const stream = makeStream([ + { + type: 'response.output_text.delta', + delta: 'hello', + }, + ]); + await expect(consumeStreamForCompletion(stream)).rejects.toThrow( + 'Stream ended without completion event', + ); + }); +}); diff --git a/tests/behavior/conversation-state-format.test.ts b/tests/behavior/conversation-state-format.test.ts new file mode 100644 index 0000000..f2f6048 --- /dev/null +++ b/tests/behavior/conversation-state-format.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from 'vitest'; + +import { appendToMessages } from '../../src/lib/conversation-state.js'; + +describe('Conversation state -> format conversion', () => { + it('appendToMessages with normalizeInputToArray -> string input produces 
correct array for API', () => { + const existing = [ + { + role: 'user' as const, + content: 'first message', + }, + ]; + + const newItem = { + role: 'user' as const, + content: 'second message', + }; + const result = appendToMessages(existing, [ + newItem, + ]); + expect(result).toHaveLength(2); + expect(result[0]).toEqual({ + role: 'user', + content: 'first message', + }); + expect(result[1]).toHaveProperty('role', 'user'); + expect(result[1]).toHaveProperty('content', 'second message'); + }); +}); diff --git a/tests/behavior/conversation-state.test.ts b/tests/behavior/conversation-state.test.ts new file mode 100644 index 0000000..2a44288 --- /dev/null +++ b/tests/behavior/conversation-state.test.ts @@ -0,0 +1,191 @@ +import type * as models from '@openrouter/sdk/models'; +import { describe, expect, it } from 'vitest'; +import { + appendToMessages, + createInitialState, + createRejectedResult, + createUnsentResult, + extractTextFromResponse, + generateConversationId, + unsentResultsToAPIFormat, + updateState, +} from '../../src/lib/conversation-state.js'; +import { makeResponse } from '../test-constants.js'; + +describe('conversation state - createInitialState', () => { + it('creates state with generated id, empty messages, in_progress status', () => { + const state = createInitialState(); + expect(state.id).toMatch(/^conv_/); + expect(state.messages).toEqual([]); + expect(state.status).toBe('in_progress'); + expect(state.createdAt).toBeTypeOf('number'); + expect(state.updatedAt).toBeTypeOf('number'); + }); + + it('uses provided custom id', () => { + const state = createInitialState('custom_123'); + expect(state.id).toBe('custom_123'); + }); +}); + +describe('conversation state - updateState', () => { + it('merges updates and bumps updatedAt timestamp', () => { + const state = createInitialState('s1'); + const before = state.updatedAt; + const updated = updateState(state, { + status: 'completed', + }); + expect(updated.status).toBe('completed'); + 
expect(updated.id).toBe('s1'); + expect(updated.updatedAt).toBeGreaterThanOrEqual(before); + }); + + it('preserves id and createdAt from original state', () => { + const state = createInitialState('s2'); + const updated = updateState(state, { + messages: [ + { + role: 'user', + content: 'hi', + }, + ], + }); + expect(updated.id).toBe('s2'); + expect(updated.createdAt).toBe(state.createdAt); + }); +}); + +describe('conversation state - appendToMessages', () => { + it('appends new items to existing array input', () => { + const current: models.InputsUnion = [ + { + role: 'user', + content: 'hello', + }, + ]; + const result = appendToMessages(current, [ + { + role: 'assistant', + content: 'hi', + }, + ]); + expect(result).toHaveLength(2); + }); + + it('converts string input to array then appends', () => { + const result = appendToMessages('hello', [ + { + role: 'assistant', + content: 'hi', + }, + ]); + expect(result).toHaveLength(2); + expect(result[0]).toHaveProperty('role', 'user'); + }); +}); + +describe('conversation state - generateConversationId', () => { + it('returns string starting with conv_', () => { + const id = generateConversationId(); + expect(id).toMatch(/^conv_/); + }); + + it('generates unique ids on successive calls', () => { + const ids = new Set( + Array.from( + { + length: 10, + }, + () => generateConversationId(), + ), + ); + expect(ids.size).toBe(10); + }); +}); + +describe('conversation state - unsent results', () => { + it('createUnsentResult builds valid result with callId, name, output', () => { + const result = createUnsentResult('c1', 'test', { + data: 42, + }); + expect(result.callId).toBe('c1'); + expect(result.name).toBe('test'); + expect(result.output).toEqual({ + data: 42, + }); + }); + + it('createRejectedResult builds result with error message', () => { + const result = createRejectedResult('c2', 'test', 'not allowed'); + expect(result.callId).toBe('c2'); + expect(result.output).toBeNull(); + expect(result.error).toBe('not 
allowed'); + }); + + it('createRejectedResult uses default rejection message', () => { + const result = createRejectedResult('c3', 'test'); + expect(result.error).toContain('rejected'); + }); + + it('unsentResultsToAPIFormat converts to FunctionCallOutputItem array', () => { + const results = [ + createUnsentResult('c1', 'test', { + data: 1, + }), + ]; + const api = unsentResultsToAPIFormat(results); + expect(api).toHaveLength(1); + expect(api[0]!.type).toBe('function_call_output'); + expect(api[0]!.callId).toBe('c1'); + expect(typeof api[0]!.output).toBe('string'); + }); +}); + +describe('conversation state - response extraction', () => { + it('extractTextFromResponse extracts text from message output items', () => { + const response = makeResponse({ + id: 'r1', + output: [ + { + type: 'message', + content: [ + { + type: 'output_text', + text: 'Hello ', + }, + ], + }, + { + type: 'message', + content: [ + { + type: 'output_text', + text: 'World', + }, + ], + }, + ], + parallel_tool_calls: false, + status: 'completed', + usage: null, + error: null, + incomplete_details: null, + created_at: 0, + }); + expect(extractTextFromResponse(response)).toBe('Hello World'); + }); + + it('extractTextFromResponse returns empty string for no output', () => { + const response = makeResponse({ + id: 'r1', + output: [], + parallel_tool_calls: false, + status: 'completed', + usage: null, + error: null, + incomplete_details: null, + created_at: 0, + }); + expect(extractTextFromResponse(response)).toBe(''); + }); +}); diff --git a/tests/behavior/format-compatibility.test.ts b/tests/behavior/format-compatibility.test.ts new file mode 100644 index 0000000..587729a --- /dev/null +++ b/tests/behavior/format-compatibility.test.ts @@ -0,0 +1,50 @@ +import { describe, expect, it } from 'vitest'; +import { toClaudeMessage } from '../../src/lib/anthropic-compat.js'; +import { toChatMessage } from '../../src/lib/chat-compat.js'; +import { TEST_MODEL } from '../test-constants.js'; + +function 
makeResponse(text: string) { + return { + id: 'r1', + output: [ + { + type: 'message' as const, + id: 'm1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'output_text' as const, + text, + annotations: [], + }, + ], + }, + ], + status: 'completed' as const, + outputText: text, + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; +} + +describe('Format compatibility: compat layers -> stream-transformers', () => { + it('toChatMessage delegates to extractMessageFromResponse -> returns ChatAssistantMessage', () => { + const response = makeResponse('Hello world'); + const chatMsg = toChatMessage(response); + expect(chatMsg.role).toBe('assistant'); + expect(chatMsg.content).toBe('Hello world'); + }); + + it('toClaudeMessage delegates to convertToClaudeMessage -> returns ClaudeMessage', () => { + const response = makeResponse('Hello world'); + const claudeMsg = toClaudeMessage(response); + expect(claudeMsg.role).toBe('assistant'); + expect(claudeMsg.content).toBeDefined(); + expect(Array.isArray(claudeMsg.content)).toBe(true); + }); +}); diff --git a/tests/behavior/input-normalization.test.ts b/tests/behavior/input-normalization.test.ts new file mode 100644 index 0000000..b973f88 --- /dev/null +++ b/tests/behavior/input-normalization.test.ts @@ -0,0 +1,26 @@ +import { describe, expect, it } from 'vitest'; + +import { appendToMessages } from '../../src/lib/conversation-state.js'; + +describe('Input normalization: turn-context -> conversation-state', () => { + it('appendToMessages with string input normalizes to array before append', () => { + const existing = 'first message'; + const newItem = { + role: 'user' as const, + content: 'second message', + }; + const result = appendToMessages(existing, [ + newItem, + ]); + + expect(result.length).toBeGreaterThan(1); + // First item is normalized from string + const firstItem = result[0]!; + expect(firstItem).toHaveProperty('role', 'user'); + 
expect(firstItem).toHaveProperty('content', 'first message'); + // Second item is the appended message + const lastItem = result[result.length - 1]!; + expect(lastItem).toHaveProperty('role', 'user'); + expect(lastItem).toHaveProperty('content', 'second message'); + }); +}); diff --git a/tests/behavior/next-turn-params.test.ts b/tests/behavior/next-turn-params.test.ts new file mode 100644 index 0000000..45b96f6 --- /dev/null +++ b/tests/behavior/next-turn-params.test.ts @@ -0,0 +1,222 @@ +import type * as models from '@openrouter/sdk/models'; +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; +import { + applyNextTurnParamsToRequest, + buildNextTurnParamsContext, + executeNextTurnParamsFunctions, +} from '../../src/lib/next-turn-params.js'; +import { tool } from '../../src/lib/tool.js'; +import type { ParsedToolCall, Tool } from '../../src/lib/tool-types.js'; +import { makeRequest, TEST_MODEL } from '../test-constants.js'; + +describe('next-turn params - buildNextTurnParamsContext', () => { + it('extracts relevant fields from request', () => { + const request: models.ResponsesRequest = { + model: TEST_MODEL, + input: 'hello', + temperature: 0.7, + maxOutputTokens: 1000, + }; + const ctx = buildNextTurnParamsContext(request); + expect(ctx.model).toBe(TEST_MODEL); + expect(ctx.input).toBe('hello'); + expect(ctx.temperature).toBe(0.7); + expect(ctx.maxOutputTokens).toBe(1000); + }); + + it('defaults missing fields to null/empty', () => { + const request = makeRequest({ + model: undefined, + input: undefined, + }); + const ctx = buildNextTurnParamsContext(request); + expect(ctx.model).toBe(''); + expect(ctx.temperature).toBeNull(); + expect(ctx.maxOutputTokens).toBeNull(); + expect(ctx.models).toEqual([]); + }); +}); + +describe('next-turn params - executeNextTurnParamsFunctions', () => { + it('executes temperature function and returns computed value', async () => { + const t = tool({ + name: 'search', + inputSchema: z.object({ + query: 
z.string(), + }), + nextTurnParams: { + temperature: () => 0.2 as number | null, + }, + execute: async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'search', + arguments: { + query: 'test', + }, + }; + const request = makeRequest({ + model: TEST_MODEL, + input: 'hello', + }); + const result = await executeNextTurnParamsFunctions( + [ + tc, + ], + [ + t, + ], + request, + ); + expect(result.temperature).toBe(0.2); + }); + + it('returns empty object when no tools have nextTurnParams', async () => { + const t = tool({ + name: 'basic', + inputSchema: z.object({}), + execute: async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'basic', + arguments: {}, + }; + const result = await executeNextTurnParamsFunctions( + [ + tc, + ], + [ + t, + ], + makeRequest({}), + ); + expect(Object.keys(result)).toHaveLength(0); + }); + + it('skips tools not in toolCalls array', async () => { + const t1 = tool({ + name: 'a', + inputSchema: z.object({}), + nextTurnParams: { + temperature: () => 0.1 as number | null, + }, + execute: async () => ({}), + }); + const t2 = tool({ + name: 'b', + inputSchema: z.object({}), + execute: async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'b', + arguments: {}, + }; + const result = await executeNextTurnParamsFunctions( + [ + tc, + ], + [ + t1, + t2, + ], + makeRequest({}), + ); + expect(result.temperature).toBeUndefined(); + }); + + it('composes functions from multiple tools in order', async () => { + const t1 = tool({ + name: 'first', + inputSchema: z.object({}), + nextTurnParams: { + temperature: (_p, ctx) => (ctx.temperature ?? 0) + 0.1, + }, + execute: async () => ({}), + }); + const t2 = tool({ + name: 'second', + inputSchema: z.object({}), + nextTurnParams: { + temperature: (_p, ctx) => (ctx.temperature ?? 
0) + 0.2, + }, + execute: async () => ({}), + }); + const tc1: ParsedToolCall = { + id: 'c1', + name: 'first', + arguments: {}, + }; + const tc2: ParsedToolCall = { + id: 'c2', + name: 'second', + arguments: {}, + }; + const request = { + temperature: 0.5, + }; + const result = await executeNextTurnParamsFunctions( + [ + tc1, + tc2, + ], + [ + t1, + t2, + ], + request, + ); + expect(result.temperature).toBeCloseTo(0.8); + }); +}); + +describe('next-turn params - applyNextTurnParamsToRequest', () => { + it('merges computed params into request', () => { + const request = makeRequest({ + model: TEST_MODEL, + temperature: 0.7, + input: 'test', + }); + const computed = { + temperature: 0.2 as number | null, + }; + const result = applyNextTurnParamsToRequest(request, computed); + expect(result.temperature).toBe(0.2); + expect(result.model).toBe(TEST_MODEL); + }); + + it('converts null values to undefined for API compatibility', () => { + const request = makeRequest({ + model: TEST_MODEL, + }); + const computed = { + temperature: null, + }; + const result = applyNextTurnParamsToRequest(request, computed); + expect(result.temperature).toBeUndefined(); + }); + + it('returns new object without mutating original', () => { + const request = makeRequest({ + model: TEST_MODEL, + temperature: 0.7, + }); + const result = applyNextTurnParamsToRequest(request, { + temperature: 0.2, + }); + expect(request.temperature).toBe(0.7); + expect(result.temperature).toBe(0.2); + }); + + it('handles empty computed params', () => { + const request = makeRequest({ + model: TEST_MODEL, + temperature: 0.7, + }); + const result = applyNextTurnParamsToRequest(request, {}); + expect(result.temperature).toBe(0.7); + }); +}); diff --git a/tests/behavior/orchestrator-utility-chain.test.ts b/tests/behavior/orchestrator-utility-chain.test.ts new file mode 100644 index 0000000..17da20e --- /dev/null +++ b/tests/behavior/orchestrator-utility-chain.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it } 
from 'vitest'; +import { + getToolExecutionErrors, + hasToolExecutionErrors, + summarizeToolExecutions, + toolResultsToMap, +} from '../../src/lib/tool-orchestrator.js'; +import type { Tool, ToolExecutionResult } from '../../src/lib/tool-types.js'; + +describe('Orchestrator utility chain', () => { + it('mixed results: one success + one failure -> toolResultsToMap -> hasToolExecutionErrors -> getToolExecutionErrors -> summarizeToolExecutions', () => { + const successResult: ToolExecutionResult = { + toolCallId: 'tc_1', + toolName: 'search', + result: { + data: 'found', + }, + }; + + const failureResult: ToolExecutionResult = { + toolCallId: 'tc_2', + toolName: 'delete', + result: null, + error: new Error('Permission denied'), + }; + + const results = [ + successResult, + failureResult, + ]; + + // Step 1: Map results + const map = toolResultsToMap(results); + expect(map.size).toBe(2); + expect(map.get('tc_1')).toBeDefined(); + expect(map.get('tc_2')).toBeDefined(); + + // Step 2: Check for errors + expect(hasToolExecutionErrors(results)).toBe(true); + + // Step 3: Get errors + const errors = getToolExecutionErrors(results); + expect(errors).toHaveLength(1); + expect(errors[0]!.message).toBe('Permission denied'); + + // Step 4: Summarize + const summary = summarizeToolExecutions(results); + expect(summary).toContain('search'); + expect(summary).toContain('SUCCESS'); + expect(summary).toContain('delete'); + expect(summary).toContain('Permission denied'); + }); +}); diff --git a/tests/behavior/reusable-stream.test.ts b/tests/behavior/reusable-stream.test.ts new file mode 100644 index 0000000..be6ab22 --- /dev/null +++ b/tests/behavior/reusable-stream.test.ts @@ -0,0 +1,207 @@ +import { describe, expect, it } from 'vitest'; +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; + +/** Builds a stream that enqueues every value during `start` and then closes. */ +function makeStream<T>(values: T[]): ReadableStream<T> { + return new ReadableStream<T>({ + start(controller) { + for (const v of values) { + controller.enqueue(v); + } + 
controller.close(); + }, + }); +} + +/** Same as makeStream, but awaits a `delayMs` timeout before enqueuing each value. */ +function makeDelayedStream<T>(values: T[], delayMs = 5): ReadableStream<T> { + return new ReadableStream<T>({ + async start(controller) { + for (const v of values) { + await new Promise((r) => setTimeout(r, delayMs)); + controller.enqueue(v); + } + controller.close(); + }, + }); +} + +/** Collects every value yielded by `iter` into an array, in iteration order. */ +async function collect<T>(iter: AsyncIterableIterator<T>): Promise<T[]> { + const result: T[] = []; + for await (const v of iter) { + result.push(v); + } + return result; +} + +describe('reusable stream - single consumer', () => { + it('single consumer reads all values from source', async () => { + const rrs = new ReusableReadableStream( + makeStream([ + 1, + 2, + 3, + ]), + ); + const values = await collect(rrs.createConsumer()); + expect(values).toEqual([ + 1, + 2, + 3, + ]); + }); + + it('empty source stream yields no values', async () => { + const rrs = new ReusableReadableStream(makeStream([])); + const values = await collect(rrs.createConsumer()); + expect(values).toEqual([]); + }); +}); + +describe('reusable stream - multiple consumers', () => { + it('two consumers independently read the same values', async () => { + const rrs = new ReusableReadableStream( + makeStream([ + 10, + 20, + 30, + ]), + ); + const c1 = rrs.createConsumer(); + const c2 = rrs.createConsumer(); + const [v1, v2] = await Promise.all([ + collect(c1), + collect(c2), + ]); + expect(v1).toEqual([ + 10, + 20, + 30, + ]); + expect(v2).toEqual([ + 10, + 20, + 30, + ]); + }); + + it('late-joining consumer gets all buffered values plus new ones', async () => { + const rrs = new ReusableReadableStream( + makeDelayedStream( + [ + 1, + 2, + 3, + 4, + ], + 5, + ), + ); + const c1 = rrs.createConsumer(); + // Let first consumer read a bit + const first = await c1.next(); + expect(first.done).toBe(false); + // Join late + const c2 = rrs.createConsumer(); + const [remaining1, values2] = await Promise.all([ + collect(c1), + collect(c2), + ]); + // c1 already read first value, so remaining has rest + 
expect(remaining1.length).toBeGreaterThanOrEqual(2); + // c2 should have all values + expect(values2).toEqual([ + 1, + 2, + 3, + 4, + ]); + }); +}); + +describe('reusable stream - error propagation', () => { + it('propagates source error to consumer', async () => { + let controllerRef: ReadableStreamDefaultController; + const errorStream = new ReadableStream({ + start(controller) { + controllerRef = controller; + controller.enqueue(1); + }, + pull() { + controllerRef!.error(new Error('source error')); + }, + }); + const rrs = new ReusableReadableStream(errorStream); + const consumer = rrs.createConsumer(); + const first = await consumer.next(); + expect(first.value).toBe(1); + await expect(consumer.next()).rejects.toThrow('source error'); + }); +}); + +describe('reusable stream - cancellation', () => { + it('cancel() stops all consumers', async () => { + const rrs = new ReusableReadableStream( + makeDelayedStream( + [ + 1, + 2, + 3, + 4, + 5, + ], + 50, + ), + ); + const c1 = rrs.createConsumer(); + const first = await c1.next(); + expect(first.done).toBe(false); + await rrs.cancel(); + const next = await c1.next(); + expect(next.done).toBe(true); + }); + + it('consumer.return() cancels that consumer only', async () => { + const rrs = new ReusableReadableStream( + makeStream([ + 1, + 2, + 3, + ]), + ); + const c1 = rrs.createConsumer(); + const c2 = rrs.createConsumer(); + await c1.return!(); + const result = await c1.next(); + expect(result.done).toBe(true); + // c2 should still work + const values = await collect(c2); + expect(values).toEqual([ + 1, + 2, + 3, + ]); + }); +}); + +describe('reusable stream - async iteration protocol', () => { + it('supports for-await-of loop', async () => { + const rrs = new ReusableReadableStream( + makeStream([ + 'a', + 'b', + 'c', + ]), + ); + const values: string[] = []; + for await (const v of rrs.createConsumer()) { + values.push(v); + } + expect(values).toEqual([ + 'a', + 'b', + 'c', + ]); + }); +}); diff --git 
a/tests/behavior/stop-conditions-evaluation.test.ts b/tests/behavior/stop-conditions-evaluation.test.ts new file mode 100644 index 0000000..8237938 --- /dev/null +++ b/tests/behavior/stop-conditions-evaluation.test.ts @@ -0,0 +1,105 @@ +import { describe, expect, it } from 'vitest'; +import { hasToolCall, isStopConditionMet, stepCountIs } from '../../src/lib/stop-conditions.js'; +import type { StepResult } from '../../src/lib/tool-types.js'; + +/** Builds a minimal StepResult fixture; `overrides` are spread last and replace the defaults. */ +function makeStep(overrides: Partial<StepResult> = {}): StepResult { + return { + stepType: 'initial', + text: '', + toolCalls: [], + toolResults: [], + response: { + id: 'r1', + output: [], + parallel_tool_calls: false, + status: 'completed', + usage: null, + error: null, + incomplete_details: null, + created_at: 0, + }, + ...overrides, + }; +} + +describe('stop conditions - isStopConditionMet evaluation', () => { + it('returns true when any condition is true (OR logic)', async () => { + const steps = [ + makeStep(), + makeStep(), + makeStep(), + ]; + const result = await isStopConditionMet({ + stopConditions: [ + stepCountIs(5), + stepCountIs(2), + ], + steps, + }); + expect(result).toBe(true); + }); + + it('returns false when all conditions are false', async () => { + const steps = [ + makeStep(), + ]; + const result = await isStopConditionMet({ + stopConditions: [ + stepCountIs(5), + hasToolCall('missing'), + ], + steps, + }); + expect(result).toBe(false); + }); + + it('handles empty conditions array (returns false)', async () => { + const result = await isStopConditionMet({ + stopConditions: [], + steps: [ + makeStep(), + ], + }); + expect(result).toBe(false); + }); + + it('handles async stop conditions', async () => { + const asyncCondition = async ({ steps }: { readonly steps: ReadonlyArray<StepResult> }) => { + await new Promise((resolve) => setTimeout(resolve, 1)); + return steps.length >= 2; + }; + const result = await isStopConditionMet({ + stopConditions: [ + asyncCondition, + ], + steps: [ + makeStep(), + makeStep(), + ], + }); + 
expect(result).toBe(true); + }); + + it('evaluates conditions in parallel', async () => { + const order: number[] = []; + const slow = async () => { + await new Promise((r) => setTimeout(r, 20)); + order.push(1); + return false; + }; + const fast = async () => { + await new Promise((r) => setTimeout(r, 1)); + order.push(2); + return true; + }; + const result = await isStopConditionMet({ + stopConditions: [ + slow, + fast, + ], + steps: [], + }); + expect(result).toBe(true); + }); +}); diff --git a/tests/behavior/stop-conditions-step-result.test.ts b/tests/behavior/stop-conditions-step-result.test.ts new file mode 100644 index 0000000..5092743 --- /dev/null +++ b/tests/behavior/stop-conditions-step-result.test.ts @@ -0,0 +1,152 @@ +import { describe, expect, it } from 'vitest'; + +import { + hasToolCall, + isStopConditionMet, + maxTokensUsed, + stepCountIs, +} from '../../src/lib/stop-conditions.js'; +import { makeStep, makeTypedToolCalls, makeUsage } from '../test-constants.js'; + +describe('Stop conditions + real StepResult shape', () => { + it('stepCountIs works with StepResult[] containing real usage and toolCalls data', () => { + const steps = [ + makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'search', + id: 'tc1', + arguments: {}, + }, + ]), + usage: makeUsage({ + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }), + }), + makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'write', + id: 'tc2', + arguments: {}, + }, + ]), + usage: makeUsage({ + totalTokens: 200, + inputTokens: 100, + outputTokens: 100, + }), + }), + ]; + const condition = stepCountIs(2); + expect( + condition({ + steps, + }), + ).toBe(true); + }); + + it('hasToolCall finds tool name inside StepResult.toolCalls array', () => { + const steps = [ + makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'search', + id: 'tc1', + arguments: {}, + }, + { + name: 'analyze', + id: 'tc2', + arguments: {}, + }, + ]), + }), + ]; + expect( + hasToolCall('search')({ + steps, + 
}), + ).toBe(true); + expect( + hasToolCall('analyze')({ + steps, + }), + ).toBe(true); + expect( + hasToolCall('missing')({ + steps, + }), + ).toBe(false); + }); + + it('maxTokensUsed reads from StepResult.usage.totalTokens', () => { + const steps = [ + makeStep({ + usage: makeUsage({ + totalTokens: 500, + inputTokens: 250, + outputTokens: 250, + }), + }), + makeStep({ + usage: makeUsage({ + totalTokens: 600, + inputTokens: 300, + outputTokens: 300, + }), + }), + ]; + expect( + maxTokensUsed(1000)({ + steps, + }), + ).toBe(true); + expect( + maxTokensUsed(1200)({ + steps, + }), + ).toBe(false); + }); + + it('isStopConditionMet evaluates multiple conditions against same StepResult[]', async () => { + const steps = [ + makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'search', + id: 'tc1', + arguments: {}, + }, + ]), + usage: makeUsage({ + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }), + }), + ]; + + // Neither condition met + const result1 = await isStopConditionMet({ + stopConditions: [ + stepCountIs(5), + hasToolCall('done'), + ], + steps, + }); + expect(result1).toBe(false); + + // One condition met (hasToolCall) + const result2 = await isStopConditionMet({ + stopConditions: [ + stepCountIs(5), + hasToolCall('search'), + ], + steps, + }); + expect(result2).toBe(true); + }); +}); diff --git a/tests/behavior/stop-conditions.test.ts b/tests/behavior/stop-conditions.test.ts new file mode 100644 index 0000000..9e2ece7 --- /dev/null +++ b/tests/behavior/stop-conditions.test.ts @@ -0,0 +1,521 @@ +import { describe, expect, it } from 'vitest'; + +import { + finishReasonIs, + hasToolCall, + maxCost, + maxTokensUsed, + stepCountIs, +} from '../../src/lib/stop-conditions.js'; +import { makeStep, makeTypedToolCalls, makeUsage } from '../test-constants.js'; + +describe('stepCountIs(n) - behavior and dimension isolation', () => { + it('returns false when steps.length < n', () => { + const condition = stepCountIs(3); + expect( + condition({ + steps: [ + 
makeStep(), + makeStep(), + ], + }), + ).toBe(false); + }); + + it('returns true when steps.length === n', () => { + const condition = stepCountIs(3); + expect( + condition({ + steps: [ + makeStep(), + makeStep(), + makeStep(), + ], + }), + ).toBe(true); + }); + + it('returns true when steps.length > n', () => { + const condition = stepCountIs(2); + expect( + condition({ + steps: [ + makeStep(), + makeStep(), + makeStep(), + ], + }), + ).toBe(true); + }); + + it('stepCountIs(0) always returns true', () => { + const condition = stepCountIs(0); + expect( + condition({ + steps: [], + }), + ).toBe(true); + expect( + condition({ + steps: [ + makeStep(), + ], + }), + ).toBe(true); + }); + + it('ignores tool names, tokens, cost, finishReason in steps', () => { + const condition = stepCountIs(1); + const step = makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'search', + id: 'tc1', + arguments: {}, + }, + ]), + usage: makeUsage({ + totalTokens: 9999, + inputTokens: 5000, + outputTokens: 4999, + cost: 100, + }), + finishReason: 'length', + }); + // Only step count matters + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(true); + }); +}); + +describe('hasToolCall(toolName) - behavior and dimension isolation', () => { + it('returns false when no steps have the named tool', () => { + const condition = hasToolCall('search'); + const step = makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'other', + id: 'tc1', + arguments: {}, + }, + ]), + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(false); + }); + + it('returns true when any step has a matching tool call', () => { + const condition = hasToolCall('search'); + const step1 = makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'other', + id: 'tc1', + arguments: {}, + }, + ]), + }); + const step2 = makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'search', + id: 'tc2', + arguments: {}, + }, + ]), + }); + expect( + condition({ + steps: [ + step1, + step2, + ], + }), + 
).toBe(true); + }); + + it('returns false for different tool names', () => { + const condition = hasToolCall('search'); + const step = makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'Search', + id: 'tc1', + arguments: {}, + }, + ]), + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(false); + }); + + it('handles step with multiple tool calls, one matching', () => { + const condition = hasToolCall('search'); + const step = makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'other', + id: 'tc1', + arguments: {}, + }, + { + name: 'search', + id: 'tc2', + arguments: {}, + }, + ]), + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(true); + }); + + it('ignores step count, tokens, cost, finishReason', () => { + const condition = hasToolCall('search'); + const step = makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'search', + id: 'tc1', + arguments: {}, + }, + ]), + usage: makeUsage({ + totalTokens: 9999, + inputTokens: 5000, + outputTokens: 4999, + cost: 100, + }), + finishReason: 'length', + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(true); + }); +}); + +describe('maxTokensUsed(maxTokens) - behavior and dimension isolation', () => { + it('returns false when total tokens < threshold', () => { + const condition = maxTokensUsed(100); + const step = makeStep({ + usage: makeUsage({ + totalTokens: 50, + inputTokens: 25, + outputTokens: 25, + }), + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(false); + }); + + it('returns true when total tokens >= threshold', () => { + const condition = maxTokensUsed(100); + const step = makeStep({ + usage: makeUsage({ + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }), + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(true); + }); + + it('accumulates tokens across multiple steps', () => { + const condition = maxTokensUsed(100); + const step1 = makeStep({ + usage: makeUsage({ + totalTokens: 60, + 
inputTokens: 30, + outputTokens: 30, + }), + }); + const step2 = makeStep({ + usage: makeUsage({ + totalTokens: 50, + inputTokens: 25, + outputTokens: 25, + }), + }); + expect( + condition({ + steps: [ + step1, + step2, + ], + }), + ).toBe(true); + }); + + it('steps with undefined usage -> treated as 0', () => { + const condition = maxTokensUsed(100); + const step = makeStep({ + usage: undefined, + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(false); + }); + + it('ignores step count, tool names, cost, finishReason', () => { + const condition = maxTokensUsed(100); + const step = makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'search', + id: 'tc1', + arguments: {}, + }, + ]), + usage: makeUsage({ + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + cost: 999, + }), + finishReason: 'stop', + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(true); + }); +}); + +describe('maxCost(maxCostInDollars) - behavior and dimension isolation', () => { + it('returns false when total cost < threshold', () => { + const condition = maxCost(1.0); + const step = makeStep({ + usage: makeUsage({ + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + cost: 0.5, + }), + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(false); + }); + + it('returns true when total cost >= threshold', () => { + const condition = maxCost(1.0); + const step = makeStep({ + usage: makeUsage({ + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + cost: 1.0, + }), + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(true); + }); + + it('accumulates cost across multiple steps', () => { + const condition = maxCost(1.0); + const step1 = makeStep({ + usage: makeUsage({ + totalTokens: 50, + inputTokens: 25, + outputTokens: 25, + cost: 0.6, + }), + }); + const step2 = makeStep({ + usage: makeUsage({ + totalTokens: 50, + inputTokens: 25, + outputTokens: 25, + cost: 0.5, + }), + }); + expect( + condition({ + steps: [ + 
step1, + step2, + ], + }), + ).toBe(true); + }); + + it('steps with undefined usage.cost -> treated as 0', () => { + const condition = maxCost(1.0); + const step = makeStep({ + usage: undefined, + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(false); + }); + + it('ignores step count, tool names, tokens, finishReason', () => { + const condition = maxCost(1.0); + const step = makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'search', + id: 'tc1', + arguments: {}, + }, + ]), + usage: makeUsage({ + totalTokens: 99999, + inputTokens: 50000, + outputTokens: 49999, + cost: 1.0, + }), + finishReason: 'length', + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(true); + }); +}); + +describe('finishReasonIs(reason) - behavior and dimension isolation', () => { + it('returns false when no step has the specified reason', () => { + const condition = finishReasonIs('length'); + const step = makeStep({ + finishReason: 'stop', + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(false); + }); + + it('returns true when any step has matching reason', () => { + const condition = finishReasonIs('length'); + const step1 = makeStep({ + finishReason: 'stop', + }); + const step2 = makeStep({ + finishReason: 'length', + }); + expect( + condition({ + steps: [ + step1, + step2, + ], + }), + ).toBe(true); + }); + + it('matches "length" specifically', () => { + const condition = finishReasonIs('length'); + const step = makeStep({ + finishReason: 'length', + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(true); + }); + + it('steps with undefined finishReason -> not matched', () => { + const condition = finishReasonIs('length'); + const step = makeStep({ + finishReason: undefined, + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(false); + }); + + it('ignores step count, tool names, tokens, cost', () => { + const condition = finishReasonIs('length'); + const step = makeStep({ + toolCalls: 
makeTypedToolCalls([ + { + name: 'search', + id: 'tc1', + arguments: {}, + }, + ]), + usage: makeUsage({ + totalTokens: 99999, + inputTokens: 50000, + outputTokens: 49999, + cost: 999, + }), + finishReason: 'length', + }); + expect( + condition({ + steps: [ + step, + ], + }), + ).toBe(true); + }); +}); diff --git a/tests/behavior/stream-type-guards-negative.test.ts b/tests/behavior/stream-type-guards-negative.test.ts new file mode 100644 index 0000000..862e7fe --- /dev/null +++ b/tests/behavior/stream-type-guards-negative.test.ts @@ -0,0 +1,133 @@ +import { describe, expect, it } from 'vitest'; +import { + isFunctionCallArgumentsDeltaEvent, + isFunctionCallArgumentsDoneEvent, + isFunctionCallItem, + isOutputItemAddedEvent, + isOutputItemDoneEvent, + isOutputMessage, + isOutputTextDeltaEvent, + isOutputTextPart, + isReasoningDeltaEvent, + isReasoningOutputItem, + isRefusalPart, + isResponseCompletedEvent, + isResponseFailedEvent, + isResponseIncompleteEvent, +} from '../../src/lib/stream-type-guards.js'; + +describe('stream event type guards - negative cases (reject wrong type)', () => { + it('isOutputTextDeltaEvent rejects reasoning delta', () => { + expect( + isOutputTextDeltaEvent({ + type: 'response.reasoning_text.delta', + } as unknown as Parameters<typeof isOutputTextDeltaEvent>[0]), + ).toBe(false); + }); + + it('isReasoningDeltaEvent rejects text delta', () => { + expect( + isReasoningDeltaEvent({ + type: 'response.output_text.delta', + } as unknown as Parameters<typeof isReasoningDeltaEvent>[0]), + ).toBe(false); + }); + + it('isFunctionCallArgumentsDeltaEvent rejects text delta', () => { + expect( + isFunctionCallArgumentsDeltaEvent({ + type: 'response.output_text.delta', + } as unknown as Parameters<typeof isFunctionCallArgumentsDeltaEvent>[0]), + ).toBe(false); + }); + + it('isOutputItemAddedEvent rejects output_item.done', () => { + expect( + isOutputItemAddedEvent({ + type: 'response.output_item.done', + } as unknown as Parameters<typeof isOutputItemAddedEvent>[0]), + ).toBe(false); + }); + + it('isOutputItemDoneEvent rejects output_item.added', () => { + expect( + 
isOutputItemDoneEvent({ + type: 'response.output_item.added', + } as unknown as Parameters<typeof isOutputItemDoneEvent>[0]), + ).toBe(false); + }); + + it('isResponseCompletedEvent rejects response.failed', () => { + expect( + isResponseCompletedEvent({ + type: 'response.failed', + } as unknown as Parameters<typeof isResponseCompletedEvent>[0]), + ).toBe(false); + }); + + it('isResponseFailedEvent rejects response.completed', () => { + expect( + isResponseFailedEvent({ + type: 'response.completed', + } as unknown as Parameters<typeof isResponseFailedEvent>[0]), + ).toBe(false); + }); + + it('isResponseIncompleteEvent rejects response.completed', () => { + expect( + isResponseIncompleteEvent({ + type: 'response.completed', + } as unknown as Parameters<typeof isResponseIncompleteEvent>[0]), + ).toBe(false); + }); + + it('isFunctionCallArgumentsDoneEvent rejects function_call_arguments.delta', () => { + expect( + isFunctionCallArgumentsDoneEvent({ + type: 'response.function_call_arguments.delta', + } as unknown as Parameters<typeof isFunctionCallArgumentsDoneEvent>[0]), + ).toBe(false); + }); +}); + +describe('output item type guards - negative cases', () => { + it('isOutputMessage rejects function_call', () => { + expect( + isOutputMessage({ + type: 'function_call', + }), + ).toBe(false); + }); + + it('isFunctionCallItem rejects message', () => { + expect( + isFunctionCallItem({ + type: 'message', + }), + ).toBe(false); + }); + + it('isReasoningOutputItem rejects message', () => { + expect( + isReasoningOutputItem({ + type: 'message', + }), + ).toBe(false); + }); + + it('isOutputTextPart rejects refusal', () => { + expect( + isOutputTextPart({ + type: 'refusal', + }), + ).toBe(false); + }); + + it('isRefusalPart rejects output_text', () => { + expect( + isRefusalPart({ + type: 'output_text', + }), + ).toBe(false); + }); +}); diff --git a/tests/behavior/tool-approval.test.ts b/tests/behavior/tool-approval.test.ts new file mode 100644 index 0000000..0f4d309 --- /dev/null +++ b/tests/behavior/tool-approval.test.ts @@ -0,0 +1,314 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; +import { partitionToolCalls, 
toolRequiresApproval } from '../../src/lib/conversation-state.js'; +import { tool } from '../../src/lib/tool.js'; +import type { ParsedToolCall, Tool, TurnContext } from '../../src/lib/tool-types.js'; +import { hasApprovalRequiredTools, toolHasApprovalConfigured } from '../../src/lib/tool-types.js'; + +const turnCtx: TurnContext = { + numberOfTurns: 1, +}; + +describe('tool approval - toolRequiresApproval', () => { + it('returns false when tool has no requireApproval', async () => { + const t = tool({ + name: 'free', + inputSchema: z.object({}), + execute: async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'free', + arguments: {}, + }; + expect( + await toolRequiresApproval( + tc, + [ + t, + ], + turnCtx, + ), + ).toBe(false); + }); + + it('returns true when tool has requireApproval: true', async () => { + const t = tool({ + name: 'guarded', + inputSchema: z.object({}), + requireApproval: true, + execute: async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'guarded', + arguments: {}, + }; + expect( + await toolRequiresApproval( + tc, + [ + t, + ], + turnCtx, + ), + ).toBe(true); + }); + + it('returns false when tool has requireApproval: false', async () => { + const t = tool({ + name: 'open', + inputSchema: z.object({}), + requireApproval: false, + execute: async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'open', + arguments: {}, + }; + expect( + await toolRequiresApproval( + tc, + [ + t, + ], + turnCtx, + ), + ).toBe(false); + }); + + it('calls requireApproval function with args and context', async () => { + const t = tool({ + name: 'conditional', + inputSchema: z.object({ + dangerous: z.boolean(), + }), + requireApproval: (params) => params.dangerous, + execute: async () => ({}), + }); + const tc1: ParsedToolCall = { + id: 'c1', + name: 'conditional', + arguments: { + dangerous: true, + }, + }; + const tc2: ParsedToolCall = { + id: 'c2', + name: 'conditional', + arguments: { + dangerous: 
false, + }, + }; + expect( + await toolRequiresApproval( + tc1, + [ + t, + ], + turnCtx, + ), + ).toBe(true); + expect( + await toolRequiresApproval( + tc2, + [ + t, + ], + turnCtx, + ), + ).toBe(false); + }); + + it('call-level check overrides tool-level setting', async () => { + const t = tool({ + name: 'guarded', + inputSchema: z.object({}), + requireApproval: true, + execute: async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'guarded', + arguments: {}, + }; + const callCheck = () => false; + expect( + await toolRequiresApproval( + tc, + [ + t, + ], + turnCtx, + callCheck, + ), + ).toBe(false); + }); + + it('returns false for unknown tool name', async () => { + const t = tool({ + name: 'known', + inputSchema: z.object({}), + execute: async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'unknown', + arguments: {}, + }; + expect( + await toolRequiresApproval( + tc, + [ + t, + ], + turnCtx, + ), + ).toBe(false); + }); +}); + +describe('tool approval - partitionToolCalls', () => { + it('separates tool calls into requiresApproval and autoExecute', async () => { + const guarded = tool({ + name: 'guarded', + inputSchema: z.object({}), + requireApproval: true, + execute: async () => ({}), + }); + const free = tool({ + name: 'free', + inputSchema: z.object({}), + execute: async () => ({}), + }); + const tc1: ParsedToolCall = { + id: 'c1', + name: 'guarded', + arguments: {}, + }; + const tc2: ParsedToolCall = { + id: 'c2', + name: 'free', + arguments: {}, + }; + const result = await partitionToolCalls( + [ + tc1, + tc2, + ], + [ + guarded, + free, + ], + turnCtx, + ); + expect(result.requiresApproval).toHaveLength(1); + expect(result.autoExecute).toHaveLength(1); + expect(result.requiresApproval[0]!.name).toBe('guarded'); + expect(result.autoExecute[0]!.name).toBe('free'); + }); + + it('all auto-execute when no tools require approval', async () => { + const free = tool({ + name: 'free', + inputSchema: z.object({}), + execute: 
async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'free', + arguments: {}, + }; + const result = await partitionToolCalls( + [ + tc, + ], + [ + free, + ], + turnCtx, + ); + expect(result.autoExecute).toHaveLength(1); + expect(result.requiresApproval).toHaveLength(0); + }); + + it('all require approval when all tools need it', async () => { + const guarded = tool({ + name: 'g1', + inputSchema: z.object({}), + requireApproval: true, + execute: async () => ({}), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'g1', + arguments: {}, + }; + const result = await partitionToolCalls( + [ + tc, + ], + [ + guarded, + ], + turnCtx, + ); + expect(result.requiresApproval).toHaveLength(1); + expect(result.autoExecute).toHaveLength(0); + }); +}); + +describe('tool approval - type-level utilities', () => { + it('toolHasApprovalConfigured returns true for tool with requireApproval', () => { + const t = tool({ + name: 'g', + inputSchema: z.object({}), + requireApproval: true, + execute: async () => ({}), + }); + expect(toolHasApprovalConfigured(t)).toBe(true); + }); + + it('toolHasApprovalConfigured returns false for tool without requireApproval', () => { + const t = tool({ + name: 'f', + inputSchema: z.object({}), + execute: async () => ({}), + }); + expect(toolHasApprovalConfigured(t)).toBe(false); + }); + + it('hasApprovalRequiredTools returns true when any tool needs approval', () => { + const t1 = tool({ + name: 'f', + inputSchema: z.object({}), + execute: async () => ({}), + }); + const t2 = tool({ + name: 'g', + inputSchema: z.object({}), + requireApproval: true, + execute: async () => ({}), + }); + expect( + hasApprovalRequiredTools([ + t1, + t2, + ]), + ).toBe(true); + }); + + it('hasApprovalRequiredTools returns false when no tools need approval', () => { + const t1 = tool({ + name: 'f', + inputSchema: z.object({}), + execute: async () => ({}), + }); + expect( + hasApprovalRequiredTools([ + t1, + ]), + ).toBe(false); + }); +}); diff --git 
a/tests/behavior/tool-context.test.ts b/tests/behavior/tool-context.test.ts
new file mode 100644
index 0000000..271de3a
--- /dev/null
+++ b/tests/behavior/tool-context.test.ts
@@ -0,0 +1,268 @@
+// Behavior tests for ToolContextStore and the context helpers exported by src/lib/tool-context.
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import {
+  buildToolExecuteContext,
+  extractToolContext,
+  resolveContext,
+  ToolContextStore,
+} from '../../src/lib/tool-context.js';
+import type { TurnContext } from '../../src/lib/tool-types.js';
+
+// Minimal turn context shared by every test; only `numberOfTurns` is populated.
+const turnCtx: TurnContext = {
+  numberOfTurns: 1,
+};
+
+describe('ToolContextStore - basic operations', () => {
+  it('constructor initializes with given values', () => {
+    const store = new ToolContextStore({
+      weather: {
+        apiKey: '123',
+      },
+    });
+    expect(store.getToolContext('weather')).toEqual({
+      apiKey: '123',
+    });
+  });
+
+  it('getToolContext returns empty object for unknown tool', () => {
+    const store = new ToolContextStore();
+    expect(store.getToolContext('unknown')).toEqual({});
+  });
+
+  it('setToolContext sets tool context and notifies listeners', () => {
+    const store = new ToolContextStore();
+    // Records whatever the store passes to the listener; only the call count is asserted.
+    const snapshots: unknown[] = [];
+    store.subscribe((s) => snapshots.push(s));
+    store.setToolContext('tool1', {
+      key: 'val',
+    });
+    expect(store.getToolContext('tool1')).toEqual({
+      key: 'val',
+    });
+    expect(snapshots).toHaveLength(1);
+  });
+
+  it('mergeToolContext merges partial values', () => {
+    const store = new ToolContextStore({
+      tool1: {
+        a: 1,
+        b: 2,
+      },
+    });
+    store.mergeToolContext('tool1', {
+      b: 99,
+      c: 3,
+    });
+    expect(store.getToolContext('tool1')).toEqual({
+      a: 1,
+      b: 99,
+      c: 3,
+    });
+  });
+
+  it('getSnapshot returns deep-shallow copy of all contexts', () => {
+    const store = new ToolContextStore({
+      a: {
+        x: 1,
+      },
+      b: {
+        y: 2,
+      },
+    });
+    const snapshot = store.getSnapshot();
+    expect(snapshot).toEqual({
+      a: {
+        x: 1,
+      },
+      b: {
+        y: 2,
+      },
+    });
+    // Mutating the snapshot must not leak back into the store.
+    snapshot.a!.x = 999;
+    expect(store.getToolContext('a')).toEqual({
+      x: 1,
+    });
+  });
+
+  it('subscribe returns unsubscribe function', () => {
+    const store = new ToolContextStore();
+    const calls: number[] = [];
+    const unsub = store.subscribe(() => calls.push(1));
+    store.setToolContext('t', {
+      v: 1,
+    });
+    expect(calls).toHaveLength(1);
+    // After unsubscribing, a further update must not reach the listener.
+    unsub();
+    store.setToolContext('t', {
+      v: 2,
+    });
+    expect(calls).toHaveLength(1);
+  });
+});
+
+describe('buildToolExecuteContext', () => {
+  it('returns object with turnContext fields merged', () => {
+    const ctx = buildToolExecuteContext(turnCtx, undefined, 'myTool', undefined);
+    expect(ctx.numberOfTurns).toBe(1);
+  });
+
+  it('local getter reads from store for the tool name', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const schema = z.object({
+      apiKey: z.string(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', schema);
+    expect(ctx.local).toEqual({
+      apiKey: 'abc',
+    });
+  });
+
+  it('setContext merges partial values into store', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const schema = z.object({
+      apiKey: z.string(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', schema);
+    ctx.setContext({
+      apiKey: 'xyz',
+    });
+    expect(ctx.local).toEqual({
+      apiKey: 'xyz',
+    });
+  });
+
+  it('shared getter reads shared context from store', () => {
+    const store = new ToolContextStore({
+      shared: {
+        globalKey: 'val',
+      },
+    });
+    const sharedSchema = z.object({
+      globalKey: z.string(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', undefined, sharedSchema);
+    expect(ctx.shared).toEqual({
+      globalKey: 'val',
+    });
+  });
+
+  it('setSharedContext updates shared context in store', () => {
+    const store = new ToolContextStore({
+      shared: {
+        globalKey: 'old',
+      },
+    });
+    const sharedSchema = z.object({
+      globalKey: z.string(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', undefined, sharedSchema);
+    ctx.setSharedContext({
+      globalKey: 'new',
+    });
+    expect(ctx.shared).toEqual({
+      globalKey: 'new',
+    });
+  });
+
+  it('local getter returns frozen object', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        val: 1,
+      },
+    });
+    const schema = z.object({
+      val: z.number(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', schema);
+    expect(Object.isFrozen(ctx.local)).toBe(true);
+  });
+});
+
+describe('resolveContext', () => {
+  it('returns empty object when input is undefined', async () => {
+    const result = await resolveContext(undefined, turnCtx);
+    expect(result).toEqual({});
+  });
+
+  it('returns static value as-is', async () => {
+    const input = {
+      myTool: {
+        apiKey: '123',
+      },
+    };
+    const result = await resolveContext(input, turnCtx);
+    expect(result).toEqual({
+      myTool: {
+        apiKey: '123',
+      },
+    });
+  });
+
+  it('calls sync function with turnContext and returns result', async () => {
+    const fn = (ctx: TurnContext) => ({
+      tool: {
+        turn: ctx.numberOfTurns,
+      },
+    });
+    const result = await resolveContext(fn, turnCtx);
+    expect(result).toEqual({
+      tool: {
+        turn: 1,
+      },
+    });
+  });
+
+  it('calls async function with turnContext and returns result', async () => {
+    const fn = async (ctx: TurnContext) => ({
+      tool: {
+        turn: ctx.numberOfTurns * 2,
+      },
+    });
+    const result = await resolveContext(fn, turnCtx);
+    expect(result).toEqual({
+      tool: {
+        turn: 2,
+      },
+    });
+  });
+});
+
+describe('extractToolContext', () => {
+  it('extracts and validates context for tool', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const schema = z.object({
+      apiKey: z.string(),
+    });
+    const result = extractToolContext(store, 'myTool', schema);
+    expect(result).toEqual({
+      apiKey: 'abc',
+    });
+  });
+
+  it('returns empty object when no schema provided', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const result = extractToolContext(store, 'myTool', undefined);
+    expect(result).toEqual({});
+  });
+});
diff --git 
a/tests/behavior/tool-creation.test.ts b/tests/behavior/tool-creation.test.ts
new file mode 100644
index 0000000..d8ecc10
--- /dev/null
+++ b/tests/behavior/tool-creation.test.ts
@@ -0,0 +1,152 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import { tool } from '../../src/lib/tool.js';
+import { ToolType } from '../../src/lib/tool-types.js';
+
+// Tests 1-9: Tool creation via tool() factory
+
+describe('tool creation - tool() factory', () => {
+  it('regular tool returns full shape: type, name, inputSchema, execute, description, outputSchema', () => {
+    const t = tool({
+      name: 'greet',
+      description: 'Say hello',
+      inputSchema: z.object({
+        name: z.string(),
+      }),
+      outputSchema: z.object({
+        greeting: z.string(),
+      }),
+      execute: async (params) => ({
+        greeting: `Hi ${params.name}`,
+      }),
+    });
+
+    expect(t.type).toBe(ToolType.Function);
+    expect(t.function.name).toBe('greet');
+    expect(t.function.description).toBe('Say hello');
+    expect(t.function.inputSchema).toBeDefined();
+    expect(t.function.outputSchema).toBeDefined();
+    expect(t.function.execute).toBeTypeOf('function');
+  });
+
+  it('generator tool with eventSchema returns tool with eventSchema + outputSchema + execute', () => {
+    const t = tool({
+      name: 'stream_tool',
+      inputSchema: z.object({
+        query: z.string(),
+      }),
+      eventSchema: z.object({
+        progress: z.number(),
+      }),
+      outputSchema: z.object({
+        result: z.string(),
+      }),
+      execute: async function* () {
+        yield {
+          progress: 50,
+        };
+        return {
+          result: 'done',
+        };
+      },
+    });
+
+    expect(t.type).toBe(ToolType.Function);
+    expect(t.function.name).toBe('stream_tool');
+    expect(t.function.eventSchema).toBeDefined();
+    expect(t.function.outputSchema).toBeDefined();
+    expect(t.function.execute).toBeTypeOf('function');
+  });
+
+  it('manual tool (execute: false) returns tool with no execute, no outputSchema, no eventSchema', () => {
+    const t = tool({
+      name: 'manual',
+      description: 'Needs manual handling',
+      inputSchema: z.object({
+        action: z.string(),
+      }),
+      execute: false,
+    });
+
+    expect(t.type).toBe(ToolType.Function);
+    expect(t.function.name).toBe('manual');
+    expect(t.function).not.toHaveProperty('execute');
+    // The title also promises the absence of outputSchema; assert it instead of leaving it unchecked.
+    expect(t.function).not.toHaveProperty('outputSchema');
+    expect(t.function).not.toHaveProperty('eventSchema');
+  });
+
+  it('tool with contextSchema preserves schema on function.contextSchema', () => {
+    const ctxSchema = z.object({
+      apiKey: z.string(),
+    });
+    const t = tool({
+      name: 'ctx_tool',
+      inputSchema: z.object({}),
+      contextSchema: ctxSchema,
+      execute: async () => ({}),
+    });
+
+    expect(t.function.contextSchema).toBe(ctxSchema);
+  });
+
+  it('tool with requireApproval: true preserves flag on function', () => {
+    const t = tool({
+      name: 'approval_tool',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+
+    expect(t.function.requireApproval).toBe(true);
+  });
+
+  it('tool with requireApproval function preserves function on function', () => {
+    const check = () => true;
+    const t = tool({
+      name: 'fn_approval',
+      inputSchema: z.object({}),
+      requireApproval: check,
+      execute: async () => ({}),
+    });
+
+    expect(t.function.requireApproval).toBe(check);
+  });
+
+  it('tool with nextTurnParams preserves them on function', () => {
+    const ntp = {
+      temperature: () => 0.5 as number | null,
+    };
+    const t = tool({
+      name: 'ntp_tool',
+      inputSchema: z.object({}),
+      nextTurnParams: ntp,
+      execute: async () => ({}),
+    });
+
+    expect(t.function.nextTurnParams).toBeDefined();
+  });
+
+  it('tool named "shared" throws (reserved for shared context)', () => {
+    // The factory must reject the name at creation time, before any execution happens.
+    expect(() =>
+      tool({
+        name: 'shared',
+        inputSchema: z.object({}),
+        execute: async () => ({}),
+      }),
+    ).toThrow(/reserved/i);
+  });
+
+  it('tool with no description has description absent from function object', () => {
+    const t = tool({
+      name: 'no_desc',
+      inputSchema: z.object({
+        x: z.number(),
+      }),
+      execute: async () => ({}),
+    });
+
+    expect(t.function.description).toBeUndefined();
+  });
+});
diff --git 
a/tests/behavior/tool-event-broadcaster.test.ts b/tests/behavior/tool-event-broadcaster.test.ts
new file mode 100644
index 0000000..b32acc0
--- /dev/null
+++ b/tests/behavior/tool-event-broadcaster.test.ts
@@ -0,0 +1,142 @@
+import { describe, expect, it } from 'vitest';
+import { ToolEventBroadcaster } from '../../src/lib/tool-event-broadcaster.js';
+
+/**
+ * Drains an async iterator into an array, preserving emission order.
+ *
+ * @param iter - The consumer (or any other async iterator) to drain.
+ * @returns Every value yielded before the iterator reported completion.
+ */
+async function collect<T>(iter: AsyncIterableIterator<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const v of iter) {
+    result.push(v);
+  }
+  return result;
+}
+
+// NOTE(review): the explicit type arguments below assume ToolEventBroadcaster
+// is generic over its event type — confirm against the class declaration.
+describe('ToolEventBroadcaster - single consumer', () => {
+  it('consumer receives all pushed events after complete', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1);
+    broadcaster.push(2);
+    broadcaster.push(3);
+    broadcaster.complete();
+    // The consumer is created only after complete(); it must still see all three events.
+    const consumer = broadcaster.createConsumer();
+    const values = await collect(consumer);
+    expect(values).toEqual([
+      1,
+      2,
+      3,
+    ]);
+  });
+
+  it('consumer receives events pushed before and after creation', async () => {
+    const broadcaster = new ToolEventBroadcaster<string>();
+    broadcaster.push('before');
+    const consumer = broadcaster.createConsumer();
+    broadcaster.push('after');
+    broadcaster.complete();
+    const values = await collect(consumer);
+    expect(values).toEqual([
+      'before',
+      'after',
+    ]);
+  });
+
+  it('empty broadcaster yields no values', async () => {
+    const broadcaster = new ToolEventBroadcaster<unknown>();
+    broadcaster.complete();
+    const consumer = broadcaster.createConsumer();
+    const values = await collect(consumer);
+    expect(values).toEqual([]);
+  });
+});
+
+describe('ToolEventBroadcaster - multiple consumers', () => {
+  it('two consumers independently receive all events', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    const c1 = broadcaster.createConsumer();
+    const c2 = broadcaster.createConsumer();
+    broadcaster.push(10);
+    broadcaster.push(20);
+    broadcaster.complete();
+    const [v1, v2] = await Promise.all([
+      collect(c1),
+      collect(c2),
+    ]);
+    expect(v1).toEqual([
+      10,
+      20,
+    ]);
+    expect(v2).toEqual([
+      10,
+      20,
+    ]);
+  });
+});
+
+describe('ToolEventBroadcaster - error handling', () => {
+  it('complete(error) propagates error to consumer', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1);
+    const consumer = broadcaster.createConsumer();
+    // Read the already-pushed event first; the following read must surface the error.
+    const first = await consumer.next();
+    expect(first.value).toBe(1);
+    broadcaster.complete(new Error('test error'));
+    await expect(consumer.next()).rejects.toThrow('test error');
+  });
+});
+
+describe('ToolEventBroadcaster - cancellation', () => {
+  it('consumer.return() cancels the consumer', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1);
+    const consumer = broadcaster.createConsumer();
+    await consumer.return!();
+    const result = await consumer.next();
+    expect(result.done).toBe(true);
+  });
+
+  it('consumer.throw() cancels the consumer and throws', async () => {
+    const broadcaster = new ToolEventBroadcaster<unknown>();
+    const consumer = broadcaster.createConsumer();
+    await expect(consumer.throw!(new Error('abort'))).rejects.toThrow('abort');
+  });
+});
+
+describe('ToolEventBroadcaster - push after complete is ignored', () => {
+  it('events pushed after complete are not delivered', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1);
+    broadcaster.complete();
+    // Pushed after complete(): must never reach a consumer.
+    broadcaster.push(2);
+    const consumer = broadcaster.createConsumer();
+    const values = await collect(consumer);
+    expect(values).toEqual([
+      1,
+    ]);
+  });
+});
+
+describe('ToolEventBroadcaster - async iteration protocol', () => {
+  it('supports for-await-of loop', async () => {
+    const broadcaster = new ToolEventBroadcaster<string>();
+    broadcaster.push('a');
+    broadcaster.push('b');
+    broadcaster.complete();
+    const values: string[] = [];
+    for await (const v of broadcaster.createConsumer()) {
+      values.push(v);
+    }
+    expect(values).toEqual([
+      'a',
+      'b',
+    ]);
+  });
+});
diff --git 
a/tests/behavior/tool-execution.test.ts b/tests/behavior/tool-execution.test.ts new file mode 100644 index 0000000..b5b7ef3 --- /dev/null +++ b/tests/behavior/tool-execution.test.ts @@ -0,0 +1,576 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; +import { tool } from '../../src/lib/tool.js'; +import { + convertToolsToAPIFormat, + convertZodToJsonSchema, + executeGeneratorTool, + executeRegularTool, + executeTool, + findToolByName, + formatToolExecutionError, + formatToolResultForModel, + parseToolCallArguments, + sanitizeJsonSchema, + validateToolInput, + validateToolOutput, +} from '../../src/lib/tool-executor.js'; +import type { ParsedToolCall, Tool, TurnContext } from '../../src/lib/tool-types.js'; + +const turnCtx: TurnContext = { + numberOfTurns: 1, +}; + +describe('tool execution - input validation', () => { + const schema = z.object({ + name: z.string(), + age: z.number(), + }); + + it('validateToolInput with valid args returns validated data', () => { + const result = validateToolInput(schema, { + name: 'Alice', + age: 30, + }); + expect(result).toEqual({ + name: 'Alice', + age: 30, + }); + }); + + it('validateToolInput with invalid args throws ZodError', () => { + expect(() => + validateToolInput(schema, { + name: 123, + }), + ).toThrow(); + }); + + it('validateToolOutput with valid result returns validated data', () => { + const outSchema = z.object({ + sum: z.number(), + }); + const result = validateToolOutput(outSchema, { + sum: 42, + }); + expect(result).toEqual({ + sum: 42, + }); + }); + + it('validateToolOutput with invalid result throws ZodError', () => { + const outSchema = z.object({ + sum: z.number(), + }); + expect(() => + validateToolOutput(outSchema, { + sum: 'not a number', + }), + ).toThrow(); + }); +}); + +describe('tool execution - argument parsing', () => { + it('parseToolCallArguments with valid JSON returns parsed object', () => { + expect(parseToolCallArguments('{"a":1}')).toEqual({ + a: 1, + }); + }); + 
+ it('parseToolCallArguments with empty string returns empty object', () => { + expect(parseToolCallArguments('')).toEqual({}); + }); + + it('parseToolCallArguments with whitespace-only string returns empty object', () => { + expect(parseToolCallArguments(' ')).toEqual({}); + }); + + it('parseToolCallArguments with invalid JSON throws descriptive error', () => { + expect(() => parseToolCallArguments('bad json')).toThrow(/failed to parse/i); + }); +}); + +describe('tool execution - executeRegularTool', () => { + it('executes and returns { toolCallId, toolName, result }', async () => { + const t = tool({ + name: 'add', + inputSchema: z.object({ + a: z.number(), + b: z.number(), + }), + execute: async (params) => ({ + sum: params.a + params.b, + }), + }); + const tc: ParsedToolCall = { + id: 'call_1', + name: 'add', + arguments: { + a: 2, + b: 3, + }, + }; + const result = await executeRegularTool(t, tc, turnCtx); + expect(result.toolCallId).toBe('call_1'); + expect(result.toolName).toBe('add'); + expect(result.result).toEqual({ + sum: 5, + }); + expect(result.error).toBeUndefined(); + }); + + it('returns error when input validation fails', async () => { + const t = tool({ + name: 'strict', + inputSchema: z.object({ + x: z.number(), + }), + execute: async () => ({ + ok: true, + }), + }); + const tc: ParsedToolCall = { + id: 'call_2', + name: 'strict', + arguments: { + x: 'not_num', + }, + }; + const result = await executeRegularTool(t, tc, turnCtx); + expect(result.error).toBeDefined(); + expect(result.result).toBeNull(); + }); + + it('validates output when outputSchema provided', async () => { + const t = tool({ + name: 'typed_out', + inputSchema: z.object({}), + outputSchema: z.object({ + value: z.number(), + }), + execute: async () => ({ + value: 42, + }), + }); + const tc: ParsedToolCall = { + id: 'call_3', + name: 'typed_out', + arguments: {}, + }; + const result = await executeRegularTool(t, tc, turnCtx); + expect(result.result).toEqual({ + value: 42, + }); + 
}); + + it('returns raw result when no outputSchema', async () => { + const t = tool({ + name: 'raw_out', + inputSchema: z.object({}), + execute: async () => ({ + anything: 'goes', + }), + }); + const tc: ParsedToolCall = { + id: 'call_4', + name: 'raw_out', + arguments: {}, + }; + const result = await executeRegularTool(t, tc, turnCtx); + expect(result.result).toEqual({ + anything: 'goes', + }); + }); + + it('catches thrown error and returns { error, result: null }', async () => { + const t = tool({ + name: 'failing', + inputSchema: z.object({}), + execute: async () => { + throw new Error('boom'); + }, + }); + const tc: ParsedToolCall = { + id: 'call_5', + name: 'failing', + arguments: {}, + }; + const result = await executeRegularTool(t, tc, turnCtx); + expect(result.error).toBeDefined(); + expect(result.error!.message).toBe('boom'); + expect(result.result).toBeNull(); + }); +}); + +describe('tool execution - executeGeneratorTool', () => { + it('yields events then returns final result with preliminaryResults', async () => { + const t = tool({ + name: 'gen', + inputSchema: z.object({}), + eventSchema: z.object({ + progress: z.number(), + }), + outputSchema: z.object({ + result: z.string(), + }), + execute: async function* () { + yield { + progress: 50, + }; + yield { + progress: 100, + }; + return { + result: 'done', + }; + }, + }); + const tc: ParsedToolCall = { + id: 'call_6', + name: 'gen', + arguments: {}, + }; + const result = await executeGeneratorTool(t, tc, turnCtx); + expect(result.result).toEqual({ + result: 'done', + }); + expect(result.preliminaryResults).toHaveLength(2); + }); + + it('calls onPreliminaryResult for each yielded event', async () => { + const events: unknown[] = []; + const t = tool({ + name: 'gen_cb', + inputSchema: z.object({}), + eventSchema: z.object({ + step: z.number(), + }), + outputSchema: z.object({ + done: z.boolean(), + }), + execute: async function* () { + yield { + step: 1, + }; + yield { + step: 2, + }; + return { + done: 
true, + }; + }, + }); + const tc: ParsedToolCall = { + id: 'call_7', + name: 'gen_cb', + arguments: {}, + }; + await executeGeneratorTool(t, tc, turnCtx, (_id, ev) => events.push(ev)); + expect(events).toHaveLength(2); + expect(events[0]).toEqual({ + step: 1, + }); + }); + + it('returns final result with empty preliminaryResults when only return value', async () => { + const t = tool({ + name: 'gen_ret', + inputSchema: z.object({}), + eventSchema: z.object({ + ev: z.string(), + }), + outputSchema: z.object({ + val: z.number(), + }), + execute: async function* () { + return { + val: 42, + }; + }, + }); + const tc: ParsedToolCall = { + id: 'call_8', + name: 'gen_ret', + arguments: {}, + }; + const result = await executeGeneratorTool(t, tc, turnCtx); + expect(result.result).toEqual({ + val: 42, + }); + expect(result.preliminaryResults).toHaveLength(0); + }); + + it('returns error when generator throws', async () => { + const t = tool({ + name: 'gen_err', + inputSchema: z.object({}), + eventSchema: z.object({ + ev: z.string(), + }), + outputSchema: z.object({ + val: z.number(), + }), + execute: async function* () { + throw new Error('gen boom'); + }, + }); + const tc: ParsedToolCall = { + id: 'call_9', + name: 'gen_err', + arguments: {}, + }; + const result = await executeGeneratorTool(t, tc, turnCtx); + expect(result.error).toBeDefined(); + expect(result.error!.message).toBe('gen boom'); + }); + + it('returns error when generator emits nothing', async () => { + const t = tool({ + name: 'gen_empty', + inputSchema: z.object({}), + eventSchema: z.object({ + ev: z.string(), + }), + outputSchema: z.object({ + val: z.number(), + }), + execute: async function* () { + // yields nothing, returns nothing + }, + }); + const tc: ParsedToolCall = { + id: 'call_10', + name: 'gen_empty', + arguments: {}, + }; + const result = await executeGeneratorTool(t, tc, turnCtx); + expect(result.error).toBeDefined(); + expect(result.error!.message).toContain('without emitting'); + }); +}); + 
+describe('tool execution - executeTool dispatch', () => { + it('dispatches regular tool to executeRegularTool', async () => { + const t = tool({ + name: 'reg', + inputSchema: z.object({ + x: z.number(), + }), + execute: async (p) => ({ + doubled: p.x * 2, + }), + }); + const tc: ParsedToolCall = { + id: 'c1', + name: 'reg', + arguments: { + x: 5, + }, + }; + const result = await executeTool(t, tc, turnCtx); + expect(result.result).toEqual({ + doubled: 10, + }); + }); + + it('dispatches generator tool to executeGeneratorTool', async () => { + const t = tool({ + name: 'gen', + inputSchema: z.object({}), + eventSchema: z.object({ + ev: z.number(), + }), + outputSchema: z.object({ + done: z.boolean(), + }), + execute: async function* () { + yield { + ev: 1, + }; + return { + done: true, + }; + }, + }); + const tc: ParsedToolCall = { + id: 'c2', + name: 'gen', + arguments: {}, + }; + const result = await executeTool(t, tc, turnCtx); + expect(result.result).toEqual({ + done: true, + }); + expect(result.preliminaryResults).toHaveLength(1); + }); + + it('throws for manual tool (no execute function)', async () => { + const t = tool({ + name: 'manual', + inputSchema: z.object({}), + execute: false, + }); + const tc: ParsedToolCall = { + id: 'c3', + name: 'manual', + arguments: {}, + }; + await expect(executeTool(t, tc, turnCtx)).rejects.toThrow(/no execute function/i); + }); +}); + +describe('tool execution - utility functions', () => { + it('findToolByName returns matching tool', () => { + const t = tool({ + name: 'x', + inputSchema: z.object({}), + execute: async () => ({}), + }); + expect( + findToolByName( + [ + t, + ], + 'x', + ), + ).toBe(t); + }); + + it('findToolByName returns undefined for missing tool', () => { + const t = tool({ + name: 'x', + inputSchema: z.object({}), + execute: async () => ({}), + }); + expect( + findToolByName( + [ + t, + ], + 'missing', + ), + ).toBeUndefined(); + }); + + it('formatToolResultForModel with success returns JSON of result', () 
=> { + const json = formatToolResultForModel({ + toolCallId: 'c1', + toolName: 'test', + result: { + data: 42, + }, + }); + expect(JSON.parse(json)).toEqual({ + data: 42, + }); + }); + + it('formatToolResultForModel with error returns JSON with error message', () => { + const json = formatToolResultForModel({ + toolCallId: 'c2', + toolName: 'test', + result: null, + error: new Error('fail'), + }); + const parsed = JSON.parse(json); + expect(parsed.error).toBe('fail'); + expect(parsed.toolName).toBe('test'); + }); + + it('formatToolExecutionError with ZodError includes validation details', () => { + try { + z.parse( + z.object({ + x: z.number(), + }), + { + x: 'bad', + }, + ); + } catch (e) { + const tc: ParsedToolCall = { + id: 'c3', + name: 'myTool', + arguments: {}, + }; + const msg = formatToolExecutionError(e as Error, tc); + expect(msg).toContain('myTool'); + expect(msg).toContain('validation error'); + } + }); + + it('formatToolExecutionError with generic Error includes message', () => { + const tc: ParsedToolCall = { + id: 'c4', + name: 'myTool', + arguments: {}, + }; + const msg = formatToolExecutionError(new Error('something went wrong'), tc); + expect(msg).toContain('myTool'); + expect(msg).toContain('something went wrong'); + }); + + it('convertToolsToAPIFormat returns correct API shape array', () => { + const t = tool({ + name: 'api_tool', + description: 'Does stuff', + inputSchema: z.object({ + x: z.number(), + }), + execute: async () => ({}), + }); + const apiTools = convertToolsToAPIFormat([ + t, + ]); + expect(apiTools).toHaveLength(1); + expect(apiTools[0]!.type).toBe('function'); + expect(apiTools[0]!.name).toBe('api_tool'); + expect(apiTools[0]!.description).toBe('Does stuff'); + expect(apiTools[0]!.parameters).toBeDefined(); + }); + + it('convertZodToJsonSchema produces valid JSON schema from Zod', () => { + const schema = z.object({ + x: z.number(), + y: z.string(), + }); + const jsonSchema = convertZodToJsonSchema(schema); + 
expect(jsonSchema).toHaveProperty('type', 'object'); + expect(jsonSchema).toHaveProperty('properties'); + }); + + it('sanitizeJsonSchema removes ~prefixed keys recursively', () => { + const input = { + type: 'object', + '~standard': { + meta: true, + }, + properties: { + x: { + type: 'number', + '~standard': {}, + }, + }, + }; + const result = sanitizeJsonSchema(input); + expect(result).not.toHaveProperty('~standard'); + expect((result as Record).type).toBe('object'); + }); + + it('sanitizeJsonSchema handles primitives, null, arrays', () => { + expect(sanitizeJsonSchema(null)).toBeNull(); + expect(sanitizeJsonSchema(42)).toBe(42); + expect( + sanitizeJsonSchema([ + { + '~meta': 1, + val: 2, + }, + ]), + ).toEqual([ + { + val: 2, + }, + ]); + }); +}); diff --git a/tests/behavior/tool-orchestrator.test.ts b/tests/behavior/tool-orchestrator.test.ts new file mode 100644 index 0000000..005ad8c --- /dev/null +++ b/tests/behavior/tool-orchestrator.test.ts @@ -0,0 +1,118 @@ +import { describe, expect, it } from 'vitest'; +import { + getToolExecutionErrors, + hasToolExecutionErrors, + summarizeToolExecutions, + toolResultsToMap, +} from '../../src/lib/tool-orchestrator.js'; +import type { Tool, ToolExecutionResult } from '../../src/lib/tool-types.js'; + +function makeResult(overrides: Partial>): ToolExecutionResult { + return { + toolCallId: 'c1', + toolName: 'test', + result: { + ok: true, + }, + ...overrides, + }; +} + +describe('tool orchestrator - toolResultsToMap', () => { + it('converts results array to map keyed by toolCallId', () => { + const results = [ + makeResult({ + toolCallId: 'c1', + result: 'a', + }), + makeResult({ + toolCallId: 'c2', + result: 'b', + }), + ]; + const map = toolResultsToMap(results); + expect(map.size).toBe(2); + expect(map.get('c1')!.result).toBe('a'); + expect(map.get('c2')!.result).toBe('b'); + }); + + it('includes preliminaryResults in map entries', () => { + const results = [ + makeResult({ + toolCallId: 'c1', + result: 'final', + 
preliminaryResults: [ + 'p1', + 'p2', + ], + }), + ]; + const map = toolResultsToMap(results); + expect(map.get('c1')!.preliminaryResults).toEqual([ + 'p1', + 'p2', + ]); + }); +}); + +describe('tool orchestrator - summarizeToolExecutions', () => { + it('produces success line for successful result', () => { + const summary = summarizeToolExecutions([ + makeResult({ + toolCallId: 'c1', + toolName: 'add', + }), + ]); + expect(summary).toContain('add'); + expect(summary).toContain('c1'); + }); + + it('produces error line for failed result', () => { + const summary = summarizeToolExecutions([ + makeResult({ + toolCallId: 'c2', + toolName: 'fail', + result: null, + error: new Error('oops'), + }), + ]); + expect(summary).toContain('fail'); + expect(summary).toContain('oops'); + }); +}); + +describe('tool orchestrator - hasToolExecutionErrors', () => { + it('returns true when any result has error', () => { + expect( + hasToolExecutionErrors([ + makeResult({}), + makeResult({ + error: new Error('err'), + }), + ]), + ).toBe(true); + }); + + it('returns false when no results have errors', () => { + expect( + hasToolExecutionErrors([ + makeResult({}), + ]), + ).toBe(false); + }); +}); + +describe('tool orchestrator - getToolExecutionErrors', () => { + it('extracts all error objects from results', () => { + const err = new Error('err1'); + const results = [ + makeResult({}), + makeResult({ + error: err, + }), + ]; + const errors = getToolExecutionErrors(results); + expect(errors).toHaveLength(1); + expect(errors[0]).toBe(err); + }); +}); diff --git a/tests/behavior/tool-types-events.test.ts b/tests/behavior/tool-types-events.test.ts new file mode 100644 index 0000000..f8e44cd --- /dev/null +++ b/tests/behavior/tool-types-events.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, it } from 'vitest'; +import { + isToolCallOutputEvent, + isToolPreliminaryResultEvent, + isToolResultEvent, + isTurnEndEvent, + isTurnStartEvent, +} from '../../src/lib/tool-types.js'; + 
+describe('tool-types event type guards', () => { + it('isToolPreliminaryResultEvent matches tool.preliminary_result', () => { + expect( + isToolPreliminaryResultEvent({ + type: 'tool.preliminary_result', + toolCallId: 'c1', + result: {}, + timestamp: 0, + }), + ).toBe(true); + }); + + it('isToolPreliminaryResultEvent rejects tool.result', () => { + expect( + isToolPreliminaryResultEvent({ + type: 'tool.result', + toolCallId: 'c1', + result: {}, + timestamp: 0, + }), + ).toBe(false); + }); + + it('isToolResultEvent matches tool.result', () => { + expect( + isToolResultEvent({ + type: 'tool.result', + toolCallId: 'c1', + result: {}, + timestamp: 0, + }), + ).toBe(true); + }); + + it('isToolResultEvent rejects tool.preliminary_result', () => { + expect( + isToolResultEvent({ + type: 'tool.preliminary_result', + toolCallId: 'c1', + result: {}, + timestamp: 0, + }), + ).toBe(false); + }); + + it('isToolCallOutputEvent matches tool.call_output', () => { + expect( + isToolCallOutputEvent({ + type: 'tool.call_output', + output: {}, + timestamp: 0, + }), + ).toBe(true); + }); + + it('isTurnStartEvent matches turn.start', () => { + expect( + isTurnStartEvent({ + type: 'turn.start', + turnNumber: 1, + timestamp: 0, + }), + ).toBe(true); + }); + + it('isTurnEndEvent matches turn.end', () => { + expect( + isTurnEndEvent({ + type: 'turn.end', + turnNumber: 1, + timestamp: 0, + }), + ).toBe(true); + }); + + it('isTurnStartEvent rejects turn.end', () => { + expect( + isTurnStartEvent({ + type: 'turn.end', + turnNumber: 1, + timestamp: 0, + }), + ).toBe(false); + }); +}); diff --git a/tests/behavior/turn-context.test.ts b/tests/behavior/turn-context.test.ts new file mode 100644 index 0000000..0896777 --- /dev/null +++ b/tests/behavior/turn-context.test.ts @@ -0,0 +1,68 @@ +import { describe, expect, it } from 'vitest'; +import { buildTurnContext, normalizeInputToArray } from '../../src/lib/turn-context.js'; +import { makeRequest, TEST_MODEL } from '../test-constants.js'; + 
+describe('turn context - buildTurnContext', () => { + it('sets numberOfTurns from options', () => { + const ctx = buildTurnContext({ + numberOfTurns: 3, + }); + expect(ctx.numberOfTurns).toBe(3); + }); + + it('includes toolCall when provided', () => { + const toolCall = { + type: 'function_call' as const, + callId: 'c1', + name: 'test', + arguments: '{}', + id: 'c1', + status: 'completed' as const, + }; + const ctx = buildTurnContext({ + numberOfTurns: 1, + toolCall, + }); + expect(ctx.toolCall).toBe(toolCall); + }); + + it('includes turnRequest when provided', () => { + const request = makeRequest({ + model: TEST_MODEL, + input: 'hello', + }); + const ctx = buildTurnContext({ + numberOfTurns: 1, + turnRequest: request, + }); + expect(ctx.turnRequest).toBe(request); + }); + + it('omits toolCall and turnRequest when not provided', () => { + const ctx = buildTurnContext({ + numberOfTurns: 0, + }); + expect(ctx).not.toHaveProperty('toolCall'); + expect(ctx).not.toHaveProperty('turnRequest'); + }); +}); + +describe('turn context - normalizeInputToArray', () => { + it('converts string input to array with user message', () => { + const result = normalizeInputToArray('Hello!'); + expect(result).toHaveLength(1); + expect(result[0]).toHaveProperty('role', 'user'); + expect(result[0]).toHaveProperty('content', 'Hello!'); + }); + + it('returns array input as-is', () => { + const input = [ + { + role: 'user' as const, + content: 'hi', + }, + ]; + const result = normalizeInputToArray(input); + expect(result).toBe(input); + }); +}); diff --git a/tests/boundaries/README.md b/tests/boundaries/README.md new file mode 100644 index 0000000..02e8fb4 --- /dev/null +++ b/tests/boundaries/README.md @@ -0,0 +1,26 @@ +# Boundaries Tests + +Tests in this folder verify that **similar capabilities are correctly bounded from each other**. "This one handles X; that one handles Y; they don't bleed." Each guard, classifier, or extractor must accept its own domain and reject its peers. 
+ +## What belongs here + +- Mutual exclusion between type guards (stream event guards, output item guards, content part guards) +- Cross-domain rejection (stream guards reject output items and vice versa) +- Tool type classifier mutual exclusion (regular vs generator vs manual) +- Null/undefined/malformed input safety for all guards +- Structural distinction between similar output shapes +- Extendable: when new guards or classifiers are added, their mutual exclusion tests go here + +## Examples + +- `isOutputTextDeltaEvent` returns true for text delta, false for reasoning delta +- `isOutputMessage` rejects a `TextDeltaEvent` (wrong domain) +- Regular tool satisfies `isRegularExecuteTool` but NOT `isGeneratorTool` +- `isToolPreliminaryResultEvent` rejects `{ type: 'tool.result' }` +- `createUnsentResult` shape vs `createRejectedResult` shape + +## What does NOT belong here + +- Testing what a guard does with valid input in detail (→ `behavior/`) +- Testing that guard output feeds a transformer (→ `composition/` or `dispatch/`) +- End-to-end type-guard-driven pipelines (→ `pipelines/`) diff --git a/tests/boundaries/content-annotation-guards.test.ts b/tests/boundaries/content-annotation-guards.test.ts new file mode 100644 index 0000000..56de338 --- /dev/null +++ b/tests/boundaries/content-annotation-guards.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it } from 'vitest'; + +import { + hasTypeProperty, + isFileCitationAnnotation, + isFilePathAnnotation, + isOutputTextPart, + isRefusalPart, + isURLCitationAnnotation, +} from '../../src/lib/stream-type-guards.js'; + +describe('Content part and annotation guards - boundary between similar types', () => { + it('isOutputTextPart: true for output_text, false for refusal', () => { + expect( + isOutputTextPart({ + type: 'output_text', + }), + ).toBe(true); + expect( + isOutputTextPart({ + type: 'refusal', + }), + ).toBe(false); + }); + + it('isRefusalPart: true for refusal, false for output_text', () => { + expect( + 
isRefusalPart({ + type: 'refusal', + }), + ).toBe(true); + expect( + isRefusalPart({ + type: 'output_text', + }), + ).toBe(false); + }); + + it('isFileCitationAnnotation: true for file_citation, false for url_citation', () => { + expect( + isFileCitationAnnotation({ + type: 'file_citation', + }), + ).toBe(true); + expect( + isFileCitationAnnotation({ + type: 'url_citation', + }), + ).toBe(false); + }); + + it('isURLCitationAnnotation: true for url_citation, false for file_citation', () => { + expect( + isURLCitationAnnotation({ + type: 'url_citation', + }), + ).toBe(true); + expect( + isURLCitationAnnotation({ + type: 'file_citation', + }), + ).toBe(false); + }); + + it('isFilePathAnnotation: true for file_path, false for file_citation', () => { + expect( + isFilePathAnnotation({ + type: 'file_path', + }), + ).toBe(true); + expect( + isFilePathAnnotation({ + type: 'file_citation', + }), + ).toBe(false); + }); + + it('hasTypeProperty: { type: "x" } -> true; { type: 123 } -> false; null -> false', () => { + expect( + hasTypeProperty({ + type: 'x', + }), + ).toBe(true); + expect( + hasTypeProperty({ + type: 123, + }), + ).toBe(false); + expect(hasTypeProperty(null)).toBe(false); + }); +}); diff --git a/tests/boundaries/domain-separation.test.ts b/tests/boundaries/domain-separation.test.ts new file mode 100644 index 0000000..ad3ba49 --- /dev/null +++ b/tests/boundaries/domain-separation.test.ts @@ -0,0 +1,45 @@ +import { describe, expect, it } from 'vitest'; + +import { + isFunctionCallArgumentsDeltaEvent, + isFunctionCallItem, + isOutputMessage, + isOutputTextDeltaEvent, +} from '../../src/lib/stream-type-guards.js'; + +describe('Stream guards vs output item guards - domain separation', () => { + it('isOutputTextDeltaEvent rejects an OutputMessage (item, not stream event)', () => { + const item = { + type: 'message', + role: 'assistant', + content: [], + }; + expect(isOutputTextDeltaEvent(item as unknown as StreamEvents)).toBe(false); + }); + + it('isOutputMessage 
rejects a TextDeltaEvent (stream event, not item)', () => { + const event = { + type: 'response.output_text.delta', + delta: 'hello', + }; + expect(isOutputMessage(event)).toBe(false); + }); + + it('isFunctionCallArgumentsDeltaEvent rejects a FunctionCallItem (completed item, not delta)', () => { + const item = { + type: 'function_call', + callId: 'c1', + name: 'test', + arguments: '{}', + }; + expect(isFunctionCallArgumentsDeltaEvent(item as unknown as StreamEvents)).toBe(false); + }); + + it('isFunctionCallItem rejects a FunctionCallArgsDeltaEvent (delta, not item)', () => { + const event = { + type: 'response.function_call_arguments.delta', + delta: '{"a":', + }; + expect(isFunctionCallItem(event)).toBe(false); + }); +}); diff --git a/tests/boundaries/execute-tool-boundary.test.ts b/tests/boundaries/execute-tool-boundary.test.ts new file mode 100644 index 0000000..c7df6a8 --- /dev/null +++ b/tests/boundaries/execute-tool-boundary.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { tool } from '../../src/index.js'; +import { executeGeneratorTool, executeRegularTool } from '../../src/lib/tool-executor.js'; + +describe('executeRegularTool vs executeGeneratorTool - structural boundary', () => { + const regularTool = tool({ + name: 'regular', + inputSchema: z.object({ + x: z.number(), + }), + execute: async (args) => args.x * 2, + }); + + const generatorTool = tool({ + name: 'generator', + inputSchema: z.object({ + x: z.number(), + }), + eventSchema: z.object({ + progress: z.number(), + }), + outputSchema: z.object({ + result: z.number(), + }), + execute: async function* (args) { + yield { + progress: 50, + }; + return { + result: args.x * 2, + }; + }, + }); + + const toolCall = { + id: 'tc_1', + name: 'test', + arguments: { + x: 5, + }, + }; + const turnCtx = { + numberOfTurns: 1, + }; + + it('executeRegularTool throws when given a generator tool', async () => { + await 
expect(executeRegularTool(generatorTool, toolCall, turnCtx)).rejects.toThrow(); + }); + + it('executeGeneratorTool throws when given a regular tool', async () => { + await expect(executeGeneratorTool(regularTool, toolCall, turnCtx)).rejects.toThrow(); + }); + + it('executeRegularTool result has NO preliminaryResults', async () => { + const result = await executeRegularTool(regularTool, toolCall, turnCtx); + expect(result).not.toHaveProperty('preliminaryResults'); + }); + + it('executeGeneratorTool result HAS preliminaryResults array', async () => { + const result = await executeGeneratorTool(generatorTool, toolCall, turnCtx); + expect(result).toHaveProperty('preliminaryResults'); + expect(Array.isArray(result.preliminaryResults)).toBe(true); + }); +}); diff --git a/tests/boundaries/output-item-guards.test.ts b/tests/boundaries/output-item-guards.test.ts new file mode 100644 index 0000000..df5d29b --- /dev/null +++ b/tests/boundaries/output-item-guards.test.ts @@ -0,0 +1,70 @@ +import { describe, expect, it } from 'vitest'; + +import { + isFileSearchCallOutputItem, + isFunctionCallItem, + isImageGenerationCallOutputItem, + isOutputMessage, + isReasoningOutputItem, + isWebSearchCallOutputItem, +} from '../../src/lib/stream-type-guards.js'; + +const guards = [ + { + name: 'isOutputMessage', + fn: isOutputMessage, + type: 'message', + }, + { + name: 'isFunctionCallItem', + fn: isFunctionCallItem, + type: 'function_call', + }, + { + name: 'isReasoningOutputItem', + fn: isReasoningOutputItem, + type: 'reasoning', + }, + { + name: 'isWebSearchCallOutputItem', + fn: isWebSearchCallOutputItem, + type: 'web_search_call', + }, + { + name: 'isFileSearchCallOutputItem', + fn: isFileSearchCallOutputItem, + type: 'file_search_call', + }, + { + name: 'isImageGenerationCallOutputItem', + fn: isImageGenerationCallOutputItem, + type: 'image_generation_call', + }, +] as const; + +describe('Output item type guards - mutual exclusion', () => { + for (const guard of guards) { + 
describe(guard.name, () => { + it(`returns true for its own item type: ${guard.type}`, () => { + const item = { + type: guard.type, + }; + expect(guard.fn(item)).toBe(true); + }); + + it('returns false for at least one other output item type', () => { + const other = guards.find((g) => g.type !== guard.type)!; + const item = { + type: other.type, + }; + expect(guard.fn(item)).toBe(false); + }); + + it('returns false for null, undefined, and primitive', () => { + expect(guard.fn(null)).toBe(false); + expect(guard.fn(undefined)).toBe(false); + expect(guard.fn(42)).toBe(false); + }); + }); + } +}); diff --git a/tests/boundaries/response-stream-event-guards.test.ts b/tests/boundaries/response-stream-event-guards.test.ts new file mode 100644 index 0000000..27e8fdc --- /dev/null +++ b/tests/boundaries/response-stream-event-guards.test.ts @@ -0,0 +1,59 @@ +import { describe, expect, it } from 'vitest'; + +import { + isToolCallOutputEvent, + isToolPreliminaryResultEvent, + isToolResultEvent, + isTurnEndEvent, + isTurnStartEvent, +} from '../../src/lib/tool-types.js'; + +describe('ResponseStreamEvent guards - mutual exclusion', () => { + it('isToolPreliminaryResultEvent rejects { type: "tool.result" }', () => { + const event = { + type: 'tool.result', + toolCallId: 'c1', + result: 42, + timestamp: 1, + }; + expect(isToolPreliminaryResultEvent(event)).toBe(false); + }); + + it('isToolResultEvent rejects { type: "tool.preliminary_result" }', () => { + const event = { + type: 'tool.preliminary_result', + toolCallId: 'c1', + result: 42, + timestamp: 1, + }; + expect(isToolResultEvent(event)).toBe(false); + }); + + it('isTurnStartEvent rejects { type: "turn.end" }', () => { + const event = { + type: 'turn.end', + turnNumber: 1, + timestamp: 1, + }; + expect(isTurnStartEvent(event)).toBe(false); + }); + + it('isTurnEndEvent rejects { type: "turn.start" }', () => { + const event = { + type: 'turn.start', + turnNumber: 1, + timestamp: 1, + }; + 
expect(isTurnEndEvent(event)).toBe(false); + }); + + it('isToolCallOutputEvent rejects { type: "tool.result" }', () => { + const event = { + type: 'tool.result', + toolCallId: 'c1', + result: 42, + timestamp: 1, + }; + expect(isToolCallOutputEvent(event)).toBe(false); + }); +}); diff --git a/tests/boundaries/stream-event-guards.test.ts b/tests/boundaries/stream-event-guards.test.ts new file mode 100644 index 0000000..5fef197 --- /dev/null +++ b/tests/boundaries/stream-event-guards.test.ts @@ -0,0 +1,97 @@ +import { describe, expect, it } from 'vitest'; + +import { + isFunctionCallArgumentsDeltaEvent, + isFunctionCallArgumentsDoneEvent, + isOutputItemAddedEvent, + isOutputItemDoneEvent, + isOutputTextDeltaEvent, + isReasoningDeltaEvent, + isResponseCompletedEvent, + isResponseFailedEvent, + isResponseIncompleteEvent, +} from '../../src/lib/stream-type-guards.js'; +import { makeRequest } from '../test-constants.js'; + +const guards = [ + { + name: 'isOutputTextDeltaEvent', + fn: isOutputTextDeltaEvent, + type: 'response.output_text.delta', + }, + { + name: 'isReasoningDeltaEvent', + fn: isReasoningDeltaEvent, + type: 'response.reasoning_text.delta', + }, + { + name: 'isFunctionCallArgumentsDeltaEvent', + fn: isFunctionCallArgumentsDeltaEvent, + type: 'response.function_call_arguments.delta', + }, + { + name: 'isOutputItemAddedEvent', + fn: isOutputItemAddedEvent, + type: 'response.output_item.added', + }, + { + name: 'isOutputItemDoneEvent', + fn: isOutputItemDoneEvent, + type: 'response.output_item.done', + }, + { + name: 'isResponseCompletedEvent', + fn: isResponseCompletedEvent, + type: 'response.completed', + }, + { + name: 'isResponseFailedEvent', + fn: isResponseFailedEvent, + type: 'response.failed', + }, + { + name: 'isResponseIncompleteEvent', + fn: isResponseIncompleteEvent, + type: 'response.incomplete', + }, + { + name: 'isFunctionCallArgumentsDoneEvent', + fn: isFunctionCallArgumentsDoneEvent, + type: 'response.function_call_arguments.done', + }, +] as 
const; + +describe('Stream event type guards - mutual exclusion', () => { + for (const guard of guards) { + describe(guard.name, () => { + it(`returns true for its own event type: ${guard.type}`, () => { + const event = { + type: guard.type, + }; + expect(guard.fn(event)).toBe(true); + }); + + it('returns false for at least one other stream event type', () => { + const other = guards.find((g) => g.type !== guard.type)!; + const event = { + type: other.type, + }; + expect(guard.fn(event)).toBe(false); + }); + + it('returns false for objects missing type or with wrong type', () => { + expect(guard.fn(makeRequest({}))).toBe(false); + expect( + guard.fn({ + type: 'unrelated.event', + } as unknown as StreamEvents), + ).toBe(false); + expect( + guard.fn({ + type: '', + } as unknown as StreamEvents), + ).toBe(false); + }); + }); + } +}); diff --git a/tests/boundaries/tool-type-guards.test.ts b/tests/boundaries/tool-type-guards.test.ts new file mode 100644 index 0000000..83dff73 --- /dev/null +++ b/tests/boundaries/tool-type-guards.test.ts @@ -0,0 +1,86 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { tool } from '../../src/index.js'; +import { + hasExecuteFunction, + isGeneratorTool, + isManualTool, + isRegularExecuteTool, +} from '../../src/lib/tool-types.js'; + +describe('Tool type guards - mutual exclusion across 4 classifiers', () => { + const regularTool = tool({ + name: 'regular', + description: 'A regular tool', + inputSchema: z.object({ + x: z.number(), + }), + execute: async (args) => args.x * 2, + }); + + const generatorTool = tool({ + name: 'generator', + description: 'A generator tool', + inputSchema: z.object({ + x: z.number(), + }), + eventSchema: z.object({ + progress: z.number(), + }), + outputSchema: z.object({ + result: z.number(), + }), + execute: async function* (args) { + yield { + progress: 50, + }; + return { + result: args.x * 2, + }; + }, + }); + + const manualTool = tool({ + name: 'manual', + 
description: 'A manual tool', + inputSchema: z.object({ + x: z.number(), + }), + execute: false, + }); + + it('regular tool: hasExecuteFunction=T, isRegularExecuteTool=T, isGeneratorTool=F, isManualTool=F', () => { + expect(hasExecuteFunction(regularTool)).toBe(true); + expect(isRegularExecuteTool(regularTool)).toBe(true); + expect(isGeneratorTool(regularTool)).toBe(false); + expect(isManualTool(regularTool)).toBe(false); + }); + + it('generator tool: hasExecuteFunction=T, isRegularExecuteTool=F, isGeneratorTool=T, isManualTool=F', () => { + expect(hasExecuteFunction(generatorTool)).toBe(true); + expect(isRegularExecuteTool(generatorTool)).toBe(false); + expect(isGeneratorTool(generatorTool)).toBe(true); + expect(isManualTool(generatorTool)).toBe(false); + }); + + it('manual tool: hasExecuteFunction=F, isRegularExecuteTool=F, isGeneratorTool=F, isManualTool=T', () => { + expect(hasExecuteFunction(manualTool)).toBe(false); + expect(isRegularExecuteTool(manualTool)).toBe(false); + expect(isGeneratorTool(manualTool)).toBe(false); + expect(isManualTool(manualTool)).toBe(true); + }); + + it('no tool satisfies both isRegularExecuteTool and isGeneratorTool', () => { + const allTools = [ + regularTool, + generatorTool, + manualTool, + ]; + for (const t of allTools) { + const isRegular = isRegularExecuteTool(t); + const isGenerator = isGeneratorTool(t); + expect(isRegular && isGenerator).toBe(false); + } + }); +}); diff --git a/tests/composition/README.md b/tests/composition/README.md new file mode 100644 index 0000000..fe8db71 --- /dev/null +++ b/tests/composition/README.md @@ -0,0 +1,25 @@ +# Composition Tests + +Tests in this folder verify that **capabilities compose** — the output of one module is accepted as input by the next. No detailed correctness of individual outputs; just: do they connect? 
+ +## What belongs here + +- Module A's output shape is accepted by Module B's input +- Data flows through a two-module chain without errors +- Multi-consumer scenarios where the same source feeds multiple consumers +- Cross-module contract verification (e.g., orchestrator utilities consume real executor results) +- Extendable: when new modules are introduced, their connection tests with existing modules go here + +## Examples + +- `tool()` output is accepted by `isRegularExecuteTool` / `convertToolsToAPIFormat` +- Two `ReusableReadableStream` consumers both receive all items +- `executeNextTurnParamsFunctions` output is accepted by `applyNextTurnParamsToRequest` +- `createUnsentResult` output is accepted by `unsentResultsToAPIFormat` +- `partitionToolCalls` internally uses `toolRequiresApproval` + +## What does NOT belong here + +- Verifying the detailed correctness of each module's output (→ `behavior/`) +- Verifying that similar modules don't accept each other's input (→ `boundaries/`) +- Full multi-layer pipelines with per-layer assertions (→ `pipelines/`) diff --git a/tests/composition/context-flow.test.ts b/tests/composition/context-flow.test.ts new file mode 100644 index 0000000..caba3ce --- /dev/null +++ b/tests/composition/context-flow.test.ts @@ -0,0 +1,46 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; +import { + buildToolExecuteContext, + resolveContext, + ToolContextStore, +} from '../../src/lib/tool-context.js'; +import { buildTurnContext } from '../../src/lib/turn-context.js'; + +describe('Context flow: turn context -> tool execute context -> tool function', () => { + it('buildToolExecuteContext receives TurnContext from buildTurnContext -> tool execute receives correct numberOfTurns', () => { + const turnCtx = buildTurnContext({ + numberOfTurns: 3, + }); + const store = new ToolContextStore(); + + const execCtx = buildToolExecuteContext(turnCtx, store, 'test', undefined); + expect(execCtx.numberOfTurns).toBe(3); + 
}); + + it('resolveContext passes TurnContext to context function -> result populates ToolContextStore -> buildToolExecuteContext.local reads from store', async () => { + const turnCtx = buildTurnContext({ + numberOfTurns: 2, + }); + const contextFn = (ctx: { numberOfTurns: number }) => ({ + apiKey: `key-for-turn-${ctx.numberOfTurns}`, + }); + + const resolved = await resolveContext(contextFn, turnCtx); + expect(resolved).toEqual({ + apiKey: 'key-for-turn-2', + }); + + const store = new ToolContextStore({ + test: resolved, + }); + const contextSchema = z.object({ + apiKey: z.string(), + }); + + const execCtx = buildToolExecuteContext(turnCtx, store, 'test', contextSchema); + expect(execCtx.local).toEqual({ + apiKey: 'key-for-turn-2', + }); + }); +}); diff --git a/tests/composition/state-machine.test.ts b/tests/composition/state-machine.test.ts new file mode 100644 index 0000000..c1e3d0f --- /dev/null +++ b/tests/composition/state-machine.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { tool } from '../../src/index.js'; +import { + createRejectedResult, + createUnsentResult, + partitionToolCalls, + unsentResultsToAPIFormat, +} from '../../src/lib/conversation-state.js'; + +describe('State machine: state -> approval -> resumption', () => { + it('partitionToolCalls uses toolRequiresApproval internally -> partitioned results are consistent', async () => { + const approvalTool = tool({ + name: 'dangerous', + inputSchema: z.object({ + target: z.string(), + }), + requireApproval: true, + execute: async () => 'deleted', + }); + + const safeTool = tool({ + name: 'safe', + inputSchema: z.object({ + q: z.string(), + }), + execute: async () => 'result', + }); + + const toolCalls = [ + { + id: 'tc_1', + name: 'dangerous', + arguments: { + target: 'file.txt', + }, + }, + { + id: 'tc_2', + name: 'safe', + arguments: { + q: 'hello', + }, + }, + ]; + + const tools = [ + approvalTool, + safeTool, + ]; + const partition 
= await partitionToolCalls(toolCalls, tools); + + expect(partition.requiresApproval).toHaveLength(1); + expect(partition.autoExecute).toHaveLength(1); + expect(partition.requiresApproval[0]!.name).toBe('dangerous'); + expect(partition.autoExecute[0]!.name).toBe('safe'); + }); + + it('createUnsentResult / createRejectedResult output accepted by unsentResultsToAPIFormat', () => { + const unsent = createUnsentResult('tc_1', 'search', { + data: 'found', + }); + const rejected = createRejectedResult('tc_2', 'delete'); + + const formatted = unsentResultsToAPIFormat([ + unsent, + rejected, + ]); + expect(formatted).toHaveLength(2); + expect(formatted[0]!.callId).toBe('tc_1'); + expect(formatted[0]!.type).toBe('function_call_output'); + expect(formatted[1]!.callId).toBe('tc_2'); + expect(formatted[1]!.type).toBe('function_call_output'); + }); +}); diff --git a/tests/composition/tool-lifecycle.test.ts b/tests/composition/tool-lifecycle.test.ts new file mode 100644 index 0000000..28d1f95 --- /dev/null +++ b/tests/composition/tool-lifecycle.test.ts @@ -0,0 +1,98 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { tool } from '../../src/index.js'; +import { + convertToolsToAPIFormat, + executeTool, + findToolByName, +} from '../../src/lib/tool-executor.js'; +import { isGeneratorTool, isManualTool, isRegularExecuteTool } from '../../src/lib/tool-types.js'; + +describe('Tool lifecycle: definition -> classification -> execution', () => { + const regularTool = tool({ + name: 'add', + description: 'Add numbers', + inputSchema: z.object({ + a: z.number(), + b: z.number(), + }), + execute: async (args) => args.a + args.b, + }); + + const generatorTool = tool({ + name: 'stream_add', + description: 'Stream add', + inputSchema: z.object({ + a: z.number(), + b: z.number(), + }), + eventSchema: z.object({ + progress: z.number(), + }), + outputSchema: z.object({ + sum: z.number(), + }), + execute: async function* (args) { + yield { + progress: 50, + 
}; + return { + sum: args.a + args.b, + }; + }, + }); + + const manualTool = tool({ + name: 'manual_op', + description: 'Manual tool', + inputSchema: z.object({ + x: z.string(), + }), + execute: false, + }); + + it('tool() output is accepted by isRegularExecuteTool / isGeneratorTool / isManualTool', () => { + expect(isRegularExecuteTool(regularTool)).toBe(true); + expect(isGeneratorTool(generatorTool)).toBe(true); + expect(isManualTool(manualTool)).toBe(true); + }); + + it('tool() output is accepted by convertToolsToAPIFormat', () => { + const apiTools = convertToolsToAPIFormat([ + regularTool, + generatorTool, + manualTool, + ]); + expect(apiTools).toHaveLength(3); + expect(apiTools[0]!.name).toBe('add'); + expect(apiTools[0]!.type).toBe('function'); + expect(apiTools[1]!.name).toBe('stream_add'); + expect(apiTools[2]!.name).toBe('manual_op'); + }); + + it('extractToolCallsFromResponse output shape is accepted by findToolByName + executeTool', async () => { + const tools = [ + regularTool, + generatorTool, + manualTool, + ]; + const toolCallShape = { + id: 'tc_1', + name: 'add', + arguments: { + a: 1, + b: 2, + }, + }; + + const found = findToolByName(tools, toolCallShape.name); + expect(found).toBeDefined(); + + const result = await executeTool(found!, toolCallShape, { + numberOfTurns: 1, + }); + expect(result.toolCallId).toBe('tc_1'); + expect(result.result).toBe(3); + }); +}); diff --git a/tests/contracts/README.md b/tests/contracts/README.md new file mode 100644 index 0000000..f8b468b --- /dev/null +++ b/tests/contracts/README.md @@ -0,0 +1,26 @@ +# Contracts Tests + +Tests in this folder verify that a capability **works as specified AND its boundary with similar capabilities is correct**. Both the "what it does" and the "what it does NOT do" are asserted in the same test. 
+ +## What belongs here + +- Stop conditions that check their own criterion AND ignore all others +- Delta extractors that yield their event type AND skip peer event types +- Message stream builders that produce their format AND differ structurally from peers +- Executor functions that handle their tool type AND reject the other type +- Async param resolution where static, function, and client-only fields are each handled distinctly +- Extendable: when a capability gains new peers or alternatives, their combined behavior-and-boundary tests go here + +## Examples + +- `stepCountIs(3)` returns true at 3 steps AND ignores tool names, tokens, cost, finishReason +- `extractTextDeltas` yields text deltas AND skips reasoning + tool deltas in the same stream +- `executeRegularTool` succeeds with regular tools AND throws when given a generator tool +- `resolveAsyncFunctions` passes static values through, resolves functions, AND strips client-only fields +- `fromClaudeMessages` maps text blocks to messages, tool_use to function calls, each distinctly + +## What does NOT belong here + +- Pure specification without boundary checking (→ `behavior/`) +- Pure boundary checking without verifying output correctness (→ `boundaries/`) +- Cross-module composition (→ `composition/`, `integration/`, or `pipelines/`) diff --git a/tests/contracts/async-params.test.ts b/tests/contracts/async-params.test.ts new file mode 100644 index 0000000..5468739 --- /dev/null +++ b/tests/contracts/async-params.test.ts @@ -0,0 +1,106 @@ +import { describe, expect, it } from 'vitest'; + +import { resolveAsyncFunctions } from '../../src/lib/async-params.js'; +import { makeCallModelInput, makeTurnContext, TEST_MODEL } from '../test-constants.js'; + +describe('resolveAsyncFunctions - three field types handled distinctly', () => { + const turnCtx = makeTurnContext({ + numberOfTurns: 2, + }); + + it('static values (model, temperature as literals) -> passed through unchanged', async () => { + const result = await
resolveAsyncFunctions( + makeCallModelInput({ + model: TEST_MODEL, + temperature: 0.7, + }), + turnCtx, + ); + expect(result.model).toBe(TEST_MODEL); + expect(result.temperature).toBe(0.7); + }); + + it('function values -> resolved by calling with context, result stored', async () => { + const result = await resolveAsyncFunctions( + makeCallModelInput({ + temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1, + }), + turnCtx, + ); + expect(result.temperature).toBe(0.2); + }); + + it('client-only fields (stopWhen, state, requireApproval, context, onTurnStart, onTurnEnd) -> stripped entirely', async () => { + const result = await resolveAsyncFunctions( + makeCallModelInput({ + model: TEST_MODEL, + stopWhen: () => true, + state: { + get: () => null, + }, + requireApproval: () => false, + context: { + shared: {}, + }, + onTurnStart: () => {}, + onTurnEnd: () => {}, + }), + turnCtx, + ); + expect(result).not.toHaveProperty('stopWhen'); + expect(result).not.toHaveProperty('state'); + expect(result).not.toHaveProperty('requireApproval'); + expect(result).not.toHaveProperty('context'); + expect(result).not.toHaveProperty('onTurnStart'); + expect(result).not.toHaveProperty('onTurnEnd'); + expect(result.model).toBe(TEST_MODEL); + }); + + it('tools field -> preserved (exception to client-only stripping)', async () => { + const tools = [ + { + type: 'function', + function: { + name: 'test', + }, + }, + ]; + const result = await resolveAsyncFunctions( + makeCallModelInput({ + model: TEST_MODEL, + tools, + }), + turnCtx, + ); + expect(result).toHaveProperty('tools'); + }); + + it('function error -> wraps with field name context', async () => { + await expect( + resolveAsyncFunctions( + makeCallModelInput({ + temperature: () => { + throw new Error('boom'); + }, + }), + turnCtx, + ), + ).rejects.toThrow('Failed to resolve async function for field "temperature"'); + }); + + it('mix of static + function + client-only in one call -> all handled correctly', async () 
=> { + const result = await resolveAsyncFunctions( + makeCallModelInput({ + model: TEST_MODEL, + temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1, + stopWhen: () => true, + input: 'hello', + }), + turnCtx, + ); + expect(result.model).toBe(TEST_MODEL); + expect(result.temperature).toBe(0.2); + expect(result).not.toHaveProperty('stopWhen'); + expect(result.input).toBe('hello'); + }); +}); diff --git a/tests/contracts/conversation-state-results.test.ts b/tests/contracts/conversation-state-results.test.ts new file mode 100644 index 0000000..c7a4e11 --- /dev/null +++ b/tests/contracts/conversation-state-results.test.ts @@ -0,0 +1,51 @@ +import { describe, expect, it } from 'vitest'; + +import { + createRejectedResult, + createUnsentResult, + unsentResultsToAPIFormat, +} from '../../src/lib/conversation-state.js'; + +describe('Conversation state utilities - distinct result types', () => { + it('createUnsentResult output has output (value) but no error', () => { + const result = createUnsentResult('c1', 'search', { + data: 'found', + }); + expect(result.output).toEqual({ + data: 'found', + }); + expect(result).not.toHaveProperty('error'); + }); + + it('createRejectedResult output has output: null AND error string', () => { + const result = createRejectedResult('c1', 'delete'); + expect(result.output).toBeNull(); + expect(result.error).toBe('Tool call rejected by user'); + }); + + it('unsentResultsToAPIFormat: success result -> output is JSON.stringify(output)', () => { + const unsent = createUnsentResult('c1', 'search', { + data: 'found', + }); + const formatted = unsentResultsToAPIFormat([ + unsent, + ]); + expect(formatted[0]!.output).toBe( + JSON.stringify({ + data: 'found', + }), + ); + }); + + it('unsentResultsToAPIFormat: error result -> output is JSON.stringify({ error })', () => { + const rejected = createRejectedResult('c1', 'delete', 'Not allowed'); + const formatted = unsentResultsToAPIFormat([ + rejected, + ]); + 
expect(formatted[0]!.output).toBe( + JSON.stringify({ + error: 'Not allowed', + }), + ); + }); +}); diff --git a/tests/contracts/delta-extractors.test.ts b/tests/contracts/delta-extractors.test.ts new file mode 100644 index 0000000..fb36514 --- /dev/null +++ b/tests/contracts/delta-extractors.test.ts @@ -0,0 +1,111 @@ +import { describe, expect, it } from 'vitest'; + +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; +import { + extractReasoningDeltas, + extractTextDeltas, + extractToolDeltas, +} from '../../src/lib/stream-transformers.js'; + +function makeStream(events: StreamEvents[]): ReusableReadableStream { + const source = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(event); + } + controller.close(); + }, + }); + return new ReusableReadableStream(source); +} + +async function collect(iter: AsyncIterable): Promise { + const result: string[] = []; + for await (const item of iter) { + result.push(item); + } + return result; +} + +describe('Delta extractors - each yields ONLY its event type', () => { + const mixedEvents = [ + { + type: 'response.output_text.delta', + delta: 'hello', + }, + { + type: 'response.reasoning_text.delta', + delta: 'thinking', + }, + { + type: 'response.function_call_arguments.delta', + delta: '{"q":', + }, + { + type: 'response.output_text.delta', + delta: ' world', + }, + { + type: 'response.reasoning_text.delta', + delta: ' more', + }, + { + type: 'response.function_call_arguments.delta', + delta: '"test"}', + }, + ]; + + it('extractTextDeltas yields strings from output_text.delta events; reasoning + tool deltas ignored', async () => { + const stream = makeStream(mixedEvents); + const result = await collect(extractTextDeltas(stream)); + expect(result).toEqual([ + 'hello', + ' world', + ]); + }); + + it('extractReasoningDeltas yields strings from reasoning_text.delta events; ignores text + tool', async () => { + const stream = makeStream(mixedEvents); + const 
result = await collect(extractReasoningDeltas(stream)); + expect(result).toEqual([ + 'thinking', + ' more', + ]); + }); + + it('extractToolDeltas yields strings from function_call_arguments.delta events; ignores text + reasoning', async () => { + const stream = makeStream(mixedEvents); + const result = await collect(extractToolDeltas(stream)); + expect(result).toEqual([ + '{"q":', + '"test"}', + ]); + }); + + it('extractTextDeltas skips events with empty/undefined delta', async () => { + const events = [ + { + type: 'response.output_text.delta', + delta: 'hello', + }, + { + type: 'response.output_text.delta', + delta: '', + }, + { + type: 'response.output_text.delta', + delta: undefined, + }, + { + type: 'response.output_text.delta', + delta: ' world', + }, + ]; + const stream = makeStream(events); + const result = await collect(extractTextDeltas(stream)); + expect(result).toEqual([ + 'hello', + ' world', + ]); + }); +}); diff --git a/tests/contracts/from-claude-messages.test.ts b/tests/contracts/from-claude-messages.test.ts new file mode 100644 index 0000000..56d5c29 --- /dev/null +++ b/tests/contracts/from-claude-messages.test.ts @@ -0,0 +1,91 @@ +import type * as models from '@openrouter/sdk/models'; +import { describe, expect, it } from 'vitest'; + +import { fromClaudeMessages } from '../../src/lib/anthropic-compat.js'; + +describe('fromClaudeMessages - each block type maps distinctly', () => { + it('text blocks -> EasyInputMessage (not function_call_output, not function_call)', () => { + const result = fromClaudeMessages([ + { + role: 'user', + content: [ + { + type: 'text' as const, + text: 'Hello', + }, + ], + }, + ]); + const items = result as models.OutputItems[]; + expect(items).toHaveLength(1); + expect(items[0]).toHaveProperty('role'); + expect(items[0]).toHaveProperty('content', 'Hello'); + expect(items[0]).not.toHaveProperty('type'); + }); + + it('tool_use blocks -> FunctionCallItem (not EasyInputMessage, not function_call_output)', () => { + const 
result = fromClaudeMessages([ + { + role: 'assistant', + content: [ + { + type: 'tool_use' as const, + id: 'tu_1', + name: 'search', + input: { + q: 'test', + }, + }, + ], + }, + ]); + const items = result as models.OutputItems[]; + const toolItem = items.find((i) => i.type === 'function_call'); + expect(toolItem).toBeDefined(); + expect(toolItem.name).toBe('search'); + expect(toolItem.callId).toBe('tu_1'); + }); + + it('tool_result blocks -> FunctionCallOutputItem (not EasyInputMessage, not function_call)', () => { + const result = fromClaudeMessages([ + { + role: 'user', + content: [ + { + type: 'tool_result' as const, + tool_use_id: 'tu_1', + content: 'Search result', + }, + ], + }, + ]); + const items = result as models.OutputItems[]; + const outputItem = items.find((i) => i.type === 'function_call_output'); + expect(outputItem).toBeDefined(); + expect(outputItem.callId).toBe('tu_1'); + expect(outputItem.output).toBe('Search result'); + }); + + it('image blocks -> structured content EasyInputMessage (not input_image alone)', () => { + const result = fromClaudeMessages([ + { + role: 'user', + content: [ + { + type: 'image' as const, + source: { + type: 'url' as const, + url: 'https://example.com/img.png', + }, + }, + ], + }, + ]); + const items = result as models.OutputItems[]; + expect(items).toHaveLength(1); + expect(items[0]).toHaveProperty('role'); + expect(items[0]).toHaveProperty('content'); + expect(Array.isArray(items[0].content)).toBe(true); + expect(items[0].content[0].type).toBe('input_image'); + }); +}); diff --git a/tests/contracts/items-stream.test.ts b/tests/contracts/items-stream.test.ts new file mode 100644 index 0000000..bd0d6de --- /dev/null +++ b/tests/contracts/items-stream.test.ts @@ -0,0 +1,286 @@ +import { describe, expect, it } from 'vitest'; + +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; +import { buildItemsStream } from '../../src/lib/stream-transformers.js'; + +function makeStream( + events: Record[], 
+): ReusableReadableStream> { + const source = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(event); + } + controller.close(); + }, + }); + return new ReusableReadableStream(source); +} + +async function collectAll(iter: AsyncIterable): Promise { + const result: T[] = []; + for await (const item of iter) { + result.push(item); + } + return result; +} + +describe('buildItemsStream - yields distinct item types per event', () => { + it('message items: accumulated text from text deltas', async () => { + const events = [ + { + type: 'response.output_item.added', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'in_progress', + content: [], + }, + }, + { + type: 'response.output_text.delta', + delta: 'Hello', + itemId: 'msg_1', + }, + { + type: 'response.output_text.delta', + delta: ' world', + itemId: 'msg_1', + }, + { + type: 'response.completed', + response: {}, + }, + ]; + const stream = makeStream(events); + const items = await collectAll(buildItemsStream(stream)); + const lastMsg = items.filter((i) => i.type === 'message').pop()!; + expect( + ( + lastMsg as { + content: Array<{ + text: string; + }>; + } + ).content[0].text, + ).toBe('Hello world'); + }); + + it('function_call items: accumulated arguments from function_call deltas', async () => { + const events = [ + { + type: 'response.output_item.added', + item: { + type: 'function_call', + id: 'fc_1', + callId: 'fc_1', + name: 'search', + arguments: '', + status: 'in_progress', + }, + }, + { + type: 'response.function_call_arguments.delta', + delta: '{"q":', + itemId: 'fc_1', + }, + { + type: 'response.function_call_arguments.delta', + delta: '"test"}', + itemId: 'fc_1', + }, + { + type: 'response.completed', + response: {}, + }, + ]; + const stream = makeStream(events); + const items = await collectAll(buildItemsStream(stream)); + const lastFn = items.filter((i) => i.type === 'function_call').pop()!; + expect( + ( + lastFn as { + 
arguments: string; + } + ).arguments, + ).toBe('{"q":"test"}'); + }); + + it('reasoning items: accumulated content from reasoning deltas', async () => { + const events = [ + { + type: 'response.output_item.added', + item: { + type: 'reasoning', + id: 'r_1', + status: 'in_progress', + summary: [], + }, + }, + { + type: 'response.reasoning_text.delta', + delta: 'thinking', + itemId: 'r_1', + }, + { + type: 'response.reasoning_text.delta', + delta: ' more', + itemId: 'r_1', + }, + { + type: 'response.completed', + response: {}, + }, + ]; + const stream = makeStream(events); + const items = await collectAll(buildItemsStream(stream)); + const lastReasoning = items.filter((i) => i.type === 'reasoning').pop()!; + expect( + ( + lastReasoning as { + summary: Array<{ + text: string; + }>; + } + ).summary[0].text, + ).toBe('thinking more'); + }); + + it('server tool items (web_search_call, file_search_call, image_generation_call): passthrough', async () => { + const webSearch = { + type: 'web_search_call', + id: 'ws_1', + status: 'completed', + }; + const fileSearch = { + type: 'file_search_call', + id: 'fs_1', + status: 'completed', + }; + const imageGen = { + type: 'image_generation_call', + id: 'ig_1', + status: 'completed', + }; + const events = [ + { + type: 'response.output_item.added', + item: webSearch, + }, + { + type: 'response.output_item.added', + item: fileSearch, + }, + { + type: 'response.output_item.added', + item: imageGen, + }, + { + type: 'response.completed', + response: {}, + }, + ]; + const stream = makeStream(events); + const items = await collectAll(buildItemsStream(stream)); + const types = items.map((i) => i.type); + expect(types).toContain('web_search_call'); + expect(types).toContain('file_search_call'); + expect(types).toContain('image_generation_call'); + }); + + it('final complete items from output_item.done events', async () => { + const events = [ + { + type: 'response.output_item.added', + item: { + type: 'message', + id: 'msg_1', + role: 
'assistant', + status: 'in_progress', + content: [], + }, + }, + { + type: 'response.output_text.delta', + delta: 'Hi', + itemId: 'msg_1', + }, + { + type: 'response.output_item.done', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'completed', + content: [ + { + type: 'output_text', + text: 'Hi', + annotations: [], + }, + ], + }, + }, + { + type: 'response.completed', + response: {}, + }, + ]; + const stream = makeStream(events); + const items = await collectAll(buildItemsStream(stream)); + const doneItem = items[items.length - 1]!; + expect( + ( + doneItem as { + status: string; + } + ).status, + ).toBe('completed'); + }); + + it('termination events (completed/failed/incomplete) -> stream stops', async () => { + const events = [ + { + type: 'response.output_item.added', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'in_progress', + content: [], + }, + }, + { + type: 'response.output_text.delta', + delta: 'Hi', + itemId: 'msg_1', + }, + { + type: 'response.completed', + response: {}, + }, + // These should never be reached + { + type: 'response.output_text.delta', + delta: 'SHOULD NOT APPEAR', + itemId: 'msg_1', + }, + ]; + const stream = makeStream(events); + const items = await collectAll(buildItemsStream(stream)); + const allText = items + .filter((i) => i.type === 'message') + .map( + (i) => + ( + i as { + content?: Array<{ + text?: string; + }>; + } + ).content?.[0]?.text ?? 
'', + ); + expect(allText.join('')).not.toContain('SHOULD NOT APPEAR'); + }); +}); diff --git a/tests/contracts/message-stream-builders.test.ts b/tests/contracts/message-stream-builders.test.ts new file mode 100644 index 0000000..9710bb8 --- /dev/null +++ b/tests/contracts/message-stream-builders.test.ts @@ -0,0 +1,118 @@ +import { describe, expect, it } from 'vitest'; + +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; +import { + buildMessageStream, + buildResponsesMessageStream, +} from '../../src/lib/stream-transformers.js'; + +function makeStream(events: StreamEvents[]): ReusableReadableStream { + const source = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(event); + } + controller.close(); + }, + }); + return new ReusableReadableStream(source); +} + +async function collectAll(iter: AsyncIterable): Promise { + const result: T[] = []; + for await (const item of iter) { + result.push(item); + } + return result; +} + +const streamEvents = [ + { + type: 'response.output_item.added', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'in_progress', + content: [], + }, + }, + { + type: 'response.output_text.delta', + delta: 'Hello', + itemId: 'msg_1', + }, + { + type: 'response.output_text.delta', + delta: ' world', + itemId: 'msg_1', + }, + { + type: 'response.output_item.done', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'completed', + content: [ + { + type: 'output_text', + text: 'Hello world', + annotations: [], + }, + ], + }, + }, + { + type: 'response.completed', + response: {}, + }, +]; + +describe('Message stream builders - same input, structurally distinct outputs', () => { + it('buildResponsesMessageStream yields OutputMessage: { id, type: "message", role: "assistant", content: [...] 
}', async () => { + const stream = makeStream(streamEvents); + const results = await collectAll(buildResponsesMessageStream(stream)); + expect(results.length).toBeGreaterThan(0); + const last = results[results.length - 1]!; + expect(last).toHaveProperty('id'); + expect(last).toHaveProperty('type', 'message'); + expect(last).toHaveProperty('role', 'assistant'); + expect(last).toHaveProperty('content'); + expect(Array.isArray(last.content)).toBe(true); + }); + + it('buildMessageStream yields ChatAssistantMessage: { role: "assistant", content: string }', async () => { + const stream = makeStream(streamEvents); + const results = await collectAll(buildMessageStream(stream)); + expect(results.length).toBeGreaterThan(0); + const last = results[results.length - 1]!; + expect(last).toHaveProperty('role', 'assistant'); + expect(typeof last.content).toBe('string'); + expect(last).not.toHaveProperty('id'); + expect(last).not.toHaveProperty('type'); + }); + + it('same stream events -> both produce same text content but structurally different objects', async () => { + const stream1 = makeStream(streamEvents); + const stream2 = makeStream(streamEvents); + + const responsesResults = await collectAll(buildResponsesMessageStream(stream1)); + const chatResults = await collectAll(buildMessageStream(stream2)); + + const responsesLast = responsesResults[responsesResults.length - 1]!; + const chatLast = chatResults[chatResults.length - 1]!; + + // Same text content + const responsesText = responsesLast.content + .filter((c: { type: string; text?: string }) => c.type === 'output_text') + .map((c: { type: string; text?: string }) => c.text) + .join(''); + expect(responsesText).toBe('Hello world'); + expect(chatLast.content).toBe('Hello world'); + + // Structurally different + expect('id' in responsesLast).toBe(true); + expect('id' in chatLast).toBe(false); + }); +}); diff --git a/tests/contracts/response-extractors.test.ts b/tests/contracts/response-extractors.test.ts new file mode 100644 
index 0000000..77a71f7 --- /dev/null +++ b/tests/contracts/response-extractors.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it } from 'vitest'; + +import { + extractMessageFromResponse, + extractResponsesMessageFromResponse, +} from '../../src/lib/stream-transformers.js'; +import { TEST_MODEL } from '../test-constants.js'; + +function makeResponse(text: string) { + return { + id: 'r1', + output: [ + { + type: 'message' as const, + id: 'msg_1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'output_text' as const, + text, + annotations: [], + }, + ], + }, + ], + status: 'completed' as const, + outputText: text, + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; +} + +describe('Response extractors - same response, distinct shapes', () => { + it('extractMessageFromResponse returns ChatAssistantMessage (role + content string)', () => { + const response = makeResponse('Hello world'); + const msg = extractMessageFromResponse(response); + expect(msg.role).toBe('assistant'); + expect(typeof msg.content).toBe('string'); + expect(msg).not.toHaveProperty('id'); + expect(msg).not.toHaveProperty('type'); + }); + + it('extractResponsesMessageFromResponse returns OutputMessage (id + type + content array)', () => { + const response = makeResponse('Hello world'); + const msg = extractResponsesMessageFromResponse(response); + expect(msg.id).toBe('msg_1'); + expect(msg.type).toBe('message'); + expect(Array.isArray(msg.content)).toBe(true); + }); + + it('same response -> both extract same text but structurally different objects', () => { + const response = makeResponse('Hello world'); + const chatMsg = extractMessageFromResponse(response); + const responsesMsg = extractResponsesMessageFromResponse(response); + + expect(chatMsg.content).toBe('Hello world'); + const responsesText = responsesMsg.content + .filter((c: { type: string; text?: string }) => c.type === 'output_text') + .map((c: { 
type: string; text?: string }) => c.text) + .join(''); + expect(responsesText).toBe('Hello world'); + + // Structurally different + expect('id' in chatMsg).toBe(false); + expect('id' in responsesMsg).toBe(true); + }); + + it('both throw when response has no message item', () => { + const response = { + id: 'r1', + output: [ + { + type: 'function_call' as const, + id: 'fc_1', + callId: 'fc_1', + name: 'search', + arguments: '{}', + status: 'completed' as const, + }, + ], + status: 'completed' as const, + outputText: '', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + + expect(() => extractMessageFromResponse(response)).toThrow('No message found'); + expect(() => extractResponsesMessageFromResponse(response)).toThrow('No message found'); + }); +}); diff --git a/tests/contracts/tool-call-response-consistency.test.ts b/tests/contracts/tool-call-response-consistency.test.ts new file mode 100644 index 0000000..ac40989 --- /dev/null +++ b/tests/contracts/tool-call-response-consistency.test.ts @@ -0,0 +1,72 @@ +import { describe, expect, it } from 'vitest'; + +import { + extractToolCallsFromResponse, + responseHasToolCalls, +} from '../../src/lib/stream-transformers.js'; +import { TEST_MODEL } from '../test-constants.js'; + +describe('responseHasToolCalls and extractToolCallsFromResponse produce consistent results', () => { + it('responseHasToolCalls returning true <-> extractToolCallsFromResponse returning non-empty', () => { + const responseWithTools = { + id: 'r1', + output: [ + { + type: 'function_call' as const, + id: 'fc1', + callId: 'fc1', + name: 'search', + arguments: '{"q":"test"}', + status: 'completed' as const, + }, + ], + status: 'completed' as const, + outputText: '', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + + const hasTools = responseHasToolCalls(responseWithTools); + const extracted = extractToolCallsFromResponse(responseWithTools); + + 
expect(hasTools).toBe(true); + expect(extracted.length).toBeGreaterThan(0); + + const responseNoTools = { + id: 'r2', + output: [ + { + type: 'message' as const, + id: 'm1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'output_text' as const, + text: 'Hello', + annotations: [], + }, + ], + }, + ], + status: 'completed' as const, + outputText: 'Hello', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + + const hasTools2 = responseHasToolCalls(responseNoTools); + const extracted2 = extractToolCallsFromResponse(responseNoTools); + + expect(hasTools2).toBe(false); + expect(extracted2).toEqual([]); + }); +}); diff --git a/tests/contracts/tool-factory-shapes.test.ts b/tests/contracts/tool-factory-shapes.test.ts new file mode 100644 index 0000000..cb70cc3 --- /dev/null +++ b/tests/contracts/tool-factory-shapes.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { tool } from '../../src/index.js'; + +const inputSchema = z.object({ + query: z.string(), +}); + +describe('tool() factory - three tool types produce distinct structures', () => { + it('regular tool has execute function, no eventSchema', () => { + const t = tool({ + name: 'regular', + inputSchema, + execute: async () => 'done', + }); + expect(t.function).toHaveProperty('execute'); + expect(t.function).not.toHaveProperty('eventSchema'); + }); + + it('generator tool has execute function AND eventSchema AND outputSchema', () => { + const t = tool({ + name: 'generator', + inputSchema, + eventSchema: z.object({ + status: z.string(), + }), + outputSchema: z.object({ + result: z.string(), + }), + execute: async function* () { + yield { + status: 'working', + }; + return { + result: 'done', + }; + }, + }); + expect(t.function).toHaveProperty('execute'); + expect(t.function).toHaveProperty('eventSchema'); + expect(t.function).toHaveProperty('outputSchema'); + }); + + 
it('manual tool has NO execute, no eventSchema, no outputSchema', () => { + const t = tool({ + name: 'manual', + inputSchema, + execute: false, + }); + expect(t.function).not.toHaveProperty('execute'); + expect(t.function).not.toHaveProperty('eventSchema'); + expect(t.function).not.toHaveProperty('outputSchema'); + }); + + it('same input schema -> three different tool types depending on config', () => { + const regular = tool({ + name: 'r', + inputSchema, + execute: async () => 'ok', + }); + const generator = tool({ + name: 'g', + inputSchema, + eventSchema: z.object({ + s: z.string(), + }), + outputSchema: z.object({ + r: z.string(), + }), + execute: async function* () { + return { + r: 'ok', + }; + }, + }); + const manual = tool({ + name: 'm', + inputSchema, + execute: false, + }); + + expect('execute' in regular.function).toBe(true); + expect('eventSchema' in regular.function).toBe(false); + + expect('execute' in generator.function).toBe(true); + expect('eventSchema' in generator.function).toBe(true); + + expect('execute' in manual.function).toBe(false); + expect('eventSchema' in manual.function).toBe(false); + }); +}); diff --git a/tests/dispatch/README.md b/tests/dispatch/README.md new file mode 100644 index 0000000..dd6a3ba --- /dev/null +++ b/tests/dispatch/README.md @@ -0,0 +1,25 @@ +# Dispatch Tests + +Tests in this folder verify that **boundaries between capabilities hold at their composition points**. The correct path is chosen AND the modules connect. This is where routing logic meets module integration. 
+ +## What belongs here + +- `executeTool` dispatching to the correct executor based on type guards +- `convertToClaudeMessage` routing items via output item guards to distinct Claude blocks +- `buildItemsStream` routing events via stream type guards to correct handlers +- `fromClaudeMessages` routing mixed block types to distinct output types +- Approval partitioning choosing the correct path based on tool-level vs call-level checks +- Extendable: when new routing or dispatching logic is added, its boundary-at-junction tests go here + +## Examples + +- `executeTool` dispatches regular tool to `executeRegularTool` because `isRegularExecuteTool` returns true +- Same response with message + function_call: `isOutputMessage` -> text block, `isFunctionCallItem` -> tool_use block +- `partitionToolCalls` with call-level check overrides tool-level `requireApproval` +- `buildItemsStream` routes `output_item.added` to handler because `isOutputItemAddedEvent` matches + +## What does NOT belong here + +- Pure boundary testing without composition (→ `boundaries/`) +- Pure composition without boundary verification (→ `composition/`) +- Full end-to-end pipelines (→ `pipelines/`) diff --git a/tests/dispatch/approval-partition-dispatch.test.ts b/tests/dispatch/approval-partition-dispatch.test.ts new file mode 100644 index 0000000..3bdce4b --- /dev/null +++ b/tests/dispatch/approval-partition-dispatch.test.ts @@ -0,0 +1,79 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { tool } from '../../src/index.js'; +import { partitionToolCalls } from '../../src/lib/conversation-state.js'; + +describe('Approval partitioning dispatches via tool-level vs call-level checks', () => { + const approvalTool = tool({ + name: 'dangerous', + inputSchema: z.object({ + target: z.string(), + }), + requireApproval: true, + execute: async () => 'deleted', + }); + + const safeTool = tool({ + name: 'safe', + inputSchema: z.object({ + q: z.string(), + }), + execute: 
async () => 'result', + }); + + const toolCalls = [ + { + id: 'tc_1', + name: 'dangerous', + arguments: { + target: 'file.txt', + }, + }, + { + id: 'tc_2', + name: 'safe', + arguments: { + q: 'hello', + }, + }, + ]; + + it('partitionToolCalls with call-level check -> call-level overrides tool-level requireApproval', async () => { + // Call-level check says: no approval needed for anything + const callLevelCheck = async () => false; + const context = { + numberOfTurns: 1, + }; + const partition = await partitionToolCalls( + toolCalls, + [ + approvalTool, + safeTool, + ], + context, + callLevelCheck, + ); + // Call-level override: both should be auto-execute + expect(partition.autoExecute).toHaveLength(2); + expect(partition.requiresApproval).toHaveLength(0); + }); + + it('partitionToolCalls without call-level check -> falls back to each tool requireApproval', async () => { + const context = { + numberOfTurns: 1, + }; + const partition = await partitionToolCalls( + toolCalls, + [ + approvalTool, + safeTool, + ], + context, + ); + expect(partition.requiresApproval).toHaveLength(1); + expect(partition.requiresApproval[0]!.name).toBe('dangerous'); + expect(partition.autoExecute).toHaveLength(1); + expect(partition.autoExecute[0]!.name).toBe('safe'); + }); +}); diff --git a/tests/dispatch/claude-conversion-deep-dispatch.test.ts b/tests/dispatch/claude-conversion-deep-dispatch.test.ts new file mode 100644 index 0000000..58ce914 --- /dev/null +++ b/tests/dispatch/claude-conversion-deep-dispatch.test.ts @@ -0,0 +1,67 @@ +import { describe, expect, it } from 'vitest'; + +import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js'; +import { TEST_MODEL } from '../test-constants.js'; + +describe('convertToClaudeMessage routes multi-item response via output item guards', () => { + it('multi-item response: message + function_call + reasoning + web_search -> each guard routes to distinct block', () => { + const response = { + id: 'r1', + output: [ + { + type: 
'message' as const, + id: 'msg_1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'output_text' as const, + text: 'Hello', + annotations: [], + }, + ], + }, + { + type: 'function_call' as const, + id: 'fc_1', + callId: 'fc_1', + name: 'search', + arguments: '{"q":"test"}', + status: 'completed' as const, + }, + { + type: 'reasoning' as const, + id: 'r_1', + status: 'completed' as const, + summary: [ + { + type: 'summary_text' as const, + text: 'thinking', + }, + ], + }, + { + type: 'web_search_call' as const, + id: 'ws_1', + status: 'completed' as const, + }, + ], + status: 'completed' as const, + outputText: 'Hello', + model: TEST_MODEL, + usage: { + totalTokens: 200, + inputTokens: 100, + outputTokens: 100, + }, + }; + + const claude = convertToClaudeMessage(response); + const types = claude.content.map((b: { type: string }) => b.type); + + expect(types).toContain('text'); + expect(types).toContain('tool_use'); + expect(types).toContain('thinking'); + expect(types).toContain('server_tool_use'); + }); +}); diff --git a/tests/dispatch/claude-conversion-dispatch.test.ts b/tests/dispatch/claude-conversion-dispatch.test.ts new file mode 100644 index 0000000..f30f073 --- /dev/null +++ b/tests/dispatch/claude-conversion-dispatch.test.ts @@ -0,0 +1,107 @@ +import { describe, expect, it } from 'vitest'; + +import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js'; +import { TEST_MODEL } from '../test-constants.js'; + +describe('convertToClaudeMessage routes items via output item guards', () => { + it('same response with message + function_call: isOutputMessage -> text block, isFunctionCallItem -> tool_use block', () => { + const response = { + id: 'r1', + output: [ + { + type: 'message' as const, + id: 'msg_1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'output_text' as const, + text: 'Hello', + annotations: [], + }, + ], + }, + { + type: 'function_call' as const, + id: 
'fc_1', + callId: 'fc_1', + name: 'search', + arguments: '{"q":"test"}', + status: 'completed' as const, + }, + ], + status: 'completed' as const, + outputText: 'Hello', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + + const claude = convertToClaudeMessage(response); + const textBlock = claude.content.find((b: { type: string }) => b.type === 'text'); + const toolBlock = claude.content.find((b: { type: string }) => b.type === 'tool_use'); + + expect(textBlock).toBeDefined(); + expect( + ( + textBlock as { + type: string; + text: string; + } + ).text, + ).toBe('Hello'); + expect(toolBlock).toBeDefined(); + expect( + ( + toolBlock as { + type: string; + name: string; + } + ).name, + ).toBe('search'); + }); + + it('same response with reasoning + web_search_call: isReasoningOutputItem -> thinking, isWebSearchCallOutputItem -> server_tool_use', () => { + const response = { + id: 'r1', + output: [ + { + type: 'reasoning' as const, + id: 'r_1', + status: 'completed' as const, + summary: [ + { + type: 'summary_text' as const, + text: 'thinking about it', + }, + ], + }, + { + type: 'web_search_call' as const, + id: 'ws_1', + status: 'completed' as const, + }, + ], + status: 'completed' as const, + outputText: '', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + + const claude = convertToClaudeMessage(response); + const thinkingBlock = claude.content.find((b: { type: string }) => b.type === 'thinking'); + const serverToolBlock = claude.content.find( + (b: { type: string }) => b.type === 'server_tool_use', + ); + + expect(thinkingBlock).toBeDefined(); + expect(serverToolBlock).toBeDefined(); + }); +}); diff --git a/tests/dispatch/from-claude-dispatch.test.ts b/tests/dispatch/from-claude-dispatch.test.ts new file mode 100644 index 0000000..8002880 --- /dev/null +++ b/tests/dispatch/from-claude-dispatch.test.ts @@ -0,0 +1,59 @@ +import type * as models from 
'@openrouter/sdk/models'; +import { describe, expect, it } from 'vitest'; + +import { fromClaudeMessages } from '../../src/lib/anthropic-compat.js'; + +describe('fromClaudeMessages routes blocks to distinct output types', () => { + it('mixed Claude message with text + tool_use + tool_result blocks -> each block produces its correct OR type, interleaved correctly', () => { + const result = fromClaudeMessages([ + { + role: 'assistant', + content: [ + { + type: 'text' as const, + text: 'Let me search for that.', + }, + { + type: 'tool_use' as const, + id: 'tu_1', + name: 'search', + input: { + q: 'test', + }, + }, + ], + }, + { + role: 'user', + content: [ + { + type: 'tool_result' as const, + tool_use_id: 'tu_1', + content: 'Found results', + }, + { + type: 'text' as const, + text: 'Thanks for the results', + }, + ], + }, + ]); + + const items = result as models.OutputItems[]; + // Should have: text message, function_call, function_call_output, text message + const types = items.map((i) => i.type || 'easy_input_message'); + + expect(types).toContain('function_call'); + expect(types).toContain('function_call_output'); + + // Check that the function_call has correct properties + const fnCall = items.find((i) => i.type === 'function_call'); + expect(fnCall.name).toBe('search'); + expect(fnCall.callId).toBe('tu_1'); + + // Check that the function_call_output has correct properties + const fnOutput = items.find((i) => i.type === 'function_call_output'); + expect(fnOutput.callId).toBe('tu_1'); + expect(fnOutput.output).toBe('Found results'); + }); +}); diff --git a/tests/dispatch/items-stream-dispatch.test.ts b/tests/dispatch/items-stream-dispatch.test.ts new file mode 100644 index 0000000..d2480be --- /dev/null +++ b/tests/dispatch/items-stream-dispatch.test.ts @@ -0,0 +1,76 @@ +import { describe, expect, it } from 'vitest'; + +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; +import { buildItemsStream } from 
'../../src/lib/stream-transformers.js'; + +function makeStream(events: StreamEvents[]): ReusableReadableStream { + const source = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(event); + } + controller.close(); + }, + }); + return new ReusableReadableStream(source); +} + +async function collectAll(iter: AsyncIterable): Promise { + const result: T[] = []; + for await (const item of iter) { + result.push(item); + } + return result; +} + +describe('buildItemsStream routes events via stream type guards', () => { + it('routes output_item.added to handler because isOutputItemAddedEvent matches (not other guards)', async () => { + const events = [ + { + type: 'response.output_item.added', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'in_progress', + content: [], + }, + }, + { + type: 'response.completed', + response: {}, + }, + ]; + const stream = makeStream(events); + const items = await collectAll(buildItemsStream(stream)); + expect(items.length).toBeGreaterThan(0); + expect(items[0].type).toBe('message'); + }); + + it('skips unknown event types that do not match any guard', async () => { + const events = [ + { + type: 'response.some_unknown_event', + data: 'ignored', + }, + { + type: 'response.output_item.added', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'in_progress', + content: [], + }, + }, + { + type: 'response.completed', + response: {}, + }, + ]; + const stream = makeStream(events); + const items = await collectAll(buildItemsStream(stream)); + // Only message items should be yielded (unknown events are silently skipped); the length guard is needed because every() passes vacuously on [] + expect(items.length > 0 && items.every((i) => i.type === 'message')).toBe(true); + }); +}); diff --git a/tests/integration/README.md b/tests/integration/README.md new file mode 100644 index 0000000..62eb964 --- /dev/null +++ b/tests/integration/README.md @@ -0,0 +1,26 @@ +# Integration Tests + +Tests in this folder verify that a capability **works as 
specified AND composes correctly with another module**. Both the output correctness and the cross-module data flow are asserted. + +## What belongs here + +- A function produces the correct output AND that output successfully feeds the next module +- Stop conditions evaluated against real `StepResult` shapes from the orchestrator +- Context store data flowing through `buildToolExecuteContext` into a tool's `execute` function +- Stream consumers at different speeds both getting complete, correct data +- Next-turn param computation feeding into request modification with verified results +- Extendable: when new cross-module flows are introduced, their correctness-through-composition tests go here + +## Examples + +- `consumeStreamForCompletion` returns the response because `isResponseCompletedEvent` identified it +- `stepCountIs` works with `StepResult[]` containing real `usage` and `toolCalls` data +- Tool's `execute` receives context where `local` reflects store data set before execution +- Two `ReusableReadableStream` consumers at different read speeds both get all items correctly +- `executeNextTurnParamsFunctions` computes temperature -> `applyNextTurnParamsToRequest` produces correct request + +## What does NOT belong here + +- Pure specification without cross-module flow (→ `behavior/`) +- Pure composition without output verification (→ `composition/`) +- Boundary checks at composition points (→ `dispatch/`) diff --git a/tests/integration/claude-unsupported-content.test.ts b/tests/integration/claude-unsupported-content.test.ts new file mode 100644 index 0000000..e343dc3 --- /dev/null +++ b/tests/integration/claude-unsupported-content.test.ts @@ -0,0 +1,52 @@ +import { describe, expect, it } from 'vitest'; + +import { + convertToClaudeMessage, + getUnsupportedContentSummary, + hasUnsupportedContent, +} from '../../src/lib/stream-transformers.js'; +import { TEST_MODEL } from '../test-constants.js'; + +describe('convertToClaudeMessage -> unsupported content 
utilities', () => { + it('unsupported content round-trip: refusal + image_generation -> convertToClaudeMessage -> unsupported_content utilities work', () => { + const response = { + id: 'r1', + output: [ + { + type: 'message' as const, + id: 'msg_1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'refusal' as const, + refusal: 'I cannot do that', + }, + ], + }, + { + type: 'image_generation_call' as const, + id: 'ig_1', + result: 'base64data', + status: 'completed' as const, + }, + ], + status: 'completed' as const, + outputText: '', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + + const claude = convertToClaudeMessage(response); + // unsupported_content is a property on the message, not content blocks + expect(hasUnsupportedContent(claude)).toBe(true); + const summary = getUnsupportedContentSummary(claude); + expect(summary).toBeDefined(); + // refusal and image_generation_call should both appear as unsupported + expect(Object.keys(summary).length).toBeGreaterThan(0); + }); +}); diff --git a/tests/integration/format-round-trip.test.ts b/tests/integration/format-round-trip.test.ts new file mode 100644 index 0000000..0a46779 --- /dev/null +++ b/tests/integration/format-round-trip.test.ts @@ -0,0 +1,171 @@ +import type * as models from '@openrouter/sdk/models'; +import { describe, expect, it } from 'vitest'; + +import { fromClaudeMessages, toClaudeMessage } from '../../src/lib/anthropic-compat.js'; +import { fromChatMessages, toChatMessage } from '../../src/lib/chat-compat.js'; +import { TEST_MODEL } from '../test-constants.js'; + +describe('Bidirectional format conversion', () => { + it('Claude round-trip: Claude messages -> fromClaudeMessages -> OR format -> each block type maps distinctly', () => { + const claudeMessages = [ + { + role: 'user' as const, + content: [ + { + type: 'text' as const, + text: 'Search for cats', + }, + ], + }, + { + role: 'assistant' as const, 
+ content: [ + { + type: 'text' as const, + text: 'Let me search.', + }, + { + type: 'tool_use' as const, + id: 'tu_1', + name: 'search', + input: { + q: 'cats', + }, + }, + ], + }, + { + role: 'user' as const, + content: [ + { + type: 'tool_result' as const, + tool_use_id: 'tu_1', + content: 'Found cats', + }, + ], + }, + ]; + + // Claude -> OR format + const orFormat = fromClaudeMessages(claudeMessages); + const items = orFormat as models.OutputItems[]; + + // Text blocks -> EasyInputMessage + const textItems = items.filter((i) => !i.type || i.role); + expect(textItems.length).toBeGreaterThan(0); + + // tool_use -> FunctionCallItem + const fnCalls = items.filter((i) => i.type === 'function_call'); + expect(fnCalls).toHaveLength(1); + expect(fnCalls[0].name).toBe('search'); + + // tool_result -> FunctionCallOutputItem + const fnOutputs = items.filter((i) => i.type === 'function_call_output'); + expect(fnOutputs).toHaveLength(1); + expect(fnOutputs[0].callId).toBe('tu_1'); + + // Verify OR format -> Claude format works on a response + const mockResponse = { + id: 'r1', + output: [ + { + type: 'message' as const, + id: 'msg_1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'output_text' as const, + text: 'Here are cats', + annotations: [], + }, + ], + }, + ], + status: 'completed' as const, + outputText: 'Here are cats', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + const claudeResponse = toClaudeMessage(mockResponse); + expect(claudeResponse.role).toBe('assistant'); + expect(Array.isArray(claudeResponse.content)).toBe(true); + }); + + it('Chat round-trip: Chat messages -> fromChatMessages -> OR format -> each role maps distinctly', () => { + const chatMessages = [ + { + role: 'system' as const, + content: 'You are helpful', + }, + { + role: 'user' as const, + content: 'Hello', + }, + { + role: 'assistant' as const, + content: 'Hi there', + }, + { + role: 'tool' as 
const, + toolCallId: 'tc_1', + content: 'Tool result', + }, + ]; + + // Chat -> OR format + const orFormat = fromChatMessages(chatMessages); + const items = orFormat as models.OutputItems[]; + + // System message + const systemItems = items.filter((i) => i.role === 'system'); + expect(systemItems).toHaveLength(1); + + // User message + const userItems = items.filter((i) => i.role === 'user'); + expect(userItems).toHaveLength(1); + + // Assistant message + const assistantItems = items.filter((i) => i.role === 'assistant'); + expect(assistantItems).toHaveLength(1); + + // Tool message -> FunctionCallOutputItem + const toolOutputs = items.filter((i) => i.type === 'function_call_output'); + expect(toolOutputs).toHaveLength(1); + + // Verify OR format -> Chat format works on a response + const mockResponse = { + id: 'r1', + output: [ + { + type: 'message' as const, + id: 'msg_1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'output_text' as const, + text: 'Response', + annotations: [], + }, + ], + }, + ], + status: 'completed' as const, + outputText: 'Response', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + const chatResponse = toChatMessage(mockResponse); + expect(chatResponse.role).toBe('assistant'); + expect(typeof chatResponse.content).toBe('string'); + }); +}); diff --git a/tests/integration/next-turn-params-flow.test.ts b/tests/integration/next-turn-params-flow.test.ts new file mode 100644 index 0000000..95f1b5d --- /dev/null +++ b/tests/integration/next-turn-params-flow.test.ts @@ -0,0 +1,63 @@ +import { describe, expect, it } from 'vitest'; + +import { + applyNextTurnParamsToRequest, + buildNextTurnParamsContext, + executeNextTurnParamsFunctions, +} from '../../src/lib/next-turn-params.js'; +import { TEST_MODEL } from '../test-constants.js'; + +describe('Next-turn params: tools -> computation -> request modification', () => { + it('executeNextTurnParamsFunctions 
output accepted by applyNextTurnParamsToRequest -> modified request', async () => { + const toolsWithNextTurnParams = [ + { + type: 'function', + function: { + name: 'search', + nextTurnParams: { + temperature: () => 0.5, + }, + }, + }, + ]; + + const toolCalls = [ + { + id: 'tc_1', + name: 'search', + arguments: { + q: 'test', + }, + }, + ]; + const request = { + model: TEST_MODEL, + temperature: 0.7, + }; + + const params = await executeNextTurnParamsFunctions( + toolCalls, + toolsWithNextTurnParams, + request, + ); + + expect(params).toHaveProperty('temperature', 0.5); + + const modified = applyNextTurnParamsToRequest(request, params); + expect(modified.temperature).toBe(0.5); + expect(modified.model).toBe(TEST_MODEL); + }); + + it('buildNextTurnParamsContext extracts from request -> context passed to nextTurnParams functions', () => { + const request = { + model: TEST_MODEL, + temperature: 0.7, + input: 'hello', + }; + + const ctx = buildNextTurnParamsContext(request); + expect(ctx.model).toBe(TEST_MODEL); + expect(ctx.temperature).toBe(0.7); + expect(ctx.input).toBe('hello'); + }); +}); diff --git a/tests/integration/next-turn-params-request.test.ts b/tests/integration/next-turn-params-request.test.ts new file mode 100644 index 0000000..da44632 --- /dev/null +++ b/tests/integration/next-turn-params-request.test.ts @@ -0,0 +1,44 @@ +import { describe, expect, it } from 'vitest'; + +import { + applyNextTurnParamsToRequest, + executeNextTurnParamsFunctions, +} from '../../src/lib/next-turn-params.js'; +import { TEST_MODEL } from '../test-constants.js'; + +describe('Next-turn params -> request modification -> API readiness', () => { + it('executeNextTurnParamsFunctions computes new temperature -> applyNextTurnParamsToRequest produces request with updated temperature', async () => { + const tools = [ + { + type: 'function', + function: { + name: 'search', + nextTurnParams: { + temperature: () => 0.3, + }, + }, + }, + ]; + + const toolCalls = [ + { + id: 'tc_1', + 
name: 'search', + arguments: { + q: 'test', + }, + }, + ]; + const request = { + model: TEST_MODEL, + temperature: 0.7, + input: 'hello', + }; + const params = await executeNextTurnParamsFunctions(toolCalls, tools, request); + + const modified = applyNextTurnParamsToRequest(request, params); + expect(modified.temperature).toBe(0.3); + expect(modified.model).toBe(TEST_MODEL); + expect(modified.input).toBe('hello'); + }); +}); diff --git a/tests/integration/orchestrator-executor.test.ts b/tests/integration/orchestrator-executor.test.ts new file mode 100644 index 0000000..8afd8e2 --- /dev/null +++ b/tests/integration/orchestrator-executor.test.ts @@ -0,0 +1,66 @@ +import { describe, expect, it } from 'vitest'; +import { + hasToolExecutionErrors, + summarizeToolExecutions, + toolResultsToMap, +} from '../../src/lib/tool-orchestrator.js'; +import type { Tool, ToolExecutionResult } from '../../src/lib/tool-types.js'; + +describe('Orchestrator <- Executor: utility functions consume real ToolExecutionResult', () => { + const successResult: ToolExecutionResult = { + toolCallId: 'tc_1', + toolName: 'search', + result: { + data: 'found', + }, + }; + + const errorResult: ToolExecutionResult = { + toolCallId: 'tc_2', + toolName: 'delete', + result: null, + error: new Error('Permission denied'), + }; + + it('toolResultsToMap correctly maps real ToolExecutionResult objects', () => { + const map = toolResultsToMap([ + successResult, + errorResult, + ]); + expect(map.get('tc_1')).toEqual({ + result: { + data: 'found', + }, + preliminaryResults: undefined, + }); + expect(map.get('tc_2')).toEqual({ + result: null, + preliminaryResults: undefined, + }); + }); + + it('hasToolExecutionErrors detects error field on real ToolExecutionResult', () => { + expect( + hasToolExecutionErrors([ + successResult, + ]), + ).toBe(false); + expect( + hasToolExecutionErrors([ + successResult, + errorResult, + ]), + ).toBe(true); + }); + + it('summarizeToolExecutions formats real success + error 
results', () => { + const summary = summarizeToolExecutions([ + successResult, + errorResult, + ]); + expect(summary).toContain('search'); + expect(summary).toContain('SUCCESS'); + expect(summary).toContain('delete'); + expect(summary).toContain('Permission denied'); + }); +}); diff --git a/tests/integration/reusable-stream-consumers.test.ts b/tests/integration/reusable-stream-consumers.test.ts new file mode 100644 index 0000000..82ae5ec --- /dev/null +++ b/tests/integration/reusable-stream-consumers.test.ts @@ -0,0 +1,81 @@ +import { describe, expect, it } from 'vitest'; + +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; +import { buildItemsStream, consumeStreamForCompletion } from '../../src/lib/stream-transformers.js'; + +function makeStream(events: StreamEvents[]): ReusableReadableStream { + const source = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(event); + } + controller.close(); + }, + }); + return new ReusableReadableStream(source); +} + +async function collectAll(iter: AsyncIterable): Promise { + const result: T[] = []; + for await (const item of iter) { + result.push(item); + } + return result; +} + +describe('ReusableReadableStream -> concurrent transformer consumption', () => { + it('buildItemsStream and consumeStreamForCompletion both consume same stream correctly', async () => { + const response = { + id: 'r1', + status: 'completed', + output: [], + }; + const events = [ + { + type: 'response.output_item.added', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'in_progress', + content: [], + }, + }, + { + type: 'response.output_text.delta', + delta: 'Hello', + itemId: 'msg_1', + }, + { + type: 'response.output_item.done', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'completed', + content: [ + { + type: 'output_text', + text: 'Hello', + annotations: [], + }, + ], + }, + }, + { + type: 'response.completed', + response, 
+ }, + ]; + + const stream = makeStream(events); + + const [items, completedResponse] = await Promise.all([ + collectAll(buildItemsStream(stream)), + consumeStreamForCompletion(stream), + ]); + + expect(items.length).toBeGreaterThan(0); + expect(completedResponse).toEqual(response); + }); +}); diff --git a/tests/integration/stream-completion-guards.test.ts b/tests/integration/stream-completion-guards.test.ts new file mode 100644 index 0000000..a291270 --- /dev/null +++ b/tests/integration/stream-completion-guards.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, it } from 'vitest'; + +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; +import { consumeStreamForCompletion } from '../../src/lib/stream-transformers.js'; + +function makeStream(events: StreamEvents[]): ReusableReadableStream { + const source = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(event); + } + controller.close(); + }, + }); + return new ReusableReadableStream(source); +} + +describe('consumeStreamForCompletion + stream type guards', () => { + it('returns response object because isResponseCompletedEvent identified the completion event', async () => { + const response = { + id: 'r1', + status: 'completed', + output: [ + { + type: 'message', + id: 'm1', + role: 'assistant', + content: [], + }, + ], + }; + const stream = makeStream([ + { + type: 'response.output_text.delta', + delta: 'hello', + }, + { + type: 'response.completed', + response, + }, + ]); + const result = await consumeStreamForCompletion(stream); + expect(result.id).toBe('r1'); + expect(result.status).toBe('completed'); + }); + + it('throws on failed response because isResponseFailedEvent caught the failure', async () => { + const stream = makeStream([ + { + type: 'response.failed', + response: { + error: { + message: 'quota exceeded', + }, + }, + }, + ]); + await expect(consumeStreamForCompletion(stream)).rejects.toThrow('Response failed'); + }); +}); diff 
--git a/tests/integration/tool-context-execution.test.ts b/tests/integration/tool-context-execution.test.ts new file mode 100644 index 0000000..cf44d4a --- /dev/null +++ b/tests/integration/tool-context-execution.test.ts @@ -0,0 +1,64 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { buildToolExecuteContext, ToolContextStore } from '../../src/lib/tool-context.js'; +import { buildTurnContext } from '../../src/lib/turn-context.js'; + +describe('ToolContextStore -> buildToolExecuteContext -> tool execution', () => { + it('tool execute receives context where local reflects store data set before execution', () => { + const store = new ToolContextStore({ + myTool: { + apiKey: 'key-123', + }, + }); + const turnCtx = buildTurnContext({ + numberOfTurns: 1, + }); + const _toolFn = { + name: 'myTool', + inputSchema: z.object({}), + contextSchema: z.object({ + apiKey: z.string(), + }), + }; + + const contextSchema = z.object({ + apiKey: z.string(), + }); + const execCtx = buildToolExecuteContext(turnCtx, store, 'myTool', contextSchema); + expect(execCtx.local).toEqual({ + apiKey: 'key-123', + }); + }); + + it('tool calls setContext -> store updated -> next tool reads updated value via local', () => { + const store = new ToolContextStore({ + toolA: { + count: 0, + }, + toolB: {}, + }); + const turnCtx = buildTurnContext({ + numberOfTurns: 1, + }); + const contextSchema = z.object({ + count: z.number(), + }); + + const execCtxA = buildToolExecuteContext(turnCtx, store, 'toolA', contextSchema); + expect(execCtxA.local).toEqual({ + count: 0, + }); + + // Simulate tool A updating context + store.mergeToolContext('toolA', { + count: 42, + }); + + // Tool A now reads updated value + const execCtxA2 = buildToolExecuteContext(turnCtx, store, 'toolA', contextSchema); + expect(execCtxA2.local).toEqual({ + count: 42, + }); + }); +}); diff --git a/tests/integration/turn-context-async-params.test.ts 
b/tests/integration/turn-context-async-params.test.ts new file mode 100644 index 0000000..bf707a7 --- /dev/null +++ b/tests/integration/turn-context-async-params.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from 'vitest'; +import { resolveAsyncFunctions } from '../../src/lib/async-params.js'; +import { buildTurnContext } from '../../src/lib/turn-context.js'; +import { makeCallModelInput, TEST_MODEL, TEST_MODEL_ALT } from '../test-constants.js'; + +describe('buildTurnContext -> resolveAsyncFunctions', () => { + it('parameter function receives TurnContext with correct numberOfTurns', async () => { + const turnCtx = buildTurnContext({ + numberOfTurns: 5, + }); + const result = await resolveAsyncFunctions( + makeCallModelInput({ + model: TEST_MODEL, + temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1, + }), + turnCtx, + ); + expect(result.temperature).toBe(0.5); + }); + + it('parameter function can read toolCall from context when provided', async () => { + const toolCall = { + id: 'tc_1', + name: 'search', + arguments: { + q: 'test', + }, + }; + const turnCtx = buildTurnContext({ + numberOfTurns: 1, + toolCall: toolCall, + }); + const result = await resolveAsyncFunctions( + makeCallModelInput({ + model: (ctx: { toolCall?: unknown }) => (ctx.toolCall ? TEST_MODEL_ALT : TEST_MODEL), + }), + turnCtx, + ); + expect(result.model).toBe(TEST_MODEL_ALT); + }); +}); diff --git a/tests/pipelines/README.md b/tests/pipelines/README.md new file mode 100644 index 0000000..fd0c51f --- /dev/null +++ b/tests/pipelines/README.md @@ -0,0 +1,28 @@ +# Pipelines Tests + +Tests in this folder are **end-to-end agent workflows** where multiple capabilities compose, and at each layer we verify: what it specifically produces, how it differs from peers, and how it feeds the next layer. + +These are the most comprehensive tests in the suite. Each test exercises a complete pipeline and makes assertions at every stage. 
+ +## What belongs here + +- Full streaming pipeline: raw events -> guards -> transformers -> consumer +- Full tool execution pipeline: definition -> dispatch -> validate -> execute -> format +- Context pipeline: build -> resolve -> store -> execute +- Stop condition pipeline: results -> steps -> conditions -> decision +- Dual-format output: same response -> structurally distinct formats simultaneously +- Claude conversion deep pipeline: multi-item response -> per-item guard routing -> distinct blocks +- Bidirectional format conversion round-trips +- Extendable: when new end-to-end workflows are introduced, their full-pipeline tests with per-layer verification go here + +## Examples + +- Mixed stream events flow through type guards, extractors yield correct types, consumer receives via ReusableReadableStream +- `tool()` creates tool -> `executeTool` dispatches via guard -> validates input -> executes -> validates output -> formats for model +- Three concurrent stream consumers (`buildMessageStream`, `buildResponsesMessageStream`, `buildItemsStream`) all complete, each structurally different, no consumer blocks another + +## What does NOT belong here + +- Single-module behavior (-> `behavior/`) +- Two-module composition without per-layer verification (-> `composition/` or `integration/`) +- Tests that don't verify behavior, boundaries, and composition together (-> appropriate simpler category) diff --git a/tests/pipelines/approval-execution-state.test.ts b/tests/pipelines/approval-execution-state.test.ts new file mode 100644 index 0000000..b2844a7 --- /dev/null +++ b/tests/pipelines/approval-execution-state.test.ts @@ -0,0 +1,96 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { tool } from '../../src/index.js'; +import { + createInitialState, + createRejectedResult, + createUnsentResult, + partitionToolCalls, + unsentResultsToAPIFormat, + updateState, +} from '../../src/lib/conversation-state.js'; +import { executeTool } 
from '../../src/lib/tool-executor.js'; + +describe('Approval -> execution -> state update pipeline', () => { + it('approval workflow: partition -> execute auto -> create results -> format -> update state', async () => { + const autoTool = tool({ + name: 'search', + inputSchema: z.object({ + q: z.string(), + }), + execute: async (args) => ({ + results: [ + `found: ${args.q}`, + ], + }), + }); + + const approvalTool = tool({ + name: 'delete', + inputSchema: z.object({ + target: z.string(), + }), + requireApproval: true, + execute: async () => 'deleted', + }); + + const toolCalls = [ + { + id: 'tc_1', + name: 'search', + arguments: { + q: 'test', + }, + }, + { + id: 'tc_2', + name: 'delete', + arguments: { + target: 'file.txt', + }, + }, + ]; + + const tools = [ + autoTool, + approvalTool, + ]; + + // Step 1: Partition + const partition = await partitionToolCalls(toolCalls, tools); + expect(partition.autoExecute).toHaveLength(1); + expect(partition.requiresApproval).toHaveLength(1); + + // Step 2: Execute auto tool + const autoResult = await executeTool(autoTool, partition.autoExecute[0]!, { + numberOfTurns: 1, + }); + expect(autoResult.result).toEqual({ + results: [ + 'found: test', + ], + }); + + // Step 3: Create results + const unsent = createUnsentResult('tc_1', 'search', autoResult.result); + const rejected = createRejectedResult('tc_2', 'delete'); + + // Step 4: Format for API + const formatted = unsentResultsToAPIFormat([ + unsent, + rejected, + ]); + expect(formatted).toHaveLength(2); + expect(formatted[0]!.type).toBe('function_call_output'); + expect(formatted[1]!.type).toBe('function_call_output'); + + // Step 5: Update state + const state = createInitialState(); + const updated = updateState(state, { + status: 'completed', + }); + expect(updated.status).toBe('completed'); + expect(updated.id).toBe(state.id); + }); +}); diff --git a/tests/pipelines/context-pipeline.test.ts b/tests/pipelines/context-pipeline.test.ts new file mode 100644 index 
0000000..c4f6580 --- /dev/null +++ b/tests/pipelines/context-pipeline.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; +import { + buildToolExecuteContext, + resolveContext, + ToolContextStore, +} from '../../src/lib/tool-context.js'; +import { buildTurnContext } from '../../src/lib/turn-context.js'; + +describe('Context pipeline: build -> resolve -> store -> execute', () => { + it('turn 0 with context: buildTurnContext -> resolveContext -> ToolContextStore -> buildToolExecuteContext -> tool reads local', async () => { + // Build turn context + const turnCtx = buildTurnContext({ + numberOfTurns: 0, + }); + expect(turnCtx.numberOfTurns).toBe(0); + + // Resolve context via function + const contextFn = () => ({ + apiKey: 'secret-123', + }); + const resolved = await resolveContext(contextFn, turnCtx); + expect(resolved).toEqual({ + apiKey: 'secret-123', + }); + + // Populate store + const store = new ToolContextStore({ + myTool: resolved, + }); + + // Build tool execute context + const _toolFn = { + name: 'myTool', + inputSchema: z.object({}), + contextSchema: z.object({ + apiKey: z.string(), + }), + }; + const contextSchema = z.object({ + apiKey: z.string(), + }); + const execCtx = buildToolExecuteContext(turnCtx, store, 'myTool', contextSchema); + + // Tool reads from local + expect(execCtx.local).toEqual({ + apiKey: 'secret-123', + }); + expect(execCtx.numberOfTurns).toBe(0); + }); + + it('shared context mutation: tool A reads count=0 -> sets count=1 -> tool B reads count=1', () => { + const store = new ToolContextStore({ + shared: { + count: 0, + }, + }); + const turnCtx = buildTurnContext({ + numberOfTurns: 1, + }); + + const _sharedToolFn = { + name: 'shared', + inputSchema: z.object({}), + contextSchema: z.object({ + count: z.number(), + }), + }; + + const contextSchema = z.object({ + count: z.number(), + }); + + // Tool A reads shared.count === 0 + const ctxA = buildToolExecuteContext(turnCtx, store, 
'shared', contextSchema); + expect(ctxA.local).toEqual({ + count: 0, + }); + + // Tool A updates shared context + store.setToolContext('shared', { + count: 1, + }); + + // Tool B reads shared.count === 1 + const ctxB = buildToolExecuteContext(turnCtx, store, 'shared', contextSchema); + expect(ctxB.local).toEqual({ + count: 1, + }); + }); +}); diff --git a/tests/pipelines/dual-format-output.test.ts b/tests/pipelines/dual-format-output.test.ts new file mode 100644 index 0000000..f74b8aa --- /dev/null +++ b/tests/pipelines/dual-format-output.test.ts @@ -0,0 +1,166 @@ +import { describe, expect, it } from 'vitest'; + +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; +import { + buildItemsStream, + buildMessageStream, + buildResponsesMessageStream, + convertToClaudeMessage, + extractMessageFromResponse, + extractToolCallsFromResponse, +} from '../../src/lib/stream-transformers.js'; +import { TEST_MODEL } from '../test-constants.js'; + +function makeStream(events: StreamEvents[]): ReusableReadableStream { + const source = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(event); + } + controller.close(); + }, + }); + return new ReusableReadableStream(source); +} + +async function collectAll(iter: AsyncIterable): Promise { + const result: T[] = []; + for await (const item of iter) { + result.push(item); + } + return result; +} + +describe('Dual-format output: same response -> structurally distinct formats', () => { + it('from response: same response -> extractMessageFromResponse, convertToClaudeMessage, extractToolCallsFromResponse all work', () => { + const response = { + id: 'r1', + output: [ + { + type: 'message' as const, + id: 'msg_1', + role: 'assistant' as const, + status: 'completed' as const, + content: [ + { + type: 'output_text' as const, + text: 'Found results', + annotations: [], + }, + ], + }, + { + type: 'function_call' as const, + id: 'fc_1', + callId: 'fc_1', + name: 'search', + 
arguments: '{"q":"test"}', + status: 'completed' as const, + }, + ], + status: 'completed' as const, + outputText: 'Found results', + model: TEST_MODEL, + usage: { + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + }, + }; + + // Chat format + const chatMsg = extractMessageFromResponse(response); + expect(chatMsg.role).toBe('assistant'); + expect(typeof chatMsg.content).toBe('string'); + + // Claude format + const claudeMsg = convertToClaudeMessage(response); + expect(claudeMsg.role).toBe('assistant'); + expect(Array.isArray(claudeMsg.content)).toBe(true); + + // Tool calls + const toolCalls = extractToolCallsFromResponse(response); + expect(toolCalls).toHaveLength(1); + expect(toolCalls[0]!.name).toBe('search'); + + // All semantically equivalent, structurally different + expect(chatMsg.content).toBe('Found results'); + const claudeText = claudeMsg.content.find((b: { type: string }) => b.type === 'text'); + expect( + ( + claudeText as { + type: string; + text: string; + } + ).text, + ).toBe('Found results'); + }); + + it('through streaming: same ReusableReadableStream -> three concurrent consumers all complete', async () => { + const events = [ + { + type: 'response.output_item.added', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'in_progress', + content: [], + }, + }, + { + type: 'response.output_text.delta', + delta: 'Hello', + itemId: 'msg_1', + }, + { + type: 'response.output_text.delta', + delta: ' world', + itemId: 'msg_1', + }, + { + type: 'response.output_item.done', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'completed', + content: [ + { + type: 'output_text', + text: 'Hello world', + annotations: [], + }, + ], + }, + }, + { + type: 'response.completed', + response: { + id: 'r1', + }, + }, + ]; + + const stream = makeStream(events); + + // Three concurrent consumers + const [chatMsgs, responsesMsgs, items] = await Promise.all([ + collectAll(buildMessageStream(stream)), + 
collectAll(buildResponsesMessageStream(stream)), + collectAll(buildItemsStream(stream)), + ]); + + // All complete without blocking each other + expect(chatMsgs.length).toBeGreaterThan(0); + expect(responsesMsgs.length).toBeGreaterThan(0); + expect(items.length).toBeGreaterThan(0); + + // Structurally different + const lastChat = chatMsgs[chatMsgs.length - 1]!; + const lastResponses = responsesMsgs[responsesMsgs.length - 1]!; + + expect('id' in lastChat).toBe(false); + expect('id' in lastResponses).toBe(true); + }); +}); diff --git a/tests/pipelines/next-turn-params-pipeline.test.ts b/tests/pipelines/next-turn-params-pipeline.test.ts new file mode 100644 index 0000000..57156d4 --- /dev/null +++ b/tests/pipelines/next-turn-params-pipeline.test.ts @@ -0,0 +1,60 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { tool } from '../../src/index.js'; +import { + applyNextTurnParamsToRequest, + buildNextTurnParamsContext, + executeNextTurnParamsFunctions, +} from '../../src/lib/next-turn-params.js'; +import { TEST_MODEL } from '../test-constants.js'; + +describe('Next-turn parameter adjustment pipeline', () => { + it('dynamic temperature: search tool with nextTurnParams.temperature -> context -> execute -> apply -> request updated', async () => { + const searchTool = tool({ + name: 'search', + inputSchema: z.object({ + query: z.string(), + }), + execute: async (args) => `Results for: ${args.query}`, + nextTurnParams: { + temperature: (input: { query?: string }) => (input.query?.includes('creative') ? 
0.9 : 0.1), + }, + }); + + const request = { + model: TEST_MODEL, + temperature: 0.5, + input: 'hello', + }; + + // Step 1: Build context from request + const ctx = buildNextTurnParamsContext(request); + expect(ctx.model).toBe(TEST_MODEL); + expect(ctx.temperature).toBe(0.5); + + // Step 2: Execute nextTurnParams functions + // The tool was called with { query: 'creative writing' } + const tools = [ + searchTool, + ]; + const toolCalls = [ + { + id: 'tc_1', + name: 'search', + arguments: { + query: 'creative writing', + }, + }, + ]; + const params = await executeNextTurnParamsFunctions(toolCalls, tools, request); + + expect(params).toHaveProperty('temperature', 0.9); + + // Step 3: Apply to request + const modified = applyNextTurnParamsToRequest(request, params); + expect(modified.temperature).toBe(0.9); + expect(modified.model).toBe(TEST_MODEL); + expect(modified.input).toBe('hello'); + }); +}); diff --git a/tests/pipelines/stop-condition-pipeline.test.ts b/tests/pipelines/stop-condition-pipeline.test.ts new file mode 100644 index 0000000..1b50c3c --- /dev/null +++ b/tests/pipelines/stop-condition-pipeline.test.ts @@ -0,0 +1,117 @@ +import { describe, expect, it } from 'vitest'; + +import { + hasToolCall, + isStopConditionMet, + maxCost, + stepCountIs, +} from '../../src/lib/stop-conditions.js'; +import { makeStep, makeTypedToolCalls, makeUsage } from '../test-constants.js'; + +describe('Stop condition pipeline: results -> steps -> conditions -> decision', () => { + it('step count: 3 tool rounds -> StepResult[] length 3 -> stepCountIs(3) true -> isStopConditionMet true', async () => { + const steps = [ + makeStep(), + makeStep(), + makeStep(), + ]; + expect( + stepCountIs(3)({ + steps, + }), + ).toBe(true); + const result = await isStopConditionMet({ + stopConditions: [ + stepCountIs(3), + ], + steps, + }); + expect(result).toBe(true); + }); + + it('tool call: round with "search" tool -> hasToolCall("search") true -> isStopConditionMet true', async () => { + const 
steps = [ + makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'search', + id: 'tc1', + arguments: {}, + }, + ]), + }), + ]; + expect( + hasToolCall('search')({ + steps, + }), + ).toBe(true); + const result = await isStopConditionMet({ + stopConditions: [ + hasToolCall('search'), + ], + steps, + }); + expect(result).toBe(true); + }); + + it('cost: round with usage.cost = 0.30 -> maxCost(0.25) true -> stop', async () => { + const steps = [ + makeStep({ + usage: makeUsage({ + totalTokens: 100, + inputTokens: 50, + outputTokens: 50, + cost: 0.3, + }), + }), + ]; + expect( + maxCost(0.25)({ + steps, + }), + ).toBe(true); + const result = await isStopConditionMet({ + stopConditions: [ + maxCost(0.25), + ], + steps, + }); + expect(result).toBe(true); + }); + + it('combined OR: stepCountIs(10) false + hasToolCall("done") true -> isStopConditionMet true', async () => { + const steps = [ + makeStep({ + toolCalls: makeTypedToolCalls([ + { + name: 'done', + id: 'tc1', + arguments: {}, + }, + ]), + }), + ]; + // stepCountIs(10) is false (only 1 step) + expect( + stepCountIs(10)({ + steps, + }), + ).toBe(false); + // hasToolCall('done') is true + expect( + hasToolCall('done')({ + steps, + }), + ).toBe(true); + // OR logic -> true + const result = await isStopConditionMet({ + stopConditions: [ + stepCountIs(10), + hasToolCall('done'), + ], + steps, + }); + expect(result).toBe(true); + }); +}); diff --git a/tests/pipelines/streaming-pipeline.test.ts b/tests/pipelines/streaming-pipeline.test.ts new file mode 100644 index 0000000..144440f --- /dev/null +++ b/tests/pipelines/streaming-pipeline.test.ts @@ -0,0 +1,159 @@ +import { describe, expect, it } from 'vitest'; + +import { ReusableReadableStream } from '../../src/lib/reusable-stream.js'; +import { + buildItemsStream, + consumeStreamForCompletion, + extractTextDeltas, +} from '../../src/lib/stream-transformers.js'; + +function makeStream(events: StreamEvents[]): ReusableReadableStream { + const source = new 
ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue(event); + } + controller.close(); + }, + }); + return new ReusableReadableStream(source); +} + +async function collectAll(iter: AsyncIterable): Promise { + const result: T[] = []; + for await (const item of iter) { + result.push(item); + } + return result; +} + +describe('Full streaming pipeline: raw events -> guards -> transformers -> consumer', () => { + it('text streaming: guard filters to text only -> extractTextDeltas yields strings -> non-text absent', async () => { + const events = [ + { + type: 'response.output_text.delta', + delta: 'Hello', + }, + { + type: 'response.reasoning_text.delta', + delta: 'thinking', + }, + { + type: 'response.function_call_arguments.delta', + delta: '{"q":', + }, + { + type: 'response.output_text.delta', + delta: ' world', + }, + { + type: 'response.completed', + response: {}, + }, + ]; + const stream = makeStream(events); + const textDeltas = await collectAll(extractTextDeltas(stream)); + + // Guard true only for text events + expect(textDeltas).toEqual([ + 'Hello', + ' world', + ]); + // Non-text absent + expect(textDeltas).not.toContain('thinking'); + expect(textDeltas).not.toContain('{"q":'); + }); + + it('items streaming: type guards dispatch to per-type handlers -> consumer gets distinct item types', async () => { + const events = [ + { + type: 'response.output_item.added', + item: { + type: 'message', + id: 'msg_1', + role: 'assistant', + status: 'in_progress', + content: [], + }, + }, + { + type: 'response.output_text.delta', + delta: 'Hello', + itemId: 'msg_1', + }, + { + type: 'response.output_item.added', + item: { + type: 'function_call', + id: 'fc_1', + callId: 'fc_1', + name: 'search', + arguments: '', + status: 'in_progress', + }, + }, + { + type: 'response.function_call_arguments.delta', + delta: '{"q":"test"}', + itemId: 'fc_1', + }, + { + type: 'response.output_item.added', + item: { + type: 'reasoning', + id: 'r_1', + 
status: 'in_progress', + summary: [], + }, + }, + { + type: 'response.reasoning_text.delta', + delta: 'thinking', + itemId: 'r_1', + }, + { + type: 'response.completed', + response: {}, + }, + ]; + const stream = makeStream(events); + const items = await collectAll(buildItemsStream(stream)); + + const messageItems = items.filter((i) => i.type === 'message'); + const fnCallItems = items.filter((i) => i.type === 'function_call'); + const reasoningItems = items.filter((i) => i.type === 'reasoning'); + + // Each type present and distinct + expect(messageItems.length).toBeGreaterThan(0); + expect(fnCallItems.length).toBeGreaterThan(0); + expect(reasoningItems.length).toBeGreaterThan(0); + + // Message items have text + expect(messageItems[messageItems.length - 1].content[0].text).toBe('Hello'); + // Function call items have arguments + expect(fnCallItems[fnCallItems.length - 1].arguments).toBe('{"q":"test"}'); + // Reasoning items have content + expect(reasoningItems[reasoningItems.length - 1].summary[0].text).toBe('thinking'); + }); + + it('completion: isResponseCompletedEvent true -> consumeStreamForCompletion returns response -> stream terminates', async () => { + const response = { + id: 'r1', + status: 'completed', + output: [], + }; + const events = [ + { + type: 'response.output_text.delta', + delta: 'data', + }, + { + type: 'response.completed', + response, + }, + ]; + const stream = makeStream(events); + const result = await consumeStreamForCompletion(stream); + expect(result).toEqual(response); + }); +}); diff --git a/tests/pipelines/tool-execution-pipeline.test.ts b/tests/pipelines/tool-execution-pipeline.test.ts new file mode 100644 index 0000000..34dbaed --- /dev/null +++ b/tests/pipelines/tool-execution-pipeline.test.ts @@ -0,0 +1,138 @@ +import { describe, expect, it } from 'vitest'; +import { z } from 'zod/v4'; + +import { tool } from '../../src/index.js'; +import { + executeTool, + formatToolExecutionError, + formatToolResultForModel, +} from 
'../../src/lib/tool-executor.js'; + +describe('Full tool execution pipeline: definition -> dispatch -> validate -> execute -> format', () => { + it('regular tool: tool() -> executeTool -> validates -> executes -> formatToolResultForModel produces JSON', async () => { + const addTool = tool({ + name: 'add', + inputSchema: z.object({ + a: z.number(), + b: z.number(), + }), + outputSchema: z.object({ + sum: z.number(), + }), + execute: async (args) => ({ + sum: args.a + args.b, + }), + }); + + const toolCall = { + id: 'tc_1', + name: 'add', + arguments: { + a: 2, + b: 3, + }, + }; + const result = await executeTool(addTool, toolCall, { + numberOfTurns: 1, + }); + + // Dispatch worked (regular path) + expect(result.toolCallId).toBe('tc_1'); + expect(result.toolName).toBe('add'); + // Execution worked + expect(result.result).toEqual({ + sum: 5, + }); + // No error + expect(result.error).toBeUndefined(); + + // Format for model + const formatted = formatToolResultForModel(result); + expect(typeof formatted).toBe('string'); + const parsed = JSON.parse(formatted); + expect(parsed.sum).toBe(5); + }); + + it('generator tool: tool() with eventSchema -> executeTool -> generator yields events -> result has both', async () => { + const streamTool = tool({ + name: 'stream_add', + inputSchema: z.object({ + a: z.number(), + b: z.number(), + }), + eventSchema: z.object({ + progress: z.number(), + }), + outputSchema: z.object({ + sum: z.number(), + }), + execute: async function* (args) { + yield { + progress: 50, + }; + yield { + progress: 100, + }; + return { + sum: args.a + args.b, + }; + }, + }); + + const toolCall = { + id: 'tc_2', + name: 'stream_add', + arguments: { + a: 3, + b: 4, + }, + }; + const result = await executeTool(streamTool, toolCall, { + numberOfTurns: 1, + }); + + // Dispatch worked (generator path) + expect(result.toolCallId).toBe('tc_2'); + // Generator yielded events + expect(result.preliminaryResults).toHaveLength(2); + 
expect(result.preliminaryResults![0]).toEqual({ + progress: 50, + }); + expect(result.preliminaryResults![1]).toEqual({ + progress: 100, + }); + // Final result + expect(result.result).toEqual({ + sum: 7, + }); + }); + + it('error pipeline: invalid input -> executeTool -> caught -> ToolExecutionResult has error -> formatToolExecutionError includes details', async () => { + const strictTool = tool({ + name: 'strict', + inputSchema: z.object({ + count: z.number().min(1), + }), + execute: async (args) => args.count, + }); + + const toolCall = { + id: 'tc_3', + name: 'strict', + arguments: { + count: -5, + }, + }; + const result = await executeTool(strictTool, toolCall, { + numberOfTurns: 1, + }); + + // Error was caught + expect(result.error).toBeDefined(); + expect(result.result).toBeNull(); + + // Format error includes details + const errorFormatted = formatToolExecutionError(result.error!, toolCall); + expect(errorFormatted).toContain('strict'); + }); +}); diff --git a/tests/test-constants.ts b/tests/test-constants.ts new file mode 100644 index 0000000..c2f749e --- /dev/null +++ b/tests/test-constants.ts @@ -0,0 +1,152 @@ +/** + * Shared test constants and typed factory helpers. + * + * Unit/integration tests use a synthetic placeholder so they never + * depend on a real model existing. Change these in one place if the + * convention needs to be updated. + */ + +import type * as models from '@openrouter/sdk/models'; +import type { CallModelInput } from '../src/lib/async-params.js'; +import type { + ParsedToolCall, + StepResult, + Tool, + ToolExecutionResult, + TurnContext, + TypedToolCallUnion, +} from '../src/lib/tool-types.js'; + +/** Default model identifier used in non-e2e tests. */ +export const TEST_MODEL = 'openai/gpt-4.1-nano'; + +/** Alternative model for tests that need a second, distinct model. 
*/
+export const TEST_MODEL_ALT = 'openai/gpt-4.1-mini';
+
+// ---------------------------------------------------------------------------
+// Factory helpers – build properly typed test data without `as any`
+// ---------------------------------------------------------------------------
+
+/** Minimal Usage object: token totals are required, detail counters default to 0, and `overrides` is spread last so it wins. */
+export function makeUsage(
+  overrides: Partial<models.Usage> & {
+    totalTokens: number;
+    inputTokens: number;
+    outputTokens: number;
+  },
+): models.Usage {
+  return {
+    inputTokensDetails: {
+      cachedTokens: 0,
+    },
+    outputTokensDetails: {
+      reasoningTokens: 0,
+    },
+    ...overrides,
+  };
+}
+
+/** Minimal OpenResponsesResult: a 'completed' `resp_test` response for TEST_MODEL; `overrides` (which must supply `output`) is spread last. */
+export function makeResponse(
+  overrides: Partial<models.OpenResponsesResult> & {
+    output: models.OutputItems[];
+  },
+): models.OpenResponsesResult {
+  return {
+    id: 'resp_test',
+    object: 'response',
+    createdAt: 0,
+    model: TEST_MODEL,
+    status: 'completed',
+    completedAt: null,
+    error: null,
+    incompleteDetails: null,
+    temperature: null,
+    topP: null,
+    presencePenalty: null,
+    frequencyPenalty: null,
+    instructions: null,
+    metadata: null,
+    tools: [],
+    toolChoice: 'auto',
+    parallelToolCalls: false,
+    ...overrides,
+  };
+}
+
+/** Minimal StepResult ('initial' step, empty text and tool lists, empty-output response) built without `as any`; `overrides` is spread last. */
+export function makeStep(overrides: Partial<StepResult> = {}): StepResult {
+  return {
+    stepType: 'initial',
+    text: '',
+    response: makeResponse({
+      output: [],
+    }),
+    toolCalls: [],
+    toolResults: [],
+    finishReason: undefined,
+    usage: undefined,
+    ...overrides,
+  };
+}
+
+/** Minimal TurnContext for tests: `numberOfTurns` is 0 unless `overrides` replaces it. */
+export function makeTurnContext(overrides: Partial<TurnContext> = {}): TurnContext {
+  return {
+    numberOfTurns: 0,
+    ...overrides,
+  };
+}
+
+/** Typed ParsedToolCall factory: returns `overrides` unchanged, typed as a ParsedToolCall. */
+export function makeToolCall(overrides: {
+  id: string;
+  name: string;
+  arguments: unknown;
+}): ParsedToolCall {
+  return overrides;
+}
+
+/** Typed ToolExecutionResult factory.
*/ +export function makeToolResult( + overrides: Partial> & { + toolCallId: string; + toolName: string; + }, +): ToolExecutionResult { + return { + result: undefined, + ...overrides, + }; +} + +/** + * Cast a partial CallModelInput to the full type. + * Use when tests provide only a subset of fields (model, temperature, etc.) + * that don't include the full union-discriminant fields. + */ +export function makeCallModelInput(fields: Record): CallModelInput { + return fields as CallModelInput; +} + +/** Typed tool call array for StepResult.toolCalls */ +export function makeTypedToolCalls( + calls: Array<{ + id: string; + name: string; + arguments: unknown; + }>, +): TypedToolCallUnion[] { + return calls as TypedToolCallUnion[]; +} + +/** Minimal ResponsesRequest for tests. */ +export function makeRequest( + overrides: Partial = {}, +): models.ResponsesRequest { + return { + model: TEST_MODEL, + input: 'test', + ...overrides, + }; +} diff --git a/vitest.config.ts b/vitest.config.ts index efca4ae..40ad0b0 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -28,6 +28,83 @@ export default defineConfig({ hookTimeout: 10000, }, }, + { + extends: true, + test: { + name: 'behavior', + include: [ + 'tests/behavior/**/*.test.ts', + ], + testTimeout: 10000, + hookTimeout: 10000, + }, + }, + { + extends: true, + test: { + name: 'boundaries', + include: [ + 'tests/boundaries/**/*.test.ts', + ], + testTimeout: 10000, + hookTimeout: 10000, + }, + }, + { + extends: true, + test: { + name: 'composition', + include: [ + 'tests/composition/**/*.test.ts', + ], + testTimeout: 10000, + hookTimeout: 10000, + }, + }, + { + extends: true, + test: { + name: 'contracts', + include: [ + 'tests/contracts/**/*.test.ts', + ], + testTimeout: 10000, + hookTimeout: 10000, + }, + }, + { + extends: true, + test: { + name: 'integration', + include: [ + 'tests/integration/**/*.test.ts', + ], + testTimeout: 10000, + hookTimeout: 10000, + }, + }, + { + extends: true, + test: { + name: 'dispatch', + 
include: [ + 'tests/dispatch/**/*.test.ts', + ], + testTimeout: 10000, + hookTimeout: 10000, + }, + }, + { + extends: true, + test: { + name: 'pipelines', + include: [ + 'tests/pipelines/**/*.test.ts', + ], + testTimeout: 10000, + hookTimeout: 10000, + }, + }, { extends: true, test: {