From 53a204bf11ec07df2ea9f929477f5617b4806f2a Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:05:17 +0000
Subject: [PATCH 1/4] feat: expand test suite with behavior, boundaries,
 composition, contracts, dispatch, integration, and pipeline tests

Add comprehensive categorized test suite covering:
- behavior: isolated function behavior tests
- boundaries: mutual exclusion and domain separation tests
- composition: two-module connection tests
- contracts: cross-type distinction tests
- dispatch: routing and dispatch logic tests
- integration: output-feeds-input tests
- pipelines: end-to-end multi-module pipeline tests

Also adds tests/INDEX.md registry mapping functions to test categories,
README files for each category, and updates vitest.config.ts with
project configurations for all new test categories.

Co-Authored-By: Robert Yeakel <robert.yeakel@openrouter.ai>
---
 tests/INDEX.md                                | 192 ++++++
 tests/behavior/README.md                      |  26 +
 tests/behavior/async-params.test.ts           | 112 ++++
 tests/behavior/conversation-state.test.ts     | 190 ++++++
 tests/behavior/next-turn-params.test.ts       | 221 +++++++
 tests/behavior/reusable-stream.test.ts        | 204 +++++++
 .../stop-conditions-evaluation.test.ts        | 104 ++++
 .../stream-type-guards-negative.test.ts       | 133 ++++
 tests/behavior/tool-approval.test.ts          | 314 ++++++++++
 tests/behavior/tool-context.test.ts           | 263 ++++++++
 tests/behavior/tool-creation.test.ts          | 149 +++++
 tests/behavior/tool-event-broadcaster.test.ts | 131 ++++
 tests/behavior/tool-execution.test.ts         | 576 ++++++++++++++++++
 tests/behavior/tool-orchestrator.test.ts      | 118 ++++
 tests/behavior/tool-types-events.test.ts      |  94 +++
 tests/behavior/turn-context.test.ts           |  67 ++
 tests/boundaries/README.md                    |  26 +
 .../content-annotation-guards.test.ts         |  91 +++
 .../conversation-state-results.test.ts        |  51 ++
 tests/boundaries/domain-separation.test.ts    |  45 ++
 tests/boundaries/output-item-guards.test.ts   |  70 +++
 .../response-stream-event-guards.test.ts      |  59 ++
 tests/boundaries/stream-event-guards.test.ts  |  96 +++
 tests/boundaries/tool-factory-shapes.test.ts  |  92 +++
 tests/boundaries/tool-type-guards.test.ts     |  86 +++
 tests/composition/README.md                   |  25 +
 tests/composition/context-flow.test.ts        |  46 ++
 .../composition/format-compatibility.test.ts  |  49 ++
 tests/composition/input-normalization.test.ts |  29 +
 .../composition/next-turn-params-flow.test.ts |  62 ++
 .../composition/orchestrator-executor.test.ts |  66 ++
 tests/composition/state-machine.test.ts       |  76 +++
 .../composition/stream-data-pipeline.test.ts  | 163 +++++
 tests/composition/tool-lifecycle.test.ts      |  98 +++
 tests/contracts/README.md                     |  26 +
 tests/contracts/async-params.test.ts          | 105 ++++
 .../consume-stream-completion.test.ts         |  80 +++
 tests/contracts/delta-extractors.test.ts      | 111 ++++
 tests/contracts/execute-tool-boundary.test.ts |  66 ++
 tests/contracts/from-claude-messages.test.ts  |  90 +++
 tests/contracts/items-stream.test.ts          | 247 ++++++++
 .../contracts/message-stream-builders.test.ts | 118 ++++
 tests/contracts/response-extractors.test.ts   |  98 +++
 tests/contracts/stop-conditions.test.ts       | 531 ++++++++++++++++
 tests/dispatch/README.md                      |  25 +
 .../approval-partition-dispatch.test.ts       |  79 +++
 .../claude-conversion-dispatch.test.ts        |  90 +++
 tests/dispatch/execute-tool-dispatch.test.ts  |  84 +++
 tests/dispatch/from-claude-dispatch.test.ts   |  58 ++
 tests/dispatch/items-stream-dispatch.test.ts  |  76 +++
 tests/integration/README.md                   |  26 +
 .../conversation-state-format.test.ts         |  32 +
 .../next-turn-params-request.test.ts          |  47 ++
 .../reusable-stream-consumers.test.ts         | 114 ++++
 .../stop-conditions-step-result.test.ts       | 162 +++++
 .../stream-completion-guards.test.ts          |  60 ++
 .../tool-context-execution.test.ts            |  64 ++
 .../turn-context-async-params.test.ts         |  40 ++
 tests/pipelines/README.md                     |  28 +
 .../approval-execution-state.test.ts          |  96 +++
 .../async-resolution-pipeline.test.ts         |  31 +
 .../pipelines/claude-conversion-deep.test.ts  | 168 +++++
 tests/pipelines/context-pipeline.test.ts      |  91 +++
 tests/pipelines/dual-format-output.test.ts    | 158 +++++
 tests/pipelines/format-round-trip.test.ts     | 169 +++++
 .../next-turn-params-pipeline.test.ts         |  63 ++
 .../orchestrator-utility-chain.test.ts        |  53 ++
 .../pipelines/stop-condition-pipeline.test.ts | 127 ++++
 tests/pipelines/streaming-pipeline.test.ts    | 159 +++++
 .../pipelines/tool-execution-pipeline.test.ts | 138 +++++
 vitest.config.ts                              |  77 +++
 71 files changed, 7911 insertions(+)
 create mode 100644 tests/INDEX.md
 create mode 100644 tests/behavior/README.md
 create mode 100644 tests/behavior/async-params.test.ts
 create mode 100644 tests/behavior/conversation-state.test.ts
 create mode 100644 tests/behavior/next-turn-params.test.ts
 create mode 100644 tests/behavior/reusable-stream.test.ts
 create mode 100644 tests/behavior/stop-conditions-evaluation.test.ts
 create mode 100644 tests/behavior/stream-type-guards-negative.test.ts
 create mode 100644 tests/behavior/tool-approval.test.ts
 create mode 100644 tests/behavior/tool-context.test.ts
 create mode 100644 tests/behavior/tool-creation.test.ts
 create mode 100644 tests/behavior/tool-event-broadcaster.test.ts
 create mode 100644 tests/behavior/tool-execution.test.ts
 create mode 100644 tests/behavior/tool-orchestrator.test.ts
 create mode 100644 tests/behavior/tool-types-events.test.ts
 create mode 100644 tests/behavior/turn-context.test.ts
 create mode 100644 tests/boundaries/README.md
 create mode 100644 tests/boundaries/content-annotation-guards.test.ts
 create mode 100644 tests/boundaries/conversation-state-results.test.ts
 create mode 100644 tests/boundaries/domain-separation.test.ts
 create mode 100644 tests/boundaries/output-item-guards.test.ts
 create mode 100644 tests/boundaries/response-stream-event-guards.test.ts
 create mode 100644 tests/boundaries/stream-event-guards.test.ts
 create mode 100644 tests/boundaries/tool-factory-shapes.test.ts
 create mode 100644 tests/boundaries/tool-type-guards.test.ts
 create mode 100644 tests/composition/README.md
 create mode 100644 tests/composition/context-flow.test.ts
 create mode 100644 tests/composition/format-compatibility.test.ts
 create mode 100644 tests/composition/input-normalization.test.ts
 create mode 100644 tests/composition/next-turn-params-flow.test.ts
 create mode 100644 tests/composition/orchestrator-executor.test.ts
 create mode 100644 tests/composition/state-machine.test.ts
 create mode 100644 tests/composition/stream-data-pipeline.test.ts
 create mode 100644 tests/composition/tool-lifecycle.test.ts
 create mode 100644 tests/contracts/README.md
 create mode 100644 tests/contracts/async-params.test.ts
 create mode 100644 tests/contracts/consume-stream-completion.test.ts
 create mode 100644 tests/contracts/delta-extractors.test.ts
 create mode 100644 tests/contracts/execute-tool-boundary.test.ts
 create mode 100644 tests/contracts/from-claude-messages.test.ts
 create mode 100644 tests/contracts/items-stream.test.ts
 create mode 100644 tests/contracts/message-stream-builders.test.ts
 create mode 100644 tests/contracts/response-extractors.test.ts
 create mode 100644 tests/contracts/stop-conditions.test.ts
 create mode 100644 tests/dispatch/README.md
 create mode 100644 tests/dispatch/approval-partition-dispatch.test.ts
 create mode 100644 tests/dispatch/claude-conversion-dispatch.test.ts
 create mode 100644 tests/dispatch/execute-tool-dispatch.test.ts
 create mode 100644 tests/dispatch/from-claude-dispatch.test.ts
 create mode 100644 tests/dispatch/items-stream-dispatch.test.ts
 create mode 100644 tests/integration/README.md
 create mode 100644 tests/integration/conversation-state-format.test.ts
 create mode 100644 tests/integration/next-turn-params-request.test.ts
 create mode 100644 tests/integration/reusable-stream-consumers.test.ts
 create mode 100644 tests/integration/stop-conditions-step-result.test.ts
 create mode 100644 tests/integration/stream-completion-guards.test.ts
 create mode 100644 tests/integration/tool-context-execution.test.ts
 create mode 100644 tests/integration/turn-context-async-params.test.ts
 create mode 100644 tests/pipelines/README.md
 create mode 100644 tests/pipelines/approval-execution-state.test.ts
 create mode 100644 tests/pipelines/async-resolution-pipeline.test.ts
 create mode 100644 tests/pipelines/claude-conversion-deep.test.ts
 create mode 100644 tests/pipelines/context-pipeline.test.ts
 create mode 100644 tests/pipelines/dual-format-output.test.ts
 create mode 100644 tests/pipelines/format-round-trip.test.ts
 create mode 100644 tests/pipelines/next-turn-params-pipeline.test.ts
 create mode 100644 tests/pipelines/orchestrator-utility-chain.test.ts
 create mode 100644 tests/pipelines/stop-condition-pipeline.test.ts
 create mode 100644 tests/pipelines/streaming-pipeline.test.ts
 create mode 100644 tests/pipelines/tool-execution-pipeline.test.ts

diff --git a/tests/INDEX.md b/tests/INDEX.md
new file mode 100644
index 0000000..7effb9a
--- /dev/null
+++ b/tests/INDEX.md
@@ -0,0 +1,192 @@
+# Test Registry
+
+This index maps each SDK function/module to its **single test category**. Before adding a test, check this registry — if the function is already covered in a higher category, add your assertion there instead of creating a new file in a lower category.
+
+## Category priority (highest wins)
+
+When a function could belong to multiple categories, place it in the **highest applicable** one:
+
+```
+pipelines > dispatch > integration > contracts > composition > boundaries > behavior
+```
+
+**Rule: each function gets ONE category.** If `stepCountIs` has meaningful peer boundaries, it goes in `contracts/` — not `behavior/` AND `contracts/`. Functions with no peer comparisons stay in `behavior/`.
+
+---
+
+## Registry
+
+### stop-conditions.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `stepCountIs` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| `hasToolCall` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| `maxTokensUsed` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| `maxCost` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| `finishReasonIs` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| stop condition evaluation (combined) | behavior | `behavior/stop-conditions-evaluation.test.ts` | Tests `evaluateStopConditions` orchestration logic (no peer comparison) |
+
+### stream-type-guards.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `isOutputTextDeltaEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isReasoningDeltaEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFunctionCallArgumentsDeltaEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isOutputItemAddedEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isOutputItemDoneEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isResponseCompletedEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isResponseFailedEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isResponseIncompleteEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFunctionCallArgumentsDoneEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isOutputMessage` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFunctionCallItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isReasoningOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isWebSearchCallOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFileSearchCallOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isImageGenerationCallOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isOutputTextPart` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `isRefusalPart` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFileCitationAnnotation` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `isURLCitationAnnotation` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFilePathAnnotation` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `hasTypeProperty` | behavior | `behavior/stream-type-guards-negative.test.ts` | Utility function, no peers |
+| stream vs output item cross-domain | boundaries | `boundaries/domain-separation.test.ts` | Guards reject events from wrong domain |
+| response stream event guards | boundaries | `boundaries/response-stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+
+### tool-types.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `hasExecuteFunction` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types |
+| `isRegularExecuteTool` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types |
+| `isGeneratorTool` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types |
+| `isManualTool` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types |
+| `toolRequiresApproval` | behavior | `behavior/tool-approval.test.ts` | No peer comparison, isolated behavior |
+| `ToolEventBroadcaster` | behavior | `behavior/tool-event-broadcaster.test.ts` | No peer comparison, isolated behavior |
+| tool type events (combined) | behavior | `behavior/tool-types-events.test.ts` | Event shape verification, isolated |
+
+### tool.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `tool()` factory | behavior | `behavior/tool-creation.test.ts` | Isolated factory behavior |
+| tool factory shapes (regular vs generator vs manual) | boundaries | `boundaries/tool-factory-shapes.test.ts` | Structural distinction between tool types |
+
+### tool-executor.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `executeRegularTool` | behavior | `behavior/tool-execution.test.ts` | Isolated execution behavior |
+| `executeRegularTool` vs `executeGeneratorTool` | contracts | `contracts/execute-tool-boundary.test.ts` | Each handles its type AND rejects the other |
+| `executeTool` dispatch | dispatch | `dispatch/execute-tool-dispatch.test.ts` | Routes via type guard to correct executor |
+
+### tool-context.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `resolveContext`, `ToolContextStore` | behavior | `behavior/tool-context.test.ts` | Isolated context resolution |
+| `buildToolExecuteContext` | integration | `integration/tool-context-execution.test.ts` | Correct output AND feeds tool execute |
+
+### turn-context.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `buildTurnContext`, `normalizeInputToArray` | behavior | `behavior/turn-context.test.ts` | Isolated shape verification |
+| turn context -> async params | integration | `integration/turn-context-async-params.test.ts` | Correct output AND feeds resolveAsyncFunctions |
+
+### async-params.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `resolveAsyncFunctions` | contracts | `contracts/async-params.test.ts` | Static vs function vs client-only handled distinctly |
+
+### conversation-state.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `createInitialState`, `updateState`, `appendToMessages` | behavior | `behavior/conversation-state.test.ts` | Isolated state management |
+| `appendToMessages` + format compat | integration | `integration/conversation-state-format.test.ts` | Output feeds format conversion |
+| `partitionToolCalls` | dispatch | `dispatch/approval-partition-dispatch.test.ts` | Routes via approval checks |
+| `createUnsentResult` vs `createRejectedResult` | boundaries | `boundaries/conversation-state-results.test.ts` | Structural distinction |
+
+### next-turn-params.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `executeNextTurnParamsFunctions` | behavior | `behavior/next-turn-params.test.ts` | Isolated param computation |
+| next-turn params -> request | integration | `integration/next-turn-params-request.test.ts` | Output feeds applyNextTurnParamsToRequest |
+
+### reusable-stream.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `ReusableReadableStream` | behavior | `behavior/reusable-stream.test.ts` | Isolated stream behavior |
+| multi-consumer correctness | integration | `integration/reusable-stream-consumers.test.ts` | Multiple consumers both get correct data |
+
+### stream-transformers.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `extractTextDeltas`, `extractReasoningDeltas`, `extractToolDeltas` | contracts | `contracts/delta-extractors.test.ts` | Each yields its type AND skips peers |
+| `buildMessageStream`, `buildResponsesMessageStream` | contracts | `contracts/message-stream-builders.test.ts` | Each produces distinct format |
+| `buildItemsStream` | contracts | `contracts/items-stream.test.ts` | Produces items format distinctly |
+| `buildItemsStream` dispatch | dispatch | `dispatch/items-stream-dispatch.test.ts` | Routes events via guards |
+| `consumeStreamForCompletion` | contracts | `contracts/consume-stream-completion.test.ts` | Consumes correct terminal event |
+| `getResponseObject`, `getTextContent` | contracts | `contracts/response-extractors.test.ts` | Each extracts distinct data |
+| `convertToClaudeMessage` | dispatch | `dispatch/claude-conversion-dispatch.test.ts` | Routes items via output guards |
+| `consumeStreamForCompletion` + guards | integration | `integration/stream-completion-guards.test.ts` | Correct result AND guard identified event |
+
+### anthropic-compat.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `fromClaudeMessages` | contracts | `contracts/from-claude-messages.test.ts` | Maps each block type distinctly |
+| `fromClaudeMessages` dispatch | dispatch | `dispatch/from-claude-dispatch.test.ts` | Routes mixed block types |
+| `toClaudeMessage` + `fromClaudeMessages` round-trip | pipelines | `pipelines/format-round-trip.test.ts` | Full conversion pipeline |
+
+### chat-compat.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `toChatMessage` + `fromChatMessages` round-trip | pipelines | `pipelines/format-round-trip.test.ts` | Full conversion pipeline |
+
+### tool-orchestrator.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `mapToolResults`, `summarizeStepUsage`, etc. | behavior | `behavior/tool-orchestrator.test.ts` | Isolated utility functions |
+
+---
+
+## Pipeline tests (cross-cutting)
+
+These tests exercise multiple modules end-to-end and don't map to a single function:
+
+| Pipeline | File |
+|----------|------|
+| Streaming: events -> guards -> transformers -> consumer | `pipelines/streaming-pipeline.test.ts` |
+| Tool execution: create -> dispatch -> validate -> execute -> format | `pipelines/tool-execution-pipeline.test.ts` |
+| Context: build -> resolve -> store -> execute | `pipelines/context-pipeline.test.ts` |
+| Stop conditions: results -> evaluate -> decision | `pipelines/stop-condition-pipeline.test.ts` |
+| Dual-format output: same response -> chat + Claude + items | `pipelines/dual-format-output.test.ts` |
+| Claude conversion deep: multi-item -> per-item routing -> blocks | `pipelines/claude-conversion-deep.test.ts` |
+| Next-turn params: tool results -> compute -> apply to request | `pipelines/next-turn-params-pipeline.test.ts` |
+| Async resolution: resolve -> apply -> evaluate stop | `pipelines/async-resolution-pipeline.test.ts` |
+| Orchestrator chain: execute -> map -> summarize -> check errors | `pipelines/orchestrator-utility-chain.test.ts` |
+| Approval -> execution -> state: partition -> execute -> format | `pipelines/approval-execution-state.test.ts` |
+| Format round-trip: Claude and Chat bidirectional conversion | `pipelines/format-round-trip.test.ts` |
+
+## Composition tests (two-module connection)
+
+| Connection | File |
+|------------|------|
+| tool() -> type guards / convertToolsToAPIFormat | `composition/tool-lifecycle.test.ts` |
+| ReusableReadableStream -> multiple consumers | `composition/stream-data-pipeline.test.ts` |
+| executeNextTurnParamsFunctions -> applyNextTurnParamsToRequest | `composition/next-turn-params-flow.test.ts` |
+| toChatMessage -> fromChatMessages (format round-trip) | `composition/format-compatibility.test.ts` |
+| buildToolExecuteContext -> tool execute | `composition/context-flow.test.ts` |
+| appendToMessages -> state update | `composition/input-normalization.test.ts` |
+| createInitialState -> updateState | `composition/state-machine.test.ts` |
+| orchestrator utilities -> executor results | `composition/orchestrator-executor.test.ts` |
diff --git a/tests/behavior/README.md b/tests/behavior/README.md
new file mode 100644
index 0000000..7c4c9e6
--- /dev/null
+++ b/tests/behavior/README.md
@@ -0,0 +1,26 @@
+# Behavior Tests
+
+Tests in this folder verify that each SDK capability works as promised **in isolation**. No comparison to similar capabilities, no cross-module composition — just: does this function do what its contract says?
+
+## What belongs here
+
+- Happy-path execution of individual functions
+- Error cases and edge cases for a single function
+- Return shape and type verification
+- Input validation (valid and invalid)
+- Default values and optional parameter handling
+- Extendable: when new SDK capabilities are added, their isolated behavior tests go here
+
+## Examples
+
+- `tool()` factory produces the correct structure for each tool type
+- `validateToolInput` accepts valid data and rejects invalid data
+- `ReusableReadableStream` delivers items in order to a single consumer
+- `createInitialState()` returns the expected shape with timestamps
+- `resolveContext` handles static objects, functions, async functions, and undefined
+
+## What does NOT belong here
+
+- Tests comparing two similar functions (→ `boundaries/`)
+- Tests where one module's output feeds another's input (→ `composition/`)
+- End-to-end workflows (→ `pipelines/`)
diff --git a/tests/behavior/async-params.test.ts b/tests/behavior/async-params.test.ts
new file mode 100644
index 0000000..f0dc02f
--- /dev/null
+++ b/tests/behavior/async-params.test.ts
@@ -0,0 +1,112 @@
+import { describe, expect, it } from 'vitest';
+import { hasAsyncFunctions, resolveAsyncFunctions } from '../../src/lib/async-params.js';
+import type { TurnContext } from '../../src/lib/tool-types.js';
+
+const turnCtx: TurnContext = {
+  numberOfTurns: 2,
+};
+
+describe('async params - resolveAsyncFunctions', () => {
+  it('passes through static values unchanged', async () => {
+    const input = {
+      model: 'gpt-4',
+      temperature: 0.7,
+      input: 'hi',
+    } as any;
+    const result = await resolveAsyncFunctions(input, turnCtx);
+    expect(result.model).toBe('gpt-4');
+    expect(result.temperature).toBe(0.7);
+  });
+
+  it('resolves sync function fields with turnContext', async () => {
+    const input = {
+      model: 'gpt-4',
+      temperature: (ctx: TurnContext) => ctx.numberOfTurns * 0.1,
+      input: 'test',
+    } as any;
+    const result = await resolveAsyncFunctions(input, turnCtx);
+    expect(result.temperature).toBeCloseTo(0.2);
+  });
+
+  it('resolves async function fields with turnContext', async () => {
+    const input = {
+      model: 'gpt-4',
+      temperature: async (ctx: TurnContext) => ctx.numberOfTurns * 0.15,
+      input: 'test',
+    } as any;
+    const result = await resolveAsyncFunctions(input, turnCtx);
+    expect(result.temperature).toBeCloseTo(0.3);
+  });
+
+  it('strips client-only fields (stopWhen, state, requireApproval, context, etc.)', async () => {
+    const input = {
+      model: 'gpt-4',
+      input: 'test',
+      stopWhen: () => true,
+      state: {},
+      requireApproval: () => false,
+      context: {},
+    } as any;
+    const result = await resolveAsyncFunctions(input, turnCtx);
+    expect(result).not.toHaveProperty('stopWhen');
+    expect(result).not.toHaveProperty('state');
+    expect(result).not.toHaveProperty('requireApproval');
+    expect(result).not.toHaveProperty('context');
+  });
+
+  it('wraps field resolution errors with field name', async () => {
+    const input = {
+      model: 'gpt-4',
+      temperature: () => {
+        throw new Error('compute failed');
+      },
+      input: 'test',
+    } as any;
+    await expect(resolveAsyncFunctions(input, turnCtx)).rejects.toThrow(/temperature/);
+  });
+});
+
+describe('async params - hasAsyncFunctions', () => {
+  it('returns true when any field is a function', () => {
+    expect(
+      hasAsyncFunctions({
+        model: 'gpt-4',
+        temperature: () => 0.5,
+      }),
+    ).toBe(true);
+  });
+
+  it('returns false when all fields are static values', () => {
+    expect(
+      hasAsyncFunctions({
+        model: 'gpt-4',
+        temperature: 0.5,
+      }),
+    ).toBe(false);
+  });
+
+  it('returns false for null input', () => {
+    expect(hasAsyncFunctions(null)).toBe(false);
+  });
+
+  it('returns false for undefined input', () => {
+    expect(hasAsyncFunctions(undefined)).toBe(false);
+  });
+
+  it('returns false for non-object input', () => {
+    expect(hasAsyncFunctions('string')).toBe(false);
+  });
+
+  it('returns true when nested function detected', () => {
+    expect(
+      hasAsyncFunctions({
+        a: 1,
+        b: () => 2,
+      }),
+    ).toBe(true);
+  });
+
+  it('returns false for empty object', () => {
+    expect(hasAsyncFunctions({})).toBe(false);
+  });
+});
diff --git a/tests/behavior/conversation-state.test.ts b/tests/behavior/conversation-state.test.ts
new file mode 100644
index 0000000..7f66639
--- /dev/null
+++ b/tests/behavior/conversation-state.test.ts
@@ -0,0 +1,190 @@
+import type * as models from '@openrouter/sdk/models';
+import { describe, expect, it } from 'vitest';
+import {
+  appendToMessages,
+  createInitialState,
+  createRejectedResult,
+  createUnsentResult,
+  extractTextFromResponse,
+  generateConversationId,
+  unsentResultsToAPIFormat,
+  updateState,
+} from '../../src/lib/conversation-state.js';
+
+describe('conversation state - createInitialState', () => {
+  it('creates state with generated id, empty messages, in_progress status', () => {
+    const state = createInitialState();
+    expect(state.id).toMatch(/^conv_/);
+    expect(state.messages).toEqual([]);
+    expect(state.status).toBe('in_progress');
+    expect(state.createdAt).toBeTypeOf('number');
+    expect(state.updatedAt).toBeTypeOf('number');
+  });
+
+  it('uses provided custom id', () => {
+    const state = createInitialState('custom_123');
+    expect(state.id).toBe('custom_123');
+  });
+});
+
+describe('conversation state - updateState', () => {
+  it('merges updates and bumps updatedAt timestamp', () => {
+    const state = createInitialState('s1');
+    const before = state.updatedAt;
+    const updated = updateState(state, {
+      status: 'completed',
+    });
+    expect(updated.status).toBe('completed');
+    expect(updated.id).toBe('s1');
+    expect(updated.updatedAt).toBeGreaterThanOrEqual(before);
+  });
+
+  it('preserves id and createdAt from original state', () => {
+    const state = createInitialState('s2');
+    const updated = updateState(state, {
+      messages: [
+        {
+          role: 'user',
+          content: 'hi',
+        },
+      ] as any,
+    });
+    expect(updated.id).toBe('s2');
+    expect(updated.createdAt).toBe(state.createdAt);
+  });
+});
+
+describe('conversation state - appendToMessages', () => {
+  it('appends new items to existing array input', () => {
+    const current: models.InputsUnion = [
+      {
+        role: 'user',
+        content: 'hello',
+      },
+    ];
+    const result = appendToMessages(current, [
+      {
+        role: 'assistant',
+        content: 'hi',
+      } as any,
+    ]);
+    expect(result).toHaveLength(2);
+  });
+
+  it('converts string input to array then appends', () => {
+    const result = appendToMessages('hello', [
+      {
+        role: 'assistant',
+        content: 'hi',
+      } as any,
+    ]);
+    expect(result).toHaveLength(2);
+    expect(result[0]).toHaveProperty('role', 'user');
+  });
+});
+
+describe('conversation state - generateConversationId', () => {
+  it('returns string starting with conv_', () => {
+    const id = generateConversationId();
+    expect(id).toMatch(/^conv_/);
+  });
+
+  it('generates unique ids on successive calls', () => {
+    const ids = new Set(
+      Array.from(
+        {
+          length: 10,
+        },
+        () => generateConversationId(),
+      ),
+    );
+    expect(ids.size).toBe(10);
+  });
+});
+
+describe('conversation state - unsent results', () => {
+  it('createUnsentResult builds valid result with callId, name, output', () => {
+    const result = createUnsentResult('c1', 'test', {
+      data: 42,
+    });
+    expect(result.callId).toBe('c1');
+    expect(result.name).toBe('test');
+    expect(result.output).toEqual({
+      data: 42,
+    });
+  });
+
+  it('createRejectedResult builds result with error message', () => {
+    const result = createRejectedResult('c2', 'test', 'not allowed');
+    expect(result.callId).toBe('c2');
+    expect(result.output).toBeNull();
+    expect(result.error).toBe('not allowed');
+  });
+
+  it('createRejectedResult uses default rejection message', () => {
+    const result = createRejectedResult('c3', 'test');
+    expect(result.error).toContain('rejected');
+  });
+
+  it('unsentResultsToAPIFormat converts to FunctionCallOutputItem array', () => {
+    const results = [
+      createUnsentResult('c1', 'test', {
+        data: 1,
+      }),
+    ];
+    const api = unsentResultsToAPIFormat(results);
+    expect(api).toHaveLength(1);
+    expect(api[0]!.type).toBe('function_call_output');
+    expect(api[0]!.callId).toBe('c1');
+    expect(typeof api[0]!.output).toBe('string');
+  });
+});
+
+describe('conversation state - response extraction', () => {
+  it('extractTextFromResponse extracts text from message output items', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message',
+          content: [
+            {
+              type: 'output_text',
+              text: 'Hello ',
+            },
+          ],
+        },
+        {
+          type: 'message',
+          content: [
+            {
+              type: 'output_text',
+              text: 'World',
+            },
+          ],
+        },
+      ],
+      parallel_tool_calls: false,
+      status: 'completed',
+      usage: null,
+      error: null,
+      incomplete_details: null,
+      created_at: 0,
+    } as any;
+    expect(extractTextFromResponse(response)).toBe('Hello World');
+  });
+
+  it('extractTextFromResponse returns empty string for no output', () => {
+    const response = {
+      id: 'r1',
+      output: [],
+      parallel_tool_calls: false,
+      status: 'completed',
+      usage: null,
+      error: null,
+      incomplete_details: null,
+      created_at: 0,
+    } as any;
+    expect(extractTextFromResponse(response)).toBe('');
+  });
+});
diff --git a/tests/behavior/next-turn-params.test.ts b/tests/behavior/next-turn-params.test.ts
new file mode 100644
index 0000000..234c843
--- /dev/null
+++ b/tests/behavior/next-turn-params.test.ts
@@ -0,0 +1,221 @@
+import type * as models from '@openrouter/sdk/models';
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import {
+  applyNextTurnParamsToRequest,
+  buildNextTurnParamsContext,
+  executeNextTurnParamsFunctions,
+} from '../../src/lib/next-turn-params.js';
+import { tool } from '../../src/lib/tool.js';
+import type { ParsedToolCall, Tool } from '../../src/lib/tool-types.js';
+
+describe('next-turn params - buildNextTurnParamsContext', () => {
+  it('extracts relevant fields from request', () => {
+    const request: models.ResponsesRequest = {
+      model: 'gpt-4',
+      input: 'hello',
+      temperature: 0.7,
+      maxOutputTokens: 1000,
+    } as any;
+    const ctx = buildNextTurnParamsContext(request);
+    expect(ctx.model).toBe('gpt-4');
+    expect(ctx.input).toBe('hello');
+    expect(ctx.temperature).toBe(0.7);
+    expect(ctx.maxOutputTokens).toBe(1000);
+  });
+
+  it('defaults missing fields to null/empty', () => {
+    const request = {
+      model: undefined,
+      input: undefined,
+    } as any;
+    const ctx = buildNextTurnParamsContext(request);
+    expect(ctx.model).toBe('');
+    expect(ctx.temperature).toBeNull();
+    expect(ctx.maxOutputTokens).toBeNull();
+    expect(ctx.models).toEqual([]);
+  });
+});
+
+describe('next-turn params - executeNextTurnParamsFunctions', () => {
+  it('executes temperature function and returns computed value', async () => {
+    const t = tool({
+      name: 'search',
+      inputSchema: z.object({
+        query: z.string(),
+      }),
+      nextTurnParams: {
+        temperature: () => 0.2 as number | null,
+      },
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'search',
+      arguments: {
+        query: 'test',
+      },
+    };
+    const request = {
+      model: 'gpt-4',
+      input: 'hello',
+    } as any;
+    const result = await executeNextTurnParamsFunctions(
+      [
+        tc,
+      ],
+      [
+        t,
+      ],
+      request,
+    );
+    expect(result.temperature).toBe(0.2);
+  });
+
+  it('returns empty object when no tools have nextTurnParams', async () => {
+    const t = tool({
+      name: 'basic',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'basic',
+      arguments: {},
+    };
+    const result = await executeNextTurnParamsFunctions(
+      [
+        tc,
+      ],
+      [
+        t,
+      ],
+      {} as any,
+    );
+    expect(Object.keys(result)).toHaveLength(0);
+  });
+
+  it('skips tools not in toolCalls array', async () => {
+    const t1 = tool({
+      name: 'a',
+      inputSchema: z.object({}),
+      nextTurnParams: {
+        temperature: () => 0.1 as number | null,
+      },
+      execute: async () => ({}),
+    });
+    const t2 = tool({
+      name: 'b',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'b',
+      arguments: {},
+    };
+    const result = await executeNextTurnParamsFunctions(
+      [
+        tc,
+      ],
+      [
+        t1,
+        t2,
+      ],
+      {} as any,
+    );
+    expect(result.temperature).toBeUndefined();
+  });
+
+  it('composes functions from multiple tools in order', async () => {
+    const t1 = tool({
+      name: 'first',
+      inputSchema: z.object({}),
+      nextTurnParams: {
+        temperature: (_p, ctx) => (ctx.temperature ?? 0) + 0.1,
+      },
+      execute: async () => ({}),
+    });
+    const t2 = tool({
+      name: 'second',
+      inputSchema: z.object({}),
+      nextTurnParams: {
+        temperature: (_p, ctx) => (ctx.temperature ?? 0) + 0.2,
+      },
+      execute: async () => ({}),
+    });
+    const tc1: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'first',
+      arguments: {},
+    };
+    const tc2: ParsedToolCall<Tool> = {
+      id: 'c2',
+      name: 'second',
+      arguments: {},
+    };
+    const request = {
+      temperature: 0.5,
+    } as any;
+    const result = await executeNextTurnParamsFunctions(
+      [
+        tc1,
+        tc2,
+      ],
+      [
+        t1,
+        t2,
+      ],
+      request,
+    );
+    expect(result.temperature).toBeCloseTo(0.8);
+  });
+});
+
+describe('next-turn params - applyNextTurnParamsToRequest', () => {
+  it('merges computed params into request', () => {
+    const request = {
+      model: 'gpt-4',
+      temperature: 0.7,
+      input: 'test',
+    } as any;
+    const computed = {
+      temperature: 0.2 as number | null,
+    };
+    const result = applyNextTurnParamsToRequest(request, computed);
+    expect(result.temperature).toBe(0.2);
+    expect(result.model).toBe('gpt-4');
+  });
+
+  it('converts null values to undefined for API compatibility', () => {
+    const request = {
+      model: 'gpt-4',
+    } as any;
+    const computed = {
+      temperature: null,
+    };
+    const result = applyNextTurnParamsToRequest(request, computed);
+    expect(result.temperature).toBeUndefined();
+  });
+
+  it('returns new object without mutating original', () => {
+    const request = {
+      model: 'gpt-4',
+      temperature: 0.7,
+    } as any;
+    const result = applyNextTurnParamsToRequest(request, {
+      temperature: 0.2,
+    });
+    expect(request.temperature).toBe(0.7);
+    expect(result.temperature).toBe(0.2);
+  });
+
+  it('handles empty computed params', () => {
+    const request = {
+      model: 'gpt-4',
+      temperature: 0.7,
+    } as any;
+    const result = applyNextTurnParamsToRequest(request, {});
+    expect(result.temperature).toBe(0.7);
+  });
+});
diff --git a/tests/behavior/reusable-stream.test.ts b/tests/behavior/reusable-stream.test.ts
new file mode 100644
index 0000000..be6ab22
--- /dev/null
+++ b/tests/behavior/reusable-stream.test.ts
@@ -0,0 +1,204 @@
+import { describe, expect, it } from 'vitest';
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+
+function makeStream<T>(values: T[]): ReadableStream<T> {
+  return new ReadableStream<T>({
+    start(controller) {
+      for (const v of values) {
+        controller.enqueue(v);
+      }
+      controller.close();
+    },
+  });
+}
+
+function makeDelayedStream<T>(values: T[], delayMs = 5): ReadableStream<T> {
+  return new ReadableStream<T>({
+    async start(controller) {
+      for (const v of values) {
+        await new Promise((r) => setTimeout(r, delayMs));
+        controller.enqueue(v);
+      }
+      controller.close();
+    },
+  });
+}
+
+async function collect<T>(iter: AsyncIterableIterator<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const v of iter) {
+    result.push(v);
+  }
+  return result;
+}
+
+describe('reusable stream - single consumer', () => {
+  it('single consumer reads all values from source', async () => {
+    const rrs = new ReusableReadableStream(
+      makeStream([
+        1,
+        2,
+        3,
+      ]),
+    );
+    const values = await collect(rrs.createConsumer());
+    expect(values).toEqual([
+      1,
+      2,
+      3,
+    ]);
+  });
+
+  it('empty source stream yields no values', async () => {
+    const rrs = new ReusableReadableStream(makeStream([]));
+    const values = await collect(rrs.createConsumer());
+    expect(values).toEqual([]);
+  });
+});
+
+describe('reusable stream - multiple consumers', () => {
+  it('two consumers independently read the same values', async () => {
+    const rrs = new ReusableReadableStream(
+      makeStream([
+        10,
+        20,
+        30,
+      ]),
+    );
+    const c1 = rrs.createConsumer();
+    const c2 = rrs.createConsumer();
+    const [v1, v2] = await Promise.all([
+      collect(c1),
+      collect(c2),
+    ]);
+    expect(v1).toEqual([
+      10,
+      20,
+      30,
+    ]);
+    expect(v2).toEqual([
+      10,
+      20,
+      30,
+    ]);
+  });
+
+  it('late-joining consumer gets all buffered values plus new ones', async () => {
+    const rrs = new ReusableReadableStream(
+      makeDelayedStream(
+        [
+          1,
+          2,
+          3,
+          4,
+        ],
+        5,
+      ),
+    );
+    const c1 = rrs.createConsumer();
+    // Let first consumer read a bit
+    const first = await c1.next();
+    expect(first.done).toBe(false);
+    // Join late
+    const c2 = rrs.createConsumer();
+    const [remaining1, values2] = await Promise.all([
+      collect(c1),
+      collect(c2),
+    ]);
+    // c1 already read first value, so remaining has rest
+    expect(remaining1.length).toBeGreaterThanOrEqual(2);
+    // c2 should have all values
+    expect(values2).toEqual([
+      1,
+      2,
+      3,
+      4,
+    ]);
+  });
+});
+
+describe('reusable stream - error propagation', () => {
+  it('propagates source error to consumer', async () => {
+    let controllerRef: ReadableStreamDefaultController<number>;
+    const errorStream = new ReadableStream<number>({
+      start(controller) {
+        controllerRef = controller;
+        controller.enqueue(1);
+      },
+      pull() {
+        controllerRef!.error(new Error('source error'));
+      },
+    });
+    const rrs = new ReusableReadableStream(errorStream);
+    const consumer = rrs.createConsumer();
+    const first = await consumer.next();
+    expect(first.value).toBe(1);
+    await expect(consumer.next()).rejects.toThrow('source error');
+  });
+});
+
+describe('reusable stream - cancellation', () => {
+  it('cancel() stops all consumers', async () => {
+    const rrs = new ReusableReadableStream(
+      makeDelayedStream(
+        [
+          1,
+          2,
+          3,
+          4,
+          5,
+        ],
+        50,
+      ),
+    );
+    const c1 = rrs.createConsumer();
+    const first = await c1.next();
+    expect(first.done).toBe(false);
+    await rrs.cancel();
+    const next = await c1.next();
+    expect(next.done).toBe(true);
+  });
+
+  it('consumer.return() cancels that consumer only', async () => {
+    const rrs = new ReusableReadableStream(
+      makeStream([
+        1,
+        2,
+        3,
+      ]),
+    );
+    const c1 = rrs.createConsumer();
+    const c2 = rrs.createConsumer();
+    await c1.return!();
+    const result = await c1.next();
+    expect(result.done).toBe(true);
+    // c2 should still work
+    const values = await collect(c2);
+    expect(values).toEqual([
+      1,
+      2,
+      3,
+    ]);
+  });
+});
+
+describe('reusable stream - async iteration protocol', () => {
+  it('supports for-await-of loop', async () => {
+    const rrs = new ReusableReadableStream(
+      makeStream([
+        'a',
+        'b',
+        'c',
+      ]),
+    );
+    const values: string[] = [];
+    for await (const v of rrs.createConsumer()) {
+      values.push(v);
+    }
+    expect(values).toEqual([
+      'a',
+      'b',
+      'c',
+    ]);
+  });
+});
diff --git a/tests/behavior/stop-conditions-evaluation.test.ts b/tests/behavior/stop-conditions-evaluation.test.ts
new file mode 100644
index 0000000..8237938
--- /dev/null
+++ b/tests/behavior/stop-conditions-evaluation.test.ts
@@ -0,0 +1,104 @@
+import { describe, expect, it } from 'vitest';
+import { hasToolCall, isStopConditionMet, stepCountIs } from '../../src/lib/stop-conditions.js';
+import type { StepResult } from '../../src/lib/tool-types.js';
+
+function makeStep(overrides: Partial<StepResult> = {}): StepResult {
+  return {
+    stepType: 'initial',
+    text: '',
+    toolCalls: [],
+    toolResults: [],
+    response: {
+      id: 'r1',
+      output: [],
+      parallel_tool_calls: false,
+      status: 'completed',
+      usage: null,
+      error: null,
+      incomplete_details: null,
+      created_at: 0,
+    },
+    ...overrides,
+  };
+}
+
+describe('stop conditions - isStopConditionMet evaluation', () => {
+  it('returns true when any condition is true (OR logic)', async () => {
+    const steps = [
+      makeStep(),
+      makeStep(),
+      makeStep(),
+    ];
+    const result = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(5),
+        stepCountIs(2),
+      ],
+      steps,
+    });
+    expect(result).toBe(true);
+  });
+
+  it('returns false when all conditions are false', async () => {
+    const steps = [
+      makeStep(),
+    ];
+    const result = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(5),
+        hasToolCall('missing'),
+      ],
+      steps,
+    });
+    expect(result).toBe(false);
+  });
+
+  it('handles empty conditions array (returns false)', async () => {
+    const result = await isStopConditionMet({
+      stopConditions: [],
+      steps: [
+        makeStep(),
+      ],
+    });
+    expect(result).toBe(false);
+  });
+
+  it('handles async stop conditions', async () => {
+    const asyncCondition = async ({ steps }: { readonly steps: ReadonlyArray<StepResult> }) => {
+      await new Promise((resolve) => setTimeout(resolve, 1));
+      return steps.length >= 2;
+    };
+    const result = await isStopConditionMet({
+      stopConditions: [
+        asyncCondition,
+      ],
+      steps: [
+        makeStep(),
+        makeStep(),
+      ],
+    });
+    expect(result).toBe(true);
+  });
+
+  it('evaluates conditions in parallel', async () => {
+    const order: number[] = [];
+    const slow = async () => {
+      await new Promise((r) => setTimeout(r, 20));
+      order.push(1);
+      return false;
+    };
+    const fast = async () => {
+      await new Promise((r) => setTimeout(r, 1));
+      order.push(2);
+      return true;
+    };
+    const result = await isStopConditionMet({
+      stopConditions: [
+        slow,
+        fast,
+      ],
+      steps: [],
+    });
+    expect(result).toBe(true);
+  });
+});
diff --git a/tests/behavior/stream-type-guards-negative.test.ts b/tests/behavior/stream-type-guards-negative.test.ts
new file mode 100644
index 0000000..12cf1ae
--- /dev/null
+++ b/tests/behavior/stream-type-guards-negative.test.ts
@@ -0,0 +1,133 @@
+import { describe, expect, it } from 'vitest';
+import {
+  isFunctionCallArgumentsDeltaEvent,
+  isFunctionCallArgumentsDoneEvent,
+  isFunctionCallItem,
+  isOutputItemAddedEvent,
+  isOutputItemDoneEvent,
+  isOutputMessage,
+  isOutputTextDeltaEvent,
+  isOutputTextPart,
+  isReasoningDeltaEvent,
+  isReasoningOutputItem,
+  isRefusalPart,
+  isResponseCompletedEvent,
+  isResponseFailedEvent,
+  isResponseIncompleteEvent,
+} from '../../src/lib/stream-type-guards.js';
+
+describe('stream event type guards - negative cases (reject wrong type)', () => {
+  it('isOutputTextDeltaEvent rejects reasoning delta', () => {
+    expect(
+      isOutputTextDeltaEvent({
+        type: 'response.reasoning_text.delta',
+      } as any),
+    ).toBe(false);
+  });
+
+  it('isReasoningDeltaEvent rejects text delta', () => {
+    expect(
+      isReasoningDeltaEvent({
+        type: 'response.output_text.delta',
+      } as any),
+    ).toBe(false);
+  });
+
+  it('isFunctionCallArgumentsDeltaEvent rejects text delta', () => {
+    expect(
+      isFunctionCallArgumentsDeltaEvent({
+        type: 'response.output_text.delta',
+      } as any),
+    ).toBe(false);
+  });
+
+  it('isOutputItemAddedEvent rejects output_item.done', () => {
+    expect(
+      isOutputItemAddedEvent({
+        type: 'response.output_item.done',
+      } as any),
+    ).toBe(false);
+  });
+
+  it('isOutputItemDoneEvent rejects output_item.added', () => {
+    expect(
+      isOutputItemDoneEvent({
+        type: 'response.output_item.added',
+      } as any),
+    ).toBe(false);
+  });
+
+  it('isResponseCompletedEvent rejects response.failed', () => {
+    expect(
+      isResponseCompletedEvent({
+        type: 'response.failed',
+      } as any),
+    ).toBe(false);
+  });
+
+  it('isResponseFailedEvent rejects response.completed', () => {
+    expect(
+      isResponseFailedEvent({
+        type: 'response.completed',
+      } as any),
+    ).toBe(false);
+  });
+
+  it('isResponseIncompleteEvent rejects response.completed', () => {
+    expect(
+      isResponseIncompleteEvent({
+        type: 'response.completed',
+      } as any),
+    ).toBe(false);
+  });
+
+  it('isFunctionCallArgumentsDoneEvent rejects function_call_arguments.delta', () => {
+    expect(
+      isFunctionCallArgumentsDoneEvent({
+        type: 'response.function_call_arguments.delta',
+      } as any),
+    ).toBe(false);
+  });
+});
+
+describe('output item type guards - negative cases', () => {
+  it('isOutputMessage rejects function_call', () => {
+    expect(
+      isOutputMessage({
+        type: 'function_call',
+      }),
+    ).toBe(false);
+  });
+
+  it('isFunctionCallItem rejects message', () => {
+    expect(
+      isFunctionCallItem({
+        type: 'message',
+      }),
+    ).toBe(false);
+  });
+
+  it('isReasoningOutputItem rejects message', () => {
+    expect(
+      isReasoningOutputItem({
+        type: 'message',
+      }),
+    ).toBe(false);
+  });
+
+  it('isOutputTextPart rejects refusal', () => {
+    expect(
+      isOutputTextPart({
+        type: 'refusal',
+      }),
+    ).toBe(false);
+  });
+
+  it('isRefusalPart rejects output_text', () => {
+    expect(
+      isRefusalPart({
+        type: 'output_text',
+      }),
+    ).toBe(false);
+  });
+});
diff --git a/tests/behavior/tool-approval.test.ts b/tests/behavior/tool-approval.test.ts
new file mode 100644
index 0000000..0f4d309
--- /dev/null
+++ b/tests/behavior/tool-approval.test.ts
@@ -0,0 +1,314 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import { partitionToolCalls, toolRequiresApproval } from '../../src/lib/conversation-state.js';
+import { tool } from '../../src/lib/tool.js';
+import type { ParsedToolCall, Tool, TurnContext } from '../../src/lib/tool-types.js';
+import { hasApprovalRequiredTools, toolHasApprovalConfigured } from '../../src/lib/tool-types.js';
+
+const turnCtx: TurnContext = {
+  numberOfTurns: 1,
+};
+
+describe('tool approval - toolRequiresApproval', () => {
+  it('returns false when tool has no requireApproval', async () => {
+    const t = tool({
+      name: 'free',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'free',
+      arguments: {},
+    };
+    expect(
+      await toolRequiresApproval(
+        tc,
+        [
+          t,
+        ],
+        turnCtx,
+      ),
+    ).toBe(false);
+  });
+
+  it('returns true when tool has requireApproval: true', async () => {
+    const t = tool({
+      name: 'guarded',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'guarded',
+      arguments: {},
+    };
+    expect(
+      await toolRequiresApproval(
+        tc,
+        [
+          t,
+        ],
+        turnCtx,
+      ),
+    ).toBe(true);
+  });
+
+  it('returns false when tool has requireApproval: false', async () => {
+    const t = tool({
+      name: 'open',
+      inputSchema: z.object({}),
+      requireApproval: false,
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'open',
+      arguments: {},
+    };
+    expect(
+      await toolRequiresApproval(
+        tc,
+        [
+          t,
+        ],
+        turnCtx,
+      ),
+    ).toBe(false);
+  });
+
+  it('calls requireApproval function with args and context', async () => {
+    const t = tool({
+      name: 'conditional',
+      inputSchema: z.object({
+        dangerous: z.boolean(),
+      }),
+      requireApproval: (params) => params.dangerous,
+      execute: async () => ({}),
+    });
+    const tc1: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'conditional',
+      arguments: {
+        dangerous: true,
+      },
+    };
+    const tc2: ParsedToolCall<Tool> = {
+      id: 'c2',
+      name: 'conditional',
+      arguments: {
+        dangerous: false,
+      },
+    };
+    expect(
+      await toolRequiresApproval(
+        tc1,
+        [
+          t,
+        ],
+        turnCtx,
+      ),
+    ).toBe(true);
+    expect(
+      await toolRequiresApproval(
+        tc2,
+        [
+          t,
+        ],
+        turnCtx,
+      ),
+    ).toBe(false);
+  });
+
+  it('call-level check overrides tool-level setting', async () => {
+    const t = tool({
+      name: 'guarded',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'guarded',
+      arguments: {},
+    };
+    const callCheck = () => false;
+    expect(
+      await toolRequiresApproval(
+        tc,
+        [
+          t,
+        ],
+        turnCtx,
+        callCheck,
+      ),
+    ).toBe(false);
+  });
+
+  it('returns false for unknown tool name', async () => {
+    const t = tool({
+      name: 'known',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'unknown',
+      arguments: {},
+    };
+    expect(
+      await toolRequiresApproval(
+        tc,
+        [
+          t,
+        ],
+        turnCtx,
+      ),
+    ).toBe(false);
+  });
+});
+
+describe('tool approval - partitionToolCalls', () => {
+  it('separates tool calls into requiresApproval and autoExecute', async () => {
+    const guarded = tool({
+      name: 'guarded',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    const free = tool({
+      name: 'free',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const tc1: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'guarded',
+      arguments: {},
+    };
+    const tc2: ParsedToolCall<Tool> = {
+      id: 'c2',
+      name: 'free',
+      arguments: {},
+    };
+    const result = await partitionToolCalls(
+      [
+        tc1,
+        tc2,
+      ],
+      [
+        guarded,
+        free,
+      ],
+      turnCtx,
+    );
+    expect(result.requiresApproval).toHaveLength(1);
+    expect(result.autoExecute).toHaveLength(1);
+    expect(result.requiresApproval[0]!.name).toBe('guarded');
+    expect(result.autoExecute[0]!.name).toBe('free');
+  });
+
+  it('all auto-execute when no tools require approval', async () => {
+    const free = tool({
+      name: 'free',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'free',
+      arguments: {},
+    };
+    const result = await partitionToolCalls(
+      [
+        tc,
+      ],
+      [
+        free,
+      ],
+      turnCtx,
+    );
+    expect(result.autoExecute).toHaveLength(1);
+    expect(result.requiresApproval).toHaveLength(0);
+  });
+
+  it('all require approval when all tools need it', async () => {
+    const guarded = tool({
+      name: 'g1',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'g1',
+      arguments: {},
+    };
+    const result = await partitionToolCalls(
+      [
+        tc,
+      ],
+      [
+        guarded,
+      ],
+      turnCtx,
+    );
+    expect(result.requiresApproval).toHaveLength(1);
+    expect(result.autoExecute).toHaveLength(0);
+  });
+});
+
+describe('tool approval - type-level utilities', () => {
+  it('toolHasApprovalConfigured returns true for tool with requireApproval', () => {
+    const t = tool({
+      name: 'g',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    expect(toolHasApprovalConfigured(t)).toBe(true);
+  });
+
+  it('toolHasApprovalConfigured returns false for tool without requireApproval', () => {
+    const t = tool({
+      name: 'f',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    expect(toolHasApprovalConfigured(t)).toBe(false);
+  });
+
+  it('hasApprovalRequiredTools returns true when any tool needs approval', () => {
+    const t1 = tool({
+      name: 'f',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const t2 = tool({
+      name: 'g',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    expect(
+      hasApprovalRequiredTools([
+        t1,
+        t2,
+      ]),
+    ).toBe(true);
+  });
+
+  it('hasApprovalRequiredTools returns false when no tools need approval', () => {
+    const t1 = tool({
+      name: 'f',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    expect(
+      hasApprovalRequiredTools([
+        t1,
+      ]),
+    ).toBe(false);
+  });
+});
diff --git a/tests/behavior/tool-context.test.ts b/tests/behavior/tool-context.test.ts
new file mode 100644
index 0000000..0f3fa28
--- /dev/null
+++ b/tests/behavior/tool-context.test.ts
@@ -0,0 +1,263 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import {
+  buildToolExecuteContext,
+  extractToolContext,
+  resolveContext,
+  ToolContextStore,
+} from '../../src/lib/tool-context.js';
+import type { TurnContext } from '../../src/lib/tool-types.js';
+
+const turnCtx: TurnContext = {
+  numberOfTurns: 1,
+};
+
+describe('ToolContextStore - basic operations', () => {
+  it('constructor initializes with given values', () => {
+    const store = new ToolContextStore({
+      weather: {
+        apiKey: '123',
+      },
+    });
+    expect(store.getToolContext('weather')).toEqual({
+      apiKey: '123',
+    });
+  });
+
+  it('getToolContext returns empty object for unknown tool', () => {
+    const store = new ToolContextStore();
+    expect(store.getToolContext('unknown')).toEqual({});
+  });
+
+  it('setToolContext sets tool context and notifies listeners', () => {
+    const store = new ToolContextStore();
+    const snapshots: any[] = [];
+    store.subscribe((s) => snapshots.push(s));
+    store.setToolContext('tool1', {
+      key: 'val',
+    });
+    expect(store.getToolContext('tool1')).toEqual({
+      key: 'val',
+    });
+    expect(snapshots).toHaveLength(1);
+  });
+
+  it('mergeToolContext merges partial values', () => {
+    const store = new ToolContextStore({
+      tool1: {
+        a: 1,
+        b: 2,
+      },
+    });
+    store.mergeToolContext('tool1', {
+      b: 99,
+      c: 3,
+    });
+    expect(store.getToolContext('tool1')).toEqual({
+      a: 1,
+      b: 99,
+      c: 3,
+    });
+  });
+
+  it('getSnapshot returns deep-shallow copy of all contexts', () => {
+    const store = new ToolContextStore({
+      a: {
+        x: 1,
+      },
+      b: {
+        y: 2,
+      },
+    });
+    const snapshot = store.getSnapshot();
+    expect(snapshot).toEqual({
+      a: {
+        x: 1,
+      },
+      b: {
+        y: 2,
+      },
+    });
+    snapshot.a!.x = 999;
+    expect(store.getToolContext('a')).toEqual({
+      x: 1,
+    });
+  });
+
+  it('subscribe returns unsubscribe function', () => {
+    const store = new ToolContextStore();
+    const calls: number[] = [];
+    const unsub = store.subscribe(() => calls.push(1));
+    store.setToolContext('t', {
+      v: 1,
+    });
+    expect(calls).toHaveLength(1);
+    unsub();
+    store.setToolContext('t', {
+      v: 2,
+    });
+    expect(calls).toHaveLength(1);
+  });
+});
+
+describe('buildToolExecuteContext', () => {
+  it('returns object with turnContext fields merged', () => {
+    const ctx = buildToolExecuteContext(turnCtx, undefined, 'myTool', undefined);
+    expect(ctx.numberOfTurns).toBe(1);
+  });
+
+  it('local getter reads from store for the tool name', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const schema = z.object({
+      apiKey: z.string(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', schema);
+    expect(ctx.local).toEqual({
+      apiKey: 'abc',
+    });
+  });
+
+  it('setContext merges partial values into store', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const schema = z.object({
+      apiKey: z.string(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', schema);
+    ctx.setContext({
+      apiKey: 'xyz',
+    });
+    expect(ctx.local).toEqual({
+      apiKey: 'xyz',
+    });
+  });
+
+  it('shared getter reads shared context from store', () => {
+    const store = new ToolContextStore({
+      shared: {
+        globalKey: 'val',
+      },
+    });
+    const sharedSchema = z.object({
+      globalKey: z.string(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', undefined, sharedSchema);
+    expect(ctx.shared).toEqual({
+      globalKey: 'val',
+    });
+  });
+
+  it('setSharedContext updates shared context in store', () => {
+    const store = new ToolContextStore({
+      shared: {
+        globalKey: 'old',
+      },
+    });
+    const sharedSchema = z.object({
+      globalKey: z.string(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', undefined, sharedSchema);
+    ctx.setSharedContext({
+      globalKey: 'new',
+    });
+    expect(ctx.shared).toEqual({
+      globalKey: 'new',
+    });
+  });
+
+  it('local getter returns frozen object', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        val: 1,
+      },
+    });
+    const schema = z.object({
+      val: z.number(),
+    });
+    const ctx = buildToolExecuteContext(turnCtx, store, 'myTool', schema);
+    expect(Object.isFrozen(ctx.local)).toBe(true);
+  });
+});
+
+describe('resolveContext', () => {
+  it('returns empty object when input is undefined', async () => {
+    const result = await resolveContext(undefined, turnCtx);
+    expect(result).toEqual({});
+  });
+
+  it('returns static value as-is', async () => {
+    const input = {
+      myTool: {
+        apiKey: '123',
+      },
+    };
+    const result = await resolveContext(input, turnCtx);
+    expect(result).toEqual({
+      myTool: {
+        apiKey: '123',
+      },
+    });
+  });
+
+  it('calls sync function with turnContext and returns result', async () => {
+    const fn = (ctx: TurnContext) => ({
+      tool: {
+        turn: ctx.numberOfTurns,
+      },
+    });
+    const result = await resolveContext(fn, turnCtx);
+    expect(result).toEqual({
+      tool: {
+        turn: 1,
+      },
+    });
+  });
+
+  it('calls async function with turnContext and returns result', async () => {
+    const fn = async (ctx: TurnContext) => ({
+      tool: {
+        turn: ctx.numberOfTurns * 2,
+      },
+    });
+    const result = await resolveContext(fn, turnCtx);
+    expect(result).toEqual({
+      tool: {
+        turn: 2,
+      },
+    });
+  });
+});
+
+describe('extractToolContext', () => {
+  it('extracts and validates context for tool', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const schema = z.object({
+      apiKey: z.string(),
+    });
+    const result = extractToolContext(store, 'myTool', schema);
+    expect(result).toEqual({
+      apiKey: 'abc',
+    });
+  });
+
+  it('returns empty object when no schema provided', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const result = extractToolContext(store, 'myTool', undefined);
+    expect(result).toEqual({});
+  });
+});
diff --git a/tests/behavior/tool-creation.test.ts b/tests/behavior/tool-creation.test.ts
new file mode 100644
index 0000000..d8ecc10
--- /dev/null
+++ b/tests/behavior/tool-creation.test.ts
@@ -0,0 +1,149 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import { tool } from '../../src/lib/tool.js';
+import { ToolType } from '../../src/lib/tool-types.js';
+
+// Tests 1-9: Tool creation via tool() factory
+
+describe('tool creation - tool() factory', () => {
+  it('regular tool returns full shape: type, name, inputSchema, execute, description, outputSchema', () => {
+    const t = tool({
+      name: 'greet',
+      description: 'Say hello',
+      inputSchema: z.object({
+        name: z.string(),
+      }),
+      outputSchema: z.object({
+        greeting: z.string(),
+      }),
+      execute: async (params) => ({
+        greeting: `Hi ${params.name}`,
+      }),
+    });
+
+    expect(t.type).toBe(ToolType.Function);
+    expect(t.function.name).toBe('greet');
+    expect(t.function.description).toBe('Say hello');
+    expect(t.function.inputSchema).toBeDefined();
+    expect(t.function.outputSchema).toBeDefined();
+    expect(t.function.execute).toBeTypeOf('function');
+  });
+
+  it('generator tool with eventSchema returns tool with eventSchema + outputSchema + execute', () => {
+    const t = tool({
+      name: 'stream_tool',
+      inputSchema: z.object({
+        query: z.string(),
+      }),
+      eventSchema: z.object({
+        progress: z.number(),
+      }),
+      outputSchema: z.object({
+        result: z.string(),
+      }),
+      execute: async function* () {
+        yield {
+          progress: 50,
+        };
+        return {
+          result: 'done',
+        };
+      },
+    });
+
+    expect(t.type).toBe(ToolType.Function);
+    expect(t.function.name).toBe('stream_tool');
+    expect(t.function.eventSchema).toBeDefined();
+    expect(t.function.outputSchema).toBeDefined();
+    expect(t.function.execute).toBeTypeOf('function');
+  });
+
+  it('manual tool (execute: false) returns tool with no execute, no outputSchema, no eventSchema', () => {
+    const t = tool({
+      name: 'manual',
+      description: 'Needs manual handling',
+      inputSchema: z.object({
+        action: z.string(),
+      }),
+      execute: false,
+    });
+
+    expect(t.type).toBe(ToolType.Function);
+    expect(t.function.name).toBe('manual');
+    expect(t.function).not.toHaveProperty('execute');
+    expect(t.function).not.toHaveProperty('eventSchema');
+  });
+
+  it('tool with contextSchema preserves schema on function.contextSchema', () => {
+    const ctxSchema = z.object({
+      apiKey: z.string(),
+    });
+    const t = tool({
+      name: 'ctx_tool',
+      inputSchema: z.object({}),
+      contextSchema: ctxSchema,
+      execute: async () => ({}),
+    });
+
+    expect(t.function.contextSchema).toBe(ctxSchema);
+  });
+
+  it('tool with requireApproval: true preserves flag on function', () => {
+    const t = tool({
+      name: 'approval_tool',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+
+    expect(t.function.requireApproval).toBe(true);
+  });
+
+  it('tool with requireApproval function preserves function on function', () => {
+    const check = () => true;
+    const t = tool({
+      name: 'fn_approval',
+      inputSchema: z.object({}),
+      requireApproval: check,
+      execute: async () => ({}),
+    });
+
+    expect(t.function.requireApproval).toBe(check);
+  });
+
+  it('tool with nextTurnParams preserves them on function', () => {
+    const ntp = {
+      temperature: () => 0.5 as number | null,
+    };
+    const t = tool({
+      name: 'ntp_tool',
+      inputSchema: z.object({}),
+      nextTurnParams: ntp,
+      execute: async () => ({}),
+    });
+
+    expect(t.function.nextTurnParams).toBeDefined();
+  });
+
+  it('tool named "shared" throws (reserved for shared context)', () => {
+    expect(() =>
+      tool({
+        name: 'shared',
+        inputSchema: z.object({}),
+        execute: async () => ({}),
+      }),
+    ).toThrow(/reserved/i);
+  });
+
+  it('tool with no description has description absent from function object', () => {
+    const t = tool({
+      name: 'no_desc',
+      inputSchema: z.object({
+        x: z.number(),
+      }),
+      execute: async () => ({}),
+    });
+
+    expect(t.function.description).toBeUndefined();
+  });
+});
diff --git a/tests/behavior/tool-event-broadcaster.test.ts b/tests/behavior/tool-event-broadcaster.test.ts
new file mode 100644
index 0000000..b32acc0
--- /dev/null
+++ b/tests/behavior/tool-event-broadcaster.test.ts
@@ -0,0 +1,131 @@
+import { describe, expect, it } from 'vitest';
+import { ToolEventBroadcaster } from '../../src/lib/tool-event-broadcaster.js';
+
+async function collect<T>(iter: AsyncIterableIterator<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const v of iter) {
+    result.push(v);
+  }
+  return result;
+}
+
+describe('ToolEventBroadcaster - single consumer', () => {
+  it('consumer receives all pushed events after complete', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1);
+    broadcaster.push(2);
+    broadcaster.push(3);
+    broadcaster.complete();
+    const consumer = broadcaster.createConsumer();
+    const values = await collect(consumer);
+    expect(values).toEqual([
+      1,
+      2,
+      3,
+    ]);
+  });
+
+  it('consumer receives events pushed before and after creation', async () => {
+    const broadcaster = new ToolEventBroadcaster<string>();
+    broadcaster.push('before');
+    const consumer = broadcaster.createConsumer();
+    broadcaster.push('after');
+    broadcaster.complete();
+    const values = await collect(consumer);
+    expect(values).toEqual([
+      'before',
+      'after',
+    ]);
+  });
+
+  it('empty broadcaster yields no values', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.complete();
+    const consumer = broadcaster.createConsumer();
+    const values = await collect(consumer);
+    expect(values).toEqual([]);
+  });
+});
+
+describe('ToolEventBroadcaster - multiple consumers', () => {
+  it('two consumers independently receive all events', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    const c1 = broadcaster.createConsumer();
+    const c2 = broadcaster.createConsumer();
+    broadcaster.push(10);
+    broadcaster.push(20);
+    broadcaster.complete();
+    const [v1, v2] = await Promise.all([
+      collect(c1),
+      collect(c2),
+    ]);
+    expect(v1).toEqual([
+      10,
+      20,
+    ]);
+    expect(v2).toEqual([
+      10,
+      20,
+    ]);
+  });
+});
+
+describe('ToolEventBroadcaster - error handling', () => {
+  it('complete(error) propagates error to consumer', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1);
+    const consumer = broadcaster.createConsumer();
+    const first = await consumer.next();
+    expect(first.value).toBe(1);
+    broadcaster.complete(new Error('test error'));
+    await expect(consumer.next()).rejects.toThrow('test error');
+  });
+});
+
+describe('ToolEventBroadcaster - cancellation', () => {
+  it('consumer.return() cancels the consumer', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1);
+    const consumer = broadcaster.createConsumer();
+    await consumer.return!();
+    const result = await consumer.next();
+    expect(result.done).toBe(true);
+  });
+
+  it('consumer.throw() cancels the consumer and throws', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    const consumer = broadcaster.createConsumer();
+    await expect(consumer.throw!(new Error('abort'))).rejects.toThrow('abort');
+  });
+});
+
+describe('ToolEventBroadcaster - push after complete is ignored', () => {
+  it('events pushed after complete are not delivered', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1);
+    broadcaster.complete();
+    broadcaster.push(2);
+    const consumer = broadcaster.createConsumer();
+    const values = await collect(consumer);
+    expect(values).toEqual([
+      1,
+    ]);
+  });
+});
+
+describe('ToolEventBroadcaster - async iteration protocol', () => {
+  it('supports for-await-of loop', async () => {
+    const broadcaster = new ToolEventBroadcaster<string>();
+    broadcaster.push('a');
+    broadcaster.push('b');
+    broadcaster.complete();
+    const values: string[] = [];
+    for await (const v of broadcaster.createConsumer()) {
+      values.push(v);
+    }
+    expect(values).toEqual([
+      'a',
+      'b',
+    ]);
+  });
+});
diff --git a/tests/behavior/tool-execution.test.ts b/tests/behavior/tool-execution.test.ts
new file mode 100644
index 0000000..b5b7ef3
--- /dev/null
+++ b/tests/behavior/tool-execution.test.ts
@@ -0,0 +1,576 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import { tool } from '../../src/lib/tool.js';
+import {
+  convertToolsToAPIFormat,
+  convertZodToJsonSchema,
+  executeGeneratorTool,
+  executeRegularTool,
+  executeTool,
+  findToolByName,
+  formatToolExecutionError,
+  formatToolResultForModel,
+  parseToolCallArguments,
+  sanitizeJsonSchema,
+  validateToolInput,
+  validateToolOutput,
+} from '../../src/lib/tool-executor.js';
+import type { ParsedToolCall, Tool, TurnContext } from '../../src/lib/tool-types.js';
+
+const turnCtx: TurnContext = {
+  numberOfTurns: 1,
+};
+
+describe('tool execution - input validation', () => {
+  const schema = z.object({
+    name: z.string(),
+    age: z.number(),
+  });
+
+  it('validateToolInput with valid args returns validated data', () => {
+    const result = validateToolInput(schema, {
+      name: 'Alice',
+      age: 30,
+    });
+    expect(result).toEqual({
+      name: 'Alice',
+      age: 30,
+    });
+  });
+
+  it('validateToolInput with invalid args throws ZodError', () => {
+    expect(() =>
+      validateToolInput(schema, {
+        name: 123,
+      }),
+    ).toThrow();
+  });
+
+  it('validateToolOutput with valid result returns validated data', () => {
+    const outSchema = z.object({
+      sum: z.number(),
+    });
+    const result = validateToolOutput(outSchema, {
+      sum: 42,
+    });
+    expect(result).toEqual({
+      sum: 42,
+    });
+  });
+
+  it('validateToolOutput with invalid result throws ZodError', () => {
+    const outSchema = z.object({
+      sum: z.number(),
+    });
+    expect(() =>
+      validateToolOutput(outSchema, {
+        sum: 'not a number',
+      }),
+    ).toThrow();
+  });
+});
+
+describe('tool execution - argument parsing', () => {
+  it('parseToolCallArguments with valid JSON returns parsed object', () => {
+    expect(parseToolCallArguments('{"a":1}')).toEqual({
+      a: 1,
+    });
+  });
+
+  it('parseToolCallArguments with empty string returns empty object', () => {
+    expect(parseToolCallArguments('')).toEqual({});
+  });
+
+  it('parseToolCallArguments with whitespace-only string returns empty object', () => {
+    expect(parseToolCallArguments('   ')).toEqual({});
+  });
+
+  it('parseToolCallArguments with invalid JSON throws descriptive error', () => {
+    expect(() => parseToolCallArguments('bad json')).toThrow(/failed to parse/i);
+  });
+});
+
+describe('tool execution - executeRegularTool', () => {
+  it('executes and returns { toolCallId, toolName, result }', async () => {
+    const t = tool({
+      name: 'add',
+      inputSchema: z.object({
+        a: z.number(),
+        b: z.number(),
+      }),
+      execute: async (params) => ({
+        sum: params.a + params.b,
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_1',
+      name: 'add',
+      arguments: {
+        a: 2,
+        b: 3,
+      },
+    };
+    const result = await executeRegularTool(t, tc, turnCtx);
+    expect(result.toolCallId).toBe('call_1');
+    expect(result.toolName).toBe('add');
+    expect(result.result).toEqual({
+      sum: 5,
+    });
+    expect(result.error).toBeUndefined();
+  });
+
+  it('returns error when input validation fails', async () => {
+    const t = tool({
+      name: 'strict',
+      inputSchema: z.object({
+        x: z.number(),
+      }),
+      execute: async () => ({
+        ok: true,
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_2',
+      name: 'strict',
+      arguments: {
+        x: 'not_num',
+      },
+    };
+    const result = await executeRegularTool(t, tc, turnCtx);
+    expect(result.error).toBeDefined();
+    expect(result.result).toBeNull();
+  });
+
+  it('validates output when outputSchema provided', async () => {
+    const t = tool({
+      name: 'typed_out',
+      inputSchema: z.object({}),
+      outputSchema: z.object({
+        value: z.number(),
+      }),
+      execute: async () => ({
+        value: 42,
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_3',
+      name: 'typed_out',
+      arguments: {},
+    };
+    const result = await executeRegularTool(t, tc, turnCtx);
+    expect(result.result).toEqual({
+      value: 42,
+    });
+  });
+
+  it('returns raw result when no outputSchema', async () => {
+    const t = tool({
+      name: 'raw_out',
+      inputSchema: z.object({}),
+      execute: async () => ({
+        anything: 'goes',
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_4',
+      name: 'raw_out',
+      arguments: {},
+    };
+    const result = await executeRegularTool(t, tc, turnCtx);
+    expect(result.result).toEqual({
+      anything: 'goes',
+    });
+  });
+
+  it('catches thrown error and returns { error, result: null }', async () => {
+    const t = tool({
+      name: 'failing',
+      inputSchema: z.object({}),
+      execute: async () => {
+        throw new Error('boom');
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_5',
+      name: 'failing',
+      arguments: {},
+    };
+    const result = await executeRegularTool(t, tc, turnCtx);
+    expect(result.error).toBeDefined();
+    expect(result.error!.message).toBe('boom');
+    expect(result.result).toBeNull();
+  });
+});
+
+describe('tool execution - executeGeneratorTool', () => {
+  it('yields events then returns final result with preliminaryResults', async () => {
+    const t = tool({
+      name: 'gen',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        progress: z.number(),
+      }),
+      outputSchema: z.object({
+        result: z.string(),
+      }),
+      execute: async function* () {
+        yield {
+          progress: 50,
+        };
+        yield {
+          progress: 100,
+        };
+        return {
+          result: 'done',
+        };
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_6',
+      name: 'gen',
+      arguments: {},
+    };
+    const result = await executeGeneratorTool(t, tc, turnCtx);
+    expect(result.result).toEqual({
+      result: 'done',
+    });
+    expect(result.preliminaryResults).toHaveLength(2);
+  });
+
+  it('calls onPreliminaryResult for each yielded event', async () => {
+    const events: unknown[] = [];
+    const t = tool({
+      name: 'gen_cb',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        step: z.number(),
+      }),
+      outputSchema: z.object({
+        done: z.boolean(),
+      }),
+      execute: async function* () {
+        yield {
+          step: 1,
+        };
+        yield {
+          step: 2,
+        };
+        return {
+          done: true,
+        };
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_7',
+      name: 'gen_cb',
+      arguments: {},
+    };
+    await executeGeneratorTool(t, tc, turnCtx, (_id, ev) => events.push(ev));
+    expect(events).toHaveLength(2);
+    expect(events[0]).toEqual({
+      step: 1,
+    });
+  });
+
+  it('returns final result with empty preliminaryResults when only return value', async () => {
+    const t = tool({
+      name: 'gen_ret',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        ev: z.string(),
+      }),
+      outputSchema: z.object({
+        val: z.number(),
+      }),
+      execute: async function* () {
+        return {
+          val: 42,
+        };
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_8',
+      name: 'gen_ret',
+      arguments: {},
+    };
+    const result = await executeGeneratorTool(t, tc, turnCtx);
+    expect(result.result).toEqual({
+      val: 42,
+    });
+    expect(result.preliminaryResults).toHaveLength(0);
+  });
+
+  it('returns error when generator throws', async () => {
+    const t = tool({
+      name: 'gen_err',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        ev: z.string(),
+      }),
+      outputSchema: z.object({
+        val: z.number(),
+      }),
+      execute: async function* () {
+        throw new Error('gen boom');
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_9',
+      name: 'gen_err',
+      arguments: {},
+    };
+    const result = await executeGeneratorTool(t, tc, turnCtx);
+    expect(result.error).toBeDefined();
+    expect(result.error!.message).toBe('gen boom');
+  });
+
+  it('returns error when generator emits nothing', async () => {
+    const t = tool({
+      name: 'gen_empty',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        ev: z.string(),
+      }),
+      outputSchema: z.object({
+        val: z.number(),
+      }),
+      execute: async function* () {
+        // yields nothing, returns nothing
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_10',
+      name: 'gen_empty',
+      arguments: {},
+    };
+    const result = await executeGeneratorTool(t, tc, turnCtx);
+    expect(result.error).toBeDefined();
+    expect(result.error!.message).toContain('without emitting');
+  });
+});
+
+describe('tool execution - executeTool dispatch', () => {
+  it('dispatches regular tool to executeRegularTool', async () => {
+    const t = tool({
+      name: 'reg',
+      inputSchema: z.object({
+        x: z.number(),
+      }),
+      execute: async (p) => ({
+        doubled: p.x * 2,
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'reg',
+      arguments: {
+        x: 5,
+      },
+    };
+    const result = await executeTool(t, tc, turnCtx);
+    expect(result.result).toEqual({
+      doubled: 10,
+    });
+  });
+
+  it('dispatches generator tool to executeGeneratorTool', async () => {
+    const t = tool({
+      name: 'gen',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        ev: z.number(),
+      }),
+      outputSchema: z.object({
+        done: z.boolean(),
+      }),
+      execute: async function* () {
+        yield {
+          ev: 1,
+        };
+        return {
+          done: true,
+        };
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c2',
+      name: 'gen',
+      arguments: {},
+    };
+    const result = await executeTool(t, tc, turnCtx);
+    expect(result.result).toEqual({
+      done: true,
+    });
+    expect(result.preliminaryResults).toHaveLength(1);
+  });
+
+  it('throws for manual tool (no execute function)', async () => {
+    const t = tool({
+      name: 'manual',
+      inputSchema: z.object({}),
+      execute: false,
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c3',
+      name: 'manual',
+      arguments: {},
+    };
+    await expect(executeTool(t, tc, turnCtx)).rejects.toThrow(/no execute function/i);
+  });
+});
+
+describe('tool execution - utility functions', () => {
+  it('findToolByName returns matching tool', () => {
+    const t = tool({
+      name: 'x',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    expect(
+      findToolByName(
+        [
+          t,
+        ],
+        'x',
+      ),
+    ).toBe(t);
+  });
+
+  it('findToolByName returns undefined for missing tool', () => {
+    const t = tool({
+      name: 'x',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    expect(
+      findToolByName(
+        [
+          t,
+        ],
+        'missing',
+      ),
+    ).toBeUndefined();
+  });
+
+  it('formatToolResultForModel with success returns JSON of result', () => {
+    const json = formatToolResultForModel({
+      toolCallId: 'c1',
+      toolName: 'test',
+      result: {
+        data: 42,
+      },
+    });
+    expect(JSON.parse(json)).toEqual({
+      data: 42,
+    });
+  });
+
+  it('formatToolResultForModel with error returns JSON with error message', () => {
+    const json = formatToolResultForModel({
+      toolCallId: 'c2',
+      toolName: 'test',
+      result: null,
+      error: new Error('fail'),
+    });
+    const parsed = JSON.parse(json);
+    expect(parsed.error).toBe('fail');
+    expect(parsed.toolName).toBe('test');
+  });
+
+  it('formatToolExecutionError with ZodError includes validation details', () => {
+    try {
+      z.parse(
+        z.object({
+          x: z.number(),
+        }),
+        {
+          x: 'bad',
+        },
+      );
+    } catch (e) {
+      const tc: ParsedToolCall<Tool> = {
+        id: 'c3',
+        name: 'myTool',
+        arguments: {},
+      };
+      const msg = formatToolExecutionError(e as Error, tc);
+      expect(msg).toContain('myTool');
+      expect(msg).toContain('validation error');
+    }
+  });
+
+  it('formatToolExecutionError with generic Error includes message', () => {
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c4',
+      name: 'myTool',
+      arguments: {},
+    };
+    const msg = formatToolExecutionError(new Error('something went wrong'), tc);
+    expect(msg).toContain('myTool');
+    expect(msg).toContain('something went wrong');
+  });
+
+  it('convertToolsToAPIFormat returns correct API shape array', () => {
+    const t = tool({
+      name: 'api_tool',
+      description: 'Does stuff',
+      inputSchema: z.object({
+        x: z.number(),
+      }),
+      execute: async () => ({}),
+    });
+    const apiTools = convertToolsToAPIFormat([
+      t,
+    ]);
+    expect(apiTools).toHaveLength(1);
+    expect(apiTools[0]!.type).toBe('function');
+    expect(apiTools[0]!.name).toBe('api_tool');
+    expect(apiTools[0]!.description).toBe('Does stuff');
+    expect(apiTools[0]!.parameters).toBeDefined();
+  });
+
+  it('convertZodToJsonSchema produces valid JSON schema from Zod', () => {
+    const schema = z.object({
+      x: z.number(),
+      y: z.string(),
+    });
+    const jsonSchema = convertZodToJsonSchema(schema);
+    expect(jsonSchema).toHaveProperty('type', 'object');
+    expect(jsonSchema).toHaveProperty('properties');
+  });
+
+  it('sanitizeJsonSchema removes ~prefixed keys recursively', () => {
+    const input = {
+      type: 'object',
+      '~standard': {
+        meta: true,
+      },
+      properties: {
+        x: {
+          type: 'number',
+          '~standard': {},
+        },
+      },
+    };
+    const result = sanitizeJsonSchema(input);
+    expect(result).not.toHaveProperty('~standard');
+    expect((result as Record<string, unknown>).type).toBe('object');
+  });
+
+  it('sanitizeJsonSchema handles primitives, null, arrays', () => {
+    expect(sanitizeJsonSchema(null)).toBeNull();
+    expect(sanitizeJsonSchema(42)).toBe(42);
+    expect(
+      sanitizeJsonSchema([
+        {
+          '~meta': 1,
+          val: 2,
+        },
+      ]),
+    ).toEqual([
+      {
+        val: 2,
+      },
+    ]);
+  });
+});
diff --git a/tests/behavior/tool-orchestrator.test.ts b/tests/behavior/tool-orchestrator.test.ts
new file mode 100644
index 0000000..46ef1c7
--- /dev/null
+++ b/tests/behavior/tool-orchestrator.test.ts
@@ -0,0 +1,118 @@
+import { describe, expect, it } from 'vitest';
+import {
+  getToolExecutionErrors,
+  hasToolExecutionErrors,
+  summarizeToolExecutions,
+  toolResultsToMap,
+} from '../../src/lib/tool-orchestrator.js';
+import type { Tool, ToolExecutionResult } from '../../src/lib/tool-types.js';
+
+function makeResult(overrides: Partial<ToolExecutionResult<Tool>>): ToolExecutionResult<Tool> {
+  return {
+    toolCallId: 'c1',
+    toolName: 'test',
+    result: {
+      ok: true,
+    },
+    ...overrides,
+  };
+}
+
+describe('tool orchestrator - toolResultsToMap', () => {
+  it('converts results array to map keyed by toolCallId', () => {
+    const results = [
+      makeResult({
+        toolCallId: 'c1',
+        result: 'a',
+      }),
+      makeResult({
+        toolCallId: 'c2',
+        result: 'b',
+      }),
+    ];
+    const map = toolResultsToMap(results);
+    expect(map.size).toBe(2);
+    expect(map.get('c1')!.result).toBe('a');
+    expect(map.get('c2')!.result).toBe('b');
+  });
+
+  it('includes preliminaryResults in map entries', () => {
+    const results = [
+      makeResult({
+        toolCallId: 'c1',
+        result: 'final',
+        preliminaryResults: [
+          'p1',
+          'p2',
+        ] as any,
+      }),
+    ];
+    const map = toolResultsToMap(results);
+    expect(map.get('c1')!.preliminaryResults).toEqual([
+      'p1',
+      'p2',
+    ]);
+  });
+});
+
+describe('tool orchestrator - summarizeToolExecutions', () => {
+  it('produces success line for successful result', () => {
+    const summary = summarizeToolExecutions([
+      makeResult({
+        toolCallId: 'c1',
+        toolName: 'add',
+      }),
+    ]);
+    expect(summary).toContain('add');
+    expect(summary).toContain('c1');
+  });
+
+  it('produces error line for failed result', () => {
+    const summary = summarizeToolExecutions([
+      makeResult({
+        toolCallId: 'c2',
+        toolName: 'fail',
+        result: null,
+        error: new Error('oops'),
+      }),
+    ]);
+    expect(summary).toContain('fail');
+    expect(summary).toContain('oops');
+  });
+});
+
+describe('tool orchestrator - hasToolExecutionErrors', () => {
+  it('returns true when any result has error', () => {
+    expect(
+      hasToolExecutionErrors([
+        makeResult({}),
+        makeResult({
+          error: new Error('err'),
+        }),
+      ]),
+    ).toBe(true);
+  });
+
+  it('returns false when no results have errors', () => {
+    expect(
+      hasToolExecutionErrors([
+        makeResult({}),
+      ]),
+    ).toBe(false);
+  });
+});
+
+describe('tool orchestrator - getToolExecutionErrors', () => {
+  it('extracts all error objects from results', () => {
+    const err = new Error('err1');
+    const results = [
+      makeResult({}),
+      makeResult({
+        error: err,
+      }),
+    ];
+    const errors = getToolExecutionErrors(results);
+    expect(errors).toHaveLength(1);
+    expect(errors[0]).toBe(err);
+  });
+});
diff --git a/tests/behavior/tool-types-events.test.ts b/tests/behavior/tool-types-events.test.ts
new file mode 100644
index 0000000..f8e44cd
--- /dev/null
+++ b/tests/behavior/tool-types-events.test.ts
@@ -0,0 +1,94 @@
+import { describe, expect, it } from 'vitest';
+import {
+  isToolCallOutputEvent,
+  isToolPreliminaryResultEvent,
+  isToolResultEvent,
+  isTurnEndEvent,
+  isTurnStartEvent,
+} from '../../src/lib/tool-types.js';
+
+describe('tool-types event type guards', () => {
+  it('isToolPreliminaryResultEvent matches tool.preliminary_result', () => {
+    expect(
+      isToolPreliminaryResultEvent({
+        type: 'tool.preliminary_result',
+        toolCallId: 'c1',
+        result: {},
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isToolPreliminaryResultEvent rejects tool.result', () => {
+    expect(
+      isToolPreliminaryResultEvent({
+        type: 'tool.result',
+        toolCallId: 'c1',
+        result: {},
+        timestamp: 0,
+      }),
+    ).toBe(false);
+  });
+
+  it('isToolResultEvent matches tool.result', () => {
+    expect(
+      isToolResultEvent({
+        type: 'tool.result',
+        toolCallId: 'c1',
+        result: {},
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isToolResultEvent rejects tool.preliminary_result', () => {
+    expect(
+      isToolResultEvent({
+        type: 'tool.preliminary_result',
+        toolCallId: 'c1',
+        result: {},
+        timestamp: 0,
+      }),
+    ).toBe(false);
+  });
+
+  it('isToolCallOutputEvent matches tool.call_output', () => {
+    expect(
+      isToolCallOutputEvent({
+        type: 'tool.call_output',
+        output: {},
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isTurnStartEvent matches turn.start', () => {
+    expect(
+      isTurnStartEvent({
+        type: 'turn.start',
+        turnNumber: 1,
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isTurnEndEvent matches turn.end', () => {
+    expect(
+      isTurnEndEvent({
+        type: 'turn.end',
+        turnNumber: 1,
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isTurnStartEvent rejects turn.end', () => {
+    expect(
+      isTurnStartEvent({
+        type: 'turn.end',
+        turnNumber: 1,
+        timestamp: 0,
+      }),
+    ).toBe(false);
+  });
+});
diff --git a/tests/behavior/turn-context.test.ts b/tests/behavior/turn-context.test.ts
new file mode 100644
index 0000000..2ae2724
--- /dev/null
+++ b/tests/behavior/turn-context.test.ts
@@ -0,0 +1,67 @@
+import { describe, expect, it } from 'vitest';
+import { buildTurnContext, normalizeInputToArray } from '../../src/lib/turn-context.js';
+
+describe('turn context - buildTurnContext', () => {
+  it('sets numberOfTurns from options', () => {
+    const ctx = buildTurnContext({
+      numberOfTurns: 3,
+    });
+    expect(ctx.numberOfTurns).toBe(3);
+  });
+
+  it('includes toolCall when provided', () => {
+    const toolCall = {
+      type: 'function_call' as const,
+      callId: 'c1',
+      name: 'test',
+      arguments: '{}',
+      id: 'c1',
+      status: 'completed' as const,
+    };
+    const ctx = buildTurnContext({
+      numberOfTurns: 1,
+      toolCall,
+    });
+    expect(ctx.toolCall).toBe(toolCall);
+  });
+
+  it('includes turnRequest when provided', () => {
+    const request = {
+      model: 'gpt-4',
+      input: 'hello',
+    } as any;
+    const ctx = buildTurnContext({
+      numberOfTurns: 1,
+      turnRequest: request,
+    });
+    expect(ctx.turnRequest).toBe(request);
+  });
+
+  it('omits toolCall and turnRequest when not provided', () => {
+    const ctx = buildTurnContext({
+      numberOfTurns: 0,
+    });
+    expect(ctx).not.toHaveProperty('toolCall');
+    expect(ctx).not.toHaveProperty('turnRequest');
+  });
+});
+
+describe('turn context - normalizeInputToArray', () => {
+  it('converts string input to array with user message', () => {
+    const result = normalizeInputToArray('Hello!');
+    expect(result).toHaveLength(1);
+    expect(result[0]).toHaveProperty('role', 'user');
+    expect(result[0]).toHaveProperty('content', 'Hello!');
+  });
+
+  it('returns array input as-is', () => {
+    const input = [
+      {
+        role: 'user' as const,
+        content: 'hi',
+      },
+    ];
+    const result = normalizeInputToArray(input);
+    expect(result).toBe(input);
+  });
+});
diff --git a/tests/boundaries/README.md b/tests/boundaries/README.md
new file mode 100644
index 0000000..02e8fb4
--- /dev/null
+++ b/tests/boundaries/README.md
@@ -0,0 +1,26 @@
+# Boundaries Tests
+
+Tests in this folder verify that **similar capabilities are correctly bounded from each other**. "This one handles X; that one handles Y; they don't bleed." Each guard, classifier, or extractor must accept its own domain and reject its peers.
+
+## What belongs here
+
+- Mutual exclusion between type guards (stream event guards, output item guards, content part guards)
+- Cross-domain rejection (stream guards reject output items and vice versa)
+- Tool type classifier mutual exclusion (regular vs generator vs manual)
+- Null/undefined/malformed input safety for all guards
+- Structural distinction between similar output shapes
+- Extendable: when new guards or classifiers are added, their mutual exclusion tests go here
+
+## Examples
+
+- `isOutputTextDeltaEvent` returns true for text delta, false for reasoning delta
+- `isOutputMessage` rejects a `TextDeltaEvent` (wrong domain)
+- Regular tool satisfies `isRegularExecuteTool` but NOT `isGeneratorTool`
+- `isToolPreliminaryResultEvent` rejects `{ type: 'tool.result' }`
+- `createUnsentResult` shape vs `createRejectedResult` shape
+
+## What does NOT belong here
+
+- Testing what a guard does with valid input in detail (→ `behavior/`)
+- Testing that guard output feeds a transformer (→ `composition/` or `dispatch/`)
+- End-to-end type-guard-driven pipelines (→ `pipelines/`)
diff --git a/tests/boundaries/content-annotation-guards.test.ts b/tests/boundaries/content-annotation-guards.test.ts
new file mode 100644
index 0000000..56de338
--- /dev/null
+++ b/tests/boundaries/content-annotation-guards.test.ts
@@ -0,0 +1,91 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  hasTypeProperty,
+  isFileCitationAnnotation,
+  isFilePathAnnotation,
+  isOutputTextPart,
+  isRefusalPart,
+  isURLCitationAnnotation,
+} from '../../src/lib/stream-type-guards.js';
+
+describe('Content part and annotation guards - boundary between similar types', () => {
+  it('isOutputTextPart: true for output_text, false for refusal', () => {
+    expect(
+      isOutputTextPart({
+        type: 'output_text',
+      }),
+    ).toBe(true);
+    expect(
+      isOutputTextPart({
+        type: 'refusal',
+      }),
+    ).toBe(false);
+  });
+
+  it('isRefusalPart: true for refusal, false for output_text', () => {
+    expect(
+      isRefusalPart({
+        type: 'refusal',
+      }),
+    ).toBe(true);
+    expect(
+      isRefusalPart({
+        type: 'output_text',
+      }),
+    ).toBe(false);
+  });
+
+  it('isFileCitationAnnotation: true for file_citation, false for url_citation', () => {
+    expect(
+      isFileCitationAnnotation({
+        type: 'file_citation',
+      }),
+    ).toBe(true);
+    expect(
+      isFileCitationAnnotation({
+        type: 'url_citation',
+      }),
+    ).toBe(false);
+  });
+
+  it('isURLCitationAnnotation: true for url_citation, false for file_citation', () => {
+    expect(
+      isURLCitationAnnotation({
+        type: 'url_citation',
+      }),
+    ).toBe(true);
+    expect(
+      isURLCitationAnnotation({
+        type: 'file_citation',
+      }),
+    ).toBe(false);
+  });
+
+  it('isFilePathAnnotation: true for file_path, false for file_citation', () => {
+    expect(
+      isFilePathAnnotation({
+        type: 'file_path',
+      }),
+    ).toBe(true);
+    expect(
+      isFilePathAnnotation({
+        type: 'file_citation',
+      }),
+    ).toBe(false);
+  });
+
+  it('hasTypeProperty: { type: "x" } -> true; { type: 123 } -> false; null -> false', () => {
+    expect(
+      hasTypeProperty({
+        type: 'x',
+      }),
+    ).toBe(true);
+    expect(
+      hasTypeProperty({
+        type: 123,
+      }),
+    ).toBe(false);
+    expect(hasTypeProperty(null)).toBe(false);
+  });
+});
diff --git a/tests/boundaries/conversation-state-results.test.ts b/tests/boundaries/conversation-state-results.test.ts
new file mode 100644
index 0000000..c7a4e11
--- /dev/null
+++ b/tests/boundaries/conversation-state-results.test.ts
@@ -0,0 +1,51 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  createRejectedResult,
+  createUnsentResult,
+  unsentResultsToAPIFormat,
+} from '../../src/lib/conversation-state.js';
+
+describe('Conversation state utilities - distinct result types', () => {
+  it('createUnsentResult output has output (value) but no error', () => {
+    const result = createUnsentResult('c1', 'search', {
+      data: 'found',
+    });
+    expect(result.output).toEqual({
+      data: 'found',
+    });
+    expect(result).not.toHaveProperty('error');
+  });
+
+  it('createRejectedResult output has output: null AND error string', () => {
+    const result = createRejectedResult('c1', 'delete');
+    expect(result.output).toBeNull();
+    expect(result.error).toBe('Tool call rejected by user');
+  });
+
+  it('unsentResultsToAPIFormat: success result -> output is JSON.stringify(output)', () => {
+    const unsent = createUnsentResult('c1', 'search', {
+      data: 'found',
+    });
+    const formatted = unsentResultsToAPIFormat([
+      unsent,
+    ]);
+    expect(formatted[0]!.output).toBe(
+      JSON.stringify({
+        data: 'found',
+      }),
+    );
+  });
+
+  it('unsentResultsToAPIFormat: error result -> output is JSON.stringify({ error })', () => {
+    const rejected = createRejectedResult('c1', 'delete', 'Not allowed');
+    const formatted = unsentResultsToAPIFormat([
+      rejected,
+    ]);
+    expect(formatted[0]!.output).toBe(
+      JSON.stringify({
+        error: 'Not allowed',
+      }),
+    );
+  });
+});
diff --git a/tests/boundaries/domain-separation.test.ts b/tests/boundaries/domain-separation.test.ts
new file mode 100644
index 0000000..091a3f7
--- /dev/null
+++ b/tests/boundaries/domain-separation.test.ts
@@ -0,0 +1,45 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  isFunctionCallArgumentsDeltaEvent,
+  isFunctionCallItem,
+  isOutputMessage,
+  isOutputTextDeltaEvent,
+} from '../../src/lib/stream-type-guards.js';
+
+describe('Stream guards vs output item guards - domain separation', () => {
+  it('isOutputTextDeltaEvent rejects an OutputMessage (item, not stream event)', () => {
+    const item = {
+      type: 'message',
+      role: 'assistant',
+      content: [],
+    };
+    expect(isOutputTextDeltaEvent(item as any)).toBe(false);
+  });
+
+  it('isOutputMessage rejects a TextDeltaEvent (stream event, not item)', () => {
+    const event = {
+      type: 'response.output_text.delta',
+      delta: 'hello',
+    };
+    expect(isOutputMessage(event)).toBe(false);
+  });
+
+  it('isFunctionCallArgumentsDeltaEvent rejects a FunctionCallItem (completed item, not delta)', () => {
+    const item = {
+      type: 'function_call',
+      callId: 'c1',
+      name: 'test',
+      arguments: '{}',
+    };
+    expect(isFunctionCallArgumentsDeltaEvent(item as any)).toBe(false);
+  });
+
+  it('isFunctionCallItem rejects a FunctionCallArgsDeltaEvent (delta, not item)', () => {
+    const event = {
+      type: 'response.function_call_arguments.delta',
+      delta: '{"a":',
+    };
+    expect(isFunctionCallItem(event)).toBe(false);
+  });
+});
diff --git a/tests/boundaries/output-item-guards.test.ts b/tests/boundaries/output-item-guards.test.ts
new file mode 100644
index 0000000..df5d29b
--- /dev/null
+++ b/tests/boundaries/output-item-guards.test.ts
@@ -0,0 +1,70 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  isFileSearchCallOutputItem,
+  isFunctionCallItem,
+  isImageGenerationCallOutputItem,
+  isOutputMessage,
+  isReasoningOutputItem,
+  isWebSearchCallOutputItem,
+} from '../../src/lib/stream-type-guards.js';
+
+const guards = [
+  {
+    name: 'isOutputMessage',
+    fn: isOutputMessage,
+    type: 'message',
+  },
+  {
+    name: 'isFunctionCallItem',
+    fn: isFunctionCallItem,
+    type: 'function_call',
+  },
+  {
+    name: 'isReasoningOutputItem',
+    fn: isReasoningOutputItem,
+    type: 'reasoning',
+  },
+  {
+    name: 'isWebSearchCallOutputItem',
+    fn: isWebSearchCallOutputItem,
+    type: 'web_search_call',
+  },
+  {
+    name: 'isFileSearchCallOutputItem',
+    fn: isFileSearchCallOutputItem,
+    type: 'file_search_call',
+  },
+  {
+    name: 'isImageGenerationCallOutputItem',
+    fn: isImageGenerationCallOutputItem,
+    type: 'image_generation_call',
+  },
+] as const;
+
+describe('Output item type guards - mutual exclusion', () => {
+  for (const guard of guards) {
+    describe(guard.name, () => {
+      it(`returns true for its own item type: ${guard.type}`, () => {
+        const item = {
+          type: guard.type,
+        };
+        expect(guard.fn(item)).toBe(true);
+      });
+
+      it('returns false for at least one other output item type', () => {
+        const other = guards.find((g) => g.type !== guard.type)!;
+        const item = {
+          type: other.type,
+        };
+        expect(guard.fn(item)).toBe(false);
+      });
+
+      it('returns false for null, undefined, and primitive', () => {
+        expect(guard.fn(null)).toBe(false);
+        expect(guard.fn(undefined)).toBe(false);
+        expect(guard.fn(42)).toBe(false);
+      });
+    });
+  }
+});
diff --git a/tests/boundaries/response-stream-event-guards.test.ts b/tests/boundaries/response-stream-event-guards.test.ts
new file mode 100644
index 0000000..633f601
--- /dev/null
+++ b/tests/boundaries/response-stream-event-guards.test.ts
@@ -0,0 +1,59 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  isToolCallOutputEvent,
+  isToolPreliminaryResultEvent,
+  isToolResultEvent,
+  isTurnEndEvent,
+  isTurnStartEvent,
+} from '../../src/lib/tool-types.js';
+
+describe('ResponseStreamEvent guards - mutual exclusion', () => {
+  it('isToolPreliminaryResultEvent rejects { type: "tool.result" }', () => {
+    const event = {
+      type: 'tool.result',
+      toolCallId: 'c1',
+      result: 42,
+      timestamp: 1,
+    } as any;
+    expect(isToolPreliminaryResultEvent(event)).toBe(false);
+  });
+
+  it('isToolResultEvent rejects { type: "tool.preliminary_result" }', () => {
+    const event = {
+      type: 'tool.preliminary_result',
+      toolCallId: 'c1',
+      result: 42,
+      timestamp: 1,
+    } as any;
+    expect(isToolResultEvent(event)).toBe(false);
+  });
+
+  it('isTurnStartEvent rejects { type: "turn.end" }', () => {
+    const event = {
+      type: 'turn.end',
+      turnNumber: 1,
+      timestamp: 1,
+    } as any;
+    expect(isTurnStartEvent(event)).toBe(false);
+  });
+
+  it('isTurnEndEvent rejects { type: "turn.start" }', () => {
+    const event = {
+      type: 'turn.start',
+      turnNumber: 1,
+      timestamp: 1,
+    } as any;
+    expect(isTurnEndEvent(event)).toBe(false);
+  });
+
+  it('isToolCallOutputEvent rejects { type: "tool.result" }', () => {
+    const event = {
+      type: 'tool.result',
+      toolCallId: 'c1',
+      result: 42,
+      timestamp: 1,
+    } as any;
+    expect(isToolCallOutputEvent(event)).toBe(false);
+  });
+});
diff --git a/tests/boundaries/stream-event-guards.test.ts b/tests/boundaries/stream-event-guards.test.ts
new file mode 100644
index 0000000..7c6a052
--- /dev/null
+++ b/tests/boundaries/stream-event-guards.test.ts
@@ -0,0 +1,96 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  isFunctionCallArgumentsDeltaEvent,
+  isFunctionCallArgumentsDoneEvent,
+  isOutputItemAddedEvent,
+  isOutputItemDoneEvent,
+  isOutputTextDeltaEvent,
+  isReasoningDeltaEvent,
+  isResponseCompletedEvent,
+  isResponseFailedEvent,
+  isResponseIncompleteEvent,
+} from '../../src/lib/stream-type-guards.js';
+
+const guards = [
+  {
+    name: 'isOutputTextDeltaEvent',
+    fn: isOutputTextDeltaEvent,
+    type: 'response.output_text.delta',
+  },
+  {
+    name: 'isReasoningDeltaEvent',
+    fn: isReasoningDeltaEvent,
+    type: 'response.reasoning_text.delta',
+  },
+  {
+    name: 'isFunctionCallArgumentsDeltaEvent',
+    fn: isFunctionCallArgumentsDeltaEvent,
+    type: 'response.function_call_arguments.delta',
+  },
+  {
+    name: 'isOutputItemAddedEvent',
+    fn: isOutputItemAddedEvent,
+    type: 'response.output_item.added',
+  },
+  {
+    name: 'isOutputItemDoneEvent',
+    fn: isOutputItemDoneEvent,
+    type: 'response.output_item.done',
+  },
+  {
+    name: 'isResponseCompletedEvent',
+    fn: isResponseCompletedEvent,
+    type: 'response.completed',
+  },
+  {
+    name: 'isResponseFailedEvent',
+    fn: isResponseFailedEvent,
+    type: 'response.failed',
+  },
+  {
+    name: 'isResponseIncompleteEvent',
+    fn: isResponseIncompleteEvent,
+    type: 'response.incomplete',
+  },
+  {
+    name: 'isFunctionCallArgumentsDoneEvent',
+    fn: isFunctionCallArgumentsDoneEvent,
+    type: 'response.function_call_arguments.done',
+  },
+] as const;
+
+describe('Stream event type guards - mutual exclusion', () => {
+  for (const guard of guards) {
+    describe(guard.name, () => {
+      it(`returns true for its own event type: ${guard.type}`, () => {
+        const event = {
+          type: guard.type,
+        } as any;
+        expect(guard.fn(event)).toBe(true);
+      });
+
+      it('returns false for at least one other stream event type', () => {
+        const other = guards.find((g) => g.type !== guard.type)!;
+        const event = {
+          type: other.type,
+        } as any;
+        expect(guard.fn(event)).toBe(false);
+      });
+
+      it('returns false for objects missing type or with wrong type', () => {
+        expect(guard.fn({} as any)).toBe(false);
+        expect(
+          guard.fn({
+            type: 'unrelated.event',
+          } as any),
+        ).toBe(false);
+        expect(
+          guard.fn({
+            type: '',
+          } as any),
+        ).toBe(false);
+      });
+    });
+  }
+});
diff --git a/tests/boundaries/tool-factory-shapes.test.ts b/tests/boundaries/tool-factory-shapes.test.ts
new file mode 100644
index 0000000..cb70cc3
--- /dev/null
+++ b/tests/boundaries/tool-factory-shapes.test.ts
@@ -0,0 +1,92 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+
+const inputSchema = z.object({
+  query: z.string(),
+});
+
+describe('tool() factory - three tool types produce distinct structures', () => {
+  it('regular tool has execute function, no eventSchema', () => {
+    const t = tool({
+      name: 'regular',
+      inputSchema,
+      execute: async () => 'done',
+    });
+    expect(t.function).toHaveProperty('execute');
+    expect(t.function).not.toHaveProperty('eventSchema');
+  });
+
+  it('generator tool has execute function AND eventSchema AND outputSchema', () => {
+    const t = tool({
+      name: 'generator',
+      inputSchema,
+      eventSchema: z.object({
+        status: z.string(),
+      }),
+      outputSchema: z.object({
+        result: z.string(),
+      }),
+      execute: async function* () {
+        yield {
+          status: 'working',
+        };
+        return {
+          result: 'done',
+        };
+      },
+    });
+    expect(t.function).toHaveProperty('execute');
+    expect(t.function).toHaveProperty('eventSchema');
+    expect(t.function).toHaveProperty('outputSchema');
+  });
+
+  it('manual tool has NO execute, no eventSchema, no outputSchema', () => {
+    const t = tool({
+      name: 'manual',
+      inputSchema,
+      execute: false,
+    });
+    expect(t.function).not.toHaveProperty('execute');
+    expect(t.function).not.toHaveProperty('eventSchema');
+    expect(t.function).not.toHaveProperty('outputSchema');
+  });
+
+  it('same input schema -> three different tool types depending on config', () => {
+    const regular = tool({
+      name: 'r',
+      inputSchema,
+      execute: async () => 'ok',
+    });
+    const generator = tool({
+      name: 'g',
+      inputSchema,
+      eventSchema: z.object({
+        s: z.string(),
+      }),
+      outputSchema: z.object({
+        r: z.string(),
+      }),
+      execute: async function* () {
+        return {
+          r: 'ok',
+        };
+      },
+    });
+    const manual = tool({
+      name: 'm',
+      inputSchema,
+      execute: false,
+    });
+
+    expect('execute' in regular.function).toBe(true);
+    expect('eventSchema' in regular.function).toBe(false);
+
+    expect('execute' in generator.function).toBe(true);
+    expect('eventSchema' in generator.function).toBe(true);
+
+    expect('execute' in manual.function).toBe(false);
+    expect('eventSchema' in manual.function).toBe(false);
+  });
+});
diff --git a/tests/boundaries/tool-type-guards.test.ts b/tests/boundaries/tool-type-guards.test.ts
new file mode 100644
index 0000000..83dff73
--- /dev/null
+++ b/tests/boundaries/tool-type-guards.test.ts
@@ -0,0 +1,86 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  hasExecuteFunction,
+  isGeneratorTool,
+  isManualTool,
+  isRegularExecuteTool,
+} from '../../src/lib/tool-types.js';
+
+describe('Tool type guards - mutual exclusion across 4 classifiers', () => {
+  const regularTool = tool({
+    name: 'regular',
+    description: 'A regular tool',
+    inputSchema: z.object({
+      x: z.number(),
+    }),
+    execute: async (args) => args.x * 2,
+  });
+
+  const generatorTool = tool({
+    name: 'generator',
+    description: 'A generator tool',
+    inputSchema: z.object({
+      x: z.number(),
+    }),
+    eventSchema: z.object({
+      progress: z.number(),
+    }),
+    outputSchema: z.object({
+      result: z.number(),
+    }),
+    execute: async function* (args) {
+      yield {
+        progress: 50,
+      };
+      return {
+        result: args.x * 2,
+      };
+    },
+  });
+
+  const manualTool = tool({
+    name: 'manual',
+    description: 'A manual tool',
+    inputSchema: z.object({
+      x: z.number(),
+    }),
+    execute: false,
+  });
+
+  it('regular tool: hasExecuteFunction=T, isRegularExecuteTool=T, isGeneratorTool=F, isManualTool=F', () => {
+    expect(hasExecuteFunction(regularTool)).toBe(true);
+    expect(isRegularExecuteTool(regularTool)).toBe(true);
+    expect(isGeneratorTool(regularTool)).toBe(false);
+    expect(isManualTool(regularTool)).toBe(false);
+  });
+
+  it('generator tool: hasExecuteFunction=T, isRegularExecuteTool=F, isGeneratorTool=T, isManualTool=F', () => {
+    expect(hasExecuteFunction(generatorTool)).toBe(true);
+    expect(isRegularExecuteTool(generatorTool)).toBe(false);
+    expect(isGeneratorTool(generatorTool)).toBe(true);
+    expect(isManualTool(generatorTool)).toBe(false);
+  });
+
+  it('manual tool: hasExecuteFunction=F, isRegularExecuteTool=F, isGeneratorTool=F, isManualTool=T', () => {
+    expect(hasExecuteFunction(manualTool)).toBe(false);
+    expect(isRegularExecuteTool(manualTool)).toBe(false);
+    expect(isGeneratorTool(manualTool)).toBe(false);
+    expect(isManualTool(manualTool)).toBe(true);
+  });
+
+  it('no tool satisfies both isRegularExecuteTool and isGeneratorTool', () => {
+    const allTools = [
+      regularTool,
+      generatorTool,
+      manualTool,
+    ];
+    for (const t of allTools) {
+      const isRegular = isRegularExecuteTool(t);
+      const isGenerator = isGeneratorTool(t);
+      expect(isRegular && isGenerator).toBe(false);
+    }
+  });
+});
diff --git a/tests/composition/README.md b/tests/composition/README.md
new file mode 100644
index 0000000..fe8db71
--- /dev/null
+++ b/tests/composition/README.md
@@ -0,0 +1,25 @@
+# Composition Tests
+
+Tests in this folder verify that **capabilities compose** — the output of one module is accepted as input by the next. No detailed correctness of individual outputs; just: do they connect?
+
+## What belongs here
+
+- Module A's output shape is accepted by Module B's input
+- Data flows through a two-module chain without errors
+- Multi-consumer scenarios where the same source feeds multiple consumers
+- Cross-module contract verification (e.g., orchestrator utilities consume real executor results)
+- Extendable: when new modules are introduced, their connection tests with existing modules go here
+
+## Examples
+
+- `tool()` output is accepted by `isRegularExecuteTool` / `convertToolsToAPIFormat`
+- Two `ReusableReadableStream` consumers both receive all items
+- `executeNextTurnParamsFunctions` output is accepted by `applyNextTurnParamsToRequest`
+- `createUnsentResult` output is accepted by `unsentResultsToAPIFormat`
+- `partitionToolCalls` internally uses `toolRequiresApproval`
+
+## What does NOT belong here
+
+- Verifying the detailed correctness of each module's output (→ `behavior/`)
+- Verifying that similar modules don't accept each other's input (→ `boundaries/`)
+- Full multi-layer pipelines with per-layer assertions (→ `pipelines/`)
diff --git a/tests/composition/context-flow.test.ts b/tests/composition/context-flow.test.ts
new file mode 100644
index 0000000..4be50c1
--- /dev/null
+++ b/tests/composition/context-flow.test.ts
@@ -0,0 +1,46 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import {
+  buildToolExecuteContext,
+  resolveContext,
+  ToolContextStore,
+} from '../../src/lib/tool-context.js';
+import { buildTurnContext } from '../../src/lib/turn-context.js';
+
+describe('Context flow: turn context -> tool execute context -> tool function', () => {
+  it('buildToolExecuteContext receives TurnContext from buildTurnContext -> tool execute receives correct numberOfTurns', () => {
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 3,
+    });
+    const store = new ToolContextStore();
+
+    const execCtx = buildToolExecuteContext(turnCtx, store, 'test', undefined);
+    expect(execCtx.numberOfTurns).toBe(3);
+  });
+
+  it('resolveContext passes TurnContext to context function -> result populates ToolContextStore -> buildToolExecuteContext.local reads from store', async () => {
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 2,
+    });
+    const contextFn = (ctx: any) => ({
+      apiKey: `key-for-turn-${ctx.numberOfTurns}`,
+    });
+
+    const resolved = await resolveContext(contextFn, turnCtx);
+    expect(resolved).toEqual({
+      apiKey: 'key-for-turn-2',
+    });
+
+    const store = new ToolContextStore({
+      test: resolved,
+    });
+    const contextSchema = z.object({
+      apiKey: z.string(),
+    });
+
+    const execCtx = buildToolExecuteContext(turnCtx, store, 'test', contextSchema);
+    expect(execCtx.local).toEqual({
+      apiKey: 'key-for-turn-2',
+    });
+  });
+});
diff --git a/tests/composition/format-compatibility.test.ts b/tests/composition/format-compatibility.test.ts
new file mode 100644
index 0000000..393acf2
--- /dev/null
+++ b/tests/composition/format-compatibility.test.ts
@@ -0,0 +1,49 @@
+import { describe, expect, it } from 'vitest';
+import { toClaudeMessage } from '../../src/lib/anthropic-compat.js';
+import { toChatMessage } from '../../src/lib/chat-compat.js';
+
+function makeResponse(text: string) {
+  return {
+    id: 'r1',
+    output: [
+      {
+        type: 'message' as const,
+        id: 'm1',
+        role: 'assistant' as const,
+        status: 'completed' as const,
+        content: [
+          {
+            type: 'output_text' as const,
+            text,
+            annotations: [],
+          },
+        ],
+      },
+    ],
+    status: 'completed' as const,
+    outputText: text,
+    model: 'test-model',
+    usage: {
+      totalTokens: 100,
+      inputTokens: 50,
+      outputTokens: 50,
+    },
+  };
+}
+
+describe('Format compatibility: compat layers -> stream-transformers', () => {
+  it('toChatMessage delegates to extractMessageFromResponse -> returns ChatAssistantMessage', () => {
+    const response = makeResponse('Hello world');
+    const chatMsg = toChatMessage(response as any);
+    expect(chatMsg.role).toBe('assistant');
+    expect(chatMsg.content).toBe('Hello world');
+  });
+
+  it('toClaudeMessage delegates to convertToClaudeMessage -> returns ClaudeMessage', () => {
+    const response = makeResponse('Hello world');
+    const claudeMsg = toClaudeMessage(response as any);
+    expect(claudeMsg.role).toBe('assistant');
+    expect(claudeMsg.content).toBeDefined();
+    expect(Array.isArray(claudeMsg.content)).toBe(true);
+  });
+});
diff --git a/tests/composition/input-normalization.test.ts b/tests/composition/input-normalization.test.ts
new file mode 100644
index 0000000..15225ea
--- /dev/null
+++ b/tests/composition/input-normalization.test.ts
@@ -0,0 +1,29 @@
+import { describe, expect, it } from 'vitest';
+
+import { appendToMessages } from '../../src/lib/conversation-state.js';
+
+describe('Input normalization: turn-context -> conversation-state', () => {
+  it('appendToMessages with string input normalizes to array before append', () => {
+    const existing = 'first message';
+    const newItem = {
+      role: 'user' as const,
+      content: 'second message',
+    };
+    const result = appendToMessages(
+      existing as any,
+      [
+        newItem,
+      ] as any,
+    );
+
+    expect(result.length).toBeGreaterThan(1);
+    // First item is normalized from string
+    const firstItem = result[0]!;
+    expect(firstItem).toHaveProperty('role', 'user');
+    expect(firstItem).toHaveProperty('content', 'first message');
+    // Second item is the appended message
+    const lastItem = result[result.length - 1]!;
+    expect(lastItem).toHaveProperty('role', 'user');
+    expect(lastItem).toHaveProperty('content', 'second message');
+  });
+});
diff --git a/tests/composition/next-turn-params-flow.test.ts b/tests/composition/next-turn-params-flow.test.ts
new file mode 100644
index 0000000..d99b1a8
--- /dev/null
+++ b/tests/composition/next-turn-params-flow.test.ts
@@ -0,0 +1,62 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  applyNextTurnParamsToRequest,
+  buildNextTurnParamsContext,
+  executeNextTurnParamsFunctions,
+} from '../../src/lib/next-turn-params.js';
+
+describe('Next-turn params: tools -> computation -> request modification', () => {
+  it('executeNextTurnParamsFunctions output accepted by applyNextTurnParamsToRequest -> modified request', async () => {
+    const toolsWithNextTurnParams = [
+      {
+        type: 'function',
+        function: {
+          name: 'search',
+          nextTurnParams: {
+            temperature: () => 0.5,
+          },
+        },
+      },
+    ];
+
+    const toolCalls = [
+      {
+        id: 'tc_1',
+        name: 'search',
+        arguments: {
+          q: 'test',
+        },
+      },
+    ];
+    const request = {
+      model: 'gpt-4',
+      temperature: 0.7,
+    };
+
+    const params = await executeNextTurnParamsFunctions(
+      toolCalls as any,
+      toolsWithNextTurnParams as any,
+      request as any,
+    );
+
+    expect(params).toHaveProperty('temperature', 0.5);
+
+    const modified = applyNextTurnParamsToRequest(request as any, params);
+    expect(modified.temperature).toBe(0.5);
+    expect(modified.model).toBe('gpt-4');
+  });
+
+  it('buildNextTurnParamsContext extracts from request -> context passed to nextTurnParams functions', () => {
+    const request = {
+      model: 'gpt-4',
+      temperature: 0.7,
+      input: 'hello',
+    };
+
+    const ctx = buildNextTurnParamsContext(request as any);
+    expect(ctx.model).toBe('gpt-4');
+    expect(ctx.temperature).toBe(0.7);
+    expect(ctx.input).toBe('hello');
+  });
+});
diff --git a/tests/composition/orchestrator-executor.test.ts b/tests/composition/orchestrator-executor.test.ts
new file mode 100644
index 0000000..8afd8e2
--- /dev/null
+++ b/tests/composition/orchestrator-executor.test.ts
@@ -0,0 +1,66 @@
+import { describe, expect, it } from 'vitest';
+import {
+  hasToolExecutionErrors,
+  summarizeToolExecutions,
+  toolResultsToMap,
+} from '../../src/lib/tool-orchestrator.js';
+import type { Tool, ToolExecutionResult } from '../../src/lib/tool-types.js';
+
+describe('Orchestrator <- Executor: utility functions consume real ToolExecutionResult', () => {
+  const successResult: ToolExecutionResult<Tool> = {
+    toolCallId: 'tc_1',
+    toolName: 'search',
+    result: {
+      data: 'found',
+    },
+  };
+
+  const errorResult: ToolExecutionResult<Tool> = {
+    toolCallId: 'tc_2',
+    toolName: 'delete',
+    result: null,
+    error: new Error('Permission denied'),
+  };
+
+  it('toolResultsToMap correctly maps real ToolExecutionResult objects', () => {
+    const map = toolResultsToMap([
+      successResult,
+      errorResult,
+    ]);
+    expect(map.get('tc_1')).toEqual({
+      result: {
+        data: 'found',
+      },
+      preliminaryResults: undefined,
+    });
+    expect(map.get('tc_2')).toEqual({
+      result: null,
+      preliminaryResults: undefined,
+    });
+  });
+
+  it('hasToolExecutionErrors detects error field on real ToolExecutionResult', () => {
+    expect(
+      hasToolExecutionErrors([
+        successResult,
+      ]),
+    ).toBe(false);
+    expect(
+      hasToolExecutionErrors([
+        successResult,
+        errorResult,
+      ]),
+    ).toBe(true);
+  });
+
+  it('summarizeToolExecutions formats real success + error results', () => {
+    const summary = summarizeToolExecutions([
+      successResult,
+      errorResult,
+    ]);
+    expect(summary).toContain('search');
+    expect(summary).toContain('SUCCESS');
+    expect(summary).toContain('delete');
+    expect(summary).toContain('Permission denied');
+  });
+});
diff --git a/tests/composition/state-machine.test.ts b/tests/composition/state-machine.test.ts
new file mode 100644
index 0000000..6e4e1a1
--- /dev/null
+++ b/tests/composition/state-machine.test.ts
@@ -0,0 +1,76 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  createRejectedResult,
+  createUnsentResult,
+  partitionToolCalls,
+  unsentResultsToAPIFormat,
+} from '../../src/lib/conversation-state.js';
+
+describe('State machine: state -> approval -> resumption', () => {
+  it('partitionToolCalls uses toolRequiresApproval internally -> partitioned results are consistent', async () => {
+    const approvalTool = tool({
+      name: 'dangerous',
+      inputSchema: z.object({
+        target: z.string(),
+      }),
+      requireApproval: true,
+      execute: async () => 'deleted',
+    });
+
+    const safeTool = tool({
+      name: 'safe',
+      inputSchema: z.object({
+        q: z.string(),
+      }),
+      execute: async () => 'result',
+    });
+
+    const toolCalls = [
+      {
+        id: 'tc_1',
+        name: 'dangerous',
+        arguments: {
+          target: 'file.txt',
+        },
+      },
+      {
+        id: 'tc_2',
+        name: 'safe',
+        arguments: {
+          q: 'hello',
+        },
+      },
+    ];
+
+    const tools = [
+      approvalTool,
+      safeTool,
+    ];
+    const partition = await partitionToolCalls(toolCalls as any, tools);
+
+    expect(partition.requiresApproval).toHaveLength(1);
+    expect(partition.autoExecute).toHaveLength(1);
+    expect(partition.requiresApproval[0]!.name).toBe('dangerous');
+    expect(partition.autoExecute[0]!.name).toBe('safe');
+  });
+
+  it('createUnsentResult / createRejectedResult output accepted by unsentResultsToAPIFormat', () => {
+    const unsent = createUnsentResult('tc_1', 'search', {
+      data: 'found',
+    });
+    const rejected = createRejectedResult('tc_2', 'delete');
+
+    const formatted = unsentResultsToAPIFormat([
+      unsent,
+      rejected,
+    ]);
+    expect(formatted).toHaveLength(2);
+    expect(formatted[0]!.callId).toBe('tc_1');
+    expect(formatted[0]!.type).toBe('function_call_output');
+    expect(formatted[1]!.callId).toBe('tc_2');
+    expect(formatted[1]!.type).toBe('function_call_output');
+  });
+});
diff --git a/tests/composition/stream-data-pipeline.test.ts b/tests/composition/stream-data-pipeline.test.ts
new file mode 100644
index 0000000..13215be
--- /dev/null
+++ b/tests/composition/stream-data-pipeline.test.ts
@@ -0,0 +1,163 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import {
+  extractToolCallsFromResponse,
+  responseHasToolCalls,
+} from '../../src/lib/stream-transformers.js';
+
+function makeStream<T>(items: T[]): ReusableReadableStream<T> {
+  const source = new ReadableStream<T>({
+    start(controller) {
+      for (const item of items) {
+        controller.enqueue(item);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+async function collect<T>(stream: AsyncIterable<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const item of stream) {
+    result.push(item);
+  }
+  return result;
+}
+
+describe('Stream data pipeline: source -> guards -> transformers -> consumers', () => {
+  it('two consumers created from same ReusableReadableStream both receive all items', async () => {
+    const stream = makeStream([
+      1,
+      2,
+      3,
+    ]);
+    const consumer1 = stream.createConsumer();
+    const consumer2 = stream.createConsumer();
+
+    const [result1, result2] = await Promise.all([
+      collect(consumer1),
+      collect(consumer2),
+    ]);
+    expect(result1).toEqual([
+      1,
+      2,
+      3,
+    ]);
+    expect(result2).toEqual([
+      1,
+      2,
+      3,
+    ]);
+  });
+
+  it('consumer created after some items buffered still gets all items from position 0', async () => {
+    const stream = makeStream([
+      10,
+      20,
+      30,
+    ]);
+
+    const consumer1 = stream.createConsumer();
+    const items1: number[] = [];
+    for await (const item of consumer1) {
+      items1.push(item);
+      if (items1.length === 2) {
+        break;
+      }
+    }
+
+    // Create second consumer after first has consumed some items
+    const consumer2 = stream.createConsumer();
+    const items2 = await collect(consumer2);
+    expect(items2).toEqual([
+      10,
+      20,
+      30,
+    ]);
+  });
+
+  it('consumer created after source completes still gets all buffered items', async () => {
+    const stream = makeStream([
+      1,
+      2,
+      3,
+    ]);
+    // Consume fully to complete
+    const c1 = stream.createConsumer();
+    await collect(c1);
+
+    // Late join after completion
+    const c2 = stream.createConsumer();
+    const result = await collect(c2);
+    expect(result).toEqual([
+      1,
+      2,
+      3,
+    ]);
+  });
+
+  it('responseHasToolCalls returning true <-> extractToolCallsFromResponse returning non-empty', () => {
+    const responseWithTools = {
+      id: 'r1',
+      output: [
+        {
+          type: 'function_call' as const,
+          id: 'fc1',
+          callId: 'fc1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const hasTools = responseHasToolCalls(responseWithTools as any);
+    const extracted = extractToolCallsFromResponse(responseWithTools as any);
+
+    expect(hasTools).toBe(true);
+    expect(extracted.length).toBeGreaterThan(0);
+
+    const responseNoTools = {
+      id: 'r2',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'm1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Hello',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const hasTools2 = responseHasToolCalls(responseNoTools as any);
+    const extracted2 = extractToolCallsFromResponse(responseNoTools as any);
+
+    expect(hasTools2).toBe(false);
+    expect(extracted2).toEqual([]);
+  });
+});
diff --git a/tests/composition/tool-lifecycle.test.ts b/tests/composition/tool-lifecycle.test.ts
new file mode 100644
index 0000000..28d1f95
--- /dev/null
+++ b/tests/composition/tool-lifecycle.test.ts
@@ -0,0 +1,98 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  convertToolsToAPIFormat,
+  executeTool,
+  findToolByName,
+} from '../../src/lib/tool-executor.js';
+import { isGeneratorTool, isManualTool, isRegularExecuteTool } from '../../src/lib/tool-types.js';
+
+describe('Tool lifecycle: definition -> classification -> execution', () => {
+  const regularTool = tool({
+    name: 'add',
+    description: 'Add numbers',
+    inputSchema: z.object({
+      a: z.number(),
+      b: z.number(),
+    }),
+    execute: async (args) => args.a + args.b,
+  });
+
+  const generatorTool = tool({
+    name: 'stream_add',
+    description: 'Stream add',
+    inputSchema: z.object({
+      a: z.number(),
+      b: z.number(),
+    }),
+    eventSchema: z.object({
+      progress: z.number(),
+    }),
+    outputSchema: z.object({
+      sum: z.number(),
+    }),
+    execute: async function* (args) {
+      yield {
+        progress: 50,
+      };
+      return {
+        sum: args.a + args.b,
+      };
+    },
+  });
+
+  const manualTool = tool({
+    name: 'manual_op',
+    description: 'Manual tool',
+    inputSchema: z.object({
+      x: z.string(),
+    }),
+    execute: false,
+  });
+
+  it('tool() output is accepted by isRegularExecuteTool / isGeneratorTool / isManualTool', () => {
+    expect(isRegularExecuteTool(regularTool)).toBe(true);
+    expect(isGeneratorTool(generatorTool)).toBe(true);
+    expect(isManualTool(manualTool)).toBe(true);
+  });
+
+  it('tool() output is accepted by convertToolsToAPIFormat', () => {
+    const apiTools = convertToolsToAPIFormat([
+      regularTool,
+      generatorTool,
+      manualTool,
+    ]);
+    expect(apiTools).toHaveLength(3);
+    expect(apiTools[0]!.name).toBe('add');
+    expect(apiTools[0]!.type).toBe('function');
+    expect(apiTools[1]!.name).toBe('stream_add');
+    expect(apiTools[2]!.name).toBe('manual_op');
+  });
+
+  it('extractToolCallsFromResponse output shape is accepted by findToolByName + executeTool', async () => {
+    const tools = [
+      regularTool,
+      generatorTool,
+      manualTool,
+    ];
+    const toolCallShape = {
+      id: 'tc_1',
+      name: 'add',
+      arguments: {
+        a: 1,
+        b: 2,
+      },
+    };
+
+    const found = findToolByName(tools, toolCallShape.name);
+    expect(found).toBeDefined();
+
+    const result = await executeTool(found!, toolCallShape, {
+      numberOfTurns: 1,
+    });
+    expect(result.toolCallId).toBe('tc_1');
+    expect(result.result).toBe(3);
+  });
+});
diff --git a/tests/contracts/README.md b/tests/contracts/README.md
new file mode 100644
index 0000000..f8b468b
--- /dev/null
+++ b/tests/contracts/README.md
@@ -0,0 +1,26 @@
+# Contracts Tests
+
+Tests in this folder verify that a capability **works as specified AND its boundary with similar capabilities is correct**. Both the "what it does" and the "what it does NOT do" are asserted in the same test.
+
+## What belongs here
+
+- Stop conditions that check their own criterion AND ignore all others
+- Delta extractors that yield their event type AND skip peer event types
+- Message stream builders that produce their format AND differ structurally from peers
+- Executor functions that handle their tool type AND reject the other type
+- Async param resolution where static, function, and client-only fields are each handled distinctly
+- Extendable: when a capability gains new peers or alternatives, their combined behavior-and-boundary tests go here
+
+## Examples
+
+- `stepCountIs(3)` returns true at 3 steps AND ignores tool names, tokens, cost, finishReason
+- `extractTextDeltas` yields text deltas AND skips reasoning + tool deltas in the same stream
+- `executeRegularTool` succeeds with regular tools AND throws when given a generator tool
+- `resolveAsyncFunctions` passes static values through, resolves functions, AND strips client-only fields
+- `fromClaudeMessages` maps text blocks to messages, tool_use to function calls, each distinctly
+
+## What does NOT belong here
+
+- Pure specification without boundary checking (→ `behavior/`)
+- Pure boundary checking without verifying output correctness (→ `boundaries/`)
+- Cross-module composition (→ `integration/` or `pipelines/`)
diff --git a/tests/contracts/async-params.test.ts b/tests/contracts/async-params.test.ts
new file mode 100644
index 0000000..c647630
--- /dev/null
+++ b/tests/contracts/async-params.test.ts
@@ -0,0 +1,105 @@
+import { describe, expect, it } from 'vitest';
+
+import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
+
+describe('resolveAsyncFunctions - three field types handled distinctly', () => {
+  const turnCtx = {
+    numberOfTurns: 2,
+  } as any;
+
+  it('static values (model, temperature as literals) -> passed through unchanged', async () => {
+    const result = await resolveAsyncFunctions(
+      {
+        model: 'gpt-4',
+        temperature: 0.7,
+      } as any,
+      turnCtx,
+    );
+    expect(result.model).toBe('gpt-4');
+    expect(result.temperature).toBe(0.7);
+  });
+
+  it('function values -> resolved by calling with context, result stored', async () => {
+    const result = await resolveAsyncFunctions(
+      {
+        temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
+      } as any,
+      turnCtx,
+    );
+    expect(result.temperature).toBe(0.2);
+  });
+
+  it('client-only fields (stopWhen, state, requireApproval, context, onTurnStart, onTurnEnd) -> stripped entirely', async () => {
+    const result = await resolveAsyncFunctions(
+      {
+        model: 'gpt-4',
+        stopWhen: () => true,
+        state: {
+          get: () => null,
+        },
+        requireApproval: () => false,
+        context: {
+          shared: {},
+        },
+        onTurnStart: () => {},
+        onTurnEnd: () => {},
+      } as any,
+      turnCtx,
+    );
+    expect(result).not.toHaveProperty('stopWhen');
+    expect(result).not.toHaveProperty('state');
+    expect(result).not.toHaveProperty('requireApproval');
+    expect(result).not.toHaveProperty('context');
+    expect(result).not.toHaveProperty('onTurnStart');
+    expect(result).not.toHaveProperty('onTurnEnd');
+    expect(result.model).toBe('gpt-4');
+  });
+
+  it('tools field -> preserved (exception to client-only stripping)', async () => {
+    const tools = [
+      {
+        type: 'function',
+        function: {
+          name: 'test',
+        },
+      },
+    ];
+    const result = await resolveAsyncFunctions(
+      {
+        model: 'gpt-4',
+        tools,
+      } as any,
+      turnCtx,
+    );
+    expect(result).toHaveProperty('tools');
+  });
+
+  it('function error -> wraps with field name context', async () => {
+    await expect(
+      resolveAsyncFunctions(
+        {
+          temperature: () => {
+            throw new Error('boom');
+          },
+        } as any,
+        turnCtx,
+      ),
+    ).rejects.toThrow('Failed to resolve async function for field "temperature"');
+  });
+
+  it('mix of static + function + client-only in one call -> all handled correctly', async () => {
+    const result = await resolveAsyncFunctions(
+      {
+        model: 'gpt-4',
+        temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
+        stopWhen: () => true,
+        input: 'hello',
+      } as any,
+      turnCtx,
+    );
+    expect(result.model).toBe('gpt-4');
+    expect(result.temperature).toBe(0.2);
+    expect(result).not.toHaveProperty('stopWhen');
+    expect(result.input).toBe('hello');
+  });
+});
diff --git a/tests/contracts/consume-stream-completion.test.ts b/tests/contracts/consume-stream-completion.test.ts
new file mode 100644
index 0000000..638a738
--- /dev/null
+++ b/tests/contracts/consume-stream-completion.test.ts
@@ -0,0 +1,80 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { consumeStreamForCompletion } from '../../src/lib/stream-transformers.js';
+
+function makeStream(events: any[]): ReusableReadableStream<any> {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(event);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+describe('consumeStreamForCompletion - completion vs failure distinction', () => {
+  it('response.completed event -> returns the response', async () => {
+    const response = {
+      id: 'r1',
+      status: 'completed',
+      output: [],
+    };
+    const stream = makeStream([
+      {
+        type: 'response.output_text.delta',
+        delta: 'hello',
+      },
+      {
+        type: 'response.completed',
+        response,
+      },
+    ]);
+    const result = await consumeStreamForCompletion(stream);
+    expect(result).toEqual(response);
+  });
+
+  it('response.incomplete event -> returns the incomplete response', async () => {
+    const response = {
+      id: 'r1',
+      status: 'incomplete',
+      output: [],
+    };
+    const stream = makeStream([
+      {
+        type: 'response.incomplete',
+        response,
+      },
+    ]);
+    const result = await consumeStreamForCompletion(stream);
+    expect(result).toEqual(response);
+  });
+
+  it('response.failed event -> throws', async () => {
+    const stream = makeStream([
+      {
+        type: 'response.failed',
+        response: {
+          error: {
+            message: 'rate limited',
+          },
+        },
+      },
+    ]);
+    await expect(consumeStreamForCompletion(stream)).rejects.toThrow('Response failed');
+  });
+
+  it('stream ends without completion event -> throws', async () => {
+    const stream = makeStream([
+      {
+        type: 'response.output_text.delta',
+        delta: 'hello',
+      },
+    ]);
+    await expect(consumeStreamForCompletion(stream)).rejects.toThrow(
+      'Stream ended without completion event',
+    );
+  });
+});
diff --git a/tests/contracts/delta-extractors.test.ts b/tests/contracts/delta-extractors.test.ts
new file mode 100644
index 0000000..a56c0d7
--- /dev/null
+++ b/tests/contracts/delta-extractors.test.ts
@@ -0,0 +1,111 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import {
+  extractReasoningDeltas,
+  extractTextDeltas,
+  extractToolDeltas,
+} from '../../src/lib/stream-transformers.js';
+
+function makeStream(events: any[]): ReusableReadableStream<any> {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(event);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+async function collect(iter: AsyncIterable<string>): Promise<string[]> {
+  const result: string[] = [];
+  for await (const item of iter) {
+    result.push(item);
+  }
+  return result;
+}
+
+describe('Delta extractors - each yields ONLY its event type', () => {
+  const mixedEvents = [
+    {
+      type: 'response.output_text.delta',
+      delta: 'hello',
+    },
+    {
+      type: 'response.reasoning_text.delta',
+      delta: 'thinking',
+    },
+    {
+      type: 'response.function_call_arguments.delta',
+      delta: '{"q":',
+    },
+    {
+      type: 'response.output_text.delta',
+      delta: ' world',
+    },
+    {
+      type: 'response.reasoning_text.delta',
+      delta: ' more',
+    },
+    {
+      type: 'response.function_call_arguments.delta',
+      delta: '"test"}',
+    },
+  ];
+
+  it('extractTextDeltas yields strings from output_text.delta events; reasoning + tool deltas ignored', async () => {
+    const stream = makeStream(mixedEvents);
+    const result = await collect(extractTextDeltas(stream));
+    expect(result).toEqual([
+      'hello',
+      ' world',
+    ]);
+  });
+
+  it('extractReasoningDeltas yields strings from reasoning_text.delta events; ignores text + tool', async () => {
+    const stream = makeStream(mixedEvents);
+    const result = await collect(extractReasoningDeltas(stream));
+    expect(result).toEqual([
+      'thinking',
+      ' more',
+    ]);
+  });
+
+  it('extractToolDeltas yields strings from function_call_arguments.delta events; ignores text + reasoning', async () => {
+    const stream = makeStream(mixedEvents);
+    const result = await collect(extractToolDeltas(stream));
+    expect(result).toEqual([
+      '{"q":',
+      '"test"}',
+    ]);
+  });
+
+  it('extractTextDeltas skips events with empty/undefined delta', async () => {
+    const events = [
+      {
+        type: 'response.output_text.delta',
+        delta: 'hello',
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: '',
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: undefined,
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: ' world',
+      },
+    ];
+    const stream = makeStream(events);
+    const result = await collect(extractTextDeltas(stream));
+    expect(result).toEqual([
+      'hello',
+      ' world',
+    ]);
+  });
+});
diff --git a/tests/contracts/execute-tool-boundary.test.ts b/tests/contracts/execute-tool-boundary.test.ts
new file mode 100644
index 0000000..525ada6
--- /dev/null
+++ b/tests/contracts/execute-tool-boundary.test.ts
@@ -0,0 +1,66 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import { executeGeneratorTool, executeRegularTool } from '../../src/lib/tool-executor.js';
+
+describe('executeRegularTool vs executeGeneratorTool - structural boundary', () => {
+  const regularTool = tool({
+    name: 'regular',
+    inputSchema: z.object({
+      x: z.number(),
+    }),
+    execute: async (args) => args.x * 2,
+  });
+
+  const generatorTool = tool({
+    name: 'generator',
+    inputSchema: z.object({
+      x: z.number(),
+    }),
+    eventSchema: z.object({
+      progress: z.number(),
+    }),
+    outputSchema: z.object({
+      result: z.number(),
+    }),
+    execute: async function* (args) {
+      yield {
+        progress: 50,
+      };
+      return {
+        result: args.x * 2,
+      };
+    },
+  });
+
+  const toolCall = {
+    id: 'tc_1',
+    name: 'test',
+    arguments: {
+      x: 5,
+    },
+  };
+  const turnCtx = {
+    numberOfTurns: 1,
+  };
+
+  it('executeRegularTool throws when given a generator tool', async () => {
+    await expect(executeRegularTool(generatorTool as any, toolCall, turnCtx)).rejects.toThrow();
+  });
+
+  it('executeGeneratorTool throws when given a regular tool', async () => {
+    await expect(executeGeneratorTool(regularTool as any, toolCall, turnCtx)).rejects.toThrow();
+  });
+
+  it('executeRegularTool result has NO preliminaryResults', async () => {
+    const result = await executeRegularTool(regularTool as any, toolCall, turnCtx);
+    expect(result).not.toHaveProperty('preliminaryResults');
+  });
+
+  it('executeGeneratorTool result HAS preliminaryResults array', async () => {
+    const result = await executeGeneratorTool(generatorTool as any, toolCall, turnCtx);
+    expect(result).toHaveProperty('preliminaryResults');
+    expect(Array.isArray(result.preliminaryResults)).toBe(true);
+  });
+});
diff --git a/tests/contracts/from-claude-messages.test.ts b/tests/contracts/from-claude-messages.test.ts
new file mode 100644
index 0000000..17b8444
--- /dev/null
+++ b/tests/contracts/from-claude-messages.test.ts
@@ -0,0 +1,90 @@
+import { describe, expect, it } from 'vitest';
+
+import { fromClaudeMessages } from '../../src/lib/anthropic-compat.js';
+
+describe('fromClaudeMessages - each block type maps distinctly', () => {
+  it('text blocks -> EasyInputMessage (not function_call_output, not function_call)', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'user',
+        content: [
+          {
+            type: 'text' as const,
+            text: 'Hello',
+          },
+        ],
+      },
+    ]);
+    const items = result as any[];
+    expect(items).toHaveLength(1);
+    expect(items[0]).toHaveProperty('role');
+    expect(items[0]).toHaveProperty('content', 'Hello');
+    expect(items[0]).not.toHaveProperty('type');
+  });
+
+  it('tool_use blocks -> FunctionCallItem (not EasyInputMessage, not function_call_output)', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool_use' as const,
+            id: 'tu_1',
+            name: 'search',
+            input: {
+              q: 'test',
+            },
+          },
+        ],
+      },
+    ]);
+    const items = result as any[];
+    const toolItem = items.find((i: any) => i.type === 'function_call');
+    expect(toolItem).toBeDefined();
+    expect(toolItem.name).toBe('search');
+    expect(toolItem.callId).toBe('tu_1');
+  });
+
+  it('tool_result blocks -> FunctionCallOutputItem (not EasyInputMessage, not function_call)', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'user',
+        content: [
+          {
+            type: 'tool_result' as const,
+            tool_use_id: 'tu_1',
+            content: 'Search result',
+          },
+        ],
+      },
+    ]);
+    const items = result as any[];
+    const outputItem = items.find((i: any) => i.type === 'function_call_output');
+    expect(outputItem).toBeDefined();
+    expect(outputItem.callId).toBe('tu_1');
+    expect(outputItem.output).toBe('Search result');
+  });
+
+  it('image blocks -> structured content EasyInputMessage (not input_image alone)', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'user',
+        content: [
+          {
+            type: 'image' as const,
+            source: {
+              type: 'url' as const,
+              url: 'https://example.com/img.png',
+            },
+          },
+        ],
+      },
+    ]);
+    const items = result as any[];
+    expect(items).toHaveLength(1);
+    expect(items[0]).toHaveProperty('role');
+    expect(items[0]).toHaveProperty('content');
+    expect(Array.isArray(items[0].content)).toBe(true);
+    expect(items[0].content[0].type).toBe('input_image');
+  });
+});
diff --git a/tests/contracts/items-stream.test.ts b/tests/contracts/items-stream.test.ts
new file mode 100644
index 0000000..8131802
--- /dev/null
+++ b/tests/contracts/items-stream.test.ts
@@ -0,0 +1,247 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { buildItemsStream } from '../../src/lib/stream-transformers.js';
+
+function makeStream(events: any[]): ReusableReadableStream<any> {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(event);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const item of iter) {
+    result.push(item);
+  }
+  return result;
+}
+
+describe('buildItemsStream - yields distinct item types per event', () => {
+  it('message items: accumulated text from text deltas', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: ' world',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    const lastMsg = items.filter((i: any) => i.type === 'message').pop()!;
+    expect((lastMsg as any).content[0].text).toBe('Hello world');
+  });
+
+  it('function_call items: accumulated arguments from function_call deltas', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'function_call',
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '',
+          status: 'in_progress',
+        },
+      },
+      {
+        type: 'response.function_call_arguments.delta',
+        delta: '{"q":',
+        itemId: 'fc_1',
+      },
+      {
+        type: 'response.function_call_arguments.delta',
+        delta: '"test"}',
+        itemId: 'fc_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    const lastFn = items.filter((i: any) => i.type === 'function_call').pop()!;
+    expect((lastFn as any).arguments).toBe('{"q":"test"}');
+  });
+
+  it('reasoning items: accumulated content from reasoning deltas', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'reasoning',
+          id: 'r_1',
+          status: 'in_progress',
+          summary: [],
+        },
+      },
+      {
+        type: 'response.reasoning_text.delta',
+        delta: 'thinking',
+        itemId: 'r_1',
+      },
+      {
+        type: 'response.reasoning_text.delta',
+        delta: ' more',
+        itemId: 'r_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    const lastReasoning = items.filter((i: any) => i.type === 'reasoning').pop()!;
+    expect((lastReasoning as any).summary[0].text).toBe('thinking more');
+  });
+
+  it('server tool items (web_search_call, file_search_call, image_generation_call): passthrough', async () => {
+    const webSearch = {
+      type: 'web_search_call',
+      id: 'ws_1',
+      status: 'completed',
+    };
+    const fileSearch = {
+      type: 'file_search_call',
+      id: 'fs_1',
+      status: 'completed',
+    };
+    const imageGen = {
+      type: 'image_generation_call',
+      id: 'ig_1',
+      status: 'completed',
+    };
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: webSearch,
+      },
+      {
+        type: 'response.output_item.added',
+        item: fileSearch,
+      },
+      {
+        type: 'response.output_item.added',
+        item: imageGen,
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    const types = items.map((i: any) => i.type);
+    expect(types).toContain('web_search_call');
+    expect(types).toContain('file_search_call');
+    expect(types).toContain('image_generation_call');
+  });
+
+  it('final complete items from output_item.done events', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hi',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_item.done',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'completed',
+          content: [
+            {
+              type: 'output_text',
+              text: 'Hi',
+              annotations: [],
+            },
+          ],
+        },
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    const doneItem = items[items.length - 1]!;
+    expect((doneItem as any).status).toBe('completed');
+  });
+
+  it('termination events (completed/failed/incomplete) -> stream stops', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hi',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+      // These should never be reached
+      {
+        type: 'response.output_text.delta',
+        delta: 'SHOULD NOT APPEAR',
+        itemId: 'msg_1',
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    const allText = items
+      .filter((i: any) => i.type === 'message')
+      .map((i: any) => i.content?.[0]?.text ?? '');
+    expect(allText.join('')).not.toContain('SHOULD NOT APPEAR');
+  });
+});
diff --git a/tests/contracts/message-stream-builders.test.ts b/tests/contracts/message-stream-builders.test.ts
new file mode 100644
index 0000000..760dd27
--- /dev/null
+++ b/tests/contracts/message-stream-builders.test.ts
@@ -0,0 +1,118 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import {
+  buildMessageStream,
+  buildResponsesMessageStream,
+} from '../../src/lib/stream-transformers.js';
+
+function makeStream(events: any[]): ReusableReadableStream<any> {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(event);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const item of iter) {
+    result.push(item);
+  }
+  return result;
+}
+
+const streamEvents = [
+  {
+    type: 'response.output_item.added',
+    item: {
+      type: 'message',
+      id: 'msg_1',
+      role: 'assistant',
+      status: 'in_progress',
+      content: [],
+    },
+  },
+  {
+    type: 'response.output_text.delta',
+    delta: 'Hello',
+    itemId: 'msg_1',
+  },
+  {
+    type: 'response.output_text.delta',
+    delta: ' world',
+    itemId: 'msg_1',
+  },
+  {
+    type: 'response.output_item.done',
+    item: {
+      type: 'message',
+      id: 'msg_1',
+      role: 'assistant',
+      status: 'completed',
+      content: [
+        {
+          type: 'output_text',
+          text: 'Hello world',
+          annotations: [],
+        },
+      ],
+    },
+  },
+  {
+    type: 'response.completed',
+    response: {},
+  },
+];
+
+describe('Message stream builders - same input, structurally distinct outputs', () => {
+  it('buildResponsesMessageStream yields OutputMessage: { id, type: "message", role: "assistant", content: [...] }', async () => {
+    const stream = makeStream(streamEvents);
+    const results = await collectAll(buildResponsesMessageStream(stream));
+    expect(results.length).toBeGreaterThan(0);
+    const last = results[results.length - 1]!;
+    expect(last).toHaveProperty('id');
+    expect(last).toHaveProperty('type', 'message');
+    expect(last).toHaveProperty('role', 'assistant');
+    expect(last).toHaveProperty('content');
+    expect(Array.isArray(last.content)).toBe(true);
+  });
+
+  it('buildMessageStream yields ChatAssistantMessage: { role: "assistant", content: string }', async () => {
+    const stream = makeStream(streamEvents);
+    const results = await collectAll(buildMessageStream(stream));
+    expect(results.length).toBeGreaterThan(0);
+    const last = results[results.length - 1]!;
+    expect(last).toHaveProperty('role', 'assistant');
+    expect(typeof last.content).toBe('string');
+    expect(last).not.toHaveProperty('id');
+    expect(last).not.toHaveProperty('type');
+  });
+
+  it('same stream events -> both produce same text content but structurally different objects', async () => {
+    const stream1 = makeStream(streamEvents);
+    const stream2 = makeStream(streamEvents);
+
+    const responsesResults = await collectAll(buildResponsesMessageStream(stream1));
+    const chatResults = await collectAll(buildMessageStream(stream2));
+
+    const responsesLast = responsesResults[responsesResults.length - 1]!;
+    const chatLast = chatResults[chatResults.length - 1]!;
+
+    // Same text content
+    const responsesText = responsesLast.content
+      .filter((c: any) => c.type === 'output_text')
+      .map((c: any) => c.text)
+      .join('');
+    expect(responsesText).toBe('Hello world');
+    expect(chatLast.content).toBe('Hello world');
+
+    // Structurally different
+    expect('id' in responsesLast).toBe(true);
+    expect('id' in chatLast).toBe(false);
+  });
+});
diff --git a/tests/contracts/response-extractors.test.ts b/tests/contracts/response-extractors.test.ts
new file mode 100644
index 0000000..05b0732
--- /dev/null
+++ b/tests/contracts/response-extractors.test.ts
@@ -0,0 +1,98 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  extractMessageFromResponse,
+  extractResponsesMessageFromResponse,
+} from '../../src/lib/stream-transformers.js';
+
+function makeResponse(text: string) {
+  return {
+    id: 'r1',
+    output: [
+      {
+        type: 'message' as const,
+        id: 'msg_1',
+        role: 'assistant' as const,
+        status: 'completed' as const,
+        content: [
+          {
+            type: 'output_text' as const,
+            text,
+            annotations: [],
+          },
+        ],
+      },
+    ],
+    status: 'completed' as const,
+    outputText: text,
+    model: 'test-model',
+    usage: {
+      totalTokens: 100,
+      inputTokens: 50,
+      outputTokens: 50,
+    },
+  };
+}
+
+describe('Response extractors - same response, distinct shapes', () => {
+  it('extractMessageFromResponse returns ChatAssistantMessage (role + content string)', () => {
+    const response = makeResponse('Hello world');
+    const msg = extractMessageFromResponse(response as any);
+    expect(msg.role).toBe('assistant');
+    expect(typeof msg.content).toBe('string');
+    expect(msg).not.toHaveProperty('id');
+    expect(msg).not.toHaveProperty('type');
+  });
+
+  it('extractResponsesMessageFromResponse returns OutputMessage (id + type + content array)', () => {
+    const response = makeResponse('Hello world');
+    const msg = extractResponsesMessageFromResponse(response as any);
+    expect(msg.id).toBe('msg_1');
+    expect(msg.type).toBe('message');
+    expect(Array.isArray(msg.content)).toBe(true);
+  });
+
+  it('same response -> both extract same text but structurally different objects', () => {
+    const response = makeResponse('Hello world');
+    const chatMsg = extractMessageFromResponse(response as any);
+    const responsesMsg = extractResponsesMessageFromResponse(response as any);
+
+    expect(chatMsg.content).toBe('Hello world');
+    const responsesText = responsesMsg.content
+      .filter((c: any) => c.type === 'output_text')
+      .map((c: any) => c.text)
+      .join('');
+    expect(responsesText).toBe('Hello world');
+
+    // Structurally different
+    expect('id' in chatMsg).toBe(false);
+    expect('id' in responsesMsg).toBe(true);
+  });
+
+  it('both throw when response has no message item', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'function_call' as const,
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '{}',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    expect(() => extractMessageFromResponse(response as any)).toThrow('No message found');
+    expect(() => extractResponsesMessageFromResponse(response as any)).toThrow('No message found');
+  });
+});
diff --git a/tests/contracts/stop-conditions.test.ts b/tests/contracts/stop-conditions.test.ts
new file mode 100644
index 0000000..9f65809
--- /dev/null
+++ b/tests/contracts/stop-conditions.test.ts
@@ -0,0 +1,531 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  finishReasonIs,
+  hasToolCall,
+  maxCost,
+  maxTokensUsed,
+  stepCountIs,
+} from '../../src/lib/stop-conditions.js';
+import type { StepResult } from '../../src/lib/tool-types.js';
+
+function makeStep(overrides: Partial<StepResult> = {}): StepResult {
+  return {
+    response: {} as any,
+    toolCalls: [],
+    finishReason: undefined,
+    usage: undefined,
+    ...overrides,
+  } as StepResult;
+}
+
+describe('stepCountIs(n) - behavior and dimension isolation', () => {
+  it('returns false when steps.length < n', () => {
+    const condition = stepCountIs(3);
+    expect(
+      condition({
+        steps: [
+          makeStep(),
+          makeStep(),
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when steps.length === n', () => {
+    const condition = stepCountIs(3);
+    expect(
+      condition({
+        steps: [
+          makeStep(),
+          makeStep(),
+          makeStep(),
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('returns true when steps.length > n', () => {
+    const condition = stepCountIs(2);
+    expect(
+      condition({
+        steps: [
+          makeStep(),
+          makeStep(),
+          makeStep(),
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('stepCountIs(0) always returns true', () => {
+    const condition = stepCountIs(0);
+    expect(
+      condition({
+        steps: [],
+      }),
+    ).toBe(true);
+    expect(
+      condition({
+        steps: [
+          makeStep(),
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('ignores tool names, tokens, cost, finishReason in steps', () => {
+    const condition = stepCountIs(1);
+    const step = makeStep({
+      toolCalls: [
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ] as any,
+      usage: {
+        totalTokens: 9999,
+        inputTokens: 5000,
+        outputTokens: 4999,
+        cost: 100,
+      } as any,
+      finishReason: 'length',
+    });
+    // Only step count matters
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
+
+describe('hasToolCall(toolName) - behavior and dimension isolation', () => {
+  it('returns false when no steps have the named tool', () => {
+    const condition = hasToolCall('search');
+    const step = makeStep({
+      toolCalls: [
+        {
+          name: 'other',
+          id: 'tc1',
+          arguments: {},
+        },
+      ] as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when any step has a matching tool call', () => {
+    const condition = hasToolCall('search');
+    const step1 = makeStep({
+      toolCalls: [
+        {
+          name: 'other',
+          id: 'tc1',
+          arguments: {},
+        },
+      ] as any,
+    });
+    const step2 = makeStep({
+      toolCalls: [
+        {
+          name: 'search',
+          id: 'tc2',
+          arguments: {},
+        },
+      ] as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step1,
+          step2,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('returns false for different tool names', () => {
+    const condition = hasToolCall('search');
+    const step = makeStep({
+      toolCalls: [
+        {
+          name: 'Search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ] as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('handles step with multiple tool calls, one matching', () => {
+    const condition = hasToolCall('search');
+    const step = makeStep({
+      toolCalls: [
+        {
+          name: 'other',
+          id: 'tc1',
+          arguments: {},
+        },
+        {
+          name: 'search',
+          id: 'tc2',
+          arguments: {},
+        },
+      ] as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('ignores step count, tokens, cost, finishReason', () => {
+    const condition = hasToolCall('search');
+    const step = makeStep({
+      toolCalls: [
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ] as any,
+      usage: {
+        totalTokens: 9999,
+        inputTokens: 5000,
+        outputTokens: 4999,
+        cost: 100,
+      } as any,
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
+
+describe('maxTokensUsed(maxTokens) - behavior and dimension isolation', () => {
+  it('returns false when total tokens < threshold', () => {
+    const condition = maxTokensUsed(100);
+    const step = makeStep({
+      usage: {
+        totalTokens: 50,
+        inputTokens: 25,
+        outputTokens: 25,
+      } as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when total tokens >= threshold', () => {
+    const condition = maxTokensUsed(100);
+    const step = makeStep({
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      } as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('accumulates tokens across multiple steps', () => {
+    const condition = maxTokensUsed(100);
+    const step1 = makeStep({
+      usage: {
+        totalTokens: 60,
+        inputTokens: 30,
+        outputTokens: 30,
+      } as any,
+    });
+    const step2 = makeStep({
+      usage: {
+        totalTokens: 50,
+        inputTokens: 25,
+        outputTokens: 25,
+      } as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step1,
+          step2,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('steps with undefined usage -> treated as 0', () => {
+    const condition = maxTokensUsed(100);
+    const step = makeStep({
+      usage: undefined,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('ignores step count, tool names, cost, finishReason', () => {
+    const condition = maxTokensUsed(100);
+    const step = makeStep({
+      toolCalls: [
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ] as any,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+        cost: 999,
+      } as any,
+      finishReason: 'stop',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
+
+describe('maxCost(maxCostInDollars) - behavior and dimension isolation', () => {
+  it('returns false when total cost < threshold', () => {
+    const condition = maxCost(1.0);
+    const step = makeStep({
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+        cost: 0.5,
+      } as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when total cost >= threshold', () => {
+    const condition = maxCost(1.0);
+    const step = makeStep({
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+        cost: 1.0,
+      } as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('accumulates cost across multiple steps', () => {
+    const condition = maxCost(1.0);
+    const step1 = makeStep({
+      usage: {
+        totalTokens: 50,
+        inputTokens: 25,
+        outputTokens: 25,
+        cost: 0.6,
+      } as any,
+    });
+    const step2 = makeStep({
+      usage: {
+        totalTokens: 50,
+        inputTokens: 25,
+        outputTokens: 25,
+        cost: 0.5,
+      } as any,
+    });
+    expect(
+      condition({
+        steps: [
+          step1,
+          step2,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('steps with undefined usage.cost -> treated as 0', () => {
+    const condition = maxCost(1.0);
+    const step = makeStep({
+      usage: undefined,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('ignores step count, tool names, tokens, finishReason', () => {
+    const condition = maxCost(1.0);
+    const step = makeStep({
+      toolCalls: [
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ] as any,
+      usage: {
+        totalTokens: 99999,
+        inputTokens: 50000,
+        outputTokens: 49999,
+        cost: 1.0,
+      } as any,
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
+
+describe('finishReasonIs(reason) - behavior and dimension isolation', () => {
+  it('returns false when no step has the specified reason', () => {
+    const condition = finishReasonIs('length');
+    const step = makeStep({
+      finishReason: 'stop',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when any step has matching reason', () => {
+    const condition = finishReasonIs('length');
+    const step1 = makeStep({
+      finishReason: 'stop',
+    });
+    const step2 = makeStep({
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step1,
+          step2,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('matches "length" specifically', () => {
+    const condition = finishReasonIs('length');
+    const step = makeStep({
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('steps with undefined finishReason -> not matched', () => {
+    const condition = finishReasonIs('length');
+    const step = makeStep({
+      finishReason: undefined,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('ignores step count, tool names, tokens, cost', () => {
+    const condition = finishReasonIs('length');
+    const step = makeStep({
+      toolCalls: [
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ] as any,
+      usage: {
+        totalTokens: 99999,
+        inputTokens: 50000,
+        outputTokens: 49999,
+        cost: 999,
+      } as any,
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
diff --git a/tests/dispatch/README.md b/tests/dispatch/README.md
new file mode 100644
index 0000000..dd6a3ba
--- /dev/null
+++ b/tests/dispatch/README.md
@@ -0,0 +1,25 @@
+# Dispatch Tests
+
+Tests in this folder verify that **boundaries between capabilities hold at their composition points**. The correct path is chosen AND the modules connect. This is where routing logic meets module integration.
+
+## What belongs here
+
+- `executeTool` dispatching to the correct executor based on type guards
+- `convertToClaudeMessage` routing items via output item guards to distinct Claude blocks
+- `buildItemsStream` routing events via stream type guards to correct handlers
+- `fromClaudeMessages` routing mixed block types to distinct output types
+- Approval partitioning choosing the correct path based on tool-level vs call-level checks
+- Extendable: when new routing or dispatching logic is added, its boundary-at-junction tests go here
+
+## Examples
+
+- `executeTool` dispatches regular tool to `executeRegularTool` because `isRegularExecuteTool` returns true
+- Same response with message + function_call: `isOutputMessage` -> text block, `isFunctionCallItem` -> tool_use block
+- `partitionToolCalls` with call-level check overrides tool-level `requireApproval`
+- `buildItemsStream` routes `output_item.added` to handler because `isOutputItemAddedEvent` matches
+
+## What does NOT belong here
+
+- Pure boundary testing without composition (→ `boundaries/`)
+- Pure composition without boundary verification (→ `composition/`)
+- Full end-to-end pipelines (→ `pipelines/`)
diff --git a/tests/dispatch/approval-partition-dispatch.test.ts b/tests/dispatch/approval-partition-dispatch.test.ts
new file mode 100644
index 0000000..3f44e78
--- /dev/null
+++ b/tests/dispatch/approval-partition-dispatch.test.ts
@@ -0,0 +1,79 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import { partitionToolCalls } from '../../src/lib/conversation-state.js';
+
+describe('Approval partitioning dispatches via tool-level vs call-level checks', () => {
+  const approvalTool = tool({
+    name: 'dangerous',
+    inputSchema: z.object({
+      target: z.string(),
+    }),
+    requireApproval: true,
+    execute: async () => 'deleted',
+  });
+
+  const safeTool = tool({
+    name: 'safe',
+    inputSchema: z.object({
+      q: z.string(),
+    }),
+    execute: async () => 'result',
+  });
+
+  const toolCalls = [
+    {
+      id: 'tc_1',
+      name: 'dangerous',
+      arguments: {
+        target: 'file.txt',
+      },
+    },
+    {
+      id: 'tc_2',
+      name: 'safe',
+      arguments: {
+        q: 'hello',
+      },
+    },
+  ];
+
+  it('partitionToolCalls with call-level check -> call-level overrides tool-level requireApproval', async () => {
+    // Call-level check says: no approval needed for anything
+    const callLevelCheck = async () => false;
+    const context = {
+      numberOfTurns: 1,
+    };
+    const partition = await partitionToolCalls(
+      toolCalls as any,
+      [
+        approvalTool,
+        safeTool,
+      ],
+      context,
+      callLevelCheck as any,
+    );
+    // Call-level override: both should be auto-execute
+    expect(partition.autoExecute).toHaveLength(2);
+    expect(partition.requiresApproval).toHaveLength(0);
+  });
+
+  it('partitionToolCalls without call-level check -> falls back to each tool requireApproval', async () => {
+    const context = {
+      numberOfTurns: 1,
+    };
+    const partition = await partitionToolCalls(
+      toolCalls as any,
+      [
+        approvalTool,
+        safeTool,
+      ],
+      context,
+    );
+    expect(partition.requiresApproval).toHaveLength(1);
+    expect(partition.requiresApproval[0]!.name).toBe('dangerous');
+    expect(partition.autoExecute).toHaveLength(1);
+    expect(partition.autoExecute[0]!.name).toBe('safe');
+  });
+});
diff --git a/tests/dispatch/claude-conversion-dispatch.test.ts b/tests/dispatch/claude-conversion-dispatch.test.ts
new file mode 100644
index 0000000..491459a
--- /dev/null
+++ b/tests/dispatch/claude-conversion-dispatch.test.ts
@@ -0,0 +1,90 @@
+import { describe, expect, it } from 'vitest';
+
+import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js';
+
+describe('convertToClaudeMessage routes items via output item guards', () => {
+  it('same response with message + function_call: isOutputMessage -> text block, isFunctionCallItem -> tool_use block', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+        {
+          type: 'function_call' as const,
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Hello',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const claude = convertToClaudeMessage(response as any);
+    const textBlock = claude.content.find((b: any) => b.type === 'text');
+    const toolBlock = claude.content.find((b: any) => b.type === 'tool_use');
+
+    expect(textBlock).toBeDefined();
+    expect((textBlock as any).text).toBe('Hello');
+    expect(toolBlock).toBeDefined();
+    expect((toolBlock as any).name).toBe('search');
+  });
+
+  it('same response with reasoning + web_search_call: isReasoningOutputItem -> thinking, isWebSearchCallOutputItem -> server_tool_use', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'reasoning' as const,
+          id: 'r_1',
+          status: 'completed' as const,
+          summary: [
+            {
+              type: 'summary_text' as const,
+              text: 'thinking about it',
+            },
+          ],
+        },
+        {
+          type: 'web_search_call' as const,
+          id: 'ws_1',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const claude = convertToClaudeMessage(response as any);
+    const thinkingBlock = claude.content.find((b: any) => b.type === 'thinking');
+    const serverToolBlock = claude.content.find((b: any) => b.type === 'server_tool_use');
+
+    expect(thinkingBlock).toBeDefined();
+    expect(serverToolBlock).toBeDefined();
+  });
+});
diff --git a/tests/dispatch/execute-tool-dispatch.test.ts b/tests/dispatch/execute-tool-dispatch.test.ts
new file mode 100644
index 0000000..9ca3f91
--- /dev/null
+++ b/tests/dispatch/execute-tool-dispatch.test.ts
@@ -0,0 +1,84 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import { executeTool } from '../../src/lib/tool-executor.js';
+
+describe('executeTool dispatches via tool type guards', () => {
+  const regularTool = tool({
+    name: 'add',
+    inputSchema: z.object({
+      a: z.number(),
+      b: z.number(),
+    }),
+    execute: async (args) => args.a + args.b,
+  });
+
+  const generatorTool = tool({
+    name: 'stream_add',
+    inputSchema: z.object({
+      a: z.number(),
+      b: z.number(),
+    }),
+    eventSchema: z.object({
+      progress: z.number(),
+    }),
+    outputSchema: z.object({
+      sum: z.number(),
+    }),
+    execute: async function* (args) {
+      yield {
+        progress: 50,
+      };
+      return {
+        sum: args.a + args.b,
+      };
+    },
+  });
+
+  const manualTool = tool({
+    name: 'manual_op',
+    inputSchema: z.object({
+      x: z.string(),
+    }),
+  });
+
+  const toolCall = {
+    id: 'tc_1',
+    name: 'test',
+    arguments: {
+      a: 2,
+      b: 3,
+    },
+  };
+  const turnCtx = {
+    numberOfTurns: 1,
+  };
+
+  it('dispatches regular tool to executeRegularTool path because isRegularExecuteTool returns true', async () => {
+    const result = await executeTool(regularTool, toolCall, turnCtx);
+    expect(result.toolCallId).toBe('tc_1');
+    expect(result.result).toBe(5);
+    expect(result).not.toHaveProperty('preliminaryResults');
+  });
+
+  it('dispatches generator tool to executeGeneratorTool path because isGeneratorTool returns true', async () => {
+    const result = await executeTool(generatorTool, toolCall, turnCtx);
+    expect(result.toolCallId).toBe('tc_1');
+    expect(result.result).toEqual({
+      sum: 5,
+    });
+    expect(result).toHaveProperty('preliminaryResults');
+  });
+
+  it('rejects manual tool because hasExecuteFunction returns false', async () => {
+    const manualCall = {
+      id: 'tc_1',
+      name: 'manual_op',
+      arguments: {
+        x: 'hi',
+      },
+    };
+    await expect(executeTool(manualTool as any, manualCall, turnCtx)).rejects.toThrow();
+  });
+});
diff --git a/tests/dispatch/from-claude-dispatch.test.ts b/tests/dispatch/from-claude-dispatch.test.ts
new file mode 100644
index 0000000..de9a3c9
--- /dev/null
+++ b/tests/dispatch/from-claude-dispatch.test.ts
@@ -0,0 +1,58 @@
+import { describe, expect, it } from 'vitest';
+
+import { fromClaudeMessages } from '../../src/lib/anthropic-compat.js';
+
+describe('fromClaudeMessages routes blocks to distinct output types', () => {
+  it('mixed Claude message with text + tool_use + tool_result blocks -> each block produces its correct OR type, interleaved correctly', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'assistant',
+        content: [
+          {
+            type: 'text' as const,
+            text: 'Let me search for that.',
+          },
+          {
+            type: 'tool_use' as const,
+            id: 'tu_1',
+            name: 'search',
+            input: {
+              q: 'test',
+            },
+          },
+        ],
+      },
+      {
+        role: 'user',
+        content: [
+          {
+            type: 'tool_result' as const,
+            tool_use_id: 'tu_1',
+            content: 'Found results',
+          },
+          {
+            type: 'text' as const,
+            text: 'Thanks for the results',
+          },
+        ],
+      },
+    ]);
+
+    const items = result as any[];
+    // Should have: text message, function_call, function_call_output, text message
+    const types = items.map((i: any) => i.type || 'easy_input_message');
+
+    expect(types).toContain('function_call');
+    expect(types).toContain('function_call_output');
+
+    // Check that the function_call has correct properties
+    const fnCall = items.find((i: any) => i.type === 'function_call');
+    expect(fnCall.name).toBe('search');
+    expect(fnCall.callId).toBe('tu_1');
+
+    // Check that the function_call_output has correct properties
+    const fnOutput = items.find((i: any) => i.type === 'function_call_output');
+    expect(fnOutput.callId).toBe('tu_1');
+    expect(fnOutput.output).toBe('Found results');
+  });
+});
diff --git a/tests/dispatch/items-stream-dispatch.test.ts b/tests/dispatch/items-stream-dispatch.test.ts
new file mode 100644
index 0000000..8deb687
--- /dev/null
+++ b/tests/dispatch/items-stream-dispatch.test.ts
@@ -0,0 +1,76 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { buildItemsStream } from '../../src/lib/stream-transformers.js';
+
+function makeStream(events: any[]): ReusableReadableStream<any> {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(event);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const item of iter) {
+    result.push(item);
+  }
+  return result;
+}
+
+describe('buildItemsStream routes events via stream type guards', () => {
+  it('routes output_item.added to handler because isOutputItemAddedEvent matches (not other guards)', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    expect(items.length).toBeGreaterThan(0);
+    expect((items[0] as any).type).toBe('message');
+  });
+
+  it('skips unknown event types that do not match any guard', async () => {
+    const events = [
+      {
+        type: 'response.some_unknown_event',
+        data: 'ignored',
+      },
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    // Only the message item should be yielded, unknown events are silently skipped
+    expect(items.every((i: any) => i.type === 'message')).toBe(true);
+  });
+});
diff --git a/tests/integration/README.md b/tests/integration/README.md
new file mode 100644
index 0000000..62eb964
--- /dev/null
+++ b/tests/integration/README.md
@@ -0,0 +1,26 @@
+# Integration Tests
+
+Tests in this folder verify that a capability **works as specified AND composes correctly with another module**. Both the output correctness and the cross-module data flow are asserted.
+
+## What belongs here
+
+- A function produces the correct output AND that output successfully feeds the next module
+- Stop conditions evaluated against real `StepResult` shapes from the orchestrator
+- Context store data flowing through `buildToolExecuteContext` into a tool's `execute` function
+- Stream consumers at different speeds both getting complete, correct data
+- Next-turn param computation feeding into request modification with verified results
+- Extendable: when new cross-module flows are introduced, their correctness-through-composition tests go here
+
+## Examples
+
+- `consumeStreamForCompletion` returns the response because `isResponseCompletedEvent` identified it
+- `stepCountIs` works with `StepResult[]` containing real `usage` and `toolCalls` data
+- Tool's `execute` receives context where `local` reflects store data set before execution
+- Two `ReusableReadableStream` consumers at different read speeds both get all items correctly
+- `executeNextTurnParamsFunctions` computes temperature -> `applyNextTurnParamsToRequest` produces correct request
+
+## What does NOT belong here
+
+- Pure specification without cross-module flow (→ `behavior/`)
+- Pure composition without output verification (→ `composition/`)
+- Boundary checks at composition points (→ `dispatch/`)
diff --git a/tests/integration/conversation-state-format.test.ts b/tests/integration/conversation-state-format.test.ts
new file mode 100644
index 0000000..1f49afd
--- /dev/null
+++ b/tests/integration/conversation-state-format.test.ts
@@ -0,0 +1,32 @@
+import { describe, expect, it } from 'vitest';
+
+import { appendToMessages } from '../../src/lib/conversation-state.js';
+
+describe('Conversation state -> format conversion', () => {
+  it('appendToMessages with normalizeInputToArray -> string input produces correct array for API', () => {
+    const existing = [
+      {
+        role: 'user' as const,
+        content: 'first message',
+      },
+    ];
+
+    const newItem = {
+      role: 'user' as const,
+      content: 'second message',
+    };
+    const result = appendToMessages(
+      existing as any,
+      [
+        newItem,
+      ] as any,
+    );
+    expect(result).toHaveLength(2);
+    expect(result[0]).toEqual({
+      role: 'user',
+      content: 'first message',
+    });
+    expect(result[1]).toHaveProperty('role', 'user');
+    expect(result[1]).toHaveProperty('content', 'second message');
+  });
+});
diff --git a/tests/integration/next-turn-params-request.test.ts b/tests/integration/next-turn-params-request.test.ts
new file mode 100644
index 0000000..b40034c
--- /dev/null
+++ b/tests/integration/next-turn-params-request.test.ts
@@ -0,0 +1,47 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  applyNextTurnParamsToRequest,
+  executeNextTurnParamsFunctions,
+} from '../../src/lib/next-turn-params.js';
+
+describe('Next-turn params -> request modification -> API readiness', () => {
+  it('executeNextTurnParamsFunctions computes new temperature -> applyNextTurnParamsToRequest produces request with updated temperature', async () => {
+    const tools = [
+      {
+        type: 'function',
+        function: {
+          name: 'search',
+          nextTurnParams: {
+            temperature: () => 0.3,
+          },
+        },
+      },
+    ];
+
+    const toolCalls = [
+      {
+        id: 'tc_1',
+        name: 'search',
+        arguments: {
+          q: 'test',
+        },
+      },
+    ];
+    const request = {
+      model: 'gpt-4',
+      temperature: 0.7,
+      input: 'hello',
+    };
+    const params = await executeNextTurnParamsFunctions(
+      toolCalls as any,
+      tools as any,
+      request as any,
+    );
+
+    const modified = applyNextTurnParamsToRequest(request as any, params);
+    expect(modified.temperature).toBe(0.3);
+    expect(modified.model).toBe('gpt-4');
+    expect(modified.input).toBe('hello');
+  });
+});
diff --git a/tests/integration/reusable-stream-consumers.test.ts b/tests/integration/reusable-stream-consumers.test.ts
new file mode 100644
index 0000000..ffa4ccd
--- /dev/null
+++ b/tests/integration/reusable-stream-consumers.test.ts
@@ -0,0 +1,114 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { buildItemsStream, consumeStreamForCompletion } from '../../src/lib/stream-transformers.js';
+
+function makeStream(events: any[]): ReusableReadableStream<any> {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(event);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const item of iter) {
+    result.push(item);
+  }
+  return result;
+}
+
+describe('ReusableReadableStream -> concurrent transformer consumption', () => {
+  it('two consumers at different read speeds both get all items', async () => {
+    const stream = makeStream([
+      1,
+      2,
+      3,
+      4,
+      5,
+    ]);
+
+    const consumer1 = stream.createConsumer();
+    const consumer2 = stream.createConsumer();
+
+    const [result1, result2] = await Promise.all([
+      collectAll(consumer1),
+      collectAll(consumer2),
+    ]);
+
+    expect(result1).toEqual([
+      1,
+      2,
+      3,
+      4,
+      5,
+    ]);
+    expect(result2).toEqual([
+      1,
+      2,
+      3,
+      4,
+      5,
+    ]);
+  });
+
+  it('buildItemsStream and consumeStreamForCompletion both consume same stream correctly', async () => {
+    const response = {
+      id: 'r1',
+      status: 'completed',
+      output: [],
+    };
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_item.done',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'completed',
+          content: [
+            {
+              type: 'output_text',
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+      },
+      {
+        type: 'response.completed',
+        response,
+      },
+    ];
+
+    const stream = makeStream(events);
+
+    const [items, completedResponse] = await Promise.all([
+      collectAll(buildItemsStream(stream)),
+      consumeStreamForCompletion(stream),
+    ]);
+
+    expect(items.length).toBeGreaterThan(0);
+    expect(completedResponse).toEqual(response);
+  });
+});
diff --git a/tests/integration/stop-conditions-step-result.test.ts b/tests/integration/stop-conditions-step-result.test.ts
new file mode 100644
index 0000000..de0d4ca
--- /dev/null
+++ b/tests/integration/stop-conditions-step-result.test.ts
@@ -0,0 +1,162 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  hasToolCall,
+  isStopConditionMet,
+  maxTokensUsed,
+  stepCountIs,
+} from '../../src/lib/stop-conditions.js';
+import type { StepResult } from '../../src/lib/tool-types.js';
+
+function makeStep(overrides: Partial<StepResult> = {}): StepResult {
+  return {
+    response: {} as any,
+    toolCalls: [],
+    finishReason: undefined,
+    usage: undefined,
+    ...overrides,
+  } as StepResult;
+}
+
+describe('Stop conditions + real StepResult shape', () => {
+  it('stepCountIs works with StepResult[] containing real usage and toolCalls data', () => {
+    const steps = [
+      makeStep({
+        toolCalls: [
+          {
+            name: 'search',
+            id: 'tc1',
+            arguments: {},
+          },
+        ] as any,
+        usage: {
+          totalTokens: 100,
+          inputTokens: 50,
+          outputTokens: 50,
+        } as any,
+      }),
+      makeStep({
+        toolCalls: [
+          {
+            name: 'write',
+            id: 'tc2',
+            arguments: {},
+          },
+        ] as any,
+        usage: {
+          totalTokens: 200,
+          inputTokens: 100,
+          outputTokens: 100,
+        } as any,
+      }),
+    ];
+    const condition = stepCountIs(2);
+    expect(
+      condition({
+        steps,
+      }),
+    ).toBe(true);
+  });
+
+  it('hasToolCall finds tool name inside StepResult.toolCalls array', () => {
+    const steps = [
+      makeStep({
+        toolCalls: [
+          {
+            name: 'search',
+            id: 'tc1',
+            arguments: {},
+          },
+          {
+            name: 'analyze',
+            id: 'tc2',
+            arguments: {},
+          },
+        ] as any,
+      }),
+    ];
+    expect(
+      hasToolCall('search')({
+        steps,
+      }),
+    ).toBe(true);
+    expect(
+      hasToolCall('analyze')({
+        steps,
+      }),
+    ).toBe(true);
+    expect(
+      hasToolCall('missing')({
+        steps,
+      }),
+    ).toBe(false);
+  });
+
+  it('maxTokensUsed reads from StepResult.usage.totalTokens', () => {
+    const steps = [
+      makeStep({
+        usage: {
+          totalTokens: 500,
+          inputTokens: 250,
+          outputTokens: 250,
+        } as any,
+      }),
+      makeStep({
+        usage: {
+          totalTokens: 600,
+          inputTokens: 300,
+          outputTokens: 300,
+        } as any,
+      }),
+    ];
+    expect(
+      maxTokensUsed(1000)({
+        steps,
+      }),
+    ).toBe(true);
+    expect(
+      maxTokensUsed(1200)({
+        steps,
+      }),
+    ).toBe(false);
+  });
+
+  it('isStopConditionMet evaluates multiple conditions against same StepResult[]', async () => {
+    const steps = [
+      makeStep({
+        toolCalls: [
+          {
+            name: 'search',
+            id: 'tc1',
+            arguments: {},
+          },
+        ] as any,
+        usage: {
+          totalTokens: 100,
+          inputTokens: 50,
+          outputTokens: 50,
+        } as any,
+      }),
+    ];
+
+    // Neither condition met
+    const result1 = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(5),
+        hasToolCall('done'),
+      ],
+      steps,
+    });
+    expect(result1).toBe(false);
+
+    // One condition met (hasToolCall)
+    const result2 = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(5),
+        hasToolCall('search'),
+      ],
+      steps,
+    });
+    expect(result2).toBe(true);
+  });
+});
diff --git a/tests/integration/stream-completion-guards.test.ts b/tests/integration/stream-completion-guards.test.ts
new file mode 100644
index 0000000..3449df3
--- /dev/null
+++ b/tests/integration/stream-completion-guards.test.ts
@@ -0,0 +1,60 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { consumeStreamForCompletion } from '../../src/lib/stream-transformers.js';
+
+function makeStream(events: any[]): ReusableReadableStream<any> {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(event);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+describe('consumeStreamForCompletion + stream type guards', () => {
+  it('returns response object because isResponseCompletedEvent identified the completion event', async () => {
+    const response = {
+      id: 'r1',
+      status: 'completed',
+      output: [
+        {
+          type: 'message',
+          id: 'm1',
+          role: 'assistant',
+          content: [],
+        },
+      ],
+    };
+    const stream = makeStream([
+      {
+        type: 'response.output_text.delta',
+        delta: 'hello',
+      },
+      {
+        type: 'response.completed',
+        response,
+      },
+    ]);
+    const result = await consumeStreamForCompletion(stream);
+    expect(result.id).toBe('r1');
+    expect(result.status).toBe('completed');
+  });
+
+  it('throws on failed response because isResponseFailedEvent caught the failure', async () => {
+    const stream = makeStream([
+      {
+        type: 'response.failed',
+        response: {
+          error: {
+            message: 'quota exceeded',
+          },
+        },
+      },
+    ]);
+    await expect(consumeStreamForCompletion(stream)).rejects.toThrow('Response failed');
+  });
+});
diff --git a/tests/integration/tool-context-execution.test.ts b/tests/integration/tool-context-execution.test.ts
new file mode 100644
index 0000000..cf44d4a
--- /dev/null
+++ b/tests/integration/tool-context-execution.test.ts
@@ -0,0 +1,64 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { buildToolExecuteContext, ToolContextStore } from '../../src/lib/tool-context.js';
+import { buildTurnContext } from '../../src/lib/turn-context.js';
+
+describe('ToolContextStore -> buildToolExecuteContext -> tool execution', () => {
+  it('tool execute receives context where local reflects store data set before execution', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'key-123',
+      },
+    });
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 1,
+    });
+    const _toolFn = {
+      name: 'myTool',
+      inputSchema: z.object({}),
+      contextSchema: z.object({
+        apiKey: z.string(),
+      }),
+    };
+
+    const contextSchema = z.object({
+      apiKey: z.string(),
+    });
+    const execCtx = buildToolExecuteContext(turnCtx, store, 'myTool', contextSchema);
+    expect(execCtx.local).toEqual({
+      apiKey: 'key-123',
+    });
+  });
+
+  it('tool calls setContext -> store updated -> next tool reads updated value via local', () => {
+    const store = new ToolContextStore({
+      toolA: {
+        count: 0,
+      },
+      toolB: {},
+    });
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 1,
+    });
+    const contextSchema = z.object({
+      count: z.number(),
+    });
+
+    const execCtxA = buildToolExecuteContext(turnCtx, store, 'toolA', contextSchema);
+    expect(execCtxA.local).toEqual({
+      count: 0,
+    });
+
+    // Simulate tool A updating context
+    store.mergeToolContext('toolA', {
+      count: 42,
+    });
+
+    // Tool A now reads updated value
+    const execCtxA2 = buildToolExecuteContext(turnCtx, store, 'toolA', contextSchema);
+    expect(execCtxA2.local).toEqual({
+      count: 42,
+    });
+  });
+});
diff --git a/tests/integration/turn-context-async-params.test.ts b/tests/integration/turn-context-async-params.test.ts
new file mode 100644
index 0000000..98ec05c
--- /dev/null
+++ b/tests/integration/turn-context-async-params.test.ts
@@ -0,0 +1,40 @@
+import { describe, expect, it } from 'vitest';
+import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
+import { buildTurnContext } from '../../src/lib/turn-context.js';
+
+describe('buildTurnContext -> resolveAsyncFunctions', () => {
+  it('parameter function receives TurnContext with correct numberOfTurns', async () => {
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 5,
+    });
+    const result = await resolveAsyncFunctions(
+      {
+        model: 'gpt-4',
+        temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
+      } as any,
+      turnCtx,
+    );
+    expect(result.temperature).toBe(0.5);
+  });
+
+  it('parameter function can read toolCall from context when provided', async () => {
+    const toolCall = {
+      id: 'tc_1',
+      name: 'search',
+      arguments: {
+        q: 'test',
+      },
+    };
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 1,
+      toolCall: toolCall as any,
+    });
+    const result = await resolveAsyncFunctions(
+      {
+        model: (ctx: any) => (ctx.toolCall ? 'gpt-4-turbo' : 'gpt-4'),
+      } as any,
+      turnCtx,
+    );
+    expect(result.model).toBe('gpt-4-turbo');
+  });
+});
diff --git a/tests/pipelines/README.md b/tests/pipelines/README.md
new file mode 100644
index 0000000..fd0c51f
--- /dev/null
+++ b/tests/pipelines/README.md
@@ -0,0 +1,28 @@
+# Pipelines Tests
+
+Tests in this folder are **end-to-end agent workflows** where multiple capabilities compose, and at each layer we verify: what it specifically produces, how it differs from peers, and how it feeds the next layer.
+
+These are the most comprehensive tests in the suite. Each test exercises a complete pipeline and makes assertions at every stage.
+
+## What belongs here
+
+- Full streaming pipeline: raw events -> guards -> transformers -> consumer
+- Full tool execution pipeline: definition -> dispatch -> validate -> execute -> format
+- Context pipeline: build -> resolve -> store -> execute
+- Stop condition pipeline: results -> steps -> conditions -> decision
+- Dual-format output: same response -> structurally distinct formats simultaneously
+- Claude conversion deep pipeline: multi-item response -> per-item guard routing -> distinct blocks
+- Bidirectional format conversion round-trips
+- Extendable: when new end-to-end workflows are introduced, their full-pipeline tests with per-layer verification go here
+
+## Examples
+
+- Mixed stream events flow through type guards, extractors yield correct types, consumer receives via ReusableReadableStream
+- `tool()` creates tool -> `executeTool` dispatches via guard -> validates input -> executes -> validates output -> formats for model
+- Three concurrent stream consumers (`buildMessageStream`, `buildResponsesMessageStream`, `buildItemsStream`) all complete, each structurally different, no consumer blocks another
+
+## What does NOT belong here
+
+- Single-module behavior (-> `behavior/`)
+- Two-module composition without per-layer verification (-> `composition/` or `integration/`)
+- Tests that don't verify behavior, boundaries, and composition together (-> appropriate simpler category)
diff --git a/tests/pipelines/approval-execution-state.test.ts b/tests/pipelines/approval-execution-state.test.ts
new file mode 100644
index 0000000..1b74b77
--- /dev/null
+++ b/tests/pipelines/approval-execution-state.test.ts
@@ -0,0 +1,96 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  createInitialState,
+  createRejectedResult,
+  createUnsentResult,
+  partitionToolCalls,
+  unsentResultsToAPIFormat,
+  updateState,
+} from '../../src/lib/conversation-state.js';
+import { executeTool } from '../../src/lib/tool-executor.js';
+
+describe('Approval -> execution -> state update pipeline', () => {
+  it('approval workflow: partition -> execute auto -> create results -> format -> update state', async () => {
+    const autoTool = tool({
+      name: 'search',
+      inputSchema: z.object({
+        q: z.string(),
+      }),
+      execute: async (args) => ({
+        results: [
+          `found: ${args.q}`,
+        ],
+      }),
+    });
+
+    const approvalTool = tool({
+      name: 'delete',
+      inputSchema: z.object({
+        target: z.string(),
+      }),
+      requireApproval: true,
+      execute: async () => 'deleted',
+    });
+
+    const toolCalls = [
+      {
+        id: 'tc_1',
+        name: 'search',
+        arguments: {
+          q: 'test',
+        },
+      },
+      {
+        id: 'tc_2',
+        name: 'delete',
+        arguments: {
+          target: 'file.txt',
+        },
+      },
+    ];
+
+    const tools = [
+      autoTool,
+      approvalTool,
+    ];
+
+    // Step 1: Partition
+    const partition = await partitionToolCalls(toolCalls as any, tools);
+    expect(partition.autoExecute).toHaveLength(1);
+    expect(partition.requiresApproval).toHaveLength(1);
+
+    // Step 2: Execute auto tool
+    const autoResult = await executeTool(autoTool, partition.autoExecute[0]!, {
+      numberOfTurns: 1,
+    });
+    expect(autoResult.result).toEqual({
+      results: [
+        'found: test',
+      ],
+    });
+
+    // Step 3: Create results
+    const unsent = createUnsentResult('tc_1', 'search', autoResult.result);
+    const rejected = createRejectedResult('tc_2', 'delete');
+
+    // Step 4: Format for API
+    const formatted = unsentResultsToAPIFormat([
+      unsent,
+      rejected,
+    ]);
+    expect(formatted).toHaveLength(2);
+    expect(formatted[0]!.type).toBe('function_call_output');
+    expect(formatted[1]!.type).toBe('function_call_output');
+
+    // Step 5: Update state
+    const state = createInitialState();
+    const updated = updateState(state, {
+      status: 'completed',
+    });
+    expect(updated.status).toBe('completed');
+    expect(updated.id).toBe(state.id);
+  });
+});
diff --git a/tests/pipelines/async-resolution-pipeline.test.ts b/tests/pipelines/async-resolution-pipeline.test.ts
new file mode 100644
index 0000000..8ec3109
--- /dev/null
+++ b/tests/pipelines/async-resolution-pipeline.test.ts
@@ -0,0 +1,31 @@
+import { describe, expect, it } from 'vitest';
+
+import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
+import { stepCountIs } from '../../src/lib/stop-conditions.js';
+
+describe('Async resolution + clean API request', () => {
+  it('mixed input: static model, function temperature, client-only stopWhen -> three paths verified in one call', async () => {
+    const turnCtx = {
+      numberOfTurns: 2,
+    } as any;
+
+    const result = await resolveAsyncFunctions(
+      {
+        model: 'gpt-4',
+        temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
+        stopWhen: stepCountIs(5),
+        input: 'hello',
+      } as any,
+      turnCtx,
+    );
+
+    // Static: preserved
+    expect(result.model).toBe('gpt-4');
+    // Function: resolved
+    expect(result.temperature).toBe(0.2);
+    // Client-only: stripped
+    expect(result).not.toHaveProperty('stopWhen');
+    // Static: preserved
+    expect(result.input).toBe('hello');
+  });
+});
diff --git a/tests/pipelines/claude-conversion-deep.test.ts b/tests/pipelines/claude-conversion-deep.test.ts
new file mode 100644
index 0000000..fdd536c
--- /dev/null
+++ b/tests/pipelines/claude-conversion-deep.test.ts
@@ -0,0 +1,168 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  convertToClaudeMessage,
+  getUnsupportedContentSummary,
+  hasUnsupportedContent,
+} from '../../src/lib/stream-transformers.js';
+
+describe('Claude conversion deep pipeline', () => {
+  it('multi-item response: message + function_call + reasoning + web_search -> each guard routes to distinct block', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+        {
+          type: 'function_call' as const,
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+        {
+          type: 'reasoning' as const,
+          id: 'r_1',
+          status: 'completed' as const,
+          summary: [
+            {
+              type: 'summary_text' as const,
+              text: 'thinking',
+            },
+          ],
+        },
+        {
+          type: 'web_search_call' as const,
+          id: 'ws_1',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Hello',
+      model: 'test-model',
+      usage: {
+        totalTokens: 200,
+        inputTokens: 100,
+        outputTokens: 100,
+      },
+    };
+
+    const claude = convertToClaudeMessage(response as any);
+    const types = claude.content.map((b: any) => b.type);
+
+    expect(types).toContain('text');
+    expect(types).toContain('tool_use');
+    expect(types).toContain('thinking');
+    expect(types).toContain('server_tool_use');
+  });
+
+  it('annotations: text with file_citation + url_citation + file_path -> each produces its distinct citation', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Here is the answer',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  fileId: 'f1',
+                  filename: 'doc.pdf',
+                  index: 0,
+                },
+                {
+                  type: 'url_citation',
+                  url: 'https://example.com',
+                  title: 'Example',
+                  startIndex: 0,
+                  endIndex: 10,
+                },
+                {
+                  type: 'file_path',
+                  fileId: 'f2',
+                  filePath: '/tmp/out.txt',
+                },
+              ],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Here is the answer',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const claude = convertToClaudeMessage(response as any);
+    const textBlock = claude.content.find((b: any) => b.type === 'text') as any;
+    expect(textBlock).toBeDefined();
+    // Should have citations
+    if (textBlock.citations) {
+      expect(textBlock.citations.length).toBeGreaterThan(0);
+    }
+  });
+
+  it('unsupported content round-trip: refusal + image_generation -> convertToClaudeMessage -> unsupported_content utilities work', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'refusal' as const,
+              refusal: 'I cannot do that',
+            },
+          ],
+        },
+        {
+          type: 'image_generation_call' as const,
+          id: 'ig_1',
+          result: 'base64data',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const claude = convertToClaudeMessage(response as any);
+    // unsupported_content is a property on the message, not content blocks
+    expect(hasUnsupportedContent(claude)).toBe(true);
+    const summary = getUnsupportedContentSummary(claude);
+    expect(summary).toBeDefined();
+    // refusal and image_generation_call should both appear as unsupported
+    expect(Object.keys(summary).length).toBeGreaterThan(0);
+  });
+});
diff --git a/tests/pipelines/context-pipeline.test.ts b/tests/pipelines/context-pipeline.test.ts
new file mode 100644
index 0000000..c4f6580
--- /dev/null
+++ b/tests/pipelines/context-pipeline.test.ts
@@ -0,0 +1,91 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import {
+  buildToolExecuteContext,
+  resolveContext,
+  ToolContextStore,
+} from '../../src/lib/tool-context.js';
+import { buildTurnContext } from '../../src/lib/turn-context.js';
+
+describe('Context pipeline: build -> resolve -> store -> execute', () => {
+  it('turn 0 with context: buildTurnContext -> resolveContext -> ToolContextStore -> buildToolExecuteContext -> tool reads local', async () => {
+    // Build turn context
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 0,
+    });
+    expect(turnCtx.numberOfTurns).toBe(0);
+
+    // Resolve context via function
+    const contextFn = () => ({
+      apiKey: 'secret-123',
+    });
+    const resolved = await resolveContext(contextFn, turnCtx);
+    expect(resolved).toEqual({
+      apiKey: 'secret-123',
+    });
+
+    // Populate store
+    const store = new ToolContextStore({
+      myTool: resolved,
+    });
+
+    // Build tool execute context
+    const _toolFn = {
+      name: 'myTool',
+      inputSchema: z.object({}),
+      contextSchema: z.object({
+        apiKey: z.string(),
+      }),
+    };
+    const contextSchema = z.object({
+      apiKey: z.string(),
+    });
+    const execCtx = buildToolExecuteContext(turnCtx, store, 'myTool', contextSchema);
+
+    // Tool reads from local
+    expect(execCtx.local).toEqual({
+      apiKey: 'secret-123',
+    });
+    expect(execCtx.numberOfTurns).toBe(0);
+  });
+
+  it('shared context mutation: tool A reads count=0 -> sets count=1 -> tool B reads count=1', () => {
+    const store = new ToolContextStore({
+      shared: {
+        count: 0,
+      },
+    });
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 1,
+    });
+
+    const _sharedToolFn = {
+      name: 'shared',
+      inputSchema: z.object({}),
+      contextSchema: z.object({
+        count: z.number(),
+      }),
+    };
+
+    const contextSchema = z.object({
+      count: z.number(),
+    });
+
+    // Tool A reads shared.count === 0
+    const ctxA = buildToolExecuteContext(turnCtx, store, 'shared', contextSchema);
+    expect(ctxA.local).toEqual({
+      count: 0,
+    });
+
+    // Tool A updates shared context
+    store.setToolContext('shared', {
+      count: 1,
+    });
+
+    // Tool B reads shared.count === 1
+    const ctxB = buildToolExecuteContext(turnCtx, store, 'shared', contextSchema);
+    expect(ctxB.local).toEqual({
+      count: 1,
+    });
+  });
+});
diff --git a/tests/pipelines/dual-format-output.test.ts b/tests/pipelines/dual-format-output.test.ts
new file mode 100644
index 0000000..cd65a9e
--- /dev/null
+++ b/tests/pipelines/dual-format-output.test.ts
@@ -0,0 +1,158 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import {
+  buildItemsStream,
+  buildMessageStream,
+  buildResponsesMessageStream,
+  convertToClaudeMessage,
+  extractMessageFromResponse,
+  extractToolCallsFromResponse,
+} from '../../src/lib/stream-transformers.js';
+
+function makeStream(events: any[]): ReusableReadableStream<any> {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(event);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const item of iter) {
+    result.push(item);
+  }
+  return result;
+}
+
+describe('Dual-format output: same response -> structurally distinct formats', () => {
+  it('from response: same response -> extractMessageFromResponse, convertToClaudeMessage, extractToolCallsFromResponse all work', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Found results',
+              annotations: [],
+            },
+          ],
+        },
+        {
+          type: 'function_call' as const,
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Found results',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    // Chat format
+    const chatMsg = extractMessageFromResponse(response as any);
+    expect(chatMsg.role).toBe('assistant');
+    expect(typeof chatMsg.content).toBe('string');
+
+    // Claude format
+    const claudeMsg = convertToClaudeMessage(response as any);
+    expect(claudeMsg.role).toBe('assistant');
+    expect(Array.isArray(claudeMsg.content)).toBe(true);
+
+    // Tool calls
+    const toolCalls = extractToolCallsFromResponse(response as any);
+    expect(toolCalls).toHaveLength(1);
+    expect(toolCalls[0]!.name).toBe('search');
+
+    // All semantically equivalent, structurally different
+    expect(chatMsg.content).toBe('Found results');
+    const claudeText = claudeMsg.content.find((b: any) => b.type === 'text');
+    expect((claudeText as any).text).toBe('Found results');
+  });
+
+  it('through streaming: same ReusableReadableStream -> three concurrent consumers all complete', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: ' world',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_item.done',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'completed',
+          content: [
+            {
+              type: 'output_text',
+              text: 'Hello world',
+              annotations: [],
+            },
+          ],
+        },
+      },
+      {
+        type: 'response.completed',
+        response: {
+          id: 'r1',
+        },
+      },
+    ];
+
+    const stream = makeStream(events);
+
+    // Three concurrent consumers
+    const [chatMsgs, responsesMsgs, items] = await Promise.all([
+      collectAll(buildMessageStream(stream)),
+      collectAll(buildResponsesMessageStream(stream)),
+      collectAll(buildItemsStream(stream)),
+    ]);
+
+    // All complete without blocking each other
+    expect(chatMsgs.length).toBeGreaterThan(0);
+    expect(responsesMsgs.length).toBeGreaterThan(0);
+    expect(items.length).toBeGreaterThan(0);
+
+    // Structurally different
+    const lastChat = chatMsgs[chatMsgs.length - 1]!;
+    const lastResponses = responsesMsgs[responsesMsgs.length - 1]!;
+
+    expect('id' in lastChat).toBe(false);
+    expect('id' in lastResponses).toBe(true);
+  });
+});
diff --git a/tests/pipelines/format-round-trip.test.ts b/tests/pipelines/format-round-trip.test.ts
new file mode 100644
index 0000000..fd4ab9a
--- /dev/null
+++ b/tests/pipelines/format-round-trip.test.ts
@@ -0,0 +1,169 @@
+import { describe, expect, it } from 'vitest';
+
+import { fromClaudeMessages, toClaudeMessage } from '../../src/lib/anthropic-compat.js';
+import { fromChatMessages, toChatMessage } from '../../src/lib/chat-compat.js';
+
+describe('Bidirectional format conversion', () => {
+  it('Claude round-trip: Claude messages -> fromClaudeMessages -> OR format -> each block type maps distinctly', () => {
+    const claudeMessages = [
+      {
+        role: 'user' as const,
+        content: [
+          {
+            type: 'text' as const,
+            text: 'Search for cats',
+          },
+        ],
+      },
+      {
+        role: 'assistant' as const,
+        content: [
+          {
+            type: 'text' as const,
+            text: 'Let me search.',
+          },
+          {
+            type: 'tool_use' as const,
+            id: 'tu_1',
+            name: 'search',
+            input: {
+              q: 'cats',
+            },
+          },
+        ],
+      },
+      {
+        role: 'user' as const,
+        content: [
+          {
+            type: 'tool_result' as const,
+            tool_use_id: 'tu_1',
+            content: 'Found cats',
+          },
+        ],
+      },
+    ];
+
+    // Claude -> OR format
+    const orFormat = fromClaudeMessages(claudeMessages);
+    const items = orFormat as any[];
+
+    // Text blocks -> EasyInputMessage
+    const textItems = items.filter((i: any) => !i.type || i.role);
+    expect(textItems.length).toBeGreaterThan(0);
+
+    // tool_use -> FunctionCallItem
+    const fnCalls = items.filter((i: any) => i.type === 'function_call');
+    expect(fnCalls).toHaveLength(1);
+    expect(fnCalls[0].name).toBe('search');
+
+    // tool_result -> FunctionCallOutputItem
+    const fnOutputs = items.filter((i: any) => i.type === 'function_call_output');
+    expect(fnOutputs).toHaveLength(1);
+    expect(fnOutputs[0].callId).toBe('tu_1');
+
+    // Verify OR format -> Claude format works on a response
+    const mockResponse = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Here are cats',
+              annotations: [],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Here are cats',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+    const claudeResponse = toClaudeMessage(mockResponse as any);
+    expect(claudeResponse.role).toBe('assistant');
+    expect(Array.isArray(claudeResponse.content)).toBe(true);
+  });
+
+  it('Chat round-trip: Chat messages -> fromChatMessages -> OR format -> each role maps distinctly', () => {
+    const chatMessages = [
+      {
+        role: 'system' as const,
+        content: 'You are helpful',
+      },
+      {
+        role: 'user' as const,
+        content: 'Hello',
+      },
+      {
+        role: 'assistant' as const,
+        content: 'Hi there',
+      },
+      {
+        role: 'tool' as const,
+        toolCallId: 'tc_1',
+        content: 'Tool result',
+      },
+    ] as any[];
+
+    // Chat -> OR format
+    const orFormat = fromChatMessages(chatMessages);
+    const items = orFormat as any[];
+
+    // System message
+    const systemItems = items.filter((i: any) => i.role === 'system');
+    expect(systemItems).toHaveLength(1);
+
+    // User message
+    const userItems = items.filter((i: any) => i.role === 'user');
+    expect(userItems).toHaveLength(1);
+
+    // Assistant message
+    const assistantItems = items.filter((i: any) => i.role === 'assistant');
+    expect(assistantItems).toHaveLength(1);
+
+    // Tool message -> FunctionCallOutputItem
+    const toolOutputs = items.filter((i: any) => i.type === 'function_call_output');
+    expect(toolOutputs).toHaveLength(1);
+
+    // Verify OR format -> Chat format works on a response
+    const mockResponse = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Response',
+              annotations: [],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Response',
+      model: 'test-model',
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+    const chatResponse = toChatMessage(mockResponse as any);
+    expect(chatResponse.role).toBe('assistant');
+    expect(typeof chatResponse.content).toBe('string');
+  });
+});
diff --git a/tests/pipelines/next-turn-params-pipeline.test.ts b/tests/pipelines/next-turn-params-pipeline.test.ts
new file mode 100644
index 0000000..f7becb5
--- /dev/null
+++ b/tests/pipelines/next-turn-params-pipeline.test.ts
@@ -0,0 +1,63 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  applyNextTurnParamsToRequest,
+  buildNextTurnParamsContext,
+  executeNextTurnParamsFunctions,
+} from '../../src/lib/next-turn-params.js';
+
+describe('Next-turn parameter adjustment pipeline', () => {
+  it('dynamic temperature: search tool with nextTurnParams.temperature -> context -> execute -> apply -> request updated', async () => {
+    const searchTool = tool({
+      name: 'search',
+      inputSchema: z.object({
+        query: z.string(),
+      }),
+      execute: async (args) => `Results for: ${args.query}`,
+      nextTurnParams: {
+        temperature: (input: any) => (input.query?.includes('creative') ? 0.9 : 0.1),
+      },
+    });
+
+    const request = {
+      model: 'gpt-4',
+      temperature: 0.5,
+      input: 'hello',
+    };
+
+    // Step 1: Build context from request
+    const ctx = buildNextTurnParamsContext(request as any);
+    expect(ctx.model).toBe('gpt-4');
+    expect(ctx.temperature).toBe(0.5);
+
+    // Step 2: Execute nextTurnParams functions
+    // The tool was called with { query: 'creative writing' }
+    const tools = [
+      searchTool,
+    ];
+    const toolCalls = [
+      {
+        id: 'tc_1',
+        name: 'search',
+        arguments: {
+          query: 'creative writing',
+        },
+      },
+    ];
+    const params = await executeNextTurnParamsFunctions(
+      toolCalls as any,
+      tools as any,
+      request as any,
+    );
+
+    expect(params).toHaveProperty('temperature', 0.9);
+
+    // Step 3: Apply to request
+    const modified = applyNextTurnParamsToRequest(request as any, params);
+    expect(modified.temperature).toBe(0.9);
+    expect(modified.model).toBe('gpt-4');
+    expect(modified.input).toBe('hello');
+  });
+});
diff --git a/tests/pipelines/orchestrator-utility-chain.test.ts b/tests/pipelines/orchestrator-utility-chain.test.ts
new file mode 100644
index 0000000..17da20e
--- /dev/null
+++ b/tests/pipelines/orchestrator-utility-chain.test.ts
@@ -0,0 +1,53 @@
+import { describe, expect, it } from 'vitest';
+import {
+  getToolExecutionErrors,
+  hasToolExecutionErrors,
+  summarizeToolExecutions,
+  toolResultsToMap,
+} from '../../src/lib/tool-orchestrator.js';
+import type { Tool, ToolExecutionResult } from '../../src/lib/tool-types.js';
+
+describe('Orchestrator utility chain', () => {
+  it('mixed results: one success + one failure -> toolResultsToMap -> hasToolExecutionErrors -> getToolExecutionErrors -> summarizeToolExecutions', () => {
+    const successResult: ToolExecutionResult<Tool> = {
+      toolCallId: 'tc_1',
+      toolName: 'search',
+      result: {
+        data: 'found',
+      },
+    };
+
+    const failureResult: ToolExecutionResult<Tool> = {
+      toolCallId: 'tc_2',
+      toolName: 'delete',
+      result: null,
+      error: new Error('Permission denied'),
+    };
+
+    const results = [
+      successResult,
+      failureResult,
+    ];
+
+    // Step 1: Map results
+    const map = toolResultsToMap(results);
+    expect(map.size).toBe(2);
+    expect(map.get('tc_1')).toBeDefined();
+    expect(map.get('tc_2')).toBeDefined();
+
+    // Step 2: Check for errors
+    expect(hasToolExecutionErrors(results)).toBe(true);
+
+    // Step 3: Get errors
+    const errors = getToolExecutionErrors(results);
+    expect(errors).toHaveLength(1);
+    expect(errors[0]!.message).toBe('Permission denied');
+
+    // Step 4: Summarize
+    const summary = summarizeToolExecutions(results);
+    expect(summary).toContain('search');
+    expect(summary).toContain('SUCCESS');
+    expect(summary).toContain('delete');
+    expect(summary).toContain('Permission denied');
+  });
+});
diff --git a/tests/pipelines/stop-condition-pipeline.test.ts b/tests/pipelines/stop-condition-pipeline.test.ts
new file mode 100644
index 0000000..43ffea3
--- /dev/null
+++ b/tests/pipelines/stop-condition-pipeline.test.ts
@@ -0,0 +1,127 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  hasToolCall,
+  isStopConditionMet,
+  maxCost,
+  stepCountIs,
+} from '../../src/lib/stop-conditions.js';
+import type { StepResult } from '../../src/lib/tool-types.js';
+
+function makeStep(overrides: Partial<StepResult> = {}): StepResult {
+  return {
+    response: {} as any,
+    toolCalls: [],
+    finishReason: undefined,
+    usage: undefined,
+    ...overrides,
+  } as StepResult;
+}
+
+describe('Stop condition pipeline: results -> steps -> conditions -> decision', () => {
+  it('step count: 3 tool rounds -> StepResult[] length 3 -> stepCountIs(3) true -> isStopConditionMet true', async () => {
+    const steps = [
+      makeStep(),
+      makeStep(),
+      makeStep(),
+    ];
+    expect(
+      stepCountIs(3)({
+        steps,
+      }),
+    ).toBe(true);
+    const result = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(3),
+      ],
+      steps,
+    });
+    expect(result).toBe(true);
+  });
+
+  it('tool call: round with "search" tool -> hasToolCall("search") true -> isStopConditionMet true', async () => {
+    const steps = [
+      makeStep({
+        toolCalls: [
+          {
+            name: 'search',
+            id: 'tc1',
+            arguments: {},
+          },
+        ] as any,
+      }),
+    ];
+    expect(
+      hasToolCall('search')({
+        steps,
+      }),
+    ).toBe(true);
+    const result = await isStopConditionMet({
+      stopConditions: [
+        hasToolCall('search'),
+      ],
+      steps,
+    });
+    expect(result).toBe(true);
+  });
+
+  it('cost: round with usage.cost = 0.30 -> maxCost(0.25) true -> stop', async () => {
+    const steps = [
+      makeStep({
+        usage: {
+          totalTokens: 100,
+          inputTokens: 50,
+          outputTokens: 50,
+          cost: 0.3,
+        } as any,
+      }),
+    ];
+    expect(
+      maxCost(0.25)({
+        steps,
+      }),
+    ).toBe(true);
+    const result = await isStopConditionMet({
+      stopConditions: [
+        maxCost(0.25),
+      ],
+      steps,
+    });
+    expect(result).toBe(true);
+  });
+
+  it('combined OR: stepCountIs(10) false + hasToolCall("done") true -> isStopConditionMet true', async () => {
+    const steps = [
+      makeStep({
+        toolCalls: [
+          {
+            name: 'done',
+            id: 'tc1',
+            arguments: {},
+          },
+        ] as any,
+      }),
+    ];
+    // stepCountIs(10) is false (only 1 step)
+    expect(
+      stepCountIs(10)({
+        steps,
+      }),
+    ).toBe(false);
+    // hasToolCall('done') is true
+    expect(
+      hasToolCall('done')({
+        steps,
+      }),
+    ).toBe(true);
+    // OR logic -> true
+    const result = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(10),
+        hasToolCall('done'),
+      ],
+      steps,
+    });
+    expect(result).toBe(true);
+  });
+});
diff --git a/tests/pipelines/streaming-pipeline.test.ts b/tests/pipelines/streaming-pipeline.test.ts
new file mode 100644
index 0000000..2c9c6af
--- /dev/null
+++ b/tests/pipelines/streaming-pipeline.test.ts
@@ -0,0 +1,159 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import {
+  buildItemsStream,
+  consumeStreamForCompletion,
+  extractTextDeltas,
+} from '../../src/lib/stream-transformers.js';
+
+function makeStream(events: any[]): ReusableReadableStream<any> {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) {
+        controller.enqueue(event);
+      }
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const result: T[] = [];
+  for await (const item of iter) {
+    result.push(item);
+  }
+  return result;
+}
+
+describe('Full streaming pipeline: raw events -> guards -> transformers -> consumer', () => {
+  it('text streaming: guard filters to text only -> extractTextDeltas yields strings -> non-text absent', async () => {
+    const events = [
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+      },
+      {
+        type: 'response.reasoning_text.delta',
+        delta: 'thinking',
+      },
+      {
+        type: 'response.function_call_arguments.delta',
+        delta: '{"q":',
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: ' world',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const textDeltas = await collectAll(extractTextDeltas(stream));
+
+    // Guard true only for text events
+    expect(textDeltas).toEqual([
+      'Hello',
+      ' world',
+    ]);
+    // Non-text absent
+    expect(textDeltas).not.toContain('thinking');
+    expect(textDeltas).not.toContain('{"q":');
+  });
+
+  it('items streaming: type guards dispatch to per-type handlers -> consumer gets distinct item types', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'function_call',
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '',
+          status: 'in_progress',
+        },
+      },
+      {
+        type: 'response.function_call_arguments.delta',
+        delta: '{"q":"test"}',
+        itemId: 'fc_1',
+      },
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'reasoning',
+          id: 'r_1',
+          status: 'in_progress',
+          summary: [],
+        },
+      },
+      {
+        type: 'response.reasoning_text.delta',
+        delta: 'thinking',
+        itemId: 'r_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+
+    const messageItems = items.filter((i: any) => i.type === 'message');
+    const fnCallItems = items.filter((i: any) => i.type === 'function_call');
+    const reasoningItems = items.filter((i: any) => i.type === 'reasoning');
+
+    // Each type present and distinct
+    expect(messageItems.length).toBeGreaterThan(0);
+    expect(fnCallItems.length).toBeGreaterThan(0);
+    expect(reasoningItems.length).toBeGreaterThan(0);
+
+    // Message items have text
+    expect((messageItems[messageItems.length - 1] as any).content[0].text).toBe('Hello');
+    // Function call items have arguments
+    expect((fnCallItems[fnCallItems.length - 1] as any).arguments).toBe('{"q":"test"}');
+    // Reasoning items have content
+    expect((reasoningItems[reasoningItems.length - 1] as any).summary[0].text).toBe('thinking');
+  });
+
+  it('completion: isResponseCompletedEvent true -> consumeStreamForCompletion returns response -> stream terminates', async () => {
+    const response = {
+      id: 'r1',
+      status: 'completed',
+      output: [],
+    };
+    const events = [
+      {
+        type: 'response.output_text.delta',
+        delta: 'data',
+      },
+      {
+        type: 'response.completed',
+        response,
+      },
+    ];
+    const stream = makeStream(events);
+    const result = await consumeStreamForCompletion(stream);
+    expect(result).toEqual(response);
+  });
+});
diff --git a/tests/pipelines/tool-execution-pipeline.test.ts b/tests/pipelines/tool-execution-pipeline.test.ts
new file mode 100644
index 0000000..ec5c52c
--- /dev/null
+++ b/tests/pipelines/tool-execution-pipeline.test.ts
@@ -0,0 +1,138 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  executeTool,
+  formatToolExecutionError,
+  formatToolResultForModel,
+} from '../../src/lib/tool-executor.js';
+
+describe('Full tool execution pipeline: definition -> dispatch -> validate -> execute -> format', () => {
+  it('regular tool: tool() -> executeTool -> validates -> executes -> formatToolResultForModel produces JSON', async () => {
+    const addTool = tool({
+      name: 'add',
+      inputSchema: z.object({
+        a: z.number(),
+        b: z.number(),
+      }),
+      outputSchema: z.object({
+        sum: z.number(),
+      }),
+      execute: async (args) => ({
+        sum: args.a + args.b,
+      }),
+    });
+
+    const toolCall = {
+      id: 'tc_1',
+      name: 'add',
+      arguments: {
+        a: 2,
+        b: 3,
+      },
+    };
+    const result = await executeTool(addTool, toolCall, {
+      numberOfTurns: 1,
+    });
+
+    // Dispatch worked (regular path)
+    expect(result.toolCallId).toBe('tc_1');
+    expect(result.toolName).toBe('add');
+    // Execution worked
+    expect(result.result).toEqual({
+      sum: 5,
+    });
+    // No error
+    expect(result.error).toBeUndefined();
+
+    // Format for model
+    const formatted = formatToolResultForModel(result);
+    expect(typeof formatted).toBe('string');
+    const parsed = JSON.parse(formatted);
+    expect(parsed.sum).toBe(5);
+  });
+
+  it('generator tool: tool() with eventSchema -> executeTool -> generator yields events -> result has both', async () => {
+    const streamTool = tool({
+      name: 'stream_add',
+      inputSchema: z.object({
+        a: z.number(),
+        b: z.number(),
+      }),
+      eventSchema: z.object({
+        progress: z.number(),
+      }),
+      outputSchema: z.object({
+        sum: z.number(),
+      }),
+      execute: async function* (args) {
+        yield {
+          progress: 50,
+        };
+        yield {
+          progress: 100,
+        };
+        return {
+          sum: args.a + args.b,
+        };
+      },
+    });
+
+    const toolCall = {
+      id: 'tc_2',
+      name: 'stream_add',
+      arguments: {
+        a: 3,
+        b: 4,
+      },
+    };
+    const result = await executeTool(streamTool, toolCall, {
+      numberOfTurns: 1,
+    });
+
+    // Dispatch worked (generator path)
+    expect(result.toolCallId).toBe('tc_2');
+    // Generator yielded events
+    expect(result.preliminaryResults).toHaveLength(2);
+    expect(result.preliminaryResults![0]).toEqual({
+      progress: 50,
+    });
+    expect(result.preliminaryResults![1]).toEqual({
+      progress: 100,
+    });
+    // Final result
+    expect(result.result).toEqual({
+      sum: 7,
+    });
+  });
+
+  it('error pipeline: invalid input -> executeTool -> caught -> ToolExecutionResult has error -> formatToolExecutionError includes details', async () => {
+    const strictTool = tool({
+      name: 'strict',
+      inputSchema: z.object({
+        count: z.number().min(1),
+      }),
+      execute: async (args) => args.count,
+    });
+
+    const toolCall = {
+      id: 'tc_3',
+      name: 'strict',
+      arguments: {
+        count: -5,
+      },
+    };
+    const result = await executeTool(strictTool, toolCall, {
+      numberOfTurns: 1,
+    });
+
+    // Error was caught
+    expect(result.error).toBeDefined();
+    expect(result.result).toBeNull();
+
+    // Format error includes details
+    const errorFormatted = formatToolExecutionError(result.error!, toolCall as any);
+    expect(errorFormatted).toContain('strict');
+  });
+});
diff --git a/vitest.config.ts b/vitest.config.ts
index efca4ae..40ad0b0 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -28,6 +28,83 @@ export default defineConfig({
           hookTimeout: 10000,
         },
       },
+      {
+        extends: true,
+        test: {
+          name: 'behavior',
+          include: [
+            'tests/behavior/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'boundaries',
+          include: [
+            'tests/boundaries/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'composition',
+          include: [
+            'tests/composition/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'contracts',
+          include: [
+            'tests/contracts/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'integration',
+          include: [
+            'tests/integration/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'dispatch',
+          include: [
+            'tests/dispatch/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'pipelines',
+          include: [
+            'tests/pipelines/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
       {
         extends: true,
         test: {

From d6506a9bca86830641eb25736ad7a22dc26fc9cf Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 10 Apr 2026 00:26:06 +0000
Subject: [PATCH 2/4] refactor: centralize test model identifiers into shared
 test-constants

Replace all hardcoded 'gpt-4' and 'test-model' references in new test
files with TEST_MODEL and TEST_MODEL_ALT imported from
tests/test-constants.ts. This makes it easy to update the model name
in one place and avoids referencing outdated model identifiers.

Co-Authored-By: Robert Yeakel <robert.yeakel@openrouter.ai>
---
 tests/behavior/async-params.test.ts             | 17 +++++++++--------
 tests/behavior/next-turn-params.test.ts         | 17 +++++++++--------
 tests/behavior/turn-context.test.ts             |  3 ++-
 tests/composition/format-compatibility.test.ts  |  3 ++-
 tests/composition/next-turn-params-flow.test.ts |  9 +++++----
 tests/composition/stream-data-pipeline.test.ts  |  5 +++--
 tests/contracts/async-params.test.ts            | 15 ++++++++-------
 tests/contracts/response-extractors.test.ts     |  5 +++--
 .../dispatch/claude-conversion-dispatch.test.ts |  5 +++--
 .../next-turn-params-request.test.ts            |  5 +++--
 .../turn-context-async-params.test.ts           |  7 ++++---
 .../pipelines/async-resolution-pipeline.test.ts |  5 +++--
 tests/pipelines/claude-conversion-deep.test.ts  |  7 ++++---
 tests/pipelines/dual-format-output.test.ts      |  3 ++-
 tests/pipelines/format-round-trip.test.ts       |  5 +++--
 .../pipelines/next-turn-params-pipeline.test.ts |  7 ++++---
 tests/test-constants.ts                         | 13 +++++++++++++
 17 files changed, 80 insertions(+), 51 deletions(-)
 create mode 100644 tests/test-constants.ts

diff --git a/tests/behavior/async-params.test.ts b/tests/behavior/async-params.test.ts
index f0dc02f..1a405a8 100644
--- a/tests/behavior/async-params.test.ts
+++ b/tests/behavior/async-params.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, it } from 'vitest';
 import { hasAsyncFunctions, resolveAsyncFunctions } from '../../src/lib/async-params.js';
 import type { TurnContext } from '../../src/lib/tool-types.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 const turnCtx: TurnContext = {
   numberOfTurns: 2,
@@ -9,18 +10,18 @@ const turnCtx: TurnContext = {
 describe('async params - resolveAsyncFunctions', () => {
   it('passes through static values unchanged', async () => {
     const input = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: 0.7,
       input: 'hi',
     } as any;
     const result = await resolveAsyncFunctions(input, turnCtx);
-    expect(result.model).toBe('gpt-4');
+    expect(result.model).toBe(TEST_MODEL);
     expect(result.temperature).toBe(0.7);
   });
 
   it('resolves sync function fields with turnContext', async () => {
     const input = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: (ctx: TurnContext) => ctx.numberOfTurns * 0.1,
       input: 'test',
     } as any;
@@ -30,7 +31,7 @@ describe('async params - resolveAsyncFunctions', () => {
 
   it('resolves async function fields with turnContext', async () => {
     const input = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: async (ctx: TurnContext) => ctx.numberOfTurns * 0.15,
       input: 'test',
     } as any;
@@ -40,7 +41,7 @@ describe('async params - resolveAsyncFunctions', () => {
 
   it('strips client-only fields (stopWhen, state, requireApproval, context, etc.)', async () => {
     const input = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       input: 'test',
       stopWhen: () => true,
       state: {},
@@ -56,7 +57,7 @@ describe('async params - resolveAsyncFunctions', () => {
 
   it('wraps field resolution errors with field name', async () => {
     const input = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: () => {
         throw new Error('compute failed');
       },
@@ -70,7 +71,7 @@ describe('async params - hasAsyncFunctions', () => {
   it('returns true when any field is a function', () => {
     expect(
       hasAsyncFunctions({
-        model: 'gpt-4',
+        model: TEST_MODEL,
         temperature: () => 0.5,
       }),
     ).toBe(true);
@@ -79,7 +80,7 @@ describe('async params - hasAsyncFunctions', () => {
   it('returns false when all fields are static values', () => {
     expect(
       hasAsyncFunctions({
-        model: 'gpt-4',
+        model: TEST_MODEL,
         temperature: 0.5,
       }),
     ).toBe(false);
diff --git a/tests/behavior/next-turn-params.test.ts b/tests/behavior/next-turn-params.test.ts
index 234c843..2295011 100644
--- a/tests/behavior/next-turn-params.test.ts
+++ b/tests/behavior/next-turn-params.test.ts
@@ -8,17 +8,18 @@ import {
 } from '../../src/lib/next-turn-params.js';
 import { tool } from '../../src/lib/tool.js';
 import type { ParsedToolCall, Tool } from '../../src/lib/tool-types.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('next-turn params - buildNextTurnParamsContext', () => {
   it('extracts relevant fields from request', () => {
     const request: models.ResponsesRequest = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       input: 'hello',
       temperature: 0.7,
       maxOutputTokens: 1000,
     } as any;
     const ctx = buildNextTurnParamsContext(request);
-    expect(ctx.model).toBe('gpt-4');
+    expect(ctx.model).toBe(TEST_MODEL);
     expect(ctx.input).toBe('hello');
     expect(ctx.temperature).toBe(0.7);
     expect(ctx.maxOutputTokens).toBe(1000);
@@ -57,7 +58,7 @@ describe('next-turn params - executeNextTurnParamsFunctions', () => {
       },
     };
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       input: 'hello',
     } as any;
     const result = await executeNextTurnParamsFunctions(
@@ -175,7 +176,7 @@ describe('next-turn params - executeNextTurnParamsFunctions', () => {
 describe('next-turn params - applyNextTurnParamsToRequest', () => {
   it('merges computed params into request', () => {
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: 0.7,
       input: 'test',
     } as any;
@@ -184,12 +185,12 @@ describe('next-turn params - applyNextTurnParamsToRequest', () => {
     };
     const result = applyNextTurnParamsToRequest(request, computed);
     expect(result.temperature).toBe(0.2);
-    expect(result.model).toBe('gpt-4');
+    expect(result.model).toBe(TEST_MODEL);
   });
 
   it('converts null values to undefined for API compatibility', () => {
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
     } as any;
     const computed = {
       temperature: null,
@@ -200,7 +201,7 @@ describe('next-turn params - applyNextTurnParamsToRequest', () => {
 
   it('returns new object without mutating original', () => {
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: 0.7,
     } as any;
     const result = applyNextTurnParamsToRequest(request, {
@@ -212,7 +213,7 @@ describe('next-turn params - applyNextTurnParamsToRequest', () => {
 
   it('handles empty computed params', () => {
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: 0.7,
     } as any;
     const result = applyNextTurnParamsToRequest(request, {});
diff --git a/tests/behavior/turn-context.test.ts b/tests/behavior/turn-context.test.ts
index 2ae2724..e0c8326 100644
--- a/tests/behavior/turn-context.test.ts
+++ b/tests/behavior/turn-context.test.ts
@@ -1,5 +1,6 @@
 import { describe, expect, it } from 'vitest';
 import { buildTurnContext, normalizeInputToArray } from '../../src/lib/turn-context.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('turn context - buildTurnContext', () => {
   it('sets numberOfTurns from options', () => {
@@ -27,7 +28,7 @@ describe('turn context - buildTurnContext', () => {
 
   it('includes turnRequest when provided', () => {
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       input: 'hello',
     } as any;
     const ctx = buildTurnContext({
diff --git a/tests/composition/format-compatibility.test.ts b/tests/composition/format-compatibility.test.ts
index 393acf2..8588ed1 100644
--- a/tests/composition/format-compatibility.test.ts
+++ b/tests/composition/format-compatibility.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, it } from 'vitest';
 import { toClaudeMessage } from '../../src/lib/anthropic-compat.js';
 import { toChatMessage } from '../../src/lib/chat-compat.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 function makeResponse(text: string) {
   return {
@@ -22,7 +23,7 @@ function makeResponse(text: string) {
     ],
     status: 'completed' as const,
     outputText: text,
-    model: 'test-model',
+    model: TEST_MODEL,
     usage: {
       totalTokens: 100,
       inputTokens: 50,
diff --git a/tests/composition/next-turn-params-flow.test.ts b/tests/composition/next-turn-params-flow.test.ts
index d99b1a8..3496452 100644
--- a/tests/composition/next-turn-params-flow.test.ts
+++ b/tests/composition/next-turn-params-flow.test.ts
@@ -5,6 +5,7 @@ import {
   buildNextTurnParamsContext,
   executeNextTurnParamsFunctions,
 } from '../../src/lib/next-turn-params.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('Next-turn params: tools -> computation -> request modification', () => {
   it('executeNextTurnParamsFunctions output accepted by applyNextTurnParamsToRequest -> modified request', async () => {
@@ -30,7 +31,7 @@ describe('Next-turn params: tools -> computation -> request modification', () =>
       },
     ];
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: 0.7,
     };
 
@@ -44,18 +45,18 @@ describe('Next-turn params: tools -> computation -> request modification', () =>
 
     const modified = applyNextTurnParamsToRequest(request as any, params);
     expect(modified.temperature).toBe(0.5);
-    expect(modified.model).toBe('gpt-4');
+    expect(modified.model).toBe(TEST_MODEL);
   });
 
   it('buildNextTurnParamsContext extracts from request -> context passed to nextTurnParams functions', () => {
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: 0.7,
       input: 'hello',
     };
 
     const ctx = buildNextTurnParamsContext(request as any);
-    expect(ctx.model).toBe('gpt-4');
+    expect(ctx.model).toBe(TEST_MODEL);
     expect(ctx.temperature).toBe(0.7);
     expect(ctx.input).toBe('hello');
   });
diff --git a/tests/composition/stream-data-pipeline.test.ts b/tests/composition/stream-data-pipeline.test.ts
index 13215be..cd9f565 100644
--- a/tests/composition/stream-data-pipeline.test.ts
+++ b/tests/composition/stream-data-pipeline.test.ts
@@ -5,6 +5,7 @@ import {
   extractToolCallsFromResponse,
   responseHasToolCalls,
 } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 function makeStream<T>(items: T[]): ReusableReadableStream<T> {
   const source = new ReadableStream<T>({
@@ -113,7 +114,7 @@ describe('Stream data pipeline: source -> guards -> transformers -> consumers',
       ],
       status: 'completed' as const,
       outputText: '',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
@@ -146,7 +147,7 @@ describe('Stream data pipeline: source -> guards -> transformers -> consumers',
       ],
       status: 'completed' as const,
       outputText: 'Hello',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
diff --git a/tests/contracts/async-params.test.ts b/tests/contracts/async-params.test.ts
index c647630..1acdda4 100644
--- a/tests/contracts/async-params.test.ts
+++ b/tests/contracts/async-params.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, it } from 'vitest';
 
 import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('resolveAsyncFunctions - three field types handled distinctly', () => {
   const turnCtx = {
@@ -10,12 +11,12 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
   it('static values (model, temperature as literals) -> passed through unchanged', async () => {
     const result = await resolveAsyncFunctions(
       {
-        model: 'gpt-4',
+        model: TEST_MODEL,
         temperature: 0.7,
       } as any,
       turnCtx,
     );
-    expect(result.model).toBe('gpt-4');
+    expect(result.model).toBe(TEST_MODEL);
     expect(result.temperature).toBe(0.7);
   });
 
@@ -32,7 +33,7 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
   it('client-only fields (stopWhen, state, requireApproval, context, onTurnStart, onTurnEnd) -> stripped entirely', async () => {
     const result = await resolveAsyncFunctions(
       {
-        model: 'gpt-4',
+        model: TEST_MODEL,
         stopWhen: () => true,
         state: {
           get: () => null,
@@ -52,7 +53,7 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
     expect(result).not.toHaveProperty('context');
     expect(result).not.toHaveProperty('onTurnStart');
     expect(result).not.toHaveProperty('onTurnEnd');
-    expect(result.model).toBe('gpt-4');
+    expect(result.model).toBe(TEST_MODEL);
   });
 
   it('tools field -> preserved (exception to client-only stripping)', async () => {
@@ -66,7 +67,7 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
     ];
     const result = await resolveAsyncFunctions(
       {
-        model: 'gpt-4',
+        model: TEST_MODEL,
         tools,
       } as any,
       turnCtx,
@@ -90,14 +91,14 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
   it('mix of static + function + client-only in one call -> all handled correctly', async () => {
     const result = await resolveAsyncFunctions(
       {
-        model: 'gpt-4',
+        model: TEST_MODEL,
         temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
         stopWhen: () => true,
         input: 'hello',
       } as any,
       turnCtx,
     );
-    expect(result.model).toBe('gpt-4');
+    expect(result.model).toBe(TEST_MODEL);
     expect(result.temperature).toBe(0.2);
     expect(result).not.toHaveProperty('stopWhen');
     expect(result.input).toBe('hello');
diff --git a/tests/contracts/response-extractors.test.ts b/tests/contracts/response-extractors.test.ts
index 05b0732..e2fd008 100644
--- a/tests/contracts/response-extractors.test.ts
+++ b/tests/contracts/response-extractors.test.ts
@@ -4,6 +4,7 @@ import {
   extractMessageFromResponse,
   extractResponsesMessageFromResponse,
 } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 function makeResponse(text: string) {
   return {
@@ -25,7 +26,7 @@ function makeResponse(text: string) {
     ],
     status: 'completed' as const,
     outputText: text,
-    model: 'test-model',
+    model: TEST_MODEL,
     usage: {
       totalTokens: 100,
       inputTokens: 50,
@@ -84,7 +85,7 @@ describe('Response extractors - same response, distinct shapes', () => {
       ],
       status: 'completed' as const,
       outputText: '',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
diff --git a/tests/dispatch/claude-conversion-dispatch.test.ts b/tests/dispatch/claude-conversion-dispatch.test.ts
index 491459a..db15336 100644
--- a/tests/dispatch/claude-conversion-dispatch.test.ts
+++ b/tests/dispatch/claude-conversion-dispatch.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, it } from 'vitest';
 
 import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('convertToClaudeMessage routes items via output item guards', () => {
   it('same response with message + function_call: isOutputMessage -> text block, isFunctionCallItem -> tool_use block', () => {
@@ -31,7 +32,7 @@ describe('convertToClaudeMessage routes items via output item guards', () => {
       ],
       status: 'completed' as const,
       outputText: 'Hello',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
@@ -72,7 +73,7 @@ describe('convertToClaudeMessage routes items via output item guards', () => {
       ],
       status: 'completed' as const,
       outputText: '',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
diff --git a/tests/integration/next-turn-params-request.test.ts b/tests/integration/next-turn-params-request.test.ts
index b40034c..8fc8852 100644
--- a/tests/integration/next-turn-params-request.test.ts
+++ b/tests/integration/next-turn-params-request.test.ts
@@ -4,6 +4,7 @@ import {
   applyNextTurnParamsToRequest,
   executeNextTurnParamsFunctions,
 } from '../../src/lib/next-turn-params.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('Next-turn params -> request modification -> API readiness', () => {
   it('executeNextTurnParamsFunctions computes new temperature -> applyNextTurnParamsToRequest produces request with updated temperature', async () => {
@@ -29,7 +30,7 @@ describe('Next-turn params -> request modification -> API readiness', () => {
       },
     ];
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: 0.7,
       input: 'hello',
     };
@@ -41,7 +42,7 @@ describe('Next-turn params -> request modification -> API readiness', () => {
 
     const modified = applyNextTurnParamsToRequest(request as any, params);
     expect(modified.temperature).toBe(0.3);
-    expect(modified.model).toBe('gpt-4');
+    expect(modified.model).toBe(TEST_MODEL);
     expect(modified.input).toBe('hello');
   });
 });
diff --git a/tests/integration/turn-context-async-params.test.ts b/tests/integration/turn-context-async-params.test.ts
index 98ec05c..dcccfd1 100644
--- a/tests/integration/turn-context-async-params.test.ts
+++ b/tests/integration/turn-context-async-params.test.ts
@@ -1,6 +1,7 @@
 import { describe, expect, it } from 'vitest';
 import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
 import { buildTurnContext } from '../../src/lib/turn-context.js';
+import { TEST_MODEL, TEST_MODEL_ALT } from '../test-constants.js';
 
 describe('buildTurnContext -> resolveAsyncFunctions', () => {
   it('parameter function receives TurnContext with correct numberOfTurns', async () => {
@@ -9,7 +10,7 @@ describe('buildTurnContext -> resolveAsyncFunctions', () => {
     });
     const result = await resolveAsyncFunctions(
       {
-        model: 'gpt-4',
+        model: TEST_MODEL,
         temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
       } as any,
       turnCtx,
@@ -31,10 +32,10 @@ describe('buildTurnContext -> resolveAsyncFunctions', () => {
     });
     const result = await resolveAsyncFunctions(
       {
-        model: (ctx: any) => (ctx.toolCall ? 'gpt-4-turbo' : 'gpt-4'),
+        model: (ctx: any) => (ctx.toolCall ? TEST_MODEL_ALT : TEST_MODEL),
       } as any,
       turnCtx,
     );
-    expect(result.model).toBe('gpt-4-turbo');
+    expect(result.model).toBe(TEST_MODEL_ALT);
   });
 });
diff --git a/tests/pipelines/async-resolution-pipeline.test.ts b/tests/pipelines/async-resolution-pipeline.test.ts
index 8ec3109..aea4d03 100644
--- a/tests/pipelines/async-resolution-pipeline.test.ts
+++ b/tests/pipelines/async-resolution-pipeline.test.ts
@@ -2,6 +2,7 @@ import { describe, expect, it } from 'vitest';
 
 import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
 import { stepCountIs } from '../../src/lib/stop-conditions.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('Async resolution + clean API request', () => {
   it('mixed input: static model, function temperature, client-only stopWhen -> three paths verified in one call', async () => {
@@ -11,7 +12,7 @@ describe('Async resolution + clean API request', () => {
 
     const result = await resolveAsyncFunctions(
       {
-        model: 'gpt-4',
+        model: TEST_MODEL,
         temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
         stopWhen: stepCountIs(5),
         input: 'hello',
@@ -20,7 +21,7 @@ describe('Async resolution + clean API request', () => {
     );
 
     // Static: preserved
-    expect(result.model).toBe('gpt-4');
+    expect(result.model).toBe(TEST_MODEL);
     // Function: resolved
     expect(result.temperature).toBe(0.2);
     // Client-only: stripped
diff --git a/tests/pipelines/claude-conversion-deep.test.ts b/tests/pipelines/claude-conversion-deep.test.ts
index fdd536c..27ab9aa 100644
--- a/tests/pipelines/claude-conversion-deep.test.ts
+++ b/tests/pipelines/claude-conversion-deep.test.ts
@@ -5,6 +5,7 @@ import {
   getUnsupportedContentSummary,
   hasUnsupportedContent,
 } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('Claude conversion deep pipeline', () => {
   it('multi-item response: message + function_call + reasoning + web_search -> each guard routes to distinct block', () => {
@@ -51,7 +52,7 @@ describe('Claude conversion deep pipeline', () => {
       ],
       status: 'completed' as const,
       outputText: 'Hello',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 200,
         inputTokens: 100,
@@ -107,7 +108,7 @@ describe('Claude conversion deep pipeline', () => {
       ],
       status: 'completed' as const,
       outputText: 'Here is the answer',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
@@ -149,7 +150,7 @@ describe('Claude conversion deep pipeline', () => {
       ],
       status: 'completed' as const,
       outputText: '',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
diff --git a/tests/pipelines/dual-format-output.test.ts b/tests/pipelines/dual-format-output.test.ts
index cd65a9e..4755564 100644
--- a/tests/pipelines/dual-format-output.test.ts
+++ b/tests/pipelines/dual-format-output.test.ts
@@ -9,6 +9,7 @@ import {
   extractMessageFromResponse,
   extractToolCallsFromResponse,
 } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 function makeStream(events: any[]): ReusableReadableStream<any> {
   const source = new ReadableStream({
@@ -59,7 +60,7 @@ describe('Dual-format output: same response -> structurally distinct formats', (
       ],
       status: 'completed' as const,
       outputText: 'Found results',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
diff --git a/tests/pipelines/format-round-trip.test.ts b/tests/pipelines/format-round-trip.test.ts
index fd4ab9a..9e1f200 100644
--- a/tests/pipelines/format-round-trip.test.ts
+++ b/tests/pipelines/format-round-trip.test.ts
@@ -2,6 +2,7 @@ import { describe, expect, it } from 'vitest';
 
 import { fromClaudeMessages, toClaudeMessage } from '../../src/lib/anthropic-compat.js';
 import { fromChatMessages, toChatMessage } from '../../src/lib/chat-compat.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('Bidirectional format conversion', () => {
   it('Claude round-trip: Claude messages -> fromClaudeMessages -> OR format -> each block type maps distinctly', () => {
@@ -82,7 +83,7 @@ describe('Bidirectional format conversion', () => {
       ],
       status: 'completed' as const,
       outputText: 'Here are cats',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
@@ -155,7 +156,7 @@ describe('Bidirectional format conversion', () => {
       ],
       status: 'completed' as const,
       outputText: 'Response',
-      model: 'test-model',
+      model: TEST_MODEL,
       usage: {
         totalTokens: 100,
         inputTokens: 50,
diff --git a/tests/pipelines/next-turn-params-pipeline.test.ts b/tests/pipelines/next-turn-params-pipeline.test.ts
index f7becb5..bd1eef1 100644
--- a/tests/pipelines/next-turn-params-pipeline.test.ts
+++ b/tests/pipelines/next-turn-params-pipeline.test.ts
@@ -7,6 +7,7 @@ import {
   buildNextTurnParamsContext,
   executeNextTurnParamsFunctions,
 } from '../../src/lib/next-turn-params.js';
+import { TEST_MODEL } from '../test-constants.js';
 
 describe('Next-turn parameter adjustment pipeline', () => {
   it('dynamic temperature: search tool with nextTurnParams.temperature -> context -> execute -> apply -> request updated', async () => {
@@ -22,14 +23,14 @@ describe('Next-turn parameter adjustment pipeline', () => {
     });
 
     const request = {
-      model: 'gpt-4',
+      model: TEST_MODEL,
       temperature: 0.5,
       input: 'hello',
     };
 
     // Step 1: Build context from request
     const ctx = buildNextTurnParamsContext(request as any);
-    expect(ctx.model).toBe('gpt-4');
+    expect(ctx.model).toBe(TEST_MODEL);
     expect(ctx.temperature).toBe(0.5);
 
     // Step 2: Execute nextTurnParams functions
@@ -57,7 +58,7 @@ describe('Next-turn parameter adjustment pipeline', () => {
     // Step 3: Apply to request
     const modified = applyNextTurnParamsToRequest(request as any, params);
     expect(modified.temperature).toBe(0.9);
-    expect(modified.model).toBe('gpt-4');
+    expect(modified.model).toBe(TEST_MODEL);
     expect(modified.input).toBe('hello');
   });
 });
diff --git a/tests/test-constants.ts b/tests/test-constants.ts
new file mode 100644
index 0000000..ee921a9
--- /dev/null
+++ b/tests/test-constants.ts
@@ -0,0 +1,13 @@
+/**
+ * Shared test constants for model identifiers.
+ *
+ * Unit/integration tests use a synthetic placeholder so they never
+ * depend on a real model existing. Change these in one place if the
+ * convention needs to be updated.
+ */
+
+/** Default model identifier used in non-e2e tests. */
+export const TEST_MODEL = 'openai/gpt-4.1-nano';
+
+/** Alternative model for tests that need a second, distinct model. */
+export const TEST_MODEL_ALT = 'openai/gpt-4.1-mini';

From 96ba74bbdb3e80a328ccd4fb1f56958e92eceb2b Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 10 Apr 2026 03:31:49 +0000
Subject: [PATCH 3/4] refactor: recategorize miscategorized tests and remove
 redundant tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Moves:
- boundaries → contracts: conversation-state-results, tool-factory-shapes
- contracts → boundaries: execute-tool-boundary
- contracts → behavior: consume-stream-completion, stop-conditions
- composition → behavior: input-normalization, format-compatibility
- composition → integration: next-turn-params-flow, orchestrator-executor
- integration → behavior: conversation-state-format, stop-conditions-step-result
- pipelines → behavior: async-resolution-pipeline, orchestrator-utility-chain
- pipelines → integration: format-round-trip

Splits:
- pipelines/claude-conversion-deep → dispatch (routing test), behavior (annotations), integration (unsupported content)
- composition/stream-data-pipeline → contracts/tool-call-response-consistency (kept contract test, removed redundant stream tests)

Removals:
- dispatch/execute-tool-dispatch (redundant with behavior/tool-execution)
- integration/reusable-stream-consumers test 1 (redundant with behavior/reusable-stream)

Co-Authored-By: Robert Yeakel <robert.yeakel@openrouter.ai>
---
 .../async-resolution-pipeline.test.ts         |   0
 .../claude-conversion-annotations.test.ts     |  62 +++++++
 .../consume-stream-completion.test.ts         |   0
 .../conversation-state-format.test.ts         |   0
 .../format-compatibility.test.ts              |   0
 .../input-normalization.test.ts               |   0
 .../orchestrator-utility-chain.test.ts        |   0
 .../stop-conditions-step-result.test.ts       |   0
 .../stop-conditions.test.ts                   |   0
 .../execute-tool-boundary.test.ts             |   0
 .../composition/stream-data-pipeline.test.ts  | 164 -----------------
 .../conversation-state-results.test.ts        |   0
 .../tool-call-response-consistency.test.ts    |  72 ++++++++
 .../tool-factory-shapes.test.ts               |   0
 .../claude-conversion-deep-dispatch.test.ts   |  67 +++++++
 tests/dispatch/execute-tool-dispatch.test.ts  |  84 ---------
 .../claude-unsupported-content.test.ts        |  52 ++++++
 .../format-round-trip.test.ts                 |   0
 .../next-turn-params-flow.test.ts             |   0
 .../orchestrator-executor.test.ts             |   0
 .../reusable-stream-consumers.test.ts         |  33 ----
 .../pipelines/claude-conversion-deep.test.ts  | 169 ------------------
 22 files changed, 253 insertions(+), 450 deletions(-)
 rename tests/{pipelines => behavior}/async-resolution-pipeline.test.ts (100%)
 create mode 100644 tests/behavior/claude-conversion-annotations.test.ts
 rename tests/{contracts => behavior}/consume-stream-completion.test.ts (100%)
 rename tests/{integration => behavior}/conversation-state-format.test.ts (100%)
 rename tests/{composition => behavior}/format-compatibility.test.ts (100%)
 rename tests/{composition => behavior}/input-normalization.test.ts (100%)
 rename tests/{pipelines => behavior}/orchestrator-utility-chain.test.ts (100%)
 rename tests/{integration => behavior}/stop-conditions-step-result.test.ts (100%)
 rename tests/{contracts => behavior}/stop-conditions.test.ts (100%)
 rename tests/{contracts => boundaries}/execute-tool-boundary.test.ts (100%)
 delete mode 100644 tests/composition/stream-data-pipeline.test.ts
 rename tests/{boundaries => contracts}/conversation-state-results.test.ts (100%)
 create mode 100644 tests/contracts/tool-call-response-consistency.test.ts
 rename tests/{boundaries => contracts}/tool-factory-shapes.test.ts (100%)
 create mode 100644 tests/dispatch/claude-conversion-deep-dispatch.test.ts
 delete mode 100644 tests/dispatch/execute-tool-dispatch.test.ts
 create mode 100644 tests/integration/claude-unsupported-content.test.ts
 rename tests/{pipelines => integration}/format-round-trip.test.ts (100%)
 rename tests/{composition => integration}/next-turn-params-flow.test.ts (100%)
 rename tests/{composition => integration}/orchestrator-executor.test.ts (100%)
 delete mode 100644 tests/pipelines/claude-conversion-deep.test.ts

diff --git a/tests/pipelines/async-resolution-pipeline.test.ts b/tests/behavior/async-resolution-pipeline.test.ts
similarity index 100%
rename from tests/pipelines/async-resolution-pipeline.test.ts
rename to tests/behavior/async-resolution-pipeline.test.ts
diff --git a/tests/behavior/claude-conversion-annotations.test.ts b/tests/behavior/claude-conversion-annotations.test.ts
new file mode 100644
index 0000000..5354854
--- /dev/null
+++ b/tests/behavior/claude-conversion-annotations.test.ts
@@ -0,0 +1,62 @@
+import { describe, expect, it } from 'vitest';
+
+import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('convertToClaudeMessage annotation handling', () => {
+  it('annotations: text with file_citation + url_citation + file_path -> each produces its distinct citation', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Here is the answer',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  fileId: 'f1',
+                  filename: 'doc.pdf',
+                  index: 0,
+                },
+                {
+                  type: 'url_citation',
+                  url: 'https://example.com',
+                  title: 'Example',
+                  startIndex: 0,
+                  endIndex: 10,
+                },
+                {
+                  type: 'file_path',
+                  fileId: 'f2',
+                  filePath: '/tmp/out.txt',
+                },
+              ],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Here is the answer',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const claude = convertToClaudeMessage(response as any);
+    const textBlock = claude.content.find((b: any) => b.type === 'text') as any;
+    expect(textBlock).toBeDefined();
+    // Should have citations
+    if (textBlock.citations) {
+      expect(textBlock.citations.length).toBeGreaterThan(0);
+    }
+  });
+});
diff --git a/tests/contracts/consume-stream-completion.test.ts b/tests/behavior/consume-stream-completion.test.ts
similarity index 100%
rename from tests/contracts/consume-stream-completion.test.ts
rename to tests/behavior/consume-stream-completion.test.ts
diff --git a/tests/integration/conversation-state-format.test.ts b/tests/behavior/conversation-state-format.test.ts
similarity index 100%
rename from tests/integration/conversation-state-format.test.ts
rename to tests/behavior/conversation-state-format.test.ts
diff --git a/tests/composition/format-compatibility.test.ts b/tests/behavior/format-compatibility.test.ts
similarity index 100%
rename from tests/composition/format-compatibility.test.ts
rename to tests/behavior/format-compatibility.test.ts
diff --git a/tests/composition/input-normalization.test.ts b/tests/behavior/input-normalization.test.ts
similarity index 100%
rename from tests/composition/input-normalization.test.ts
rename to tests/behavior/input-normalization.test.ts
diff --git a/tests/pipelines/orchestrator-utility-chain.test.ts b/tests/behavior/orchestrator-utility-chain.test.ts
similarity index 100%
rename from tests/pipelines/orchestrator-utility-chain.test.ts
rename to tests/behavior/orchestrator-utility-chain.test.ts
diff --git a/tests/integration/stop-conditions-step-result.test.ts b/tests/behavior/stop-conditions-step-result.test.ts
similarity index 100%
rename from tests/integration/stop-conditions-step-result.test.ts
rename to tests/behavior/stop-conditions-step-result.test.ts
diff --git a/tests/contracts/stop-conditions.test.ts b/tests/behavior/stop-conditions.test.ts
similarity index 100%
rename from tests/contracts/stop-conditions.test.ts
rename to tests/behavior/stop-conditions.test.ts
diff --git a/tests/contracts/execute-tool-boundary.test.ts b/tests/boundaries/execute-tool-boundary.test.ts
similarity index 100%
rename from tests/contracts/execute-tool-boundary.test.ts
rename to tests/boundaries/execute-tool-boundary.test.ts
diff --git a/tests/composition/stream-data-pipeline.test.ts b/tests/composition/stream-data-pipeline.test.ts
deleted file mode 100644
index cd9f565..0000000
--- a/tests/composition/stream-data-pipeline.test.ts
+++ /dev/null
@@ -1,164 +0,0 @@
-import { describe, expect, it } from 'vitest';
-
-import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
-import {
-  extractToolCallsFromResponse,
-  responseHasToolCalls,
-} from '../../src/lib/stream-transformers.js';
-import { TEST_MODEL } from '../test-constants.js';
-
-function makeStream<T>(items: T[]): ReusableReadableStream<T> {
-  const source = new ReadableStream<T>({
-    start(controller) {
-      for (const item of items) {
-        controller.enqueue(item);
-      }
-      controller.close();
-    },
-  });
-  return new ReusableReadableStream(source);
-}
-
-async function collect<T>(stream: AsyncIterable<T>): Promise<T[]> {
-  const result: T[] = [];
-  for await (const item of stream) {
-    result.push(item);
-  }
-  return result;
-}
-
-describe('Stream data pipeline: source -> guards -> transformers -> consumers', () => {
-  it('two consumers created from same ReusableReadableStream both receive all items', async () => {
-    const stream = makeStream([
-      1,
-      2,
-      3,
-    ]);
-    const consumer1 = stream.createConsumer();
-    const consumer2 = stream.createConsumer();
-
-    const [result1, result2] = await Promise.all([
-      collect(consumer1),
-      collect(consumer2),
-    ]);
-    expect(result1).toEqual([
-      1,
-      2,
-      3,
-    ]);
-    expect(result2).toEqual([
-      1,
-      2,
-      3,
-    ]);
-  });
-
-  it('consumer created after some items buffered still gets all items from position 0', async () => {
-    const stream = makeStream([
-      10,
-      20,
-      30,
-    ]);
-
-    const consumer1 = stream.createConsumer();
-    const items1: number[] = [];
-    for await (const item of consumer1) {
-      items1.push(item);
-      if (items1.length === 2) {
-        break;
-      }
-    }
-
-    // Create second consumer after first has consumed some items
-    const consumer2 = stream.createConsumer();
-    const items2 = await collect(consumer2);
-    expect(items2).toEqual([
-      10,
-      20,
-      30,
-    ]);
-  });
-
-  it('consumer created after source completes still gets all buffered items', async () => {
-    const stream = makeStream([
-      1,
-      2,
-      3,
-    ]);
-    // Consume fully to complete
-    const c1 = stream.createConsumer();
-    await collect(c1);
-
-    // Late join after completion
-    const c2 = stream.createConsumer();
-    const result = await collect(c2);
-    expect(result).toEqual([
-      1,
-      2,
-      3,
-    ]);
-  });
-
-  it('responseHasToolCalls returning true <-> extractToolCallsFromResponse returning non-empty', () => {
-    const responseWithTools = {
-      id: 'r1',
-      output: [
-        {
-          type: 'function_call' as const,
-          id: 'fc1',
-          callId: 'fc1',
-          name: 'search',
-          arguments: '{"q":"test"}',
-          status: 'completed' as const,
-        },
-      ],
-      status: 'completed' as const,
-      outputText: '',
-      model: TEST_MODEL,
-      usage: {
-        totalTokens: 100,
-        inputTokens: 50,
-        outputTokens: 50,
-      },
-    };
-
-    const hasTools = responseHasToolCalls(responseWithTools as any);
-    const extracted = extractToolCallsFromResponse(responseWithTools as any);
-
-    expect(hasTools).toBe(true);
-    expect(extracted.length).toBeGreaterThan(0);
-
-    const responseNoTools = {
-      id: 'r2',
-      output: [
-        {
-          type: 'message' as const,
-          id: 'm1',
-          role: 'assistant' as const,
-          status: 'completed' as const,
-          content: [
-            {
-              type: 'output_text' as const,
-              text: 'Hello',
-              annotations: [],
-            },
-          ],
-        },
-      ],
-      status: 'completed' as const,
-      outputText: 'Hello',
-      model: TEST_MODEL,
-      usage: {
-        totalTokens: 100,
-        inputTokens: 50,
-        outputTokens: 50,
-      },
-    };
-
-    const hasTools2 = responseHasToolCalls(responseNoTools as any);
-    const extracted2 = extractToolCallsFromResponse(responseNoTools as any);
-
-    expect(hasTools2).toBe(false);
-    expect(extracted2).toEqual([]);
-  });
-});
diff --git a/tests/boundaries/conversation-state-results.test.ts b/tests/contracts/conversation-state-results.test.ts
similarity index 100%
rename from tests/boundaries/conversation-state-results.test.ts
rename to tests/contracts/conversation-state-results.test.ts
diff --git a/tests/contracts/tool-call-response-consistency.test.ts b/tests/contracts/tool-call-response-consistency.test.ts
new file mode 100644
index 0000000..508f1af
--- /dev/null
+++ b/tests/contracts/tool-call-response-consistency.test.ts
@@ -0,0 +1,72 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  extractToolCallsFromResponse,
+  responseHasToolCalls,
+} from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('responseHasToolCalls and extractToolCallsFromResponse produce consistent results', () => {
+  it('responseHasToolCalls returning true <-> extractToolCallsFromResponse returning non-empty', () => {
+    const responseWithTools = {
+      id: 'r1',
+      output: [
+        {
+          type: 'function_call' as const,
+          id: 'fc1',
+          callId: 'fc1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const hasTools = responseHasToolCalls(responseWithTools as any);
+    const extracted = extractToolCallsFromResponse(responseWithTools as any);
+
+    expect(hasTools).toBe(true);
+    expect(extracted.length).toBeGreaterThan(0);
+
+    const responseNoTools = {
+      id: 'r2',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'm1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Hello',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const hasTools2 = responseHasToolCalls(responseNoTools as any);
+    const extracted2 = extractToolCallsFromResponse(responseNoTools as any);
+
+    expect(hasTools2).toBe(false);
+    expect(extracted2).toEqual([]);
+  });
+});
diff --git a/tests/boundaries/tool-factory-shapes.test.ts b/tests/contracts/tool-factory-shapes.test.ts
similarity index 100%
rename from tests/boundaries/tool-factory-shapes.test.ts
rename to tests/contracts/tool-factory-shapes.test.ts
diff --git a/tests/dispatch/claude-conversion-deep-dispatch.test.ts b/tests/dispatch/claude-conversion-deep-dispatch.test.ts
new file mode 100644
index 0000000..3065c61
--- /dev/null
+++ b/tests/dispatch/claude-conversion-deep-dispatch.test.ts
@@ -0,0 +1,67 @@
+import { describe, expect, it } from 'vitest';
+
+import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('convertToClaudeMessage routes multi-item response via output item guards', () => {
+  it('multi-item response: message + function_call + reasoning + web_search -> each guard routes to distinct block', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+        {
+          type: 'function_call' as const,
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+        {
+          type: 'reasoning' as const,
+          id: 'r_1',
+          status: 'completed' as const,
+          summary: [
+            {
+              type: 'summary_text' as const,
+              text: 'thinking',
+            },
+          ],
+        },
+        {
+          type: 'web_search_call' as const,
+          id: 'ws_1',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Hello',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 200,
+        inputTokens: 100,
+        outputTokens: 100,
+      },
+    };
+
+    const claude = convertToClaudeMessage(response as any);
+    const types = claude.content.map((b: any) => b.type);
+
+    expect(types).toContain('text');
+    expect(types).toContain('tool_use');
+    expect(types).toContain('thinking');
+    expect(types).toContain('server_tool_use');
+  });
+});
diff --git a/tests/dispatch/execute-tool-dispatch.test.ts b/tests/dispatch/execute-tool-dispatch.test.ts
deleted file mode 100644
index 9ca3f91..0000000
--- a/tests/dispatch/execute-tool-dispatch.test.ts
+++ /dev/null
@@ -1,84 +0,0 @@
-import { describe, expect, it } from 'vitest';
-import { z } from 'zod/v4';
-
-import { tool } from '../../src/index.js';
-import { executeTool } from '../../src/lib/tool-executor.js';
-
-describe('executeTool dispatches via tool type guards', () => {
-  const regularTool = tool({
-    name: 'add',
-    inputSchema: z.object({
-      a: z.number(),
-      b: z.number(),
-    }),
-    execute: async (args) => args.a + args.b,
-  });
-
-  const generatorTool = tool({
-    name: 'stream_add',
-    inputSchema: z.object({
-      a: z.number(),
-      b: z.number(),
-    }),
-    eventSchema: z.object({
-      progress: z.number(),
-    }),
-    outputSchema: z.object({
-      sum: z.number(),
-    }),
-    execute: async function* (args) {
-      yield {
-        progress: 50,
-      };
-      return {
-        sum: args.a + args.b,
-      };
-    },
-  });
-
-  const manualTool = tool({
-    name: 'manual_op',
-    inputSchema: z.object({
-      x: z.string(),
-    }),
-  });
-
-  const toolCall = {
-    id: 'tc_1',
-    name: 'test',
-    arguments: {
-      a: 2,
-      b: 3,
-    },
-  };
-  const turnCtx = {
-    numberOfTurns: 1,
-  };
-
-  it('dispatches regular tool to executeRegularTool path because isRegularExecuteTool returns true', async () => {
-    const result = await executeTool(regularTool, toolCall, turnCtx);
-    expect(result.toolCallId).toBe('tc_1');
-    expect(result.result).toBe(5);
-    expect(result).not.toHaveProperty('preliminaryResults');
-  });
-
-  it('dispatches generator tool to executeGeneratorTool path because isGeneratorTool returns true', async () => {
-    const result = await executeTool(generatorTool, toolCall, turnCtx);
-    expect(result.toolCallId).toBe('tc_1');
-    expect(result.result).toEqual({
-      sum: 5,
-    });
-    expect(result).toHaveProperty('preliminaryResults');
-  });
-
-  it('rejects manual tool because hasExecuteFunction returns false', async () => {
-    const manualCall = {
-      id: 'tc_1',
-      name: 'manual_op',
-      arguments: {
-        x: 'hi',
-      },
-    };
-    await expect(executeTool(manualTool as any, manualCall, turnCtx)).rejects.toThrow();
-  });
-});
diff --git a/tests/integration/claude-unsupported-content.test.ts b/tests/integration/claude-unsupported-content.test.ts
new file mode 100644
index 0000000..9a64f3f
--- /dev/null
+++ b/tests/integration/claude-unsupported-content.test.ts
@@ -0,0 +1,52 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  convertToClaudeMessage,
+  getUnsupportedContentSummary,
+  hasUnsupportedContent,
+} from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('convertToClaudeMessage -> unsupported content utilities', () => {
+  it('unsupported content round-trip: refusal + image_generation -> convertToClaudeMessage -> unsupported_content utilities work', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'refusal' as const,
+              refusal: 'I cannot do that',
+            },
+          ],
+        },
+        {
+          type: 'image_generation_call' as const,
+          id: 'ig_1',
+          result: 'base64data',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const claude = convertToClaudeMessage(response as any);
+    // unsupported_content is a property on the message, not content blocks
+    expect(hasUnsupportedContent(claude)).toBe(true);
+    const summary = getUnsupportedContentSummary(claude);
+    expect(summary).toBeDefined();
+    // refusal and image_generation_call should both appear as unsupported
+    expect(Object.keys(summary).length).toBeGreaterThan(0);
+  });
+});
diff --git a/tests/pipelines/format-round-trip.test.ts b/tests/integration/format-round-trip.test.ts
similarity index 100%
rename from tests/pipelines/format-round-trip.test.ts
rename to tests/integration/format-round-trip.test.ts
diff --git a/tests/composition/next-turn-params-flow.test.ts b/tests/integration/next-turn-params-flow.test.ts
similarity index 100%
rename from tests/composition/next-turn-params-flow.test.ts
rename to tests/integration/next-turn-params-flow.test.ts
diff --git a/tests/composition/orchestrator-executor.test.ts b/tests/integration/orchestrator-executor.test.ts
similarity index 100%
rename from tests/composition/orchestrator-executor.test.ts
rename to tests/integration/orchestrator-executor.test.ts
diff --git a/tests/integration/reusable-stream-consumers.test.ts b/tests/integration/reusable-stream-consumers.test.ts
index ffa4ccd..bc44231 100644
--- a/tests/integration/reusable-stream-consumers.test.ts
+++ b/tests/integration/reusable-stream-consumers.test.ts
@@ -24,39 +24,6 @@ async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
 }
 
 describe('ReusableReadableStream -> concurrent transformer consumption', () => {
-  it('two consumers at different read speeds both get all items', async () => {
-    const stream = makeStream([
-      1,
-      2,
-      3,
-      4,
-      5,
-    ]);
-
-    const consumer1 = stream.createConsumer();
-    const consumer2 = stream.createConsumer();
-
-    const [result1, result2] = await Promise.all([
-      collectAll(consumer1),
-      collectAll(consumer2),
-    ]);
-
-    expect(result1).toEqual([
-      1,
-      2,
-      3,
-      4,
-      5,
-    ]);
-    expect(result2).toEqual([
-      1,
-      2,
-      3,
-      4,
-      5,
-    ]);
-  });
-
   it('buildItemsStream and consumeStreamForCompletion both consume same stream correctly', async () => {
     const response = {
       id: 'r1',
diff --git a/tests/pipelines/claude-conversion-deep.test.ts b/tests/pipelines/claude-conversion-deep.test.ts
deleted file mode 100644
index 27ab9aa..0000000
--- a/tests/pipelines/claude-conversion-deep.test.ts
+++ /dev/null
@@ -1,169 +0,0 @@
-import { describe, expect, it } from 'vitest';
-
-import {
-  convertToClaudeMessage,
-  getUnsupportedContentSummary,
-  hasUnsupportedContent,
-} from '../../src/lib/stream-transformers.js';
-import { TEST_MODEL } from '../test-constants.js';
-
-describe('Claude conversion deep pipeline', () => {
-  it('multi-item response: message + function_call + reasoning + web_search -> each guard routes to distinct block', () => {
-    const response = {
-      id: 'r1',
-      output: [
-        {
-          type: 'message' as const,
-          id: 'msg_1',
-          role: 'assistant' as const,
-          status: 'completed' as const,
-          content: [
-            {
-              type: 'output_text' as const,
-              text: 'Hello',
-              annotations: [],
-            },
-          ],
-        },
-        {
-          type: 'function_call' as const,
-          id: 'fc_1',
-          callId: 'fc_1',
-          name: 'search',
-          arguments: '{"q":"test"}',
-          status: 'completed' as const,
-        },
-        {
-          type: 'reasoning' as const,
-          id: 'r_1',
-          status: 'completed' as const,
-          summary: [
-            {
-              type: 'summary_text' as const,
-              text: 'thinking',
-            },
-          ],
-        },
-        {
-          type: 'web_search_call' as const,
-          id: 'ws_1',
-          status: 'completed' as const,
-        },
-      ],
-      status: 'completed' as const,
-      outputText: 'Hello',
-      model: TEST_MODEL,
-      usage: {
-        totalTokens: 200,
-        inputTokens: 100,
-        outputTokens: 100,
-      },
-    };
-
-    const claude = convertToClaudeMessage(response as any);
-    const types = claude.content.map((b: any) => b.type);
-
-    expect(types).toContain('text');
-    expect(types).toContain('tool_use');
-    expect(types).toContain('thinking');
-    expect(types).toContain('server_tool_use');
-  });
-
-  it('annotations: text with file_citation + url_citation + file_path -> each produces its distinct citation', () => {
-    const response = {
-      id: 'r1',
-      output: [
-        {
-          type: 'message' as const,
-          id: 'msg_1',
-          role: 'assistant' as const,
-          status: 'completed' as const,
-          content: [
-            {
-              type: 'output_text' as const,
-              text: 'Here is the answer',
-              annotations: [
-                {
-                  type: 'file_citation',
-                  fileId: 'f1',
-                  filename: 'doc.pdf',
-                  index: 0,
-                },
-                {
-                  type: 'url_citation',
-                  url: 'https://example.com',
-                  title: 'Example',
-                  startIndex: 0,
-                  endIndex: 10,
-                },
-                {
-                  type: 'file_path',
-                  fileId: 'f2',
-                  filePath: '/tmp/out.txt',
-                },
-              ],
-            },
-          ],
-        },
-      ],
-      status: 'completed' as const,
-      outputText: 'Here is the answer',
-      model: TEST_MODEL,
-      usage: {
-        totalTokens: 100,
-        inputTokens: 50,
-        outputTokens: 50,
-      },
-    };
-
-    const claude = convertToClaudeMessage(response as any);
-    const textBlock = claude.content.find((b: any) => b.type === 'text') as any;
-    expect(textBlock).toBeDefined();
-    // Should have citations
-    if (textBlock.citations) {
-      expect(textBlock.citations.length).toBeGreaterThan(0);
-    }
-  });
-
-  it('unsupported content round-trip: refusal + image_generation -> convertToClaudeMessage -> unsupported_content utilities work', () => {
-    const response = {
-      id: 'r1',
-      output: [
-        {
-          type: 'message' as const,
-          id: 'msg_1',
-          role: 'assistant' as const,
-          status: 'completed' as const,
-          content: [
-            {
-              type: 'refusal' as const,
-              refusal: 'I cannot do that',
-            },
-          ],
-        },
-        {
-          type: 'image_generation_call' as const,
-          id: 'ig_1',
-          result: 'base64data',
-          status: 'completed' as const,
-        },
-      ],
-      status: 'completed' as const,
-      outputText: '',
-      model: TEST_MODEL,
-      usage: {
-        totalTokens: 100,
-        inputTokens: 50,
-        outputTokens: 50,
-      },
-    };
-
-    const claude = convertToClaudeMessage(response as any);
-    // unsupported_content is a property on the message, not content blocks
-    expect(hasUnsupportedContent(claude)).toBe(true);
-    const summary = getUnsupportedContentSummary(claude);
-    expect(summary).toBeDefined();
-    // refusal and image_generation_call should both appear as unsupported
-    expect(Object.keys(summary).length).toBeGreaterThan(0);
-  });
-});

From d0a2d27cf590eec5ee4121b37a5cac5681eac003 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Fri, 10 Apr 2026 05:40:28 +0000
Subject: [PATCH 4/4] refactor: remove all `any` types from test files and
 replace with proper types

- Replace all `as any` casts with correct types matching actual program types
- Replace all `: any` parameter annotations with proper typed signatures
- Add typed factory helpers to test-constants.ts (makeStep, makeResponse, makeUsage, etc.)
- Use `StreamEvents` type for makeStream helper functions
- Use proper callback types for filter/map/find/every callbacks
- Use typed context shapes for async param function callbacks
- Remove unused imports flagged by biome after type fixes

47 test files updated across all 7 categories. Zero `any` types remain in categorized tests.
Lint, typecheck, and all 573 tests pass.

Co-Authored-By: Robert Yeakel <robert.yeakel@openrouter.ai>
---
 tests/behavior/async-params.test.ts           |  22 +--
 .../async-resolution-pipeline.test.ts         |  12 +-
 .../claude-conversion-annotations.test.ts     |  12 +-
 .../consume-stream-completion.test.ts         |   2 +-
 .../conversation-state-format.test.ts         |   9 +-
 tests/behavior/conversation-state.test.ts     |  15 +-
 tests/behavior/format-compatibility.test.ts   |   4 +-
 tests/behavior/input-normalization.test.ts    |   9 +-
 tests/behavior/next-turn-params.test.ts       |  34 ++---
 .../stop-conditions-step-result.test.ts       |  48 +++---
 tests/behavior/stop-conditions.test.ts        | 104 ++++++-------
 .../stream-type-guards-negative.test.ts       |  18 +--
 tests/behavior/tool-context.test.ts           |   2 +-
 tests/behavior/tool-orchestrator.test.ts      |   2 +-
 tests/behavior/turn-context.test.ts           |   6 +-
 tests/boundaries/domain-separation.test.ts    |   4 +-
 .../boundaries/execute-tool-boundary.test.ts  |   8 +-
 .../response-stream-event-guards.test.ts      |  10 +-
 tests/boundaries/stream-event-guards.test.ts  |  11 +-
 tests/composition/context-flow.test.ts        |   2 +-
 tests/composition/state-machine.test.ts       |   2 +-
 tests/contracts/async-params.test.ts          |  34 ++---
 tests/contracts/delta-extractors.test.ts      |   2 +-
 tests/contracts/from-claude-messages.test.ts  |  13 +-
 tests/contracts/items-stream.test.ts          |  61 ++++++--
 .../contracts/message-stream-builders.test.ts |   6 +-
 tests/contracts/response-extractors.test.ts   |  16 +-
 .../tool-call-response-consistency.test.ts    |   8 +-
 .../approval-partition-dispatch.test.ts       |   6 +-
 .../claude-conversion-deep-dispatch.test.ts   |   4 +-
 .../claude-conversion-dispatch.test.ts        |  32 +++-
 tests/dispatch/from-claude-dispatch.test.ts   |   9 +-
 tests/dispatch/items-stream-dispatch.test.ts  |   6 +-
 .../claude-unsupported-content.test.ts        |   2 +-
 tests/integration/format-round-trip.test.ts   |  25 ++--
 .../integration/next-turn-params-flow.test.ts |  10 +-
 .../next-turn-params-request.test.ts          |   8 +-
 .../reusable-stream-consumers.test.ts         |   2 +-
 .../stream-completion-guards.test.ts          |   2 +-
 .../turn-context-async-params.test.ts         |  16 +-
 .../approval-execution-state.test.ts          |   2 +-
 tests/pipelines/dual-format-output.test.ts    |  19 ++-
 .../next-turn-params-pipeline.test.ts         |  12 +-
 .../pipelines/stop-condition-pipeline.test.ts |  24 +--
 tests/pipelines/streaming-pipeline.test.ts    |  14 +-
 .../pipelines/tool-execution-pipeline.test.ts |   2 +-
 tests/test-constants.ts                       | 141 +++++++++++++++++-
 47 files changed, 490 insertions(+), 322 deletions(-)

diff --git a/tests/behavior/async-params.test.ts b/tests/behavior/async-params.test.ts
index 1a405a8..b7720fb 100644
--- a/tests/behavior/async-params.test.ts
+++ b/tests/behavior/async-params.test.ts
@@ -1,7 +1,7 @@
 import { describe, expect, it } from 'vitest';
 import { hasAsyncFunctions, resolveAsyncFunctions } from '../../src/lib/async-params.js';
 import type { TurnContext } from '../../src/lib/tool-types.js';
-import { TEST_MODEL } from '../test-constants.js';
+import { makeCallModelInput, TEST_MODEL } from '../test-constants.js';
 
 const turnCtx: TurnContext = {
   numberOfTurns: 2,
@@ -9,45 +9,45 @@ const turnCtx: TurnContext = {
 
 describe('async params - resolveAsyncFunctions', () => {
   it('passes through static values unchanged', async () => {
-    const input = {
+    const input = makeCallModelInput({
       model: TEST_MODEL,
       temperature: 0.7,
       input: 'hi',
-    } as any;
+    });
     const result = await resolveAsyncFunctions(input, turnCtx);
     expect(result.model).toBe(TEST_MODEL);
     expect(result.temperature).toBe(0.7);
   });
 
   it('resolves sync function fields with turnContext', async () => {
-    const input = {
+    const input = makeCallModelInput({
       model: TEST_MODEL,
       temperature: (ctx: TurnContext) => ctx.numberOfTurns * 0.1,
       input: 'test',
-    } as any;
+    });
     const result = await resolveAsyncFunctions(input, turnCtx);
     expect(result.temperature).toBeCloseTo(0.2);
   });
 
   it('resolves async function fields with turnContext', async () => {
-    const input = {
+    const input = makeCallModelInput({
       model: TEST_MODEL,
       temperature: async (ctx: TurnContext) => ctx.numberOfTurns * 0.15,
       input: 'test',
-    } as any;
+    });
     const result = await resolveAsyncFunctions(input, turnCtx);
     expect(result.temperature).toBeCloseTo(0.3);
   });
 
   it('strips client-only fields (stopWhen, state, requireApproval, context, etc.)', async () => {
-    const input = {
+    const input = makeCallModelInput({
       model: TEST_MODEL,
       input: 'test',
       stopWhen: () => true,
       state: {},
       requireApproval: () => false,
       context: {},
-    } as any;
+    });
     const result = await resolveAsyncFunctions(input, turnCtx);
     expect(result).not.toHaveProperty('stopWhen');
     expect(result).not.toHaveProperty('state');
@@ -56,13 +56,13 @@ describe('async params - resolveAsyncFunctions', () => {
   });
 
   it('wraps field resolution errors with field name', async () => {
-    const input = {
+    const input = makeCallModelInput({
       model: TEST_MODEL,
       temperature: () => {
         throw new Error('compute failed');
       },
       input: 'test',
-    } as any;
+    });
     await expect(resolveAsyncFunctions(input, turnCtx)).rejects.toThrow(/temperature/);
   });
 });
diff --git a/tests/behavior/async-resolution-pipeline.test.ts b/tests/behavior/async-resolution-pipeline.test.ts
index aea4d03..7a77636 100644
--- a/tests/behavior/async-resolution-pipeline.test.ts
+++ b/tests/behavior/async-resolution-pipeline.test.ts
@@ -2,21 +2,21 @@ import { describe, expect, it } from 'vitest';
 
 import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
 import { stepCountIs } from '../../src/lib/stop-conditions.js';
-import { TEST_MODEL } from '../test-constants.js';
+import { makeCallModelInput, makeTurnContext, TEST_MODEL } from '../test-constants.js';
 
 describe('Async resolution + clean API request', () => {
   it('mixed input: static model, function temperature, client-only stopWhen -> three paths verified in one call', async () => {
-    const turnCtx = {
+    const turnCtx = makeTurnContext({
       numberOfTurns: 2,
-    } as any;
+    });
 
     const result = await resolveAsyncFunctions(
-      {
+      makeCallModelInput({
         model: TEST_MODEL,
-        temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
+        temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1,
         stopWhen: stepCountIs(5),
         input: 'hello',
-      } as any,
+      }),
       turnCtx,
     );
 
diff --git a/tests/behavior/claude-conversion-annotations.test.ts b/tests/behavior/claude-conversion-annotations.test.ts
index 5354854..854c7c4 100644
--- a/tests/behavior/claude-conversion-annotations.test.ts
+++ b/tests/behavior/claude-conversion-annotations.test.ts
@@ -51,11 +51,17 @@ describe('convertToClaudeMessage annotation handling', () => {
       },
     };
 
-    const claude = convertToClaudeMessage(response as any);
-    const textBlock = claude.content.find((b: any) => b.type === 'text') as any;
+    const claude = convertToClaudeMessage(response);
+    const textBlock = claude.content.find((b: { type: string }) => b.type === 'text') as
+      | {
+          type: string;
+          text: string;
+          citations?: unknown[];
+        }
+      | undefined;
     expect(textBlock).toBeDefined();
     // Should have citations
-    if (textBlock.citations) {
+    if (textBlock?.citations) {
       expect(textBlock.citations.length).toBeGreaterThan(0);
     }
   });
diff --git a/tests/behavior/consume-stream-completion.test.ts b/tests/behavior/consume-stream-completion.test.ts
index 638a738..dc32204 100644
--- a/tests/behavior/consume-stream-completion.test.ts
+++ b/tests/behavior/consume-stream-completion.test.ts
@@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest';
 import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
 import { consumeStreamForCompletion } from '../../src/lib/stream-transformers.js';
 
-function makeStream(events: any[]): ReusableReadableStream<any> {
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
   const source = new ReadableStream({
     start(controller) {
       for (const event of events) {
diff --git a/tests/behavior/conversation-state-format.test.ts b/tests/behavior/conversation-state-format.test.ts
index 1f49afd..f2f6048 100644
--- a/tests/behavior/conversation-state-format.test.ts
+++ b/tests/behavior/conversation-state-format.test.ts
@@ -15,12 +15,9 @@ describe('Conversation state -> format conversion', () => {
       role: 'user' as const,
       content: 'second message',
     };
-    const result = appendToMessages(
-      existing as any,
-      [
-        newItem,
-      ] as any,
-    );
+    const result = appendToMessages(existing, [
+      newItem,
+    ]);
     expect(result).toHaveLength(2);
     expect(result[0]).toEqual({
       role: 'user',
diff --git a/tests/behavior/conversation-state.test.ts b/tests/behavior/conversation-state.test.ts
index 7f66639..2a44288 100644
--- a/tests/behavior/conversation-state.test.ts
+++ b/tests/behavior/conversation-state.test.ts
@@ -10,6 +10,7 @@ import {
   unsentResultsToAPIFormat,
   updateState,
 } from '../../src/lib/conversation-state.js';
+import { makeResponse } from '../test-constants.js';
 
 describe('conversation state - createInitialState', () => {
   it('creates state with generated id, empty messages, in_progress status', () => {
@@ -47,7 +48,7 @@ describe('conversation state - updateState', () => {
           role: 'user',
           content: 'hi',
         },
-      ] as any,
+      ],
     });
     expect(updated.id).toBe('s2');
     expect(updated.createdAt).toBe(state.createdAt);
@@ -66,7 +67,7 @@ describe('conversation state - appendToMessages', () => {
       {
         role: 'assistant',
         content: 'hi',
-      } as any,
+      },
     ]);
     expect(result).toHaveLength(2);
   });
@@ -76,7 +77,7 @@ describe('conversation state - appendToMessages', () => {
       {
         role: 'assistant',
         content: 'hi',
-      } as any,
+      },
     ]);
     expect(result).toHaveLength(2);
     expect(result[0]).toHaveProperty('role', 'user');
@@ -142,7 +143,7 @@ describe('conversation state - unsent results', () => {
 
 describe('conversation state - response extraction', () => {
   it('extractTextFromResponse extracts text from message output items', () => {
-    const response = {
+    const response = makeResponse({
       id: 'r1',
       output: [
         {
@@ -170,12 +171,12 @@ describe('conversation state - response extraction', () => {
       error: null,
       incomplete_details: null,
       created_at: 0,
-    } as any;
+    });
     expect(extractTextFromResponse(response)).toBe('Hello World');
   });
 
   it('extractTextFromResponse returns empty string for no output', () => {
-    const response = {
+    const response = makeResponse({
       id: 'r1',
       output: [],
       parallel_tool_calls: false,
@@ -184,7 +185,7 @@ describe('conversation state - response extraction', () => {
       error: null,
       incomplete_details: null,
       created_at: 0,
-    } as any;
+    });
     expect(extractTextFromResponse(response)).toBe('');
   });
 });
diff --git a/tests/behavior/format-compatibility.test.ts b/tests/behavior/format-compatibility.test.ts
index 8588ed1..587729a 100644
--- a/tests/behavior/format-compatibility.test.ts
+++ b/tests/behavior/format-compatibility.test.ts
@@ -35,14 +35,14 @@ function makeResponse(text: string) {
 describe('Format compatibility: compat layers -> stream-transformers', () => {
   it('toChatMessage delegates to extractMessageFromResponse -> returns ChatAssistantMessage', () => {
     const response = makeResponse('Hello world');
-    const chatMsg = toChatMessage(response as any);
+    const chatMsg = toChatMessage(response);
     expect(chatMsg.role).toBe('assistant');
     expect(chatMsg.content).toBe('Hello world');
   });
 
   it('toClaudeMessage delegates to convertToClaudeMessage -> returns ClaudeMessage', () => {
     const response = makeResponse('Hello world');
-    const claudeMsg = toClaudeMessage(response as any);
+    const claudeMsg = toClaudeMessage(response);
     expect(claudeMsg.role).toBe('assistant');
     expect(claudeMsg.content).toBeDefined();
     expect(Array.isArray(claudeMsg.content)).toBe(true);
diff --git a/tests/behavior/input-normalization.test.ts b/tests/behavior/input-normalization.test.ts
index 15225ea..b973f88 100644
--- a/tests/behavior/input-normalization.test.ts
+++ b/tests/behavior/input-normalization.test.ts
@@ -9,12 +9,9 @@ describe('Input normalization: turn-context -> conversation-state', () => {
       role: 'user' as const,
       content: 'second message',
     };
-    const result = appendToMessages(
-      existing as any,
-      [
-        newItem,
-      ] as any,
-    );
+    const result = appendToMessages(existing, [
+      newItem,
+    ]);
 
     expect(result.length).toBeGreaterThan(1);
     // First item is normalized from string
diff --git a/tests/behavior/next-turn-params.test.ts b/tests/behavior/next-turn-params.test.ts
index 2295011..45b96f6 100644
--- a/tests/behavior/next-turn-params.test.ts
+++ b/tests/behavior/next-turn-params.test.ts
@@ -8,7 +8,7 @@ import {
 } from '../../src/lib/next-turn-params.js';
 import { tool } from '../../src/lib/tool.js';
 import type { ParsedToolCall, Tool } from '../../src/lib/tool-types.js';
-import { TEST_MODEL } from '../test-constants.js';
+import { makeRequest, TEST_MODEL } from '../test-constants.js';
 
 describe('next-turn params - buildNextTurnParamsContext', () => {
   it('extracts relevant fields from request', () => {
@@ -17,7 +17,7 @@ describe('next-turn params - buildNextTurnParamsContext', () => {
       input: 'hello',
       temperature: 0.7,
       maxOutputTokens: 1000,
-    } as any;
+    };
     const ctx = buildNextTurnParamsContext(request);
     expect(ctx.model).toBe(TEST_MODEL);
     expect(ctx.input).toBe('hello');
@@ -26,10 +26,10 @@ describe('next-turn params - buildNextTurnParamsContext', () => {
   });
 
   it('defaults missing fields to null/empty', () => {
-    const request = {
+    const request = makeRequest({
       model: undefined,
       input: undefined,
-    } as any;
+    });
     const ctx = buildNextTurnParamsContext(request);
     expect(ctx.model).toBe('');
     expect(ctx.temperature).toBeNull();
@@ -57,10 +57,10 @@ describe('next-turn params - executeNextTurnParamsFunctions', () => {
         query: 'test',
       },
     };
-    const request = {
+    const request = makeRequest({
       model: TEST_MODEL,
       input: 'hello',
-    } as any;
+    });
     const result = await executeNextTurnParamsFunctions(
       [
         tc,
@@ -91,7 +91,7 @@ describe('next-turn params - executeNextTurnParamsFunctions', () => {
       [
         t,
       ],
-      {} as any,
+      makeRequest({}),
     );
     expect(Object.keys(result)).toHaveLength(0);
   });
@@ -123,7 +123,7 @@ describe('next-turn params - executeNextTurnParamsFunctions', () => {
         t1,
         t2,
       ],
-      {} as any,
+      makeRequest({}),
     );
     expect(result.temperature).toBeUndefined();
   });
@@ -157,7 +157,7 @@ describe('next-turn params - executeNextTurnParamsFunctions', () => {
     };
     const request = {
       temperature: 0.5,
-    } as any;
+    };
     const result = await executeNextTurnParamsFunctions(
       [
         tc1,
@@ -175,11 +175,11 @@ describe('next-turn params - executeNextTurnParamsFunctions', () => {
 
 describe('next-turn params - applyNextTurnParamsToRequest', () => {
   it('merges computed params into request', () => {
-    const request = {
+    const request = makeRequest({
       model: TEST_MODEL,
       temperature: 0.7,
       input: 'test',
-    } as any;
+    });
     const computed = {
       temperature: 0.2 as number | null,
     };
@@ -189,9 +189,9 @@ describe('next-turn params - applyNextTurnParamsToRequest', () => {
   });
 
   it('converts null values to undefined for API compatibility', () => {
-    const request = {
+    const request = makeRequest({
       model: TEST_MODEL,
-    } as any;
+    });
     const computed = {
       temperature: null,
     };
@@ -200,10 +200,10 @@ describe('next-turn params - applyNextTurnParamsToRequest', () => {
   });
 
   it('returns new object without mutating original', () => {
-    const request = {
+    const request = makeRequest({
       model: TEST_MODEL,
       temperature: 0.7,
-    } as any;
+    });
     const result = applyNextTurnParamsToRequest(request, {
       temperature: 0.2,
     });
@@ -212,10 +212,10 @@ describe('next-turn params - applyNextTurnParamsToRequest', () => {
   });
 
   it('handles empty computed params', () => {
-    const request = {
+    const request = makeRequest({
       model: TEST_MODEL,
       temperature: 0.7,
-    } as any;
+    });
     const result = applyNextTurnParamsToRequest(request, {});
     expect(result.temperature).toBe(0.7);
   });
diff --git a/tests/behavior/stop-conditions-step-result.test.ts b/tests/behavior/stop-conditions-step-result.test.ts
index de0d4ca..5092743 100644
--- a/tests/behavior/stop-conditions-step-result.test.ts
+++ b/tests/behavior/stop-conditions-step-result.test.ts
@@ -6,48 +6,38 @@ import {
   maxTokensUsed,
   stepCountIs,
 } from '../../src/lib/stop-conditions.js';
-import type { StepResult } from '../../src/lib/tool-types.js';
-
-function makeStep(overrides: Partial<StepResult> = {}): StepResult {
-  return {
-    response: {} as any,
-    toolCalls: [],
-    finishReason: undefined,
-    usage: undefined,
-    ...overrides,
-  } as StepResult;
-}
+import { makeStep, makeTypedToolCalls, makeUsage } from '../test-constants.js';
 
 describe('Stop conditions + real StepResult shape', () => {
   it('stepCountIs works with StepResult[] containing real usage and toolCalls data', () => {
     const steps = [
       makeStep({
-        toolCalls: [
+        toolCalls: makeTypedToolCalls([
           {
             name: 'search',
             id: 'tc1',
             arguments: {},
           },
-        ] as any,
-        usage: {
+        ]),
+        usage: makeUsage({
           totalTokens: 100,
           inputTokens: 50,
           outputTokens: 50,
-        } as any,
+        }),
       }),
       makeStep({
-        toolCalls: [
+        toolCalls: makeTypedToolCalls([
           {
             name: 'write',
             id: 'tc2',
             arguments: {},
           },
-        ] as any,
-        usage: {
+        ]),
+        usage: makeUsage({
           totalTokens: 200,
           inputTokens: 100,
           outputTokens: 100,
-        } as any,
+        }),
       }),
     ];
     const condition = stepCountIs(2);
@@ -61,7 +51,7 @@ describe('Stop conditions + real StepResult shape', () => {
   it('hasToolCall finds tool name inside StepResult.toolCalls array', () => {
     const steps = [
       makeStep({
-        toolCalls: [
+        toolCalls: makeTypedToolCalls([
           {
             name: 'search',
             id: 'tc1',
@@ -72,7 +62,7 @@ describe('Stop conditions + real StepResult shape', () => {
             id: 'tc2',
             arguments: {},
           },
-        ] as any,
+        ]),
       }),
     ];
     expect(
@@ -95,18 +85,18 @@ describe('Stop conditions + real StepResult shape', () => {
   it('maxTokensUsed reads from StepResult.usage.totalTokens', () => {
     const steps = [
       makeStep({
-        usage: {
+        usage: makeUsage({
           totalTokens: 500,
           inputTokens: 250,
           outputTokens: 250,
-        } as any,
+        }),
       }),
       makeStep({
-        usage: {
+        usage: makeUsage({
           totalTokens: 600,
           inputTokens: 300,
           outputTokens: 300,
-        } as any,
+        }),
       }),
     ];
     expect(
@@ -124,18 +114,18 @@ describe('Stop conditions + real StepResult shape', () => {
   it('isStopConditionMet evaluates multiple conditions against same StepResult[]', async () => {
     const steps = [
       makeStep({
-        toolCalls: [
+        toolCalls: makeTypedToolCalls([
           {
             name: 'search',
             id: 'tc1',
             arguments: {},
           },
-        ] as any,
-        usage: {
+        ]),
+        usage: makeUsage({
           totalTokens: 100,
           inputTokens: 50,
           outputTokens: 50,
-        } as any,
+        }),
       }),
     ];
 
diff --git a/tests/behavior/stop-conditions.test.ts b/tests/behavior/stop-conditions.test.ts
index 9f65809..9e2ece7 100644
--- a/tests/behavior/stop-conditions.test.ts
+++ b/tests/behavior/stop-conditions.test.ts
@@ -7,17 +7,7 @@ import {
   maxTokensUsed,
   stepCountIs,
 } from '../../src/lib/stop-conditions.js';
-import type { StepResult } from '../../src/lib/tool-types.js';
-
-function makeStep(overrides: Partial<StepResult> = {}): StepResult {
-  return {
-    response: {} as any,
-    toolCalls: [],
-    finishReason: undefined,
-    usage: undefined,
-    ...overrides,
-  } as StepResult;
-}
+import { makeStep, makeTypedToolCalls, makeUsage } from '../test-constants.js';
 
 describe('stepCountIs(n) - behavior and dimension isolation', () => {
   it('returns false when steps.length < n', () => {
@@ -77,19 +67,19 @@ describe('stepCountIs(n) - behavior and dimension isolation', () => {
   it('ignores tool names, tokens, cost, finishReason in steps', () => {
     const condition = stepCountIs(1);
     const step = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'search',
           id: 'tc1',
           arguments: {},
         },
-      ] as any,
-      usage: {
+      ]),
+      usage: makeUsage({
         totalTokens: 9999,
         inputTokens: 5000,
         outputTokens: 4999,
         cost: 100,
-      } as any,
+      }),
       finishReason: 'length',
     });
     // Only step count matters
@@ -107,13 +97,13 @@ describe('hasToolCall(toolName) - behavior and dimension isolation', () => {
   it('returns false when no steps have the named tool', () => {
     const condition = hasToolCall('search');
     const step = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'other',
           id: 'tc1',
           arguments: {},
         },
-      ] as any,
+      ]),
     });
     expect(
       condition({
@@ -127,22 +117,22 @@ describe('hasToolCall(toolName) - behavior and dimension isolation', () => {
   it('returns true when any step has a matching tool call', () => {
     const condition = hasToolCall('search');
     const step1 = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'other',
           id: 'tc1',
           arguments: {},
         },
-      ] as any,
+      ]),
     });
     const step2 = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'search',
           id: 'tc2',
           arguments: {},
         },
-      ] as any,
+      ]),
     });
     expect(
       condition({
@@ -157,13 +147,13 @@ describe('hasToolCall(toolName) - behavior and dimension isolation', () => {
   it('returns false for different tool names', () => {
     const condition = hasToolCall('search');
     const step = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'Search',
           id: 'tc1',
           arguments: {},
         },
-      ] as any,
+      ]),
     });
     expect(
       condition({
@@ -177,7 +167,7 @@ describe('hasToolCall(toolName) - behavior and dimension isolation', () => {
   it('handles step with multiple tool calls, one matching', () => {
     const condition = hasToolCall('search');
     const step = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'other',
           id: 'tc1',
@@ -188,7 +178,7 @@ describe('hasToolCall(toolName) - behavior and dimension isolation', () => {
           id: 'tc2',
           arguments: {},
         },
-      ] as any,
+      ]),
     });
     expect(
       condition({
@@ -202,19 +192,19 @@ describe('hasToolCall(toolName) - behavior and dimension isolation', () => {
   it('ignores step count, tokens, cost, finishReason', () => {
     const condition = hasToolCall('search');
     const step = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'search',
           id: 'tc1',
           arguments: {},
         },
-      ] as any,
-      usage: {
+      ]),
+      usage: makeUsage({
         totalTokens: 9999,
         inputTokens: 5000,
         outputTokens: 4999,
         cost: 100,
-      } as any,
+      }),
       finishReason: 'length',
     });
     expect(
@@ -231,11 +221,11 @@ describe('maxTokensUsed(maxTokens) - behavior and dimension isolation', () => {
   it('returns false when total tokens < threshold', () => {
     const condition = maxTokensUsed(100);
     const step = makeStep({
-      usage: {
+      usage: makeUsage({
         totalTokens: 50,
         inputTokens: 25,
         outputTokens: 25,
-      } as any,
+      }),
     });
     expect(
       condition({
@@ -249,11 +239,11 @@ describe('maxTokensUsed(maxTokens) - behavior and dimension isolation', () => {
   it('returns true when total tokens >= threshold', () => {
     const condition = maxTokensUsed(100);
     const step = makeStep({
-      usage: {
+      usage: makeUsage({
         totalTokens: 100,
         inputTokens: 50,
         outputTokens: 50,
-      } as any,
+      }),
     });
     expect(
       condition({
@@ -267,18 +257,18 @@ describe('maxTokensUsed(maxTokens) - behavior and dimension isolation', () => {
   it('accumulates tokens across multiple steps', () => {
     const condition = maxTokensUsed(100);
     const step1 = makeStep({
-      usage: {
+      usage: makeUsage({
         totalTokens: 60,
         inputTokens: 30,
         outputTokens: 30,
-      } as any,
+      }),
     });
     const step2 = makeStep({
-      usage: {
+      usage: makeUsage({
         totalTokens: 50,
         inputTokens: 25,
         outputTokens: 25,
-      } as any,
+      }),
     });
     expect(
       condition({
@@ -307,19 +297,19 @@ describe('maxTokensUsed(maxTokens) - behavior and dimension isolation', () => {
   it('ignores step count, tool names, cost, finishReason', () => {
     const condition = maxTokensUsed(100);
     const step = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'search',
           id: 'tc1',
           arguments: {},
         },
-      ] as any,
-      usage: {
+      ]),
+      usage: makeUsage({
         totalTokens: 100,
         inputTokens: 50,
         outputTokens: 50,
         cost: 999,
-      } as any,
+      }),
       finishReason: 'stop',
     });
     expect(
@@ -336,12 +326,12 @@ describe('maxCost(maxCostInDollars) - behavior and dimension isolation', () => {
   it('returns false when total cost < threshold', () => {
     const condition = maxCost(1.0);
     const step = makeStep({
-      usage: {
+      usage: makeUsage({
         totalTokens: 100,
         inputTokens: 50,
         outputTokens: 50,
         cost: 0.5,
-      } as any,
+      }),
     });
     expect(
       condition({
@@ -355,12 +345,12 @@ describe('maxCost(maxCostInDollars) - behavior and dimension isolation', () => {
   it('returns true when total cost >= threshold', () => {
     const condition = maxCost(1.0);
     const step = makeStep({
-      usage: {
+      usage: makeUsage({
         totalTokens: 100,
         inputTokens: 50,
         outputTokens: 50,
         cost: 1.0,
-      } as any,
+      }),
     });
     expect(
       condition({
@@ -374,20 +364,20 @@ describe('maxCost(maxCostInDollars) - behavior and dimension isolation', () => {
   it('accumulates cost across multiple steps', () => {
     const condition = maxCost(1.0);
     const step1 = makeStep({
-      usage: {
+      usage: makeUsage({
         totalTokens: 50,
         inputTokens: 25,
         outputTokens: 25,
         cost: 0.6,
-      } as any,
+      }),
     });
     const step2 = makeStep({
-      usage: {
+      usage: makeUsage({
         totalTokens: 50,
         inputTokens: 25,
         outputTokens: 25,
         cost: 0.5,
-      } as any,
+      }),
     });
     expect(
       condition({
@@ -416,19 +406,19 @@ describe('maxCost(maxCostInDollars) - behavior and dimension isolation', () => {
   it('ignores step count, tool names, tokens, finishReason', () => {
     const condition = maxCost(1.0);
     const step = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'search',
           id: 'tc1',
           arguments: {},
         },
-      ] as any,
-      usage: {
+      ]),
+      usage: makeUsage({
         totalTokens: 99999,
         inputTokens: 50000,
         outputTokens: 49999,
         cost: 1.0,
-      } as any,
+      }),
       finishReason: 'length',
     });
     expect(
@@ -505,19 +495,19 @@ describe('finishReasonIs(reason) - behavior and dimension isolation', () => {
   it('ignores step count, tool names, tokens, cost', () => {
     const condition = finishReasonIs('length');
     const step = makeStep({
-      toolCalls: [
+      toolCalls: makeTypedToolCalls([
         {
           name: 'search',
           id: 'tc1',
           arguments: {},
         },
-      ] as any,
-      usage: {
+      ]),
+      usage: makeUsage({
         totalTokens: 99999,
         inputTokens: 50000,
         outputTokens: 49999,
         cost: 999,
-      } as any,
+      }),
       finishReason: 'length',
     });
     expect(
diff --git a/tests/behavior/stream-type-guards-negative.test.ts b/tests/behavior/stream-type-guards-negative.test.ts
index 12cf1ae..862e7fe 100644
--- a/tests/behavior/stream-type-guards-negative.test.ts
+++ b/tests/behavior/stream-type-guards-negative.test.ts
@@ -21,7 +21,7 @@ describe('stream event type guards - negative cases (reject wrong type)', () =>
     expect(
       isOutputTextDeltaEvent({
         type: 'response.reasoning_text.delta',
-      } as any),
+      } as unknown as StreamEvents),
     ).toBe(false);
   });
 
@@ -29,7 +29,7 @@ describe('stream event type guards - negative cases (reject wrong type)', () =>
     expect(
       isReasoningDeltaEvent({
         type: 'response.output_text.delta',
-      } as any),
+      } as unknown as StreamEvents),
     ).toBe(false);
   });
 
@@ -37,7 +37,7 @@ describe('stream event type guards - negative cases (reject wrong type)', () =>
     expect(
       isFunctionCallArgumentsDeltaEvent({
         type: 'response.output_text.delta',
-      } as any),
+      } as unknown as StreamEvents),
     ).toBe(false);
   });
 
@@ -45,7 +45,7 @@ describe('stream event type guards - negative cases (reject wrong type)', () =>
     expect(
       isOutputItemAddedEvent({
         type: 'response.output_item.done',
-      } as any),
+      } as unknown as StreamEvents),
     ).toBe(false);
   });
 
@@ -53,7 +53,7 @@ describe('stream event type guards - negative cases (reject wrong type)', () =>
     expect(
       isOutputItemDoneEvent({
         type: 'response.output_item.added',
-      } as any),
+      } as unknown as StreamEvents),
     ).toBe(false);
   });
 
@@ -61,7 +61,7 @@ describe('stream event type guards - negative cases (reject wrong type)', () =>
     expect(
       isResponseCompletedEvent({
         type: 'response.failed',
-      } as any),
+      } as unknown as StreamEvents),
     ).toBe(false);
   });
 
@@ -69,7 +69,7 @@ describe('stream event type guards - negative cases (reject wrong type)', () =>
     expect(
       isResponseFailedEvent({
         type: 'response.completed',
-      } as any),
+      } as unknown as StreamEvents),
     ).toBe(false);
   });
 
@@ -77,7 +77,7 @@ describe('stream event type guards - negative cases (reject wrong type)', () =>
     expect(
       isResponseIncompleteEvent({
         type: 'response.completed',
-      } as any),
+      } as unknown as StreamEvents),
     ).toBe(false);
   });
 
@@ -85,7 +85,7 @@ describe('stream event type guards - negative cases (reject wrong type)', () =>
     expect(
       isFunctionCallArgumentsDoneEvent({
         type: 'response.function_call_arguments.delta',
-      } as any),
+      } as unknown as StreamEvents),
     ).toBe(false);
   });
 });
diff --git a/tests/behavior/tool-context.test.ts b/tests/behavior/tool-context.test.ts
index 0f3fa28..271de3a 100644
--- a/tests/behavior/tool-context.test.ts
+++ b/tests/behavior/tool-context.test.ts
@@ -31,7 +31,7 @@ describe('ToolContextStore - basic operations', () => {
 
   it('setToolContext sets tool context and notifies listeners', () => {
     const store = new ToolContextStore();
-    const snapshots: any[] = [];
+    const snapshots: Array<Record<string, unknown>> = [];
     store.subscribe((s) => snapshots.push(s));
     store.setToolContext('tool1', {
       key: 'val',
diff --git a/tests/behavior/tool-orchestrator.test.ts b/tests/behavior/tool-orchestrator.test.ts
index 46ef1c7..005ad8c 100644
--- a/tests/behavior/tool-orchestrator.test.ts
+++ b/tests/behavior/tool-orchestrator.test.ts
@@ -44,7 +44,7 @@ describe('tool orchestrator - toolResultsToMap', () => {
         preliminaryResults: [
           'p1',
           'p2',
-        ] as any,
+        ],
       }),
     ];
     const map = toolResultsToMap(results);
diff --git a/tests/behavior/turn-context.test.ts b/tests/behavior/turn-context.test.ts
index e0c8326..0896777 100644
--- a/tests/behavior/turn-context.test.ts
+++ b/tests/behavior/turn-context.test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest';
 import { buildTurnContext, normalizeInputToArray } from '../../src/lib/turn-context.js';
-import { TEST_MODEL } from '../test-constants.js';
+import { makeRequest, TEST_MODEL } from '../test-constants.js';
 
 describe('turn context - buildTurnContext', () => {
   it('sets numberOfTurns from options', () => {
@@ -27,10 +27,10 @@ describe('turn context - buildTurnContext', () => {
   });
 
   it('includes turnRequest when provided', () => {
-    const request = {
+    const request = makeRequest({
       model: TEST_MODEL,
       input: 'hello',
-    } as any;
+    });
     const ctx = buildTurnContext({
       numberOfTurns: 1,
       turnRequest: request,
diff --git a/tests/boundaries/domain-separation.test.ts b/tests/boundaries/domain-separation.test.ts
index 091a3f7..ad3ba49 100644
--- a/tests/boundaries/domain-separation.test.ts
+++ b/tests/boundaries/domain-separation.test.ts
@@ -14,7 +14,7 @@ describe('Stream guards vs output item guards - domain separation', () => {
       role: 'assistant',
       content: [],
     };
-    expect(isOutputTextDeltaEvent(item as any)).toBe(false);
+    expect(isOutputTextDeltaEvent(item as unknown as StreamEvents)).toBe(false);
   });
 
   it('isOutputMessage rejects a TextDeltaEvent (stream event, not item)', () => {
@@ -32,7 +32,7 @@ describe('Stream guards vs output item guards - domain separation', () => {
       name: 'test',
       arguments: '{}',
     };
-    expect(isFunctionCallArgumentsDeltaEvent(item as any)).toBe(false);
+    expect(isFunctionCallArgumentsDeltaEvent(item as unknown as StreamEvents)).toBe(false);
   });
 
   it('isFunctionCallItem rejects a FunctionCallArgsDeltaEvent (delta, not item)', () => {
diff --git a/tests/boundaries/execute-tool-boundary.test.ts b/tests/boundaries/execute-tool-boundary.test.ts
index 525ada6..c7df6a8 100644
--- a/tests/boundaries/execute-tool-boundary.test.ts
+++ b/tests/boundaries/execute-tool-boundary.test.ts
@@ -46,20 +46,20 @@ describe('executeRegularTool vs executeGeneratorTool - structural boundary', ()
   };
 
   it('executeRegularTool throws when given a generator tool', async () => {
-    await expect(executeRegularTool(generatorTool as any, toolCall, turnCtx)).rejects.toThrow();
+    await expect(executeRegularTool(generatorTool, toolCall, turnCtx)).rejects.toThrow();
   });
 
   it('executeGeneratorTool throws when given a regular tool', async () => {
-    await expect(executeGeneratorTool(regularTool as any, toolCall, turnCtx)).rejects.toThrow();
+    await expect(executeGeneratorTool(regularTool, toolCall, turnCtx)).rejects.toThrow();
   });
 
   it('executeRegularTool result has NO preliminaryResults', async () => {
-    const result = await executeRegularTool(regularTool as any, toolCall, turnCtx);
+    const result = await executeRegularTool(regularTool, toolCall, turnCtx);
     expect(result).not.toHaveProperty('preliminaryResults');
   });
 
   it('executeGeneratorTool result HAS preliminaryResults array', async () => {
-    const result = await executeGeneratorTool(generatorTool as any, toolCall, turnCtx);
+    const result = await executeGeneratorTool(generatorTool, toolCall, turnCtx);
     expect(result).toHaveProperty('preliminaryResults');
     expect(Array.isArray(result.preliminaryResults)).toBe(true);
   });
diff --git a/tests/boundaries/response-stream-event-guards.test.ts b/tests/boundaries/response-stream-event-guards.test.ts
index 633f601..27e8fdc 100644
--- a/tests/boundaries/response-stream-event-guards.test.ts
+++ b/tests/boundaries/response-stream-event-guards.test.ts
@@ -15,7 +15,7 @@ describe('ResponseStreamEvent guards - mutual exclusion', () => {
       toolCallId: 'c1',
       result: 42,
       timestamp: 1,
-    } as any;
+    };
     expect(isToolPreliminaryResultEvent(event)).toBe(false);
   });
 
@@ -25,7 +25,7 @@ describe('ResponseStreamEvent guards - mutual exclusion', () => {
       toolCallId: 'c1',
       result: 42,
       timestamp: 1,
-    } as any;
+    };
     expect(isToolResultEvent(event)).toBe(false);
   });
 
@@ -34,7 +34,7 @@ describe('ResponseStreamEvent guards - mutual exclusion', () => {
       type: 'turn.end',
       turnNumber: 1,
       timestamp: 1,
-    } as any;
+    };
     expect(isTurnStartEvent(event)).toBe(false);
   });
 
@@ -43,7 +43,7 @@ describe('ResponseStreamEvent guards - mutual exclusion', () => {
       type: 'turn.start',
       turnNumber: 1,
       timestamp: 1,
-    } as any;
+    };
     expect(isTurnEndEvent(event)).toBe(false);
   });
 
@@ -53,7 +53,7 @@ describe('ResponseStreamEvent guards - mutual exclusion', () => {
       toolCallId: 'c1',
       result: 42,
       timestamp: 1,
-    } as any;
+    };
     expect(isToolCallOutputEvent(event)).toBe(false);
   });
 });
diff --git a/tests/boundaries/stream-event-guards.test.ts b/tests/boundaries/stream-event-guards.test.ts
index 7c6a052..5fef197 100644
--- a/tests/boundaries/stream-event-guards.test.ts
+++ b/tests/boundaries/stream-event-guards.test.ts
@@ -11,6 +11,7 @@ import {
   isResponseFailedEvent,
   isResponseIncompleteEvent,
 } from '../../src/lib/stream-type-guards.js';
+import { makeRequest } from '../test-constants.js';
 
 const guards = [
   {
@@ -66,7 +67,7 @@ describe('Stream event type guards - mutual exclusion', () => {
       it(`returns true for its own event type: ${guard.type}`, () => {
         const event = {
           type: guard.type,
-        } as any;
+        };
         expect(guard.fn(event)).toBe(true);
       });
 
@@ -74,21 +75,21 @@ describe('Stream event type guards - mutual exclusion', () => {
         const other = guards.find((g) => g.type !== guard.type)!;
         const event = {
           type: other.type,
-        } as any;
+        };
         expect(guard.fn(event)).toBe(false);
       });
 
       it('returns false for objects missing type or with wrong type', () => {
-        expect(guard.fn({} as any)).toBe(false);
+        expect(guard.fn(makeRequest({}))).toBe(false);
         expect(
           guard.fn({
             type: 'unrelated.event',
-          } as any),
+          } as unknown as StreamEvents),
         ).toBe(false);
         expect(
           guard.fn({
             type: '',
-          } as any),
+          } as unknown as StreamEvents),
         ).toBe(false);
       });
     });
diff --git a/tests/composition/context-flow.test.ts b/tests/composition/context-flow.test.ts
index 4be50c1..caba3ce 100644
--- a/tests/composition/context-flow.test.ts
+++ b/tests/composition/context-flow.test.ts
@@ -22,7 +22,7 @@ describe('Context flow: turn context -> tool execute context -> tool function',
     const turnCtx = buildTurnContext({
       numberOfTurns: 2,
     });
-    const contextFn = (ctx: any) => ({
+    const contextFn = (ctx: { numberOfTurns: number }) => ({
       apiKey: `key-for-turn-${ctx.numberOfTurns}`,
     });
 
diff --git a/tests/composition/state-machine.test.ts b/tests/composition/state-machine.test.ts
index 6e4e1a1..c1e3d0f 100644
--- a/tests/composition/state-machine.test.ts
+++ b/tests/composition/state-machine.test.ts
@@ -49,7 +49,7 @@ describe('State machine: state -> approval -> resumption', () => {
       approvalTool,
       safeTool,
     ];
-    const partition = await partitionToolCalls(toolCalls as any, tools);
+    const partition = await partitionToolCalls(toolCalls, tools);
 
     expect(partition.requiresApproval).toHaveLength(1);
     expect(partition.autoExecute).toHaveLength(1);
diff --git a/tests/contracts/async-params.test.ts b/tests/contracts/async-params.test.ts
index 1acdda4..5468739 100644
--- a/tests/contracts/async-params.test.ts
+++ b/tests/contracts/async-params.test.ts
@@ -1,19 +1,19 @@
 import { describe, expect, it } from 'vitest';
 
 import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
-import { TEST_MODEL } from '../test-constants.js';
+import { makeCallModelInput, makeTurnContext, TEST_MODEL } from '../test-constants.js';
 
 describe('resolveAsyncFunctions - three field types handled distinctly', () => {
-  const turnCtx = {
+  const turnCtx = makeTurnContext({
     numberOfTurns: 2,
-  } as any;
+  });
 
   it('static values (model, temperature as literals) -> passed through unchanged', async () => {
     const result = await resolveAsyncFunctions(
-      {
+      makeCallModelInput({
         model: TEST_MODEL,
         temperature: 0.7,
-      } as any,
+      }),
       turnCtx,
     );
     expect(result.model).toBe(TEST_MODEL);
@@ -22,9 +22,9 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
 
   it('function values -> resolved by calling with context, result stored', async () => {
     const result = await resolveAsyncFunctions(
-      {
-        temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
-      } as any,
+      makeCallModelInput({
+        temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1,
+      }),
       turnCtx,
     );
     expect(result.temperature).toBe(0.2);
@@ -32,7 +32,7 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
 
   it('client-only fields (stopWhen, state, requireApproval, context, onTurnStart, onTurnEnd) -> stripped entirely', async () => {
     const result = await resolveAsyncFunctions(
-      {
+      makeCallModelInput({
         model: TEST_MODEL,
         stopWhen: () => true,
         state: {
@@ -44,7 +44,7 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
         },
         onTurnStart: () => {},
         onTurnEnd: () => {},
-      } as any,
+      }),
       turnCtx,
     );
     expect(result).not.toHaveProperty('stopWhen');
@@ -66,10 +66,10 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
       },
     ];
     const result = await resolveAsyncFunctions(
-      {
+      makeCallModelInput({
         model: TEST_MODEL,
         tools,
-      } as any,
+      }),
       turnCtx,
     );
     expect(result).toHaveProperty('tools');
@@ -78,11 +78,11 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
   it('function error -> wraps with field name context', async () => {
     await expect(
       resolveAsyncFunctions(
-        {
+        makeCallModelInput({
           temperature: () => {
             throw new Error('boom');
           },
-        } as any,
+        }),
         turnCtx,
       ),
     ).rejects.toThrow('Failed to resolve async function for field "temperature"');
@@ -90,12 +90,12 @@ describe('resolveAsyncFunctions - three field types handled distinctly', () => {
 
   it('mix of static + function + client-only in one call -> all handled correctly', async () => {
     const result = await resolveAsyncFunctions(
-      {
+      makeCallModelInput({
         model: TEST_MODEL,
-        temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
+        temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1,
         stopWhen: () => true,
         input: 'hello',
-      } as any,
+      }),
       turnCtx,
     );
     expect(result.model).toBe(TEST_MODEL);
diff --git a/tests/contracts/delta-extractors.test.ts b/tests/contracts/delta-extractors.test.ts
index a56c0d7..fb36514 100644
--- a/tests/contracts/delta-extractors.test.ts
+++ b/tests/contracts/delta-extractors.test.ts
@@ -7,7 +7,7 @@ import {
   extractToolDeltas,
 } from '../../src/lib/stream-transformers.js';
 
-function makeStream(events: any[]): ReusableReadableStream<any> {
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
   const source = new ReadableStream({
     start(controller) {
       for (const event of events) {
diff --git a/tests/contracts/from-claude-messages.test.ts b/tests/contracts/from-claude-messages.test.ts
index 17b8444..56d5c29 100644
--- a/tests/contracts/from-claude-messages.test.ts
+++ b/tests/contracts/from-claude-messages.test.ts
@@ -1,3 +1,4 @@
+import type * as models from '@openrouter/sdk/models';
 import { describe, expect, it } from 'vitest';
 
 import { fromClaudeMessages } from '../../src/lib/anthropic-compat.js';
@@ -15,7 +16,7 @@ describe('fromClaudeMessages - each block type maps distinctly', () => {
         ],
       },
     ]);
-    const items = result as any[];
+    const items = result as models.OutputItems[];
     expect(items).toHaveLength(1);
     expect(items[0]).toHaveProperty('role');
     expect(items[0]).toHaveProperty('content', 'Hello');
@@ -38,8 +39,8 @@ describe('fromClaudeMessages - each block type maps distinctly', () => {
         ],
       },
     ]);
-    const items = result as any[];
-    const toolItem = items.find((i: any) => i.type === 'function_call');
+    const items = result as models.OutputItems[];
+    const toolItem = items.find((i) => i.type === 'function_call');
     expect(toolItem).toBeDefined();
     expect(toolItem.name).toBe('search');
     expect(toolItem.callId).toBe('tu_1');
@@ -58,8 +59,8 @@ describe('fromClaudeMessages - each block type maps distinctly', () => {
         ],
       },
     ]);
-    const items = result as any[];
-    const outputItem = items.find((i: any) => i.type === 'function_call_output');
+    const items = result as models.OutputItems[];
+    const outputItem = items.find((i) => i.type === 'function_call_output');
     expect(outputItem).toBeDefined();
     expect(outputItem.callId).toBe('tu_1');
     expect(outputItem.output).toBe('Search result');
@@ -80,7 +81,7 @@ describe('fromClaudeMessages - each block type maps distinctly', () => {
         ],
       },
     ]);
-    const items = result as any[];
+    const items = result as models.OutputItems[];
     expect(items).toHaveLength(1);
     expect(items[0]).toHaveProperty('role');
     expect(items[0]).toHaveProperty('content');
diff --git a/tests/contracts/items-stream.test.ts b/tests/contracts/items-stream.test.ts
index 8131802..bd0d6de 100644
--- a/tests/contracts/items-stream.test.ts
+++ b/tests/contracts/items-stream.test.ts
@@ -3,7 +3,9 @@ import { describe, expect, it } from 'vitest';
 import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
 import { buildItemsStream } from '../../src/lib/stream-transformers.js';
 
-function makeStream(events: any[]): ReusableReadableStream<any> {
+function makeStream(
+  events: Record<string, unknown>[],
+): ReusableReadableStream<Record<string, unknown>> {
   const source = new ReadableStream({
     start(controller) {
       for (const event of events) {
@@ -53,8 +55,16 @@ describe('buildItemsStream - yields distinct item types per event', () => {
     ];
     const stream = makeStream(events);
     const items = await collectAll(buildItemsStream(stream));
-    const lastMsg = items.filter((i: any) => i.type === 'message').pop()!;
-    expect((lastMsg as any).content[0].text).toBe('Hello world');
+    const lastMsg = items.filter((i) => i.type === 'message').pop()!;
+    expect(
+      (
+        lastMsg as {
+          content: Array<{
+            text: string;
+          }>;
+        }
+      ).content[0].text,
+    ).toBe('Hello world');
   });
 
   it('function_call items: accumulated arguments from function_call deltas', async () => {
@@ -87,8 +97,14 @@ describe('buildItemsStream - yields distinct item types per event', () => {
     ];
     const stream = makeStream(events);
     const items = await collectAll(buildItemsStream(stream));
-    const lastFn = items.filter((i: any) => i.type === 'function_call').pop()!;
-    expect((lastFn as any).arguments).toBe('{"q":"test"}');
+    const lastFn = items.filter((i) => i.type === 'function_call').pop()!;
+    expect(
+      (
+        lastFn as {
+          arguments: string;
+        }
+      ).arguments,
+    ).toBe('{"q":"test"}');
   });
 
   it('reasoning items: accumulated content from reasoning deltas', async () => {
@@ -119,8 +135,16 @@ describe('buildItemsStream - yields distinct item types per event', () => {
     ];
     const stream = makeStream(events);
     const items = await collectAll(buildItemsStream(stream));
-    const lastReasoning = items.filter((i: any) => i.type === 'reasoning').pop()!;
-    expect((lastReasoning as any).summary[0].text).toBe('thinking more');
+    const lastReasoning = items.filter((i) => i.type === 'reasoning').pop()!;
+    expect(
+      (
+        lastReasoning as {
+          summary: Array<{
+            text: string;
+          }>;
+        }
+      ).summary[0].text,
+    ).toBe('thinking more');
   });
 
   it('server tool items (web_search_call, file_search_call, image_generation_call): passthrough', async () => {
@@ -159,7 +183,7 @@ describe('buildItemsStream - yields distinct item types per event', () => {
     ];
     const stream = makeStream(events);
     const items = await collectAll(buildItemsStream(stream));
-    const types = items.map((i: any) => i.type);
+    const types = items.map((i) => i.type);
     expect(types).toContain('web_search_call');
     expect(types).toContain('file_search_call');
     expect(types).toContain('image_generation_call');
@@ -206,7 +230,13 @@ describe('buildItemsStream - yields distinct item types per event', () => {
     const stream = makeStream(events);
     const items = await collectAll(buildItemsStream(stream));
     const doneItem = items[items.length - 1]!;
-    expect((doneItem as any).status).toBe('completed');
+    expect(
+      (
+        doneItem as {
+          status: string;
+        }
+      ).status,
+    ).toBe('completed');
   });
 
   it('termination events (completed/failed/incomplete) -> stream stops', async () => {
@@ -240,8 +270,17 @@ describe('buildItemsStream - yields distinct item types per event', () => {
     const stream = makeStream(events);
     const items = await collectAll(buildItemsStream(stream));
     const allText = items
-      .filter((i: any) => i.type === 'message')
-      .map((i: any) => i.content?.[0]?.text ?? '');
+      .filter((i) => i.type === 'message')
+      .map(
+        (i) =>
+          (
+            i as {
+              content?: Array<{
+                text?: string;
+              }>;
+            }
+          ).content?.[0]?.text ?? '',
+      );
     expect(allText.join('')).not.toContain('SHOULD NOT APPEAR');
   });
 });
diff --git a/tests/contracts/message-stream-builders.test.ts b/tests/contracts/message-stream-builders.test.ts
index 760dd27..9710bb8 100644
--- a/tests/contracts/message-stream-builders.test.ts
+++ b/tests/contracts/message-stream-builders.test.ts
@@ -6,7 +6,7 @@ import {
   buildResponsesMessageStream,
 } from '../../src/lib/stream-transformers.js';
 
-function makeStream(events: any[]): ReusableReadableStream<any> {
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
   const source = new ReadableStream({
     start(controller) {
       for (const event of events) {
@@ -105,8 +105,8 @@ describe('Message stream builders - same input, structurally distinct outputs',
 
     // Same text content
     const responsesText = responsesLast.content
-      .filter((c: any) => c.type === 'output_text')
-      .map((c: any) => c.text)
+      .filter((c: { type: string; text?: string }) => c.type === 'output_text')
+      .map((c: { type: string; text?: string }) => c.text)
       .join('');
     expect(responsesText).toBe('Hello world');
     expect(chatLast.content).toBe('Hello world');
diff --git a/tests/contracts/response-extractors.test.ts b/tests/contracts/response-extractors.test.ts
index e2fd008..77a71f7 100644
--- a/tests/contracts/response-extractors.test.ts
+++ b/tests/contracts/response-extractors.test.ts
@@ -38,7 +38,7 @@ function makeResponse(text: string) {
 describe('Response extractors - same response, distinct shapes', () => {
   it('extractMessageFromResponse returns ChatAssistantMessage (role + content string)', () => {
     const response = makeResponse('Hello world');
-    const msg = extractMessageFromResponse(response as any);
+    const msg = extractMessageFromResponse(response);
     expect(msg.role).toBe('assistant');
     expect(typeof msg.content).toBe('string');
     expect(msg).not.toHaveProperty('id');
@@ -47,7 +47,7 @@ describe('Response extractors - same response, distinct shapes', () => {
 
   it('extractResponsesMessageFromResponse returns OutputMessage (id + type + content array)', () => {
     const response = makeResponse('Hello world');
-    const msg = extractResponsesMessageFromResponse(response as any);
+    const msg = extractResponsesMessageFromResponse(response);
     expect(msg.id).toBe('msg_1');
     expect(msg.type).toBe('message');
     expect(Array.isArray(msg.content)).toBe(true);
@@ -55,13 +55,13 @@ describe('Response extractors - same response, distinct shapes', () => {
 
   it('same response -> both extract same text but structurally different objects', () => {
     const response = makeResponse('Hello world');
-    const chatMsg = extractMessageFromResponse(response as any);
-    const responsesMsg = extractResponsesMessageFromResponse(response as any);
+    const chatMsg = extractMessageFromResponse(response);
+    const responsesMsg = extractResponsesMessageFromResponse(response);
 
     expect(chatMsg.content).toBe('Hello world');
     const responsesText = responsesMsg.content
-      .filter((c: any) => c.type === 'output_text')
-      .map((c: any) => c.text)
+      .filter((c: { type: string; text?: string }) => c.type === 'output_text')
+      .map((c: { type: string; text?: string }) => c.text)
       .join('');
     expect(responsesText).toBe('Hello world');
 
@@ -93,7 +93,7 @@ describe('Response extractors - same response, distinct shapes', () => {
       },
     };
 
-    expect(() => extractMessageFromResponse(response as any)).toThrow('No message found');
-    expect(() => extractResponsesMessageFromResponse(response as any)).toThrow('No message found');
+    expect(() => extractMessageFromResponse(response)).toThrow('No message found');
+    expect(() => extractResponsesMessageFromResponse(response)).toThrow('No message found');
   });
 });
diff --git a/tests/contracts/tool-call-response-consistency.test.ts b/tests/contracts/tool-call-response-consistency.test.ts
index 508f1af..ac40989 100644
--- a/tests/contracts/tool-call-response-consistency.test.ts
+++ b/tests/contracts/tool-call-response-consistency.test.ts
@@ -30,8 +30,8 @@ describe('responseHasToolCalls and extractToolCallsFromResponse produce consiste
       },
     };
 
-    const hasTools = responseHasToolCalls(responseWithTools as any);
-    const extracted = extractToolCallsFromResponse(responseWithTools as any);
+    const hasTools = responseHasToolCalls(responseWithTools);
+    const extracted = extractToolCallsFromResponse(responseWithTools);
 
     expect(hasTools).toBe(true);
     expect(extracted.length).toBeGreaterThan(0);
@@ -63,8 +63,8 @@ describe('responseHasToolCalls and extractToolCallsFromResponse produce consiste
       },
     };
 
-    const hasTools2 = responseHasToolCalls(responseNoTools as any);
-    const extracted2 = extractToolCallsFromResponse(responseNoTools as any);
+    const hasTools2 = responseHasToolCalls(responseNoTools);
+    const extracted2 = extractToolCallsFromResponse(responseNoTools);
 
     expect(hasTools2).toBe(false);
     expect(extracted2).toEqual([]);
diff --git a/tests/dispatch/approval-partition-dispatch.test.ts b/tests/dispatch/approval-partition-dispatch.test.ts
index 3f44e78..3bdce4b 100644
--- a/tests/dispatch/approval-partition-dispatch.test.ts
+++ b/tests/dispatch/approval-partition-dispatch.test.ts
@@ -46,13 +46,13 @@ describe('Approval partitioning dispatches via tool-level vs call-level checks',
       numberOfTurns: 1,
     };
     const partition = await partitionToolCalls(
-      toolCalls as any,
+      toolCalls,
       [
         approvalTool,
         safeTool,
       ],
       context,
-      callLevelCheck as any,
+      callLevelCheck,
     );
     // Call-level override: both should be auto-execute
     expect(partition.autoExecute).toHaveLength(2);
@@ -64,7 +64,7 @@ describe('Approval partitioning dispatches via tool-level vs call-level checks',
       numberOfTurns: 1,
     };
     const partition = await partitionToolCalls(
-      toolCalls as any,
+      toolCalls,
       [
         approvalTool,
         safeTool,
diff --git a/tests/dispatch/claude-conversion-deep-dispatch.test.ts b/tests/dispatch/claude-conversion-deep-dispatch.test.ts
index 3065c61..58ce914 100644
--- a/tests/dispatch/claude-conversion-deep-dispatch.test.ts
+++ b/tests/dispatch/claude-conversion-deep-dispatch.test.ts
@@ -56,8 +56,8 @@ describe('convertToClaudeMessage routes multi-item response via output item guar
       },
     };
 
-    const claude = convertToClaudeMessage(response as any);
-    const types = claude.content.map((b: any) => b.type);
+    const claude = convertToClaudeMessage(response);
+    const types = claude.content.map((b: { type: string }) => b.type);
 
     expect(types).toContain('text');
     expect(types).toContain('tool_use');
diff --git a/tests/dispatch/claude-conversion-dispatch.test.ts b/tests/dispatch/claude-conversion-dispatch.test.ts
index db15336..f30f073 100644
--- a/tests/dispatch/claude-conversion-dispatch.test.ts
+++ b/tests/dispatch/claude-conversion-dispatch.test.ts
@@ -40,14 +40,28 @@ describe('convertToClaudeMessage routes items via output item guards', () => {
       },
     };
 
-    const claude = convertToClaudeMessage(response as any);
-    const textBlock = claude.content.find((b: any) => b.type === 'text');
-    const toolBlock = claude.content.find((b: any) => b.type === 'tool_use');
+    const claude = convertToClaudeMessage(response);
+    const textBlock = claude.content.find((b: { type: string }) => b.type === 'text');
+    const toolBlock = claude.content.find((b: { type: string }) => b.type === 'tool_use');
 
     expect(textBlock).toBeDefined();
-    expect((textBlock as any).text).toBe('Hello');
+    expect(
+      (
+        textBlock as {
+          type: string;
+          text: string;
+        }
+      ).text,
+    ).toBe('Hello');
     expect(toolBlock).toBeDefined();
-    expect((toolBlock as any).name).toBe('search');
+    expect(
+      (
+        toolBlock as {
+          type: string;
+          name: string;
+        }
+      ).name,
+    ).toBe('search');
   });
 
   it('same response with reasoning + web_search_call: isReasoningOutputItem -> thinking, isWebSearchCallOutputItem -> server_tool_use', () => {
@@ -81,9 +95,11 @@ describe('convertToClaudeMessage routes items via output item guards', () => {
       },
     };
 
-    const claude = convertToClaudeMessage(response as any);
-    const thinkingBlock = claude.content.find((b: any) => b.type === 'thinking');
-    const serverToolBlock = claude.content.find((b: any) => b.type === 'server_tool_use');
+    const claude = convertToClaudeMessage(response);
+    const thinkingBlock = claude.content.find((b: { type: string }) => b.type === 'thinking');
+    const serverToolBlock = claude.content.find(
+      (b: { type: string }) => b.type === 'server_tool_use',
+    );
 
     expect(thinkingBlock).toBeDefined();
     expect(serverToolBlock).toBeDefined();
diff --git a/tests/dispatch/from-claude-dispatch.test.ts b/tests/dispatch/from-claude-dispatch.test.ts
index de9a3c9..8002880 100644
--- a/tests/dispatch/from-claude-dispatch.test.ts
+++ b/tests/dispatch/from-claude-dispatch.test.ts
@@ -1,3 +1,4 @@
+import type * as models from '@openrouter/sdk/models';
 import { describe, expect, it } from 'vitest';
 
 import { fromClaudeMessages } from '../../src/lib/anthropic-compat.js';
@@ -38,20 +39,20 @@ describe('fromClaudeMessages routes blocks to distinct output types', () => {
       },
     ]);
 
-    const items = result as any[];
+    const items = result as models.OutputItems[];
     // Should have: text message, function_call, function_call_output, text message
-    const types = items.map((i: any) => i.type || 'easy_input_message');
+    const types = items.map((i) => i.type || 'easy_input_message');
 
     expect(types).toContain('function_call');
     expect(types).toContain('function_call_output');
 
     // Check that the function_call has correct properties
-    const fnCall = items.find((i: any) => i.type === 'function_call');
+    const fnCall = items.find((i) => i.type === 'function_call');
     expect(fnCall.name).toBe('search');
     expect(fnCall.callId).toBe('tu_1');
 
     // Check that the function_call_output has correct properties
-    const fnOutput = items.find((i: any) => i.type === 'function_call_output');
+    const fnOutput = items.find((i) => i.type === 'function_call_output');
     expect(fnOutput.callId).toBe('tu_1');
     expect(fnOutput.output).toBe('Found results');
   });
diff --git a/tests/dispatch/items-stream-dispatch.test.ts b/tests/dispatch/items-stream-dispatch.test.ts
index 8deb687..d2480be 100644
--- a/tests/dispatch/items-stream-dispatch.test.ts
+++ b/tests/dispatch/items-stream-dispatch.test.ts
@@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest';
 import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
 import { buildItemsStream } from '../../src/lib/stream-transformers.js';
 
-function makeStream(events: any[]): ReusableReadableStream<any> {
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
   const source = new ReadableStream({
     start(controller) {
       for (const event of events) {
@@ -44,7 +44,7 @@ describe('buildItemsStream routes events via stream type guards', () => {
     const stream = makeStream(events);
     const items = await collectAll(buildItemsStream(stream));
     expect(items.length).toBeGreaterThan(0);
-    expect((items[0] as any).type).toBe('message');
+    expect(items[0].type).toBe('message');
   });
 
   it('skips unknown event types that do not match any guard', async () => {
@@ -71,6 +71,6 @@ describe('buildItemsStream routes events via stream type guards', () => {
     const stream = makeStream(events);
     const items = await collectAll(buildItemsStream(stream));
     // Only the message item should be yielded, unknown events are silently skipped
-    expect(items.every((i: any) => i.type === 'message')).toBe(true);
+    expect(items.every((i) => i.type === 'message')).toBe(true);
   });
 });
diff --git a/tests/integration/claude-unsupported-content.test.ts b/tests/integration/claude-unsupported-content.test.ts
index 9a64f3f..e343dc3 100644
--- a/tests/integration/claude-unsupported-content.test.ts
+++ b/tests/integration/claude-unsupported-content.test.ts
@@ -41,7 +41,7 @@ describe('convertToClaudeMessage -> unsupported content utilities', () => {
       },
     };
 
-    const claude = convertToClaudeMessage(response as any);
+    const claude = convertToClaudeMessage(response);
     // unsupported_content is a property on the message, not content blocks
     expect(hasUnsupportedContent(claude)).toBe(true);
     const summary = getUnsupportedContentSummary(claude);
diff --git a/tests/integration/format-round-trip.test.ts b/tests/integration/format-round-trip.test.ts
index 9e1f200..0a46779 100644
--- a/tests/integration/format-round-trip.test.ts
+++ b/tests/integration/format-round-trip.test.ts
@@ -1,3 +1,4 @@
+import type * as models from '@openrouter/sdk/models';
 import { describe, expect, it } from 'vitest';
 
 import { fromClaudeMessages, toClaudeMessage } from '../../src/lib/anthropic-compat.js';
@@ -47,19 +48,19 @@ describe('Bidirectional format conversion', () => {
 
     // Claude -> OR format
     const orFormat = fromClaudeMessages(claudeMessages);
-    const items = orFormat as any[];
+    const items = orFormat as models.OutputItems[];
 
     // Text blocks -> EasyInputMessage
-    const textItems = items.filter((i: any) => !i.type || i.role);
+    const textItems = items.filter((i) => !i.type || i.role);
     expect(textItems.length).toBeGreaterThan(0);
 
     // tool_use -> FunctionCallItem
-    const fnCalls = items.filter((i: any) => i.type === 'function_call');
+    const fnCalls = items.filter((i) => i.type === 'function_call');
     expect(fnCalls).toHaveLength(1);
     expect(fnCalls[0].name).toBe('search');
 
     // tool_result -> FunctionCallOutputItem
-    const fnOutputs = items.filter((i: any) => i.type === 'function_call_output');
+    const fnOutputs = items.filter((i) => i.type === 'function_call_output');
     expect(fnOutputs).toHaveLength(1);
     expect(fnOutputs[0].callId).toBe('tu_1');
 
@@ -90,7 +91,7 @@ describe('Bidirectional format conversion', () => {
         outputTokens: 50,
       },
     };
-    const claudeResponse = toClaudeMessage(mockResponse as any);
+    const claudeResponse = toClaudeMessage(mockResponse);
     expect(claudeResponse.role).toBe('assistant');
     expect(Array.isArray(claudeResponse.content)).toBe(true);
   });
@@ -114,26 +115,26 @@ describe('Bidirectional format conversion', () => {
         toolCallId: 'tc_1',
         content: 'Tool result',
       },
-    ] as any[];
+    ];
 
     // Chat -> OR format
     const orFormat = fromChatMessages(chatMessages);
-    const items = orFormat as any[];
+    const items = orFormat as models.OutputItems[];
 
     // System message
-    const systemItems = items.filter((i: any) => i.role === 'system');
+    const systemItems = items.filter((i) => i.role === 'system');
     expect(systemItems).toHaveLength(1);
 
     // User message
-    const userItems = items.filter((i: any) => i.role === 'user');
+    const userItems = items.filter((i) => i.role === 'user');
     expect(userItems).toHaveLength(1);
 
     // Assistant message
-    const assistantItems = items.filter((i: any) => i.role === 'assistant');
+    const assistantItems = items.filter((i) => i.role === 'assistant');
     expect(assistantItems).toHaveLength(1);
 
     // Tool message -> FunctionCallOutputItem
-    const toolOutputs = items.filter((i: any) => i.type === 'function_call_output');
+    const toolOutputs = items.filter((i) => i.type === 'function_call_output');
     expect(toolOutputs).toHaveLength(1);
 
     // Verify OR format -> Chat format works on a response
@@ -163,7 +164,7 @@ describe('Bidirectional format conversion', () => {
         outputTokens: 50,
       },
     };
-    const chatResponse = toChatMessage(mockResponse as any);
+    const chatResponse = toChatMessage(mockResponse);
     expect(chatResponse.role).toBe('assistant');
     expect(typeof chatResponse.content).toBe('string');
   });
diff --git a/tests/integration/next-turn-params-flow.test.ts b/tests/integration/next-turn-params-flow.test.ts
index 3496452..95f1b5d 100644
--- a/tests/integration/next-turn-params-flow.test.ts
+++ b/tests/integration/next-turn-params-flow.test.ts
@@ -36,14 +36,14 @@ describe('Next-turn params: tools -> computation -> request modification', () =>
     };
 
     const params = await executeNextTurnParamsFunctions(
-      toolCalls as any,
-      toolsWithNextTurnParams as any,
-      request as any,
+      toolCalls,
+      toolsWithNextTurnParams,
+      request,
     );
 
     expect(params).toHaveProperty('temperature', 0.5);
 
-    const modified = applyNextTurnParamsToRequest(request as any, params);
+    const modified = applyNextTurnParamsToRequest(request, params);
     expect(modified.temperature).toBe(0.5);
     expect(modified.model).toBe(TEST_MODEL);
   });
@@ -55,7 +55,7 @@ describe('Next-turn params: tools -> computation -> request modification', () =>
       input: 'hello',
     };
 
-    const ctx = buildNextTurnParamsContext(request as any);
+    const ctx = buildNextTurnParamsContext(request);
     expect(ctx.model).toBe(TEST_MODEL);
     expect(ctx.temperature).toBe(0.7);
     expect(ctx.input).toBe('hello');
diff --git a/tests/integration/next-turn-params-request.test.ts b/tests/integration/next-turn-params-request.test.ts
index 8fc8852..da44632 100644
--- a/tests/integration/next-turn-params-request.test.ts
+++ b/tests/integration/next-turn-params-request.test.ts
@@ -34,13 +34,9 @@ describe('Next-turn params -> request modification -> API readiness', () => {
       temperature: 0.7,
       input: 'hello',
     };
-    const params = await executeNextTurnParamsFunctions(
-      toolCalls as any,
-      tools as any,
-      request as any,
-    );
+    const params = await executeNextTurnParamsFunctions(toolCalls, tools, request);
 
-    const modified = applyNextTurnParamsToRequest(request as any, params);
+    const modified = applyNextTurnParamsToRequest(request, params);
     expect(modified.temperature).toBe(0.3);
     expect(modified.model).toBe(TEST_MODEL);
     expect(modified.input).toBe('hello');
diff --git a/tests/integration/reusable-stream-consumers.test.ts b/tests/integration/reusable-stream-consumers.test.ts
index bc44231..82ae5ec 100644
--- a/tests/integration/reusable-stream-consumers.test.ts
+++ b/tests/integration/reusable-stream-consumers.test.ts
@@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest';
 import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
 import { buildItemsStream, consumeStreamForCompletion } from '../../src/lib/stream-transformers.js';
 
-function makeStream(events: any[]): ReusableReadableStream<any> {
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
   const source = new ReadableStream({
     start(controller) {
       for (const event of events) {
diff --git a/tests/integration/stream-completion-guards.test.ts b/tests/integration/stream-completion-guards.test.ts
index 3449df3..a291270 100644
--- a/tests/integration/stream-completion-guards.test.ts
+++ b/tests/integration/stream-completion-guards.test.ts
@@ -3,7 +3,7 @@ import { describe, expect, it } from 'vitest';
 import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
 import { consumeStreamForCompletion } from '../../src/lib/stream-transformers.js';
 
-function makeStream(events: any[]): ReusableReadableStream<any> {
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
   const source = new ReadableStream({
     start(controller) {
       for (const event of events) {
diff --git a/tests/integration/turn-context-async-params.test.ts b/tests/integration/turn-context-async-params.test.ts
index dcccfd1..bf707a7 100644
--- a/tests/integration/turn-context-async-params.test.ts
+++ b/tests/integration/turn-context-async-params.test.ts
@@ -1,7 +1,7 @@
 import { describe, expect, it } from 'vitest';
 import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
 import { buildTurnContext } from '../../src/lib/turn-context.js';
-import { TEST_MODEL, TEST_MODEL_ALT } from '../test-constants.js';
+import { makeCallModelInput, TEST_MODEL, TEST_MODEL_ALT } from '../test-constants.js';
 
 describe('buildTurnContext -> resolveAsyncFunctions', () => {
   it('parameter function receives TurnContext with correct numberOfTurns', async () => {
@@ -9,10 +9,10 @@ describe('buildTurnContext -> resolveAsyncFunctions', () => {
       numberOfTurns: 5,
     });
     const result = await resolveAsyncFunctions(
-      {
+      makeCallModelInput({
         model: TEST_MODEL,
-        temperature: (ctx: any) => ctx.numberOfTurns * 0.1,
-      } as any,
+        temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1,
+      }),
       turnCtx,
     );
     expect(result.temperature).toBe(0.5);
@@ -28,12 +28,12 @@ describe('buildTurnContext -> resolveAsyncFunctions', () => {
     };
     const turnCtx = buildTurnContext({
       numberOfTurns: 1,
-      toolCall: toolCall as any,
+      toolCall: toolCall,
     });
     const result = await resolveAsyncFunctions(
-      {
-        model: (ctx: any) => (ctx.toolCall ? TEST_MODEL_ALT : TEST_MODEL),
-      } as any,
+      makeCallModelInput({
+        model: (ctx: { toolCall?: unknown }) => (ctx.toolCall ? TEST_MODEL_ALT : TEST_MODEL),
+      }),
       turnCtx,
     );
     expect(result.model).toBe(TEST_MODEL_ALT);
diff --git a/tests/pipelines/approval-execution-state.test.ts b/tests/pipelines/approval-execution-state.test.ts
index 1b74b77..b2844a7 100644
--- a/tests/pipelines/approval-execution-state.test.ts
+++ b/tests/pipelines/approval-execution-state.test.ts
@@ -58,7 +58,7 @@ describe('Approval -> execution -> state update pipeline', () => {
     ];
 
     // Step 1: Partition
-    const partition = await partitionToolCalls(toolCalls as any, tools);
+    const partition = await partitionToolCalls(toolCalls, tools);
     expect(partition.autoExecute).toHaveLength(1);
     expect(partition.requiresApproval).toHaveLength(1);
 
diff --git a/tests/pipelines/dual-format-output.test.ts b/tests/pipelines/dual-format-output.test.ts
index 4755564..f74b8aa 100644
--- a/tests/pipelines/dual-format-output.test.ts
+++ b/tests/pipelines/dual-format-output.test.ts
@@ -11,7 +11,7 @@ import {
 } from '../../src/lib/stream-transformers.js';
 import { TEST_MODEL } from '../test-constants.js';
 
-function makeStream(events: any[]): ReusableReadableStream<any> {
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
   const source = new ReadableStream({
     start(controller) {
       for (const event of events) {
@@ -69,24 +69,31 @@ describe('Dual-format output: same response -> structurally distinct formats', (
     };
 
     // Chat format
-    const chatMsg = extractMessageFromResponse(response as any);
+    const chatMsg = extractMessageFromResponse(response);
     expect(chatMsg.role).toBe('assistant');
     expect(typeof chatMsg.content).toBe('string');
 
     // Claude format
-    const claudeMsg = convertToClaudeMessage(response as any);
+    const claudeMsg = convertToClaudeMessage(response);
     expect(claudeMsg.role).toBe('assistant');
     expect(Array.isArray(claudeMsg.content)).toBe(true);
 
     // Tool calls
-    const toolCalls = extractToolCallsFromResponse(response as any);
+    const toolCalls = extractToolCallsFromResponse(response);
     expect(toolCalls).toHaveLength(1);
     expect(toolCalls[0]!.name).toBe('search');
 
     // All semantically equivalent, structurally different
     expect(chatMsg.content).toBe('Found results');
-    const claudeText = claudeMsg.content.find((b: any) => b.type === 'text');
-    expect((claudeText as any).text).toBe('Found results');
+    const claudeText = claudeMsg.content.find((b: { type: string }) => b.type === 'text');
+    expect(
+      (
+        claudeText as {
+          type: string;
+          text: string;
+        }
+      ).text,
+    ).toBe('Found results');
   });
 
   it('through streaming: same ReusableReadableStream -> three concurrent consumers all complete', async () => {
diff --git a/tests/pipelines/next-turn-params-pipeline.test.ts b/tests/pipelines/next-turn-params-pipeline.test.ts
index bd1eef1..57156d4 100644
--- a/tests/pipelines/next-turn-params-pipeline.test.ts
+++ b/tests/pipelines/next-turn-params-pipeline.test.ts
@@ -18,7 +18,7 @@ describe('Next-turn parameter adjustment pipeline', () => {
       }),
       execute: async (args) => `Results for: ${args.query}`,
       nextTurnParams: {
-        temperature: (input: any) => (input.query?.includes('creative') ? 0.9 : 0.1),
+        temperature: (input: { query?: string }) => (input.query?.includes('creative') ? 0.9 : 0.1),
       },
     });
 
@@ -29,7 +29,7 @@ describe('Next-turn parameter adjustment pipeline', () => {
     };
 
     // Step 1: Build context from request
-    const ctx = buildNextTurnParamsContext(request as any);
+    const ctx = buildNextTurnParamsContext(request);
     expect(ctx.model).toBe(TEST_MODEL);
     expect(ctx.temperature).toBe(0.5);
 
@@ -47,16 +47,12 @@ describe('Next-turn parameter adjustment pipeline', () => {
         },
       },
     ];
-    const params = await executeNextTurnParamsFunctions(
-      toolCalls as any,
-      tools as any,
-      request as any,
-    );
+    const params = await executeNextTurnParamsFunctions(toolCalls, tools, request);
 
     expect(params).toHaveProperty('temperature', 0.9);
 
     // Step 3: Apply to request
-    const modified = applyNextTurnParamsToRequest(request as any, params);
+    const modified = applyNextTurnParamsToRequest(request, params);
     expect(modified.temperature).toBe(0.9);
     expect(modified.model).toBe(TEST_MODEL);
     expect(modified.input).toBe('hello');
diff --git a/tests/pipelines/stop-condition-pipeline.test.ts b/tests/pipelines/stop-condition-pipeline.test.ts
index 43ffea3..1b50c3c 100644
--- a/tests/pipelines/stop-condition-pipeline.test.ts
+++ b/tests/pipelines/stop-condition-pipeline.test.ts
@@ -6,17 +6,7 @@ import {
   maxCost,
   stepCountIs,
 } from '../../src/lib/stop-conditions.js';
-import type { StepResult } from '../../src/lib/tool-types.js';
-
-function makeStep(overrides: Partial<StepResult> = {}): StepResult {
-  return {
-    response: {} as any,
-    toolCalls: [],
-    finishReason: undefined,
-    usage: undefined,
-    ...overrides,
-  } as StepResult;
-}
+import { makeStep, makeTypedToolCalls, makeUsage } from '../test-constants.js';
 
 describe('Stop condition pipeline: results -> steps -> conditions -> decision', () => {
   it('step count: 3 tool rounds -> StepResult[] length 3 -> stepCountIs(3) true -> isStopConditionMet true', async () => {
@@ -42,13 +32,13 @@ describe('Stop condition pipeline: results -> steps -> conditions -> decision',
   it('tool call: round with "search" tool -> hasToolCall("search") true -> isStopConditionMet true', async () => {
     const steps = [
       makeStep({
-        toolCalls: [
+        toolCalls: makeTypedToolCalls([
           {
             name: 'search',
             id: 'tc1',
             arguments: {},
           },
-        ] as any,
+        ]),
       }),
     ];
     expect(
@@ -68,12 +58,12 @@ describe('Stop condition pipeline: results -> steps -> conditions -> decision',
   it('cost: round with usage.cost = 0.30 -> maxCost(0.25) true -> stop', async () => {
     const steps = [
       makeStep({
-        usage: {
+        usage: makeUsage({
           totalTokens: 100,
           inputTokens: 50,
           outputTokens: 50,
           cost: 0.3,
-        } as any,
+        }),
       }),
     ];
     expect(
@@ -93,13 +83,13 @@ describe('Stop condition pipeline: results -> steps -> conditions -> decision',
   it('combined OR: stepCountIs(10) false + hasToolCall("done") true -> isStopConditionMet true', async () => {
     const steps = [
       makeStep({
-        toolCalls: [
+        toolCalls: makeTypedToolCalls([
           {
             name: 'done',
             id: 'tc1',
             arguments: {},
           },
-        ] as any,
+        ]),
       }),
     ];
     // stepCountIs(10) is false (only 1 step)
diff --git a/tests/pipelines/streaming-pipeline.test.ts b/tests/pipelines/streaming-pipeline.test.ts
index 2c9c6af..144440f 100644
--- a/tests/pipelines/streaming-pipeline.test.ts
+++ b/tests/pipelines/streaming-pipeline.test.ts
@@ -7,7 +7,7 @@ import {
   extractTextDeltas,
 } from '../../src/lib/stream-transformers.js';
 
-function makeStream(events: any[]): ReusableReadableStream<any> {
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
   const source = new ReadableStream({
     start(controller) {
       for (const event of events) {
@@ -119,9 +119,9 @@ describe('Full streaming pipeline: raw events -> guards -> transformers -> consu
     const stream = makeStream(events);
     const items = await collectAll(buildItemsStream(stream));
 
-    const messageItems = items.filter((i: any) => i.type === 'message');
-    const fnCallItems = items.filter((i: any) => i.type === 'function_call');
-    const reasoningItems = items.filter((i: any) => i.type === 'reasoning');
+    const messageItems = items.filter((i) => i.type === 'message');
+    const fnCallItems = items.filter((i) => i.type === 'function_call');
+    const reasoningItems = items.filter((i) => i.type === 'reasoning');
 
     // Each type present and distinct
     expect(messageItems.length).toBeGreaterThan(0);
@@ -129,11 +129,11 @@ describe('Full streaming pipeline: raw events -> guards -> transformers -> consu
     expect(reasoningItems.length).toBeGreaterThan(0);
 
     // Message items have text
-    expect((messageItems[messageItems.length - 1] as any).content[0].text).toBe('Hello');
+    expect(messageItems[messageItems.length - 1].content[0].text).toBe('Hello');
     // Function call items have arguments
-    expect((fnCallItems[fnCallItems.length - 1] as any).arguments).toBe('{"q":"test"}');
+    expect(fnCallItems[fnCallItems.length - 1].arguments).toBe('{"q":"test"}');
     // Reasoning items have content
-    expect((reasoningItems[reasoningItems.length - 1] as any).summary[0].text).toBe('thinking');
+    expect(reasoningItems[reasoningItems.length - 1].summary[0].text).toBe('thinking');
   });
 
   it('completion: isResponseCompletedEvent true -> consumeStreamForCompletion returns response -> stream terminates', async () => {
diff --git a/tests/pipelines/tool-execution-pipeline.test.ts b/tests/pipelines/tool-execution-pipeline.test.ts
index ec5c52c..34dbaed 100644
--- a/tests/pipelines/tool-execution-pipeline.test.ts
+++ b/tests/pipelines/tool-execution-pipeline.test.ts
@@ -132,7 +132,7 @@ describe('Full tool execution pipeline: definition -> dispatch -> validate -> ex
     expect(result.result).toBeNull();
 
     // Format error includes details
-    const errorFormatted = formatToolExecutionError(result.error!, toolCall as any);
+    const errorFormatted = formatToolExecutionError(result.error!, toolCall);
     expect(errorFormatted).toContain('strict');
   });
 });
diff --git a/tests/test-constants.ts b/tests/test-constants.ts
index ee921a9..c2f749e 100644
--- a/tests/test-constants.ts
+++ b/tests/test-constants.ts
@@ -1,13 +1,152 @@
 /**
- * Shared test constants for model identifiers.
+ * Shared test constants and typed factory helpers.
  *
  * Unit/integration tests use a synthetic placeholder so they never
  * depend on a real model existing. Change these in one place if the
  * convention needs to be updated.
  */
 
+import type * as models from '@openrouter/sdk/models';
+import type { CallModelInput } from '../src/lib/async-params.js';
+import type {
+  ParsedToolCall,
+  StepResult,
+  Tool,
+  ToolExecutionResult,
+  TurnContext,
+  TypedToolCallUnion,
+} from '../src/lib/tool-types.js';
+
 /** Default model identifier used in non-e2e tests. */
 export const TEST_MODEL = 'openai/gpt-4.1-nano';
 
 /** Alternative model for tests that need a second, distinct model. */
 export const TEST_MODEL_ALT = 'openai/gpt-4.1-mini';
+
+// ---------------------------------------------------------------------------
+// Factory helpers – build properly typed test data without `as any`
+// ---------------------------------------------------------------------------
+
+/** Minimal Usage object that satisfies the SDK's required fields. */
+export function makeUsage(
+  overrides: Partial<models.Usage> & {
+    totalTokens: number;
+    inputTokens: number;
+    outputTokens: number;
+  },
+): models.Usage {
+  return {
+    inputTokensDetails: {
+      cachedTokens: 0,
+    },
+    outputTokensDetails: {
+      reasoningTokens: 0,
+    },
+    ...overrides,
+  };
+}
+
+/** Minimal OpenResponsesResult that satisfies the SDK's required fields. */
+export function makeResponse(
+  overrides: Partial<models.OpenResponsesResult> & {
+    output: models.OutputItems[];
+  },
+): models.OpenResponsesResult {
+  return {
+    id: 'resp_test',
+    object: 'response',
+    createdAt: 0,
+    model: TEST_MODEL,
+    status: 'completed',
+    completedAt: null,
+    error: null,
+    incompleteDetails: null,
+    temperature: null,
+    topP: null,
+    presencePenalty: null,
+    frequencyPenalty: null,
+    instructions: null,
+    metadata: null,
+    tools: [],
+    toolChoice: 'auto',
+    parallelToolCalls: false,
+    ...overrides,
+  };
+}
+
+/** Minimal StepResult that satisfies the interface without `as any`. */
+export function makeStep(overrides: Partial<StepResult> = {}): StepResult {
+  return {
+    stepType: 'initial',
+    text: '',
+    response: makeResponse({
+      output: [],
+    }),
+    toolCalls: [],
+    toolResults: [],
+    finishReason: undefined,
+    usage: undefined,
+    ...overrides,
+  };
+}
+
+/** Minimal TurnContext for tests. */
+export function makeTurnContext(overrides: Partial<TurnContext> = {}): TurnContext {
+  return {
+    numberOfTurns: 0,
+    ...overrides,
+  };
+}
+
+/** Typed ParsedToolCall factory. */
+export function makeToolCall(overrides: {
+  id: string;
+  name: string;
+  arguments: unknown;
+}): ParsedToolCall<Tool> {
+  return overrides;
+}
+
+/** Typed ToolExecutionResult factory. */
+export function makeToolResult(
+  overrides: Partial<ToolExecutionResult<Tool>> & {
+    toolCallId: string;
+    toolName: string;
+  },
+): ToolExecutionResult<Tool> {
+  return {
+    result: undefined,
+    ...overrides,
+  };
+}
+
+/**
+ * Cast a partial CallModelInput to the full type.
+ * Use when tests provide only a subset of fields (model, temperature, etc.)
+ * that don't include the full union-discriminant fields.
+ */
+export function makeCallModelInput(fields: Record<string, unknown>): CallModelInput {
+  return fields as CallModelInput;
+}
+
+/** Typed tool call array for StepResult.toolCalls */
+export function makeTypedToolCalls(
+  calls: Array<{
+    id: string;
+    name: string;
+    arguments: unknown;
+  }>,
+): TypedToolCallUnion<readonly Tool[]>[] {
+  return calls as TypedToolCallUnion<readonly Tool[]>[];
+}
+
+/** Minimal ResponsesRequest for tests. */
+export function makeRequest(
+  overrides: Partial<models.ResponsesRequest> = {},
+): models.ResponsesRequest {
+  return {
+    model: TEST_MODEL,
+    input: 'test',
+    ...overrides,
+  };
+}