diff --git a/tests/INDEX.md b/tests/INDEX.md
new file mode 100644
index 0000000..7effb9a
--- /dev/null
+++ b/tests/INDEX.md
@@ -0,0 +1,192 @@
+# Test Registry
+
+This index maps each SDK function/module to its **single test category**. Before adding a test, check this registry — if the function is already covered in a higher category, add your assertion there instead of creating a new file in a lower category.
+
+## Category priority (highest wins)
+
+When a function could belong to multiple categories, place it in the **highest applicable** one:
+
+```
+pipelines > dispatch > integration > contracts > composition > boundaries > behavior
+```
+
+**Rule: each function gets ONE category.** If `stepCountIs` has meaningful peer boundaries, it goes in `contracts/` only — not in both `behavior/` and `contracts/`. Functions with no peer comparisons stay in `behavior/`.
+
+---
+
+## Registry
+
+### stop-conditions.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `stepCountIs` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| `hasToolCall` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| `maxTokensUsed` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| `maxCost` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| `finishReasonIs` | contracts | `contracts/stop-conditions.test.ts` | Checks own criterion AND ignores peer criteria |
+| stop condition evaluation (combined) | behavior | `behavior/stop-conditions-evaluation.test.ts` | Tests `evaluateStopConditions` orchestration logic (no peer comparison) |
+
+### stream-type-guards.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `isOutputTextDeltaEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isReasoningDeltaEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFunctionCallArgumentsDeltaEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isOutputItemAddedEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isOutputItemDoneEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isResponseCompletedEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isResponseFailedEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isResponseIncompleteEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFunctionCallArgumentsDoneEvent` | boundaries | `boundaries/stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+| `isOutputMessage` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFunctionCallItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isReasoningOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isWebSearchCallOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFileSearchCallOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isImageGenerationCallOutputItem` | boundaries | `boundaries/output-item-guards.test.ts` | Mutual exclusion with peer guards |
+| `isOutputTextPart` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `isRefusalPart` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFileCitationAnnotation` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `isURLCitationAnnotation` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `isFilePathAnnotation` | boundaries | `boundaries/content-annotation-guards.test.ts` | Mutual exclusion with peer guards |
+| `hasTypeProperty` | behavior | `behavior/stream-type-guards-negative.test.ts` | Utility function, no peers |
+| stream vs output item cross-domain | boundaries | `boundaries/domain-separation.test.ts` | Guards reject events from wrong domain |
+| response stream event guards | boundaries | `boundaries/response-stream-event-guards.test.ts` | Mutual exclusion with peer guards |
+
+### tool-types.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `hasExecuteFunction` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types |
+| `isRegularExecuteTool` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types |
+| `isGeneratorTool` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types |
+| `isManualTool` | boundaries | `boundaries/tool-type-guards.test.ts` | Mutual exclusion across tool types |
+| `toolRequiresApproval` | behavior | `behavior/tool-approval.test.ts` | No peer comparison, isolated behavior |
+| `ToolEventBroadcaster` | behavior | `behavior/tool-event-broadcaster.test.ts` | No peer comparison, isolated behavior |
+| tool type events (combined) | behavior | `behavior/tool-types-events.test.ts` | Event shape verification, isolated |
+
+### tool.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `tool()` factory | behavior | `behavior/tool-creation.test.ts` | Isolated factory behavior |
+| tool factory shapes (regular vs generator vs manual) | boundaries | `boundaries/tool-factory-shapes.test.ts` | Structural distinction between tool types |
+
+### tool-executor.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `executeRegularTool` | behavior | `behavior/tool-execution.test.ts` | Isolated execution behavior |
+| `executeRegularTool` vs `executeGeneratorTool` | contracts | `contracts/execute-tool-boundary.test.ts` | Each handles its type AND rejects the other |
+| `executeTool` dispatch | dispatch | `dispatch/execute-tool-dispatch.test.ts` | Routes via type guard to correct executor |
+
+### tool-context.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `resolveContext`, `ToolContextStore` | behavior | `behavior/tool-context.test.ts` | Isolated context resolution |
+| `buildToolExecuteContext` | integration | `integration/tool-context-execution.test.ts` | Correct output AND feeds tool execute |
+
+### turn-context.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `buildTurnContext`, `normalizeInputToArray` | behavior | `behavior/turn-context.test.ts` | Isolated shape verification |
+| turn context -> async params | integration | `integration/turn-context-async-params.test.ts` | Correct output AND feeds resolveAsyncFunctions |
+
+### async-params.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `resolveAsyncFunctions` | contracts | `contracts/async-params.test.ts` | Static vs function vs client-only handled distinctly |
+
+### conversation-state.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `createInitialState`, `updateState`, `appendToMessages` | behavior | `behavior/conversation-state.test.ts` | Isolated state management |
+| `appendToMessages` + format compat | integration | `integration/conversation-state-format.test.ts` | Output feeds format conversion |
+| `partitionToolCalls` | dispatch | `dispatch/approval-partition-dispatch.test.ts` | Routes via approval checks |
+| `createUnsentResult` vs `createRejectedResult` | boundaries | `boundaries/conversation-state-results.test.ts` | Structural distinction |
+
+### next-turn-params.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `executeNextTurnParamsFunctions` | behavior | `behavior/next-turn-params.test.ts` | Isolated param computation |
+| next-turn params -> request | integration | `integration/next-turn-params-request.test.ts` | Output feeds applyNextTurnParamsToRequest |
+
+### reusable-stream.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `ReusableReadableStream` | behavior | `behavior/reusable-stream.test.ts` | Isolated stream behavior |
+| multi-consumer correctness | integration | `integration/reusable-stream-consumers.test.ts` | Multiple consumers both get correct data |
+
+### stream-transformers.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `extractTextDeltas`, `extractReasoningDeltas`, `extractToolDeltas` | contracts | `contracts/delta-extractors.test.ts` | Each yields its type AND skips peers |
+| `buildMessageStream`, `buildResponsesMessageStream` | contracts | `contracts/message-stream-builders.test.ts` | Each produces distinct format |
+| `buildItemsStream` | contracts | `contracts/items-stream.test.ts` | Produces items format distinctly |
+| `buildItemsStream` dispatch | dispatch | `dispatch/items-stream-dispatch.test.ts` | Routes events via guards |
+| `consumeStreamForCompletion` | contracts | `contracts/consume-stream-completion.test.ts` | Consumes correct terminal event |
+| `getResponseObject`, `getTextContent` | contracts | `contracts/response-extractors.test.ts` | Each extracts distinct data |
+| `convertToClaudeMessage` | dispatch | `dispatch/claude-conversion-dispatch.test.ts` | Routes items via output guards |
+| `consumeStreamForCompletion` + guards | integration | `integration/stream-completion-guards.test.ts` | Correct result AND guard identified event |
+
+### anthropic-compat.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `fromClaudeMessages` | contracts | `contracts/from-claude-messages.test.ts` | Maps each block type distinctly |
+| `fromClaudeMessages` dispatch | dispatch | `dispatch/from-claude-dispatch.test.ts` | Routes mixed block types |
+| `toClaudeMessage` + `fromClaudeMessages` round-trip | pipelines | `pipelines/format-round-trip.test.ts` | Full conversion pipeline |
+
+### chat-compat.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `toChatMessage` + `fromChatMessages` round-trip | pipelines | `pipelines/format-round-trip.test.ts` | Full conversion pipeline |
+
+### tool-orchestrator.ts
+
+| Function | Category | File | Reason |
+|----------|----------|------|--------|
+| `mapToolResults`, `summarizeStepUsage`, etc. | behavior | `behavior/tool-orchestrator.test.ts` | Isolated utility functions |
+
+---
+
+## Pipeline tests (cross-cutting)
+
+These tests exercise multiple modules end-to-end and don't map to a single function:
+
+| Pipeline | File |
+|----------|------|
+| Streaming: events -> guards -> transformers -> consumer | `pipelines/streaming-pipeline.test.ts` |
+| Tool execution: create -> dispatch -> validate -> execute -> format | `pipelines/tool-execution-pipeline.test.ts` |
+| Context: build -> resolve -> store -> execute | `pipelines/context-pipeline.test.ts` |
+| Stop conditions: results -> evaluate -> decision | `pipelines/stop-condition-pipeline.test.ts` |
+| Dual-format output: same response -> chat + Claude + items | `pipelines/dual-format-output.test.ts` |
+| Claude conversion deep: multi-item -> per-item routing -> blocks | `pipelines/claude-conversion-deep.test.ts` |
+| Next-turn params: tool results -> compute -> apply to request | `pipelines/next-turn-params-pipeline.test.ts` |
+| Async resolution: resolve -> apply -> evaluate stop | `pipelines/async-resolution-pipeline.test.ts` |
+| Orchestrator chain: execute -> map -> summarize -> check errors | `pipelines/orchestrator-utility-chain.test.ts` |
+| Approval -> execution -> state: partition -> execute -> format | `pipelines/approval-execution-state.test.ts` |
+| Format round-trip: Claude and Chat bidirectional conversion | `pipelines/format-round-trip.test.ts` |
+
+## Composition tests (two-module connection)
+
+| Connection | File |
+|------------|------|
+| tool() -> type guards / convertToolsToAPIFormat | `composition/tool-lifecycle.test.ts` |
+| ReusableReadableStream -> multiple consumers | `composition/stream-data-pipeline.test.ts` |
+| executeNextTurnParamsFunctions -> applyNextTurnParamsToRequest | `composition/next-turn-params-flow.test.ts` |
+| toChatMessage -> fromChatMessages (format round-trip) | `composition/format-compatibility.test.ts` |
+| buildToolExecuteContext -> tool execute | `composition/context-flow.test.ts` |
+| appendToMessages -> state update | `composition/input-normalization.test.ts` |
+| createInitialState -> updateState | `composition/state-machine.test.ts` |
+| orchestrator utilities -> executor results | `composition/orchestrator-executor.test.ts` |
diff --git a/tests/behavior/README.md b/tests/behavior/README.md
new file mode 100644
index 0000000..7c4c9e6
--- /dev/null
+++ b/tests/behavior/README.md
@@ -0,0 +1,26 @@
+# Behavior Tests
+
+Tests in this folder verify that each SDK capability works as promised **in isolation**. No comparison to similar capabilities, no cross-module composition — just: does this function do what its contract says?
+
+## What belongs here
+
+- Happy-path execution of individual functions
+- Error cases and edge cases for a single function
+- Return shape and type verification
+- Input validation (valid and invalid)
+- Default values and optional parameter handling
+- Isolated behavior tests for any SDK capability added in the future
+
+## Examples
+
+- `tool()` factory produces the correct structure for each tool type
+- `validateToolInput` accepts valid data and rejects invalid data
+- `ReusableReadableStream` delivers items in order to a single consumer
+- `createInitialState()` returns the expected shape with timestamps
+- `resolveContext` handles static objects, functions, async functions, and undefined
+
+## What does NOT belong here
+
+- Tests comparing two similar functions (→ `boundaries/`)
+- Tests where one module's output feeds another's input (→ `composition/`)
+- End-to-end workflows (→ `pipelines/`)
diff --git a/tests/behavior/async-params.test.ts b/tests/behavior/async-params.test.ts
new file mode 100644
index 0000000..b7720fb
--- /dev/null
+++ b/tests/behavior/async-params.test.ts
@@ -0,0 +1,113 @@
+import { describe, expect, it } from 'vitest';
+import { hasAsyncFunctions, resolveAsyncFunctions } from '../../src/lib/async-params.js';
+import type { TurnContext } from '../../src/lib/tool-types.js';
+import { makeCallModelInput, TEST_MODEL } from '../test-constants.js';
+
+const turnCtx: TurnContext = {
+  numberOfTurns: 2, // shared fixture: the resolver tests below derive temperature from this value
+};
+
+describe('async params - resolveAsyncFunctions', () => {
+  it('passes through static values unchanged', async () => {
+    const params = makeCallModelInput({
+      model: TEST_MODEL,
+      temperature: 0.7,
+      input: 'hi',
+    });
+    const resolved = await resolveAsyncFunctions(params, turnCtx);
+    expect(resolved.model).toBe(TEST_MODEL);
+    expect(resolved.temperature).toBe(0.7);
+  });
+
+  it('resolves sync function fields with turnContext', async () => {
+    const params = makeCallModelInput({
+      model: TEST_MODEL,
+      temperature: (ctx: TurnContext) => ctx.numberOfTurns * 0.1,
+      input: 'test',
+    });
+    const resolved = await resolveAsyncFunctions(params, turnCtx);
+    expect(resolved.temperature).toBeCloseTo(0.2);
+  });
+
+  it('resolves async function fields with turnContext', async () => {
+    const params = makeCallModelInput({
+      model: TEST_MODEL,
+      temperature: async (ctx: TurnContext) => ctx.numberOfTurns * 0.15,
+      input: 'test',
+    });
+    const resolved = await resolveAsyncFunctions(params, turnCtx);
+    expect(resolved.temperature).toBeCloseTo(0.3);
+  });
+
+  it('strips client-only fields (stopWhen, state, requireApproval, context, etc.)', async () => {
+    const params = makeCallModelInput({
+      model: TEST_MODEL,
+      input: 'test',
+      stopWhen: () => true,
+      state: {},
+      requireApproval: () => false,
+      context: {},
+    });
+    const resolved = await resolveAsyncFunctions(params, turnCtx);
+    const clientOnly = ['stopWhen', 'state', 'requireApproval', 'context'];
+    for (const field of clientOnly) {
+      expect(resolved).not.toHaveProperty(field);
+    }
+  });
+
+  it('wraps field resolution errors with field name', async () => {
+    const params = makeCallModelInput({
+      model: TEST_MODEL,
+      temperature: () => {
+        throw new Error('compute failed');
+      },
+      input: 'test',
+    });
+    await expect(resolveAsyncFunctions(params, turnCtx)).rejects.toThrow(/temperature/);
+  });
+});
+
+describe('async params - hasAsyncFunctions', () => {
+  it('returns true when any field is a function', () => {
+    expect(
+      hasAsyncFunctions({
+        model: TEST_MODEL,
+        temperature: () => 0.5,
+      }),
+    ).toBe(true);
+  });
+
+  it('returns false when all fields are static values', () => {
+    expect(
+      hasAsyncFunctions({
+        model: TEST_MODEL,
+        temperature: 0.5,
+      }),
+    ).toBe(false);
+  });
+
+  it('returns false for null input', () => {
+    expect(hasAsyncFunctions(null)).toBe(false);
+  });
+
+  it('returns false for undefined input', () => {
+    expect(hasAsyncFunctions(undefined)).toBe(false);
+  });
+
+  it('returns false for non-object input', () => {
+    expect(hasAsyncFunctions('string')).toBe(false);
+  });
+
+  it('returns true when a function field sits next to static fields', () => {
+    expect(
+      hasAsyncFunctions({
+        a: 1,
+        b: () => 2, // top-level function field; nothing in this fixture is nested
+      }),
+    ).toBe(true);
+  });
+
+  it('returns false for empty object', () => {
+    expect(hasAsyncFunctions({})).toBe(false);
+  });
+});
diff --git a/tests/behavior/async-resolution-pipeline.test.ts b/tests/behavior/async-resolution-pipeline.test.ts
new file mode 100644
index 0000000..7a77636
--- /dev/null
+++ b/tests/behavior/async-resolution-pipeline.test.ts
@@ -0,0 +1,32 @@
+import { describe, expect, it } from 'vitest';
+
+import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
+import { stepCountIs } from '../../src/lib/stop-conditions.js';
+import { makeCallModelInput, makeTurnContext, TEST_MODEL } from '../test-constants.js';
+
+describe('Async resolution + clean API request', () => {
+  it('mixed input: static model, function temperature, client-only stopWhen -> three paths verified in one call', async () => {
+    const turnCtx = makeTurnContext({
+      numberOfTurns: 2,
+    });
+
+    const result = await resolveAsyncFunctions(
+      makeCallModelInput({
+        model: TEST_MODEL,
+        temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1,
+        stopWhen: stepCountIs(5),
+        input: 'hello',
+      }),
+      turnCtx,
+    );
+
+    // Static: preserved
+    expect(result.model).toBe(TEST_MODEL);
+    // Function: resolved (computed float, so compare approximately rather than with ===)
+    expect(result.temperature).toBeCloseTo(0.2);
+    // Client-only: stripped
+    expect(result).not.toHaveProperty('stopWhen');
+    // Static: preserved
+    expect(result.input).toBe('hello');
+  });
+});
diff --git a/tests/behavior/claude-conversion-annotations.test.ts b/tests/behavior/claude-conversion-annotations.test.ts
new file mode 100644
index 0000000..854c7c4
--- /dev/null
+++ b/tests/behavior/claude-conversion-annotations.test.ts
@@ -0,0 +1,68 @@
+import { describe, expect, it } from 'vitest';
+
+import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('convertToClaudeMessage annotation handling', () => {
+  it('annotations: text with file_citation + url_citation + file_path -> each produces its distinct citation', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Here is the answer',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  fileId: 'f1',
+                  filename: 'doc.pdf',
+                  index: 0,
+                },
+                {
+                  type: 'url_citation',
+                  url: 'https://example.com',
+                  title: 'Example',
+                  startIndex: 0,
+                  endIndex: 10,
+                },
+                {
+                  type: 'file_path',
+                  fileId: 'f2',
+                  filePath: '/tmp/out.txt',
+                },
+              ],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Here is the answer',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const claude = convertToClaudeMessage(response);
+    const textBlock = claude.content.find((b: { type: string }) => b.type === 'text') as
+      | {
+          type: string;
+          text: string;
+          citations?: unknown[];
+        }
+      | undefined;
+    expect(textBlock).toBeDefined();
+    // Citations are asserted unconditionally: wrapping the expect in an `if`
+    // would let this test pass when no citations are produced at all.
+    expect(Array.isArray(textBlock?.citations)).toBe(true);
+    expect(textBlock?.citations?.length).toBeGreaterThan(0);
+  });
+});
diff --git a/tests/behavior/consume-stream-completion.test.ts b/tests/behavior/consume-stream-completion.test.ts
new file mode 100644
index 0000000..dc32204
--- /dev/null
+++ b/tests/behavior/consume-stream-completion.test.ts
@@ -0,0 +1,80 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { consumeStreamForCompletion } from '../../src/lib/stream-transformers.js';
+
+// Test helper: replays `events`, then closes. NOTE(review): `StreamEvents` has no import in this file — confirm it resolves.
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
+  return new ReusableReadableStream(
+    new ReadableStream({
+      start(ctrl) {
+        for (const evt of events) ctrl.enqueue(evt);
+        ctrl.close();
+      },
+    }),
+  );
+}
+
+describe('consumeStreamForCompletion - completion vs failure distinction', () => {
+  it('response.completed event -> returns the response', async () => {
+    const completed = {
+      id: 'r1',
+      status: 'completed',
+      output: [],
+    };
+    const source = makeStream([
+      {
+        type: 'response.output_text.delta',
+        delta: 'hello',
+      },
+      {
+        type: 'response.completed',
+        response: completed,
+      },
+    ]);
+    const outcome = await consumeStreamForCompletion(source);
+    expect(outcome).toEqual(completed);
+  });
+
+  it('response.incomplete event -> returns the incomplete response', async () => {
+    const incomplete = {
+      id: 'r1',
+      status: 'incomplete',
+      output: [],
+    };
+    const source = makeStream([
+      {
+        type: 'response.incomplete',
+        response: incomplete,
+      },
+    ]);
+    const outcome = await consumeStreamForCompletion(source);
+    expect(outcome).toEqual(incomplete);
+  });
+
+  it('response.failed event -> throws', async () => {
+    const source = makeStream([
+      {
+        type: 'response.failed',
+        response: {
+          error: {
+            message: 'rate limited',
+          },
+        },
+      },
+    ]);
+    await expect(consumeStreamForCompletion(source)).rejects.toThrow('Response failed');
+  });
+
+  it('stream ends without completion event -> throws', async () => {
+    const source = makeStream([
+      {
+        type: 'response.output_text.delta',
+        delta: 'hello',
+      },
+    ]);
+    await expect(consumeStreamForCompletion(source)).rejects.toThrow(
+      'Stream ended without completion event',
+    );
+  });
+});
diff --git a/tests/behavior/conversation-state-format.test.ts b/tests/behavior/conversation-state-format.test.ts
new file mode 100644
index 0000000..f2f6048
--- /dev/null
+++ b/tests/behavior/conversation-state-format.test.ts
@@ -0,0 +1,29 @@
+import { describe, expect, it } from 'vitest';
+
+import { appendToMessages } from '../../src/lib/conversation-state.js';
+
+describe('Conversation state -> format conversion', () => {
+  it('appendToMessages with array input -> keeps existing messages and appends the new item in order', () => {
+    const existing = [
+      {
+        role: 'user' as const,
+        content: 'first message',
+      },
+    ];
+
+    const newItem = {
+      role: 'user' as const,
+      content: 'second message',
+    };
+    const result = appendToMessages(existing, [
+      newItem,
+    ]);
+    expect(result).toHaveLength(2);
+    expect(result[0]).toEqual({
+      role: 'user',
+      content: 'first message',
+    });
+    expect(result[1]).toHaveProperty('role', 'user');
+    expect(result[1]).toHaveProperty('content', 'second message');
+  });
+});
diff --git a/tests/behavior/conversation-state.test.ts b/tests/behavior/conversation-state.test.ts
new file mode 100644
index 0000000..2a44288
--- /dev/null
+++ b/tests/behavior/conversation-state.test.ts
@@ -0,0 +1,191 @@
+import type * as models from '@openrouter/sdk/models';
+import { describe, expect, it } from 'vitest';
+import {
+  appendToMessages,
+  createInitialState,
+  createRejectedResult,
+  createUnsentResult,
+  extractTextFromResponse,
+  generateConversationId,
+  unsentResultsToAPIFormat,
+  updateState,
+} from '../../src/lib/conversation-state.js';
+import { makeResponse } from '../test-constants.js';
+
+describe('conversation state - createInitialState', () => {
+  it('creates state with generated id, empty messages, in_progress status', () => {
+    const fresh = createInitialState();
+    expect(fresh.id).toMatch(/^conv_/);
+    expect(fresh.messages).toEqual([]);
+    expect(fresh.status).toBe('in_progress');
+    expect(fresh.createdAt).toBeTypeOf('number');
+    expect(fresh.updatedAt).toBeTypeOf('number');
+  });
+
+  it('uses provided custom id', () => {
+    const { id } = createInitialState('custom_123');
+    expect(id).toBe('custom_123');
+  });
+});
+
+describe('conversation state - updateState', () => {
+  it('merges updates and bumps updatedAt timestamp', () => {
+    const original = createInitialState('s1');
+    const stampBefore = original.updatedAt;
+    const next = updateState(original, {
+      status: 'completed',
+    });
+    expect(next.status).toBe('completed');
+    expect(next.id).toBe('s1');
+    expect(next.updatedAt).toBeGreaterThanOrEqual(stampBefore);
+  });
+
+  it('preserves id and createdAt from original state', () => {
+    const original = createInitialState('s2');
+    const next = updateState(original, {
+      messages: [
+        {
+          role: 'user',
+          content: 'hi',
+        },
+      ],
+    });
+    expect(next.id).toBe('s2');
+    expect(next.createdAt).toBe(original.createdAt);
+  });
+});
+
+describe('conversation state - appendToMessages', () => {
+  it('appends new items to existing array input', () => {
+    const history: models.InputsUnion = [
+      {
+        role: 'user',
+        content: 'hello',
+      },
+    ];
+    const merged = appendToMessages(history, [
+      {
+        role: 'assistant',
+        content: 'hi',
+      },
+    ]);
+    expect(merged).toHaveLength(2);
+  });
+
+  it('converts string input to array then appends', () => {
+    const merged = appendToMessages('hello', [
+      {
+        role: 'assistant',
+        content: 'hi',
+      },
+    ]);
+    expect(merged).toHaveLength(2);
+    expect(merged[0]).toHaveProperty('role', 'user');
+  });
+});
+
+describe('conversation state - generateConversationId', () => {
+  it('returns string starting with conv_', () => {
+    // Only the prefix is pinned; the rest of the id is not asserted here.
+    const generated = generateConversationId();
+    expect(generated).toMatch(/^conv_/);
+  });
+
+  it('generates unique ids on successive calls', () => {
+    // Collect ten freshly generated ids in a Set; a Set silently drops
+    // duplicates, so any collision would leave fewer than ten entries.
+    const seen = new Set<string>();
+    for (let call = 0; call < 10; call += 1) {
+      seen.add(generateConversationId());
+    }
+    const distinctCount = seen.size;
+    expect(distinctCount).toBe(10);
+  });
+});
+
+describe('conversation state - unsent results', () => {
+  it('createUnsentResult builds valid result with callId, name, output', () => {
+    const unsent = createUnsentResult('c1', 'test', {
+      data: 42,
+    });
+    expect(unsent.callId).toBe('c1');
+    expect(unsent.name).toBe('test');
+    expect(unsent.output).toEqual({
+      data: 42,
+    });
+  });
+
+  it('createRejectedResult builds result with error message', () => {
+    const rejected = createRejectedResult('c2', 'test', 'not allowed');
+    expect(rejected.callId).toBe('c2');
+    expect(rejected.output).toBeNull();
+    expect(rejected.error).toBe('not allowed');
+  });
+
+  it('createRejectedResult uses default rejection message', () => {
+    const rejected = createRejectedResult('c3', 'test');
+    expect(rejected.error).toContain('rejected');
+  });
+
+  it('unsentResultsToAPIFormat converts to FunctionCallOutputItem array', () => {
+    const pending = createUnsentResult('c1', 'test', {
+      data: 1,
+    });
+    const api = unsentResultsToAPIFormat([pending]);
+    // Exactly one result goes in, so exactly one API item should come out.
+    expect(api).toHaveLength(1);
+    const first = api[0]!;
+    expect(first.type).toBe('function_call_output');
+    expect(first.callId).toBe('c1');
+    expect(typeof first.output).toBe('string');
+  });
+});
+
+describe('conversation state - response extraction', () => {
+  it('extractTextFromResponse extracts text from message output items', () => {
+    const twoMessages = makeResponse({
+      id: 'r1',
+      output: [
+        {
+          type: 'message',
+          content: [
+            {
+              type: 'output_text',
+              text: 'Hello ',
+            },
+          ],
+        },
+        {
+          type: 'message',
+          content: [
+            {
+              type: 'output_text',
+              text: 'World',
+            },
+          ],
+        },
+      ],
+      parallel_tool_calls: false,
+      status: 'completed',
+      usage: null,
+      error: null,
+      incomplete_details: null,
+      created_at: 0,
+    });
+    expect(extractTextFromResponse(twoMessages)).toBe('Hello World');
+  });
+
+  it('extractTextFromResponse returns empty string for no output', () => {
+    const noOutput = makeResponse({
+      id: 'r1',
+      output: [],
+      parallel_tool_calls: false,
+      status: 'completed',
+      usage: null,
+      error: null,
+      incomplete_details: null,
+      created_at: 0,
+    });
+    expect(extractTextFromResponse(noOutput)).toBe('');
+  });
+});
diff --git a/tests/behavior/format-compatibility.test.ts b/tests/behavior/format-compatibility.test.ts
new file mode 100644
index 0000000..587729a
--- /dev/null
+++ b/tests/behavior/format-compatibility.test.ts
@@ -0,0 +1,50 @@
+import { describe, expect, it } from 'vitest';
+import { toClaudeMessage } from '../../src/lib/anthropic-compat.js';
+import { toChatMessage } from '../../src/lib/chat-compat.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+// Builds a completed response fixture holding one assistant message whose only part is `text`.
+function makeResponse(text: string) {
+  const message = {
+    type: 'message' as const,
+    id: 'm1',
+    role: 'assistant' as const,
+    status: 'completed' as const,
+    content: [
+      {
+        type: 'output_text' as const,
+        text,
+        annotations: [],
+      },
+    ],
+  };
+  return {
+    id: 'r1',
+    output: [message],
+    status: 'completed' as const,
+    outputText: text,
+    model: TEST_MODEL,
+    usage: {
+      totalTokens: 100,
+      inputTokens: 50,
+      outputTokens: 50,
+    },
+  };
+}
+
+describe('Format compatibility: compat layers -> stream-transformers', () => {
+  it('toChatMessage delegates to extractMessageFromResponse -> returns ChatAssistantMessage', () => {
+    // Only the converted role and content are checked here.
+    const { role, content } = toChatMessage(makeResponse('Hello world'));
+    expect(role).toBe('assistant');
+    expect(content).toBe('Hello world');
+  });
+
+  it('toClaudeMessage delegates to convertToClaudeMessage -> returns ClaudeMessage', () => {
+    const source = makeResponse('Hello world');
+    const converted = toClaudeMessage(source);
+    expect(converted.role).toBe('assistant');
+    expect(converted.content).toBeDefined();
+    expect(Array.isArray(converted.content)).toBe(true);
+  });
+});
diff --git a/tests/behavior/input-normalization.test.ts b/tests/behavior/input-normalization.test.ts
new file mode 100644
index 0000000..b973f88
--- /dev/null
+++ b/tests/behavior/input-normalization.test.ts
@@ -0,0 +1,26 @@
+import { describe, expect, it } from 'vitest';
+
+import { appendToMessages } from '../../src/lib/conversation-state.js';
+
+describe('Input normalization: turn-context -> conversation-state', () => { // appendToMessages must accept a bare string as the existing input.
+  it('appendToMessages with string input normalizes to array before append', () => {
+    const existing = 'first message';
+    const newItem = {
+      role: 'user' as const,
+      content: 'second message',
+    };
+    const result = appendToMessages(existing, [
+      newItem,
+    ]);
+
+    expect(result).toHaveLength(2); // exactly one normalized item + one appended item; extras or duplicates must fail
+    // First item is the user message the string input was normalized into
+    const firstItem = result[0]!;
+    expect(firstItem).toHaveProperty('role', 'user');
+    expect(firstItem).toHaveProperty('content', 'first message');
+    // Last item is the appended message, unchanged
+    const lastItem = result[result.length - 1]!;
+    expect(lastItem).toHaveProperty('role', 'user');
+    expect(lastItem).toHaveProperty('content', 'second message');
+  });
+});
diff --git a/tests/behavior/next-turn-params.test.ts b/tests/behavior/next-turn-params.test.ts
new file mode 100644
index 0000000..45b96f6
--- /dev/null
+++ b/tests/behavior/next-turn-params.test.ts
@@ -0,0 +1,222 @@
+import type * as models from '@openrouter/sdk/models';
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import {
+  applyNextTurnParamsToRequest,
+  buildNextTurnParamsContext,
+  executeNextTurnParamsFunctions,
+} from '../../src/lib/next-turn-params.js';
+import { tool } from '../../src/lib/tool.js';
+import type { ParsedToolCall, Tool } from '../../src/lib/tool-types.js';
+import { makeRequest, TEST_MODEL } from '../test-constants.js';
+
+describe('next-turn params - buildNextTurnParamsContext', () => { // Checks which request fields the context exposes and what it substitutes when they are absent.
+  it('extracts relevant fields from request', () => {
+    const request: models.ResponsesRequest = {
+      model: TEST_MODEL,
+      input: 'hello',
+      temperature: 0.7,
+      maxOutputTokens: 1000,
+    };
+    const ctx = buildNextTurnParamsContext(request);
+    expect(ctx.model).toBe(TEST_MODEL); // each populated field is expected to pass through unchanged
+    expect(ctx.input).toBe('hello');
+    expect(ctx.temperature).toBe(0.7);
+    expect(ctx.maxOutputTokens).toBe(1000);
+  });
+
+  it('defaults missing fields to null/empty', () => {
+    const request = makeRequest({
+      model: undefined,
+      input: undefined,
+    });
+    const ctx = buildNextTurnParamsContext(request);
+    expect(ctx.model).toBe(''); // string field is expected to fall back to ''
+    expect(ctx.temperature).toBeNull(); // numeric fields are expected to fall back to null
+    expect(ctx.maxOutputTokens).toBeNull();
+    expect(ctx.models).toEqual([]); // list field is expected to fall back to an empty array
+  });
+});
+
+describe('next-turn params - executeNextTurnParamsFunctions', () => { // Every case passes (toolCalls, tools, request) and inspects the computed params object.
+  it('executes temperature function and returns computed value', async () => {
+    const t = tool({
+      name: 'search',
+      inputSchema: z.object({
+        query: z.string(),
+      }),
+      nextTurnParams: {
+        temperature: () => 0.2 as number | null, // constant override that ignores its arguments
+      },
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'search',
+      arguments: {
+        query: 'test',
+      },
+    };
+    const request = makeRequest({
+      model: TEST_MODEL,
+      input: 'hello',
+    });
+    const result = await executeNextTurnParamsFunctions(
+      [
+        tc,
+      ],
+      [
+        t,
+      ],
+      request,
+    );
+    expect(result.temperature).toBe(0.2);
+  });
+
+  it('returns empty object when no tools have nextTurnParams', async () => {
+    const t = tool({
+      name: 'basic',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'basic',
+      arguments: {},
+    };
+    const result = await executeNextTurnParamsFunctions(
+      [
+        tc,
+      ],
+      [
+        t,
+      ],
+      makeRequest({}),
+    );
+    expect(Object.keys(result)).toHaveLength(0); // the only tool defines no nextTurnParams
+  });
+
+  it('skips tools not in toolCalls array', async () => {
+    const t1 = tool({
+      name: 'a',
+      inputSchema: z.object({}),
+      nextTurnParams: {
+        temperature: () => 0.1 as number | null,
+      },
+      execute: async () => ({}),
+    });
+    const t2 = tool({
+      name: 'b',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'b', // only 'b' is called; 'a' (the tool with nextTurnParams) is registered but never called
+      arguments: {},
+    };
+    const result = await executeNextTurnParamsFunctions(
+      [
+        tc,
+      ],
+      [
+        t1,
+        t2,
+      ],
+      makeRequest({}),
+    );
+    expect(result.temperature).toBeUndefined();
+  });
+
+  it('composes functions from multiple tools in order', async () => {
+    const t1 = tool({
+      name: 'first',
+      inputSchema: z.object({}),
+      nextTurnParams: {
+        temperature: (_p, ctx) => (ctx.temperature ?? 0) + 0.1, // builds on whatever temperature ctx currently carries
+      },
+      execute: async () => ({}),
+    });
+    const t2 = tool({
+      name: 'second',
+      inputSchema: z.object({}),
+      nextTurnParams: {
+        temperature: (_p, ctx) => (ctx.temperature ?? 0) + 0.2,
+      },
+      execute: async () => ({}),
+    });
+    const tc1: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'first',
+      arguments: {},
+    };
+    const tc2: ParsedToolCall<Tool> = {
+      id: 'c2',
+      name: 'second',
+      arguments: {},
+    };
+    const request = {
+      temperature: 0.5, // starting value the two tool functions add to
+    };
+    const result = await executeNextTurnParamsFunctions(
+      [
+        tc1,
+        tc2,
+      ],
+      [
+        t1,
+        t2,
+      ],
+      request,
+    );
+    expect(result.temperature).toBeCloseTo(0.8); // expected 0.5 + 0.1 + 0.2; toBeCloseTo tolerates float rounding
+  });
+});
+
+describe('next-turn params - applyNextTurnParamsToRequest', () => { // Covers merging computed params onto a request: override, null handling, non-mutation, no-op.
+  it('merges computed params into request', () => {
+    const request = makeRequest({
+      model: TEST_MODEL,
+      temperature: 0.7,
+      input: 'test',
+    });
+    const computed = {
+      temperature: 0.2 as number | null,
+    };
+    const result = applyNextTurnParamsToRequest(request, computed);
+    expect(result.temperature).toBe(0.2); // computed value replaces the request's 0.7
+    expect(result.model).toBe(TEST_MODEL); // fields absent from computed are carried over
+  });
+
+  it('converts null values to undefined for API compatibility', () => {
+    const request = makeRequest({
+      model: TEST_MODEL,
+    });
+    const computed = {
+      temperature: null,
+    };
+    const result = applyNextTurnParamsToRequest(request, computed);
+    expect(result.temperature).toBeUndefined();
+  });
+
+  it('returns new object without mutating original', () => {
+    const request = makeRequest({
+      model: TEST_MODEL,
+      temperature: 0.7,
+    });
+    const result = applyNextTurnParamsToRequest(request, {
+      temperature: 0.2,
+    });
+    expect(request.temperature).toBe(0.7); // input request must still hold its original value
+    expect(result.temperature).toBe(0.2);
+  });
+
+  it('handles empty computed params', () => {
+    const request = makeRequest({
+      model: TEST_MODEL,
+      temperature: 0.7,
+    });
+    const result = applyNextTurnParamsToRequest(request, {});
+    expect(result.temperature).toBe(0.7); // nothing computed, so the request value survives
+  });
+});
diff --git a/tests/behavior/orchestrator-utility-chain.test.ts b/tests/behavior/orchestrator-utility-chain.test.ts
new file mode 100644
index 0000000..17da20e
--- /dev/null
+++ b/tests/behavior/orchestrator-utility-chain.test.ts
@@ -0,0 +1,53 @@
+import { describe, expect, it } from 'vitest';
+import {
+  getToolExecutionErrors,
+  hasToolExecutionErrors,
+  summarizeToolExecutions,
+  toolResultsToMap,
+} from '../../src/lib/tool-orchestrator.js';
+import type { Tool, ToolExecutionResult } from '../../src/lib/tool-types.js';
+
+describe('Orchestrator utility chain', () => { // Feeds one success + one failure result through all four orchestrator helpers in turn.
+  it('mixed results: one success + one failure -> toolResultsToMap -> hasToolExecutionErrors -> getToolExecutionErrors -> summarizeToolExecutions', () => {
+    const successResult: ToolExecutionResult<Tool> = {
+      toolCallId: 'tc_1',
+      toolName: 'search',
+      result: {
+        data: 'found',
+      },
+    };
+
+    const failureResult: ToolExecutionResult<Tool> = {
+      toolCallId: 'tc_2',
+      toolName: 'delete',
+      result: null,
+      error: new Error('Permission denied'), // the presence of `error` is what marks this result as failed in the fixture
+    };
+
+    const results = [
+      successResult,
+      failureResult,
+    ];
+
+    // Step 1: Map results
+    const map = toolResultsToMap(results);
+    expect(map.size).toBe(2);
+    expect(map.get('tc_1')).toBeDefined(); // entries are looked up by toolCallId
+    expect(map.get('tc_2')).toBeDefined();
+
+    // Step 2: Check for errors
+    expect(hasToolExecutionErrors(results)).toBe(true);
+
+    // Step 3: Get errors
+    const errors = getToolExecutionErrors(results);
+    expect(errors).toHaveLength(1);
+    expect(errors[0]!.message).toBe('Permission denied');
+
+    // Step 4: Summarize
+    const summary = summarizeToolExecutions(results);
+    expect(summary).toContain('search'); // summary is only checked for substrings, not exact layout
+    expect(summary).toContain('SUCCESS');
+    expect(summary).toContain('delete');
+    expect(summary).toContain('Permission denied');
+  });
+});
diff --git a/tests/behavior/reusable-stream.test.ts b/tests/behavior/reusable-stream.test.ts
new file mode 100644
index 0000000..be6ab22
--- /dev/null
+++ b/tests/behavior/reusable-stream.test.ts
@@ -0,0 +1,204 @@
+import { describe, expect, it } from 'vitest';
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+
+function makeStream<T>(values: T[]): ReadableStream<T> { // Returns a stream that enqueues every value inside start() and then closes.
+  return new ReadableStream<T>({
+    start(controller) {
+      for (const v of values) {
+        controller.enqueue(v);
+      }
+      controller.close(); // closed right after the last enqueue, so an empty `values` yields an already-closed stream
+    },
+  });
+}
+
+function makeDelayedStream<T>(values: T[], delayMs = 5): ReadableStream<T> { // Returns a stream that waits delayMs before each enqueue, then closes.
+  return new ReadableStream<T>({
+    async start(controller) {
+      for (const v of values) {
+        await new Promise((r) => setTimeout(r, delayMs)); // the delay precedes every value, including the first
+        controller.enqueue(v);
+      }
+      controller.close();
+    },
+  });
+}
+
+async function collect<T>(iter: AsyncIterableIterator<T>): Promise<T[]> { // Drains `iter` with for-await and resolves with the values in arrival order.
+  const result: T[] = [];
+  for await (const v of iter) {
+    result.push(v);
+  }
+  return result;
+}
+
+describe('reusable stream - single consumer', () => { // Baseline: one consumer over a synchronously pre-filled source.
+  it('single consumer reads all values from source', async () => {
+    const rrs = new ReusableReadableStream(
+      makeStream([
+        1,
+        2,
+        3,
+      ]),
+    );
+    const values = await collect(rrs.createConsumer());
+    expect(values).toEqual([
+      1,
+      2,
+      3,
+    ]);
+  });
+
+  it('empty source stream yields no values', async () => {
+    const rrs = new ReusableReadableStream(makeStream([])); // source closes without enqueuing anything
+    const values = await collect(rrs.createConsumer());
+    expect(values).toEqual([]);
+  });
+});
+
+describe('reusable stream - multiple consumers', () => { // Several consumers of one source must each observe the full sequence.
+  it('two consumers independently read the same values', async () => {
+    const rrs = new ReusableReadableStream(
+      makeStream([
+        10,
+        20,
+        30,
+      ]),
+    );
+    const c1 = rrs.createConsumer();
+    const c2 = rrs.createConsumer();
+    const [v1, v2] = await Promise.all([ // both consumers are drained concurrently
+      collect(c1),
+      collect(c2),
+    ]);
+    expect(v1).toEqual([
+      10,
+      20,
+      30,
+    ]);
+    expect(v2).toEqual([
+      10,
+      20,
+      30,
+    ]);
+  });
+
+  it('late-joining consumer gets all buffered values plus new ones', async () => {
+    const rrs = new ReusableReadableStream(
+      makeDelayedStream(
+        [
+          1,
+          2,
+          3,
+          4,
+        ],
+        5,
+      ),
+    );
+    const c1 = rrs.createConsumer();
+    // Let first consumer read a bit
+    const first = await c1.next();
+    expect(first.done).toBe(false);
+    // Join late
+    const c2 = rrs.createConsumer(); // created only after c1 has already consumed one value
+    const [remaining1, values2] = await Promise.all([
+      collect(c1),
+      collect(c2),
+    ]);
+    // c1 already read first value, so remaining has rest
+    expect(remaining1.length).toBeGreaterThanOrEqual(2); // NOTE(review): lower bound only; presumably the rest is exactly [2, 3, 4] - confirm and tighten
+    // c2 should have all values
+    expect(values2).toEqual([
+      1,
+      2,
+      3,
+      4,
+    ]);
+  });
+});
+
+describe('reusable stream - error propagation', () => {
+  it('propagates source error to consumer', async () => {
+    // Emits one value from start(), then errors from pull() so the consumer
+    // sees a successful read followed by a rejection.
+    const errorStream = new ReadableStream<number>({
+      start(controller) {
+        controller.enqueue(1);
+      },
+      pull(controller) {
+        controller.error(new Error('source error'));
+      },
+    });
+    const rrs = new ReusableReadableStream(errorStream);
+    const consumer = rrs.createConsumer();
+    const first = await consumer.next();
+    expect(first.value).toBe(1);
+    await expect(consumer.next()).rejects.toThrow('source error');
+  });
+});
+
+describe('reusable stream - cancellation', () => { // Contrasts stream-wide cancel() with a single consumer's return().
+  it('cancel() stops all consumers', async () => {
+    const rrs = new ReusableReadableStream(
+      makeDelayedStream(
+        [
+          1,
+          2,
+          3,
+          4,
+          5,
+        ],
+        50, // 50 ms between values, so cancel() below runs while most values are still pending
+      ),
+    );
+    const c1 = rrs.createConsumer();
+    const first = await c1.next();
+    expect(first.done).toBe(false);
+    await rrs.cancel();
+    const next = await c1.next();
+    expect(next.done).toBe(true); // after cancel the consumer reports completion
+  });
+
+  it('consumer.return() cancels that consumer only', async () => {
+    const rrs = new ReusableReadableStream(
+      makeStream([
+        1,
+        2,
+        3,
+      ]),
+    );
+    const c1 = rrs.createConsumer();
+    const c2 = rrs.createConsumer();
+    await c1.return!(); // `return` is optional on the iterator type, hence the non-null assertion
+    const result = await c1.next();
+    expect(result.done).toBe(true);
+    // c2 should still work
+    const values = await collect(c2);
+    expect(values).toEqual([
+      1,
+      2,
+      3,
+    ]);
+  });
+});
+
+describe('reusable stream - async iteration protocol', () => { // A consumer must be usable directly in for-await-of, without the collect() helper.
+  it('supports for-await-of loop', async () => {
+    const rrs = new ReusableReadableStream(
+      makeStream([
+        'a',
+        'b',
+        'c',
+      ]),
+    );
+    const values: string[] = [];
+    for await (const v of rrs.createConsumer()) { // iterates the consumer itself
+      values.push(v);
+    }
+    expect(values).toEqual([
+      'a',
+      'b',
+      'c',
+    ]);
+  });
+});
diff --git a/tests/behavior/stop-conditions-evaluation.test.ts b/tests/behavior/stop-conditions-evaluation.test.ts
new file mode 100644
index 0000000..8237938
--- /dev/null
+++ b/tests/behavior/stop-conditions-evaluation.test.ts
@@ -0,0 +1,104 @@
+import { describe, expect, it } from 'vitest';
+import { hasToolCall, isStopConditionMet, stepCountIs } from '../../src/lib/stop-conditions.js';
+import type { StepResult } from '../../src/lib/tool-types.js';
+
+function makeStep(overrides: Partial<StepResult> = {}): StepResult { // Minimal StepResult fixture; NOTE(review): sibling test files import a shared makeStep from test-constants - confirm this local copy is intentional.
+  return {
+    stepType: 'initial',
+    text: '',
+    toolCalls: [],
+    toolResults: [],
+    response: {
+      id: 'r1',
+      output: [],
+      parallel_tool_calls: false,
+      status: 'completed',
+      usage: null,
+      error: null,
+      incomplete_details: null,
+      created_at: 0,
+    },
+    ...overrides, // spread last so caller-supplied fields replace the defaults above
+  };
+}
+
+describe('stop conditions - isStopConditionMet evaluation', () => { // Covers how isStopConditionMet combines several conditions: OR logic, empty list, async, concurrency.
+  it('returns true when any condition is true (OR logic)', async () => {
+    const steps = [
+      makeStep(),
+      makeStep(),
+      makeStep(),
+    ];
+    const result = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(5), // not met with 3 steps
+        stepCountIs(2), // met with 3 steps
+      ],
+      steps,
+    });
+    expect(result).toBe(true);
+  });
+
+  it('returns false when all conditions are false', async () => {
+    const steps = [
+      makeStep(),
+    ];
+    const result = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(5),
+        hasToolCall('missing'),
+      ],
+      steps,
+    });
+    expect(result).toBe(false);
+  });
+
+  it('handles empty conditions array (returns false)', async () => {
+    const result = await isStopConditionMet({
+      stopConditions: [],
+      steps: [
+        makeStep(),
+      ],
+    });
+    expect(result).toBe(false);
+  });
+
+  it('handles async stop conditions', async () => {
+    const asyncCondition = async ({ steps }: { readonly steps: ReadonlyArray<StepResult> }) => {
+      await new Promise((resolve) => setTimeout(resolve, 1));
+      return steps.length >= 2;
+    };
+    const result = await isStopConditionMet({
+      stopConditions: [
+        asyncCondition,
+      ],
+      steps: [
+        makeStep(),
+        makeStep(),
+      ],
+    });
+    expect(result).toBe(true);
+  });
+
+  it('evaluates conditions in parallel', async () => {
+    const order: number[] = [];
+    // Builds a condition that records `id` once its timer fires, then answers `verdict`.
+    const after = (delayMs: number, id: number, verdict: boolean) => async () => {
+      await new Promise((r) => setTimeout(r, delayMs));
+      order.push(id);
+      return verdict;
+    };
+    const result = await isStopConditionMet({
+      stopConditions: [
+        after(20, 1, false),
+        after(1, 2, true),
+      ],
+      steps: [],
+    });
+    expect(result).toBe(true);
+    // The slow condition is listed first. Run sequentially it would record
+    // before the fast one even started; only concurrent evaluation lets the
+    // fast condition land first.
+    expect(order[0]).toBe(2);
+  });
+});
diff --git a/tests/behavior/stop-conditions-step-result.test.ts b/tests/behavior/stop-conditions-step-result.test.ts
new file mode 100644
index 0000000..5092743
--- /dev/null
+++ b/tests/behavior/stop-conditions-step-result.test.ts
@@ -0,0 +1,152 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  hasToolCall,
+  isStopConditionMet,
+  maxTokensUsed,
+  stepCountIs,
+} from '../../src/lib/stop-conditions.js';
+import { makeStep, makeTypedToolCalls, makeUsage } from '../test-constants.js';
+
+describe('Stop conditions + real StepResult shape', () => { // Runs the stop-condition factories against steps built with the shared makeStep/makeUsage/makeTypedToolCalls fixtures.
+  it('stepCountIs works with StepResult[] containing real usage and toolCalls data', () => {
+    const steps = [
+      makeStep({
+        toolCalls: makeTypedToolCalls([
+          {
+            name: 'search',
+            id: 'tc1',
+            arguments: {},
+          },
+        ]),
+        usage: makeUsage({
+          totalTokens: 100,
+          inputTokens: 50,
+          outputTokens: 50,
+        }),
+      }),
+      makeStep({
+        toolCalls: makeTypedToolCalls([
+          {
+            name: 'write',
+            id: 'tc2',
+            arguments: {},
+          },
+        ]),
+        usage: makeUsage({
+          totalTokens: 200,
+          inputTokens: 100,
+          outputTokens: 100,
+        }),
+      }),
+    ];
+    const condition = stepCountIs(2);
+    expect(
+      condition({
+        steps,
+      }),
+    ).toBe(true); // two populated steps satisfy stepCountIs(2)
+  });
+
+  it('hasToolCall finds tool name inside StepResult.toolCalls array', () => {
+    const steps = [
+      makeStep({
+        toolCalls: makeTypedToolCalls([
+          {
+            name: 'search',
+            id: 'tc1',
+            arguments: {},
+          },
+          {
+            name: 'analyze',
+            id: 'tc2',
+            arguments: {},
+          },
+        ]),
+      }),
+    ];
+    expect(
+      hasToolCall('search')({
+        steps,
+      }),
+    ).toBe(true);
+    expect(
+      hasToolCall('analyze')({
+        steps,
+      }),
+    ).toBe(true); // the second call in the same step is found as well
+    expect(
+      hasToolCall('missing')({
+        steps,
+      }),
+    ).toBe(false);
+  });
+
+  it('maxTokensUsed reads from StepResult.usage.totalTokens', () => {
+    const steps = [
+      makeStep({
+        usage: makeUsage({
+          totalTokens: 500,
+          inputTokens: 250,
+          outputTokens: 250,
+        }),
+      }),
+      makeStep({
+        usage: makeUsage({
+          totalTokens: 600,
+          inputTokens: 300,
+          outputTokens: 300,
+        }),
+      }),
+    ];
+    expect(
+      maxTokensUsed(1000)({
+        steps,
+      }),
+    ).toBe(true); // 500 + 600 = 1100 total tokens, above the 1000 limit
+    expect(
+      maxTokensUsed(1200)({
+        steps,
+      }),
+    ).toBe(false); // 1100 is still under 1200
+  });
+
+  it('isStopConditionMet evaluates multiple conditions against same StepResult[]', async () => {
+    const steps = [
+      makeStep({
+        toolCalls: makeTypedToolCalls([
+          {
+            name: 'search',
+            id: 'tc1',
+            arguments: {},
+          },
+        ]),
+        usage: makeUsage({
+          totalTokens: 100,
+          inputTokens: 50,
+          outputTokens: 50,
+        }),
+      }),
+    ];
+
+    // Neither condition met
+    const result1 = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(5),
+        hasToolCall('done'),
+      ],
+      steps,
+    });
+    expect(result1).toBe(false);
+
+    // One condition met (hasToolCall)
+    const result2 = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(5),
+        hasToolCall('search'),
+      ],
+      steps,
+    });
+    expect(result2).toBe(true);
+  });
+});
diff --git a/tests/behavior/stop-conditions.test.ts b/tests/behavior/stop-conditions.test.ts
new file mode 100644
index 0000000..9e2ece7
--- /dev/null
+++ b/tests/behavior/stop-conditions.test.ts
@@ -0,0 +1,521 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  finishReasonIs,
+  hasToolCall,
+  maxCost,
+  maxTokensUsed,
+  stepCountIs,
+} from '../../src/lib/stop-conditions.js';
+import { makeStep, makeTypedToolCalls, makeUsage } from '../test-constants.js';
+
+describe('stepCountIs(n) - behavior and dimension isolation', () => { // Threshold cases (<, ===, >, 0) plus one case where every other step field is populated.
+  it('returns false when steps.length < n', () => {
+    const condition = stepCountIs(3);
+    expect(
+      condition({
+        steps: [
+          makeStep(),
+          makeStep(),
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when steps.length === n', () => {
+    const condition = stepCountIs(3);
+    expect(
+      condition({
+        steps: [
+          makeStep(),
+          makeStep(),
+          makeStep(),
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('returns true when steps.length > n', () => { // i.e. the check is expected to be "at least n", not strict equality
+    const condition = stepCountIs(2);
+    expect(
+      condition({
+        steps: [
+          makeStep(),
+          makeStep(),
+          makeStep(),
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('stepCountIs(0) always returns true', () => {
+    const condition = stepCountIs(0);
+    expect(
+      condition({
+        steps: [],
+      }),
+    ).toBe(true);
+    expect(
+      condition({
+        steps: [
+          makeStep(),
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('ignores tool names, tokens, cost, finishReason in steps', () => {
+    const condition = stepCountIs(1);
+    const step = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ]),
+      usage: makeUsage({
+        totalTokens: 9999,
+        inputTokens: 5000,
+        outputTokens: 4999,
+        cost: 100,
+      }),
+      finishReason: 'length',
+    });
+    // Only step count matters
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
+
+describe('hasToolCall(toolName) - behavior and dimension isolation', () => { // Name matching across steps and calls, plus one case where every other step field is populated.
+  it('returns false when no steps have the named tool', () => {
+    const condition = hasToolCall('search');
+    const step = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'other',
+          id: 'tc1',
+          arguments: {},
+        },
+      ]),
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when any step has a matching tool call', () => {
+    const condition = hasToolCall('search');
+    const step1 = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'other',
+          id: 'tc1',
+          arguments: {},
+        },
+      ]),
+    });
+    const step2 = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'search',
+          id: 'tc2',
+          arguments: {},
+        },
+      ]),
+    });
+    expect(
+      condition({
+        steps: [
+          step1,
+          step2,
+        ],
+      }),
+    ).toBe(true); // the match sits in the second step only
+  });
+
+  it('matches tool names case-sensitively ("Search" is not "search")', () => {
+    const condition = hasToolCall('search');
+    const step = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'Search', // differs from the requested name only by letter case
+          id: 'tc1',
+          arguments: {},
+        },
+      ]),
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('handles step with multiple tool calls, one matching', () => {
+    const condition = hasToolCall('search');
+    const step = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'other',
+          id: 'tc1',
+          arguments: {},
+        },
+        {
+          name: 'search',
+          id: 'tc2',
+          arguments: {},
+        },
+      ]),
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('ignores step count, tokens, cost, finishReason', () => {
+    const condition = hasToolCall('search');
+    const step = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ]),
+      usage: makeUsage({
+        totalTokens: 9999,
+        inputTokens: 5000,
+        outputTokens: 4999,
+        cost: 100,
+      }),
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
+
+describe('maxTokensUsed(maxTokens) - behavior and dimension isolation', () => { // Threshold, accumulation and missing-usage cases, plus one case where every other step field is populated.
+  it('returns false when total tokens < threshold', () => {
+    const condition = maxTokensUsed(100);
+    const step = makeStep({
+      usage: makeUsage({
+        totalTokens: 50,
+        inputTokens: 25,
+        outputTokens: 25,
+      }),
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when total tokens >= threshold', () => {
+    const condition = maxTokensUsed(100);
+    const step = makeStep({
+      usage: makeUsage({
+        totalTokens: 100, // exactly at the threshold
+        inputTokens: 50,
+        outputTokens: 50,
+      }),
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('accumulates tokens across multiple steps', () => {
+    const condition = maxTokensUsed(100);
+    const step1 = makeStep({
+      usage: makeUsage({
+        totalTokens: 60,
+        inputTokens: 30,
+        outputTokens: 30,
+      }),
+    });
+    const step2 = makeStep({
+      usage: makeUsage({
+        totalTokens: 50,
+        inputTokens: 25,
+        outputTokens: 25,
+      }),
+    });
+    expect(
+      condition({
+        steps: [
+          step1,
+          step2,
+        ],
+      }),
+    ).toBe(true); // neither step reaches 100 alone; 60 + 50 = 110 does
+  });
+
+  it('steps with undefined usage -> treated as 0', () => {
+    const condition = maxTokensUsed(100);
+    const step = makeStep({
+      usage: undefined,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('ignores step count, tool names, cost, finishReason', () => {
+    const condition = maxTokensUsed(100);
+    const step = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ]),
+      usage: makeUsage({
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+        cost: 999,
+      }),
+      finishReason: 'stop',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
+
+describe('maxCost(maxCostInDollars) - behavior and dimension isolation', () => { // Threshold, accumulation and missing-usage cases, plus one case where every other step field is populated.
+  it('returns false when total cost < threshold', () => {
+    const condition = maxCost(1.0);
+    const step = makeStep({
+      usage: makeUsage({
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+        cost: 0.5,
+      }),
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when total cost >= threshold', () => {
+    const condition = maxCost(1.0);
+    const step = makeStep({
+      usage: makeUsage({
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+        cost: 1.0, // exactly at the threshold
+      }),
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('accumulates cost across multiple steps', () => {
+    const condition = maxCost(1.0);
+    const step1 = makeStep({
+      usage: makeUsage({
+        totalTokens: 50,
+        inputTokens: 25,
+        outputTokens: 25,
+        cost: 0.6,
+      }),
+    });
+    const step2 = makeStep({
+      usage: makeUsage({
+        totalTokens: 50,
+        inputTokens: 25,
+        outputTokens: 25,
+        cost: 0.5,
+      }),
+    });
+    expect(
+      condition({
+        steps: [
+          step1,
+          step2,
+        ],
+      }),
+    ).toBe(true); // neither step reaches 1.0 alone; 0.6 + 0.5 does
+  });
+
+  it('steps with undefined usage -> cost treated as 0', () => {
+    const condition = maxCost(1.0);
+    const step = makeStep({
+      usage: undefined, // the whole usage object is absent, not just its cost field
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('ignores step count, tool names, tokens, finishReason', () => {
+    const condition = maxCost(1.0);
+    const step = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ]),
+      usage: makeUsage({
+        totalTokens: 99999,
+        inputTokens: 50000,
+        outputTokens: 49999,
+        cost: 1.0,
+      }),
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
+
+describe('finishReasonIs(reason) - behavior and dimension isolation', () => { // Match, no-match and undefined-reason cases, plus one case where every other step field is populated.
+  it('returns false when no step has the specified reason', () => {
+    const condition = finishReasonIs('length');
+    const step = makeStep({
+      finishReason: 'stop',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('returns true when any step has matching reason', () => {
+    const condition = finishReasonIs('length');
+    const step1 = makeStep({
+      finishReason: 'stop',
+    });
+    const step2 = makeStep({
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step1,
+          step2,
+        ],
+      }),
+    ).toBe(true); // only the second step carries the matching reason
+  });
+
+  it('matches "length" specifically', () => { // NOTE(review): same positive path as the previous case with a single step - confirm it adds coverage
+    const condition = finishReasonIs('length');
+    const step = makeStep({
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+
+  it('steps with undefined finishReason -> not matched', () => {
+    const condition = finishReasonIs('length');
+    const step = makeStep({
+      finishReason: undefined,
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(false);
+  });
+
+  it('ignores step count, tool names, tokens, cost', () => {
+    const condition = finishReasonIs('length');
+    const step = makeStep({
+      toolCalls: makeTypedToolCalls([
+        {
+          name: 'search',
+          id: 'tc1',
+          arguments: {},
+        },
+      ]),
+      usage: makeUsage({
+        totalTokens: 99999,
+        inputTokens: 50000,
+        outputTokens: 49999,
+        cost: 999,
+      }),
+      finishReason: 'length',
+    });
+    expect(
+      condition({
+        steps: [
+          step,
+        ],
+      }),
+    ).toBe(true);
+  });
+});
diff --git a/tests/behavior/stream-type-guards-negative.test.ts b/tests/behavior/stream-type-guards-negative.test.ts
new file mode 100644
index 0000000..862e7fe
--- /dev/null
+++ b/tests/behavior/stream-type-guards-negative.test.ts
@@ -0,0 +1,133 @@
+import { describe, expect, it } from 'vitest';
+import {
+  isFunctionCallArgumentsDeltaEvent,
+  isFunctionCallArgumentsDoneEvent,
+  isFunctionCallItem,
+  isOutputItemAddedEvent,
+  isOutputItemDoneEvent,
+  isOutputMessage,
+  isOutputTextDeltaEvent,
+  isOutputTextPart,
+  isReasoningDeltaEvent,
+  isReasoningOutputItem,
+  isRefusalPart,
+  isResponseCompletedEvent,
+  isResponseFailedEvent,
+  isResponseIncompleteEvent,
+} from '../../src/lib/stream-type-guards.js';
+
+describe('stream event type guards - negative cases (reject wrong type)', () => {
+  // Every case hands a guard the bare `type` discriminant of a *different* event and
+  // expects `false`. The payloads are intentionally incomplete, so they are pushed
+  // through `unknown` to reach the guards' parameter type.
+  //
+  // The cast target is derived from a guard signature: no `StreamEvents` type is
+  // imported in this file, so casting to that bare name does not type-check
+  // (TS2304). All guards under test are assumed to accept the same event type.
+  type GuardInput = Parameters<typeof isOutputTextDeltaEvent>[0];
+
+  it('isOutputTextDeltaEvent rejects reasoning delta', () => {
+    const event = {
+      type: 'response.reasoning_text.delta',
+    } as unknown as GuardInput;
+    expect(isOutputTextDeltaEvent(event)).toBe(false);
+  });
+
+  it('isReasoningDeltaEvent rejects text delta', () => {
+    const event = {
+      type: 'response.output_text.delta',
+    } as unknown as GuardInput;
+    expect(isReasoningDeltaEvent(event)).toBe(false);
+  });
+
+  it('isFunctionCallArgumentsDeltaEvent rejects text delta', () => {
+    const event = {
+      type: 'response.output_text.delta',
+    } as unknown as GuardInput;
+    expect(isFunctionCallArgumentsDeltaEvent(event)).toBe(false);
+  });
+
+  it('isOutputItemAddedEvent rejects output_item.done', () => {
+    const event = {
+      type: 'response.output_item.done',
+    } as unknown as GuardInput;
+    expect(isOutputItemAddedEvent(event)).toBe(false);
+  });
+
+  it('isOutputItemDoneEvent rejects output_item.added', () => {
+    const event = {
+      type: 'response.output_item.added',
+    } as unknown as GuardInput;
+    expect(isOutputItemDoneEvent(event)).toBe(false);
+  });
+
+  it('isResponseCompletedEvent rejects response.failed', () => {
+    const event = {
+      type: 'response.failed',
+    } as unknown as GuardInput;
+    expect(isResponseCompletedEvent(event)).toBe(false);
+  });
+
+  it('isResponseFailedEvent rejects response.completed', () => {
+    const event = {
+      type: 'response.completed',
+    } as unknown as GuardInput;
+    expect(isResponseFailedEvent(event)).toBe(false);
+  });
+
+  it('isResponseIncompleteEvent rejects response.completed', () => {
+    const event = {
+      type: 'response.completed',
+    } as unknown as GuardInput;
+    expect(isResponseIncompleteEvent(event)).toBe(false);
+  });
+
+  it('isFunctionCallArgumentsDoneEvent rejects function_call_arguments.delta', () => {
+    const event = {
+      type: 'response.function_call_arguments.delta',
+    } as unknown as GuardInput;
+    expect(isFunctionCallArgumentsDoneEvent(event)).toBe(false);
+  });
+});
+
+describe('output item type guards - negative cases', () => {
+  // Each guard receives an object carrying only the `type` tag of a sibling
+  // variant and must answer `false`; no other field is populated. The literal is
+  // still passed straight to the guard, and only the boolean result is held in a
+  // local before being asserted.
+
+  it('isOutputMessage rejects function_call', () => {
+    const accepted = isOutputMessage({
+      type: 'function_call',
+    });
+    expect(accepted).toBe(false);
+  });
+
+  it('isFunctionCallItem rejects message', () => {
+    const accepted = isFunctionCallItem({
+      type: 'message',
+    });
+    expect(accepted).toBe(false);
+  });
+
+  it('isReasoningOutputItem rejects message', () => {
+    const accepted = isReasoningOutputItem({
+      type: 'message',
+    });
+    expect(accepted).toBe(false);
+  });
+
+  it('isOutputTextPart rejects refusal', () => {
+    const accepted = isOutputTextPart({
+      type: 'refusal',
+    });
+    expect(accepted).toBe(false);
+  });
+
+  it('isRefusalPart rejects output_text', () => {
+    const accepted = isRefusalPart({
+      type: 'output_text',
+    });
+    expect(accepted).toBe(false);
+  });
+});
diff --git a/tests/behavior/tool-approval.test.ts b/tests/behavior/tool-approval.test.ts
new file mode 100644
index 0000000..0f4d309
--- /dev/null
+++ b/tests/behavior/tool-approval.test.ts
@@ -0,0 +1,314 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import { partitionToolCalls, toolRequiresApproval } from '../../src/lib/conversation-state.js';
+import { tool } from '../../src/lib/tool.js';
+import type { ParsedToolCall, Tool, TurnContext } from '../../src/lib/tool-types.js';
+import { hasApprovalRequiredTools, toolHasApprovalConfigured } from '../../src/lib/tool-types.js';
+
+const turnCtx: TurnContext = {
+  numberOfTurns: 1, // the only field populated; handed to every approval check below
+};
+
+describe('tool approval - toolRequiresApproval', () => {
+  it('returns false when tool has no requireApproval', async () => {
+    // `requireApproval` is omitted from the tool definition entirely.
+    const freeTool = tool({
+      name: 'free',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const call: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'free',
+      arguments: {},
+    };
+    const needsApproval = await toolRequiresApproval(
+      call,
+      [
+        freeTool,
+      ],
+      turnCtx,
+    );
+    expect(needsApproval).toBe(false);
+  });
+
+  it('returns true when tool has requireApproval: true', async () => {
+    // Static flag on the tool; no call-level check is supplied.
+    const guardedTool = tool({
+      name: 'guarded',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    const call: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'guarded',
+      arguments: {},
+    };
+    const needsApproval = await toolRequiresApproval(
+      call,
+      [
+        guardedTool,
+      ],
+      turnCtx,
+    );
+    expect(needsApproval).toBe(true);
+  });
+
+  it('returns false when tool has requireApproval: false', async () => {
+    // An explicit `false` is expected to give the same answer as an omitted flag.
+    const openTool = tool({
+      name: 'open',
+      inputSchema: z.object({}),
+      requireApproval: false,
+      execute: async () => ({}),
+    });
+    const call: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'open',
+      arguments: {},
+    };
+    const needsApproval = await toolRequiresApproval(
+      call,
+      [
+        openTool,
+      ],
+      turnCtx,
+    );
+    expect(needsApproval).toBe(false);
+  });
+
+  it('calls requireApproval function with args and context', async () => {
+    const conditionalTool = tool({
+      name: 'conditional',
+      inputSchema: z.object({
+        dangerous: z.boolean(),
+      }),
+      requireApproval: (params) => params.dangerous,
+      execute: async () => ({}),
+    });
+    const dangerousCall: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'conditional',
+      arguments: {
+        dangerous: true,
+      },
+    };
+    const safeCall: ParsedToolCall<Tool> = {
+      id: 'c2',
+      name: 'conditional',
+      arguments: {
+        dangerous: false,
+      },
+    };
+    // The predicate reads only the parsed arguments, so the two calls must differ in outcome.
+    // NOTE(review): the "context" half of the title is not asserted here.
+    const approvalForDangerous = await toolRequiresApproval(
+      dangerousCall,
+      [
+        conditionalTool,
+      ],
+      turnCtx,
+    );
+    expect(approvalForDangerous).toBe(true);
+    const approvalForSafe = await toolRequiresApproval(
+      safeCall,
+      [
+        conditionalTool,
+      ],
+      turnCtx,
+    );
+    expect(approvalForSafe).toBe(false);
+  });
+
+  it('call-level check overrides tool-level setting', async () => {
+    const guardedTool = tool({
+      name: 'guarded',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    const call: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'guarded',
+      arguments: {},
+    };
+    // Fourth argument: a call-level predicate that always answers `false`.
+    const alwaysFalse = () => false;
+    const needsApproval = await toolRequiresApproval(
+      call,
+      [
+        guardedTool,
+      ],
+      turnCtx,
+      alwaysFalse,
+    );
+    expect(needsApproval).toBe(false);
+  });
+
+  it('returns false for unknown tool name', async () => {
+    // The call names a tool that is absent from the supplied tool list.
+    const knownTool = tool({
+      name: 'known',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const call: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'unknown',
+      arguments: {},
+    };
+    const needsApproval = await toolRequiresApproval(
+      call,
+      [
+        knownTool,
+      ],
+      turnCtx,
+    );
+    expect(needsApproval).toBe(false);
+  });
+});
+
+describe('tool approval - partitionToolCalls', () => {
+  it('separates tool calls into requiresApproval and autoExecute', async () => {
+    const guardedCall: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'guarded',
+      arguments: {},
+    };
+    const freeCall: ParsedToolCall<Tool> = {
+      id: 'c2',
+      name: 'free',
+      arguments: {},
+    };
+    const guardedTool = tool({
+      name: 'guarded',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    const freeTool = tool({
+      name: 'free',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const partition = await partitionToolCalls(
+      [
+        guardedCall,
+        freeCall,
+      ],
+      [
+        guardedTool,
+        freeTool,
+      ],
+      turnCtx,
+    );
+    expect(partition.requiresApproval).toHaveLength(1);
+    expect(partition.autoExecute).toHaveLength(1);
+    expect(partition.requiresApproval[0]?.name).toBe('guarded'); // bucket contents are checked by call name
+    expect(partition.autoExecute[0]?.name).toBe('free');
+  });
+
+  it('all auto-execute when no tools require approval', async () => {
+    const freeCall: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'free',
+      arguments: {},
+    };
+    const freeTool = tool({
+      name: 'free',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const partition = await partitionToolCalls(
+      [
+        freeCall,
+      ],
+      [
+        freeTool,
+      ],
+      turnCtx,
+    );
+    expect(partition.autoExecute).toHaveLength(1);
+    expect(partition.requiresApproval).toHaveLength(0); // the approval bucket stays empty
+  });
+
+  it('all require approval when all tools need it', async () => {
+    const guardedCall: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'g1',
+      arguments: {},
+    };
+    const guardedTool = tool({
+      name: 'g1',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    const partition = await partitionToolCalls(
+      [
+        guardedCall,
+      ],
+      [
+        guardedTool,
+      ],
+      turnCtx,
+    );
+    expect(partition.requiresApproval).toHaveLength(1);
+    expect(partition.autoExecute).toHaveLength(0); // nothing may run without approval
+  });
+});
+
+describe('tool approval - type-level utilities', () => {
+  // Both helpers are called synchronously and receive only tool definitions:
+  // one tool for `toolHasApprovalConfigured`, a list for `hasApprovalRequiredTools`.
+  it('toolHasApprovalConfigured returns true for tool with requireApproval', () => {
+    const guardedTool = tool({
+      name: 'g',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    expect(toolHasApprovalConfigured(guardedTool)).toBe(true);
+  });
+
+  it('toolHasApprovalConfigured returns false for tool without requireApproval', () => {
+    const freeTool = tool({
+      name: 'f',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    expect(toolHasApprovalConfigured(freeTool)).toBe(false);
+  });
+
+  it('hasApprovalRequiredTools returns true when any tool needs approval', () => {
+    const freeTool = tool({
+      name: 'f',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const guardedTool = tool({
+      name: 'g',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    const anyGuarded = hasApprovalRequiredTools([
+      freeTool,
+      guardedTool,
+    ]);
+    expect(anyGuarded).toBe(true);
+  });
+
+  it('hasApprovalRequiredTools returns false when no tools need approval', () => {
+    const freeTool = tool({
+      name: 'f',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    const anyGuarded = hasApprovalRequiredTools([
+      freeTool,
+    ]);
+    expect(anyGuarded).toBe(false);
+  });
+});
diff --git a/tests/behavior/tool-context.test.ts b/tests/behavior/tool-context.test.ts
new file mode 100644
index 0000000..271de3a
--- /dev/null
+++ b/tests/behavior/tool-context.test.ts
@@ -0,0 +1,263 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import {
+  buildToolExecuteContext,
+  extractToolContext,
+  resolveContext,
+  ToolContextStore,
+} from '../../src/lib/tool-context.js';
+import type { TurnContext } from '../../src/lib/tool-types.js';
+
+const turnCtx: TurnContext = {
+  numberOfTurns: 1, // expectations below rely on this value (1, and 2 once doubled)
+};
+
+describe('ToolContextStore - basic operations', () => {
+  it('constructor initializes with given values', () => {
+    const store = new ToolContextStore({
+      weather: {
+        apiKey: '123',
+      },
+    });
+    expect(store.getToolContext('weather')).toEqual({
+      apiKey: '123',
+    });
+  });
+
+  it('getToolContext returns empty object for unknown tool', () => {
+    const store = new ToolContextStore(); // constructed without any initial contexts
+    expect(store.getToolContext('unknown')).toEqual({});
+  });
+
+  it('setToolContext sets tool context and notifies listeners', () => {
+    const store = new ToolContextStore();
+    const snapshots: Array<Record<string, unknown>> = [];
+    store.subscribe((s) => snapshots.push(s)); // listener records every value it is handed
+    store.setToolContext('tool1', {
+      key: 'val',
+    });
+    expect(store.getToolContext('tool1')).toEqual({
+      key: 'val',
+    });
+    expect(snapshots).toHaveLength(1); // exactly one notification for the single set
+  });
+
+  it('mergeToolContext merges partial values', () => {
+    const store = new ToolContextStore({
+      tool1: {
+        a: 1,
+        b: 2,
+      },
+    });
+    store.mergeToolContext('tool1', {
+      b: 99, // replaces the existing value
+      c: 3, // key that did not exist before
+    });
+    expect(store.getToolContext('tool1')).toEqual({
+      a: 1, // not mentioned in the merge, so it must survive
+      b: 99,
+      c: 3,
+    });
+  });
+
+  it('getSnapshot returns deep-shallow copy of all contexts', () => {
+    const store = new ToolContextStore({
+      a: {
+        x: 1,
+      },
+      b: {
+        y: 2,
+      },
+    });
+    const snapshot = store.getSnapshot();
+    expect(snapshot).toEqual({
+      a: {
+        x: 1,
+      },
+      b: {
+        y: 2,
+      },
+    });
+    snapshot.a!.x = 999; // mutate the per-tool object of the copy; the store must still report x: 1
+    expect(store.getToolContext('a')).toEqual({
+      x: 1,
+    });
+  });
+
+  it('subscribe returns unsubscribe function', () => {
+    const store = new ToolContextStore();
+    const calls: number[] = [];
+    const unsub = store.subscribe(() => calls.push(1));
+    store.setToolContext('t', {
+      v: 1,
+    });
+    expect(calls).toHaveLength(1);
+    unsub(); // from here on the listener must no longer be invoked
+    store.setToolContext('t', {
+      v: 2,
+    });
+    expect(calls).toHaveLength(1); // still 1: the second set went unobserved
+  });
+});
+
+describe('buildToolExecuteContext', () => {
+  it('returns object with turnContext fields merged', () => {
+    const execCtx = buildToolExecuteContext(turnCtx, undefined, 'myTool', undefined); // no store, no schema
+    expect(execCtx.numberOfTurns).toBe(1);
+  });
+
+  it('local getter reads from store for the tool name', () => {
+    const localSchema = z.object({
+      apiKey: z.string(),
+    });
+    const contexts = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const execCtx = buildToolExecuteContext(turnCtx, contexts, 'myTool', localSchema);
+    expect(execCtx.local).toEqual({
+      apiKey: 'abc',
+    }); // the store entry keyed by the tool name passed above
+  });
+
+  it('setContext merges partial values into store', () => {
+    const localSchema = z.object({
+      apiKey: z.string(),
+    });
+    const contexts = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    const execCtx = buildToolExecuteContext(turnCtx, contexts, 'myTool', localSchema);
+    execCtx.setContext({
+      apiKey: 'xyz',
+    });
+    expect(execCtx.local).toEqual({
+      apiKey: 'xyz',
+    }); // read back through the same context object after the write
+  });
+
+  it('shared getter reads shared context from store', () => {
+    const sharedSchema = z.object({
+      globalKey: z.string(),
+    });
+    const contexts = new ToolContextStore({
+      shared: {
+        globalKey: 'val',
+      },
+    });
+    const execCtx = buildToolExecuteContext(turnCtx, contexts, 'myTool', undefined, sharedSchema); // shared schema is the fifth argument
+    expect(execCtx.shared).toEqual({
+      globalKey: 'val',
+    });
+  });
+
+  it('setSharedContext updates shared context in store', () => {
+    const sharedSchema = z.object({
+      globalKey: z.string(),
+    });
+    const contexts = new ToolContextStore({
+      shared: {
+        globalKey: 'old',
+      },
+    });
+    const execCtx = buildToolExecuteContext(turnCtx, contexts, 'myTool', undefined, sharedSchema);
+    execCtx.setSharedContext({
+      globalKey: 'new',
+    });
+    expect(execCtx.shared).toEqual({
+      globalKey: 'new',
+    }); // read back through the same context object after the write
+  });
+
+  it('local getter returns frozen object', () => {
+    const localSchema = z.object({
+      val: z.number(),
+    });
+    const contexts = new ToolContextStore({
+      myTool: {
+        val: 1,
+      },
+    });
+    const execCtx = buildToolExecuteContext(turnCtx, contexts, 'myTool', localSchema);
+    expect(Object.isFrozen(execCtx.local)).toBe(true); // `Object.isFrozen` only attests a shallow freeze
+  });
+});
+
+describe('resolveContext', () => {
+  it('returns empty object when input is undefined', async () => {
+    // With no input at all the resolver still yields an object.
+    expect(await resolveContext(undefined, turnCtx)).toEqual({});
+  });
+
+  it('returns static value as-is', async () => {
+    // Compared by deep equality; identity with the input is not asserted.
+    const staticInput = {
+      myTool: {
+        apiKey: '123',
+      },
+    };
+    expect(await resolveContext(staticInput, turnCtx)).toEqual({
+      myTool: {
+        apiKey: '123',
+      },
+    });
+  });
+
+  it('calls sync function with turnContext and returns result', async () => {
+    // The expected `turn: 1` can only come from the `turnCtx` handed to the factory.
+    const fromTurn = (ctx: TurnContext) => ({
+      tool: {
+        turn: ctx.numberOfTurns,
+      },
+    });
+    expect(await resolveContext(fromTurn, turnCtx)).toEqual({
+      tool: {
+        turn: 1,
+      },
+    });
+  });
+
+  it('calls async function with turnContext and returns result', async () => {
+    // Promise-returning factory; the doubled turn count distinguishes it from the sync case.
+    const fromTurnAsync = async (ctx: TurnContext) => ({
+      tool: {
+        turn: ctx.numberOfTurns * 2,
+      },
+    });
+    expect(await resolveContext(fromTurnAsync, turnCtx)).toEqual({
+      tool: {
+        turn: 2,
+      },
+    });
+  });
+});
+
+describe('extractToolContext', () => {
+  it('extracts and validates context for tool', () => {
+    // Happy path only: the stored data already satisfies the schema.
+    const apiKeySchema = z.object({
+      apiKey: z.string(),
+    });
+    const contexts = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    expect(extractToolContext(contexts, 'myTool', apiKeySchema)).toEqual({
+      apiKey: 'abc',
+    });
+  });
+
+  it('returns empty object when no schema provided', () => {
+    // The store does hold data for `myTool`; without a schema none of it is returned.
+    const contexts = new ToolContextStore({
+      myTool: {
+        apiKey: 'abc',
+      },
+    });
+    expect(extractToolContext(contexts, 'myTool', undefined)).toEqual({});
+  });
+});
diff --git a/tests/behavior/tool-creation.test.ts b/tests/behavior/tool-creation.test.ts
new file mode 100644
index 0000000..d8ecc10
--- /dev/null
+++ b/tests/behavior/tool-creation.test.ts
@@ -0,0 +1,149 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import { tool } from '../../src/lib/tool.js';
+import { ToolType } from '../../src/lib/tool-types.js';
+
+// Tests 1-9: Tool creation via tool() factory
+
+describe('tool creation - tool() factory', () => {
+  it('regular tool returns full shape: type, name, inputSchema, execute, description, outputSchema', () => {
+    const greetTool = tool({
+      name: 'greet',
+      description: 'Say hello',
+      inputSchema: z.object({
+        name: z.string(),
+      }),
+      outputSchema: z.object({
+        greeting: z.string(),
+      }),
+      execute: async (params) => ({
+        greeting: `Hi ${params.name}`,
+      }),
+    });
+    const fn = greetTool.function; // everything except `type` is read from this object
+    expect(greetTool.type).toBe(ToolType.Function);
+    expect(fn.name).toBe('greet');
+    expect(fn.description).toBe('Say hello');
+    expect(fn.inputSchema).toBeDefined();
+    expect(fn.outputSchema).toBeDefined();
+    expect(fn.execute).toBeTypeOf('function');
+  });
+
+  it('generator tool with eventSchema returns tool with eventSchema + outputSchema + execute', () => {
+    const streamTool = tool({
+      name: 'stream_tool',
+      inputSchema: z.object({
+        query: z.string(),
+      }),
+      eventSchema: z.object({
+        progress: z.number(),
+      }),
+      outputSchema: z.object({
+        result: z.string(),
+      }),
+      execute: async function* () {
+        yield {
+          progress: 50,
+        };
+        return {
+          result: 'done',
+        };
+      },
+    });
+    const fn = streamTool.function; // the generator is only inspected here, never run
+    expect(streamTool.type).toBe(ToolType.Function);
+    expect(fn.name).toBe('stream_tool');
+    expect(fn.eventSchema).toBeDefined();
+    expect(fn.outputSchema).toBeDefined();
+    expect(fn.execute).toBeTypeOf('function');
+  });
+
+  it('manual tool (execute: false) returns tool with no execute, no outputSchema, no eventSchema', () => {
+    const manualTool = tool({
+      name: 'manual',
+      description: 'Needs manual handling',
+      inputSchema: z.object({
+        action: z.string(),
+      }),
+      execute: false,
+    });
+    // NOTE(review): the title also promises "no outputSchema"; that is not asserted below.
+    expect(manualTool.type).toBe(ToolType.Function);
+    expect(manualTool.function.name).toBe('manual');
+    expect(manualTool.function).not.toHaveProperty('execute');
+    expect(manualTool.function).not.toHaveProperty('eventSchema');
+  });
+
+  it('tool with contextSchema preserves schema on function.contextSchema', () => {
+    const ctxSchema = z.object({
+      apiKey: z.string(),
+    });
+    const contextTool = tool({
+      name: 'ctx_tool',
+      inputSchema: z.object({}),
+      contextSchema: ctxSchema,
+      execute: async () => ({}),
+    });
+    // Identity check (`toBe`): the very same schema instance must be kept, not a copy.
+    expect(contextTool.function.contextSchema).toBe(ctxSchema);
+  });
+
+  it('tool with requireApproval: true preserves flag on function', () => {
+    const approvalTool = tool({
+      name: 'approval_tool',
+      inputSchema: z.object({}),
+      requireApproval: true,
+      execute: async () => ({}),
+    });
+    // The static flag is read back unchanged from the built tool.
+    expect(approvalTool.function.requireApproval).toBe(true);
+  });
+
+  it('tool with requireApproval function preserves function on function', () => {
+    const check = () => true;
+    const approvalTool = tool({
+      name: 'fn_approval',
+      inputSchema: z.object({}),
+      requireApproval: check,
+      execute: async () => ({}),
+    });
+    // Identity check: the predicate must be stored as given, not wrapped.
+    expect(approvalTool.function.requireApproval).toBe(check);
+  });
+
+  it('tool with nextTurnParams preserves them on function', () => {
+    const ntp = {
+      temperature: () => 0.5 as number | null,
+    };
+    const paramsTool = tool({
+      name: 'ntp_tool',
+      inputSchema: z.object({}),
+      nextTurnParams: ntp,
+      execute: async () => ({}),
+    });
+    // NOTE(review): only presence is asserted; the stored value is not compared with `ntp`.
+    expect(paramsTool.function.nextTurnParams).toBeDefined();
+  });
+
+  it('tool named "shared" throws (reserved for shared context)', () => {
+    const defineReservedName = () =>
+      tool({
+        name: 'shared',
+        inputSchema: z.object({}),
+        execute: async () => ({}),
+      });
+    expect(defineReservedName).toThrow(/reserved/i);
+  });
+
+  it('tool with no description has description absent from function object', () => {
+    const bareTool = tool({
+      name: 'no_desc',
+      inputSchema: z.object({
+        x: z.number(),
+      }),
+      execute: async () => ({}),
+    });
+    // `toBeUndefined` also passes for a present-but-undefined key, so absence is not strictly proven.
+    expect(bareTool.function.description).toBeUndefined();
+  });
+});
diff --git a/tests/behavior/tool-event-broadcaster.test.ts b/tests/behavior/tool-event-broadcaster.test.ts
new file mode 100644
index 0000000..b32acc0
--- /dev/null
+++ b/tests/behavior/tool-event-broadcaster.test.ts
@@ -0,0 +1,131 @@
+import { describe, expect, it } from 'vitest';
+import { ToolEventBroadcaster } from '../../src/lib/tool-event-broadcaster.js';
+
+async function collect<T>(iter: AsyncIterableIterator<T>): Promise<T[]> {
+  const drained: T[] = []; // values in the order the iterator yielded them
+  for await (const item of iter) {
+    drained.push(item);
+  }
+  return drained; // reached only after the iterator reports completion
+}
+
+describe('ToolEventBroadcaster - single consumer', () => {
+  it('consumer receives all pushed events after complete', async () => {
+    // The consumer is created after completion, so every event predates it.
+    const source = new ToolEventBroadcaster<number>();
+    source.push(1);
+    source.push(2);
+    source.push(3);
+    source.complete();
+    const received = await collect(source.createConsumer());
+    expect(received).toEqual([
+      1,
+      2,
+      3,
+    ]);
+  });
+
+  it('consumer receives events pushed before and after creation', async () => {
+    const source = new ToolEventBroadcaster<string>();
+    source.push('before');
+    const consumer = source.createConsumer(); // created between the two pushes
+    source.push('after');
+    source.complete();
+    // Draining starts only once the broadcaster has completed.
+    expect(await collect(consumer)).toEqual([
+      'before',
+      'after',
+    ]);
+  });
+
+  it('empty broadcaster yields no values', async () => {
+    // Completed without a single push.
+    const source = new ToolEventBroadcaster<number>();
+    source.complete();
+    const received = await collect(source.createConsumer());
+    expect(received).toEqual([]);
+  });
+});
+
+describe('ToolEventBroadcaster - multiple consumers', () => {
+  it('two consumers independently receive all events', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    const c1 = broadcaster.createConsumer();
+    const c2 = broadcaster.createConsumer(); // both consumers exist before anything is pushed
+    broadcaster.push(10);
+    broadcaster.push(20);
+    broadcaster.complete();
+    const [v1, v2] = await Promise.all([
+      collect(c1),
+      collect(c2),
+    ]); // both drains are started before either is awaited
+    expect(v1).toEqual([
+      10,
+      20,
+    ]);
+    expect(v2).toEqual([
+      10,
+      20,
+    ]); // the second consumer gets the full sequence too, not what the first left over
+  });
+});
+
+describe('ToolEventBroadcaster - error handling', () => {
+  it('complete(error) propagates error to consumer', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1);
+    const consumer = broadcaster.createConsumer();
+    const first = await consumer.next(); // take the pushed value before the broadcaster is failed
+    expect(first.value).toBe(1);
+    broadcaster.complete(new Error('test error')); // completion carrying an error
+    await expect(consumer.next()).rejects.toThrow('test error'); // the next pull must reject with that error
+  });
+});
+
+describe('ToolEventBroadcaster - cancellation', () => {
+  it('consumer.return() cancels the consumer', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    broadcaster.push(1); // deliberately never consumed
+    const consumer = broadcaster.createConsumer();
+    await consumer.return!(); // NOTE(review): `!` presumably because `return` is optional on the declared iterator type — confirm
+    const result = await consumer.next();
+    expect(result.done).toBe(true); // the pushed 1 is not handed out after cancellation
+  });
+
+  it('consumer.throw() cancels the consumer and throws', async () => {
+    const broadcaster = new ToolEventBroadcaster<number>();
+    const consumer = broadcaster.createConsumer();
+    await expect(consumer.throw!(new Error('abort'))).rejects.toThrow('abort'); // the error passed in comes back as the rejection
+  });
+});
+
+describe('ToolEventBroadcaster - push after complete is ignored', () => {
+  it('events pushed after complete are not delivered', async () => {
+    const source = new ToolEventBroadcaster<number>();
+    source.push(1);
+    source.complete();
+    source.push(2);
+    // Only the push that preceded `complete()` may reach the consumer.
+    const received = await collect(source.createConsumer());
+    expect(received).toEqual([
+      1,
+    ]);
+  });
+});
+
+describe('ToolEventBroadcaster - async iteration protocol', () => {
+  it('supports for-await-of loop', async () => {
+    const broadcaster = new ToolEventBroadcaster<string>();
+    broadcaster.push('a');
+    broadcaster.push('b');
+    broadcaster.complete(); // completed up front so the loop below can terminate
+    const values: string[] = [];
+    for await (const v of broadcaster.createConsumer()) {
+      values.push(v); // the consumer itself is the loop target; the collect() helper is bypassed on purpose
+    }
+    expect(values).toEqual([
+      'a',
+      'b',
+    ]);
+  });
+});
diff --git a/tests/behavior/tool-execution.test.ts b/tests/behavior/tool-execution.test.ts
new file mode 100644
index 0000000..b5b7ef3
--- /dev/null
+++ b/tests/behavior/tool-execution.test.ts
@@ -0,0 +1,576 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import { tool } from '../../src/lib/tool.js';
+import {
+  convertToolsToAPIFormat,
+  convertZodToJsonSchema,
+  executeGeneratorTool,
+  executeRegularTool,
+  executeTool,
+  findToolByName,
+  formatToolExecutionError,
+  formatToolResultForModel,
+  parseToolCallArguments,
+  sanitizeJsonSchema,
+  validateToolInput,
+  validateToolOutput,
+} from '../../src/lib/tool-executor.js';
+import type { ParsedToolCall, Tool, TurnContext } from '../../src/lib/tool-types.js';
+
+const turnCtx: TurnContext = { // minimal turn context passed to every execute* call in this file
+  numberOfTurns: 1,
+};
+
+describe('tool execution - input validation', () => { // validateToolInput / validateToolOutput against Zod schemas
+  const schema = z.object({ // input schema shared by the two validateToolInput cases
+    name: z.string(),
+    age: z.number(),
+  });
+
+  it('validateToolInput with valid args returns validated data', () => {
+    const result = validateToolInput(schema, {
+      name: 'Alice',
+      age: 30,
+    });
+    expect(result).toEqual({
+      name: 'Alice',
+      age: 30,
+    });
+  });
+
+  it('validateToolInput with invalid args throws ZodError', () => {
+    expect(() =>
+      validateToolInput(schema, {
+        name: 123, // wrong type for `name`, and `age` is missing
+      }),
+    ).toThrow(); // NOTE(review): asserts only that something is thrown, not that it is a ZodError
+  });
+
+  it('validateToolOutput with valid result returns validated data', () => {
+    const outSchema = z.object({
+      sum: z.number(),
+    });
+    const result = validateToolOutput(outSchema, {
+      sum: 42,
+    });
+    expect(result).toEqual({
+      sum: 42,
+    });
+  });
+
+  it('validateToolOutput with invalid result throws ZodError', () => {
+    const outSchema = z.object({
+      sum: z.number(),
+    });
+    expect(() =>
+      validateToolOutput(outSchema, {
+        sum: 'not a number',
+      }),
+    ).toThrow(); // NOTE(review): error type is not asserted here either
+  });
+});
+
+describe('tool execution - argument parsing', () => { // parseToolCallArguments: raw argument string -> object
+  it('parseToolCallArguments with valid JSON returns parsed object', () => {
+    expect(parseToolCallArguments('{"a":1}')).toEqual({
+      a: 1,
+    });
+  });
+
+  it('parseToolCallArguments with empty string returns empty object', () => {
+    expect(parseToolCallArguments('')).toEqual({}); // empty input is expected to yield {} rather than throw
+  });
+
+  it('parseToolCallArguments with whitespace-only string returns empty object', () => {
+    expect(parseToolCallArguments('   ')).toEqual({}); // whitespace-only is expected to behave like empty input
+  });
+
+  it('parseToolCallArguments with invalid JSON throws descriptive error', () => {
+    expect(() => parseToolCallArguments('bad json')).toThrow(/failed to parse/i); // message match is case-insensitive
+  });
+});
+
+describe('tool execution - executeRegularTool', () => { // promise-returning tools: success, validation and error paths
+  it('executes and returns { toolCallId, toolName, result }', async () => {
+    const t = tool({
+      name: 'add',
+      inputSchema: z.object({
+        a: z.number(),
+        b: z.number(),
+      }),
+      execute: async (params) => ({
+        sum: params.a + params.b,
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_1',
+      name: 'add',
+      arguments: {
+        a: 2,
+        b: 3,
+      },
+    };
+    const result = await executeRegularTool(t, tc, turnCtx);
+    expect(result.toolCallId).toBe('call_1'); // expected to echo tc.id
+    expect(result.toolName).toBe('add');
+    expect(result.result).toEqual({
+      sum: 5,
+    });
+    expect(result.error).toBeUndefined();
+  });
+
+  it('returns error when input validation fails', async () => {
+    const t = tool({
+      name: 'strict',
+      inputSchema: z.object({
+        x: z.number(),
+      }),
+      execute: async () => ({
+        ok: true,
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_2',
+      name: 'strict',
+      arguments: {
+        x: 'not_num', // violates inputSchema (x must be a number)
+      },
+    };
+    const result = await executeRegularTool(t, tc, turnCtx); // expected to resolve with an error field, not reject
+    expect(result.error).toBeDefined();
+    expect(result.result).toBeNull();
+  });
+
+  it('validates output when outputSchema provided', async () => {
+    const t = tool({
+      name: 'typed_out',
+      inputSchema: z.object({}),
+      outputSchema: z.object({
+        value: z.number(),
+      }),
+      execute: async () => ({
+        value: 42,
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_3',
+      name: 'typed_out',
+      arguments: {},
+    };
+    const result = await executeRegularTool(t, tc, turnCtx);
+    expect(result.result).toEqual({ // NOTE(review): only the conforming-output path is exercised here
+      value: 42,
+    });
+  });
+
+  it('returns raw result when no outputSchema', async () => {
+    const t = tool({
+      name: 'raw_out',
+      inputSchema: z.object({}),
+      execute: async () => ({
+        anything: 'goes',
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_4',
+      name: 'raw_out',
+      arguments: {},
+    };
+    const result = await executeRegularTool(t, tc, turnCtx);
+    expect(result.result).toEqual({
+      anything: 'goes',
+    });
+  });
+
+  it('catches thrown error and returns { error, result: null }', async () => {
+    const t = tool({
+      name: 'failing',
+      inputSchema: z.object({}),
+      execute: async () => {
+        throw new Error('boom');
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_5',
+      name: 'failing',
+      arguments: {},
+    };
+    const result = await executeRegularTool(t, tc, turnCtx); // the tool's throw is expected to surface as result.error
+    expect(result.error).toBeDefined();
+    expect(result.error!.message).toBe('boom'); // `!` is covered by the toBeDefined assertion above
+    expect(result.result).toBeNull();
+  });
+});
+
+describe('tool execution - executeGeneratorTool', () => { // async-generator tools: yielded events plus a returned final value
+  it('yields events then returns final result with preliminaryResults', async () => {
+    const t = tool({
+      name: 'gen',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        progress: z.number(),
+      }),
+      outputSchema: z.object({
+        result: z.string(),
+      }),
+      execute: async function* () {
+        yield {
+          progress: 50,
+        };
+        yield {
+          progress: 100,
+        };
+        return {
+          result: 'done',
+        };
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_6',
+      name: 'gen',
+      arguments: {},
+    };
+    const result = await executeGeneratorTool(t, tc, turnCtx);
+    expect(result.result).toEqual({ // the generator's return value is the final result
+      result: 'done',
+    });
+    expect(result.preliminaryResults).toHaveLength(2); // one entry per yield
+  });
+
+  it('calls onPreliminaryResult for each yielded event', async () => {
+    const events: unknown[] = [];
+    const t = tool({
+      name: 'gen_cb',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        step: z.number(),
+      }),
+      outputSchema: z.object({
+        done: z.boolean(),
+      }),
+      execute: async function* () {
+        yield {
+          step: 1,
+        };
+        yield {
+          step: 2,
+        };
+        return {
+          done: true,
+        };
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_7',
+      name: 'gen_cb',
+      arguments: {},
+    };
+    await executeGeneratorTool(t, tc, turnCtx, (_id, ev) => events.push(ev)); // callback gets (id, event); only the event is recorded
+    expect(events).toHaveLength(2);
+    expect(events[0]).toEqual({ // NOTE(review): only the first event's payload is checked
+      step: 1,
+    });
+  });
+
+  it('returns final result with empty preliminaryResults when only return value', async () => {
+    const t = tool({
+      name: 'gen_ret',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        ev: z.string(),
+      }),
+      outputSchema: z.object({
+        val: z.number(),
+      }),
+      execute: async function* () {
+        return {
+          val: 42,
+        };
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_8',
+      name: 'gen_ret',
+      arguments: {},
+    };
+    const result = await executeGeneratorTool(t, tc, turnCtx);
+    expect(result.result).toEqual({
+      val: 42,
+    });
+    expect(result.preliminaryResults).toHaveLength(0); // nothing was yielded
+  });
+
+  it('returns error when generator throws', async () => {
+    const t = tool({
+      name: 'gen_err',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        ev: z.string(),
+      }),
+      outputSchema: z.object({
+        val: z.number(),
+      }),
+      execute: async function* () {
+        throw new Error('gen boom');
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_9',
+      name: 'gen_err',
+      arguments: {},
+    };
+    const result = await executeGeneratorTool(t, tc, turnCtx); // expected to resolve with an error field, not reject
+    expect(result.error).toBeDefined();
+    expect(result.error!.message).toBe('gen boom');
+  });
+
+  it('returns error when generator emits nothing', async () => {
+    const t = tool({
+      name: 'gen_empty',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        ev: z.string(),
+      }),
+      outputSchema: z.object({
+        val: z.number(),
+      }),
+      execute: async function* () {
+        // yields nothing, returns nothing
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'call_10',
+      name: 'gen_empty',
+      arguments: {},
+    };
+    const result = await executeGeneratorTool(t, tc, turnCtx);
+    expect(result.error).toBeDefined();
+    expect(result.error!.message).toContain('without emitting'); // pins a fragment of the error message
+  });
+});
+
+describe('tool execution - executeTool dispatch', () => { // executeTool is expected to route by tool kind
+  it('dispatches regular tool to executeRegularTool', async () => {
+    const t = tool({
+      name: 'reg',
+      inputSchema: z.object({
+        x: z.number(),
+      }),
+      execute: async (p) => ({
+        doubled: p.x * 2,
+      }),
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c1',
+      name: 'reg',
+      arguments: {
+        x: 5,
+      },
+    };
+    const result = await executeTool(t, tc, turnCtx);
+    expect(result.result).toEqual({ // routing is inferred from the result; no spy on executeRegularTool
+      doubled: 10,
+    });
+  });
+
+  it('dispatches generator tool to executeGeneratorTool', async () => {
+    const t = tool({
+      name: 'gen',
+      inputSchema: z.object({}),
+      eventSchema: z.object({
+        ev: z.number(),
+      }),
+      outputSchema: z.object({
+        done: z.boolean(),
+      }),
+      execute: async function* () {
+        yield {
+          ev: 1,
+        };
+        return {
+          done: true,
+        };
+      },
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c2',
+      name: 'gen',
+      arguments: {},
+    };
+    const result = await executeTool(t, tc, turnCtx);
+    expect(result.result).toEqual({
+      done: true,
+    });
+    expect(result.preliminaryResults).toHaveLength(1); // one yield above, so one preliminary result
+  });
+
+  it('throws for manual tool (no execute function)', async () => {
+    const t = tool({
+      name: 'manual',
+      inputSchema: z.object({}),
+      execute: false, // marks the tool as manual: there is no execute function to run
+    });
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c3',
+      name: 'manual',
+      arguments: {},
+    };
+    await expect(executeTool(t, tc, turnCtx)).rejects.toThrow(/no execute function/i); // rejects, unlike the error-field results above
+  });
+});
+
+describe('tool execution - utility functions', () => {
+  it('findToolByName returns matching tool', () => {
+    const t = tool({
+      name: 'x',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    expect(
+      findToolByName(
+        [
+          t,
+        ],
+        'x',
+      ),
+    ).toBe(t); // identity check: the same tool object must come back
+  });
+
+  it('findToolByName returns undefined for missing tool', () => {
+    const t = tool({
+      name: 'x',
+      inputSchema: z.object({}),
+      execute: async () => ({}),
+    });
+    expect(
+      findToolByName(
+        [
+          t,
+        ],
+        'missing',
+      ),
+    ).toBeUndefined();
+  });
+
+  it('formatToolResultForModel with success returns JSON of result', () => {
+    const json = formatToolResultForModel({
+      toolCallId: 'c1',
+      toolName: 'test',
+      result: {
+        data: 42,
+      },
+    });
+    expect(JSON.parse(json)).toEqual({ // the serialized payload is expected to be the bare result
+      data: 42,
+    });
+  });
+
+  it('formatToolResultForModel with error returns JSON with error message', () => {
+    const json = formatToolResultForModel({
+      toolCallId: 'c2',
+      toolName: 'test',
+      result: null,
+      error: new Error('fail'),
+    });
+    const parsed = JSON.parse(json);
+    expect(parsed.error).toBe('fail'); // the error is expected as its message string
+    expect(parsed.toolName).toBe('test');
+  });
+
+  it('formatToolExecutionError with ZodError includes validation details', () => {
+    // The assertions live in the catch block, so require that both actually ran;
+    // otherwise the test would pass vacuously if parsing ever stopped throwing.
+    expect.assertions(2);
+    try {
+      z.object({
+        x: z.number(),
+      }).parse({
+        x: 'bad',
+      });
+    } catch (e) {
+      const tc: ParsedToolCall<Tool> = {
+        id: 'c3',
+        name: 'myTool',
+        arguments: {},
+      };
+      const msg = formatToolExecutionError(e as Error, tc);
+      expect(msg).toContain('myTool');
+      expect(msg).toContain('validation error');
+    }
+  });
+
+  it('formatToolExecutionError with generic Error includes message', () => {
+    const tc: ParsedToolCall<Tool> = {
+      id: 'c4',
+      name: 'myTool',
+      arguments: {},
+    };
+    const msg = formatToolExecutionError(new Error('something went wrong'), tc);
+    expect(msg).toContain('myTool'); // tool name comes from tc.name
+    expect(msg).toContain('something went wrong');
+  });
+
+  it('convertToolsToAPIFormat returns correct API shape array', () => {
+    const t = tool({
+      name: 'api_tool',
+      description: 'Does stuff',
+      inputSchema: z.object({
+        x: z.number(),
+      }),
+      execute: async () => ({}),
+    });
+    const apiTools = convertToolsToAPIFormat([
+      t,
+    ]);
+    expect(apiTools).toHaveLength(1);
+    expect(apiTools[0]!.type).toBe('function'); // `!` is covered by the length assertion above
+    expect(apiTools[0]!.name).toBe('api_tool');
+    expect(apiTools[0]!.description).toBe('Does stuff');
+    expect(apiTools[0]!.parameters).toBeDefined(); // NOTE(review): presence only; the schema contents are not checked
+  });
+
+  it('convertZodToJsonSchema produces valid JSON schema from Zod', () => {
+    const schema = z.object({
+      x: z.number(),
+      y: z.string(),
+    });
+    const jsonSchema = convertZodToJsonSchema(schema);
+    expect(jsonSchema).toHaveProperty('type', 'object');
+    expect(jsonSchema).toHaveProperty('properties'); // NOTE(review): presence only; x/y entries are not inspected
+  });
+
+  it('sanitizeJsonSchema removes ~prefixed keys recursively', () => { // input carries `~standard` at two depths
+    const input = {
+      type: 'object',
+      '~standard': {
+        meta: true,
+      },
+      properties: {
+        x: {
+          type: 'number',
+          '~standard': {}, // nested occurrence: this is what exercises the recursion
+        },
+      },
+    };
+    const result = sanitizeJsonSchema(input);
+    expect(JSON.stringify(result)).not.toContain('~standard'); // covers the top-level key and the one under properties.x
+    expect((result as Record<string, unknown>).type).toBe('object');
+  });
+
+  it('sanitizeJsonSchema handles primitives, null, arrays', () => {
+    expect(sanitizeJsonSchema(null)).toBeNull(); // non-object inputs are expected to pass through unchanged
+    expect(sanitizeJsonSchema(42)).toBe(42);
+    expect(
+      sanitizeJsonSchema([
+        {
+          '~meta': 1, // any `~`-prefixed key, not just `~standard`
+          val: 2,
+        },
+      ]),
+    ).toEqual([
+      {
+        val: 2,
+      },
+    ]);
+  });
+});
diff --git a/tests/behavior/tool-orchestrator.test.ts b/tests/behavior/tool-orchestrator.test.ts
new file mode 100644
index 0000000..005ad8c
--- /dev/null
+++ b/tests/behavior/tool-orchestrator.test.ts
@@ -0,0 +1,118 @@
+import { describe, expect, it } from 'vitest';
+import {
+  getToolExecutionErrors,
+  hasToolExecutionErrors,
+  summarizeToolExecutions,
+  toolResultsToMap,
+} from '../../src/lib/tool-orchestrator.js';
+import type { Tool, ToolExecutionResult } from '../../src/lib/tool-types.js';
+
+function makeResult(overrides: Partial<ToolExecutionResult<Tool>>): ToolExecutionResult<Tool> { // fixture builder for the tests below
+  return {
+    toolCallId: 'c1',
+    toolName: 'test',
+    result: {
+      ok: true,
+    },
+    ...overrides, // spread last so any supplied field replaces the default above
+  };
+}
+
+describe('tool orchestrator - toolResultsToMap', () => { // results array -> Map keyed by toolCallId
+  it('converts results array to map keyed by toolCallId', () => {
+    const results = [
+      makeResult({
+        toolCallId: 'c1',
+        result: 'a',
+      }),
+      makeResult({
+        toolCallId: 'c2',
+        result: 'b',
+      }),
+    ];
+    const map = toolResultsToMap(results);
+    expect(map.size).toBe(2);
+    expect(map.get('c1')!.result).toBe('a'); // `!`: a missing entry would throw here and fail the test
+    expect(map.get('c2')!.result).toBe('b');
+  });
+
+  it('includes preliminaryResults in map entries', () => {
+    const results = [
+      makeResult({
+        toolCallId: 'c1',
+        result: 'final',
+        preliminaryResults: [
+          'p1',
+          'p2',
+        ],
+      }),
+    ];
+    const map = toolResultsToMap(results);
+    expect(map.get('c1')!.preliminaryResults).toEqual([ // order is part of the expectation
+      'p1',
+      'p2',
+    ]);
+  });
+});
+
+describe('tool orchestrator - summarizeToolExecutions', () => { // text summary of a results array
+  it('produces success line for successful result', () => {
+    const summary = summarizeToolExecutions([
+      makeResult({
+        toolCallId: 'c1',
+        toolName: 'add',
+      }),
+    ]);
+    expect(summary).toContain('add'); // substring checks only; the exact line format is not pinned
+    expect(summary).toContain('c1');
+  });
+
+  it('produces error line for failed result', () => {
+    const summary = summarizeToolExecutions([
+      makeResult({
+        toolCallId: 'c2',
+        toolName: 'fail',
+        result: null,
+        error: new Error('oops'),
+      }),
+    ]);
+    expect(summary).toContain('fail');
+    expect(summary).toContain('oops'); // the error message is expected to appear in the summary
+  });
+});
+
+describe('tool orchestrator - hasToolExecutionErrors', () => { // boolean: does any result carry an error
+  it('returns true when any result has error', () => {
+    expect(
+      hasToolExecutionErrors([
+        makeResult({}), // default fixture: no error field
+        makeResult({
+          error: new Error('err'),
+        }),
+      ]),
+    ).toBe(true);
+  });
+
+  it('returns false when no results have errors', () => {
+    expect(
+      hasToolExecutionErrors([
+        makeResult({}),
+      ]),
+    ).toBe(false);
+  });
+});
+
+describe('tool orchestrator - getToolExecutionErrors', () => { // collects the error objects from a results array
+  it('extracts all error objects from results', () => {
+    const err = new Error('err1');
+    const results = [
+      makeResult({}), // no error: must not contribute an entry
+      makeResult({
+        error: err,
+      }),
+    ];
+    const errors = getToolExecutionErrors(results);
+    expect(errors).toHaveLength(1);
+    expect(errors[0]).toBe(err); // identity check: the original Error object is returned
+  });
+});
diff --git a/tests/behavior/tool-types-events.test.ts b/tests/behavior/tool-types-events.test.ts
new file mode 100644
index 0000000..f8e44cd
--- /dev/null
+++ b/tests/behavior/tool-types-events.test.ts
@@ -0,0 +1,94 @@
+import { describe, expect, it } from 'vitest';
+import {
+  isToolCallOutputEvent,
+  isToolPreliminaryResultEvent,
+  isToolResultEvent,
+  isTurnEndEvent,
+  isTurnStartEvent,
+} from '../../src/lib/tool-types.js';
+
+describe('tool-types event type guards', () => { // each guard is checked against its own `type` and, for some, one peer
+  it('isToolPreliminaryResultEvent matches tool.preliminary_result', () => {
+    expect(
+      isToolPreliminaryResultEvent({
+        type: 'tool.preliminary_result',
+        toolCallId: 'c1',
+        result: {},
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isToolPreliminaryResultEvent rejects tool.result', () => { // NOTE(review): also asserted in tests/boundaries/response-stream-event-guards.test.ts in this patch
+    expect(
+      isToolPreliminaryResultEvent({
+        type: 'tool.result',
+        toolCallId: 'c1',
+        result: {},
+        timestamp: 0,
+      }),
+    ).toBe(false);
+  });
+
+  it('isToolResultEvent matches tool.result', () => {
+    expect(
+      isToolResultEvent({
+        type: 'tool.result',
+        toolCallId: 'c1',
+        result: {},
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isToolResultEvent rejects tool.preliminary_result', () => { // NOTE(review): duplicated in the boundaries file as well
+    expect(
+      isToolResultEvent({
+        type: 'tool.preliminary_result',
+        toolCallId: 'c1',
+        result: {},
+        timestamp: 0,
+      }),
+    ).toBe(false);
+  });
+
+  it('isToolCallOutputEvent matches tool.call_output', () => {
+    expect(
+      isToolCallOutputEvent({
+        type: 'tool.call_output',
+        output: {},
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isTurnStartEvent matches turn.start', () => {
+    expect(
+      isTurnStartEvent({
+        type: 'turn.start',
+        turnNumber: 1,
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isTurnEndEvent matches turn.end', () => {
+    expect(
+      isTurnEndEvent({
+        type: 'turn.end',
+        turnNumber: 1,
+        timestamp: 0,
+      }),
+    ).toBe(true);
+  });
+
+  it('isTurnStartEvent rejects turn.end', () => { // NOTE(review): duplicated in the boundaries file as well
+    expect(
+      isTurnStartEvent({
+        type: 'turn.end',
+        turnNumber: 1,
+        timestamp: 0,
+      }),
+    ).toBe(false);
+  });
+});
diff --git a/tests/behavior/turn-context.test.ts b/tests/behavior/turn-context.test.ts
new file mode 100644
index 0000000..0896777
--- /dev/null
+++ b/tests/behavior/turn-context.test.ts
@@ -0,0 +1,68 @@
+import { describe, expect, it } from 'vitest';
+import { buildTurnContext, normalizeInputToArray } from '../../src/lib/turn-context.js';
+import { makeRequest, TEST_MODEL } from '../test-constants.js';
+
+describe('turn context - buildTurnContext', () => { // options object -> turn context, optional fields only when supplied
+  it('sets numberOfTurns from options', () => {
+    const ctx = buildTurnContext({
+      numberOfTurns: 3,
+    });
+    expect(ctx.numberOfTurns).toBe(3);
+  });
+
+  it('includes toolCall when provided', () => {
+    const toolCall = {
+      type: 'function_call' as const, // `as const` keeps the literal type instead of widening to string
+      callId: 'c1',
+      name: 'test',
+      arguments: '{}',
+      id: 'c1',
+      status: 'completed' as const,
+    };
+    const ctx = buildTurnContext({
+      numberOfTurns: 1,
+      toolCall,
+    });
+    expect(ctx.toolCall).toBe(toolCall); // identity check: passed through by reference, not copied
+  });
+
+  it('includes turnRequest when provided', () => {
+    const request = makeRequest({ // makeRequest / TEST_MODEL come from ../test-constants.js
+      model: TEST_MODEL,
+      input: 'hello',
+    });
+    const ctx = buildTurnContext({
+      numberOfTurns: 1,
+      turnRequest: request,
+    });
+    expect(ctx.turnRequest).toBe(request); // identity check, as for toolCall
+  });
+
+  it('omits toolCall and turnRequest when not provided', () => {
+    const ctx = buildTurnContext({
+      numberOfTurns: 0,
+    });
+    expect(ctx).not.toHaveProperty('toolCall'); // the key must be absent, not merely undefined
+    expect(ctx).not.toHaveProperty('turnRequest');
+  });
+});
+
+describe('turn context - normalizeInputToArray', () => { // string or array input -> array form
+  it('converts string input to array with user message', () => {
+    const result = normalizeInputToArray('Hello!');
+    expect(result).toHaveLength(1);
+    expect(result[0]).toHaveProperty('role', 'user'); // a bare string is expected to become one user message
+    expect(result[0]).toHaveProperty('content', 'Hello!');
+  });
+
+  it('returns array input as-is', () => {
+    const input = [
+      {
+        role: 'user' as const,
+        content: 'hi',
+      },
+    ];
+    const result = normalizeInputToArray(input);
+    expect(result).toBe(input); // identity check: the same array instance, not a copy
+  });
+});
diff --git a/tests/boundaries/README.md b/tests/boundaries/README.md
new file mode 100644
index 0000000..02e8fb4
--- /dev/null
+++ b/tests/boundaries/README.md
@@ -0,0 +1,26 @@
+# Boundaries Tests
+
+Tests in this folder verify that **similar capabilities are correctly bounded from each other**. "This one handles X; that one handles Y; they don't bleed." Each guard, classifier, or extractor must accept its own domain and reject its peers.
+
+## What belongs here
+
+- Mutual exclusion between type guards (stream event guards, output item guards, content part guards)
+- Cross-domain rejection (stream guards reject output items and vice versa)
+- Tool type classifier mutual exclusion (regular vs generator vs manual)
+- Null/undefined/malformed input safety for all guards
+- Structural distinction between similar output shapes
+- Extendable: when new guards or classifiers are added, their mutual exclusion tests go here
+
+## Examples
+
+- `isOutputTextDeltaEvent` returns true for text delta, false for reasoning delta
+- `isOutputMessage` rejects a `TextDeltaEvent` (wrong domain)
+- Regular tool satisfies `isRegularExecuteTool` but NOT `isGeneratorTool`
+- `isToolPreliminaryResultEvent` rejects `{ type: 'tool.result' }`
+- `createUnsentResult` output has a different shape from `createRejectedResult` output
+
+## What does NOT belong here
+
+- Testing what a guard does with valid input in detail (→ `behavior/`)
+- Testing that guard output feeds a transformer (→ `composition/` or `dispatch/`)
+- End-to-end type-guard-driven pipelines (→ `pipelines/`)
diff --git a/tests/boundaries/content-annotation-guards.test.ts b/tests/boundaries/content-annotation-guards.test.ts
new file mode 100644
index 0000000..56de338
--- /dev/null
+++ b/tests/boundaries/content-annotation-guards.test.ts
@@ -0,0 +1,91 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  hasTypeProperty,
+  isFileCitationAnnotation,
+  isFilePathAnnotation,
+  isOutputTextPart,
+  isRefusalPart,
+  isURLCitationAnnotation,
+} from '../../src/lib/stream-type-guards.js';
+
+describe('Content part and annotation guards - boundary between similar types', () => { // each guard: own type true, one sibling type false
+  it('isOutputTextPart: true for output_text, false for refusal', () => {
+    expect(
+      isOutputTextPart({
+        type: 'output_text', // bare `{ type }` objects: only the discriminator is exercised
+      }),
+    ).toBe(true);
+    expect(
+      isOutputTextPart({
+        type: 'refusal',
+      }),
+    ).toBe(false);
+  });
+
+  it('isRefusalPart: true for refusal, false for output_text', () => {
+    expect(
+      isRefusalPart({
+        type: 'refusal',
+      }),
+    ).toBe(true);
+    expect(
+      isRefusalPart({
+        type: 'output_text',
+      }),
+    ).toBe(false);
+  });
+
+  it('isFileCitationAnnotation: true for file_citation, false for url_citation', () => {
+    expect(
+      isFileCitationAnnotation({
+        type: 'file_citation',
+      }),
+    ).toBe(true);
+    expect(
+      isFileCitationAnnotation({
+        type: 'url_citation',
+      }),
+    ).toBe(false);
+  });
+
+  it('isURLCitationAnnotation: true for url_citation, false for file_citation', () => {
+    expect(
+      isURLCitationAnnotation({
+        type: 'url_citation',
+      }),
+    ).toBe(true);
+    expect(
+      isURLCitationAnnotation({
+        type: 'file_citation',
+      }),
+    ).toBe(false);
+  });
+
+  it('isFilePathAnnotation: true for file_path, false for file_citation', () => {
+    expect(
+      isFilePathAnnotation({
+        type: 'file_path',
+      }),
+    ).toBe(true);
+    expect(
+      isFilePathAnnotation({
+        type: 'file_citation',
+      }),
+    ).toBe(false);
+  });
+
+  it('hasTypeProperty: { type: "x" } -> true; { type: 123 } -> false; null -> false', () => {
+    expect(
+      hasTypeProperty({
+        type: 'x',
+      }),
+    ).toBe(true);
+    expect(
+      hasTypeProperty({
+        type: 123, // `type` is present but not a string
+      }),
+    ).toBe(false);
+    expect(hasTypeProperty(null)).toBe(false); // null must be rejected without throwing
+  });
+});
diff --git a/tests/boundaries/domain-separation.test.ts b/tests/boundaries/domain-separation.test.ts
new file mode 100644
index 0000000..ad3ba49
--- /dev/null
+++ b/tests/boundaries/domain-separation.test.ts
@@ -0,0 +1,49 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  isFunctionCallArgumentsDeltaEvent,
+  isFunctionCallItem,
+  isOutputMessage,
+  isOutputTextDeltaEvent,
+} from '../../src/lib/stream-type-guards.js';
+
+// `StreamEvents` is used in the casts below but was never imported. Derive it from the
+// guard's own parameter type so the file type-checks without guessing at the declaring module.
+type StreamEvents = Parameters<typeof isOutputTextDeltaEvent>[0];
+
+describe('Stream guards vs output item guards - domain separation', () => { // each guard is fed a value from the other domain
+  it('isOutputTextDeltaEvent rejects an OutputMessage (item, not stream event)', () => {
+    const item = {
+      type: 'message',
+      role: 'assistant',
+      content: [],
+    };
+    expect(isOutputTextDeltaEvent(item as unknown as StreamEvents)).toBe(false); // double cast: deliberately passing a non-event
+  });
+
+  it('isOutputMessage rejects a TextDeltaEvent (stream event, not item)', () => {
+    const event = {
+      type: 'response.output_text.delta',
+      delta: 'hello',
+    };
+    expect(isOutputMessage(event)).toBe(false);
+  });
+
+  it('isFunctionCallArgumentsDeltaEvent rejects a FunctionCallItem (completed item, not delta)', () => {
+    const item = {
+      type: 'function_call',
+      callId: 'c1',
+      name: 'test',
+      arguments: '{}',
+    };
+    expect(isFunctionCallArgumentsDeltaEvent(item as unknown as StreamEvents)).toBe(false);
+  });
+
+  it('isFunctionCallItem rejects a FunctionCallArgsDeltaEvent (delta, not item)', () => {
+    const event = {
+      type: 'response.function_call_arguments.delta',
+      delta: '{"a":',
+    };
+    expect(isFunctionCallItem(event)).toBe(false);
+  });
+});
diff --git a/tests/boundaries/execute-tool-boundary.test.ts b/tests/boundaries/execute-tool-boundary.test.ts
new file mode 100644
index 0000000..c7df6a8
--- /dev/null
+++ b/tests/boundaries/execute-tool-boundary.test.ts
@@ -0,0 +1,66 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import { executeGeneratorTool, executeRegularTool } from '../../src/lib/tool-executor.js';
+
+describe('executeRegularTool vs executeGeneratorTool - structural boundary', () => { // each executor is also fed the other kind of tool
+  const regularTool = tool({
+    name: 'regular',
+    inputSchema: z.object({
+      x: z.number(),
+    }),
+    execute: async (args) => args.x * 2,
+  });
+
+  const generatorTool = tool({
+    name: 'generator',
+    inputSchema: z.object({
+      x: z.number(),
+    }),
+    eventSchema: z.object({
+      progress: z.number(),
+    }),
+    outputSchema: z.object({
+      result: z.number(),
+    }),
+    execute: async function* (args) {
+      yield {
+        progress: 50,
+      };
+      return {
+        result: args.x * 2,
+      };
+    },
+  });
+
+  const toolCall = {
+    id: 'tc_1',
+    name: 'test', // NOTE(review): matches neither tool name above; the tool is passed explicitly to each executor below
+    arguments: {
+      x: 5,
+    },
+  };
+  const turnCtx = {
+    numberOfTurns: 1,
+  };
+
+  it('executeRegularTool throws when given a generator tool', async () => {
+    await expect(executeRegularTool(generatorTool, toolCall, turnCtx)).rejects.toThrow(); // expects a rejection, not an error-field result
+  });
+
+  it('executeGeneratorTool throws when given a regular tool', async () => {
+    await expect(executeGeneratorTool(regularTool, toolCall, turnCtx)).rejects.toThrow();
+  });
+
+  it('executeRegularTool result has NO preliminaryResults', async () => {
+    const result = await executeRegularTool(regularTool, toolCall, turnCtx);
+    expect(result).not.toHaveProperty('preliminaryResults'); // the key must be absent, not just empty
+  });
+
+  it('executeGeneratorTool result HAS preliminaryResults array', async () => {
+    const result = await executeGeneratorTool(generatorTool, toolCall, turnCtx);
+    expect(result).toHaveProperty('preliminaryResults');
+    expect(Array.isArray(result.preliminaryResults)).toBe(true);
+  });
+});
diff --git a/tests/boundaries/output-item-guards.test.ts b/tests/boundaries/output-item-guards.test.ts
new file mode 100644
index 0000000..df5d29b
--- /dev/null
+++ b/tests/boundaries/output-item-guards.test.ts
@@ -0,0 +1,73 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  isFileSearchCallOutputItem,
+  isFunctionCallItem,
+  isImageGenerationCallOutputItem,
+  isOutputMessage,
+  isReasoningOutputItem,
+  isWebSearchCallOutputItem,
+} from '../../src/lib/stream-type-guards.js';
+
+// Each output-item guard paired with the `type` value it is expected to accept.
+const guards = [
+  {
+    name: 'isOutputMessage',
+    fn: isOutputMessage,
+    type: 'message',
+  },
+  {
+    name: 'isFunctionCallItem',
+    fn: isFunctionCallItem,
+    type: 'function_call',
+  },
+  {
+    name: 'isReasoningOutputItem',
+    fn: isReasoningOutputItem,
+    type: 'reasoning',
+  },
+  {
+    name: 'isWebSearchCallOutputItem',
+    fn: isWebSearchCallOutputItem,
+    type: 'web_search_call',
+  },
+  {
+    name: 'isFileSearchCallOutputItem',
+    fn: isFileSearchCallOutputItem,
+    type: 'file_search_call',
+  },
+  {
+    name: 'isImageGenerationCallOutputItem',
+    fn: isImageGenerationCallOutputItem,
+    type: 'image_generation_call',
+  },
+] as const;
+
+describe('Output item type guards - mutual exclusion', () => {
+  for (const guard of guards) {
+    describe(guard.name, () => {
+      it(`returns true for its own item type: ${guard.type}`, () => {
+        const item = {
+          type: guard.type,
+        };
+        expect(guard.fn(item)).toBe(true);
+      });
+
+      it('returns false for every other output item type', () => {
+        // Mutual exclusion has to hold against all peers, not only the first one found.
+        for (const peer of guards.filter((g) => g.type !== guard.type)) {
+          const item = {
+            type: peer.type,
+          };
+          expect(guard.fn(item)).toBe(false);
+        }
+      });
+
+      it('returns false for null, undefined, and primitive', () => {
+        expect(guard.fn(null)).toBe(false);
+        expect(guard.fn(undefined)).toBe(false);
+        expect(guard.fn(42)).toBe(false);
+      });
+    });
+  }
+});
diff --git a/tests/boundaries/response-stream-event-guards.test.ts b/tests/boundaries/response-stream-event-guards.test.ts
new file mode 100644
index 0000000..27e8fdc
--- /dev/null
+++ b/tests/boundaries/response-stream-event-guards.test.ts
@@ -0,0 +1,59 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  isToolCallOutputEvent,
+  isToolPreliminaryResultEvent,
+  isToolResultEvent,
+  isTurnEndEvent,
+  isTurnStartEvent,
+} from '../../src/lib/tool-types.js';
+
+// Every case hands a guard an event whose `type` differs from the one named in
+// the guard and expects `false`; acceptance is not covered in this block.
+describe('ResponseStreamEvent guards - mutual exclusion', () => {
+  /**
+   * Fixture with the tool-event fields used below and the given `type`.
+   * `type` is deliberately a plain `string`, not a literal union.
+   */
+  const toolEvent = (type: string) => ({
+    type,
+    toolCallId: 'c1',
+    result: 42,
+    timestamp: 1,
+  });
+
+  /** Fixture with the turn-event fields used below and the given `type`. */
+  const turnEvent = (type: string) => ({
+    type,
+    turnNumber: 1,
+    timestamp: 1,
+  });
+
+  // tool.result <-> tool.preliminary_result
+  it('isToolPreliminaryResultEvent rejects { type: "tool.result" }', () => {
+    const verdict = isToolPreliminaryResultEvent(toolEvent('tool.result'));
+    expect(verdict).toBe(false);
+  });
+
+  it('isToolResultEvent rejects { type: "tool.preliminary_result" }', () => {
+    const verdict = isToolResultEvent(toolEvent('tool.preliminary_result'));
+    expect(verdict).toBe(false);
+  });
+
+  // turn.start <-> turn.end
+  it('isTurnStartEvent rejects { type: "turn.end" }', () => {
+    const verdict = isTurnStartEvent(turnEvent('turn.end'));
+    expect(verdict).toBe(false);
+  });
+
+  it('isTurnEndEvent rejects { type: "turn.start" }', () => {
+    const verdict = isTurnEndEvent(turnEvent('turn.start'));
+    expect(verdict).toBe(false);
+  });
+
+  // tool.result fed to the tool-call-output guard
+  it('isToolCallOutputEvent rejects { type: "tool.result" }', () => {
+    const verdict = isToolCallOutputEvent(toolEvent('tool.result'));
+    expect(verdict).toBe(false);
+  });
+});
diff --git a/tests/boundaries/stream-event-guards.test.ts b/tests/boundaries/stream-event-guards.test.ts
new file mode 100644
index 0000000..5fef197
--- /dev/null
+++ b/tests/boundaries/stream-event-guards.test.ts
@@ -0,0 +1,97 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  isFunctionCallArgumentsDeltaEvent,
+  isFunctionCallArgumentsDoneEvent,
+  isOutputItemAddedEvent,
+  isOutputItemDoneEvent,
+  isOutputTextDeltaEvent,
+  isReasoningDeltaEvent,
+  isResponseCompletedEvent,
+  isResponseFailedEvent,
+  isResponseIncompleteEvent,
+} from '../../src/lib/stream-type-guards.js';
+import { makeRequest } from '../test-constants.js';
+
+/**
+ * Stream-event guards under test, one row per guard: `fn` is the guard,
+ * `type` is the event `type` fed to it by the tests below, and `name` labels
+ * the generated `describe` block.
+ */
+const guards = [
+  // Delta events
+  {
+    type: 'response.output_text.delta',
+    name: 'isOutputTextDeltaEvent', fn: isOutputTextDeltaEvent,
+  },
+  {
+    type: 'response.reasoning_text.delta',
+    name: 'isReasoningDeltaEvent', fn: isReasoningDeltaEvent,
+  },
+  {
+    type: 'response.function_call_arguments.delta',
+    name: 'isFunctionCallArgumentsDeltaEvent', fn: isFunctionCallArgumentsDeltaEvent,
+  },
+  // Output item lifecycle
+  {
+    type: 'response.output_item.added',
+    name: 'isOutputItemAddedEvent', fn: isOutputItemAddedEvent,
+  },
+  {
+    type: 'response.output_item.done',
+    name: 'isOutputItemDoneEvent', fn: isOutputItemDoneEvent,
+  },
+  // Response outcome
+  {
+    type: 'response.completed',
+    name: 'isResponseCompletedEvent', fn: isResponseCompletedEvent,
+  },
+  {
+    type: 'response.failed',
+    name: 'isResponseFailedEvent', fn: isResponseFailedEvent,
+  },
+  {
+    type: 'response.incomplete',
+    name: 'isResponseIncompleteEvent', fn: isResponseIncompleteEvent,
+  },
+  // Function-call arguments completion
+  {
+    type: 'response.function_call_arguments.done',
+    name: 'isFunctionCallArgumentsDoneEvent', fn: isFunctionCallArgumentsDoneEvent,
+  },
+] as const;
+
+// Every guard must accept its own `type`, reject each peer's `type`, and reject
+// objects whose `type` is missing, unknown, or empty.
+describe('Stream event type guards - mutual exclusion', () => {
+  for (const guard of guards) {
+    // Input type taken from the guard's own signature, so deliberately
+    // invalid events can be cast without naming an event type that this
+    // file does not import.
+    type GuardInput = Parameters<typeof guard.fn>[0];
+    const peers = guards.filter((g) => g.type !== guard.type);
+
+    describe(guard.name, () => {
+      it(`returns true for its own event type: ${guard.type}`, () => {
+        const event = { type: guard.type };
+        expect(guard.fn(event)).toBe(true);
+      });
+
+      // A single peer would not prove mutual exclusion, so all are checked.
+      it('returns false for every other stream event type', () => {
+        for (const peer of peers) {
+          const event = { type: peer.type };
+          expect(guard.fn(event), `input type: ${peer.type}`).toBe(false);
+        }
+      });
+
+      it('returns false for objects missing type or with wrong type', () => {
+        // NOTE(review): makeRequest({}) is presumably the "missing type" case — confirm.
+        expect(guard.fn(makeRequest({}))).toBe(false);
+        for (const type of ['unrelated.event', '']) {
+          expect(guard.fn({ type } as unknown as GuardInput)).toBe(false);
+        }
+      });
+    });
+  }
+});
diff --git a/tests/boundaries/tool-type-guards.test.ts b/tests/boundaries/tool-type-guards.test.ts
new file mode 100644
index 0000000..83dff73
--- /dev/null
+++ b/tests/boundaries/tool-type-guards.test.ts
@@ -0,0 +1,86 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  hasExecuteFunction,
+  isGeneratorTool,
+  isManualTool,
+  isRegularExecuteTool,
+} from '../../src/lib/tool-types.js';
+
+// Builds one tool of each kind with `tool()` and records how each of the
+// four classifiers answers for it.
+describe('Tool type guards - mutual exclusion across 4 classifiers', () => {
+  // `execute` is a plain async function.
+  const regularTool = tool({
+    name: 'regular',
+    description: 'A regular tool',
+    inputSchema: z.object({ x: z.number() }),
+    execute: async (args) => args.x * 2,
+  });
+
+  // `execute` is an async generator; event and output schemas are declared.
+  const generatorTool = tool({
+    name: 'generator',
+    description: 'A generator tool',
+    inputSchema: z.object({ x: z.number() }),
+    eventSchema: z.object({ progress: z.number() }),
+    outputSchema: z.object({ result: z.number() }),
+    execute: async function* (args) {
+      yield { progress: 50 };
+      return { result: args.x * 2 };
+    },
+  });
+
+  // `execute` is `false`.
+  const manualTool = tool({
+    name: 'manual',
+    description: 'A manual tool',
+    inputSchema: z.object({ x: z.number() }),
+    execute: false,
+  });
+
+  /** Answers of all four classifiers for `candidate`, keyed by classifier name. */
+  const classify = (candidate: typeof regularTool | typeof generatorTool | typeof manualTool) => ({
+    hasExecuteFunction: hasExecuteFunction(candidate),
+    isRegularExecuteTool: isRegularExecuteTool(candidate),
+    isGeneratorTool: isGeneratorTool(candidate),
+    isManualTool: isManualTool(candidate),
+  });
+
+  it('regular tool: hasExecuteFunction=T, isRegularExecuteTool=T, isGeneratorTool=F, isManualTool=F', () => {
+    expect(classify(regularTool)).toEqual({
+      hasExecuteFunction: true,
+      isRegularExecuteTool: true,
+      isGeneratorTool: false,
+      isManualTool: false,
+    });
+  });
+
+  it('generator tool: hasExecuteFunction=T, isRegularExecuteTool=F, isGeneratorTool=T, isManualTool=F', () => {
+    expect(classify(generatorTool)).toEqual({
+      hasExecuteFunction: true,
+      isRegularExecuteTool: false,
+      isGeneratorTool: true,
+      isManualTool: false,
+    });
+  });
+
+  it('manual tool: hasExecuteFunction=F, isRegularExecuteTool=F, isGeneratorTool=F, isManualTool=T', () => {
+    expect(classify(manualTool)).toEqual({
+      hasExecuteFunction: false,
+      isRegularExecuteTool: false,
+      isGeneratorTool: false,
+      isManualTool: true,
+    });
+  });
+
+  it('no tool satisfies both isRegularExecuteTool and isGeneratorTool', () => {
+    for (const candidate of [regularTool, generatorTool, manualTool]) {
+      const regular = isRegularExecuteTool(candidate);
+      const generator = isGeneratorTool(candidate);
+      expect(regular && generator).toBe(false);
+    }
+  });
+});
diff --git a/tests/composition/README.md b/tests/composition/README.md
new file mode 100644
index 0000000..fe8db71
--- /dev/null
+++ b/tests/composition/README.md
@@ -0,0 +1,25 @@
+# Composition Tests
+
+Tests in this folder verify that **capabilities compose** — the output of one module is accepted as input by the next. They do not check the detailed correctness of individual outputs, only that the modules connect.
+
+## What belongs here
+
+- Module A's output shape is accepted by Module B's input
+- Data flows through a two-module chain without errors
+- Multi-consumer scenarios where the same source feeds multiple consumers
+- Cross-module contract verification (e.g., orchestrator utilities consume real executor results)
+- Extendable: when new modules are introduced, their connection tests with existing modules go here
+
+## Examples
+
+- `tool()` output is accepted by `isRegularExecuteTool` / `convertToolsToAPIFormat`
+- Two `ReusableReadableStream` consumers both receive all items
+- `executeNextTurnParamsFunctions` output is accepted by `applyNextTurnParamsToRequest`
+- `createUnsentResult` output is accepted by `unsentResultsToAPIFormat`
+- `partitionToolCalls` internally uses `toolRequiresApproval`
+
+## What does NOT belong here
+
+- Verifying the detailed correctness of each module's output (→ `behavior/`)
+- Verifying that similar modules don't accept each other's input (→ `boundaries/`)
+- Full multi-layer pipelines with per-layer assertions (→ `pipelines/`)
diff --git a/tests/composition/context-flow.test.ts b/tests/composition/context-flow.test.ts
new file mode 100644
index 0000000..caba3ce
--- /dev/null
+++ b/tests/composition/context-flow.test.ts
@@ -0,0 +1,46 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import {
+  buildToolExecuteContext,
+  resolveContext,
+  ToolContextStore,
+} from '../../src/lib/tool-context.js';
+import { buildTurnContext } from '../../src/lib/turn-context.js';
+
+// Follows one value through buildTurnContext -> resolveContext ->
+// ToolContextStore -> buildToolExecuteContext and checks it at each hand-off.
+describe('Context flow: turn context -> tool execute context -> tool function', () => {
+  // Same key is used to seed the store and to build the execute context.
+  const STORE_KEY = 'test';
+
+  it('buildToolExecuteContext receives TurnContext from buildTurnContext -> tool execute receives correct numberOfTurns', () => {
+    const turn = buildTurnContext({ numberOfTurns: 3 });
+
+    // Empty store and no schema: only the turn counter is asserted here.
+    const emptyStore = new ToolContextStore();
+    const executeContext = buildToolExecuteContext(turn, emptyStore, STORE_KEY, undefined);
+
+    expect(executeContext.numberOfTurns).toBe(3);
+  });
+
+  it('resolveContext passes TurnContext to context function -> result populates ToolContextStore -> buildToolExecuteContext.local reads from store', async () => {
+    const expectedLocal = { apiKey: 'key-for-turn-2' };
+    const turn = buildTurnContext({ numberOfTurns: 2 });
+
+    // Step 1: the context function is resolved against the turn context.
+    const resolved = await resolveContext(
+      (ctx: { numberOfTurns: number }) => ({ apiKey: `key-for-turn-${ctx.numberOfTurns}` }),
+      turn,
+    );
+    expect(resolved).toEqual(expectedLocal);
+
+    // Step 2: the resolved value seeds the store under STORE_KEY.
+    const seededStore = new ToolContextStore({ [STORE_KEY]: resolved });
+
+    // Step 3: the execute context built with a schema exposes it as `local`.
+    const localSchema = z.object({ apiKey: z.string() });
+    const executeContext = buildToolExecuteContext(turn, seededStore, STORE_KEY, localSchema);
+
+    expect(executeContext.local).toEqual(expectedLocal);
+  });
+});
diff --git a/tests/composition/state-machine.test.ts b/tests/composition/state-machine.test.ts
new file mode 100644
index 0000000..c1e3d0f
--- /dev/null
+++ b/tests/composition/state-machine.test.ts
@@ -0,0 +1,76 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  createRejectedResult,
+  createUnsentResult,
+  partitionToolCalls,
+  unsentResultsToAPIFormat,
+} from '../../src/lib/conversation-state.js';
+
+// Two hand-offs are exercised here: real `tool()` definitions are given to
+// partitionToolCalls, and the output of the two result constructors is given
+// to unsentResultsToAPIFormat.
+describe('State machine: state -> approval -> resumption', () => {
+  it('partitionToolCalls uses toolRequiresApproval internally -> partitioned results are consistent', async () => {
+    // Declared with `requireApproval: true`.
+    const gatedTool = tool({
+      name: 'dangerous',
+      inputSchema: z.object({ target: z.string() }),
+      requireApproval: true,
+      execute: async () => 'deleted',
+    });
+
+    // Declared without any approval setting.
+    const openTool = tool({
+      name: 'safe',
+      inputSchema: z.object({ q: z.string() }),
+      execute: async () => 'result',
+    });
+
+    // One pending call per tool, carrying that tool's name.
+    const pendingCalls = [
+      {
+        id: 'tc_1',
+        name: 'dangerous',
+        arguments: { target: 'file.txt' },
+      },
+      {
+        id: 'tc_2',
+        name: 'safe',
+        arguments: { q: 'hello' },
+      },
+    ];
+
+    const { requiresApproval, autoExecute } = await partitionToolCalls(pendingCalls, [
+      gatedTool,
+      openTool,
+    ]);
+
+    // Exactly one call lands in each bucket ...
+    expect(requiresApproval).toHaveLength(1);
+    expect(autoExecute).toHaveLength(1);
+
+    // ... and each bucket holds the call for the matching tool.
+    expect(requiresApproval[0]!.name).toBe('dangerous');
+    expect(autoExecute[0]!.name).toBe('safe');
+  });
+
+  it('createUnsentResult / createRejectedResult output accepted by unsentResultsToAPIFormat', () => {
+    const unsentResult = createUnsentResult('tc_1', 'search', { data: 'found' });
+    const rejectedResult = createRejectedResult('tc_2', 'delete');
+
+    const apiItems = unsentResultsToAPIFormat([unsentResult, rejectedResult]);
+
+    expect(apiItems).toHaveLength(2);
+
+    // Both kinds of result come back as `function_call_output` items that
+    // keep their call ids, in input order.
+    const expectedCallIds = ['tc_1', 'tc_2'];
+    expectedCallIds.forEach((callId, position) => {
+      expect(apiItems[position]!.callId).toBe(callId);
+      expect(apiItems[position]!.type).toBe('function_call_output');
+    });
+  });
+});
diff --git a/tests/composition/tool-lifecycle.test.ts b/tests/composition/tool-lifecycle.test.ts
new file mode 100644
index 0000000..28d1f95
--- /dev/null
+++ b/tests/composition/tool-lifecycle.test.ts
@@ -0,0 +1,98 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  convertToolsToAPIFormat,
+  executeTool,
+  findToolByName,
+} from '../../src/lib/tool-executor.js';
+import { isGeneratorTool, isManualTool, isRegularExecuteTool } from '../../src/lib/tool-types.js';
+
+// Takes tools built by `tool()` through the later stages in turn:
+// classification by the type guards, conversion to the API format, lookup by
+// name, and execution. Each test only checks that one stage accepts what the
+// previous one produced.
+describe('Tool lifecycle: definition -> classification -> execution', () => {
+  // `execute` is a plain async function.
+  const regularTool = tool({
+    name: 'add',
+    description: 'Add numbers',
+    inputSchema: z.object({ a: z.number(), b: z.number() }),
+    execute: async (args) => args.a + args.b,
+  });
+
+  // `execute` is an async generator: one progress event, then the sum.
+  const generatorTool = tool({
+    name: 'stream_add',
+    description: 'Stream add',
+    inputSchema: z.object({ a: z.number(), b: z.number() }),
+    eventSchema: z.object({ progress: z.number() }),
+    outputSchema: z.object({ sum: z.number() }),
+    execute: async function* (args) {
+      yield { progress: 50 };
+      return { sum: args.a + args.b };
+    },
+  });
+
+  // `execute` is `false`.
+  const manualTool = tool({
+    name: 'manual_op',
+    description: 'Manual tool',
+    inputSchema: z.object({ x: z.string() }),
+    execute: false,
+  });
+
+  /** Fresh array per call; order is relied on by the conversion test. */
+  const everyTool = () => [
+    regularTool,
+    generatorTool,
+    manualTool,
+  ];
+
+  it('tool() output is accepted by isRegularExecuteTool / isGeneratorTool / isManualTool', () => {
+    // Each tool is shown only to the guard for its own kind.
+    const verdicts = [
+      isRegularExecuteTool(regularTool),
+      isGeneratorTool(generatorTool),
+      isManualTool(manualTool),
+    ];
+
+    expect(verdicts).toEqual([true, true, true]);
+  });
+
+  it('tool() output is accepted by convertToolsToAPIFormat', () => {
+    const converted = convertToolsToAPIFormat(everyTool());
+
+    // One converted entry per tool, in input order.
+    expect(converted).toHaveLength(3);
+    expect(converted[0]!.name).toBe('add');
+    expect(converted[0]!.type).toBe('function');
+    expect(converted[1]!.name).toBe('stream_add');
+    expect(converted[2]!.name).toBe('manual_op');
+  });
+
+  it('extractToolCallsFromResponse output shape is accepted by findToolByName + executeTool', async () => {
+    // Hand-written object in the tool-call shape; nothing is extracted from
+    // a response in this test.
+    const call = {
+      id: 'tc_1',
+      name: 'add',
+      arguments: {
+        a: 1,
+        b: 2,
+      },
+    };
+
+    const match = findToolByName(everyTool(), call.name);
+    expect(match).toBeDefined();
+
+    const outcome = await executeTool(match!, call, {
+      numberOfTurns: 1,
+    });
+
+    // 1 + 2, reported against the id of the call that triggered it.
+    expect(outcome.toolCallId).toBe('tc_1');
+    expect(outcome.result).toBe(3);
+  });
+});
diff --git a/tests/contracts/README.md b/tests/contracts/README.md
new file mode 100644
index 0000000..f8b468b
--- /dev/null
+++ b/tests/contracts/README.md
@@ -0,0 +1,26 @@
+# Contracts Tests
+
+Tests in this folder verify that a capability **works as specified AND its boundary with similar capabilities is correct**. Both the "what it does" and the "what it does NOT do" are asserted in the same test.
+
+## What belongs here
+
+- Stop conditions that check their own criterion AND ignore all others
+- Delta extractors that yield their event type AND skip peer event types
+- Message stream builders that produce their format AND differ structurally from peers
+- Executor functions that handle their tool type AND reject the other type
+- Async param resolution where static, function, and client-only fields are each handled distinctly
+- Extendable: when a capability gains new peers or alternatives, their combined behavior-and-boundary tests go here
+
+## Examples
+
+- `stepCountIs(3)` returns true at 3 steps AND ignores tool names, tokens, cost, finishReason
+- `extractTextDeltas` yields text deltas AND skips reasoning + tool deltas in the same stream
+- `executeRegularTool` succeeds with regular tools AND throws when given a generator tool
+- `resolveAsyncFunctions` passes static values through, resolves functions, AND strips client-only fields
+- `fromClaudeMessages` maps text blocks to messages, tool_use to function calls, each distinctly
+
+## What does NOT belong here
+
+- Pure specification without boundary checking (→ `behavior/`)
+- Pure boundary checking without verifying output correctness (→ `boundaries/`)
+- Cross-module composition (→ `composition/`, `integration/`, or `pipelines/`)
diff --git a/tests/contracts/async-params.test.ts b/tests/contracts/async-params.test.ts
new file mode 100644
index 0000000..5468739
--- /dev/null
+++ b/tests/contracts/async-params.test.ts
@@ -0,0 +1,106 @@
+import { describe, expect, it } from 'vitest';
+
+import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
+import { makeCallModelInput, makeTurnContext, TEST_MODEL } from '../test-constants.js';
+
+// resolveAsyncFunctions is given inputs that mix three kinds of field —
+// literal values, functions of the turn context, and client-only options —
+// and the resolved request is checked for how each kind was treated.
+describe('resolveAsyncFunctions - three field types handled distinctly', () => {
+  // Shared turn context; the function-valued fields below read numberOfTurns.
+  const turnCtx = makeTurnContext({ numberOfTurns: 2 });
+
+  /** Function-valued temperature: a tenth of the current turn number. */
+  const temperatureFromTurn = (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1;
+
+  it('static values (model, temperature as literals) -> passed through unchanged', async () => {
+    const input = makeCallModelInput({ model: TEST_MODEL, temperature: 0.7 });
+
+    const resolved = await resolveAsyncFunctions(input, turnCtx);
+
+    expect(resolved.model).toBe(TEST_MODEL);
+    expect(resolved.temperature).toBe(0.7);
+  });
+
+  it('function values -> resolved by calling with context, result stored', async () => {
+    const input = makeCallModelInput({ temperature: temperatureFromTurn });
+
+    const resolved = await resolveAsyncFunctions(input, turnCtx);
+
+    // numberOfTurns is 2, so the function returns 2 * 0.1.
+    expect(resolved.temperature).toBe(0.2);
+  });
+
+  it('client-only fields (stopWhen, state, requireApproval, context, onTurnStart, onTurnEnd) -> stripped entirely', async () => {
+    const input = makeCallModelInput({
+      model: TEST_MODEL,
+      stopWhen: () => true,
+      state: { get: () => null },
+      requireApproval: () => false,
+      context: { shared: {} },
+      onTurnStart: () => {},
+      onTurnEnd: () => {},
+    });
+
+    const resolved = await resolveAsyncFunctions(input, turnCtx);
+
+    const clientOnlyFields = [
+      'stopWhen',
+      'state',
+      'requireApproval',
+      'context',
+      'onTurnStart',
+      'onTurnEnd',
+    ];
+    for (const field of clientOnlyFields) {
+      expect(resolved).not.toHaveProperty(field);
+    }
+
+    // A regular field in the same input is still present.
+    expect(resolved.model).toBe(TEST_MODEL);
+  });
+
+  it('tools field -> preserved (exception to client-only stripping)', async () => {
+    const tools = [
+      {
+        type: 'function',
+        function: { name: 'test' },
+      },
+    ];
+    const input = makeCallModelInput({ model: TEST_MODEL, tools });
+
+    const resolved = await resolveAsyncFunctions(input, turnCtx);
+
+    // Only presence is asserted, not the contents of the field.
+    expect(resolved).toHaveProperty('tools');
+  });
+
+  it('function error -> wraps with field name context', async () => {
+    const input = makeCallModelInput({
+      temperature: () => {
+        throw new Error('boom');
+      },
+    });
+
+    // The rejection message names the field whose function threw.
+    await expect(resolveAsyncFunctions(input, turnCtx)).rejects.toThrow(
+      'Failed to resolve async function for field "temperature"',
+    );
+  });
+
+  it('mix of static + function + client-only in one call -> all handled correctly', async () => {
+    const input = makeCallModelInput({
+      model: TEST_MODEL,
+      temperature: temperatureFromTurn,
+      stopWhen: () => true,
+      input: 'hello',
+    });
+
+    const resolved = await resolveAsyncFunctions(input, turnCtx);
+
+    expect(resolved.model).toBe(TEST_MODEL);
+    expect(resolved.temperature).toBe(0.2);
+    expect(resolved).not.toHaveProperty('stopWhen');
+    expect(resolved.input).toBe('hello');
+  });
+});
diff --git a/tests/contracts/conversation-state-results.test.ts b/tests/contracts/conversation-state-results.test.ts
new file mode 100644
index 0000000..c7a4e11
--- /dev/null
+++ b/tests/contracts/conversation-state-results.test.ts
@@ -0,0 +1,51 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  createRejectedResult,
+  createUnsentResult,
+  unsentResultsToAPIFormat,
+} from '../../src/lib/conversation-state.js';
+
+// Pins the shape produced by the two result constructors, and the string
+// that unsentResultsToAPIFormat puts in `output` for a success result and
+// for a rejected one.
+describe('Conversation state utilities - distinct result types', () => {
+  // Returns a fresh payload each time so the value handed to the constructor
+  // and the value used as the expectation are never the same object.
+  const payload = () => ({ data: 'found' });
+
+  it('createUnsentResult output has output (value) but no error', () => {
+    const unsent = createUnsentResult('c1', 'search', payload());
+
+    expect(unsent.output).toEqual(payload());
+    expect(unsent).not.toHaveProperty('error');
+  });
+
+  it('createRejectedResult output has output: null AND error string', () => {
+    // No reason is passed, so the error text asserted below is the default.
+    const rejected = createRejectedResult('c1', 'delete');
+
+    expect(rejected.output).toBeNull();
+    expect(rejected.error).toBe('Tool call rejected by user');
+  });
+
+  it('unsentResultsToAPIFormat: success result -> output is JSON.stringify(output)', () => {
+    // Serialised with JSON.stringify, not passed through as an object.
+    const unsent = createUnsentResult('c1', 'search', payload());
+    const expectedOutput = JSON.stringify(payload());
+
+    const [formatted] = unsentResultsToAPIFormat([unsent]);
+
+    expect(formatted!.output).toBe(expectedOutput);
+  });
+
+  it('unsentResultsToAPIFormat: error result -> output is JSON.stringify({ error })', () => {
+    // Here an explicit reason is supplied as the third argument.
+    const rejected = createRejectedResult('c1', 'delete', 'Not allowed');
+    const expectedOutput = JSON.stringify({ error: 'Not allowed' });
+
+    const [formatted] = unsentResultsToAPIFormat([rejected]);
+
+    expect(formatted!.output).toBe(expectedOutput);
+  });
+});
diff --git a/tests/contracts/delta-extractors.test.ts b/tests/contracts/delta-extractors.test.ts
new file mode 100644
index 0000000..fb36514
--- /dev/null
+++ b/tests/contracts/delta-extractors.test.ts
@@ -0,0 +1,111 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import {
+  extractReasoningDeltas,
+  extractTextDeltas,
+  extractToolDeltas,
+} from '../../src/lib/stream-transformers.js';
+
+/** Builds a stream that enqueues every entry of `events` and then closes. */
+// Return type is derived from `extractTextDeltas` so no event-type import is needed; fixtures may be partial events.
+function makeStream(events: unknown[]): Parameters<typeof extractTextDeltas>[0] {
+  const source = new ReadableStream({
+    start(controller) {
+      for (const event of events) controller.enqueue(event);
+      controller.close();
+    },
+  });
+  return new ReusableReadableStream(source);
+}
+
+/** Drains `iter` to completion and returns every yielded string, in order. */
+async function collect(iter: AsyncIterable<string>): Promise<string[]> {
+  const chunks: string[] = [];
+  // `for await` keeps pulling until the iterable reports completion.
+  for await (const chunk of iter) chunks.push(chunk);
+  return chunks;
+}
+
+// A single interleaved sequence of text, reasoning and tool-argument deltas
+// is replayed for each extractor; the expected output is the deltas of that
+// extractor's own event type, in stream order, and nothing else.
+describe('Delta extractors - each yields ONLY its event type', () => {
+  // Event `type` values used by the fixtures.
+  const TEXT = 'response.output_text.delta';
+  const REASONING = 'response.reasoning_text.delta';
+  const TOOL_ARGS = 'response.function_call_arguments.delta';
+
+  // Two deltas of each kind, interleaved text -> reasoning -> tool arguments.
+  const mixedEvents = [
+    {
+      type: TEXT,
+      delta: 'hello',
+    },
+    {
+      type: REASONING,
+      delta: 'thinking',
+    },
+    {
+      type: TOOL_ARGS,
+      delta: '{"q":',
+    },
+    {
+      type: TEXT,
+      delta: ' world',
+    },
+    {
+      type: REASONING,
+      delta: ' more',
+    },
+    {
+      type: TOOL_ARGS,
+      delta: '"test"}',
+    },
+  ];
+
+  it('extractTextDeltas yields strings from output_text.delta events; reasoning + tool deltas ignored', async () => {
+    const source = makeStream(mixedEvents);
+    const deltas = await collect(extractTextDeltas(source));
+    expect(deltas).toEqual(['hello', ' world']);
+  });
+
+  it('extractReasoningDeltas yields strings from reasoning_text.delta events; ignores text + tool', async () => {
+    const source = makeStream(mixedEvents);
+    const deltas = await collect(extractReasoningDeltas(source));
+    expect(deltas).toEqual(['thinking', ' more']);
+  });
+
+  it('extractToolDeltas yields strings from function_call_arguments.delta events; ignores text + reasoning', async () => {
+    const source = makeStream(mixedEvents);
+    const deltas = await collect(extractToolDeltas(source));
+    // The two argument fragments are expected as-is, not joined or parsed.
+    expect(deltas).toEqual(['{"q":', '"test"}']);
+  });
+
+  it('extractTextDeltas skips events with empty/undefined delta', async () => {
+    // Same event type throughout; the two middle events carry no usable text.
+    const sparseEvents = [
+      {
+        type: TEXT,
+        delta: 'hello',
+      },
+      {
+        type: TEXT,
+        delta: '',
+      },
+      {
+        type: TEXT,
+        delta: undefined,
+      },
+      {
+        type: TEXT,
+        delta: ' world',
+      },
+    ];
+
+    const source = makeStream(sparseEvents);
+    const deltas = await collect(extractTextDeltas(source));
+    expect(deltas).toEqual(['hello', ' world']);
+  });
+});
diff --git a/tests/contracts/from-claude-messages.test.ts b/tests/contracts/from-claude-messages.test.ts
new file mode 100644
index 0000000..56d5c29
--- /dev/null
+++ b/tests/contracts/from-claude-messages.test.ts
@@ -0,0 +1,91 @@
+import type * as models from '@openrouter/sdk/models';
+import { describe, expect, it } from 'vitest';
+
+import { fromClaudeMessages } from '../../src/lib/anthropic-compat.js';
+
+// One Claude message with a single content block per test. Item fields are
+// checked through matchers, so no property is read off a possibly-undefined item.
+describe('fromClaudeMessages - each block type maps distinctly', () => {
+  it('text blocks -> EasyInputMessage (not function_call_output, not function_call)', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'user',
+        content: [{ type: 'text' as const, text: 'Hello' }],
+      },
+    ]);
+    const items = result as models.OutputItems[];
+
+    expect(items).toHaveLength(1);
+    // A plain message: it has a role and string content, and no `type` tag.
+    expect(items[0]).toHaveProperty('role');
+    expect(items[0]).toHaveProperty('content', 'Hello');
+    expect(items[0]).not.toHaveProperty('type');
+  });
+
+  it('tool_use blocks -> FunctionCallItem (not EasyInputMessage, not function_call_output)', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'assistant',
+        content: [
+          {
+            type: 'tool_use' as const,
+            id: 'tu_1',
+            name: 'search',
+            input: { q: 'test' },
+          },
+        ],
+      },
+    ]);
+    const items = result as models.OutputItems[];
+    const toolItem = items.find((i) => i.type === 'function_call');
+    expect(toolItem).toBeDefined();
+    // The Claude block `id` is expected to come back as `callId`.
+    expect(toolItem).toMatchObject({
+      name: 'search',
+      callId: 'tu_1',
+    });
+  });
+
+  it('tool_result blocks -> FunctionCallOutputItem (not EasyInputMessage, not function_call)', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'user',
+        content: [
+          {
+            type: 'tool_result' as const,
+            tool_use_id: 'tu_1',
+            content: 'Search result',
+          },
+        ],
+      },
+    ]);
+    const items = result as models.OutputItems[];
+    const outputItem = items.find((i) => i.type === 'function_call_output');
+    expect(outputItem).toBeDefined();
+    // `tool_use_id` is expected as `callId`, the block content as `output`.
+    expect(outputItem).toMatchObject({
+      callId: 'tu_1',
+      output: 'Search result',
+    });
+  });
+
+  it('image blocks -> structured content EasyInputMessage (not input_image alone)', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'user',
+        content: [
+          {
+            type: 'image' as const,
+            source: { type: 'url' as const, url: 'https://example.com/img.png' },
+          },
+        ],
+      },
+    ]);
+    const items = result as models.OutputItems[];
+    expect(items).toHaveLength(1);
+    expect(items[0]).toHaveProperty('role');
+    // Content must be an array whose first part is the image.
+    expect(items[0]).toHaveProperty('content', expect.any(Array));
+    expect(items[0]).toHaveProperty('content[0].type', 'input_image');
+  });
+});
diff --git a/tests/contracts/items-stream.test.ts b/tests/contracts/items-stream.test.ts
new file mode 100644
index 0000000..bd0d6de
--- /dev/null
+++ b/tests/contracts/items-stream.test.ts
@@ -0,0 +1,286 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { buildItemsStream } from '../../src/lib/stream-transformers.js';
+
+// Test helper: replays `events` in order through a ReusableReadableStream, then closes the source.
+function makeStream(
+  events: Record<string, unknown>[],
+): ReusableReadableStream<Record<string, unknown>> {
+  return new ReusableReadableStream(
+    new ReadableStream({
+      start(sink) {
+        for (const evt of events) sink.enqueue(evt);
+        sink.close();
+      },
+    }),
+  );
+}
+
+// Drains an async iterable to completion and returns every yielded value,
+// in yield order. Any rejection from the iterator propagates to the caller.
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const collected: T[] = [];
+  for await (const value of iter) collected.push(value);
+  return collected;
+}
+
+describe('buildItemsStream - yields distinct item types per event', () => {
+  it('message items: accumulated text from text deltas', async () => {
+    // Fixture: item added, two text deltas for it, then completion.
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: ' world',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    // The last message emitted must hold both deltas joined together.
+    const messages = items.filter((i) => i.type === 'message');
+    const lastMsg = messages[messages.length - 1]!;
+    const { content } = lastMsg as {
+      content: Array<{
+        text: string;
+      }>;
+    };
+    expect(content[0].text).toBe('Hello world');
+  });
+
+  it('function_call items: accumulated arguments from function_call deltas', async () => {
+    // Fixture: call added with empty arguments, two argument deltas, then completion.
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'function_call',
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '',
+          status: 'in_progress',
+        },
+      },
+      {
+        type: 'response.function_call_arguments.delta',
+        delta: '{"q":',
+        itemId: 'fc_1',
+      },
+      {
+        type: 'response.function_call_arguments.delta',
+        delta: '"test"}',
+        itemId: 'fc_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    // The last function_call emitted must hold the full JSON argument string.
+    const calls = items.filter((i) => i.type === 'function_call');
+    const lastFn = calls[calls.length - 1]!;
+    const { arguments: args } = lastFn as {
+      arguments: string;
+    };
+    expect(args).toBe('{"q":"test"}');
+  });
+
+  it('reasoning items: accumulated content from reasoning deltas', async () => {
+    // Fixture: reasoning item added, two reasoning deltas, then completion.
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'reasoning',
+          id: 'r_1',
+          status: 'in_progress',
+          summary: [],
+        },
+      },
+      {
+        type: 'response.reasoning_text.delta',
+        delta: 'thinking',
+        itemId: 'r_1',
+      },
+      {
+        type: 'response.reasoning_text.delta',
+        delta: ' more',
+        itemId: 'r_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    // The last reasoning item emitted must expose both deltas in its first summary part.
+    const reasoningItems = items.filter((i) => i.type === 'reasoning');
+    const lastReasoning = reasoningItems[reasoningItems.length - 1]!;
+    const { summary } = lastReasoning as {
+      summary: Array<{
+        text: string;
+      }>;
+    };
+    expect(summary[0].text).toBe('thinking more');
+  });
+
+  it('server tool items (web_search_call, file_search_call, image_generation_call): passthrough', async () => {
+    const webSearch = {
+      type: 'web_search_call',
+      id: 'ws_1',
+      status: 'completed',
+    };
+    const fileSearch = {
+      type: 'file_search_call',
+      id: 'fs_1',
+      status: 'completed',
+    };
+    const imageGen = {
+      type: 'image_generation_call',
+      id: 'ig_1',
+      status: 'completed',
+    };
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: webSearch,
+      },
+      {
+        type: 'response.output_item.added',
+        item: fileSearch,
+      },
+      {
+        type: 'response.output_item.added',
+        item: imageGen,
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    const seenTypes = items.map((i) => i.type);
+    // Every server-tool item type must show up among the emitted items.
+    const expectedTypes = ['web_search_call', 'file_search_call', 'image_generation_call'];
+    expect(seenTypes).toEqual(expect.arrayContaining(expectedTypes));
+  });
+
+  it('final complete items from output_item.done events', async () => {
+    // Fixture: item added, one delta, the completed item via output_item.done, then completion.
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hi',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_item.done',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'completed',
+          content: [
+            {
+              type: 'output_text',
+              text: 'Hi',
+              annotations: [],
+            },
+          ],
+        },
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    // The item delivered by output_item.done is expected to be the final one yielded,
+    // so the last element must report status 'completed'.
+    const doneItem = items[items.length - 1]!;
+    const { status } = doneItem as {
+      status: string;
+    };
+    expect(status).toBe('completed');
+  });
+
+  it('termination events (completed/failed/incomplete) -> stream stops', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hi',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+      // These should never be reached
+      {
+        type: 'response.output_text.delta',
+        delta: 'SHOULD NOT APPEAR',
+        itemId: 'msg_1',
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    // Concatenate the first content part's text of every emitted message; the delta
+    // queued after response.completed must not have leaked into any of them.
+    let allText = '';
+    for (const item of items) {
+      if (item.type !== 'message') continue;
+      const { content } = item as {
+        content?: Array<{
+          text?: string;
+        }>;
+      };
+      allText += content?.[0]?.text ?? '';
+    }
+    expect(allText).not.toContain('SHOULD NOT APPEAR');
+  });
+});
diff --git a/tests/contracts/message-stream-builders.test.ts b/tests/contracts/message-stream-builders.test.ts
new file mode 100644
index 0000000..9710bb8
--- /dev/null
+++ b/tests/contracts/message-stream-builders.test.ts
@@ -0,0 +1,118 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import {
+  buildMessageStream,
+  buildResponsesMessageStream,
+} from '../../src/lib/stream-transformers.js';
+
+// Builds a stream that replays `events` in order and then closes. Events are typed
+// structurally because this file imports no dedicated stream-event type.
+function makeStream(
+  events: Record<string, unknown>[],
+): ReusableReadableStream<Record<string, unknown>> {
+  const start = (controller: ReadableStreamDefaultController<Record<string, unknown>>) => {
+    events.forEach((event) => controller.enqueue(event));
+    controller.close();
+  };
+  return new ReusableReadableStream(new ReadableStream({ start }));
+}
+
+// Drains an async iterable to completion and returns every yielded value,
+// in yield order. Any rejection from the iterator propagates to the caller.
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const collected: T[] = [];
+  for await (const value of iter) collected.push(value);
+  return collected;
+}
+
+// Shared fixture fed to both message-stream builders: one assistant message is
+// added, receives two text deltas ('Hello' + ' world'), is reported done with
+// the full text, and the response then completes.
+const streamEvents = [
+  {
+    type: 'response.output_item.added',
+    item: {
+      type: 'message',
+      id: 'msg_1',
+      role: 'assistant',
+      status: 'in_progress',
+      content: [],
+    },
+  },
+  {
+    type: 'response.output_text.delta',
+    delta: 'Hello',
+    itemId: 'msg_1',
+  },
+  {
+    type: 'response.output_text.delta',
+    delta: ' world',
+    itemId: 'msg_1',
+  },
+  {
+    type: 'response.output_item.done',
+    item: {
+      type: 'message',
+      id: 'msg_1',
+      role: 'assistant',
+      status: 'completed',
+      content: [
+        {
+          type: 'output_text',
+          text: 'Hello world',
+          annotations: [],
+        },
+      ],
+    },
+  },
+  { type: 'response.completed', response: {} },
+];
+
+describe('Message stream builders - same input, structurally distinct outputs', () => {
+  it('buildResponsesMessageStream yields OutputMessage: { id, type: "message", role: "assistant", content: [...] }', async () => {
+    const stream = makeStream(streamEvents);
+    const emitted = await collectAll(buildResponsesMessageStream(stream));
+    expect(emitted.length).toBeGreaterThan(0);
+    // The final emission must look like a Responses-style output message.
+    const finalMessage = emitted[emitted.length - 1]!;
+    expect(finalMessage).toHaveProperty('id');
+    expect(finalMessage).toMatchObject({ type: 'message', role: 'assistant' });
+    expect(finalMessage).toHaveProperty('content');
+    expect(Array.isArray(finalMessage.content)).toBe(true);
+  });
+
+  it('buildMessageStream yields ChatAssistantMessage: { role: "assistant", content: string }', async () => {
+    const emitted = await collectAll(buildMessageStream(makeStream(streamEvents)));
+    expect(emitted.length).toBeGreaterThan(0);
+    // Chat-style messages carry plain string content and neither `id` nor `type`.
+    const finalMessage = emitted[emitted.length - 1]!;
+    expect(finalMessage).toHaveProperty('role', 'assistant');
+    expect(typeof finalMessage.content).toBe('string');
+    expect(finalMessage).not.toHaveProperty('id');
+    expect(finalMessage).not.toHaveProperty('type');
+  });
+
+  it('same stream events -> both produce same text content but structurally different objects', async () => {
+    // Each builder consumes its own stream instance, both built from the
+    // shared fixture, so neither can observe the other's reads.
+    const responsesResults = await collectAll(
+      buildResponsesMessageStream(makeStream(streamEvents)),
+    );
+    const chatResults = await collectAll(buildMessageStream(makeStream(streamEvents)));
+
+    const responsesLast = responsesResults[responsesResults.length - 1]!;
+    const chatLast = chatResults[chatResults.length - 1]!;
+
+    // Same text once the output_text parts of the Responses message are joined
+    const responsesText = responsesLast.content
+      .map((c: { type: string; text?: string }) => (c.type === 'output_text' ? c.text : ''))
+      .join('');
+    expect(responsesText).toBe('Hello world');
+    expect(chatLast.content).toBe('Hello world');
+
+    // Only the Responses-shaped message exposes an id
+    expect(Reflect.has(responsesLast, 'id')).toBe(true);
+    expect(Reflect.has(chatLast, 'id')).toBe(false);
+  });
+});
diff --git a/tests/contracts/response-extractors.test.ts b/tests/contracts/response-extractors.test.ts
new file mode 100644
index 0000000..77a71f7
--- /dev/null
+++ b/tests/contracts/response-extractors.test.ts
@@ -0,0 +1,99 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  extractMessageFromResponse,
+  extractResponsesMessageFromResponse,
+} from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+/** Builds a completed response whose only output item is an assistant message containing `text`. */
+function makeResponse(text: string) {
+  const message = {
+    type: 'message' as const,
+    id: 'msg_1',
+    role: 'assistant' as const,
+    status: 'completed' as const,
+    content: [
+      {
+        type: 'output_text' as const,
+        text,
+        annotations: [],
+      },
+    ],
+  };
+  return {
+    id: 'r1',
+    output: [message],
+    status: 'completed' as const,
+    outputText: text,
+    model: TEST_MODEL,
+    usage: {
+      totalTokens: 100,
+      inputTokens: 50,
+      outputTokens: 50,
+    },
+  };
+}
+
+describe('Response extractors - same response, distinct shapes', () => {
+  it('extractMessageFromResponse returns ChatAssistantMessage (role + content string)', () => {
+    const msg = extractMessageFromResponse(makeResponse('Hello world'));
+    // Chat shape: assistant role, string content, and neither `id` nor `type`.
+    expect(msg.role).toBe('assistant');
+    expect(typeof msg.content).toBe('string');
+    expect(msg).not.toHaveProperty('id');
+    expect(msg).not.toHaveProperty('type');
+  });
+
+  it('extractResponsesMessageFromResponse returns OutputMessage (id + type + content array)', () => {
+    const msg = extractResponsesMessageFromResponse(makeResponse('Hello world'));
+    // Responses shape: keeps the item's id and type, with content as an array of parts.
+    expect(msg.id).toBe('msg_1');
+    expect(msg.type).toBe('message');
+    expect(Array.isArray(msg.content)).toBe(true);
+  });
+
+  it('same response -> both extract same text but structurally different objects', () => {
+    // Both extractors read the very same response object.
+    const response = makeResponse('Hello world');
+    const chatMsg = extractMessageFromResponse(response);
+    const responsesMsg = extractResponsesMessageFromResponse(response);
+
+    expect(chatMsg.content).toBe('Hello world');
+    const responsesText = responsesMsg.content
+      .map((c: { type: string; text?: string }) => (c.type === 'output_text' ? c.text : ''))
+      .join('');
+    expect(responsesText).toBe('Hello world');
+
+    // Only the Responses-shaped message exposes an id
+    expect(Reflect.has(chatMsg, 'id')).toBe(false);
+    expect(Reflect.has(responsesMsg, 'id')).toBe(true);
+  });
+
+  it('both throw when response has no message item', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'function_call' as const,
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '{}',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+    // The only output item is a function_call, so neither extractor has a message to return.
+    expect(() => extractMessageFromResponse(response)).toThrow('No message found');
+    expect(() => extractResponsesMessageFromResponse(response)).toThrow('No message found');
+  });
+});
diff --git a/tests/contracts/tool-call-response-consistency.test.ts b/tests/contracts/tool-call-response-consistency.test.ts
new file mode 100644
index 0000000..ac40989
--- /dev/null
+++ b/tests/contracts/tool-call-response-consistency.test.ts
@@ -0,0 +1,72 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  extractToolCallsFromResponse,
+  responseHasToolCalls,
+} from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('responseHasToolCalls and extractToolCallsFromResponse produce consistent results', () => {
+  it('responseHasToolCalls returning true <-> extractToolCallsFromResponse returning non-empty', () => {
+    const responseWithTools = {
+      id: 'r1',
+      output: [
+        {
+          type: 'function_call' as const,
+          id: 'fc1',
+          callId: 'fc1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    // Positive case: the response holds a function_call item, so the boolean check
+    // and the extractor must both report tool calls.
+    expect(responseHasToolCalls(responseWithTools)).toBe(true);
+    const extracted = extractToolCallsFromResponse(responseWithTools);
+    expect(extracted.length).toBeGreaterThan(0);
+
+    const responseNoTools = {
+      id: 'r2',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'm1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Hello',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    // Negative case: a message-only response must yield `false` from the check
+    // and an empty list from the extractor.
+    expect(responseHasToolCalls(responseNoTools)).toBe(false);
+    const noCalls = extractToolCallsFromResponse(responseNoTools);
+    expect(noCalls).toEqual([]);
+  });
+});
diff --git a/tests/contracts/tool-factory-shapes.test.ts b/tests/contracts/tool-factory-shapes.test.ts
new file mode 100644
index 0000000..cb70cc3
--- /dev/null
+++ b/tests/contracts/tool-factory-shapes.test.ts
@@ -0,0 +1,92 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+
+// Shared input schema: the tests below vary only the tool configuration,
+// never the input shape.
+const inputSchema = z.object({ query: z.string() });
+
+describe('tool() factory - three tool types produce distinct structures', () => {
+  it('regular tool has execute function, no eventSchema', () => {
+    const { function: fn } = tool({
+      name: 'regular',
+      inputSchema,
+      execute: async () => 'done',
+    });
+    expect(fn).toHaveProperty('execute');
+    expect(fn).not.toHaveProperty('eventSchema');
+  });
+
+  it('generator tool has execute function AND eventSchema AND outputSchema', () => {
+    // Configured with an async-generator execute plus both eventSchema and outputSchema.
+    const { function: fn } = tool({
+      name: 'generator',
+      inputSchema,
+      eventSchema: z.object({
+        status: z.string(),
+      }),
+      outputSchema: z.object({
+        result: z.string(),
+      }),
+      execute: async function* () {
+        // One progress event, then the final result.
+        yield { status: 'working' };
+        return {
+          result: 'done',
+        };
+      },
+    });
+    expect(fn).toHaveProperty('execute');
+    expect(fn).toHaveProperty('eventSchema');
+    expect(fn).toHaveProperty('outputSchema');
+  });
+
+  it('manual tool has NO execute, no eventSchema, no outputSchema', () => {
+    const { function: fn } = tool({
+      name: 'manual',
+      inputSchema,
+      execute: false,
+    });
+    for (const key of ['execute', 'eventSchema', 'outputSchema']) {
+      expect(fn).not.toHaveProperty(key);
+    }
+  });
+
+  it('same input schema -> three different tool types depending on config', () => {
+    const regular = tool({
+      name: 'r',
+      inputSchema,
+      execute: async () => 'ok',
+    });
+    const generator = tool({
+      name: 'g',
+      inputSchema,
+      eventSchema: z.object({
+        s: z.string(),
+      }),
+      outputSchema: z.object({
+        r: z.string(),
+      }),
+      execute: async function* () {
+        return {
+          r: 'ok',
+        };
+      },
+    });
+    const manual = tool({
+      name: 'm',
+      inputSchema,
+      execute: false,
+    });
+
+    // Key presence on `.function` is what distinguishes the three variants.
+    const shape = (t: { function: object }) => ({
+      execute: 'execute' in t.function,
+      eventSchema: 'eventSchema' in t.function,
+    });
+    expect(shape(regular)).toEqual({ execute: true, eventSchema: false });
+    expect(shape(generator)).toEqual({ execute: true, eventSchema: true });
+    expect(shape(manual)).toEqual({ execute: false, eventSchema: false });
+  });
+});
diff --git a/tests/dispatch/README.md b/tests/dispatch/README.md
new file mode 100644
index 0000000..dd6a3ba
--- /dev/null
+++ b/tests/dispatch/README.md
@@ -0,0 +1,25 @@
+# Dispatch Tests
+
+Tests in this folder verify that **boundaries between capabilities hold at their composition points**: each test checks both that the correct path is chosen and that the modules on that path connect. This is where routing logic meets module integration.
+
+## What belongs here
+
+- `executeTool` dispatching to the correct executor based on type guards
+- `convertToClaudeMessage` routing items via output item guards to distinct Claude blocks
+- `buildItemsStream` routing events via stream type guards to correct handlers
+- `fromClaudeMessages` routing mixed block types to distinct output types
+- Approval partitioning choosing the correct path based on tool-level vs call-level checks
+- Extensible: when new routing or dispatching logic is added, its boundary-at-junction tests go here
+
+## Examples
+
+- `executeTool` dispatches regular tool to `executeRegularTool` because `isRegularExecuteTool` returns true
+- Same response with message + function_call: `isOutputMessage` -> text block, `isFunctionCallItem` -> tool_use block
+- `partitionToolCalls` with call-level check overrides tool-level `requireApproval`
+- `buildItemsStream` routes `output_item.added` to handler because `isOutputItemAddedEvent` matches
+
+## What does NOT belong here
+
+- Pure boundary testing without composition (→ `boundaries/`)
+- Pure composition without boundary verification (→ `composition/`)
+- Full end-to-end pipelines (→ `pipelines/`)
diff --git a/tests/dispatch/approval-partition-dispatch.test.ts b/tests/dispatch/approval-partition-dispatch.test.ts
new file mode 100644
index 0000000..3bdce4b
--- /dev/null
+++ b/tests/dispatch/approval-partition-dispatch.test.ts
@@ -0,0 +1,79 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import { partitionToolCalls } from '../../src/lib/conversation-state.js';
+
+describe('Approval partitioning dispatches via tool-level vs call-level checks', () => {
+  const approvalTool = tool({
+    name: 'dangerous',
+    inputSchema: z.object({
+      target: z.string(),
+    }),
+    requireApproval: true,
+    execute: async () => 'deleted',
+  });
+  // Counterpart declared without requireApproval
+  const safeTool = tool({
+    name: 'safe',
+    inputSchema: z.object({
+      q: z.string(),
+    }),
+    execute: async () => 'result',
+  });
+  // One call per tool, matched to its tool by name
+  const toolCalls = [
+    {
+      id: 'tc_1',
+      name: 'dangerous',
+      arguments: {
+        target: 'file.txt',
+      },
+    },
+    {
+      id: 'tc_2',
+      name: 'safe',
+      arguments: {
+        q: 'hello',
+      },
+    },
+  ];
+
+  it('partitionToolCalls with call-level check -> call-level overrides tool-level requireApproval', async () => {
+    // The call-level check declines approval for every call it is asked about.
+    const neverRequireApproval = async () => false;
+    const { autoExecute, requiresApproval } = await partitionToolCalls(
+      toolCalls,
+      [
+        approvalTool,
+        safeTool,
+      ],
+      {
+        numberOfTurns: 1,
+      },
+      neverRequireApproval,
+    );
+    // With the override in place, the tool-level requireApproval flag on
+    // `dangerous` must be ignored and both calls land in autoExecute.
+    expect(autoExecute).toHaveLength(2);
+    expect(requiresApproval).toHaveLength(0);
+  });
+
+  it('partitionToolCalls without call-level check -> falls back to each tool requireApproval', async () => {
+    const { autoExecute, requiresApproval } = await partitionToolCalls(
+      toolCalls,
+      [
+        approvalTool,
+        safeTool,
+      ],
+      {
+        numberOfTurns: 1,
+      },
+    );
+    // No override: only the call to the tool declared with requireApproval
+    // needs approval; the other one is auto-executed.
+    const names = (calls: Array<{ name: string }>) => calls.map((c) => c.name);
+    expect(names(requiresApproval)).toEqual(['dangerous']);
+    expect(names(autoExecute)).toEqual(['safe']);
+  });
+});
diff --git a/tests/dispatch/claude-conversion-deep-dispatch.test.ts b/tests/dispatch/claude-conversion-deep-dispatch.test.ts
new file mode 100644
index 0000000..58ce914
--- /dev/null
+++ b/tests/dispatch/claude-conversion-deep-dispatch.test.ts
@@ -0,0 +1,67 @@
+import { describe, expect, it } from 'vitest';
+
+import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('convertToClaudeMessage routes multi-item response via output item guards', () => {
+  it('multi-item response: message + function_call + reasoning + web_search -> each guard routes to distinct block', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+        {
+          type: 'function_call' as const,
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+        {
+          type: 'reasoning' as const,
+          id: 'r_1',
+          status: 'completed' as const,
+          summary: [
+            {
+              type: 'summary_text' as const,
+              text: 'thinking',
+            },
+          ],
+        },
+        {
+          type: 'web_search_call' as const,
+          id: 'ws_1',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Hello',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 200,
+        inputTokens: 100,
+        outputTokens: 100,
+      },
+    };
+
+    const { content } = convertToClaudeMessage(response);
+    const blockTypes = content.map((b: { type: string }) => b.type);
+
+    // Each output item kind in the fixture must have produced its Claude block type.
+    for (const expected of ['text', 'tool_use', 'thinking', 'server_tool_use']) {
+      expect(blockTypes).toContain(expected);
+    }
+  });
+});
diff --git a/tests/dispatch/claude-conversion-dispatch.test.ts b/tests/dispatch/claude-conversion-dispatch.test.ts
new file mode 100644
index 0000000..f30f073
--- /dev/null
+++ b/tests/dispatch/claude-conversion-dispatch.test.ts
@@ -0,0 +1,107 @@
+import { describe, expect, it } from 'vitest';
+
+import { convertToClaudeMessage } from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('convertToClaudeMessage routes items via output item guards', () => {
+  it('same response with message + function_call: isOutputMessage -> text block, isFunctionCallItem -> tool_use block', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+        {
+          type: 'function_call' as const,
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Hello',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    // Look blocks up by their Claude `type` tag.
+    const { content } = convertToClaudeMessage(response);
+    const blockOfType = (kind: string) =>
+      content.find((b: { type: string }) => b.type === kind);
+
+    // message item -> text block carrying the message text
+    const textBlock = blockOfType('text');
+    expect(textBlock).toBeDefined();
+    const { text } = textBlock as {
+      type: string;
+      text: string;
+    };
+    expect(text).toBe('Hello');
+
+    // function_call item -> tool_use block named after the called function
+    const toolBlock = blockOfType('tool_use');
+    expect(toolBlock).toBeDefined();
+    const { name: toolName } = toolBlock as {
+      type: string;
+      name: string;
+    };
+    expect(toolName).toBe('search');
+  });
+
+  it('same response with reasoning + web_search_call: isReasoningOutputItem -> thinking, isWebSearchCallOutputItem -> server_tool_use', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'reasoning' as const,
+          id: 'r_1',
+          status: 'completed' as const,
+          summary: [
+            {
+              type: 'summary_text' as const,
+              text: 'thinking about it',
+            },
+          ],
+        },
+        {
+          type: 'web_search_call' as const,
+          id: 'ws_1',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const { content } = convertToClaudeMessage(response);
+    const blockOfType = (kind: string) =>
+      content.find((b: { type: string }) => b.type === kind);
+
+    // reasoning item -> thinking block
+    expect(blockOfType('thinking')).toBeDefined();
+    // web_search_call item -> server_tool_use block
+    expect(blockOfType('server_tool_use')).toBeDefined();
+  });
+});
diff --git a/tests/dispatch/from-claude-dispatch.test.ts b/tests/dispatch/from-claude-dispatch.test.ts
new file mode 100644
index 0000000..8002880
--- /dev/null
+++ b/tests/dispatch/from-claude-dispatch.test.ts
@@ -0,0 +1,59 @@
+import type * as models from '@openrouter/sdk/models';
+import { describe, expect, it } from 'vitest';
+
+import { fromClaudeMessages } from '../../src/lib/anthropic-compat.js';
+
+describe('fromClaudeMessages routes blocks to distinct output types', () => {
+  it('mixed Claude message with text + tool_use + tool_result blocks -> each block produces its correct OR type, interleaved correctly', () => {
+    const result = fromClaudeMessages([
+      {
+        role: 'assistant',
+        content: [
+          {
+            type: 'text' as const,
+            text: 'Let me search for that.',
+          },
+          {
+            type: 'tool_use' as const,
+            id: 'tu_1',
+            name: 'search',
+            input: {
+              q: 'test',
+            },
+          },
+        ],
+      },
+      {
+        role: 'user',
+        content: [
+          {
+            type: 'tool_result' as const,
+            tool_use_id: 'tu_1',
+            content: 'Found results',
+          },
+          {
+            type: 'text' as const,
+            text: 'Thanks for the results',
+          },
+        ],
+      },
+    ]);
+
+    const items = result as models.OutputItems[];
+    // Items with no `type` tag are labelled 'easy_input_message' for the checks below.
+    const types = items.map((i) => i.type || 'easy_input_message');
+
+    // Both tool items must exist, and the call must precede its output: the
+    // title promises correct interleaving, so the order is asserted explicitly.
+    expect(types).toContain('function_call');
+    expect(types).toContain('function_call_output');
+    expect(types.indexOf('function_call')).toBeLessThan(types.indexOf('function_call_output'));
+
+    // toMatchObject also fails on undefined, so the find() results need no unchecked access.
+    const fnCall = items.find((i) => i.type === 'function_call');
+    expect(fnCall).toMatchObject({ name: 'search', callId: 'tu_1' });
+
+    const fnOutput = items.find((i) => i.type === 'function_call_output');
+    expect(fnOutput).toMatchObject({ callId: 'tu_1', output: 'Found results' });
+  });
+});
diff --git a/tests/dispatch/items-stream-dispatch.test.ts b/tests/dispatch/items-stream-dispatch.test.ts
new file mode 100644
index 0000000..d2480be
--- /dev/null
+++ b/tests/dispatch/items-stream-dispatch.test.ts
@@ -0,0 +1,76 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { buildItemsStream } from '../../src/lib/stream-transformers.js';
+
+// Test helper: wraps a ReadableStream that enqueues `events` in order and then closes.
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
+  return new ReusableReadableStream(
+    new ReadableStream({
+      start(ctrl) {
+        for (const ev of events) ctrl.enqueue(ev);
+        ctrl.close();
+      },
+    }),
+  );
+}
+
+// Test helper: drains `iter` to completion and returns every yielded value,
+// in the order it was produced.
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const collected: T[] = [];
+  for await (const value of iter) collected.push(value);
+  return collected;
+}
+
+describe('buildItemsStream routes events via stream type guards', () => {
+  it('routes output_item.added to handler because isOutputItemAddedEvent matches (not other guards)', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    expect(items.length).toBeGreaterThan(0);
+    expect(items[0].type).toBe('message');
+  });
+
+  it('skips unknown event types that do not match any guard', async () => {
+    const events = [
+      {
+        type: 'response.some_unknown_event',
+        data: 'ignored',
+      },
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ];
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+    expect(items.length).toBeGreaterThan(0); // every() below is vacuously true for an empty array
+    expect(items.every((i) => i.type === 'message')).toBe(true);
+  });
+});
diff --git a/tests/integration/README.md b/tests/integration/README.md
new file mode 100644
index 0000000..62eb964
--- /dev/null
+++ b/tests/integration/README.md
@@ -0,0 +1,26 @@
+# Integration Tests
+
+Tests in this folder verify that a capability **works as specified AND composes correctly with another module**. Both the output correctness and the cross-module data flow are asserted.
+
+## What belongs here
+
+- A function produces the correct output AND that output successfully feeds the next module
+- Stop conditions evaluated against real `StepResult` shapes from the orchestrator
+- Context store data flowing through `buildToolExecuteContext` into a tool's `execute` function
+- Stream consumers at different speeds both getting complete, correct data
+- Next-turn param computation feeding into request modification with verified results
+- This list is extensible: when new cross-module flows are introduced, their correctness-through-composition tests go here
+
+## Examples
+
+- `consumeStreamForCompletion` returns the response because `isResponseCompletedEvent` identified it
+- `stepCountIs` works with `StepResult[]` containing real `usage` and `toolCalls` data
+- Tool's `execute` receives context where `local` reflects store data set before execution
+- Two `ReusableReadableStream` consumers at different read speeds both get all items correctly
+- `executeNextTurnParamsFunctions` computes temperature -> `applyNextTurnParamsToRequest` produces correct request
+
+## What does NOT belong here
+
+- Pure specification without cross-module flow (→ `behavior/`)
+- Pure composition without output verification (→ `composition/`)
+- Boundary checks at composition points (→ `dispatch/`)
diff --git a/tests/integration/claude-unsupported-content.test.ts b/tests/integration/claude-unsupported-content.test.ts
new file mode 100644
index 0000000..e343dc3
--- /dev/null
+++ b/tests/integration/claude-unsupported-content.test.ts
@@ -0,0 +1,52 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  convertToClaudeMessage,
+  getUnsupportedContentSummary,
+  hasUnsupportedContent,
+} from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('convertToClaudeMessage -> unsupported content utilities', () => {
+  it('unsupported content round-trip: refusal + image_generation -> convertToClaudeMessage -> unsupported_content utilities work', () => {
+    const orResponse = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'refusal' as const,
+              refusal: 'I cannot do that',
+            },
+          ],
+        },
+        {
+          type: 'image_generation_call' as const,
+          id: 'ig_1',
+          result: 'base64data',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: '',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+
+    const claudeMessage = convertToClaudeMessage(orResponse);
+    // The helpers are handed the converted message as a whole
+    expect(hasUnsupportedContent(claudeMessage)).toBe(true);
+    const unsupportedSummary = getUnsupportedContentSummary(claudeMessage);
+    expect(unsupportedSummary).toBeDefined();
+    // A refusal plus an image_generation_call must leave at least one entry in the summary
+    expect(Object.keys(unsupportedSummary).length).toBeGreaterThan(0);
+  });
+});
diff --git a/tests/integration/format-round-trip.test.ts b/tests/integration/format-round-trip.test.ts
new file mode 100644
index 0000000..0a46779
--- /dev/null
+++ b/tests/integration/format-round-trip.test.ts
@@ -0,0 +1,171 @@
+import type * as models from '@openrouter/sdk/models';
+import { describe, expect, it } from 'vitest';
+
+import { fromClaudeMessages, toClaudeMessage } from '../../src/lib/anthropic-compat.js';
+import { fromChatMessages, toChatMessage } from '../../src/lib/chat-compat.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('Bidirectional format conversion', () => {
+  it('Claude round-trip: Claude messages -> fromClaudeMessages -> OR format -> each block type maps distinctly', () => {
+    const claudeInput = [
+      {
+        role: 'user' as const,
+        content: [
+          {
+            type: 'text' as const,
+            text: 'Search for cats',
+          },
+        ],
+      },
+      {
+        role: 'assistant' as const,
+        content: [
+          {
+            type: 'text' as const,
+            text: 'Let me search.',
+          },
+          {
+            type: 'tool_use' as const,
+            id: 'tu_1',
+            name: 'search',
+            input: {
+              q: 'cats',
+            },
+          },
+        ],
+      },
+      {
+        role: 'user' as const,
+        content: [
+          {
+            type: 'tool_result' as const,
+            tool_use_id: 'tu_1',
+            content: 'Found cats',
+          },
+        ],
+      },
+    ];
+
+    // Inbound direction: Claude messages are converted into OR items
+    const converted = fromClaudeMessages(claudeInput);
+    const orItems = converted as models.OutputItems[];
+
+    // Text blocks: at least one item that is untyped or carries a role
+    const messageItems = orItems.filter((item) => !item.type || item.role);
+    expect(messageItems.length).toBeGreaterThan(0);
+
+    // tool_use block: exactly one function_call item, named after the tool
+    const functionCalls = orItems.filter((item) => item.type === 'function_call');
+    expect(functionCalls).toHaveLength(1);
+    expect(functionCalls[0].name).toBe('search');
+
+    // tool_result block: exactly one function_call_output item tied to the same call id
+    const functionOutputs = orItems.filter((item) => item.type === 'function_call_output');
+    expect(functionOutputs).toHaveLength(1);
+    expect(functionOutputs[0].callId).toBe('tu_1');
+
+    // Outbound direction: an OR response is converted into a Claude message
+    const orResponse = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Here are cats',
+              annotations: [],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Here are cats',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+    const claudeMessage = toClaudeMessage(orResponse);
+    expect(claudeMessage.role).toBe('assistant');
+    expect(Array.isArray(claudeMessage.content)).toBe(true);
+  });
+
+  it('Chat round-trip: Chat messages -> fromChatMessages -> OR format -> each role maps distinctly', () => {
+    const chatInput = [
+      {
+        role: 'system' as const,
+        content: 'You are helpful',
+      },
+      {
+        role: 'user' as const,
+        content: 'Hello',
+      },
+      {
+        role: 'assistant' as const,
+        content: 'Hi there',
+      },
+      {
+        role: 'tool' as const,
+        toolCallId: 'tc_1',
+        content: 'Tool result',
+      },
+    ];
+
+    // Inbound direction: Chat messages are converted into OR items
+    const converted = fromChatMessages(chatInput);
+    const orItems = converted as models.OutputItems[];
+
+    // system role: exactly one item
+    const systemCount = orItems.filter((item) => item.role === 'system').length;
+    expect(systemCount).toBe(1);
+
+    // user role: exactly one item
+    const userCount = orItems.filter((item) => item.role === 'user').length;
+    expect(userCount).toBe(1);
+
+    // assistant role: exactly one item
+    const assistantCount = orItems.filter((item) => item.role === 'assistant').length;
+    expect(assistantCount).toBe(1);
+
+    // tool role: exactly one function_call_output item
+    const toolOutputs = orItems.filter((item) => item.type === 'function_call_output');
+    expect(toolOutputs).toHaveLength(1);
+
+    // Outbound direction: an OR response is converted into a Chat message
+    const orResponse = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Response',
+              annotations: [],
+            },
+          ],
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Response',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    };
+    const chatMessage = toChatMessage(orResponse);
+    expect(chatMessage.role).toBe('assistant');
+    expect(typeof chatMessage.content).toBe('string');
+  });
+});
diff --git a/tests/integration/next-turn-params-flow.test.ts b/tests/integration/next-turn-params-flow.test.ts
new file mode 100644
index 0000000..95f1b5d
--- /dev/null
+++ b/tests/integration/next-turn-params-flow.test.ts
@@ -0,0 +1,63 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  applyNextTurnParamsToRequest,
+  buildNextTurnParamsContext,
+  executeNextTurnParamsFunctions,
+} from '../../src/lib/next-turn-params.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('Next-turn params: tools -> computation -> request modification', () => {
+  it('executeNextTurnParamsFunctions output accepted by applyNextTurnParamsToRequest -> modified request', async () => {
+    const searchTools = [
+      {
+        type: 'function',
+        function: {
+          name: 'search',
+          nextTurnParams: {
+            temperature: () => 0.5,
+          },
+        },
+      },
+    ];
+    // One call to the tool that declares nextTurnParams
+    const pendingCalls = [
+      {
+        id: 'tc_1',
+        name: 'search',
+        arguments: {
+          q: 'test',
+        },
+      },
+    ];
+    const baseRequest = {
+      model: TEST_MODEL,
+      temperature: 0.7,
+    };
+
+    const computed = await executeNextTurnParamsFunctions(
+      pendingCalls,
+      searchTools,
+      baseRequest,
+    );
+    // Stage 1: the tool's temperature function produced the new value
+    expect(computed).toHaveProperty('temperature', 0.5);
+    // Stage 2: applying it overrides temperature and leaves model untouched
+    const nextRequest = applyNextTurnParamsToRequest(baseRequest, computed);
+    expect(nextRequest.temperature).toBe(0.5);
+    expect(nextRequest.model).toBe(TEST_MODEL);
+  });
+
+  it('buildNextTurnParamsContext extracts from request -> context passed to nextTurnParams functions', () => {
+    const baseRequest = {
+      model: TEST_MODEL,
+      temperature: 0.7,
+      input: 'hello',
+    };
+    // Each request field checked below must be carried over to the context
+    const context = buildNextTurnParamsContext(baseRequest);
+    expect(context.model).toBe(TEST_MODEL);
+    expect(context.temperature).toBe(0.7);
+    expect(context.input).toBe('hello');
+  });
+});
diff --git a/tests/integration/next-turn-params-request.test.ts b/tests/integration/next-turn-params-request.test.ts
new file mode 100644
index 0000000..da44632
--- /dev/null
+++ b/tests/integration/next-turn-params-request.test.ts
@@ -0,0 +1,44 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  applyNextTurnParamsToRequest,
+  executeNextTurnParamsFunctions,
+} from '../../src/lib/next-turn-params.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('Next-turn params -> request modification -> API readiness', () => {
+  it('executeNextTurnParamsFunctions computes new temperature -> applyNextTurnParamsToRequest produces request with updated temperature', async () => {
+    const searchTools = [
+      {
+        type: 'function',
+        function: {
+          name: 'search',
+          nextTurnParams: {
+            temperature: () => 0.3,
+          },
+        },
+      },
+    ];
+    // One call to the tool that declares nextTurnParams
+    const pendingCalls = [
+      {
+        id: 'tc_1',
+        name: 'search',
+        arguments: {
+          q: 'test',
+        },
+      },
+    ];
+    const baseRequest = {
+      model: TEST_MODEL,
+      temperature: 0.7,
+      input: 'hello',
+    };
+    const computed = await executeNextTurnParamsFunctions(pendingCalls, searchTools, baseRequest);
+    // Only temperature changes; model and input pass through unchanged
+    const nextRequest = applyNextTurnParamsToRequest(baseRequest, computed);
+    expect(nextRequest.temperature).toBe(0.3);
+    expect(nextRequest.model).toBe(TEST_MODEL);
+    expect(nextRequest.input).toBe('hello');
+  });
+});
diff --git a/tests/integration/orchestrator-executor.test.ts b/tests/integration/orchestrator-executor.test.ts
new file mode 100644
index 0000000..8afd8e2
--- /dev/null
+++ b/tests/integration/orchestrator-executor.test.ts
@@ -0,0 +1,66 @@
+import { describe, expect, it } from 'vitest';
+import {
+  hasToolExecutionErrors,
+  summarizeToolExecutions,
+  toolResultsToMap,
+} from '../../src/lib/tool-orchestrator.js';
+import type { Tool, ToolExecutionResult } from '../../src/lib/tool-types.js';
+
+describe('Orchestrator <- Executor: utility functions consume real ToolExecutionResult', () => {
+  const okResult: ToolExecutionResult<Tool> = {
+    toolCallId: 'tc_1',
+    toolName: 'search',
+    result: {
+      data: 'found',
+    },
+  };
+
+  const failedResult: ToolExecutionResult<Tool> = {
+    toolCallId: 'tc_2',
+    toolName: 'delete',
+    result: null,
+    error: new Error('Permission denied'),
+  };
+
+  it('toolResultsToMap correctly maps real ToolExecutionResult objects', () => {
+    const byCallId = toolResultsToMap([
+      okResult,
+      failedResult,
+    ]);
+    expect(byCallId.get('tc_1')).toEqual({
+      result: {
+        data: 'found',
+      },
+      preliminaryResults: undefined,
+    });
+    expect(byCallId.get('tc_2')).toEqual({
+      result: null,
+      preliminaryResults: undefined,
+    });
+  });
+
+  it('hasToolExecutionErrors detects error field on real ToolExecutionResult', () => {
+    // A batch holding only the successful result reports no errors
+    const cleanBatch = [
+      okResult,
+    ];
+    expect(hasToolExecutionErrors(cleanBatch)).toBe(false);
+    // Adding the failed result flips the check
+    const mixedBatch = [
+      okResult,
+      failedResult,
+    ];
+    expect(hasToolExecutionErrors(mixedBatch)).toBe(true);
+  });
+
+  it('summarizeToolExecutions formats real success + error results', () => {
+    const report = summarizeToolExecutions([
+      okResult,
+      failedResult,
+    ]);
+    expect(report).toContain('search');
+    expect(report).toContain('SUCCESS');
+    expect(report).toContain('delete');
+    expect(report).toContain('Permission denied');
+  });
+});
diff --git a/tests/integration/reusable-stream-consumers.test.ts b/tests/integration/reusable-stream-consumers.test.ts
new file mode 100644
index 0000000..82ae5ec
--- /dev/null
+++ b/tests/integration/reusable-stream-consumers.test.ts
@@ -0,0 +1,81 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { buildItemsStream, consumeStreamForCompletion } from '../../src/lib/stream-transformers.js';
+
+// Test helper: wraps a ReadableStream that enqueues `events` in order and then closes.
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
+  return new ReusableReadableStream(
+    new ReadableStream({
+      start(ctrl) {
+        for (const ev of events) ctrl.enqueue(ev);
+        ctrl.close();
+      },
+    }),
+  );
+}
+
+// Test helper: drains `iter` to completion and returns every yielded value,
+// in the order it was produced.
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const collected: T[] = [];
+  for await (const value of iter) collected.push(value);
+  return collected;
+}
+
+describe('ReusableReadableStream -> concurrent transformer consumption', () => {
+  it('buildItemsStream and consumeStreamForCompletion both consume same stream correctly', async () => {
+    const finalResponse = {
+      id: 'r1',
+      status: 'completed',
+      output: [],
+    };
+    const sourceEvents = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_item.done',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'completed',
+          content: [
+            {
+              type: 'output_text',
+              text: 'Hello',
+              annotations: [],
+            },
+          ],
+        },
+      },
+      {
+        type: 'response.completed',
+        response: finalResponse,
+      },
+    ];
+
+    const shared = makeStream(sourceEvents);
+    // Both transformers are started against the same stream instance before either is awaited
+    const [yieldedItems, completed] = await Promise.all([
+      collectAll(buildItemsStream(shared)),
+      consumeStreamForCompletion(shared),
+    ]);
+
+    expect(yieldedItems.length).toBeGreaterThan(0);
+    expect(completed).toEqual(finalResponse);
+  });
+});
diff --git a/tests/integration/stream-completion-guards.test.ts b/tests/integration/stream-completion-guards.test.ts
new file mode 100644
index 0000000..a291270
--- /dev/null
+++ b/tests/integration/stream-completion-guards.test.ts
@@ -0,0 +1,60 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import { consumeStreamForCompletion } from '../../src/lib/stream-transformers.js';
+
+// Test helper: wraps a ReadableStream that enqueues `events` in order and then closes.
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
+  return new ReusableReadableStream(
+    new ReadableStream({
+      start(ctrl) {
+        for (const ev of events) ctrl.enqueue(ev);
+        ctrl.close();
+      },
+    }),
+  );
+}
+
+describe('consumeStreamForCompletion + stream type guards', () => {
+  it('returns response object because isResponseCompletedEvent identified the completion event', async () => {
+    const completedPayload = {
+      id: 'r1',
+      status: 'completed',
+      output: [
+        {
+          type: 'message',
+          id: 'm1',
+          role: 'assistant',
+          content: [],
+        },
+      ],
+    };
+    const source = makeStream([
+      {
+        type: 'response.output_text.delta',
+        delta: 'hello',
+      },
+      {
+        type: 'response.completed',
+        response: completedPayload,
+      },
+    ]);
+    const returned = await consumeStreamForCompletion(source);
+    expect(returned.id).toBe('r1');
+    expect(returned.status).toBe('completed');
+  });
+
+  it('throws on failed response because isResponseFailedEvent caught the failure', async () => {
+    const source = makeStream([
+      {
+        type: 'response.failed',
+        response: {
+          error: {
+            message: 'quota exceeded',
+          },
+        },
+      },
+    ]);
+    await expect(consumeStreamForCompletion(source)).rejects.toThrow('Response failed');
+  });
+});
diff --git a/tests/integration/tool-context-execution.test.ts b/tests/integration/tool-context-execution.test.ts
new file mode 100644
index 0000000..cf44d4a
--- /dev/null
+++ b/tests/integration/tool-context-execution.test.ts
@@ -0,0 +1,64 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { buildToolExecuteContext, ToolContextStore } from '../../src/lib/tool-context.js';
+import { buildTurnContext } from '../../src/lib/turn-context.js';
+
+describe('ToolContextStore -> buildToolExecuteContext -> tool execution', () => {
+  it('tool execute receives context where local reflects store data set before execution', () => {
+    const store = new ToolContextStore({
+      myTool: {
+        apiKey: 'key-123',
+      },
+    });
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 1,
+    });
+    const toolFn = {
+      name: 'myTool',
+      inputSchema: z.object({}),
+      contextSchema: z.object({
+        apiKey: z.string(),
+      }),
+    };
+
+    // Feed the definition's own name and schema to the builder instead of
+    // re-declaring an identical schema next to an unused tool object
+    const { name: toolName, contextSchema } = toolFn;
+    const execCtx = buildToolExecuteContext(turnCtx, store, toolName, contextSchema);
+    expect(execCtx.local).toEqual({
+      apiKey: 'key-123',
+    });
+  });
+
+  it('tool calls setContext -> store updated -> next tool reads updated value via local', () => {
+    const store = new ToolContextStore({
+      toolA: {
+        count: 0,
+      },
+      toolB: {},
+    });
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 1,
+    });
+    const contextSchema = z.object({
+      count: z.number(),
+    });
+
+    const execCtxA = buildToolExecuteContext(turnCtx, store, 'toolA', contextSchema);
+    expect(execCtxA.local).toEqual({
+      count: 0,
+    });
+
+    // Simulate tool A updating its context through the store
+    store.mergeToolContext('toolA', {
+      count: 42,
+    });
+
+    // A toolA context built after the merge exposes the updated value
+    const execCtxA2 = buildToolExecuteContext(turnCtx, store, 'toolA', contextSchema);
+    expect(execCtxA2.local).toEqual({
+      count: 42,
+    });
+  });
+});
diff --git a/tests/integration/turn-context-async-params.test.ts b/tests/integration/turn-context-async-params.test.ts
new file mode 100644
index 0000000..bf707a7
--- /dev/null
+++ b/tests/integration/turn-context-async-params.test.ts
@@ -0,0 +1,41 @@
+import { describe, expect, it } from 'vitest';
+import { resolveAsyncFunctions } from '../../src/lib/async-params.js';
+import { buildTurnContext } from '../../src/lib/turn-context.js';
+import { makeCallModelInput, TEST_MODEL, TEST_MODEL_ALT } from '../test-constants.js';
+
+describe('buildTurnContext -> resolveAsyncFunctions', () => {
+  it('parameter function receives TurnContext with correct numberOfTurns', async () => {
+    const turnContext = buildTurnContext({
+      numberOfTurns: 5,
+    });
+    // temperature is supplied as a function of the turn context
+    const input = makeCallModelInput({
+      model: TEST_MODEL,
+      temperature: (ctx: { numberOfTurns: number }) => ctx.numberOfTurns * 0.1,
+    });
+    const resolved = await resolveAsyncFunctions(input, turnContext);
+
+    expect(resolved.temperature).toBe(0.5);
+  });
+
+  it('parameter function can read toolCall from context when provided', async () => {
+    const pendingCall = {
+      id: 'tc_1',
+      name: 'search',
+      arguments: {
+        q: 'test',
+      },
+    };
+    const turnContext = buildTurnContext({
+      numberOfTurns: 1,
+      toolCall: pendingCall,
+    });
+    // model is chosen by whether the context carries a toolCall
+    const input = makeCallModelInput({
+      model: (ctx: { toolCall?: unknown }) => (ctx.toolCall ? TEST_MODEL_ALT : TEST_MODEL),
+    });
+    const resolved = await resolveAsyncFunctions(input, turnContext);
+
+    expect(resolved.model).toBe(TEST_MODEL_ALT);
+  });
+});
diff --git a/tests/pipelines/README.md b/tests/pipelines/README.md
new file mode 100644
index 0000000..fd0c51f
--- /dev/null
+++ b/tests/pipelines/README.md
@@ -0,0 +1,28 @@
+# Pipelines Tests
+
+Tests in this folder are **end-to-end agent workflows** where multiple capabilities compose, and at each layer we verify: what it specifically produces, how it differs from peers, and how it feeds the next layer.
+
+These are the most comprehensive tests in the suite. Each test exercises a complete pipeline and makes assertions at every stage.
+
+## What belongs here
+
+- Full streaming pipeline: raw events -> guards -> transformers -> consumer
+- Full tool execution pipeline: definition -> dispatch -> validate -> execute -> format
+- Context pipeline: build -> resolve -> store -> execute
+- Stop condition pipeline: results -> steps -> conditions -> decision
+- Dual-format output: same response -> structurally distinct formats simultaneously
+- Claude conversion deep pipeline: multi-item response -> per-item guard routing -> distinct blocks
+- Bidirectional format conversion round-trips
+- This list is extensible: when new end-to-end workflows are introduced, their full-pipeline tests with per-layer verification go here
+
+## Examples
+
+- Mixed stream events flow through type guards, extractors yield correct types, consumer receives via ReusableReadableStream
+- `tool()` creates tool -> `executeTool` dispatches via guard -> validates input -> executes -> validates output -> formats for model
+- Three concurrent stream consumers (`buildMessageStream`, `buildResponsesMessageStream`, `buildItemsStream`) all complete, each structurally different, no consumer blocks another
+
+## What does NOT belong here
+
+- Single-module behavior (-> `behavior/`)
+- Two-module composition without per-layer verification (-> `composition/` or `integration/`)
+- Tests that don't verify behavior, boundaries, and composition together (-> appropriate simpler category)
diff --git a/tests/pipelines/approval-execution-state.test.ts b/tests/pipelines/approval-execution-state.test.ts
new file mode 100644
index 0000000..b2844a7
--- /dev/null
+++ b/tests/pipelines/approval-execution-state.test.ts
@@ -0,0 +1,96 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  createInitialState,
+  createRejectedResult,
+  createUnsentResult,
+  partitionToolCalls,
+  unsentResultsToAPIFormat,
+  updateState,
+} from '../../src/lib/conversation-state.js';
+import { executeTool } from '../../src/lib/tool-executor.js';
+
+describe('Approval -> execution -> state update pipeline', () => {
+  it('approval workflow: partition -> execute auto -> create results -> format -> update state', async () => {
+    const searchTool = tool({
+      name: 'search',
+      inputSchema: z.object({
+        q: z.string(),
+      }),
+      execute: async (args) => ({
+        results: [
+          `found: ${args.q}`,
+        ],
+      }),
+    });
+    // Same shape of definition, but flagged as requiring approval
+    const deleteTool = tool({
+      name: 'delete',
+      inputSchema: z.object({
+        target: z.string(),
+      }),
+      requireApproval: true,
+      execute: async () => 'deleted',
+    });
+
+    const calls = [
+      {
+        id: 'tc_1',
+        name: 'search',
+        arguments: {
+          q: 'test',
+        },
+      },
+      {
+        id: 'tc_2',
+        name: 'delete',
+        arguments: {
+          target: 'file.txt',
+        },
+      },
+    ];
+
+    const registry = [
+      searchTool,
+      deleteTool,
+    ];
+
+    // Stage 1 - partition: one call per bucket
+    const { autoExecute, requiresApproval } = await partitionToolCalls(calls, registry);
+    expect(autoExecute).toHaveLength(1);
+    expect(requiresApproval).toHaveLength(1);
+
+    // Stage 2 - run the call that landed in the auto-execute bucket
+    const searchOutcome = await executeTool(searchTool, autoExecute[0]!, {
+      numberOfTurns: 1,
+    });
+    expect(searchOutcome.result).toEqual({
+      results: [
+        'found: test',
+      ],
+    });
+
+    // Stage 3 - build one unsent result and one rejected result
+    const unsentSearch = createUnsentResult('tc_1', 'search', searchOutcome.result);
+    const rejectedDelete = createRejectedResult('tc_2', 'delete');
+
+    // Stage 4 - both results are formatted as function_call_output items
+    const apiItems = unsentResultsToAPIFormat([
+      unsentSearch,
+      rejectedDelete,
+    ]);
+    expect(apiItems).toHaveLength(2);
+    expect(apiItems[0]!.type).toBe('function_call_output');
+    expect(apiItems[1]!.type).toBe('function_call_output');
+
+    // Stage 5 - a status update keeps the state's id
+    const initial = createInitialState();
+    const next = updateState(initial, {
+      status: 'completed',
+    });
+    expect(next.status).toBe('completed');
+    expect(next.id).toBe(initial.id);
+  });
+});
diff --git a/tests/pipelines/context-pipeline.test.ts b/tests/pipelines/context-pipeline.test.ts
new file mode 100644
index 0000000..c4f6580
--- /dev/null
+++ b/tests/pipelines/context-pipeline.test.ts
@@ -0,0 +1,91 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+import {
+  buildToolExecuteContext,
+  resolveContext,
+  ToolContextStore,
+} from '../../src/lib/tool-context.js';
+import { buildTurnContext } from '../../src/lib/turn-context.js';
+
+describe('Context pipeline: build -> resolve -> store -> execute', () => {
+  it('turn 0 with context: buildTurnContext -> resolveContext -> ToolContextStore -> buildToolExecuteContext -> tool reads local', async () => {
+    // Build turn context
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 0,
+    });
+    expect(turnCtx.numberOfTurns).toBe(0);
+
+    // Resolve context via function
+    const contextFn = () => ({
+      apiKey: 'secret-123',
+    });
+    const resolved = await resolveContext(contextFn, turnCtx);
+    expect(resolved).toEqual({
+      apiKey: 'secret-123',
+    });
+
+    // Populate store
+    const store = new ToolContextStore({
+      myTool: resolved,
+    });
+
+    // Build tool execute context
+    const toolFn = {
+      name: 'myTool',
+      inputSchema: z.object({}),
+      contextSchema: z.object({
+        apiKey: z.string(),
+      }),
+    };
+    // Feed the definition's own name and schema to the builder instead of
+    // re-declaring an identical schema next to an unused tool object
+    const { name: toolName, contextSchema } = toolFn;
+    const execCtx = buildToolExecuteContext(turnCtx, store, toolName, contextSchema);
+
+    // Tool reads from local
+    expect(execCtx.local).toEqual({
+      apiKey: 'secret-123',
+    });
+    expect(execCtx.numberOfTurns).toBe(0);
+  });
+
+  it('shared context mutation: tool A reads count=0 -> sets count=1 -> tool B reads count=1', () => {
+    const store = new ToolContextStore({
+      shared: {
+        count: 0,
+      },
+    });
+    const turnCtx = buildTurnContext({
+      numberOfTurns: 1,
+    });
+
+    const sharedToolFn = {
+      name: 'shared',
+      inputSchema: z.object({}),
+      contextSchema: z.object({
+        count: z.number(),
+      }),
+    };
+
+    // Tools A and B are modelled as two context builds against the same store key,
+    // both driven by the definition's own name and schema
+    const { name: toolName, contextSchema } = sharedToolFn;
+
+    // Tool A reads shared.count === 0
+    const ctxA = buildToolExecuteContext(turnCtx, store, toolName, contextSchema);
+    expect(ctxA.local).toEqual({
+      count: 0,
+    });
+
+    // Tool A updates shared context
+    store.setToolContext(toolName, {
+      count: 1,
+    });
+
+    // Tool B reads shared.count === 1
+    const ctxB = buildToolExecuteContext(turnCtx, store, toolName, contextSchema);
+    expect(ctxB.local).toEqual({
+      count: 1,
+    });
+  });
+});
diff --git a/tests/pipelines/dual-format-output.test.ts b/tests/pipelines/dual-format-output.test.ts
new file mode 100644
index 0000000..f74b8aa
--- /dev/null
+++ b/tests/pipelines/dual-format-output.test.ts
@@ -0,0 +1,166 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import {
+  buildItemsStream,
+  buildMessageStream,
+  buildResponsesMessageStream,
+  convertToClaudeMessage,
+  extractMessageFromResponse,
+  extractToolCallsFromResponse,
+} from '../../src/lib/stream-transformers.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+// Test helper: exposes `events` as a ReusableReadableStream whose source is fully enqueued and closed on start.
+// NOTE(review): `StreamEvents` is not among this file's imports — confirm it is declared ambiently.
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
+  const replay = new ReadableStream({
+    start(sink) {
+      for (const evt of events) sink.enqueue(evt);
+      sink.close();
+    },
+  });
+  return new ReusableReadableStream(replay);
+}
+
+// Drains `iter` to completion and resolves with every value it yielded,
+// in the order the values arrived.
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const collected: T[] = [];
+  for await (const value of iter) collected.push(value);
+  return collected;
+}
+
+describe('Dual-format output: same response -> structurally distinct formats', () => {
+  it('from response: same response -> extractMessageFromResponse, convertToClaudeMessage, extractToolCallsFromResponse all work', () => {
+    const response = {
+      id: 'r1',
+      output: [
+        {
+          type: 'message' as const,
+          id: 'msg_1',
+          role: 'assistant' as const,
+          status: 'completed' as const,
+          content: [
+            {
+              type: 'output_text' as const,
+              text: 'Found results',
+              annotations: [],
+            },
+          ],
+        },
+        {
+          type: 'function_call' as const,
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '{"q":"test"}',
+          status: 'completed' as const,
+        },
+      ],
+      status: 'completed' as const,
+      outputText: 'Found results',
+      model: TEST_MODEL,
+      usage: {
+        totalTokens: 100,
+        inputTokens: 50,
+        outputTokens: 50,
+      },
+    }; // fixture: one assistant text message plus one 'search' function call
+
+    // Chat format: content is expected to be a plain string
+    const chatMsg = extractMessageFromResponse(response);
+    expect(chatMsg.role).toBe('assistant');
+    expect(typeof chatMsg.content).toBe('string');
+
+    // Claude format: content is expected to be an array of blocks
+    const claudeMsg = convertToClaudeMessage(response);
+    expect(claudeMsg.role).toBe('assistant');
+    expect(Array.isArray(claudeMsg.content)).toBe(true);
+
+    // Tool calls: the single function_call output item is extracted and identified by name
+    const toolCalls = extractToolCallsFromResponse(response);
+    expect(toolCalls).toHaveLength(1);
+    expect(toolCalls[0]!.name).toBe('search');
+
+    // Same text in both shapes: directly on the chat message, and on the 'text' block of the Claude message
+    expect(chatMsg.content).toBe('Found results');
+    const claudeText = claudeMsg.content.find((b: { type: string }) => b.type === 'text');
+    expect(
+      (
+        claudeText as {
+          type: string;
+          text: string;
+        }
+      ).text,
+    ).toBe('Found results');
+  });
+
+  it('through streaming: same ReusableReadableStream -> three concurrent consumers all complete', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: ' world',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_item.done',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'completed',
+          content: [
+            {
+              type: 'output_text',
+              text: 'Hello world',
+              annotations: [],
+            },
+          ],
+        },
+      },
+      {
+        type: 'response.completed',
+        response: {
+          id: 'r1',
+        },
+      },
+    ]; // one message item: added, two text deltas, done, then response.completed
+
+    const stream = makeStream(events);
+
+    // Three consumers read the one shared stream; Promise.all starts them all before any has finished
+    const [chatMsgs, responsesMsgs, items] = await Promise.all([
+      collectAll(buildMessageStream(stream)),
+      collectAll(buildResponsesMessageStream(stream)),
+      collectAll(buildItemsStream(stream)),
+    ]);
+
+    // Every consumer ran to completion and yielded at least one value
+    expect(chatMsgs.length).toBeGreaterThan(0);
+    expect(responsesMsgs.length).toBeGreaterThan(0);
+    expect(items.length).toBeGreaterThan(0);
+
+    // Structurally different: compare the final value yielded by each message builder
+    const lastChat = chatMsgs[chatMsgs.length - 1]!;
+    const lastResponses = responsesMsgs[responsesMsgs.length - 1]!;
+
+    expect('id' in lastChat).toBe(false); // chat-format message carries no 'id' key
+    expect('id' in lastResponses).toBe(true); // responses-format message does
+  });
+});
diff --git a/tests/pipelines/next-turn-params-pipeline.test.ts b/tests/pipelines/next-turn-params-pipeline.test.ts
new file mode 100644
index 0000000..57156d4
--- /dev/null
+++ b/tests/pipelines/next-turn-params-pipeline.test.ts
@@ -0,0 +1,60 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  applyNextTurnParamsToRequest,
+  buildNextTurnParamsContext,
+  executeNextTurnParamsFunctions,
+} from '../../src/lib/next-turn-params.js';
+import { TEST_MODEL } from '../test-constants.js';
+
+describe('Next-turn parameter adjustment pipeline', () => {
+  it('dynamic temperature: search tool with nextTurnParams.temperature -> context -> execute -> apply -> request updated', async () => {
+    const searchTool = tool({
+      name: 'search',
+      inputSchema: z.object({
+        query: z.string(),
+      }),
+      execute: async (args) => `Results for: ${args.query}`,
+      nextTurnParams: {
+        temperature: (input: { query?: string }) => (input.query?.includes('creative') ? 0.9 : 0.1),
+      }, // 0.9 when the query contains 'creative', otherwise 0.1
+    });
+
+    const request = {
+      model: TEST_MODEL,
+      temperature: 0.5,
+      input: 'hello',
+    };
+
+    // Step 1: Build context from request; model and temperature are expected to carry over unchanged
+    const ctx = buildNextTurnParamsContext(request);
+    expect(ctx.model).toBe(TEST_MODEL);
+    expect(ctx.temperature).toBe(0.5);
+
+    // Step 2: Execute nextTurnParams functions for the tool calls made this turn
+    // Simulated call: search was invoked with { query: 'creative writing' }
+    const tools = [
+      searchTool,
+    ];
+    const toolCalls = [
+      {
+        id: 'tc_1',
+        name: 'search',
+        arguments: {
+          query: 'creative writing',
+        },
+      },
+    ];
+    const params = await executeNextTurnParamsFunctions(toolCalls, tools, request);
+
+    expect(params).toHaveProperty('temperature', 0.9); // 'creative' branch of the tool's function
+
+    // Step 3: Apply to request; temperature takes the computed value, model and input keep theirs
+    const modified = applyNextTurnParamsToRequest(request, params);
+    expect(modified.temperature).toBe(0.9);
+    expect(modified.model).toBe(TEST_MODEL);
+    expect(modified.input).toBe('hello');
+  });
+});
diff --git a/tests/pipelines/stop-condition-pipeline.test.ts b/tests/pipelines/stop-condition-pipeline.test.ts
new file mode 100644
index 0000000..1b50c3c
--- /dev/null
+++ b/tests/pipelines/stop-condition-pipeline.test.ts
@@ -0,0 +1,117 @@
+import { describe, expect, it } from 'vitest';
+
+import {
+  hasToolCall,
+  isStopConditionMet,
+  maxCost,
+  stepCountIs,
+} from '../../src/lib/stop-conditions.js';
+import { makeStep, makeTypedToolCalls, makeUsage } from '../test-constants.js';
+
+describe('Stop condition pipeline: results -> steps -> conditions -> decision', () => {
+  it('step count: 3 tool rounds -> StepResult[] length 3 -> stepCountIs(3) true -> isStopConditionMet true', async () => {
+    const steps = [
+      makeStep(),
+      makeStep(),
+      makeStep(),
+    ]; // three steps built entirely from makeStep defaults
+    expect(
+      stepCountIs(3)({
+        steps,
+      }),
+    ).toBe(true); // the condition evaluated on its own
+    const result = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(3),
+      ],
+      steps,
+    }); // the same condition evaluated through isStopConditionMet
+    expect(result).toBe(true);
+  });
+
+  it('tool call: round with "search" tool -> hasToolCall("search") true -> isStopConditionMet true', async () => {
+    const steps = [
+      makeStep({
+        toolCalls: makeTypedToolCalls([
+          {
+            name: 'search',
+            id: 'tc1',
+            arguments: {},
+          },
+        ]),
+      }),
+    ]; // a single step whose only tool call is named 'search'
+    expect(
+      hasToolCall('search')({
+        steps,
+      }),
+    ).toBe(true); // the condition evaluated on its own
+    const result = await isStopConditionMet({
+      stopConditions: [
+        hasToolCall('search'),
+      ],
+      steps,
+    }); // the same condition evaluated through isStopConditionMet
+    expect(result).toBe(true);
+  });
+
+  it('cost: round with usage.cost = 0.30 -> maxCost(0.25) true -> stop', async () => {
+    const steps = [
+      makeStep({
+        usage: makeUsage({
+          totalTokens: 100,
+          inputTokens: 50,
+          outputTokens: 50,
+          cost: 0.3,
+        }),
+      }),
+    ]; // a single step whose usage reports cost 0.3, above the 0.25 limit used below
+    expect(
+      maxCost(0.25)({
+        steps,
+      }),
+    ).toBe(true); // the condition evaluated on its own
+    const result = await isStopConditionMet({
+      stopConditions: [
+        maxCost(0.25),
+      ],
+      steps,
+    }); // the same condition evaluated through isStopConditionMet
+    expect(result).toBe(true);
+  });
+
+  it('combined OR: stepCountIs(10) false + hasToolCall("done") true -> isStopConditionMet true', async () => {
+    const steps = [
+      makeStep({
+        toolCalls: makeTypedToolCalls([
+          {
+            name: 'done',
+            id: 'tc1',
+            arguments: {},
+          },
+        ]),
+      }),
+    ]; // a single step whose only tool call is named 'done'
+    // stepCountIs(10) is false on its own (only 1 step is present)
+    expect(
+      stepCountIs(10)({
+        steps,
+      }),
+    ).toBe(false);
+    // hasToolCall('done') is true on its own
+    expect(
+      hasToolCall('done')({
+        steps,
+      }),
+    ).toBe(true);
+    // OR logic: one satisfied condition in the list is enough for isStopConditionMet to resolve true
+    const result = await isStopConditionMet({
+      stopConditions: [
+        stepCountIs(10),
+        hasToolCall('done'),
+      ],
+      steps,
+    });
+    expect(result).toBe(true);
+  });
+});
diff --git a/tests/pipelines/streaming-pipeline.test.ts b/tests/pipelines/streaming-pipeline.test.ts
new file mode 100644
index 0000000..144440f
--- /dev/null
+++ b/tests/pipelines/streaming-pipeline.test.ts
@@ -0,0 +1,159 @@
+import { describe, expect, it } from 'vitest';
+
+import { ReusableReadableStream } from '../../src/lib/reusable-stream.js';
+import {
+  buildItemsStream,
+  consumeStreamForCompletion,
+  extractTextDeltas,
+} from '../../src/lib/stream-transformers.js';
+
+// Test helper: exposes `events` as a ReusableReadableStream whose source is fully enqueued and closed on start.
+// NOTE(review): `StreamEvents` is not among this file's imports — confirm it is declared ambiently.
+function makeStream(events: StreamEvents[]): ReusableReadableStream<StreamEvents> {
+  const replay = new ReadableStream({
+    start(sink) {
+      for (const evt of events) sink.enqueue(evt);
+      sink.close();
+    },
+  });
+  return new ReusableReadableStream(replay);
+}
+
+// Drains `iter` to completion and resolves with every value it yielded,
+// in the order the values arrived.
+async function collectAll<T>(iter: AsyncIterable<T>): Promise<T[]> {
+  const collected: T[] = [];
+  for await (const value of iter) collected.push(value);
+  return collected;
+}
+
+describe('Full streaming pipeline: raw events -> guards -> transformers -> consumer', () => {
+  it('text streaming: guard filters to text only -> extractTextDeltas yields strings -> non-text absent', async () => {
+    const events = [
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+      },
+      {
+        type: 'response.reasoning_text.delta',
+        delta: 'thinking',
+      },
+      {
+        type: 'response.function_call_arguments.delta',
+        delta: '{"q":',
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: ' world',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ]; // two output_text deltas interleaved with a reasoning delta and a function-call-arguments delta
+    const stream = makeStream(events);
+    const textDeltas = await collectAll(extractTextDeltas(stream));
+
+    // Only the two output_text deltas come through, in their original order
+    expect(textDeltas).toEqual([
+      'Hello',
+      ' world',
+    ]);
+    // The reasoning and function-call-arguments payloads are absent (already implied by toEqual; spelled out for intent)
+    expect(textDeltas).not.toContain('thinking');
+    expect(textDeltas).not.toContain('{"q":');
+  });
+
+  it('items streaming: type guards dispatch to per-type handlers -> consumer gets distinct item types', async () => {
+    const events = [
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'message',
+          id: 'msg_1',
+          role: 'assistant',
+          status: 'in_progress',
+          content: [],
+        },
+      },
+      {
+        type: 'response.output_text.delta',
+        delta: 'Hello',
+        itemId: 'msg_1',
+      },
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'function_call',
+          id: 'fc_1',
+          callId: 'fc_1',
+          name: 'search',
+          arguments: '',
+          status: 'in_progress',
+        },
+      },
+      {
+        type: 'response.function_call_arguments.delta',
+        delta: '{"q":"test"}',
+        itemId: 'fc_1',
+      },
+      {
+        type: 'response.output_item.added',
+        item: {
+          type: 'reasoning',
+          id: 'r_1',
+          status: 'in_progress',
+          summary: [],
+        },
+      },
+      {
+        type: 'response.reasoning_text.delta',
+        delta: 'thinking',
+        itemId: 'r_1',
+      },
+      {
+        type: 'response.completed',
+        response: {},
+      },
+    ]; // three items (message, function_call, reasoning), each added and then given one delta via itemId
+    const stream = makeStream(events);
+    const items = await collectAll(buildItemsStream(stream));
+
+    const messageItems = items.filter((i) => i.type === 'message');
+    const fnCallItems = items.filter((i) => i.type === 'function_call');
+    const reasoningItems = items.filter((i) => i.type === 'reasoning');
+
+    // Each of the three item types was yielded at least once
+    expect(messageItems.length).toBeGreaterThan(0);
+    expect(fnCallItems.length).toBeGreaterThan(0);
+    expect(reasoningItems.length).toBeGreaterThan(0);
+
+    // Latest message item: its first content part holds the text delta
+    expect(messageItems[messageItems.length - 1].content[0].text).toBe('Hello');
+    // Latest function call item: arguments hold the streamed arguments delta
+    expect(fnCallItems[fnCallItems.length - 1].arguments).toBe('{"q":"test"}');
+    // Latest reasoning item: the reasoning delta is exposed as summary[0].text
+    expect(reasoningItems[reasoningItems.length - 1].summary[0].text).toBe('thinking');
+  });
+
+  it('completion: isResponseCompletedEvent true -> consumeStreamForCompletion returns response -> stream terminates', async () => {
+    const response = {
+      id: 'r1',
+      status: 'completed',
+      output: [],
+    }; // payload carried by the response.completed event below
+    const events = [
+      {
+        type: 'response.output_text.delta',
+        delta: 'data',
+      },
+      {
+        type: 'response.completed',
+        response,
+      },
+    ]; // one text delta followed by the completion event
+    const stream = makeStream(events);
+    const result = await consumeStreamForCompletion(stream); // NOTE(review): isResponseCompletedEvent is never called directly in this test
+    expect(result).toEqual(response); // the completion event's payload is what gets returned
+  });
+});
diff --git a/tests/pipelines/tool-execution-pipeline.test.ts b/tests/pipelines/tool-execution-pipeline.test.ts
new file mode 100644
index 0000000..34dbaed
--- /dev/null
+++ b/tests/pipelines/tool-execution-pipeline.test.ts
@@ -0,0 +1,138 @@
+import { describe, expect, it } from 'vitest';
+import { z } from 'zod/v4';
+
+import { tool } from '../../src/index.js';
+import {
+  executeTool,
+  formatToolExecutionError,
+  formatToolResultForModel,
+} from '../../src/lib/tool-executor.js';
+
+describe('Full tool execution pipeline: definition -> dispatch -> validate -> execute -> format', () => {
+  it('regular tool: tool() -> executeTool -> validates -> executes -> formatToolResultForModel produces JSON', async () => {
+    const addTool = tool({
+      name: 'add',
+      inputSchema: z.object({
+        a: z.number(),
+        b: z.number(),
+      }),
+      outputSchema: z.object({
+        sum: z.number(),
+      }),
+      execute: async (args) => ({
+        sum: args.a + args.b,
+      }),
+    }); // plain async execute: no eventSchema, returns { sum }
+
+    const toolCall = {
+      id: 'tc_1',
+      name: 'add',
+      arguments: {
+        a: 2,
+        b: 3,
+      },
+    };
+    const result = await executeTool(addTool, toolCall, {
+      numberOfTurns: 1,
+    });
+
+    // Result is tagged with the originating call id and tool name
+    expect(result.toolCallId).toBe('tc_1');
+    expect(result.toolName).toBe('add');
+    // Execution produced the tool's return value (2 + 3)
+    expect(result.result).toEqual({
+      sum: 5,
+    });
+    // No error was recorded on the result
+    expect(result.error).toBeUndefined();
+
+    // Formatting for the model yields a string that parses as JSON back to the same sum
+    const formatted = formatToolResultForModel(result);
+    expect(typeof formatted).toBe('string');
+    const parsed = JSON.parse(formatted);
+    expect(parsed.sum).toBe(5);
+  });
+
+  it('generator tool: tool() with eventSchema -> executeTool -> generator yields events -> result has both', async () => {
+    const streamTool = tool({
+      name: 'stream_add',
+      inputSchema: z.object({
+        a: z.number(),
+        b: z.number(),
+      }),
+      eventSchema: z.object({
+        progress: z.number(),
+      }),
+      outputSchema: z.object({
+        sum: z.number(),
+      }),
+      execute: async function* (args) {
+        yield {
+          progress: 50,
+        };
+        yield {
+          progress: 100,
+        };
+        return {
+          sum: args.a + args.b,
+        };
+      },
+    }); // async generator execute: yields two progress events, then returns { sum }
+
+    const toolCall = {
+      id: 'tc_2',
+      name: 'stream_add',
+      arguments: {
+        a: 3,
+        b: 4,
+      },
+    };
+    const result = await executeTool(streamTool, toolCall, {
+      numberOfTurns: 1,
+    });
+
+    // Result is tagged with the originating call id
+    expect(result.toolCallId).toBe('tc_2');
+    // Both yielded events are collected, in yield order, under preliminaryResults
+    expect(result.preliminaryResults).toHaveLength(2);
+    expect(result.preliminaryResults![0]).toEqual({
+      progress: 50,
+    });
+    expect(result.preliminaryResults![1]).toEqual({
+      progress: 100,
+    });
+    // The generator's return value (3 + 4) becomes the final result
+    expect(result.result).toEqual({
+      sum: 7,
+    });
+  });
+
+  it('error pipeline: invalid input -> executeTool -> caught -> ToolExecutionResult has error -> formatToolExecutionError includes details', async () => {
+    const strictTool = tool({
+      name: 'strict',
+      inputSchema: z.object({
+        count: z.number().min(1),
+      }),
+      execute: async (args) => args.count,
+    });
+
+    const toolCall = {
+      id: 'tc_3',
+      name: 'strict',
+      arguments: {
+        count: -5,
+      },
+    }; // count: -5 violates the schema's min(1)
+    const result = await executeTool(strictTool, toolCall, {
+      numberOfTurns: 1,
+    });
+
+    // The failure is expected on the returned result (error set, result null), not as a rejected promise
+    expect(result.error).toBeDefined();
+    expect(result.result).toBeNull();
+
+    // The formatted error text is expected to mention the tool name
+    const errorFormatted = formatToolExecutionError(result.error!, toolCall);
+    expect(errorFormatted).toContain('strict');
+  });
+});
diff --git a/tests/test-constants.ts b/tests/test-constants.ts
new file mode 100644
index 0000000..c2f749e
--- /dev/null
+++ b/tests/test-constants.ts
@@ -0,0 +1,152 @@
+/**
+ * Shared test constants and typed factory helpers.
+ *
+ * Unit/integration tests share these model identifiers so no test depends
+ * on a per-file model choice; update the convention here, in one place.
+ * NOTE(review): the values below look like real model ids, not synthetic placeholders — confirm.
+ */
+
+import type * as models from '@openrouter/sdk/models';
+import type { CallModelInput } from '../src/lib/async-params.js';
+import type {
+  ParsedToolCall,
+  StepResult,
+  Tool,
+  ToolExecutionResult,
+  TurnContext,
+  TypedToolCallUnion,
+} from '../src/lib/tool-types.js';
+
+/** Default model identifier used in non-e2e tests. */
+export const TEST_MODEL = 'openai/gpt-4.1-nano';
+
+/** Alternative model for tests that need a second, distinct model. */
+export const TEST_MODEL_ALT = 'openai/gpt-4.1-mini';
+
+// ---------------------------------------------------------------------------
+// Factory helpers – build properly typed test data without `as any`
+// ---------------------------------------------------------------------------
+
+/** Builds a Usage whose token-detail counters default to zero; fields in `overrides` win on conflict. */
+export function makeUsage(
+  overrides: Partial<models.Usage> & {
+    totalTokens: number;
+    inputTokens: number;
+    outputTokens: number;
+  },
+): models.Usage {
+  const zeroedDetails = {
+    inputTokensDetails: {
+      cachedTokens: 0,
+    },
+    outputTokensDetails: {
+      reasoningTokens: 0,
+    },
+  };
+  return Object.assign(zeroedDetails, overrides);
+}
+
+/** Builds an OpenResponsesResult: a completed 'resp_test' response on TEST_MODEL with null/empty optional fields; `overrides` (which must include `output`) win. */
+export function makeResponse(
+  overrides: Partial<models.OpenResponsesResult> & {
+    output: models.OutputItems[];
+  },
+): models.OpenResponsesResult {
+  return {
+    id: 'resp_test',
+    object: 'response',
+    createdAt: 0,
+    model: TEST_MODEL,
+    status: 'completed',
+    completedAt: null,
+    error: null,
+    incompleteDetails: null,
+    temperature: null,
+    topP: null,
+    presencePenalty: null,
+    frequencyPenalty: null,
+    instructions: null,
+    metadata: null,
+    tools: [],
+    toolChoice: 'auto',
+    parallelToolCalls: false,
+    ...overrides, // spread last so caller-supplied fields replace the defaults above
+  };
+}
+
+/** Builds an 'initial' StepResult with empty text and tool lists and undefined finishReason/usage; fields in `overrides` replace the defaults. */
+export function makeStep(overrides: Partial<StepResult> = {}): StepResult {
+  return {
+    stepType: 'initial',
+    text: '',
+    response: makeResponse({
+      output: [],
+    }), // default response is constructed even when overrides.response is supplied
+    toolCalls: [],
+    toolResults: [],
+    finishReason: undefined,
+    usage: undefined,
+    ...overrides, // spread last so caller-supplied fields replace the defaults above
+  };
+}
+
+/** Builds a TurnContext that starts at turn zero; any field in `overrides` replaces the default. */
+export function makeTurnContext(overrides: Partial<TurnContext> = {}): TurnContext {
+  const base = {
+    numberOfTurns: 0,
+  };
+  return Object.assign(base, overrides);
+}
+
+/** Typed ParsedToolCall factory: returns the object passed in, unchanged and uncopied, typed as ParsedToolCall<Tool>. */
+export function makeToolCall(overrides: {
+  id: string;
+  name: string;
+  arguments: unknown;
+}): ParsedToolCall<Tool> {
+  return overrides; // same reference; nothing is validated or cloned
+}
+
+/**
+ * Builds a ToolExecutionResult for tests. `result` starts out undefined and
+ * every field present in `overrides` replaces the corresponding default.
+ */
+export function makeToolResult(
+  overrides: Partial<ToolExecutionResult<Tool>> & {
+    toolCallId: string;
+    toolName: string;
+  },
+): ToolExecutionResult<Tool> {
+  return Object.assign({ result: undefined }, overrides);
+}
+
+/**
+ * Casts an arbitrary field bag to CallModelInput without any runtime check.
+ * Use when a test provides only a subset of fields (model, temperature, etc.)
+ * and omits the union-discriminant fields; the same object is returned.
+ */
+export function makeCallModelInput(fields: Record<string, unknown>): CallModelInput {
+  return fields as CallModelInput; // type assertion only: no copy, no validation
+}
+
+/** Casts loosely typed tool calls to the element type used by StepResult.toolCalls; the same array is returned. */
+export function makeTypedToolCalls(
+  calls: Array<{
+    id: string;
+    name: string;
+    arguments: unknown;
+  }>,
+): TypedToolCallUnion<readonly Tool[]>[] {
+  return calls as TypedToolCallUnion<readonly Tool[]>[]; // type assertion only: no copy, no validation
+}
+
+/** Builds a ResponsesRequest targeting TEST_MODEL with input 'test'; fields in `overrides` replace the defaults. */
+export function makeRequest(
+  overrides: Partial<models.ResponsesRequest> = {},
+): models.ResponsesRequest {
+  const defaults = {
+    model: TEST_MODEL,
+    input: 'test',
+  };
+  return Object.assign(defaults, overrides);
+}
diff --git a/vitest.config.ts b/vitest.config.ts
index efca4ae..40ad0b0 100644
--- a/vitest.config.ts
+++ b/vitest.config.ts
@@ -28,6 +28,83 @@ export default defineConfig({
           hookTimeout: 10000,
         },
       },
+      {
+        extends: true,
+        test: {
+          name: 'behavior',
+          include: [
+            'tests/behavior/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'boundaries',
+          include: [
+            'tests/boundaries/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'composition',
+          include: [
+            'tests/composition/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'contracts',
+          include: [
+            'tests/contracts/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'integration',
+          include: [
+            'tests/integration/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'dispatch',
+          include: [
+            'tests/dispatch/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
+      {
+        extends: true,
+        test: {
+          name: 'pipelines',
+          include: [
+            'tests/pipelines/**/*.test.ts',
+          ],
+          testTimeout: 10000,
+          hookTimeout: 10000,
+        },
+      },
       {
         extends: true,
         test: {