diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.log-payloads.json index 2ab51b170..69590d2f3 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.log-payloads.json @@ -234,9 +234,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" @@ -287,9 +284,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.log-payloads.json index 30d2b4d04..ba4f1d519 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.log-payloads.json @@ -234,9 +234,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" @@ -287,9 +284,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.log-payloads.json index b46bfc0c0..0de23aac4 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.log-payloads.json @@ -234,9 +234,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" @@ -287,9 +284,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.log-payloads.json index b46bfc0c0..0de23aac4 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.log-payloads.json @@ -234,9 +234,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" @@ -287,9 +284,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.log-payloads.json index b46bfc0c0..0de23aac4 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.log-payloads.json @@ -234,9 +234,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" @@ -287,9 +284,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.log-payloads.json index b46bfc0c0..0de23aac4 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.log-payloads.json @@ -234,9 +234,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" @@ -287,9 +284,6 @@ "output": { "content": [ { - "caller": { - "type": "direct" - }, "id": "", "input": { "location": "Paris, France" diff --git a/e2e/scenarios/anthropic-instrumentation/assertions.ts b/e2e/scenarios/anthropic-instrumentation/assertions.ts index 6658c9ccc..9eb8a7362 100644 --- a/e2e/scenarios/anthropic-instrumentation/assertions.ts +++ b/e2e/scenarios/anthropic-instrumentation/assertions.ts @@ -89,6 +89,10 @@ function summarizeAnthropicPayload(event: CapturedLogEvent): Json { const output = structuredClone( summary.output as { content: Array<{ + caller?: unknown; + input?: Record; + name?: string; + id?: string; text?: string; type?: string; thinking?: string; @@ -124,6 +128,18 @@ function summarizeAnthropicPayload(event: CapturedLogEvent): Json { return summary; } + // `caller` is only present in newer Anthropic SDK responses. + // Drop it so payload snapshots stay stable across SDK versions. + for (const block of output.content) { + if ( + (block.type === "tool_use" || block.type === "server_tool_use") && + "caller" in block + ) { + delete block.caller; + } + } + summary.output = output as Json; + const textBlock = output.content.find( (block) => block.type === "text" && typeof block.text === "string", ); @@ -564,9 +580,18 @@ export function defineAnthropicInstrumentationAssertions(options: { expect(span?.row.metadata).toMatchObject({ provider: "anthropic", }); - expect(span?.metrics).toMatchObject({ - server_tool_use_web_search_requests: expect.any(Number), - }); + const metrics = (span?.metrics ?? {}) as Record; + if ("server_tool_use_web_search_requests" in metrics) { + expect(metrics.server_tool_use_web_search_requests).toEqual( + expect.any(Number), + ); + } else { + expect(metrics).toMatchObject({ + completion_tokens: expect.any(Number), + prompt_tokens: expect.any(Number), + tokens: expect.any(Number), + }); + } expect( output?.content?.some( (block) => diff --git a/e2e/scenarios/openai-instrumentation/assertions.ts b/e2e/scenarios/openai-instrumentation/assertions.ts index 07dc501c0..b0b377329 100644 --- a/e2e/scenarios/openai-instrumentation/assertions.ts +++ b/e2e/scenarios/openai-instrumentation/assertions.ts @@ -342,21 +342,20 @@ function summarizeChatOutput(output: Json): Json { }) satisfies Json; } -function summarizeResponsesOutput(output: Json): Json { +function summarizeResponsesOutput( + output: Json, + options?: { + // For normalization across SDK versions + dropEmptyOutputTextMessages?: boolean; + }, +): Json { if (!Array.isArray(output)) { return null; } - // Deduplicate identical items — the Responses API occasionally returns - // duplicate output entries (e.g., two identical "message" items when - // streaming), which would cause non-deterministic snapshot failures. - const seen = new Set(); - const result: Json[] = []; - - for (const item of output) { + const summaries = output.map((item) => { if (!isRecord(item as Json)) { - result.push(null); - continue; + return null; } const content = Array.isArray(item.content) ? item.content : []; @@ -369,22 +368,49 @@ function summarizeResponsesOutput(output: Json): Json { isRecord(entry as Json) ? jsonKeysFromText(entry.text) : [], ); - const summarized = { + return { content_types: contentTypes, json_keys: [...new Set(jsonKeys)].sort(), role: item.role ?? null, status: item.status ?? null, type: item.type ?? null, } satisfies Json; + }); + + const filtered = options?.dropEmptyOutputTextMessages + ? summaries.filter((item) => { + if (!isRecord(item as Json)) { + return true; + } + + return !( + item.role === "assistant" && + item.status === "completed" && + item.type === "message" && + Array.isArray(item.content_types) && + item.content_types.length === 1 && + item.content_types[0] === "output_text" && + Array.isArray(item.json_keys) && + item.json_keys.length === 0 + ); + }) + : summaries; + // Deduplicate identical items — the Responses API occasionally returns + // duplicate output entries (e.g., two identical "message" items when + // streaming), which would cause non-deterministic snapshot failures. + const seen = new Set(); + const deduped: Json[] = []; + + for (const summarized of filtered) { const key = JSON.stringify(summarized); if (!seen.has(key)) { seen.add(key); - result.push(summarized); + deduped.push(summarized); } } - return result; + return deduped; } function summarizeOutput(name: string, output: Json): Json { @@ -413,12 +439,17 @@ function summarizeOutput(name: string, output: Json): Json { if ( name === "openai.responses.create" || - name === "openai.responses.parse" || name === "openai.responses.compact" ) { return summarizeResponsesOutput(output); } + if (name === "openai.responses.parse") { + return summarizeResponsesOutput(output, { + dropEmptyOutputTextMessages: true, + }); + } + return output === null || output === undefined ? null : ({ kind: typeof output } satisfies Json); diff --git a/js/src/instrumentation/plugins/anthropic-plugin.test.ts b/js/src/instrumentation/plugins/anthropic-plugin.test.ts index fd9eb037a..811221c4f 100644 --- a/js/src/instrumentation/plugins/anthropic-plugin.test.ts +++ b/js/src/instrumentation/plugins/anthropic-plugin.test.ts @@ -621,6 +621,51 @@ describe("aggregateAnthropicStreamChunks", () => { }); }); + it("should parse streamed input_json_delta for server_tool_use blocks", () => { + const chunks = [ + { + type: "content_block_start", + index: 0, + content_block: { + type: "server_tool_use", + id: "srvtoolu_abc123", + name: "web_search", + input: {}, + }, + }, + { + type: "content_block_delta", + index: 0, + delta: { + type: "input_json_delta", + partial_json: '{"query":"braintrust"', + }, + }, + { + type: "content_block_delta", + index: 0, + delta: { + type: "input_json_delta", + partial_json: ',"max_uses":1}', + }, + }, + { type: "content_block_stop", index: 0 }, + ]; + + const result = aggregateAnthropicStreamChunks(chunks); + + expect(result.output).toEqual({ + content: [ + { + type: "server_tool_use", + id: "srvtoolu_abc123", + name: "web_search", + input: { query: "braintrust", max_uses: 1 }, + }, + ], + }); + }); + it("should preserve web_search_tool_result blocks without deltas", () => { const chunks = [ { diff --git a/js/src/instrumentation/plugins/anthropic-plugin.ts b/js/src/instrumentation/plugins/anthropic-plugin.ts index 54c056a6a..fcda080bc 100644 --- a/js/src/instrumentation/plugins/anthropic-plugin.ts +++ b/js/src/instrumentation/plugins/anthropic-plugin.ts @@ -150,6 +150,13 @@ type ContentBlockAccumulator = { citations: AnthropicCitation[]; }; +type ToolUseLikeContentBlock = { + type: "tool_use" | "server_tool_use"; + id: string; + name: string; + input: Record; +}; + export function aggregateAnthropicStreamChunks( chunks: AnthropicStreamEvent[], ): { @@ -300,16 +307,26 @@ function finalizeContentBlock( const acc = contentBlockDeltas[index]; const text = acc?.textDeltas.join("") ?? ""; - if (isToolUseContentBlock(contentBlock)) { + if (isToolUseLikeContentBlock(contentBlock)) { if (!text) { return; } try { - contentBlocks[index] = { - ...contentBlock, - input: JSON.parse(text), + const parsedInput = JSON.parse(text) as unknown; + if (!isObject(parsedInput)) { + fallbackTextDeltas.push(text); + delete contentBlocks[index]; + return; + } + + const parsedToolUseBlock: ToolUseLikeContentBlock = { + type: contentBlock.type, + id: contentBlock.id, + name: contentBlock.name, + input: parsedInput, }; + contentBlocks[index] = parsedToolUseBlock; } catch { fallbackTextDeltas.push(text); delete contentBlocks[index]; @@ -361,10 +378,16 @@ function isTextContentBlock( return contentBlock.type === "text"; } -function isToolUseContentBlock( +function isToolUseLikeContentBlock( contentBlock: AnthropicOutputContentBlock, -): contentBlock is Extract { - return contentBlock.type === "tool_use"; +): contentBlock is ToolUseLikeContentBlock { + return ( + (contentBlock.type === "tool_use" || + contentBlock.type === "server_tool_use") && + typeof (contentBlock as { id?: unknown }).id === "string" && + typeof (contentBlock as { name?: unknown }).name === "string" && + isObject((contentBlock as { input?: unknown }).input) + ); } function isThinkingContentBlock( diff --git a/js/src/vendor-sdk-types/anthropic.ts b/js/src/vendor-sdk-types/anthropic.ts index 79f8258b1..eda552d4a 100644 --- a/js/src/vendor-sdk-types/anthropic.ts +++ b/js/src/vendor-sdk-types/anthropic.ts @@ -87,15 +87,42 @@ export interface AnthropicCitation { [key: string]: unknown; } +export interface AnthropicToolUseContentBlock { + type: "tool_use"; + id: string; + name: string; + input: Record; +} + +export interface AnthropicServerToolUseContentBlock { + type: "server_tool_use"; + id: string; + name: string; + input: Record; +} + +export interface AnthropicWebSearchResultContentBlock { + type: string; + [key: string]: unknown; +} + +export interface AnthropicWebSearchToolResultContentBlock { + type: "web_search_tool_result"; + tool_use_id: string; + content: AnthropicWebSearchResultContentBlock[]; +} + +export interface AnthropicThinkingContentBlock { + type: "thinking"; + thinking: string; +} + export type AnthropicOutputContentBlock = | { type: "text"; text: string; citations?: AnthropicCitation[] } - | { - type: "tool_use"; - id: string; - name: string; - input: Record; - } - | { type: "thinking"; thinking: string } + | AnthropicToolUseContentBlock + | AnthropicServerToolUseContentBlock + | AnthropicWebSearchToolResultContentBlock + | AnthropicThinkingContentBlock | { type: string }; export interface AnthropicUsage { diff --git a/turbo.json b/turbo.json index 42046bc5d..f33fe0a5d 100644 --- a/turbo.json +++ b/turbo.json @@ -32,6 +32,7 @@ "outputs": [] }, "test:e2e": { + "cache": false, "env": [ "ANTHROPIC_API_KEY", "BRAINTRUST_API_KEY", @@ -52,6 +53,7 @@ "outputs": [] }, "test:e2e:canary": { + "cache": false, "env": [ "ANTHROPIC_API_KEY", "BRAINTRUST_API_KEY", @@ -67,6 +69,7 @@ "outputs": [] }, "test:e2e:update": { + "cache": false, "env": [ "ANTHROPIC_API_KEY", "BRAINTRUST_API_KEY",