Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:1>",
"input": {
"location": "Paris, France"
Expand Down Expand Up @@ -287,9 +284,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:2>",
"input": {
"location": "Paris, France"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:1>",
"input": {
"location": "Paris, France"
Expand Down Expand Up @@ -287,9 +284,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:2>",
"input": {
"location": "Paris, France"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:1>",
"input": {
"location": "Paris, France"
Expand Down Expand Up @@ -287,9 +284,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:2>",
"input": {
"location": "Paris, France"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:1>",
"input": {
"location": "Paris, France"
Expand Down Expand Up @@ -287,9 +284,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:2>",
"input": {
"location": "Paris, France"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:1>",
"input": {
"location": "Paris, France"
Expand Down Expand Up @@ -287,9 +284,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:2>",
"input": {
"location": "Paris, France"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:1>",
"input": {
"location": "Paris, France"
Expand Down Expand Up @@ -287,9 +284,6 @@
"output": {
"content": [
{
"caller": {
"type": "direct"
},
"id": "<span:2>",
"input": {
"location": "Paris, France"
Expand Down
31 changes: 28 additions & 3 deletions e2e/scenarios/anthropic-instrumentation/assertions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,10 @@ function summarizeAnthropicPayload(event: CapturedLogEvent): Json {
const output = structuredClone(
summary.output as {
content: Array<{
caller?: unknown;
input?: Record<string, unknown>;
name?: string;
id?: string;
text?: string;
type?: string;
thinking?: string;
Expand Down Expand Up @@ -124,6 +128,18 @@ function summarizeAnthropicPayload(event: CapturedLogEvent): Json {
return summary;
}

// `caller` is only present in newer Anthropic SDK responses.
// Drop it so payload snapshots stay stable across SDK versions.
for (const block of output.content) {
if (
(block.type === "tool_use" || block.type === "server_tool_use") &&
"caller" in block
) {
delete block.caller;
}
}
summary.output = output as Json;

const textBlock = output.content.find(
(block) => block.type === "text" && typeof block.text === "string",
);
Expand Down Expand Up @@ -564,9 +580,18 @@ export function defineAnthropicInstrumentationAssertions(options: {
expect(span?.row.metadata).toMatchObject({
provider: "anthropic",
});
expect(span?.metrics).toMatchObject({
server_tool_use_web_search_requests: expect.any(Number),
});
const metrics = (span?.metrics ?? {}) as Record<string, unknown>;
if ("server_tool_use_web_search_requests" in metrics) {
expect(metrics.server_tool_use_web_search_requests).toEqual(
expect.any(Number),
);
} else {
expect(metrics).toMatchObject({
completion_tokens: expect.any(Number),
prompt_tokens: expect.any(Number),
tokens: expect.any(Number),
});
}
expect(
output?.content?.some(
(block) =>
Expand Down
59 changes: 45 additions & 14 deletions e2e/scenarios/openai-instrumentation/assertions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -342,21 +342,20 @@ function summarizeChatOutput(output: Json): Json {
}) satisfies Json;
}

function summarizeResponsesOutput(output: Json): Json {
function summarizeResponsesOutput(
output: Json,
options?: {
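// Normalizes summaries across SDK versions: when true, drops completed
// assistant "message" items whose only content type is "output_text" and
// from which no JSON keys were extracted.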
dropEmptyOutputTextMessages?: boolean;
},
): Json {
if (!Array.isArray(output)) {
return null;
}

// Deduplicate identical items — the Responses API occasionally returns
// duplicate output entries (e.g., two identical "message" items when
// streaming), which would cause non-deterministic snapshot failures.
const seen = new Set<string>();
const result: Json[] = [];

for (const item of output) {
const summaries = output.map((item) => {
if (!isRecord(item as Json)) {
result.push(null);
continue;
return null;
}

const content = Array.isArray(item.content) ? item.content : [];
Expand All @@ -369,22 +368,49 @@ function summarizeResponsesOutput(output: Json): Json {
isRecord(entry as Json) ? jsonKeysFromText(entry.text) : [],
);

const summarized = {
return {
content_types: contentTypes,
json_keys: [...new Set(jsonKeys)].sort(),
role: item.role ?? null,
status: item.status ?? null,
type: item.type ?? null,
} satisfies Json;
});

const filtered = options?.dropEmptyOutputTextMessages
? summaries.filter((item) => {
if (!isRecord(item as Json)) {
return true;
}

return !(
item.role === "assistant" &&
item.status === "completed" &&
item.type === "message" &&
Array.isArray(item.content_types) &&
item.content_types.length === 1 &&
item.content_types[0] === "output_text" &&
Array.isArray(item.json_keys) &&
item.json_keys.length === 0
);
})
: summaries;

// Deduplicate identical items — the Responses API occasionally returns
// duplicate output entries (e.g., two identical "message" items when
// streaming), which would cause non-deterministic snapshot failures.
const seen = new Set<string>();
const deduped: Json[] = [];

for (const summarized of filtered) {
const key = JSON.stringify(summarized);
if (!seen.has(key)) {
seen.add(key);
result.push(summarized);
deduped.push(summarized);
}
}

return result;
return deduped;
}

function summarizeOutput(name: string, output: Json): Json {
Expand Down Expand Up @@ -413,12 +439,17 @@ function summarizeOutput(name: string, output: Json): Json {

if (
name === "openai.responses.create" ||
name === "openai.responses.parse" ||
name === "openai.responses.compact"
) {
return summarizeResponsesOutput(output);
}

if (name === "openai.responses.parse") {
return summarizeResponsesOutput(output, {
dropEmptyOutputTextMessages: true,
});
}

return output === null || output === undefined
? null
: ({ kind: typeof output } satisfies Json);
Expand Down
45 changes: 45 additions & 0 deletions js/src/instrumentation/plugins/anthropic-plugin.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -621,6 +621,51 @@ describe("aggregateAnthropicStreamChunks", () => {
});
});

// Checks that `input_json_delta` fragments streamed for a `server_tool_use`
// block are joined and parsed into that block's `input` object.
it("should parse streamed input_json_delta for server_tool_use blocks", () => {
const chunks = [
{
type: "content_block_start",
index: 0,
content_block: {
type: "server_tool_use",
id: "srvtoolu_abc123",
name: "web_search",
// Starts empty; the expected output below carries the parsed deltas here.
input: {},
},
},
{
type: "content_block_delta",
index: 0,
delta: {
type: "input_json_delta",
// First half of the JSON object; not valid JSON on its own.
partial_json: '{"query":"braintrust"',
},
},
{
type: "content_block_delta",
index: 0,
delta: {
type: "input_json_delta",
// Second half; only the concatenation of both fragments parses.
partial_json: ',"max_uses":1}',
},
},
{ type: "content_block_stop", index: 0 },
];

const result = aggregateAnthropicStreamChunks(chunks);

// The block keeps its type, id and name; `input` holds the parsed object.
expect(result.output).toEqual({
content: [
{
type: "server_tool_use",
id: "srvtoolu_abc123",
name: "web_search",
input: { query: "braintrust", max_uses: 1 },
},
],
});
});

it("should preserve web_search_tool_result blocks without deltas", () => {
const chunks = [
{
Expand Down
37 changes: 30 additions & 7 deletions js/src/instrumentation/plugins/anthropic-plugin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,13 @@ type ContentBlockAccumulator = {
citations: AnthropicCitation[];
};

/**
 * Common shape of `tool_use` and `server_tool_use` content blocks.
 *
 * This is the narrowing target of `isToolUseLikeContentBlock` and the shape
 * `finalizeContentBlock` writes back once the streamed input text has been
 * parsed into an object.
 */
type ToolUseLikeContentBlock = {
// Discriminant covering both tool-use variants.
type: "tool_use" | "server_tool_use";
id: string;
name: string;
// Tool arguments as an object (never a raw JSON string).
input: Record<string, unknown>;
};

export function aggregateAnthropicStreamChunks(
chunks: AnthropicStreamEvent[],
): {
Expand Down Expand Up @@ -300,16 +307,26 @@ function finalizeContentBlock(
const acc = contentBlockDeltas[index];
const text = acc?.textDeltas.join("") ?? "";

if (isToolUseContentBlock(contentBlock)) {
if (isToolUseLikeContentBlock(contentBlock)) {
if (!text) {
return;
}

try {
contentBlocks[index] = {
...contentBlock,
input: JSON.parse(text),
const parsedInput = JSON.parse(text) as unknown;
if (!isObject(parsedInput)) {
fallbackTextDeltas.push(text);
delete contentBlocks[index];
return;
}

const parsedToolUseBlock: ToolUseLikeContentBlock = {
type: contentBlock.type,
id: contentBlock.id,
name: contentBlock.name,
input: parsedInput,
};
contentBlocks[index] = parsedToolUseBlock;
} catch {
fallbackTextDeltas.push(text);
delete contentBlocks[index];
Expand Down Expand Up @@ -361,10 +378,16 @@ function isTextContentBlock(
return contentBlock.type === "text";
}

function isToolUseContentBlock(
function isToolUseLikeContentBlock(
contentBlock: AnthropicOutputContentBlock,
): contentBlock is Extract<AnthropicOutputContentBlock, { type: "tool_use" }> {
return contentBlock.type === "tool_use";
): contentBlock is ToolUseLikeContentBlock {
return (
(contentBlock.type === "tool_use" ||
contentBlock.type === "server_tool_use") &&
typeof (contentBlock as { id?: unknown }).id === "string" &&
typeof (contentBlock as { name?: unknown }).name === "string" &&
isObject((contentBlock as { input?: unknown }).input)
);
}

function isThinkingContentBlock(
Expand Down
Loading
Loading