diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json index 240e39cf8..68fbc65d5 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json @@ -213,6 +213,21 @@ "type": "task" }, "subagent": { + "handoff_tool": { + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "Task" + }, + "metric_keys": [], + "name": "tool: Task", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, "llm": { "has_input": true, "has_output": true, @@ -229,9 +244,9 @@ ], "name": "anthropic.messages.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -246,9 +261,9 @@ "metric_keys": [], "name": "Agent: ", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "task" }, @@ -261,7 +276,7 @@ "metric_keys": [], "name": "claude-agent-subagent-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -276,9 +291,9 @@ "metric_keys": [], "name": "Claude Agent", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "task" }, @@ -292,9 +307,9 @@ "metric_keys": [], "name": "tool: calculator/calculator", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "tool" } diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json index b9a37d5fc..e947c64e5 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json @@ -213,6 +213,21 @@ "type": "task" }, "subagent": { + "handoff_tool": { + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "Agent" + }, + "metric_keys": [], + "name": "tool: Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, "llm": { "has_input": true, "has_output": true, @@ -229,9 +244,9 @@ ], "name": "anthropic.messages.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -248,9 +263,9 @@ "metric_keys": [], "name": "Agent: ", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "task" }, @@ -263,7 +278,7 @@ "metric_keys": [], "name": "claude-agent-subagent-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -278,9 +293,9 @@ "metric_keys": [], "name": "Claude Agent", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "task" }, @@ -294,9 +309,9 @@ "metric_keys": [], "name": "tool: calculator/calculator", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "tool" } diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json index b9a37d5fc..e947c64e5 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json @@ -213,6 +213,21 @@ "type": "task" }, "subagent": { + "handoff_tool": { + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "Agent" + }, + "metric_keys": [], + "name": "tool: Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, "llm": { "has_input": true, "has_output": true, @@ -229,9 +244,9 @@ ], "name": "anthropic.messages.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -248,9 +263,9 @@ "metric_keys": [], "name": "Agent: ", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "task" }, @@ -263,7 +278,7 @@ "metric_keys": [], "name": "claude-agent-subagent-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -278,9 +293,9 @@ "metric_keys": [], "name": "Claude Agent", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "task" }, @@ -294,9 +309,9 @@ "metric_keys": [], "name": "tool: calculator/calculator", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "tool" } diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json index b9a37d5fc..e947c64e5 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json @@ -213,6 +213,21 @@ "type": "task" }, "subagent": { + "handoff_tool": { + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "Agent" + }, + "metric_keys": [], + "name": "tool: Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, "llm": { "has_input": true, "has_output": true, @@ -229,9 +244,9 @@ ], "name": "anthropic.messages.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -248,9 +263,9 @@ "metric_keys": [], "name": "Agent: ", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "task" }, @@ -263,7 +278,7 @@ "metric_keys": [], "name": "claude-agent-subagent-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -278,9 +293,9 @@ "metric_keys": [], "name": "Claude Agent", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "task" }, @@ -294,9 +309,9 @@ "metric_keys": [], "name": "tool: calculator/calculator", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "tool" } diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts index ed56c6a04..11efcb451 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts @@ -158,6 +158,50 @@ function findToolSpanByLocalHandler( ); } +function findSpanById( + events: CapturedLogEvent[], + spanId: string | undefined, +): CapturedLogEvent | undefined { + if (!spanId) { + return undefined; + } + + return events.find((event) => event.span.id === spanId); +} + +function hasSubAgentHandoffToolName( + event: CapturedLogEvent | undefined, +): boolean { + if (event?.span.type !== "tool") { + return false; + } + + const metadata = event.row.metadata as Record | undefined; + return ( + metadata?.["gen_ai.tool.name"] === "Agent" || + metadata?.["gen_ai.tool.name"] === "Task" + ); +} + +function findSubAgentTaskSpan( + events: CapturedLogEvent[], +): CapturedLogEvent | undefined { + return events.find( + (event) => + event.span.type === "task" && event.span.name?.startsWith("Agent:"), + ); +} + +function findSubAgentHandoffTool( + events: CapturedLogEvent[], + subAgentTask: CapturedLogEvent | undefined, +): CapturedLogEvent | undefined { + const parentId = subAgentTask?.span.parentIds[0]; + const parentSpan = findSpanById(events, parentId); + + return hasSubAgentHandoffToolName(parentSpan) ? parentSpan : undefined; +} + function buildSpanSummary(events: CapturedLogEvent[]): Json { const root = findLatestSpan(events, ROOT_NAME); const basicOperation = findLatestSpan(events, "claude-agent-basic-operation"); @@ -208,9 +252,13 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { const input = event.input as Array<{ content?: string }> | undefined; return Array.isArray(input) && input.some((item) => item.content); }); - const subAgentLlm = findAllSpans(events, "anthropic.messages.create").find( - (event) => event.span.parentIds.includes(subAgentTaskRoot?.span.id ?? ""), - ); + const subAgentTask = findSubAgentTaskSpan(events); + const subAgentLlm = findChildSpans( + events, + "anthropic.messages.create", + subAgentTask?.span.id, + ).at(-1); + const subAgentHandoffTool = findSubAgentHandoffTool(events, subAgentTask); const failureLlm = findChildSpans( events, "anthropic.messages.create", @@ -220,12 +268,6 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { const basicTool = findToolSpanByLocalHandler(events, "calculator-local-handler-multiply") ?? findToolSpanByOperation(events, "multiply"); - const subAgentTask = events.find( - (event) => - event.span.type === "task" && - event.span.parentIds.includes(subAgentTaskRoot?.span.id ?? "") && - event.span.name?.startsWith("Agent:"), - ); const subAgentTool = findToolSpanByLocalHandler(events, "calculator-local-handler-add") ?? findToolSpanByOperation(events, "add"); @@ -253,6 +295,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { }, root: summarizeSpan(root), subagent: { + handoff_tool: summarizeSpan(subAgentHandoffTool), llm: summarizeSpan(subAgentLlm), nested_task: summarizeSpan(subAgentTask), operation: summarizeSpan(subAgentOperation), @@ -412,12 +455,13 @@ export function defineClaudeAgentSDKInstrumentationAssertions(options: { const llm = findAllSpans(events, "anthropic.messages.create").find( (event) => event.span.parentIds.includes(taskRoot?.span.id ?? ""), ); - const nestedTask = events.find( - (event) => - event.span.type === "task" && - event.span.parentIds.includes(taskRoot?.span.id ?? "") && - event.span.name?.startsWith("Agent:"), - ); + const nestedTask = findSubAgentTaskSpan(events); + const handoffTool = findSubAgentHandoffTool(events, nestedTask); + const nestedTaskLlm = findChildSpans( + events, + "anthropic.messages.create", + nestedTask?.span.id, + ).at(-1); const tool = findToolSpanByLocalHandler(events, "calculator-local-handler-add") ?? findToolSpanByOperation(events, "add"); @@ -438,6 +482,16 @@ export function defineClaudeAgentSDKInstrumentationAssertions(options: { // Rich lifecycle naming should avoid the old coarse fallback label. expect(nestedTask?.span.name).not.toBe("Agent: sub-agent"); } + expect(handoffTool).toBeDefined(); + expect(hasSubAgentHandoffToolName(handoffTool)).toBe(true); + expect(nestedTask?.span.parentIds).toEqual([handoffTool?.span.id ?? ""]); + expect(nestedTaskLlm).toBeDefined(); + expect(nestedTaskLlm?.span.parentIds).toContain( + nestedTask?.span.id ?? "", + ); + expect(nestedTaskLlm?.span.parentIds).not.toContain( + taskRoot?.span.id ?? "", + ); if (tool) { expect(tool.span.parentIds).not.toContain(taskRoot?.span.id ?? ""); if (toolParent?.span.type === "llm") { diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.log-payloads.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.log-payloads.json index 19d6a3806..6fde9fa53 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.log-payloads.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.log-payloads.json @@ -78,14 +78,11 @@ "provider": "mistral" }, "metric_keys": [ - "completion_tokens", - "prompt_tokens", - "time_to_first_token", - "tokens" + "time_to_first_token" ], "output": { "choice_count": 1, - "finish_reason": "stop", + "finish_reason": null, "has_content": true, "role": "assistant", "tool_call_count": 0, diff --git a/e2e/scenarios/mistral-instrumentation/assertions.ts b/e2e/scenarios/mistral-instrumentation/assertions.ts index da757b9f9..13f31b1b1 100644 --- a/e2e/scenarios/mistral-instrumentation/assertions.ts +++ b/e2e/scenarios/mistral-instrumentation/assertions.ts @@ -232,9 +232,14 @@ function normalizeLegacyV134PayloadSummaryRow( snapshotName: string, spanName: string | undefined, ): Json { + const unstableLegacyV134SpanNames = new Set([ + "mistral.chat.stream", + "mistral.fim.stream", + ]); if ( snapshotName !== "mistral-v1-3-4" || - spanName !== "mistral.fim.stream" || + !spanName || + !unstableLegacyV134SpanNames.has(spanName) || !isRecord(summaryRow) ) { return summaryRow; diff --git a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts index 6a19ceb72..06e71bded 100644 --- a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts +++ b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts @@ -58,7 +58,7 @@ function llmParentKey(parentToolUseId: string | null): string { return parentToolUseId ?? ROOT_LLM_PARENT_KEY; } -function isSubAgentToolName(toolName: string): boolean { +function isSubAgentDelegationToolName(toolName: string): boolean { return toolName === "Agent" || toolName === "Task"; } @@ -464,10 +464,6 @@ function createToolTracingHooks( return {}; } - if (isSubAgentToolName(input.tool_name)) { - return {}; - } - const parsed = parseToolName(input.tool_name); const toolSpan = startSpan({ event: { @@ -505,6 +501,7 @@ function createToolTracingHooks( } const subAgentSpan = subAgentSpans.get(toolUseID); + const toolSpan = activeToolSpans.get(toolUseID); if (subAgentSpan) { if (endedSubAgentSpans.has(toolUseID)) { return {}; @@ -539,10 +536,18 @@ function createToolTracingHooks( endedSubAgentSpans.add(toolUseID); } + if (toolSpan) { + try { + toolSpan.log({ output: input.tool_response }); + } finally { + toolSpan.end(); + activeToolSpans.delete(toolUseID); + } + } + return {}; } - const toolSpan = activeToolSpans.get(toolUseID); if (!toolSpan) { return {}; } @@ -574,6 +579,7 @@ function createToolTracingHooks( } const subAgentSpan = subAgentSpans.get(toolUseID); + const toolSpan = activeToolSpans.get(toolUseID); if (subAgentSpan) { if (endedSubAgentSpans.has(toolUseID)) { return {}; @@ -591,10 +597,29 @@ function createToolTracingHooks( endedSubAgentSpans.add(toolUseID); } + if (toolSpan) { + const parsed = parseToolName(input.tool_name); + try { + toolSpan.log({ + error: input.error, + metadata: { + "claude_agent_sdk.is_interrupt": input.is_interrupt, + "claude_agent_sdk.session_id": input.session_id, + "claude_agent_sdk.raw_tool_name": parsed.rawToolName, + "gen_ai.tool.call.id": toolUseID, + "gen_ai.tool.name": parsed.toolName, + ...(parsed.mcpServer && { "mcp.server": parsed.mcpServer }), + }, + }); + } finally { + toolSpan.end(); + activeToolSpans.delete(toolUseID); + } + } + return {}; } - const toolSpan = activeToolSpans.get(toolUseID); if (!toolSpan) { return {}; } @@ -865,7 +890,7 @@ function maybeTrackToolUseContext( if ( typeof block.name === "string" && - isSubAgentToolName(block.name) && + isSubAgentDelegationToolName(block.name) && typeof block.input === "object" && block.input !== null ) { @@ -894,6 +919,7 @@ async function maybeStartSubAgentSpan( await ensureSubAgentSpan( state.subAgentDetailsByToolUseId, state.span, + state.activeToolSpans, state.subAgentSpans, parentToolUseId, ); @@ -902,6 +928,7 @@ async function maybeStartSubAgentSpan( async function ensureSubAgentSpan( subAgentDetailsByToolUseId: Map, rootSpan: Span, + activeToolSpans: Map, subAgentSpans: Map, parentToolUseId: string, ): Promise { @@ -912,13 +939,17 @@ async function ensureSubAgentSpan( const details = subAgentDetailsByToolUseId.get(parentToolUseId); const spanName = formatSubAgentSpanName(details); + const parentToolSpan = activeToolSpans.get(parentToolUseId); + const parentSpan = parentToolSpan + ? await parentToolSpan.export() + : await rootSpan.export(); const subAgentSpan = startSpan({ event: { metadata: subAgentDetailsToMetadata(details), }, name: spanName, - parent: await rootSpan.export(), + parent: parentSpan, spanAttributes: { type: SpanTypeAttribute.TASK }, }); @@ -961,6 +992,7 @@ async function maybeHandleTaskLifecycleMessage( const subAgentSpan = await ensureSubAgentSpan( state.subAgentDetailsByToolUseId, state.span, + state.activeToolSpans, state.subAgentSpans, toolUseId, ); @@ -1069,6 +1101,7 @@ async function handleStreamMessage( const subAgentSpan = await ensureSubAgentSpan( state.subAgentDetailsByToolUseId, state.span, + state.activeToolSpans, state.subAgentSpans, parentToolUseId, ); @@ -1167,6 +1200,11 @@ async function finalizeQuerySpan(state: QueryState): Promise { } state.activeLlmSpansByParentToolUse.clear(); + for (const toolSpan of state.activeToolSpans.values()) { + toolSpan.end(); + } + state.activeToolSpans.clear(); + for (const [id, subAgentSpan] of state.subAgentSpans) { if (!state.endedSubAgentSpans.has(id)) { subAgentSpan.end(); @@ -1287,6 +1325,7 @@ export class ClaudeAgentSDKPlugin extends BasePlugin { const subAgentSpan = await ensureSubAgentSpan( subAgentDetailsByToolUseId, span, + activeToolSpans, subAgentSpans, parentToolUseId, ); diff --git a/js/src/wrappers/claude-agent-sdk/claude-agent-sdk.test.ts b/js/src/wrappers/claude-agent-sdk/claude-agent-sdk.test.ts index 9f9f8c05a..9c0c7acd3 100644 --- a/js/src/wrappers/claude-agent-sdk/claude-agent-sdk.test.ts +++ b/js/src/wrappers/claude-agent-sdk/claude-agent-sdk.test.ts @@ -954,10 +954,6 @@ describe.skipIf(!claudeSDK)("claude-agent-sdk integration tests", () => { ); expect(subAgentSpan).toBeDefined(); - // Sub-agent should be a child of root - expect(subAgentSpan!.root_span_id).toBe(rootSpan!.span_id); - expect(subAgentSpan!.span_parents).toContain(rootSpan!.span_id); - // There should be LLM spans under the sub-agent const subAgentLlmSpans = spans.filter( (s) => @@ -997,6 +993,35 @@ describe.skipIf(!claudeSDK)("claude-agent-sdk integration tests", () => { return false; }; + // Sub-agent should stay in the root trace and be directly parented by + // either the root span (legacy) or the Task/Agent handoff tool span. + expect(subAgentSpan!.root_span_id).toBe(rootSpan!.span_id); + const subAgentParentIds = + (subAgentSpan!.span_parents as string[] | undefined) ?? []; + const hasRootDirectParent = subAgentParentIds.includes( + rootSpan!.span_id as string, + ); + const hasHandoffToolDirectParent = subAgentParentIds.some((parentId) => { + const parentSpan = spanById.get(parentId); + if (!parentSpan) { + return false; + } + const spanAttributes = parentSpan["span_attributes"] as Record< + string, + unknown + >; + const metadata = parentSpan.metadata as Record; + return ( + spanAttributes.type === "tool" && + (metadata["gen_ai.tool.name"] === "Task" || + metadata["gen_ai.tool.name"] === "Agent") + ); + }); + expect(hasRootDirectParent || hasHandoffToolDirectParent).toBe(true); + expect(isDescendantOf(subAgentSpan!, rootSpan!.span_id as string)).toBe( + true, + ); + // Local tool spans should be nested under the sub-agent. const subAgentToolSpans = spans.filter( (s) =>