braintrustdata · Stephen Belanger (Qard) · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026
diff --git a/e2e/scenarios/openai-instrumentation/assertions.ts b/e2e/scenarios/openai-instrumentation/assertions.ts
@@ -315,9 +315,16 @@ function summarizeResponsesOutput(output: Json): Json {
     return null;
   }
 
-  return output.map((item) => {
+  // Drop entries whose summary was already emitted — the Responses API can
+  // return duplicate output items (e.g., two identical "message" items when
+  // streaming), which would cause non-deterministic snapshot failures.
+  const seen = new Set<string>();
+  const result: Json[] = [];
+
+  for (const item of output) {
     if (!isRecord(item as Json)) {
-      return null;
+      result.push(null);
+      continue;
     }
 
     const content = Array.isArray(item.content) ? item.content : [];
@@ -330,14 +337,22 @@ function summarizeResponsesOutput(output: Json): Json {
       isRecord(entry as Json) ? jsonKeysFromText(entry.text) : [],
     );
 
-    return {
+    const summarized = {
       content_types: contentTypes,
       json_keys: [...new Set(jsonKeys)].sort(),
       role: item.role ?? null,
       status: item.status ?? null,
       type: item.type ?? null,
     } satisfies Json;
-  }) satisfies Json;
+
+    const key = JSON.stringify(summarized);
+    if (!seen.has(key)) {
+      seen.add(key);
+      result.push(summarized);
+    }
+  }
+
+  return result;
 }
 
 function summarizeOutput(name: string, output: Json): Json {

diff --git a/e2e/scenarios/openai-instrumentation/scenario.test.ts b/e2e/scenarios/openai-instrumentation/scenario.test.ts
@@ -9,7 +9,7 @@ import { defineOpenAIInstrumentationAssertions } from "./assertions";
 const scenarioDir = await prepareScenarioDir({
   scenarioDir: resolveScenarioDir(import.meta.url),
 });
-const TIMEOUT_MS = 60_000;
+const TIMEOUT_MS = 120_000;
 const openaiScenarios = await Promise.all(
   [
     {

diff --git a/js/src/wrappers/ai-sdk/ai-sdk.test.ts b/js/src/wrappers/ai-sdk/ai-sdk.test.ts
@@ -1537,122 +1537,130 @@ describe("ai sdk client unit tests", TEST_SUITE_OPTIONS, () => {
   // Once processInputAttachments is made async and properly handles the Promise,
   // we should verify that the schema is serialized correctly in the logs.
 
-  test("ai sdk multi-round tool use with metrics", async () => {
-    expect(await backgroundLogger.drain()).toHaveLength(0);
+  test(
+    "ai sdk multi-round tool use with metrics",
+    { timeout: 60000 },
+    async () => {
+      expect(await backgroundLogger.drain()).toHaveLength(0);
 
-    const getStorePriceTool = ai.tool({
-      description: "Get the price of an item from a specific store",
-      inputSchema: z.object({
-        store: z.string().describe("The store name (e.g., 'StoreA', 'StoreB')"),
-        item: z.string().describe("The item to get the price for"),
-      }),
-      execute: async (args: { store: string; item: string }) => {
-        const prices: Record<string, Record<string, number>> = {
-          StoreA: { laptop: 999, mouse: 25, keyboard: 75 },
-          StoreB: { laptop: 1099, mouse: 20, keyboard: 80 },
-        };
-        const price = prices[args.store]?.[args.item] ?? 0;
-        return JSON.stringify({ store: args.store, item: args.item, price });
-      },
-    });
+      const getStorePriceTool = ai.tool({
+        description: "Get the price of an item from a specific store",
+        inputSchema: z.object({
+          store: z
+            .string()
+            .describe("The store name (e.g., 'StoreA', 'StoreB')"),
+          item: z.string().describe("The item to get the price for"),
+        }),
+        execute: async (args: { store: string; item: string }) => {
+          const prices: Record<string, Record<string, number>> = {
+            StoreA: { laptop: 999, mouse: 25, keyboard: 75 },
+            StoreB: { laptop: 1099, mouse: 20, keyboard: 80 },
+          };
+          const price = prices[args.store]?.[args.item] ?? 0;
+          return JSON.stringify({ store: args.store, item: args.item, price });
+        },
+      });
 
-    const applyDiscountTool = ai.tool({
-      description: "Apply a discount code to a total amount",
-      inputSchema: z.object({
-        total: z.number().describe("The total amount before discount"),
-        discountCode: z.string().describe("The discount code to apply"),
-      }),
-      execute: async (args: { total: number; discountCode: string }) => {
-        const discounts: Record<string, number> = {
-          SAVE10: 0.1,
-          SAVE20: 0.2,
-        };
-        const discountRate = discounts[args.discountCode] ?? 0;
-        const finalTotal = args.total - args.total * discountRate;
-        return JSON.stringify({
-          originalTotal: args.total,
-          discountCode: args.discountCode,
-          finalTotal,
-        });
-      },
-    });
+      const applyDiscountTool = ai.tool({
+        description: "Apply a discount code to a total amount",
+        inputSchema: z.object({
+          total: z.number().describe("The total amount before discount"),
+          discountCode: z.string().describe("The discount code to apply"),
+        }),
+        execute: async (args: { total: number; discountCode: string }) => {
+          const discounts: Record<string, number> = {
+            SAVE10: 0.1,
+            SAVE20: 0.2,
+          };
+          const discountRate = discounts[args.discountCode] ?? 0;
+          const finalTotal = args.total - args.total * discountRate;
+          return JSON.stringify({
+            originalTotal: args.total,
+            discountCode: args.discountCode,
+            finalTotal,
+          });
+        },
+      });
 
-    const model = openai(TEST_MODEL);
-    const start = getCurrentUnixTimestamp();
+      const model = openai(TEST_MODEL);
+      const start = getCurrentUnixTimestamp();
 
-    const result = await wrappedAI.generateText({
-      model,
-      system:
-        "You are a shopping assistant. When asked about prices, always get the price from each store mentioned using get_store_price, then apply any discount codes using apply_discount. Use the tools provided.",
-      tools: {
-        get_store_price: getStorePriceTool,
-        apply_discount: applyDiscountTool,
-      },
-      toolChoice: "required",
-      prompt:
-        "I want to buy a laptop. Get the price from StoreA and StoreB, then apply the discount code SAVE20 to whichever is cheaper.",
-      stopWhen: ai.stepCountIs(6),
-    });
+      const result = await wrappedAI.generateText({
+        model,
+        system:
+          "You are a shopping assistant. When asked about prices, always get the price from each store mentioned using get_store_price, then apply any discount codes using apply_discount. Use the tools provided.",
+        tools: {
+          get_store_price: getStorePriceTool,
+          apply_discount: applyDiscountTool,
+        },
+        toolChoice: "required",
+        prompt:
+          "I want to buy a laptop. Get the price from StoreA and StoreB, then apply the discount code SAVE20 to whichever is cheaper.",
+        stopWhen: ai.stepCountIs(6),
+      });
 
-    const end = getCurrentUnixTimestamp();
-    assert.ok(result);
+      const end = getCurrentUnixTimestamp();
+      assert.ok(result);
 
-    const spans = await backgroundLogger.drain();
+      const spans = await backgroundLogger.drain();
 
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const llmSpans = spans.filter(
-      (s: any) =>
-        s.span_attributes?.type === "llm" &&
-        s.span_attributes?.name === "doGenerate",
-    );
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const toolSpans = spans.filter(
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (s: any) => s.span_attributes?.type === "tool",
-    );
-
-    // Should have multiple doGenerate spans - one per LLM round/step
-    // This allows visualizing the LLM ↔ tool roundtrips
-    expect(llmSpans.length).toBeGreaterThanOrEqual(2);
-
-    // Should have tool spans for get_store_price calls (at least 2 for StoreA and StoreB)
-    expect(toolSpans.length).toBeGreaterThanOrEqual(2);
-
-    // Verify each doGenerate span has its own metrics
-    for (const llmSpan of llmSpans) {
+      const llmSpans = spans.filter(
+        (s: any) =>
+          s.span_attributes?.type === "llm" &&
+          s.span_attributes?.name === "doGenerate",
+      );
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      const span = llmSpan as any;
-      expect(span.metrics).toBeDefined();
-      expect(span.metrics.start).toBeDefined();
-      expect(span.metrics.end).toBeDefined();
-      expect(start).toBeLessThanOrEqual(span.metrics.start);
-      expect(span.metrics.end).toBeLessThanOrEqual(end);
-
-      // Token metrics structure varies by AI SDK version
-      // v5: metrics.tokens, prompt_tokens, completion_tokens are defined
-      // v6: metrics structure may differ - see v5-specific tests for strict assertions
-    }
+      const toolSpans = spans.filter(
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        (s: any) => s.span_attributes?.type === "tool",
+      );
 
-    // Verify tool spans have the expected structure
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const storePriceSpans = toolSpans.filter(
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (s: any) => s.span_attributes?.name === "get_store_price",
-    );
-    expect(storePriceSpans.length).toBeGreaterThanOrEqual(2);
+      // Should have multiple doGenerate spans - one per LLM round/step
+      // This allows visualizing the LLM ↔ tool roundtrips
+      expect(llmSpans.length).toBeGreaterThanOrEqual(2);
+
+      // Should have at least 2 tool spans of any kind (the get_store_price calls for StoreA and StoreB)
+      expect(toolSpans.length).toBeGreaterThanOrEqual(2);
+
+      // Verify each doGenerate span has its own metrics
+      for (const llmSpan of llmSpans) {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const span = llmSpan as any;
+        expect(span.metrics).toBeDefined();
+        expect(span.metrics.start).toBeDefined();
+        expect(span.metrics.end).toBeDefined();
+        expect(start).toBeLessThanOrEqual(span.metrics.start);
+        expect(span.metrics.end).toBeLessThanOrEqual(end);
+
+        // Token metrics structure varies by AI SDK version
+        // v5: metrics.tokens, prompt_tokens, completion_tokens are defined
+        // v6: metrics structure may differ - see v5-specific tests for strict assertions
+      }
 
-    // Verify tool spans have input/output
-    for (const toolSpan of storePriceSpans) {
+      // Verify tool spans have the expected structure
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      const span = toolSpan as any;
-      expect(span.input).toBeDefined();
-      expect(span.output).toBeDefined();
-
-      const inputData = Array.isArray(span.input) ? span.input[0] : span.input;
-      expect(inputData.store).toMatch(/^Store[AB]$/);
-      expect(inputData.item).toBe("laptop");
-    }
-  });
+      const storePriceSpans = toolSpans.filter(
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        (s: any) => s.span_attributes?.name === "get_store_price",
+      );
+      expect(storePriceSpans.length).toBeGreaterThanOrEqual(2);
+
+      // Verify tool spans have input/output
+      for (const toolSpan of storePriceSpans) {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const span = toolSpan as any;
+        expect(span.input).toBeDefined();
+        expect(span.output).toBeDefined();
+
+        const inputData = Array.isArray(span.input)
+          ? span.input[0]
+          : span.input;
+        expect(inputData.store).toMatch(/^Store[AB]$/);
+        expect(inputData.item).toBe("laptop");
+      }
+    },
+  );
 
   test("ai sdk multi-round tool use span hierarchy", async () => {
     expect(await backgroundLogger.drain()).toHaveLength(0);