braintrustdata · Luca Forstner (lforst) · Apr 10, 2026 · Apr 10, 2026
diff --git a/js/src/wrappers/ai-sdk/ai-sdk.test.ts b/js/src/wrappers/ai-sdk/ai-sdk.test.ts
@@ -1537,122 +1537,130 @@ describe("ai sdk client unit tests", TEST_SUITE_OPTIONS, () => {
   // Once processInputAttachments is made async and properly handles the Promise,
   // we should verify that the schema is serialized correctly in the logs.
 
-  test("ai sdk multi-round tool use with metrics", async () => {
-    expect(await backgroundLogger.drain()).toHaveLength(0);
+  test(
+    "ai sdk multi-round tool use with metrics",
+    { timeout: 180000, retry: 0 },
+    async () => {
+      expect(await backgroundLogger.drain()).toHaveLength(0);
 
-    const getStorePriceTool = ai.tool({
-      description: "Get the price of an item from a specific store",
-      inputSchema: z.object({
-        store: z.string().describe("The store name (e.g., 'StoreA', 'StoreB')"),
-        item: z.string().describe("The item to get the price for"),
-      }),
-      execute: async (args: { store: string; item: string }) => {
-        const prices: Record<string, Record<string, number>> = {
-          StoreA: { laptop: 999, mouse: 25, keyboard: 75 },
-          StoreB: { laptop: 1099, mouse: 20, keyboard: 80 },
-        };
-        const price = prices[args.store]?.[args.item] ?? 0;
-        return JSON.stringify({ store: args.store, item: args.item, price });
-      },
-    });
+      const getStorePriceTool = ai.tool({
+        description: "Get the price of an item from a specific store",
+        inputSchema: z.object({
+          store: z
+            .string()
+            .describe("The store name (e.g., 'StoreA', 'StoreB')"),
+          item: z.string().describe("The item to get the price for"),
+        }),
+        execute: async (args: { store: string; item: string }) => {
+          const prices: Record<string, Record<string, number>> = {
+            StoreA: { laptop: 999, mouse: 25, keyboard: 75 },
+            StoreB: { laptop: 1099, mouse: 20, keyboard: 80 },
+          };
+          const price = prices[args.store]?.[args.item] ?? 0;
+          return JSON.stringify({ store: args.store, item: args.item, price });
+        },
+      });
 
-    const applyDiscountTool = ai.tool({
-      description: "Apply a discount code to a total amount",
-      inputSchema: z.object({
-        total: z.number().describe("The total amount before discount"),
-        discountCode: z.string().describe("The discount code to apply"),
-      }),
-      execute: async (args: { total: number; discountCode: string }) => {
-        const discounts: Record<string, number> = {
-          SAVE10: 0.1,
-          SAVE20: 0.2,
-        };
-        const discountRate = discounts[args.discountCode] ?? 0;
-        const finalTotal = args.total - args.total * discountRate;
-        return JSON.stringify({
-          originalTotal: args.total,
-          discountCode: args.discountCode,
-          finalTotal,
-        });
-      },
-    });
+      const applyDiscountTool = ai.tool({
+        description: "Apply a discount code to a total amount",
+        inputSchema: z.object({
+          total: z.number().describe("The total amount before discount"),
+          discountCode: z.string().describe("The discount code to apply"),
+        }),
+        execute: async (args: { total: number; discountCode: string }) => {
+          const discounts: Record<string, number> = {
+            SAVE10: 0.1,
+            SAVE20: 0.2,
+          };
+          const discountRate = discounts[args.discountCode] ?? 0;
+          const finalTotal = args.total - args.total * discountRate;
+          return JSON.stringify({
+            originalTotal: args.total,
+            discountCode: args.discountCode,
+            finalTotal,
+          });
+        },
+      });
 
-    const model = openai(TEST_MODEL);
-    const start = getCurrentUnixTimestamp();
+      const model = openai(TEST_MODEL);
+      const start = getCurrentUnixTimestamp();
 
-    const result = await wrappedAI.generateText({
-      model,
-      system:
-        "You are a shopping assistant. When asked about prices, always get the price from each store mentioned using get_store_price, then apply any discount codes using apply_discount. Use the tools provided.",
-      tools: {
-        get_store_price: getStorePriceTool,
-        apply_discount: applyDiscountTool,
-      },
-      toolChoice: "required",
-      prompt:
-        "I want to buy a laptop. Get the price from StoreA and StoreB, then apply the discount code SAVE20 to whichever is cheaper.",
-      stopWhen: ai.stepCountIs(6),
-    });
+      const result = await wrappedAI.generateText({
+        model,
+        system:
+          "You are a shopping assistant. When asked about prices, always get the price from each store mentioned using get_store_price, then apply any discount codes using apply_discount. Use the tools provided.",
+        tools: {
+          get_store_price: getStorePriceTool,
+          apply_discount: applyDiscountTool,
+        },
+        toolChoice: "required",
+        prompt:
+          "I want to buy a laptop. Get the price from StoreA and StoreB, then apply the discount code SAVE20 to whichever is cheaper.",
+        stopWhen: ai.stepCountIs(6),
+      });
 
-    const end = getCurrentUnixTimestamp();
-    assert.ok(result);
+      const end = getCurrentUnixTimestamp();
+      assert.ok(result);
 
-    const spans = await backgroundLogger.drain();
+      const spans = await backgroundLogger.drain();
 
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const llmSpans = spans.filter(
-      (s: any) =>
-        s.span_attributes?.type === "llm" &&
-        s.span_attributes?.name === "doGenerate",
-    );
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const toolSpans = spans.filter(
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (s: any) => s.span_attributes?.type === "tool",
-    );
-
-    // Should have multiple doGenerate spans - one per LLM round/step
-    // This allows visualizing the LLM ↔ tool roundtrips
-    expect(llmSpans.length).toBeGreaterThanOrEqual(2);
-
-    // Should have tool spans for get_store_price calls (at least 2 for StoreA and StoreB)
-    expect(toolSpans.length).toBeGreaterThanOrEqual(2);
-
-    // Verify each doGenerate span has its own metrics
-    for (const llmSpan of llmSpans) {
+      const llmSpans = spans.filter(
+        (s: any) =>
+          s.span_attributes?.type === "llm" &&
+          s.span_attributes?.name === "doGenerate",
+      );
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      const span = llmSpan as any;
-      expect(span.metrics).toBeDefined();
-      expect(span.metrics.start).toBeDefined();
-      expect(span.metrics.end).toBeDefined();
-      expect(start).toBeLessThanOrEqual(span.metrics.start);
-      expect(span.metrics.end).toBeLessThanOrEqual(end);
-
-      // Token metrics structure varies by AI SDK version
-      // v5: metrics.tokens, prompt_tokens, completion_tokens are defined
-      // v6: metrics structure may differ - see v5-specific tests for strict assertions
-    }
+      const toolSpans = spans.filter(
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        (s: any) => s.span_attributes?.type === "tool",
+      );
 
-    // Verify tool spans have the expected structure
-    // eslint-disable-next-line @typescript-eslint/no-explicit-any
-    const storePriceSpans = toolSpans.filter(
-      // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      (s: any) => s.span_attributes?.name === "get_store_price",
-    );
-    expect(storePriceSpans.length).toBeGreaterThanOrEqual(2);
+      // Should have multiple doGenerate spans - one per LLM round/step
+      // This allows visualizing the LLM ↔ tool roundtrips
+      expect(llmSpans.length).toBeGreaterThanOrEqual(2);
+
+      // Should have tool spans for get_store_price calls (at least 2 for StoreA and StoreB)
+      expect(toolSpans.length).toBeGreaterThanOrEqual(2);
+
+      // Verify each doGenerate span has its own metrics
+      for (const llmSpan of llmSpans) {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const span = llmSpan as any;
+        expect(span.metrics).toBeDefined();
+        expect(span.metrics.start).toBeDefined();
+        expect(span.metrics.end).toBeDefined();
+        expect(start).toBeLessThanOrEqual(span.metrics.start);
+        expect(span.metrics.end).toBeLessThanOrEqual(end);
+
+        // Token metrics structure varies by AI SDK version
+        // v5: metrics.tokens, prompt_tokens, completion_tokens are defined
+        // v6: metrics structure may differ - see v5-specific tests for strict assertions
+      }
 
-    // Verify tool spans have input/output
-    for (const toolSpan of storePriceSpans) {
+      // Verify tool spans have the expected structure
       // eslint-disable-next-line @typescript-eslint/no-explicit-any
-      const span = toolSpan as any;
-      expect(span.input).toBeDefined();
-      expect(span.output).toBeDefined();
-
-      const inputData = Array.isArray(span.input) ? span.input[0] : span.input;
-      expect(inputData.store).toMatch(/^Store[AB]$/);
-      expect(inputData.item).toBe("laptop");
-    }
-  });
+      const storePriceSpans = toolSpans.filter(
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        (s: any) => s.span_attributes?.name === "get_store_price",
+      );
+      expect(storePriceSpans.length).toBeGreaterThanOrEqual(2);
+
+      // Verify tool spans have input/output
+      for (const toolSpan of storePriceSpans) {
+        // eslint-disable-next-line @typescript-eslint/no-explicit-any
+        const span = toolSpan as any;
+        expect(span.input).toBeDefined();
+        expect(span.output).toBeDefined();
+
+        const inputData = Array.isArray(span.input)
+          ? span.input[0]
+          : span.input;
+        expect(inputData.store).toMatch(/^Store[AB]$/);
+        expect(inputData.item).toBe("laptop");
+      }
+    },
+  );
 
   test("ai sdk multi-round tool use span hierarchy", async () => {
     expect(await backgroundLogger.drain()).toHaveLength(0);

diff --git a/js/src/wrappers/oai.test.ts b/js/src/wrappers/oai.test.ts
@@ -23,7 +23,7 @@ import { parseMetricsFromUsage } from "./oai_responses";
 
 // use the cheapest model for tests
 const TEST_MODEL = "gpt-4o-mini";
-const TEST_SUITE_OPTIONS = { timeout: 10000, retry: 3 };
+const TEST_SUITE_OPTIONS = { timeout: 120000, retry: 3 };
 
 try {
   configureNode();
@@ -734,73 +734,77 @@ describe("openai client unit tests", TEST_SUITE_OPTIONS, () => {
     assert.isTrue(m.completion_reasoning_tokens >= 0);
   });
 
-  test("openai.responses.compact", async (context) => {
-    if (!oai.responses || typeof oai.responses.compact !== "function") {
-      context.skip();
-    }
-    const wrappedCompact = client.responses?.compact;
-    if (typeof wrappedCompact !== "function") {
-      context.skip();
-    }
+  test(
+    "openai.responses.compact",
+    { timeout: 180000, retry: 0 },
+    async (context) => {
+      if (!oai.responses || typeof oai.responses.compact !== "function") {
+        context.skip();
+      }
+      const wrappedCompact = client.responses?.compact;
+      if (typeof wrappedCompact !== "function") {
+        context.skip();
+      }
 
-    assert.lengthOf(await backgroundLogger.drain(), 0);
+      assert.lengthOf(await backgroundLogger.drain(), 0);
 
-    const compactInput = [
-      {
-        role: "user",
-        content: [
-          {
-            type: "input_text",
-            text: "My name is Ada and I like concise responses.",
-          },
-        ],
-      },
-      {
-        role: "assistant",
-        content: [
-          {
-            type: "output_text",
-            text: "Nice to meet you, Ada. I will keep responses concise.",
-          },
-        ],
-      },
-    ];
+      const compactInput = [
+        {
+          role: "user",
+          content: [
+            {
+              type: "input_text",
+              text: "My name is Ada and I like concise responses.",
+            },
+          ],
+        },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "output_text",
+              text: "Nice to meet you, Ada. I will keep responses concise.",
+            },
+          ],
+        },
+      ];
 
-    const compactArgs = {
-      model: TEST_MODEL,
-      input: compactInput,
-      instructions: "Keep only durable user preferences.",
-    };
+      const compactArgs = {
+        model: TEST_MODEL,
+        input: compactInput,
+        instructions: "Keep only durable user preferences.",
+      };
 
-    const unwrappedResponse = await oai.responses.compact(compactArgs);
-    assert.ok(unwrappedResponse);
-    assert.lengthOf(await backgroundLogger.drain(), 0);
+      const unwrappedResponse = await oai.responses.compact(compactArgs);
+      assert.ok(unwrappedResponse);
+      assert.lengthOf(await backgroundLogger.drain(), 0);
 
-    const start = getCurrentUnixTimestamp();
-    const response = await wrappedCompact(compactArgs);
-    const end = getCurrentUnixTimestamp();
+      const start = getCurrentUnixTimestamp();
+      const response = await wrappedCompact(compactArgs);
+      const end = getCurrentUnixTimestamp();
 
-    assert.ok(response);
+      assert.ok(response);
 
-    const spans = await backgroundLogger.drain();
-    assert.lengthOf(spans, 1);
-    // eslint-disable-next-line @typescript-eslint/consistent-type-assertions, @typescript-eslint/no-explicit-any
-    const span = spans[0] as any;
-    assert.equal(span.span_attributes.name, "openai.responses.compact");
-    assert.equal(span.span_attributes.type, "llm");
-    assert.deepEqual(span.input, compactInput);
-    assert.equal(span.metadata.provider, "openai");
-    assert.equal(span.metadata.instructions, compactArgs.instructions);
-    assert.ok(span.metadata.model.startsWith(TEST_MODEL));
-    assert.isDefined(span.output);
+      const spans = await backgroundLogger.drain();
+      assert.lengthOf(spans, 1);
+      // eslint-disable-next-line @typescript-eslint/consistent-type-assertions, @typescript-eslint/no-explicit-any
+      const span = spans[0] as any;
+      assert.equal(span.span_attributes.name, "openai.responses.compact");
+      assert.equal(span.span_attributes.type, "llm");
+      assert.deepEqual(span.input, compactInput);
+      assert.equal(span.metadata.provider, "openai");
+      assert.equal(span.metadata.instructions, compactArgs.instructions);
+      assert.ok(span.metadata.model.startsWith(TEST_MODEL));
+      assert.isDefined(span.output);
 
-    const m = span.metrics;
-    assert.isTrue(start <= m.start && m.start < m.end && m.end <= end);
-    if (m.tokens !== undefined) {
-      assert.isTrue(m.tokens > 0);
-      assert.isTrue(m.prompt_tokens > 0);
-    }
-  });
+      const m = span.metrics;
+      assert.isTrue(start <= m.start && m.start < m.end && m.end <= end);
+      if (m.tokens !== undefined) {
+        assert.isTrue(m.tokens > 0);
+        assert.isTrue(m.prompt_tokens > 0);
+      }
+    },
+  );
 
   test("openai.chat.completions.parse (v5 GA method)", async () => {
     // Test that the parse method is properly wrapped in the GA namespace (v5)