Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
216 changes: 112 additions & 104 deletions js/src/wrappers/ai-sdk/ai-sdk.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1537,122 +1537,130 @@ describe("ai sdk client unit tests", TEST_SUITE_OPTIONS, () => {
// Once processInputAttachments is made async and properly handles the Promise,
// we should verify that the schema is serialized correctly in the logs.

test("ai sdk multi-round tool use with metrics", async () => {
expect(await backgroundLogger.drain()).toHaveLength(0);
test(
"ai sdk multi-round tool use with metrics",
{ timeout: 180000, retry: 0 },
async () => {
expect(await backgroundLogger.drain()).toHaveLength(0);

const getStorePriceTool = ai.tool({
description: "Get the price of an item from a specific store",
inputSchema: z.object({
store: z.string().describe("The store name (e.g., 'StoreA', 'StoreB')"),
item: z.string().describe("The item to get the price for"),
}),
execute: async (args: { store: string; item: string }) => {
const prices: Record<string, Record<string, number>> = {
StoreA: { laptop: 999, mouse: 25, keyboard: 75 },
StoreB: { laptop: 1099, mouse: 20, keyboard: 80 },
};
const price = prices[args.store]?.[args.item] ?? 0;
return JSON.stringify({ store: args.store, item: args.item, price });
},
});
const getStorePriceTool = ai.tool({
description: "Get the price of an item from a specific store",
inputSchema: z.object({
store: z
.string()
.describe("The store name (e.g., 'StoreA', 'StoreB')"),
item: z.string().describe("The item to get the price for"),
}),
execute: async (args: { store: string; item: string }) => {
const prices: Record<string, Record<string, number>> = {
StoreA: { laptop: 999, mouse: 25, keyboard: 75 },
StoreB: { laptop: 1099, mouse: 20, keyboard: 80 },
};
const price = prices[args.store]?.[args.item] ?? 0;
return JSON.stringify({ store: args.store, item: args.item, price });
},
});

const applyDiscountTool = ai.tool({
description: "Apply a discount code to a total amount",
inputSchema: z.object({
total: z.number().describe("The total amount before discount"),
discountCode: z.string().describe("The discount code to apply"),
}),
execute: async (args: { total: number; discountCode: string }) => {
const discounts: Record<string, number> = {
SAVE10: 0.1,
SAVE20: 0.2,
};
const discountRate = discounts[args.discountCode] ?? 0;
const finalTotal = args.total - args.total * discountRate;
return JSON.stringify({
originalTotal: args.total,
discountCode: args.discountCode,
finalTotal,
});
},
});
const applyDiscountTool = ai.tool({
description: "Apply a discount code to a total amount",
inputSchema: z.object({
total: z.number().describe("The total amount before discount"),
discountCode: z.string().describe("The discount code to apply"),
}),
execute: async (args: { total: number; discountCode: string }) => {
const discounts: Record<string, number> = {
SAVE10: 0.1,
SAVE20: 0.2,
};
const discountRate = discounts[args.discountCode] ?? 0;
const finalTotal = args.total - args.total * discountRate;
return JSON.stringify({
originalTotal: args.total,
discountCode: args.discountCode,
finalTotal,
});
},
});

const model = openai(TEST_MODEL);
const start = getCurrentUnixTimestamp();
const model = openai(TEST_MODEL);
const start = getCurrentUnixTimestamp();

const result = await wrappedAI.generateText({
model,
system:
"You are a shopping assistant. When asked about prices, always get the price from each store mentioned using get_store_price, then apply any discount codes using apply_discount. Use the tools provided.",
tools: {
get_store_price: getStorePriceTool,
apply_discount: applyDiscountTool,
},
toolChoice: "required",
prompt:
"I want to buy a laptop. Get the price from StoreA and StoreB, then apply the discount code SAVE20 to whichever is cheaper.",
stopWhen: ai.stepCountIs(6),
});
const result = await wrappedAI.generateText({
model,
system:
"You are a shopping assistant. When asked about prices, always get the price from each store mentioned using get_store_price, then apply any discount codes using apply_discount. Use the tools provided.",
tools: {
get_store_price: getStorePriceTool,
apply_discount: applyDiscountTool,
},
toolChoice: "required",
prompt:
"I want to buy a laptop. Get the price from StoreA and StoreB, then apply the discount code SAVE20 to whichever is cheaper.",
stopWhen: ai.stepCountIs(6),
});

const end = getCurrentUnixTimestamp();
assert.ok(result);
const end = getCurrentUnixTimestamp();
assert.ok(result);

const spans = await backgroundLogger.drain();
const spans = await backgroundLogger.drain();

// eslint-disable-next-line @typescript-eslint/no-explicit-any
const llmSpans = spans.filter(
(s: any) =>
s.span_attributes?.type === "llm" &&
s.span_attributes?.name === "doGenerate",
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const toolSpans = spans.filter(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(s: any) => s.span_attributes?.type === "tool",
);

// Should have multiple doGenerate spans - one per LLM round/step
// This allows visualizing the LLM ↔ tool roundtrips
expect(llmSpans.length).toBeGreaterThanOrEqual(2);

// Should have tool spans for get_store_price calls (at least 2 for StoreA and StoreB)
expect(toolSpans.length).toBeGreaterThanOrEqual(2);

// Verify each doGenerate span has its own metrics
for (const llmSpan of llmSpans) {
const llmSpans = spans.filter(
(s: any) =>
s.span_attributes?.type === "llm" &&
s.span_attributes?.name === "doGenerate",
);
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const span = llmSpan as any;
expect(span.metrics).toBeDefined();
expect(span.metrics.start).toBeDefined();
expect(span.metrics.end).toBeDefined();
expect(start).toBeLessThanOrEqual(span.metrics.start);
expect(span.metrics.end).toBeLessThanOrEqual(end);

// Token metrics structure varies by AI SDK version
// v5: metrics.tokens, prompt_tokens, completion_tokens are defined
// v6: metrics structure may differ - see v5-specific tests for strict assertions
}
const toolSpans = spans.filter(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(s: any) => s.span_attributes?.type === "tool",
);

// Verify tool spans have the expected structure
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const storePriceSpans = toolSpans.filter(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(s: any) => s.span_attributes?.name === "get_store_price",
);
expect(storePriceSpans.length).toBeGreaterThanOrEqual(2);
// Should have multiple doGenerate spans - one per LLM round/step
// This allows visualizing the LLM ↔ tool roundtrips
expect(llmSpans.length).toBeGreaterThanOrEqual(2);

// Should have tool spans for get_store_price calls (at least 2 for StoreA and StoreB)
expect(toolSpans.length).toBeGreaterThanOrEqual(2);

// Verify each doGenerate span has its own metrics
for (const llmSpan of llmSpans) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const span = llmSpan as any;
expect(span.metrics).toBeDefined();
expect(span.metrics.start).toBeDefined();
expect(span.metrics.end).toBeDefined();
expect(start).toBeLessThanOrEqual(span.metrics.start);
expect(span.metrics.end).toBeLessThanOrEqual(end);

// Token metrics structure varies by AI SDK version
// v5: metrics.tokens, prompt_tokens, completion_tokens are defined
// v6: metrics structure may differ - see v5-specific tests for strict assertions
}

// Verify tool spans have input/output
for (const toolSpan of storePriceSpans) {
// Verify tool spans have the expected structure
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const span = toolSpan as any;
expect(span.input).toBeDefined();
expect(span.output).toBeDefined();

const inputData = Array.isArray(span.input) ? span.input[0] : span.input;
expect(inputData.store).toMatch(/^Store[AB]$/);
expect(inputData.item).toBe("laptop");
}
});
const storePriceSpans = toolSpans.filter(
// eslint-disable-next-line @typescript-eslint/no-explicit-any
(s: any) => s.span_attributes?.name === "get_store_price",
);
expect(storePriceSpans.length).toBeGreaterThanOrEqual(2);

// Verify tool spans have input/output
for (const toolSpan of storePriceSpans) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const span = toolSpan as any;
expect(span.input).toBeDefined();
expect(span.output).toBeDefined();

const inputData = Array.isArray(span.input)
? span.input[0]
: span.input;
expect(inputData.store).toMatch(/^Store[AB]$/);
expect(inputData.item).toBe("laptop");
}
},
);

test("ai sdk multi-round tool use span hierarchy", async () => {
expect(await backgroundLogger.drain()).toHaveLength(0);
Expand Down
124 changes: 64 additions & 60 deletions js/src/wrappers/oai.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { parseMetricsFromUsage } from "./oai_responses";

// use the cheapest model for tests
const TEST_MODEL = "gpt-4o-mini";
const TEST_SUITE_OPTIONS = { timeout: 10000, retry: 3 };
const TEST_SUITE_OPTIONS = { timeout: 120000, retry: 3 };

try {
configureNode();
Expand Down Expand Up @@ -734,73 +734,77 @@ describe("openai client unit tests", TEST_SUITE_OPTIONS, () => {
assert.isTrue(m.completion_reasoning_tokens >= 0);
});

test("openai.responses.compact", async (context) => {
if (!oai.responses || typeof oai.responses.compact !== "function") {
context.skip();
}
const wrappedCompact = client.responses?.compact;
if (typeof wrappedCompact !== "function") {
context.skip();
}
test(
"openai.responses.compact",
{ timeout: 180000, retry: 0 },
async (context) => {
if (!oai.responses || typeof oai.responses.compact !== "function") {
context.skip();
}
const wrappedCompact = client.responses?.compact;
if (typeof wrappedCompact !== "function") {
context.skip();
}

assert.lengthOf(await backgroundLogger.drain(), 0);
assert.lengthOf(await backgroundLogger.drain(), 0);

const compactInput = [
{
role: "user",
content: [
{
type: "input_text",
text: "My name is Ada and I like concise responses.",
},
],
},
{
role: "assistant",
content: [
{
type: "output_text",
text: "Nice to meet you, Ada. I will keep responses concise.",
},
],
},
];
const compactInput = [
{
role: "user",
content: [
{
type: "input_text",
text: "My name is Ada and I like concise responses.",
},
],
},
{
role: "assistant",
content: [
{
type: "output_text",
text: "Nice to meet you, Ada. I will keep responses concise.",
},
],
},
];

const compactArgs = {
model: TEST_MODEL,
input: compactInput,
instructions: "Keep only durable user preferences.",
};
const compactArgs = {
model: TEST_MODEL,
input: compactInput,
instructions: "Keep only durable user preferences.",
};

const unwrappedResponse = await oai.responses.compact(compactArgs);
assert.ok(unwrappedResponse);
assert.lengthOf(await backgroundLogger.drain(), 0);
const unwrappedResponse = await oai.responses.compact(compactArgs);
assert.ok(unwrappedResponse);
assert.lengthOf(await backgroundLogger.drain(), 0);

const start = getCurrentUnixTimestamp();
const response = await wrappedCompact(compactArgs);
const end = getCurrentUnixTimestamp();
const start = getCurrentUnixTimestamp();
const response = await wrappedCompact(compactArgs);
const end = getCurrentUnixTimestamp();

assert.ok(response);
assert.ok(response);

const spans = await backgroundLogger.drain();
assert.lengthOf(spans, 1);
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions, @typescript-eslint/no-explicit-any
const span = spans[0] as any;
assert.equal(span.span_attributes.name, "openai.responses.compact");
assert.equal(span.span_attributes.type, "llm");
assert.deepEqual(span.input, compactInput);
assert.equal(span.metadata.provider, "openai");
assert.equal(span.metadata.instructions, compactArgs.instructions);
assert.ok(span.metadata.model.startsWith(TEST_MODEL));
assert.isDefined(span.output);
const spans = await backgroundLogger.drain();
assert.lengthOf(spans, 1);
// eslint-disable-next-line @typescript-eslint/consistent-type-assertions, @typescript-eslint/no-explicit-any
const span = spans[0] as any;
assert.equal(span.span_attributes.name, "openai.responses.compact");
assert.equal(span.span_attributes.type, "llm");
assert.deepEqual(span.input, compactInput);
assert.equal(span.metadata.provider, "openai");
assert.equal(span.metadata.instructions, compactArgs.instructions);
assert.ok(span.metadata.model.startsWith(TEST_MODEL));
assert.isDefined(span.output);

const m = span.metrics;
assert.isTrue(start <= m.start && m.start < m.end && m.end <= end);
if (m.tokens !== undefined) {
assert.isTrue(m.tokens > 0);
assert.isTrue(m.prompt_tokens > 0);
}
});
const m = span.metrics;
assert.isTrue(start <= m.start && m.start < m.end && m.end <= end);
if (m.tokens !== undefined) {
assert.isTrue(m.tokens > 0);
assert.isTrue(m.prompt_tokens > 0);
}
},
);

test("openai.chat.completions.parse (v5 GA method)", async () => {
// Test that the parse method is properly wrapped in the GA namespace (v5)
Expand Down
Loading