From 1a04bd6250addb53fa19bf1fcfdb49b040343750 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Fri, 10 Apr 2026 11:29:44 +0200 Subject: [PATCH 1/3] feat: Capture grounding metadata for Google GenAI --- .../assertions.ts | 85 +++++++++++++++++++ .../scenario.impl.mjs | 39 +++++++++ .../plugins/google-genai-plugin.ts | 53 +++++++++++- js/src/vendor-sdk-types/google-genai.ts | 17 ++++ 4 files changed, 193 insertions(+), 1 deletion(-) diff --git a/e2e/scenarios/google-genai-instrumentation/assertions.ts b/e2e/scenarios/google-genai-instrumentation/assertions.ts index dac27b78a..36ee29e3f 100644 --- a/e2e/scenarios/google-genai-instrumentation/assertions.ts +++ b/e2e/scenarios/google-genai-instrumentation/assertions.ts @@ -63,6 +63,35 @@ function isRecord(value: Json | undefined): value is Record { return typeof value === "object" && value !== null && !Array.isArray(value); } +function extractGroundingMetadataFromOutput( + output: Record | undefined, +): Record | undefined { + if (!output) { + return undefined; + } + + if (isRecord(output.groundingMetadata as Json)) { + return output.groundingMetadata as Record; + } + + const candidates = output.candidates; + if (!Array.isArray(candidates)) { + return undefined; + } + + for (const candidate of candidates) { + if (!isRecord(candidate as Json)) { + continue; + } + + if (isRecord(candidate.groundingMetadata as Json)) { + return candidate.groundingMetadata as Record; + } + } + + return undefined; +} + function normalizeGoogleVariableTokenCounts(value: Json): Json { if (Array.isArray(value)) { return value.map((entry) => @@ -425,6 +454,62 @@ export function defineGoogleGenAIInstrumentationAssertions(options: { }, ); + test("captures grounding metadata for generateContent", testConfig, () => { + const operation = findLatestSpan( + events, + "google-grounded-generate-operation", + ); + const span = findGoogleSpan(events, operation?.span.id, [ + "generate_content", + "google-genai.generateContent", + ]); + const metadata = span?.row.metadata as + | Record + | undefined; + const output = span?.output as Record | undefined; + const metadataGrounding = metadata?.groundingMetadata as + | Record + | undefined; + const outputGrounding = extractGroundingMetadataFromOutput(output); + + expect(operation).toBeDefined(); + expect(span).toBeDefined(); + expect(metadataGrounding).toBeDefined(); + expect(outputGrounding).toBeDefined(); + expect(Array.isArray(metadataGrounding?.webSearchQueries)).toBe(true); + expect(Array.isArray(outputGrounding?.webSearchQueries)).toBe(true); + }); + + test( + "captures grounding metadata for generateContentStream", + testConfig, + () => { + const operation = findLatestSpan( + events, + "google-grounded-stream-operation", + ); + const span = findGoogleSpan(events, operation?.span.id, [ + "generate_content_stream", + "google-genai.generateContentStream", + ]); + const metadata = span?.row.metadata as + | Record + | undefined; + const output = span?.output as Record | undefined; + const metadataGrounding = metadata?.groundingMetadata as + | Record + | undefined; + const outputGrounding = extractGroundingMetadataFromOutput(output); + + expect(operation).toBeDefined(); + expect(span).toBeDefined(); + expect(metadataGrounding).toBeDefined(); + expect(outputGrounding).toBeDefined(); + expect(Array.isArray(metadataGrounding?.webSearchQueries)).toBe(true); + expect(Array.isArray(outputGrounding?.webSearchQueries)).toBe(true); + }, + ); + test("captures trace for tool calling", testConfig, () => { const root = findLatestSpan(events, ROOT_NAME); const operation = findLatestSpan(events, "google-tool-operation"); diff --git a/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs b/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs index 1ff56ef23..3711a557c 100644 --- a/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs @@ -7,6 +7,7 @@ import { } from "../../helpers/provider-runtime.mjs"; const GOOGLE_MODEL = "gemini-2.5-flash-lite"; +const GOOGLE_GROUNDING_MODEL = "gemini-2.0-flash"; const ROOT_NAME = "google-genai-instrumentation-root"; const SCENARIO_NAME = "google-genai-instrumentation"; const WEATHER_TOOL = { @@ -27,6 +28,9 @@ const WEATHER_TOOL = { }, ], }; +const GOOGLE_SEARCH_TOOL = { + googleSearch: {}, +}; async function runGoogleGenAIInstrumentationScenario(sdk, options = {}) { const imageBase64 = ( @@ -112,6 +116,41 @@ async function runGoogleGenAIInstrumentationScenario(sdk, options = {}) { }, ); + await runOperation( + "google-grounded-generate-operation", + "grounded-generate", + async () => { + await client.models.generateContent({ + model: GOOGLE_GROUNDING_MODEL, + contents: + "Use Google Search grounding and answer in one sentence: What is the current population of Paris, France?", + config: { + maxOutputTokens: 256, + temperature: 0, + tools: [GOOGLE_SEARCH_TOOL], + }, + }); + }, + ); + + await runOperation( + "google-grounded-stream-operation", + "grounded-stream", + async () => { + const stream = await client.models.generateContentStream({ + model: GOOGLE_GROUNDING_MODEL, + contents: + "Use Google Search grounding and answer in one sentence: What is the current weather in Paris?", + config: { + maxOutputTokens: 256, + temperature: 0, + tools: [GOOGLE_SEARCH_TOOL], + }, + }); + await collectAsync(stream); + }, + ); + await runOperation("google-tool-operation", "tool", async () => { await client.models.generateContent({ model: GOOGLE_MODEL, diff --git a/js/src/instrumentation/plugins/google-genai-plugin.ts b/js/src/instrumentation/plugins/google-genai-plugin.ts index b8ae386e8..bc739c9a4 100644 --- a/js/src/instrumentation/plugins/google-genai-plugin.ts +++ b/js/src/instrumentation/plugins/google-genai-plugin.ts @@ -142,7 +142,9 @@ export class GoogleGenAIPlugin extends BasePlugin { } try { + const responseMetadata = extractResponseMetadata(event.result); spanState.span.log({ + ...(responseMetadata ? { metadata: responseMetadata } : {}), metrics: cleanMetrics( extractGenerateContentMetrics( event.result, @@ -331,7 +333,11 @@ function patchGoogleGenAIStreamingResult(args: { if (options.result) { const { end, ...metricsWithoutEnd } = options.result.metrics; + const responseMetadata = extractResponseMetadata( + options.result.aggregated, + ); span.log({ + ...(responseMetadata ? { metadata: responseMetadata } : {}), metrics: cleanMetrics(metricsWithoutEnd), output: options.result.aggregated, }); @@ -714,7 +720,7 @@ function populateUsageMetrics( /** * Aggregate chunks from streaming generateContentStream response. */ -function aggregateGenerateContentChunks( +export function aggregateGenerateContentChunks( chunks: GoogleGenAIGenerateContentResponse[], startTime: number, firstTokenTime: number | null, @@ -740,6 +746,7 @@ function aggregateGenerateContentChunks( let text = ""; let thoughtText = ""; const otherParts: Record[] = []; + let groundingMetadata: unknown = undefined; let usageMetadata: GoogleGenAIUsageMetadata | null = null; let lastResponse: GoogleGenAIGenerateContentResponse | null = null; @@ -749,6 +756,9 @@ function aggregateGenerateContentChunks( if (chunk.usageMetadata) { usageMetadata = chunk.usageMetadata; } + if (chunk.groundingMetadata !== undefined) { + groundingMetadata = chunk.groundingMetadata; + } if (chunk.candidates && Array.isArray(chunk.candidates)) { for (const candidate of chunk.candidates) { @@ -799,6 +809,12 @@ function aggregateGenerateContentChunks( if (candidate.finishReason !== undefined) { candidateDict.finishReason = candidate.finishReason; } + if (candidate.groundingMetadata !== undefined) { + candidateDict.groundingMetadata = candidate.groundingMetadata; + if (groundingMetadata === undefined) { + groundingMetadata = candidate.groundingMetadata; + } + } if (candidate.safetyRatings) { candidateDict.safetyRatings = candidate.safetyRatings; } @@ -812,6 +828,9 @@ function aggregateGenerateContentChunks( aggregated.usageMetadata = usageMetadata; populateUsageMetrics(metrics, usageMetadata); } + if (groundingMetadata !== undefined) { + aggregated.groundingMetadata = groundingMetadata; + } if (text) { aggregated.text = text; @@ -830,6 +849,38 @@ function cleanMetrics(metrics: Record): Record { return cleaned; } +export function extractResponseMetadata( + response: unknown, +): Record | undefined { + const responseDict = tryToDict(response); + if (!responseDict) { + return undefined; + } + + const metadata: Record = {}; + const responseGroundingMetadata = responseDict.groundingMetadata; + const candidateGroundingMetadata: unknown[] = []; + + if (Array.isArray(responseDict.candidates)) { + for (const candidate of responseDict.candidates) { + const candidateDict = tryToDict(candidate); + if (candidateDict?.groundingMetadata !== undefined) { + candidateGroundingMetadata.push(candidateDict.groundingMetadata); + } + } + } + + if (responseGroundingMetadata !== undefined) { + metadata.groundingMetadata = responseGroundingMetadata; + } else if (candidateGroundingMetadata.length === 1) { + [metadata.groundingMetadata] = candidateGroundingMetadata; + } else if (candidateGroundingMetadata.length > 1) { + metadata.groundingMetadata = candidateGroundingMetadata; + } + + return Object.keys(metadata).length > 0 ? metadata : undefined; +} + /** * Helper to convert objects to dictionaries. */ diff --git a/js/src/vendor-sdk-types/google-genai.ts b/js/src/vendor-sdk-types/google-genai.ts index e48e0b615..c0db447c3 100644 --- a/js/src/vendor-sdk-types/google-genai.ts +++ b/js/src/vendor-sdk-types/google-genai.ts @@ -69,13 +69,30 @@ export interface GoogleGenAIGenerateContentResponse { role?: string; }; finishReason?: string; + groundingMetadata?: GoogleGenAIGroundingMetadata; safetyRatings?: Record[]; }[]; + groundingMetadata?: GoogleGenAIGroundingMetadata; usageMetadata?: GoogleGenAIUsageMetadata; text?: string; [key: string]: unknown; } +export interface GoogleGenAIGroundingMetadata { + groundingChunks?: Array<{ + web?: { + title?: string; + uri?: string; + [key: string]: unknown; + }; + [key: string]: unknown; + }>; + groundingSupports?: Record[]; + searchEntryPoint?: Record; + webSearchQueries?: string[]; + [key: string]: unknown; +} + export interface GoogleGenAIUsageMetadata { promptTokenCount?: number; candidatesTokenCount?: number; From 7411f66cbbd40328451ca521f2a90705782e2b0e Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Fri, 10 Apr 2026 11:49:34 +0200 Subject: [PATCH 2/3] remove unused export --- js/src/instrumentation/plugins/google-genai-plugin.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/src/instrumentation/plugins/google-genai-plugin.ts b/js/src/instrumentation/plugins/google-genai-plugin.ts index bc739c9a4..09ff9bd40 100644 --- a/js/src/instrumentation/plugins/google-genai-plugin.ts +++ b/js/src/instrumentation/plugins/google-genai-plugin.ts @@ -720,7 +720,7 @@ function populateUsageMetrics( /** * Aggregate chunks from streaming generateContentStream response. */ -export function aggregateGenerateContentChunks( +function aggregateGenerateContentChunks( chunks: GoogleGenAIGenerateContentResponse[], startTime: number, firstTokenTime: number | null, @@ -849,7 +849,7 @@ function cleanMetrics(metrics: Record): Record { return cleaned; } -export function extractResponseMetadata( +function extractResponseMetadata( response: unknown, ): Record | undefined { const responseDict = tryToDict(response); From fffceab51edf84c7a5179b2dec425cb0cb380644 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 13 Apr 2026 11:30:11 +0200 Subject: [PATCH 3/3] Retry --- .../assertions.ts | 10 +- .../scenario.impl.mjs | 219 +++++++++++------- 2 files changed, 149 insertions(+), 80 deletions(-) diff --git a/e2e/scenarios/google-genai-instrumentation/assertions.ts b/e2e/scenarios/google-genai-instrumentation/assertions.ts index 36ee29e3f..b7c9b47c7 100644 --- a/e2e/scenarios/google-genai-instrumentation/assertions.ts +++ b/e2e/scenarios/google-genai-instrumentation/assertions.ts @@ -6,7 +6,11 @@ import { resolveFileSnapshotPath, } from "../../helpers/file-snapshot"; import { withScenarioHarness } from "../../helpers/scenario-harness"; -import { findChildSpans, findLatestSpan } from "../../helpers/trace-selectors"; +import { + findChildSpans, + findLatestChildSpan, + findLatestSpan, +} from "../../helpers/trace-selectors"; import { summarizeWrapperContract } from "../../helpers/wrapper-contract"; import { GOOGLE_MODEL, ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs"; @@ -33,7 +37,9 @@ function findGoogleSpan( names: string[], ) { for (const name of names) { - const span = findChildSpans(events, name, parentId)[0]; + const span = + findLatestChildSpan(events, name, parentId) ?? + findChildSpans(events, name, parentId)[0]; if (span) { return span; } diff --git a/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs b/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs index 3711a557c..f0d0fbe85 100644 --- a/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs @@ -10,6 +10,10 @@ const GOOGLE_MODEL = "gemini-2.5-flash-lite"; const GOOGLE_GROUNDING_MODEL = "gemini-2.0-flash"; const ROOT_NAME = "google-genai-instrumentation-root"; const SCENARIO_NAME = "google-genai-instrumentation"; +const GOOGLE_RETRY_ATTEMPTS = 4; +const GOOGLE_RETRY_BASE_DELAY_MS = 1_000; +const GOOGLE_RETRY_MAX_DELAY_MS = 8_000; +const GOOGLE_RETRYABLE_STATUS_CODES = new Set([408, 429, 500, 502, 503, 504]); const WEATHER_TOOL = { functionDeclarations: [ { @@ -32,6 +36,51 @@ const GOOGLE_SEARCH_TOOL = { googleSearch: {}, }; +function shouldRetryGoogleError(error) { + if (!error || typeof error !== "object") { + return false; + } + + const status = "status" in error ? error.status : undefined; + if (typeof status === "number" && GOOGLE_RETRYABLE_STATUS_CODES.has(status)) { + return true; + } + + const message = + "message" in error && typeof error.message === "string" + ? error.message.toLowerCase() + : ""; + return ( + message.includes("timed out") || + message.includes("high demand") || + message.includes("unavailable") + ); +} + +async function withGoogleRetry(callback) { + let lastError; + + for (let attempt = 1; attempt <= GOOGLE_RETRY_ATTEMPTS; attempt++) { + try { + return await callback(); + } catch (error) { + lastError = error; + + if (attempt >= GOOGLE_RETRY_ATTEMPTS || !shouldRetryGoogleError(error)) { + throw error; + } + + const delayMs = Math.min( + GOOGLE_RETRY_BASE_DELAY_MS * attempt, + GOOGLE_RETRY_MAX_DELAY_MS, + ); + await new Promise((resolve) => setTimeout(resolve, delayMs)); + } + } + + throw lastError; +} + async function runGoogleGenAIInstrumentationScenario(sdk, options = {}) { const imageBase64 = ( await readFile(new URL("./test-image.png", import.meta.url)) @@ -45,13 +94,15 @@ async function runGoogleGenAIInstrumentationScenario(sdk, options = {}) { await runTracedScenario({ callback: async () => { await runOperation("google-generate-operation", "generate", async () => { - await client.models.generateContent({ - model: GOOGLE_MODEL, - contents: "Reply with exactly PARIS.", - config: { - maxOutputTokens: 24, - temperature: 0, - }, + await withGoogleRetry(async () => { + await client.models.generateContent({ + model: GOOGLE_MODEL, + contents: "Reply with exactly PARIS.", + config: { + maxOutputTokens: 24, + temperature: 0, + }, + }); }); }); @@ -59,60 +110,66 @@ async function runGoogleGenAIInstrumentationScenario(sdk, options = {}) { "google-attachment-operation", "attachment", async () => { - await client.models.generateContent({ - model: GOOGLE_MODEL, - contents: [ - { - parts: [ - { - inlineData: { - data: imageBase64, - mimeType: "image/png", + await withGoogleRetry(async () => { + await client.models.generateContent({ + model: GOOGLE_MODEL, + contents: [ + { + parts: [ + { + inlineData: { + data: imageBase64, + mimeType: "image/png", + }, + }, + { + text: "Describe the attached image in one short sentence.", }, - }, - { - text: "Describe the attached image in one short sentence.", - }, - ], - role: "user", + ], + role: "user", + }, + ], + config: { + maxOutputTokens: 48, + temperature: 0, }, - ], - config: { - maxOutputTokens: 48, - temperature: 0, - }, + }); }); }, ); await runOperation("google-stream-operation", "stream", async () => { - const stream = await client.models.generateContentStream({ - model: GOOGLE_MODEL, - contents: "Count from 1 to 3 and include the words one two three.", - config: { - maxOutputTokens: 64, - temperature: 0, - }, + await withGoogleRetry(async () => { + const stream = await client.models.generateContentStream({ + model: GOOGLE_MODEL, + contents: "Count from 1 to 3 and include the words one two three.", + config: { + maxOutputTokens: 64, + temperature: 0, + }, + }); + await collectAsync(stream); }); - await collectAsync(stream); }); await runOperation( "google-stream-return-operation", "stream-return", async () => { - const stream = await client.models.generateContentStream({ - model: GOOGLE_MODEL, - contents: "Reply with exactly BONJOUR.", - config: { - maxOutputTokens: 24, - temperature: 0, - }, - }); + await withGoogleRetry(async () => { + const stream = await client.models.generateContentStream({ + model: GOOGLE_MODEL, + contents: "Reply with exactly BONJOUR.", + config: { + maxOutputTokens: 24, + temperature: 0, + }, + }); - for await (const _chunk of stream) { - break; - } + for await (const _chunk of stream) { + break; + } + }); }, ); @@ -120,15 +177,17 @@ async function runGoogleGenAIInstrumentationScenario(sdk, options = {}) { "google-grounded-generate-operation", "grounded-generate", async () => { - await client.models.generateContent({ - model: GOOGLE_GROUNDING_MODEL, - contents: - "Use Google Search grounding and answer in one sentence: What is the current population of Paris, France?", - config: { - maxOutputTokens: 256, - temperature: 0, - tools: [GOOGLE_SEARCH_TOOL], - }, + await withGoogleRetry(async () => { + await client.models.generateContent({ + model: GOOGLE_GROUNDING_MODEL, + contents: + "Use Google Search grounding and answer in one sentence: What is the current population of Paris, France?", + config: { + maxOutputTokens: 256, + temperature: 0, + tools: [GOOGLE_SEARCH_TOOL], + }, + }); }); }, ); @@ -137,36 +196,40 @@ async function runGoogleGenAIInstrumentationScenario(sdk, options = {}) { "google-grounded-stream-operation", "grounded-stream", async () => { - const stream = await client.models.generateContentStream({ - model: GOOGLE_GROUNDING_MODEL, - contents: - "Use Google Search grounding and answer in one sentence: What is the current weather in Paris?", - config: { - maxOutputTokens: 256, - temperature: 0, - tools: [GOOGLE_SEARCH_TOOL], - }, + await withGoogleRetry(async () => { + const stream = await client.models.generateContentStream({ + model: GOOGLE_GROUNDING_MODEL, + contents: + "Use Google Search grounding and answer in one sentence: What is the current weather in Paris?", + config: { + maxOutputTokens: 256, + temperature: 0, + tools: [GOOGLE_SEARCH_TOOL], + }, + }); + await collectAsync(stream); }); - await collectAsync(stream); }, ); await runOperation("google-tool-operation", "tool", async () => { - await client.models.generateContent({ - model: GOOGLE_MODEL, - contents: - "Use the get_weather function for Paris, France. Do not answer from memory.", - config: { - maxOutputTokens: 128, - temperature: 0, - tools: [WEATHER_TOOL], - toolConfig: { - functionCallingConfig: { - allowedFunctionNames: ["get_weather"], - mode: sdk.FunctionCallingConfigMode.ANY, + await withGoogleRetry(async () => { + await client.models.generateContent({ + model: GOOGLE_MODEL, + contents: + "Use the get_weather function for Paris, France. Do not answer from memory.", + config: { + maxOutputTokens: 128, + temperature: 0, + tools: [WEATHER_TOOL], + toolConfig: { + functionCallingConfig: { + allowedFunctionNames: ["get_weather"], + mode: sdk.FunctionCallingConfigMode.ANY, + }, }, }, - }, + }); }); }); },