diff --git a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v4.log-payloads.json b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v4.log-payloads.json index db9f2d590..241bbb154 100644 --- a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v4.log-payloads.json +++ b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v4.log-payloads.json @@ -162,6 +162,43 @@ ], "type": "llm" }, + { + "input": { + "kind": "undefined" + }, + "metadata": { + "operation": "stream-fixture" + }, + "metrics": { + "has_time_to_first_token": false + }, + "name": "openai-stream-fixture-operation", + "output": null, + "type": null + }, + { + "input": [ + { + "content_kind": "string", + "role": "user" + } + ], + "metadata": { + "model": "gpt-4o-mini-2024-07-18", + "provider": "openai" + }, + "metrics": { + "has_time_to_first_token": true + }, + "name": "Chat Completion", + "output": [ + { + "json_keys": [], + "role": "assistant" + } + ], + "type": "llm" + }, { "input": { "kind": "undefined" diff --git a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v4.span-events.json b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v4.span-events.json index 4dc6b4bde..0c4347f90 100644 --- a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v4.span-events.json +++ b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v4.span-events.json @@ -112,6 +112,31 @@ "name": "Chat Completion", "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "stream-fixture" + }, + "metrics": { + "has_time_to_first_token": false + }, + "name": "openai-stream-fixture-operation", + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-4o-mini-2024-07-18", + "provider": "openai" + }, + "metrics": { + "has_time_to_first_token": true + }, + "name": "Chat Completion", + "type": "llm" + }, { "has_input": false, "has_output": false, diff --git a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v5.log-payloads.json b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v5.log-payloads.json index db9f2d590..241bbb154 100644 --- a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v5.log-payloads.json +++ b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v5.log-payloads.json @@ -162,6 +162,43 @@ ], "type": "llm" }, + { + "input": { + "kind": "undefined" + }, + "metadata": { + "operation": "stream-fixture" + }, + "metrics": { + "has_time_to_first_token": false + }, + "name": "openai-stream-fixture-operation", + "output": null, + "type": null + }, + { + "input": [ + { + "content_kind": "string", + "role": "user" + } + ], + "metadata": { + "model": "gpt-4o-mini-2024-07-18", + "provider": "openai" + }, + "metrics": { + "has_time_to_first_token": true + }, + "name": "Chat Completion", + "output": [ + { + "json_keys": [], + "role": "assistant" + } + ], + "type": "llm" + }, { "input": { "kind": "undefined" diff --git a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v5.span-events.json b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v5.span-events.json index 4dc6b4bde..0c4347f90 100644 --- a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v5.span-events.json +++ b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v5.span-events.json @@ -112,6 +112,31 @@ "name": "Chat Completion", "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "stream-fixture" + }, + "metrics": { + "has_time_to_first_token": false + }, + "name": "openai-stream-fixture-operation", + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-4o-mini-2024-07-18", + "provider": "openai" + }, + "metrics": { + "has_time_to_first_token": true + }, + "name": "Chat Completion", + "type": "llm" + }, { "has_input": false, "has_output": false, diff --git a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v6.log-payloads.json b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v6.log-payloads.json index 982b5ae25..84d7ae072 100644 --- a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v6.log-payloads.json +++ b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v6.log-payloads.json @@ -162,6 +162,43 @@ ], "type": "llm" }, + { + "input": { + "kind": "undefined" + }, + "metadata": { + "operation": "stream-fixture" + }, + "metrics": { + "has_time_to_first_token": false + }, + "name": "openai-stream-fixture-operation", + "output": null, + "type": null + }, + { + "input": [ + { + "content_kind": "string", + "role": "user" + } + ], + "metadata": { + "model": "gpt-4o-mini-2024-07-18", + "provider": "openai" + }, + "metrics": { + "has_time_to_first_token": true + }, + "name": "Chat Completion", + "output": [ + { + "json_keys": [], + "role": "assistant" + } + ], + "type": "llm" + }, { "input": { "kind": "undefined" diff --git a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v6.span-events.json b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v6.span-events.json index dcbd1c10a..6b6bac8c3 100644 --- a/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v6.span-events.json +++ b/e2e/scenarios/openai-instrumentation/__snapshots__/openai-v6.span-events.json @@ -112,6 +112,31 @@ "name": "Chat Completion", "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "stream-fixture" + }, + "metrics": { + "has_time_to_first_token": false + }, + "name": "openai-stream-fixture-operation", + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-4o-mini-2024-07-18", + "provider": "openai" + }, + "metrics": { + "has_time_to_first_token": true + }, + "name": "Chat Completion", + "type": "llm" + }, { "has_input": false, "has_output": false, diff --git a/e2e/scenarios/openai-instrumentation/assertions.ts b/e2e/scenarios/openai-instrumentation/assertions.ts index df03b9d48..07dc501c0 100644 --- a/e2e/scenarios/openai-instrumentation/assertions.ts +++ b/e2e/scenarios/openai-instrumentation/assertions.ts @@ -42,6 +42,28 @@ type OperationSpec = { validate?: (span: CapturedLogEvent | undefined) => void; }; +function asRecord(value: unknown): Record | undefined { + return typeof value === "object" && value !== null && !Array.isArray(value) + ? (value as Record) + : undefined; +} + +function validateStreamFixtureOutput(span: CapturedLogEvent | undefined): void { + const firstChoice = Array.isArray(span?.output) ? span?.output[0] : undefined; + const choice = asRecord(firstChoice); + const message = asRecord(choice?.message); + + expect(choice?.logprobs).toEqual( + expect.objectContaining({ + content: expect.arrayContaining([ + expect.objectContaining({ token: "NO" }), + expect.objectContaining({ token: "PE" }), + ]), + }), + ); + expect(message?.refusal).toBe("NOPE"); +} + const OPERATION_SPECS: readonly OperationSpec[] = [ { childNames: ["Chat Completion"], @@ -77,6 +99,16 @@ const OPERATION_SPECS: readonly OperationSpec[] = [ testName: "captures trace for streamed chat completion with response metadata", }, + { + childNames: ["Chat Completion"], + expectsOutput: true, + expectsTimeToFirstToken: true, + name: "openai-stream-fixture-operation", + operation: "stream-fixture", + testName: + "captures trace for streamed chat completion with logprobs and refusal", + validate: validateStreamFixtureOutput, + }, { childNames: ["Chat Completion"], expectsOutput: true, diff --git a/e2e/scenarios/openai-instrumentation/scenario.impl.mjs b/e2e/scenarios/openai-instrumentation/scenario.impl.mjs index 398fb6370..bee4a767e 100644 --- a/e2e/scenarios/openai-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/openai-instrumentation/scenario.impl.mjs @@ -9,6 +9,16 @@ const EMBEDDING_MODEL = "text-embedding-3-small"; const MODERATION_MODEL = "omni-moderation-2024-09-26"; const ROOT_NAME = "openai-instrumentation-root"; const SCENARIO_NAME = "openai-instrumentation"; +const MOCK_CHAT_STREAM_SSE = [ + 'data: {"id":"chatcmpl-fixture","object":"chat.completion.chunk","created":1740000000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"role":"assistant"},"logprobs":null,"finish_reason":null}]}', + "", + 'data: {"id":"chatcmpl-fixture","object":"chat.completion.chunk","created":1740000000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"refusal":"NO"},"logprobs":{"content":[{"token":"NO","logprob":-0.1,"bytes":[78,79],"top_logprobs":[{"token":"NO","logprob":-0.1,"bytes":[78,79]}]}]},"finish_reason":null}]}', + "", + 'data: {"id":"chatcmpl-fixture","object":"chat.completion.chunk","created":1740000000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"refusal":"PE"},"logprobs":{"content":[{"token":"PE","logprob":-0.2,"bytes":[80,69],"top_logprobs":[{"token":"PE","logprob":-0.2,"bytes":[80,69]}]}]},"finish_reason":"stop"}]}', + "", + "data: [DONE]", + "", +].join("\n"); const CHAT_PARSE_SCHEMA = { type: "object", @@ -53,6 +63,24 @@ function parseMajorVersion(version) { return Number.isNaN(major) ? null : major; } +function createMockStreamingClient(options) { + const baseClient = new options.OpenAI({ + apiKey: process.env.OPENAI_API_KEY ?? "test-openai-key", + baseURL: "https://example.test/v1", + fetch: async () => + new Response(MOCK_CHAT_STREAM_SSE, { + headers: { + "content-type": "text/event-stream", + }, + status: 200, + }), + }); + + return options.decorateClient + ? options.decorateClient(baseClient) + : baseClient; +} + export async function runOpenAIInstrumentationScenario(options) { const baseClient = new options.OpenAI({ apiKey: process.env.OPENAI_API_KEY, @@ -61,6 +89,7 @@ export async function runOpenAIInstrumentationScenario(options) { const client = options.decorateClient ? options.decorateClient(baseClient) : baseClient; + const streamFixtureClient = createMockStreamingClient(options); const openAIMajorVersion = parseMajorVersion(options.openaiSdkVersion); const shouldCheckPrivateFieldMethods = typeof options.decorateClient === "function" && @@ -169,6 +198,28 @@ export async function runOpenAIInstrumentationScenario(options) { }, ); + await runOperation( + "openai-stream-fixture-operation", + "stream-fixture", + async () => { + const chatStream = await streamFixtureClient.chat.completions.create({ + model: OPENAI_MODEL, + messages: [ + { + role: "user", + content: "Reply with a refusal stream fixture.", + }, + ], + stream: true, + logprobs: true, + top_logprobs: 2, + max_tokens: 12, + temperature: 0, + }); + await collectAsync(chatStream); + }, + ); + await runOperation("openai-parse-operation", "parse", async () => { const parseArgs = { messages: [{ role: "user", content: "What is 2 + 2?" }], diff --git a/js/src/instrumentation/plugins/openai-plugin.ts b/js/src/instrumentation/plugins/openai-plugin.ts index 4adc7b068..d1ca89dcc 100644 --- a/js/src/instrumentation/plugins/openai-plugin.ts +++ b/js/src/instrumentation/plugins/openai-plugin.ts @@ -18,6 +18,7 @@ import { import type { OpenAIChatChoice, OpenAIChatCompletionChunk, + OpenAIChatLogprobs, OpenAIResponseStreamEvent, } from "../../vendor-sdk-types/openai"; @@ -417,6 +418,55 @@ export function processImagesInOutput(output: any): any { return output; } +function mergeLogprobTokens( + existing: OpenAIChatLogprobs["content"] | OpenAIChatLogprobs["refusal"], + incoming: OpenAIChatLogprobs["content"] | OpenAIChatLogprobs["refusal"], +): OpenAIChatLogprobs["content"] | OpenAIChatLogprobs["refusal"] { + if (incoming === undefined) { + return existing; + } + + if (incoming === null) { + return existing ?? null; + } + + if (Array.isArray(existing)) { + return [...existing, ...incoming]; + } + + return [...incoming]; +} + +function aggregateChatLogprobs( + existing: OpenAIChatLogprobs | null | undefined, + incoming: OpenAIChatLogprobs | null | undefined, +): OpenAIChatLogprobs | null | undefined { + if (incoming === undefined) { + return existing; + } + + if (incoming === null) { + return existing ?? null; + } + + const aggregated: OpenAIChatLogprobs = + existing && existing !== null + ? { ...existing, ...incoming } + : { ...incoming }; + + const content = mergeLogprobTokens(existing?.content, incoming.content); + if (content !== undefined) { + aggregated.content = content; + } + + const refusal = mergeLogprobTokens(existing?.refusal, incoming.refusal); + if (refusal !== undefined) { + aggregated.refusal = refusal; + } + + return aggregated; +} + /** * Aggregate chat completion chunks into a single response. * Combines role (first), content (concatenated), tool_calls (by id), @@ -432,7 +482,9 @@ export function aggregateChatCompletionChunks( } { let role = undefined; let content = undefined; + let refusal = undefined; let tool_calls = undefined; + let logprobs: OpenAIChatLogprobs | null | undefined = undefined; let finish_reason = undefined; let metrics: Record = {}; @@ -444,23 +496,38 @@ export function aggregateChatCompletionChunks( }; } - const delta = chunk.choices?.[0]?.delta; - if (!delta) { + const choice = chunk.choices?.[0]; + if (!choice) { continue; } - if (!role && delta.role) { - role = delta.role; + if (choice.finish_reason) { + finish_reason = choice.finish_reason; + } + + logprobs = aggregateChatLogprobs(logprobs, choice.logprobs); + + const delta = choice.delta; + if (!delta) { + continue; } if (delta.finish_reason) { finish_reason = delta.finish_reason; } + if (!role && delta.role) { + role = delta.role; + } + if (delta.content) { content = (content || "") + delta.content; } + if (delta.refusal) { + refusal = (refusal || "") + delta.refusal; + } + if (delta.tool_calls) { const toolDelta = delta.tool_calls[0]; if ( @@ -492,9 +559,10 @@ export function aggregateChatCompletionChunks( message: { role, content, + ...(refusal !== undefined ? { refusal } : {}), tool_calls, }, - logprobs: null, + logprobs: logprobs ?? null, finish_reason, }, ], diff --git a/js/src/vendor-sdk-types/openai-common.ts b/js/src/vendor-sdk-types/openai-common.ts index 8f8d5727e..dafa95b4e 100644 --- a/js/src/vendor-sdk-types/openai-common.ts +++ b/js/src/vendor-sdk-types/openai-common.ts @@ -74,15 +74,30 @@ export interface OpenAIChatToolCall { export interface OpenAIChatMessage { role?: string; content?: unknown; + refusal?: string; tool_calls?: OpenAIChatToolCall[]; [key: string]: unknown; } +export interface OpenAIChatTokenLogprob { + token?: string; + logprob?: number; + bytes?: number[] | null; + top_logprobs?: OpenAIChatTokenLogprob[]; + [key: string]: unknown; +} + +export interface OpenAIChatLogprobs { + content?: OpenAIChatTokenLogprob[] | null; + refusal?: OpenAIChatTokenLogprob[] | null; + [key: string]: unknown; +} + export interface OpenAIChatChoice { index: number; message: OpenAIChatMessage; finish_reason?: string | null; - logprobs?: unknown; + logprobs?: OpenAIChatLogprobs | null; [key: string]: unknown; } @@ -95,6 +110,7 @@ export interface OpenAIChatCompletion { export interface OpenAIChatDelta { role?: string; content?: string; + refusal?: string; tool_calls?: OpenAIChatToolCall[]; finish_reason?: string | null; [key: string]: unknown; @@ -103,6 +119,7 @@ export interface OpenAIChatDelta { export interface OpenAIChatChunkChoice { delta?: OpenAIChatDelta; finish_reason?: string | null; + logprobs?: OpenAIChatLogprobs | null; [key: string]: unknown; } diff --git a/js/src/vendor-sdk-types/openai.ts b/js/src/vendor-sdk-types/openai.ts index c00dc261a..231d6300e 100644 --- a/js/src/vendor-sdk-types/openai.ts +++ b/js/src/vendor-sdk-types/openai.ts @@ -2,6 +2,7 @@ import type { OpenAIChatChoice, OpenAIChatCompletion, OpenAIChatCompletionChunk, + OpenAIChatLogprobs, OpenAIChatCreateParams, OpenAIChatStream, OpenAIEmbeddingCreateParams, @@ -29,6 +30,7 @@ export type { OpenAIChatChoice, OpenAIChatCompletion, OpenAIChatCompletionChunk, + OpenAIChatLogprobs, OpenAIChatCreateParams, OpenAIChatStream, OpenAIEmbeddingCreateParams,