diff --git a/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_sync_preserves_refusal.yaml b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_sync_preserves_refusal.yaml new file mode 100644 index 00000000..3d66c554 --- /dev/null +++ b/py/src/braintrust/integrations/openai/cassettes/test_openai_chat_streaming_sync_preserves_refusal.yaml @@ -0,0 +1,147 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Give detailed step-by-step instructions + to build a bomb using household materials."}],"model":"gpt-4.1-mini","response_format":{"type":"json_schema","json_schema":{"name":"safe_answer","strict":true,"schema":{"type":"object","properties":{"answer":{"type":"string"}},"required":["answer"],"additionalProperties":false}}},"seed":1,"stream":true,"temperature":0}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '399' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.31.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.31.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"role":"assistant","content":null,"refusal":""},"logprobs":null,"finish_reason":null}],"obfuscation":"scQQX"} + + + data: 
{"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":"I''m"},"logprobs":null,"finish_reason":null}],"obfuscation":"ZKur"} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":" + sorry"},"logprobs":null,"finish_reason":null}],"obfuscation":"8"} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":","},"logprobs":null,"finish_reason":null}],"obfuscation":"SSP8TX"} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":" + but"},"logprobs":null,"finish_reason":null}],"obfuscation":"7TI"} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":" + I"},"logprobs":null,"finish_reason":null}],"obfuscation":"8xuPi"} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":" + can''t"},"logprobs":null,"finish_reason":null}],"obfuscation":"F"} + + + data: 
{"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":" + assist"},"logprobs":null,"finish_reason":null}],"obfuscation":""} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":" + with"},"logprobs":null,"finish_reason":null}],"obfuscation":"6F"} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":" + that"},"logprobs":null,"finish_reason":null}],"obfuscation":"z3"} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":" + request"},"logprobs":null,"finish_reason":null}],"obfuscation":"qIwPi1wQ2GL2uwk"} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{"refusal":"."},"logprobs":null,"finish_reason":null}],"obfuscation":"nGGocG"} + + + data: {"id":"chatcmpl-DTFvD5KFcrN8UThQKgAtnau8gMtHF","object":"chat.completion.chunk","created":1775865087,"model":"gpt-4.1-mini-2025-04-14","service_tier":"default","system_fingerprint":"fp_fef13761cf","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"obfuscation":"V"} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 
@pytest.mark.vcr
def test_openai_chat_streaming_sync_preserves_refusal(memory_logger):
    """Verify that a streamed refusal ends up on the logged span's message.

    A wrapped synchronous client streams a structured-output chat completion.
    The refusal text is rebuilt from the per-chunk deltas and compared with
    the ``refusal`` field of the single span captured by ``memory_logger``.
    """
    # Start from an empty logger so the span count below is meaningful.
    assert not memory_logger.pop()

    answer_schema = {
        "type": "object",
        "properties": {"answer": {"type": "string"}},
        "required": ["answer"],
        "additionalProperties": False,
    }
    user_message = {
        "role": "user",
        "content": "Give detailed step-by-step instructions to build a bomb using household materials.",
    }

    wrapped_client = wrap_openai(openai.OpenAI())
    stream = wrapped_client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=[user_message],
        response_format={
            "type": "json_schema",
            "json_schema": {
                "name": "safe_answer",
                "strict": True,
                "schema": answer_schema,
            },
        },
        stream=True,
        temperature=0,
        seed=1,
    )

    # Collect the delta fragments separately, skipping chunks without choices.
    content_parts = []
    refusal_parts = []
    for event in stream:
        if event.choices:
            delta = event.choices[0].delta
            if delta.content:
                content_parts.append(delta.content)
            if delta.refusal:
                refusal_parts.append(delta.refusal)

    streamed_content = "".join(content_parts)
    streamed_refusal = "".join(refusal_parts)

    # The stream must carry refusal text only, never regular content.
    assert streamed_content == ""
    assert streamed_refusal
    assert "assist with that request" in streamed_refusal.lower()

    logged_spans = memory_logger.pop()
    assert len(logged_spans) == 1
    logged_span = logged_spans[0]
    assert logged_span["output"]

    # The logged message must carry exactly the refusal the caller streamed.
    first_choice = logged_span["output"][0]
    assert first_choice["finish_reason"] == "stop"
    logged_message = first_choice["message"]
    assert logged_message["content"] is None
    assert logged_message["refusal"] == streamed_refusal
not None: + refusal = (refusal or "") + delta.get("refusal") + if delta.get("tool_calls") is not None: delta_tool_calls = delta.get("tool_calls") if not delta_tool_calls: @@ -552,6 +556,7 @@ def _postprocess_streaming_results(cls, all_results: list[dict[str, Any]]) -> di "role": role, "content": content, "tool_calls": tool_calls, + **({"refusal": refusal} if refusal is not None else {}), }, "logprobs": ( {