braintrustdata · Abhijeet Prasad (AbhiPrasad) · Apr 10, 2026 · Apr 10, 2026
diff --git a/.agents/skills/sdk-integrations/SKILL.md b/.agents/skills/sdk-integrations/SKILL.md
@@ -28,6 +28,7 @@ Always read:
 - `py/src/braintrust/integrations/base.py`
 - `py/src/braintrust/integrations/versioning.py`
 - `py/src/braintrust/integrations/__init__.py`
+- `py/src/braintrust/integrations/utils.py`
 - `py/noxfile.py`
 
 Read these when working on an existing integration:
@@ -43,8 +44,9 @@ Read these when relevant:
 - `py/src/braintrust/auto.py` for `auto_instrument()` changes
 - `py/src/braintrust/conftest.py` for VCR behavior
 - `py/src/braintrust/integrations/auto_test_scripts/` for subprocess auto-instrument coverage
-- `py/src/braintrust/integrations/adk/test_adk.py` and `py/src/braintrust/integrations/anthropic/test_anthropic.py` for test layout patterns
-- `py/src/braintrust/integrations/adk/tracing.py` and `py/src/braintrust/integrations/google_genai/tracing.py` when handling multimodal content, binary inputs, or generated media
+- `py/src/braintrust/integrations/test_utils.py` when touching shared attachment materialization or multimodal payload shaping
+- `py/src/braintrust/integrations/adk/test_adk.py`, `py/src/braintrust/integrations/anthropic/test_anthropic.py`, and `py/src/braintrust/integrations/google_genai/test_google_genai.py` for attachment-focused test layout patterns
+- `py/src/braintrust/integrations/adk/tracing.py`, `py/src/braintrust/integrations/anthropic/tracing.py`, and `py/src/braintrust/integrations/google_genai/tracing.py` when handling multimodal content, binary inputs, generated media, or attachment materialization behavior
 
 Do not forget `auto.py` and `auto_test_scripts/`. Import-order and subprocess regressions often only show up there.
 
@@ -54,8 +56,8 @@ Start from the nearest current integration:
 
 - ADK: direct method patching, `target_module`, `CompositeFunctionWrapperPatcher`, manual `wrap_*()` helpers, context propagation, inline data to `Attachment`
 - Agno: multi-target patching, several related patchers, version-conditional fallbacks with `superseded_by`
-- Anthropic: compact constructor patching and a small public surface
-- Google GenAI: multimodal tracing, generated media, output-side `Attachment` handling
+- Anthropic: compact constructor patching, a small public surface, and multimodal request blocks that distinguish image vs document attachment payloads
+- Google GenAI: multimodal tracing, generated media, output-side `Attachment` handling, and nested attachment materialization while preserving non-attachment values
 
 Choose the reference based on the hardest part of the task:
 
@@ -188,10 +190,16 @@ def _process_result(result: Any, start: float) -> tuple[dict[str, Any], dict[str
 
 Treat binary payloads as attachments, not logged bytes:
 
-- convert raw `bytes` to `braintrust.logger.Attachment`
+- prefer the shared `_materialize_attachment(...)` helper in `py/src/braintrust/integrations/utils.py` over provider-local base64 or file-decoding code
+- convert provider-owned raw `bytes`, base64 payloads, data URLs, file inputs, and generated media into `braintrust.logger.Attachment` objects when Braintrust should upload the content
 - preserve normal remote URLs as strings
-- keep useful metadata such as MIME type, size, or provider ids next to the attachment
-- follow existing repo content shapes for multimodal payloads
+- use the repo's existing multimodal payload shapes after materialization:
+  - images -> `{"image_url": {"url": attachment}}`
+  - non-image media/documents/files -> `{"file": {"file_data": attachment, "filename": resolved.filename}}`
+- do not force non-image payloads through `image_url` shims
+- if attachment materialization fails, keep the original value instead of dropping it or replacing it with `None`
+- preserve non-attachment values while walking nested payloads unless you are intentionally normalizing them for readability
+- keep useful metadata such as MIME type, size, safety data, filenames, or provider ids next to the attachment
 
 ## Patcher Rules
 
@@ -251,14 +259,21 @@ Cover the surfaces that changed:
 - idempotence
 - failure and error logging
 - patcher resolution and duplicate detection when relevant
-- attachment conversion for binary inputs or generated media
+- attachment conversion for binary inputs or generated media, including assertions that images land under `image_url.url`, non-image payloads land under `file.file_data`, and traced payloads contain `Attachment` objects rather than raw bytes or base64 blobs
 - span structure, especially `input`, `output`, `metadata`, and `metrics`
 
 For streaming changes, verify both:
 
 - the provider still returns the expected iterator or async iterator
 - the final logged span contains the aggregated `output` and stream-specific `metrics`
 
+Also verify, when relevant:
+
+- the `input` contains the expected model/messages/prompt/config fields
+- the `output` contains normalized provider results rather than opaque SDK instances
+- the `metadata` contains finish reasons, ids, or annotations in the expected place
+- binary payloads are represented as `Attachment` objects where applicable; remote URLs and non-attachment values remain unchanged; and file inputs that could not be materialized are preserved rather than dropped
+
 Keep VCR cassettes in `py/src/braintrust/integrations/<provider>/cassettes/`. Re-record only when behavior intentionally changes.
 
 When the provider returns binary HTTP responses or generated media, sanitize cassettes as needed so fixtures do not store raw file bytes.
@@ -296,3 +311,4 @@ Avoid these failures:
 - re-recording cassettes when behavior did not intentionally change
 - adding a custom `_instrument_*` helper where `_instrument_integration()` already fits
 - forgetting `target_module` for deep or optional patch targets
+- forcing non-image attachments through `image_url` shims, dropping unrecognized file inputs, or re-serializing non-attachment values while materializing payloads
diff --git a/py/src/braintrust/integrations/adk/test_adk.py b/py/src/braintrust/integrations/adk/test_adk.py
@@ -314,8 +314,9 @@ async def generate_content_async(self, llm_request: LlmRequest, stream: bool = F
     assert len(new_message["parts"]) == 2
 
     document_part = new_message["parts"][0]
-    assert "image_url" in document_part
-    attachment = document_part["image_url"]["url"]
+    assert "file" in document_part
+    assert document_part["file"]["filename"] == "file.pdf"
+    attachment = document_part["file"]["file_data"]
     assert isinstance(attachment, Attachment)
     assert attachment.reference["content_type"] == "application/pdf"
     assert attachment.reference["filename"] == "file.pdf"
@@ -329,8 +330,8 @@ async def generate_content_async(self, llm_request: LlmRequest, stream: bool = F
     llm_span = next(row for row in spans if row["span_attributes"]["type"] == "llm")
     llm_contents = llm_span["input"]["contents"]
     llm_document_part = llm_contents[0]["parts"][0]
-    assert isinstance(llm_document_part["image_url"]["url"], Attachment)
-    assert llm_document_part["image_url"]["url"].reference["content_type"] == "application/pdf"
+    assert isinstance(llm_document_part["file"]["file_data"], Attachment)
+    assert llm_document_part["file"]["file_data"].reference["content_type"] == "application/pdf"
 
 
 @pytest.mark.vcr
@@ -592,7 +593,7 @@ def __init__(self, inline_data=None, text=None):
     assert isinstance(attachment, Attachment), "Should be an Attachment object"
     assert attachment.reference["type"] == "braintrust_attachment"
     assert attachment.reference["content_type"] == "image/png"
-    assert attachment.reference["filename"] == "file.png"
+    assert attachment.reference["filename"] == "image.png"
     assert "key" in attachment.reference
 
     # Test serializing a Part with text
@@ -740,7 +741,7 @@ async def test_adk_binary_data_attachment_conversion(memory_logger):
     assert "filename" in ref, "Attachment reference should have a filename"
     assert "content_type" in ref, "Attachment reference should have a content_type"
     assert ref["content_type"] == "image/png", "Content type should be image/png"
-    assert ref["filename"] == "file.png", "Filename should be file.png"
+    assert ref["filename"] == "image.png", "Filename should be image.png"
 
     # Second part should be the text
     text_part = new_message["parts"][1]

diff --git a/py/src/braintrust/integrations/adk/tracing.py b/py/src/braintrust/integrations/adk/tracing.py
@@ -9,7 +9,7 @@
 from typing import Any, cast
 
 from braintrust.bt_json import bt_safe_deep_copy
-from braintrust.integrations.utils import _attachment_from_bytes, _image_url_payload
+from braintrust.integrations.utils import _materialize_attachment
 from braintrust.logger import start_span
 from braintrust.span_types import SpanTypeAttribute
 
@@ -58,12 +58,10 @@ def _serialize_part(part: Any) -> Any:
             data = inline_data.data
             mime_type = inline_data.mime_type
 
-            # Convert bytes to Attachment
             if isinstance(data, bytes):
-                attachment = _attachment_from_bytes(data, mime_type)
-
-                # Return in image_url format - SDK will replace with AttachmentReference
-                return _image_url_payload(attachment)
+                resolved_attachment = _materialize_attachment(data, mime_type=mime_type)
+                if resolved_attachment is not None:
+                    return resolved_attachment.multimodal_part_payload
 
     # Handle Part objects with file_data (file references)
     if hasattr(part, "file_data") and part.file_data:

diff --git a/py/src/braintrust/integrations/anthropic/test_anthropic.py b/py/src/braintrust/integrations/anthropic/test_anthropic.py
@@ -124,9 +124,10 @@ def test_get_input_from_kwargs_converts_multimodal_base64_blocks_to_attachments(
 
     assert document_block["type"] == "document"
     assert document_block["source"] == {"type": "base64", "media_type": "application/pdf"}
-    assert isinstance(document_block["image_url"]["url"], Attachment)
-    assert document_block["image_url"]["url"].reference["content_type"] == "application/pdf"
-    assert document_block["image_url"]["url"].reference["filename"] == "document.pdf"
+    assert document_block["file"]["filename"] == "document.pdf"
+    assert isinstance(document_block["file"]["file_data"], Attachment)
+    assert document_block["file"]["file_data"].reference["content_type"] == "application/pdf"
+    assert document_block["file"]["file_data"].reference["filename"] == "document.pdf"
 
     serialized = str(processed_input)
     assert PNG_BASE64 not in serialized
@@ -365,9 +366,10 @@ def test_anthropic_messages_create_with_document_attachment_input(memory_logger)
 
     assert document_block["type"] == "document"
     assert document_block["source"] == {"type": "base64", "media_type": "application/pdf"}
-    assert isinstance(document_block["image_url"]["url"], Attachment)
-    assert document_block["image_url"]["url"].reference["content_type"] == "application/pdf"
-    assert document_block["image_url"]["url"].reference["filename"] == "document.pdf"
+    assert document_block["file"]["filename"] == "document.pdf"
+    assert isinstance(document_block["file"]["file_data"], Attachment)
+    assert document_block["file"]["file_data"].reference["content_type"] == "application/pdf"
+    assert document_block["file"]["file_data"].reference["filename"] == "document.pdf"
     assert PDF_BASE64 not in str(span["input"])
 
 

diff --git a/py/src/braintrust/integrations/anthropic/tracing.py b/py/src/braintrust/integrations/anthropic/tracing.py
@@ -4,11 +4,7 @@
 
 from braintrust.bt_json import bt_safe_deep_copy
 from braintrust.integrations.anthropic._utils import Wrapper, extract_anthropic_usage
-from braintrust.integrations.utils import (
-    _attachment_filename_for_mime_type,
-    _attachment_from_base64_data,
-    _image_url_payload,
-)
+from braintrust.integrations.utils import _materialize_attachment
 from braintrust.logger import log_exc_info_to_span, start_span
 
 
@@ -415,11 +411,6 @@ def _start_batch_results_span(args, kwargs):
     return start_span(name="anthropic.messages.batches.results", type="task", metadata=metadata, input=_input)
 
 
-def _attachment_filename_for_media_type(media_type: str, block_type: str) -> str:
-    prefix = "image" if block_type == "image" else "document"
-    return _attachment_filename_for_mime_type(media_type, prefix=prefix)
-
-
 def _convert_base64_source_to_attachment(block_type, source):
     if not isinstance(source, dict):
         return None
@@ -431,10 +422,10 @@ def _convert_base64_source_to_attachment(block_type, source):
     if not isinstance(media_type, str) or not isinstance(data, str):
         return None
 
-    return _attachment_from_base64_data(
+    return _materialize_attachment(
         data,
-        media_type,
-        filename=_attachment_filename_for_media_type(media_type, block_type),
+        mime_type=media_type,
+        prefix="image" if block_type == "image" else "document",
     )
 
 
@@ -447,11 +438,11 @@ def _process_input_attachments(value):
         source = value.get("source")
 
         if block_type in {"image", "document"} and isinstance(source, dict):
-            attachment = _convert_base64_source_to_attachment(block_type, source)
-            if attachment is not None:
+            resolved_attachment = _convert_base64_source_to_attachment(block_type, source)
+            if resolved_attachment is not None:
                 processed = {k: _process_input_attachments(v) for k, v in value.items() if k != "source"}
                 processed["source"] = {k: _process_input_attachments(v) for k, v in source.items() if k != "data"}
-                processed.update(_image_url_payload(attachment))
+                processed.update(resolved_attachment.multimodal_part_payload)
                 return processed
 
         return {k: _process_input_attachments(v) for k, v in value.items()}

diff --git a/py/src/braintrust/integrations/google_genai/test_google_genai.py b/py/src/braintrust/integrations/google_genai/test_google_genai.py
@@ -143,10 +143,16 @@ def _assert_timing_metrics_are_valid(metrics, start=None, end=None):
 
 
 def _assert_attachment_part(part, *, content_type, filename):
-    assert "image_url" in part
-    assert "url" in part["image_url"]
+    if content_type.startswith("image/"):
+        assert "image_url" in part
+        assert "url" in part["image_url"]
+        attachment = part["image_url"]["url"]
+    else:
+        assert "file" in part
+        assert "file_data" in part["file"]
+        assert part["file"]["filename"] == filename
+        attachment = part["file"]["file_data"]
 
-    attachment = part["image_url"]["url"]
     assert isinstance(attachment, Attachment)
     assert attachment.reference["type"] == "braintrust_attachment"
     assert attachment.reference["content_type"] == content_type
@@ -1030,6 +1036,43 @@ class TestModel(BaseModel):
     assert copied["context_file"] is attachment
 
 
+def test_interaction_materialization_only_converts_multimodal_payloads():
+    """Interaction helpers should only materialize attachments, not re-serialize values."""
+    from datetime import datetime
+    from enum import Enum
+
+    from braintrust.integrations.google_genai.tracing import _materialize_interaction_value
+    from pydantic import BaseModel
+
+    class Mode(Enum):
+        CHAT = "chat"
+
+    class InteractionPayload(BaseModel):
+        created_at: datetime
+        mode: Mode
+        media: dict[str, object]
+
+    created_at = datetime(2024, 1, 2, 3, 4, 5)
+    materialized = _materialize_interaction_value(
+        InteractionPayload(
+            created_at=created_at,
+            mode=Mode.CHAT,
+            media={
+                "type": "image",
+                "data": TINY_PNG_BASE64,
+                "mime_type": "image/png",
+                "caption": None,
+            },
+        )
+    )
+
+    assert materialized["created_at"] == created_at
+    assert materialized["mode"] is Mode.CHAT
+    assert materialized["media"]["caption"] is None
+    assert isinstance(materialized["media"]["data"], Attachment)
+    assert materialized["media"]["image_url"]["url"] is materialized["media"]["data"]
+
+
 GROUNDING_MODEL = "gemini-2.0-flash-001"