From d1dc744171eea507bbb2d7f71f6ac9cd051b6aa1 Mon Sep 17 00:00:00 2001
From: Abhijeet Prasad <abhijeet@braintrustdata.com>
Date: Fri, 10 Apr 2026 12:46:15 -0400
Subject: [PATCH] fix(adk): avoid duplicate sync runner invocation spans

Trace only Runner.run_async because supported ADK versions route the sync
Runner.run API through the async implementation. Patching both surfaces
created duplicate invocation spans for sync callers.

Add a regression test and cassette coverage to verify Runner.run emits a
single invocation span with the expected child agent span structure.
---
 ...n_does_not_duplicate_invocation_spans.yaml | 137 ++++++++++++++++++
 .../integrations/adk/integration.py           |   4 +-
 .../braintrust/integrations/adk/patchers.py   |  25 ++--
 .../braintrust/integrations/adk/test_adk.py   |  77 ++++++++--
 py/src/braintrust/integrations/adk/tracing.py |  32 ----
 .../auto_test_scripts/test_auto_adk.py        |   6 +-
 6 files changed, 218 insertions(+), 63 deletions(-)
 create mode 100644 py/src/braintrust/integrations/adk/cassettes/test_adk_sync_runner_run_does_not_duplicate_invocation_spans.yaml

diff --git a/py/src/braintrust/integrations/adk/cassettes/test_adk_sync_runner_run_does_not_duplicate_invocation_spans.yaml b/py/src/braintrust/integrations/adk/cassettes/test_adk_sync_runner_run_does_not_duplicate_invocation_spans.yaml
new file mode 100644
index 00000000..d8b3b333
--- /dev/null
+++ b/py/src/braintrust/integrations/adk/cassettes/test_adk_sync_runner_run_does_not_duplicate_invocation_spans.yaml
@@ -0,0 +1,137 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "What''s the weather in San Francisco?"}],
+      "role": "user"}], "systemInstruction": {"parts": [{"text": "You are a helpful
+      weather assistant. Use the get_weather tool to answer questions about weather.\n\nYou
+      are an agent. Your internal name is \"weather_agent\"."}], "role": "user"},
+      "tools": [{"functionDeclarations": [{"description": "Get the weather for a location.",
+      "name": "get_weather", "parameters": {"properties": {"location": {"type": "STRING"}},
+      "required": ["location"], "type": "OBJECT"}}]}], "generationConfig": {}}'
+    headers:
+      accept:
+      - '*/*'
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '561'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      user-agent:
+      - google-genai-sdk/1.31.0 gl-python/3.9.21 google-adk/1.14.1 gl-python/3.9.21
+      x-goog-api-client:
+      - google-genai-sdk/1.31.0 gl-python/3.9.21 google-adk/1.14.1 gl-python/3.9.21
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAC/61SXU+DMBR9768gfR4LMofG100TP4hTyWJijLnChTWWFttuy1z23y1fG0x9ExJS
+        7jk9596ebonj0BhEwhIwqOmF82IrjrOtviUmhUFhLNCWbLEAZQ7c+tl21paSLkVsmBQT4Ly3ucEF
+        5GjrNEPztkYwC1R0cEwClelfNluEyxhK+VLiCYRzpUDETMeSHnF35K+/w/r1YEyV5FVfuUyQt2K7
+        lkBTJphePCLoxju6n+37prDK7mRWKPletu2eDwPf98bBaeD59h2fjND1AtKaV7Z0qSHDEA3YAGA/
+        LLUieWEi+YFiIpdVAOOz2qiTVw8PGthIA7yPjAY/VPXUejLejbGTsB0fODObcsbo8jnqZGP1e021
+        Z0Q6R3nc4j+ZBX0v0iRThzVHpZsbkWFuc3L9oeemHPSiEqQKdSGFxuuk5Ez9NITwFqfhav1p9EzG
+        XzebB4+SHfkGVtQS/hUDAAA=
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Thu, 18 Sep 2025 20:09:51 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=530
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"contents": [{"parts": [{"text": "What''s the weather in San Francisco?"}],
+      "role": "user"}, {"parts": [{"functionCall": {"args": {"location": "San Francisco"},
+      "name": "get_weather"}}], "role": "model"}, {"parts": [{"functionResponse":
+      {"name": "get_weather", "response": {"location": "San Francisco", "temperature":
+      "72\u00b0F", "condition": "sunny", "humidity": "45%", "wind": "5 mph NW"}}}],
+      "role": "user"}], "systemInstruction": {"parts": [{"text": "You are a helpful
+      weather assistant. Use the get_weather tool to answer questions about weather.\n\nYou
+      are an agent. Your internal name is \"weather_agent\"."}], "role": "user"},
+      "tools": [{"functionDeclarations": [{"description": "Get the weather for a location.",
+      "name": "get_weather", "parameters": {"properties": {"location": {"type": "STRING"}},
+      "required": ["location"], "type": "OBJECT"}}]}], "generationConfig": {}}'
+    headers:
+      accept:
+      - '*/*'
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '881'
+      content-type:
+      - application/json
+      host:
+      - generativelanguage.googleapis.com
+      user-agent:
+      - google-genai-sdk/1.31.0 gl-python/3.9.21 google-adk/1.14.1 gl-python/3.9.21
+      x-goog-api-client:
+      - google-genai-sdk/1.31.0 gl-python/3.9.21 google-adk/1.14.1 gl-python/3.9.21
+    method: POST
+    uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAC/61R0U7bQBB891esTuobiS5OHELfqqaRUIEisACprdA2Xsen2nfmbl2IovwT38CX
+        cefUwaGv9YO12pnbGc1sIgCxRJ2pDJmc+Ajf/QZg0/4DZjSTZg90K7+s0fIbd/dterOnMD2FRyIt
+        CB4JuSALSsM1alhY1EvllgaUA9dovYZHxQUgMFU1WeTGEpgcjuOX58UQwomiqVSmeB2eTJIP4B0D
+        h9PKD36XQFUXcHE7/KFFz8h2P/88erNvTUnBW2UyKjv6tiOIXGnliitCZ3SgXaffLsUexT+rM7Oq
+        rfkVEhjIoZQymU1HUo6n8iSeTpKTWRx14q2saByu6JwYfci4j1L4I1XNqflN+rNp2pBnk51Qr5MD
+        fNzhbBjLA2g0mh39c9fNvaoq+2X1evQBYOlTbYv6cpeKXkh8aKtLKeqF+d7kfxIbvxOL/paz6+uG
+        rFO7YlZU+aoG8VAO8hJd0V4UllxttKPTLHDmcX6Op+nXi3v19MDu8j6npPkkRbSNXgFas32N/AIA
+        AA==
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Thu, 18 Sep 2025 20:09:52 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Server-Timing:
+      - gfet4t7; dur=612
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-XSS-Protection:
+      - '0'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/adk/integration.py b/py/src/braintrust/integrations/adk/integration.py
index a7dc55e5..ca4e5a4f 100644
--- a/py/src/braintrust/integrations/adk/integration.py
+++ b/py/src/braintrust/integrations/adk/integration.py
@@ -8,7 +8,7 @@
     AgentRunAsyncPatcher,
     FlowRunAsyncPatcher,
     McpToolPatcher,
-    RunnerRunSyncPatcher,
+    RunnerRunPatcher,
     ThreadBridgePatcher,
     ToolCallAsyncPatcher,
 )
@@ -26,7 +26,7 @@ class ADKIntegration(BaseIntegration):
     patchers = (
         ThreadBridgePatcher,
         AgentRunAsyncPatcher,
-        RunnerRunSyncPatcher,
+        RunnerRunPatcher,
         FlowRunAsyncPatcher,
         ToolCallAsyncPatcher,
         McpToolPatcher,
diff --git a/py/src/braintrust/integrations/adk/patchers.py b/py/src/braintrust/integrations/adk/patchers.py
index eb8d681d..7a5fa195 100644
--- a/py/src/braintrust/integrations/adk/patchers.py
+++ b/py/src/braintrust/integrations/adk/patchers.py
@@ -11,7 +11,6 @@
     _flow_run_async_wrapper,
     _mcp_tool_run_async_wrapper_async,
     _runner_run_async_wrapper,
-    _runner_run_wrapper,
     _tool_call_async_wrapper,
 )
 
@@ -31,19 +30,10 @@ class AgentRunAsyncPatcher(FunctionWrapperPatcher):
 
 
 # ---------------------------------------------------------------------------
-# Runner patchers (sync + async)
+# Runner patchers
 # ---------------------------------------------------------------------------
 
 
-class _RunnerRunSyncSubPatcher(FunctionWrapperPatcher):
-    """Patch ``Runner.run`` (sync generator)."""
-
-    name = "adk.runner.run.sync"
-    target_module = "google.adk.runners"
-    target_path = "Runner.run"
-    wrapper = _runner_run_wrapper
-
-
 class _RunnerRunAsyncSubPatcher(FunctionWrapperPatcher):
     """Patch ``Runner.run_async`` (async generator)."""
 
@@ -53,11 +43,16 @@ class _RunnerRunAsyncSubPatcher(FunctionWrapperPatcher):
     wrapper = _runner_run_async_wrapper
 
 
-class RunnerRunSyncPatcher(CompositeFunctionWrapperPatcher):
-    """Patch ``Runner.run`` (sync) and ``Runner.run_async`` for tracing."""
+class RunnerRunPatcher(CompositeFunctionWrapperPatcher):
+    """Patch ``Runner.run_async`` for tracing.
+
+    ``Runner.run()`` already delegates to ``run_async()`` in supported ADK
+    versions, so tracing the async surface alone gives sync and async callers a
+    single invocation span with the same child structure.
+    """
 
     name = "adk.runner.run"
-    sub_patchers = (_RunnerRunSyncSubPatcher, _RunnerRunAsyncSubPatcher)
+    sub_patchers = (_RunnerRunAsyncSubPatcher,)
 
 
 # ---------------------------------------------------------------------------
@@ -161,7 +156,7 @@ def wrap_agent(Agent: Any) -> Any:
 
 def wrap_runner(Runner: Any) -> Any:
     """Manually patch a runner class for tracing."""
-    return RunnerRunSyncPatcher.wrap_target(Runner)
+    return RunnerRunPatcher.wrap_target(Runner)
 
 
 def wrap_flow(Flow: Any) -> Any:
diff --git a/py/src/braintrust/integrations/adk/test_adk.py b/py/src/braintrust/integrations/adk/test_adk.py
index 4e424a4c..37165f52 100644
--- a/py/src/braintrust/integrations/adk/test_adk.py
+++ b/py/src/braintrust/integrations/adk/test_adk.py
@@ -61,6 +61,17 @@ async def _create_runner(agent: Agent, *, app_name: str, user_id: str, session_i
     return Runner(agent=agent, app_name=app_name, session_service=session_service)
 
 
+def get_weather(location: str):
+    """Get the weather for a location."""
+    return {
+        "location": location,
+        "temperature": "72°F",
+        "condition": "sunny",
+        "humidity": "45%",
+        "wind": "5 mph NW",
+    }
+
+
 def _extract_text_parts(contents):
     texts = []
     for content in contents or []:
@@ -333,21 +344,67 @@ async def generate_content_async(self, llm_request: LlmRequest, stream: bool = F
     assert llm_document_part["image_url"]["url"].reference["content_type"] == "application/pdf"
 
 
+@pytest.mark.vcr
+def test_adk_sync_runner_run_does_not_duplicate_invocation_spans(memory_logger):
+    """Runner.run() should emit a single invocation span even though it delegates to run_async()."""
+    import asyncio
+
+    from braintrust.util import LazyValue
+
+    assert not memory_logger.pop()
+
+    agent = Agent(
+        name="weather_agent",
+        model="gemini-2.0-flash",
+        instruction="You are a helpful weather assistant. Use the get_weather tool to answer questions about weather.",
+        tools=[get_weather],
+    )
+
+    app_name = "weather_app"
+    user_id = "test-user"
+    session_id = "test-session"
+
+    runner = asyncio.run(_create_runner(agent, app_name=app_name, user_id=user_id, session_id=session_id))
+    user_msg = types.Content(role="user", parts=[types.Part(text="What's the weather in San Francisco?")])
+
+    # The memory_logger fixture overrides via thread-local (_override_bg_logger),
+    # but Runner.run() dispatches to a background thread where that's invisible.
+    # We must also set _global_bg_logger so spans emitted on the worker thread
+    # are captured.
+    original_global_bg_logger = logger._state._global_bg_logger
+    logger._state._global_bg_logger = LazyValue(lambda: memory_logger, use_mutex=False)
+    try:
+        responses = [
+            event
+            for event in runner.run(user_id=user_id, session_id=session_id, new_message=user_msg)
+            if event.is_final_response()
+        ]
+    finally:
+        logger._state._global_bg_logger = original_global_bg_logger
+
+    assert responses
+    spans = memory_logger.pop()
+
+    invocation_spans = [row for row in spans if row["span_attributes"]["name"] == f"invocation [{app_name}]"]
+    assert len(invocation_spans) == 1, (
+        f"expected exactly one invocation span for Runner.run(), got {len(invocation_spans)}: "
+        f"{[span['span_id'] for span in invocation_spans]}"
+    )
+
+    invocation_span = invocation_spans[0]
+    agent_spans = [row for row in spans if row["span_attributes"]["name"] == "agent_run [weather_agent]"]
+    assert len(agent_spans) == 1
+    assert invocation_span["span_id"] in agent_spans[0].get("span_parents", []), (
+        f"agent span should be parented to the single sync invocation span {invocation_span['span_id']}, "
+        f"got parents {agent_spans[0].get('span_parents')}"
+    )
+
+
 @pytest.mark.vcr
 @pytest.mark.asyncio
 async def test_adk_braintrust_integration(memory_logger):
     assert not memory_logger.pop()
 
-    def get_weather(location: str):
-        """Get the weather for a location."""
-        return {
-            "location": location,
-            "temperature": "72°F",
-            "condition": "sunny",
-            "humidity": "45%",
-            "wind": "5 mph NW",
-        }
-
     agent = Agent(
         name="weather_agent",
         model="gemini-2.0-flash",
diff --git a/py/src/braintrust/integrations/adk/tracing.py b/py/src/braintrust/integrations/adk/tracing.py
index d25c68fd..bc0c3195 100644
--- a/py/src/braintrust/integrations/adk/tracing.py
+++ b/py/src/braintrust/integrations/adk/tracing.py
@@ -478,38 +478,6 @@ async def _trace():
             yield event
 
 
-def _runner_run_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-    user_id = kwargs.get("user_id")
-    session_id = kwargs.get("session_id")
-    new_message = kwargs.get("new_message")
-
-    # Serialize new_message before any dict conversion to handle binary data
-    serialized_message = _serialize_content(new_message) if new_message else None
-
-    def _trace():
-        with start_span(
-            name=f"invocation [{instance.app_name}]",
-            type=SpanTypeAttribute.TASK,
-            input={"new_message": serialized_message},
-            metadata=bt_safe_deep_copy(
-                {
-                    "user_id": user_id,
-                    "session_id": session_id,
-                    **_omit(kwargs, ["user_id", "session_id", "new_message"]),
-                }
-            ),
-        ) as runner_span:
-            last_event = None
-            for event in wrapped(*args, **kwargs):
-                if event.is_final_response():
-                    last_event = event
-                yield event
-            if last_event:
-                runner_span.log(output=last_event)
-
-    yield from _trace()
-
-
 async def _runner_run_async_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
     user_id = kwargs.get("user_id")
     session_id = kwargs.get("session_id")
diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_adk.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_adk.py
index f09d1242..0e092841 100644
--- a/py/src/braintrust/integrations/auto_test_scripts/test_auto_adk.py
+++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_adk.py
@@ -7,7 +7,6 @@
 from braintrust.integrations.adk.patchers import (
     AgentRunAsyncPatcher,
     _RunnerRunAsyncSubPatcher,
-    _RunnerRunSyncSubPatcher,
     _ThreadBridgePlatformSubPatcher,
     _ThreadBridgeRunnersSubPatcher,
 )
@@ -27,7 +26,6 @@ def is_patched(target, patcher):
 
 # 1. Verify ADK surfaces are not patched initially.
 assert not is_patched(BaseAgent.run_async, AgentRunAsyncPatcher)
-assert not is_patched(Runner.run, _RunnerRunSyncSubPatcher)
 assert not is_patched(Runner.run_async, _RunnerRunAsyncSubPatcher)
 assert not is_patched(platform_thread.create_thread, _ThreadBridgePlatformSubPatcher)
 assert not is_patched(adk_runners.create_thread, _ThreadBridgeRunnersSubPatcher)
@@ -38,8 +36,8 @@ def is_patched(target, patcher):
 
 # 3. Verify the imported google.adk surfaces are patched.
 assert is_patched(BaseAgent.run_async, AgentRunAsyncPatcher)
-assert is_patched(Runner.run, _RunnerRunSyncSubPatcher)
 assert is_patched(Runner.run_async, _RunnerRunAsyncSubPatcher)
+assert not is_patched(Runner.run, _RunnerRunAsyncSubPatcher)
 assert is_patched(platform_thread.create_thread, _ThreadBridgePlatformSubPatcher)
 assert is_patched(adk_runners.create_thread, _ThreadBridgeRunnersSubPatcher)
 
@@ -47,7 +45,7 @@ def is_patched(target, patcher):
 results2 = auto_instrument()
 assert results2.get("adk") == True, "auto_instrument should still return True on second call"
 assert is_patched(BaseAgent.run_async, AgentRunAsyncPatcher)
-assert is_patched(Runner.run, _RunnerRunSyncSubPatcher)
 assert is_patched(Runner.run_async, _RunnerRunAsyncSubPatcher)
+assert not is_patched(Runner.run, _RunnerRunAsyncSubPatcher)
 
 print("SUCCESS")