Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 52 additions & 0 deletions src/databricks/sql/common/agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Detects whether the Python SQL connector is being invoked by an AI coding agent
by checking for well-known environment variables that agents set in their spawned
shell processes.

Detection only succeeds when exactly one agent environment variable is set to a
non-empty value, to avoid ambiguous attribution when multiple agent environments overlap.

Adding a new agent requires only a new entry in KNOWN_AGENTS.

References for each environment variable:
- ANTIGRAVITY_AGENT: Closed source. Google Antigravity sets this variable.
- CLAUDECODE: https://github.com/anthropics/claude-code (sets CLAUDECODE=1)
- CLINE_ACTIVE: https://github.com/cline/cline (shipped in v3.24.0)
- CODEX_CI: https://github.com/openai/codex (part of UNIFIED_EXEC_ENV array in codex-rs)
- CURSOR_AGENT: Closed source. Referenced in a gist by johnlindquist.
- GEMINI_CLI: https://google-gemini.github.io/gemini-cli/docs/tools/shell.html (sets GEMINI_CLI=1)
- OPENCODE: https://github.com/opencode-ai/opencode (sets OPENCODE=1)
"""

import os

# (environment variable, product name) pairs consulted by detect().
# To support another agent, append one more pair to this list.
KNOWN_AGENTS = [
    ("ANTIGRAVITY_AGENT", "antigravity"),
    ("CLAUDECODE", "claude-code"),
    ("CLINE_ACTIVE", "cline"),
    ("CODEX_CI", "codex"),
    ("CURSOR_AGENT", "cursor"),
    ("GEMINI_CLI", "gemini-cli"),
    ("OPENCODE", "opencode"),
]


def detect(env=None):
    """Identify the AI coding agent, if any, that launched this process.

    Every variable listed in KNOWN_AGENTS is looked up in ``env``; a variable
    counts as a signal only when its value is truthy (e.g. a non-empty
    string).

    Args:
        env: Optional dict-like object exposing ``.get(name)`` for environment
            variable lookup. Defaults to os.environ. Exists for testability.

    Returns:
        The product string of the single matching agent, or an empty string
        when no agent or more than one agent is signalled.
    """
    environ = os.environ if env is None else env

    hits = 0
    matched_product = ""
    for var_name, product_name in KNOWN_AGENTS:
        if environ.get(var_name):
            hits += 1
            matched_product = product_name

    # More than one signal is ambiguous, so it is reported as "no agent".
    return matched_product if hits == 1 else ""
5 changes: 5 additions & 0 deletions src/databricks/sql/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from databricks.sql.backend.databricks_client import DatabricksClient
from databricks.sql.backend.types import SessionId, BackendType
from databricks.sql.common.unified_http_client import UnifiedHttpClient
from databricks.sql.common.agent import detect as detect_agent

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -64,6 +65,10 @@ def __init__(
else:
self.useragent_header = "{}/{}".format(USER_AGENT_NAME, __version__)

agent_product = detect_agent()
if agent_product:
self.useragent_header += " agent/{}".format(agent_product)
Comment on lines +68 to +70
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding integration tests to verify that the agent detection is properly integrated into the User-Agent header. The existing test_useragent_header in test_session.py could be extended to verify that when an agent environment variable is set, the User-Agent header includes the agent suffix. This would ensure the integration works end-to-end, not just the detection logic in isolation.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The detection logic is fully covered by unit tests in test_agent_detection.py. The integration in session.py is a 3-line append that is straightforward. Adding an integration test here would require mocking the full Session constructor which adds complexity without meaningful coverage gain.


base_headers = [("User-Agent", self.useragent_header)]
all_headers = (http_headers or []) + base_headers

Expand Down
6 changes: 6 additions & 0 deletions src/databricks/sql/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -914,12 +914,18 @@ def build_client_context(server_hostname: str, version: str, **kwargs):
)

# Build user agent
from databricks.sql.common.agent import detect as detect_agent
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The import statement should be moved to the top of the function (after line 904) to group all imports together. This improves code readability and follows the convention established in this function where imports are placed at the beginning.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The import is intentionally placed inside the function to avoid a circular import — utils.py is imported early in the module graph and agent.py lives in common/. This is consistent with the existing local import pattern used in this function (e.g. from databricks.sql.auth.common import ClientContext on line 903).


user_agent_entry = kwargs.get("user_agent_entry", "")
if user_agent_entry:
user_agent = f"PyDatabricksSqlConnector/{version} ({user_agent_entry})"
else:
user_agent = f"PyDatabricksSqlConnector/{version}"

agent_product = detect_agent()
if agent_product:
user_agent += f" agent/{agent_product}"
Comment on lines +925 to +927
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding integration tests to verify that the agent detection is properly integrated into the User-Agent header in the SEA path. This would ensure the integration works end-to-end and the User-Agent header includes the agent suffix when an agent environment variable is set.

Copilot uses AI. Check for mistakes.
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The detection logic is fully covered by unit tests in test_agent_detection.py. The integration in build_client_context is a trivial string append. The SEA path uses the same detect() function already covered by tests.


# Explicitly construct ClientContext with proper types
return ClientContext(
hostname=server_hostname,
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/test_agent_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import pytest
from databricks.sql.common.agent import detect, KNOWN_AGENTS


class TestAgentDetection:
    """Unit tests for ``detect`` and the ``KNOWN_AGENTS`` lookup table."""

    def test_detects_single_agent_claude_code(self):
        assert detect({"CLAUDECODE": "1"}) == "claude-code"

    def test_detects_single_agent_cursor(self):
        assert detect({"CURSOR_AGENT": "1"}) == "cursor"

    def test_detects_single_agent_gemini_cli(self):
        assert detect({"GEMINI_CLI": "1"}) == "gemini-cli"

    def test_detects_single_agent_cline(self):
        assert detect({"CLINE_ACTIVE": "1"}) == "cline"

    def test_detects_single_agent_codex(self):
        assert detect({"CODEX_CI": "1"}) == "codex"

    def test_detects_single_agent_opencode(self):
        assert detect({"OPENCODE": "1"}) == "opencode"

    def test_detects_single_agent_antigravity(self):
        assert detect({"ANTIGRAVITY_AGENT": "1"}) == "antigravity"

    def test_returns_empty_when_no_agent_detected(self):
        assert detect({}) == ""

    def test_returns_empty_when_multiple_agents_detected(self):
        assert detect({"CLAUDECODE": "1", "CURSOR_AGENT": "1"}) == ""

    def test_ignores_empty_env_var_values(self):
        assert detect({"CLAUDECODE": ""}) == ""

    def test_all_known_agents_are_covered(self):
        for env_var, product in KNOWN_AGENTS:
            assert detect({env_var: "1"}) == product, (
                f"Agent with env var {env_var} should be detected as {product}"
            )

    def test_defaults_to_os_environ(self, monkeypatch):
        # Clear every variable listed in KNOWN_AGENTS instead of a hand-copied
        # list, so an agent added later cannot leak in from the environment
        # the test suite itself is running under.
        for env_var, _product in KNOWN_AGENTS:
            monkeypatch.delenv(env_var, raising=False)
        # With all agent vars cleared, detect() should return empty
        assert detect() == ""
Loading