From 3bc784a94c5ffe5949c2f34d631c6a0be9da05b4 Mon Sep 17 00:00:00 2001
From: A Vertex SDK engineer <vertex-sdk-bot@google.com>
Date: Sat, 28 Feb 2026 15:19:36 -0800
Subject: [PATCH] fix: Only include CandidateResponse if a response is present

PiperOrigin-RevId: 876784701
---
 .../replays/test_create_evaluation_run.py     | 57 +++++++++++++++----
 vertexai/_genai/_evals_common.py              | 21 ++++---
 2 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
index cd97ab042c..18a05b642c 100644
--- a/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
+++ b/tests/unit/vertexai/genai/replays/test_create_evaluation_run.py
@@ -63,7 +63,9 @@
         )
     ),
 )
-
+INFERENCE_CONFIG = types.EvaluationRunInferenceConfig(
+    model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
+)
 
 def test_create_eval_run_data_source_evaluation_set(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun."""
@@ -189,9 +191,6 @@ def test_create_eval_run_data_source_bigquery_request_set(client):
 def test_create_eval_run_with_inference_configs(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs."""
     client._api_client._http_options.api_version = "v1beta1"
-    inference_config = types.EvaluationRunInferenceConfig(
-        model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
-    )
     evaluation_run = client.evals.create_evaluation_run(
         name="test_inference_config",
         display_name="test_inference_config",
@@ -200,7 +199,7 @@ def test_create_eval_run_with_inference_configs(client):
         ),
         dest=GCS_DEST,
         metrics=[GENERAL_QUALITY_METRIC],
-        inference_configs={"model_1": inference_config},
+        inference_configs={"model_1": INFERENCE_CONFIG},
         labels={"label1": "value1"},
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
@@ -216,7 +215,7 @@ def test_create_eval_run_with_inference_configs(client):
         ),
         metrics=[GENERAL_QUALITY_METRIC],
     )
-    assert evaluation_run.inference_configs["model_1"] == inference_config
+    assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
     assert evaluation_run.labels == {
         "label1": "value1",
     }
@@ -318,6 +317,45 @@ def test_create_eval_run_with_inference_configs(client):
 #         )
 #     assert evaluation_run.error is None
 
+import pandas as pd
+
+def test_create_eval_run_data_source_evaluation_dataset_inference_config(client):
+    """Tests that create_evaluation_run() with inference_configs and a response-less EvaluationDataset stores no candidate responses."""
+    input_df = pd.DataFrame(  # Note: only prompt/reference columns, no response column.
+        {
+            "prompt": ["prompt1", "prompt2"],
+            "reference": ["reference1", "reference2"],
+        }
+    )
+    evaluation_run = client.evals.create_evaluation_run(
+        name="test9",
+        display_name="test9",
+        dataset=types.EvaluationDataset(
+            candidate_name="candidate_1",
+            eval_dataset_df=input_df,
+        ),
+        dest=GCS_DEST,
+        metrics=[GENERAL_QUALITY_METRIC],
+        inference_configs={"candidate_1": INFERENCE_CONFIG},
+    )
+    assert isinstance(evaluation_run, types.EvaluationRun)
+    assert evaluation_run.display_name == "test9"
+    assert evaluation_run.state == types.EvaluationRunState.PENDING
+    assert isinstance(evaluation_run.data_source, types.EvaluationRunDataSource)
+    # Check evaluation set: the run must reference a set holding one item per DataFrame row.
+    assert evaluation_run.data_source.evaluation_set
+    eval_set = client.evals.get_evaluation_set(
+        name=evaluation_run.data_source.evaluation_set
+    )
+    assert len(eval_set.evaluation_items) == 2
+    assert evaluation_run.inference_configs["candidate_1"] == INFERENCE_CONFIG
+    # Check evaluation items: each is a REQUEST carrying its row's prompt and an empty candidate_responses list.
+    for i, eval_item_name in enumerate(eval_set.evaluation_items):
+        eval_item = client.evals.get_evaluation_item(name=eval_item_name)
+        assert eval_item.evaluation_item_type == types.EvaluationItemType.REQUEST
+        assert eval_item.evaluation_request.prompt.text == input_df.iloc[i]["prompt"]
+        assert eval_item.evaluation_request.candidate_responses == []
+    assert evaluation_run.error is None
 
 pytest_plugins = ("pytest_asyncio",)
 
@@ -370,9 +408,6 @@ async def test_create_eval_run_async(client):
 async def test_create_eval_run_async_with_inference_configs(client):
     """Tests that create_evaluation_run() creates a correctly structured EvaluationRun with inference_configs asynchronously."""
     client._api_client._http_options.api_version = "v1beta1"
-    inference_config = types.EvaluationRunInferenceConfig(
-        model="projects/503583131166/locations/us-central1/publishers/google/models/gemini-2.5-flash"
-    )
     evaluation_run = await client.aio.evals.create_evaluation_run(
         name="test_inference_config_async",
         display_name="test_inference_config_async",
@@ -381,7 +416,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
         ),
         dest=GCS_DEST,
         metrics=[GENERAL_QUALITY_METRIC],
-        inference_configs={"model_1": inference_config},
+        inference_configs={"model_1": INFERENCE_CONFIG},
         labels={"label1": "value1"},
     )
     assert isinstance(evaluation_run, types.EvaluationRun)
@@ -397,7 +432,7 @@ async def test_create_eval_run_async_with_inference_configs(client):
         ),
         metrics=[GENERAL_QUALITY_METRIC],
     )
-    assert evaluation_run.inference_configs["model_1"] == inference_config
+    assert evaluation_run.inference_configs["model_1"] == INFERENCE_CONFIG
     assert evaluation_run.labels == {
         "label1": "value1",
     }
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index 0bc28994ed..a0e986612b 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -1962,6 +1962,15 @@ def _create_evaluation_set_from_dataframe(
             for event in row[_evals_constant.INTERMEDIATE_EVENTS]:
                 if CONTENT in event:
                     intermediate_events.append(event[CONTENT])
+        candidate_responses = []
+        if _evals_constant.RESPONSE in row:
+            candidate_responses.append(
+                types.CandidateResponse(
+                    candidate=candidate_name or "Candidate 1",
+                    text=row[_evals_constant.RESPONSE],
+                    events=intermediate_events or None,
+                )
+            )
         eval_item_requests.append(
             types.EvaluationItemRequest(
                 prompt=(
@@ -1974,17 +1983,7 @@ def _create_evaluation_set_from_dataframe(
                     if _evals_constant.REFERENCE in row
                     else None
                 ),
-                candidate_responses=[
-                    types.CandidateResponse(
-                        candidate=candidate_name or "Candidate 1",
-                        text=row.get(_evals_constant.RESPONSE, None),
-                        events=(
-                            intermediate_events
-                            if len(intermediate_events) > 0
-                            else None
-                        ),
-                    )
-                ],
+                candidate_responses=candidate_responses,
             )
         )
     logger.info("Writing evaluation item requests to GCS.")