From fea1d4e7ee4f21fa58824f2cc5dac176040732e8 Mon Sep 17 00:00:00 2001 From: Sait Cakmak Date: Tue, 7 Apr 2026 07:45:26 -0700 Subject: [PATCH] Change get_trace to return dict[int, float] and filter by MetricAvailability (#5140) Summary: Pull Request resolved: https://github.com/facebook/Ax/pull/5140 Goal: Eliminate assumptions around COMPLETED trials always having all metrics available. This is part of the code that would've raised errors if it encountered missing metrics in COMPLETED trials. Changing the return type here seemed like a better move than using carry-forward logic, and fits better with the logic that skips status quo as well (which may lead to confusing indexing when skipping status quo). How does this new return type fit with `BatchTrial`: `get_trace` takes the best arm out of each batch, so it returns (and did so before this change) one result per trial. Nothing changed. Changes (claude summary): `get_trace` previously returned `list[float]` with positional indexing, which forced callers to fabricate sequential trial indices. This change: 1. Changes the return type to `dict[int, float]` mapping trial_index to performance value, so callers get real trial indices. 2. Adds `MetricAvailability` filtering to exclude trials with incomplete metric data before pivoting. This prevents the `ValueError("Some metrics are not present for all trials and arms")` that `_pivot_data_with_feasibility` raises when a completed trial is missing any metric (e.g., partial fetches, fetch failures, metrics added mid-experiment). 3. Removes the carry-forward expansion loop for abandoned/failed trials. These trials are now simply excluded from the returned dict. Callers updated: - `UtilityProgressionAnalysis` now shows real trial indices on x-axis - `pareto_frontier.hypervolume_trace_plot` uses real trial indices - `benchmark.py` extracts values from dict Differential Revision: D99448053 --- .../plotly/tests/test_utility_progression.py | 4 +- ax/analysis/plotly/utility_progression.py | 24 +++--- ax/benchmark/benchmark.py | 28 +++++-- ax/plot/pareto_frontier.py | 4 +- ax/service/tests/test_best_point.py | 82 ++++++++++--------- ax/service/utils/best_point.py | 62 ++++++-------- 6 files changed, 108 insertions(+), 96 deletions(-) diff --git a/ax/analysis/plotly/tests/test_utility_progression.py b/ax/analysis/plotly/tests/test_utility_progression.py index cfaf90e214e..d2fe59ecd7c 100644 --- a/ax/analysis/plotly/tests/test_utility_progression.py +++ b/ax/analysis/plotly/tests/test_utility_progression.py @@ -36,7 +36,7 @@ def _assert_valid_utility_card( """Assert that a card has valid structure for utility progression.""" self.assertIsInstance(card, PlotlyAnalysisCard) self.assertEqual(card.name, "UtilityProgressionAnalysis") - self.assertIn("trace_index", card.df.columns) + self.assertIn("trial_index", card.df.columns) self.assertIn("utility", card.df.columns) def test_utility_progression_soo(self) -> None: @@ -211,7 +211,7 @@ def test_all_infeasible_points_raises_error(self) -> None: with ( patch( "ax.analysis.plotly.utility_progression.get_trace", - return_value=[math.inf, -math.inf, math.inf], + return_value={0: math.inf, 1: -math.inf, 2: math.inf}, ), self.assertRaises(ExperimentNotReadyError) as cm, ): diff --git a/ax/analysis/plotly/utility_progression.py b/ax/analysis/plotly/utility_progression.py index 5ffc765d761..ee738d3845c 100644 --- a/ax/analysis/plotly/utility_progression.py +++ b/ax/analysis/plotly/utility_progression.py @@ -28,11 +28,9 @@ _UTILITY_PROGRESSION_TITLE = "Utility Progression" _TRACE_INDEX_EXPLANATION = ( - "The x-axis shows trace index, which counts completed or early-stopped trials " - "sequentially (1, 2, 3, ...). This differs from trial index, which may have " - "gaps if some trials failed or were abandoned. For example, if trials 0, 2, " - "and 5 completed while trials 1, 3, and 4 failed, the trace indices would be " - "1, 2, 3 corresponding to trial indices 0, 2, 5." + "The x-axis shows trial index. Only completed or early-stopped trials with " + "complete metric data are included, so there may be gaps if some trials " + "failed, were abandoned, or have incomplete data." ) _CUMULATIVE_BEST_EXPLANATION = ( @@ -57,7 +55,8 @@ class UtilityProgressionAnalysis(Analysis): The DataFrame computed will contain one row per completed trial and the following columns: - - trace_index: Sequential index of completed/early-stopped trials (1, 2, 3, ...) + - trial_index: The trial index of each completed/early-stopped trial + that has complete metric avilability. - utility: The cumulative best utility value at that trial """ @@ -114,7 +113,7 @@ def compute( ) # Check if all points are infeasible (inf or -inf values) - if all(np.isinf(value) for value in trace): + if all(np.isinf(value) for value in trace.values()): raise ExperimentNotReadyError( "All trials in the utility trace are infeasible i.e., they violate " "outcome constraints, so there are no feasible points to plot. During " @@ -125,12 +124,11 @@ def compute( "space, or (2) relaxing outcome constraints." ) - # Create DataFrame with 1-based trace index for user-friendly display - # (1st completed trial, 2nd completed trial, etc. instead of 0-indexed) + # Create DataFrame with trial indices from the trace df = pd.DataFrame( { - "trace_index": list(range(1, len(trace) + 1)), - "utility": trace, + "trial_index": list(trace.keys()), + "utility": list(trace.values()), } ) @@ -185,14 +183,14 @@ def compute( # Create the plot fig = px.line( data_frame=df, - x="trace_index", + x="trial_index", y="utility", markers=True, color_discrete_sequence=[AX_BLUE], ) # Update axis labels and format x-axis to show integers only - fig.update_xaxes(title_text="Trace Index", dtick=1, rangemode="nonnegative") + fig.update_xaxes(title_text="Trial Index", dtick=1, rangemode="nonnegative") fig.update_yaxes(title_text=y_label) return create_plotly_analysis_card( diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py index bd257c92d9a..2b066442183 100644 --- a/ax/benchmark/benchmark.py +++ b/ax/benchmark/benchmark.py @@ -320,9 +320,10 @@ def _get_oracle_value_of_params( dummy_experiment = get_oracle_experiment_from_params( problem=problem, dict_of_dict_of_params={0: {"0_0": params}} ) - (inference_value,) = get_trace( + trace = get_trace( experiment=dummy_experiment, optimization_config=problem.optimization_config ) + inference_value = next(iter(trace.values())) return inference_value @@ -510,12 +511,27 @@ def get_benchmark_result_from_experiment_and_gs( dict_of_dict_of_params=dict_of_dict_of_params, trial_statuses=trial_statuses, ) - oracle_trace = np.array( - get_trace( - experiment=actual_params_oracle_dummy_experiment, - optimization_config=problem.optimization_config, - ) + oracle_trace_dict = get_trace( + experiment=actual_params_oracle_dummy_experiment, + optimization_config=problem.optimization_config, + ) + # Expand trace dict to a positional array aligned with all trials, + # carry-forwarding the last best value for trials without data (e.g., + # failed or abandoned trials preserved via trial_statuses). + maximize = ( + isinstance(problem.optimization_config, MultiObjectiveOptimizationConfig) + or problem.optimization_config.objective.is_scalarized_objective + or not problem.optimization_config.objective.minimize ) + all_trial_indices = sorted(actual_params_oracle_dummy_experiment.trials.keys()) + last_best = -float("inf") if maximize else float("inf") + oracle_trace_list: list[float] = [] + for idx in all_trial_indices: + if idx in oracle_trace_dict: + last_best = oracle_trace_dict[idx] + oracle_trace_list.append(last_best) + oracle_trace = np.array(oracle_trace_list) + is_feasible_trace = np.array( get_is_feasible_trace( experiment=actual_params_oracle_dummy_experiment, diff --git a/ax/plot/pareto_frontier.py b/ax/plot/pareto_frontier.py index fec10dd7fac..5d11638f4e5 100644 --- a/ax/plot/pareto_frontier.py +++ b/ax/plot/pareto_frontier.py @@ -61,8 +61,8 @@ def scatter_plot_with_hypervolume_trace_plotly(experiment: Experiment) -> go.Fig df = pd.DataFrame( { - "hypervolume": hypervolume_trace, - "trial_index": [*range(len(hypervolume_trace))], + "trial_index": list(hypervolume_trace.keys()), + "hypervolume": list(hypervolume_trace.values()), } ) diff --git a/ax/service/tests/test_best_point.py b/ax/service/tests/test_best_point.py index 03393107221..e4a14ebfba5 100644 --- a/ax/service/tests/test_best_point.py +++ b/ax/service/tests/test_best_point.py @@ -57,7 +57,7 @@ def test_get_trace(self) -> None: exp = get_experiment_with_observations( observations=[[11], [10], [9], [15], [5]], minimize=True ) - self.assertEqual(get_trace(exp), [11, 10, 9, 9, 5]) + self.assertEqual(get_trace(exp), {0: 11, 1: 10, 2: 9, 3: 9, 4: 5}) # Same experiment with maximize via new optimization config. opt_conf = none_throws(exp.optimization_config).clone() @@ -67,7 +67,7 @@ def test_get_trace(self) -> None: opt_conf.objective.metric_names[0]: opt_conf.objective.metric_names[0] }, ) - self.assertEqual(get_trace(exp, opt_conf), [11, 11, 11, 15, 15]) + self.assertEqual(get_trace(exp, opt_conf), {0: 11, 1: 11, 2: 11, 3: 15, 4: 15}) with self.subTest("Single objective with constraints"): # The second metric is the constraint and needs to be >= 0 @@ -76,48 +76,52 @@ def test_get_trace(self) -> None: minimize=False, constrained=True, ) - self.assertEqual(get_trace(exp), [float("-inf"), 10, 10, 10, 11]) + self.assertEqual( + get_trace(exp), + {0: float("-inf"), 1: 10, 2: 10, 3: 10, 4: 11}, + ) exp = get_experiment_with_observations( observations=[[11, -1], [10, 1], [9, 1], [15, -1], [11, 1]], minimize=True, constrained=True, ) - self.assertEqual(get_trace(exp), [float("inf"), 10, 9, 9, 9]) + self.assertEqual(get_trace(exp), {0: float("inf"), 1: 10, 2: 9, 3: 9, 4: 9}) # Scalarized. exp = get_experiment_with_observations( observations=[[1, 1], [2, 2], [3, 3]], scalarized=True, ) - self.assertEqual(get_trace(exp), [2, 4, 6]) + self.assertEqual(get_trace(exp), {0: 2, 1: 4, 2: 6}) # Multi objective. exp = get_experiment_with_observations( observations=[[1, 1], [-1, 100], [1, 2], [3, 3], [2, 4], [2, 1]], ) - self.assertEqual(get_trace(exp), [1, 1, 2, 9, 11, 11]) + self.assertEqual(get_trace(exp), {0: 1, 1: 1, 2: 2, 3: 9, 4: 11, 5: 11}) # W/o ObjectiveThresholds (inferring ObjectiveThresholds from scaled nadir) assert_is_instance( exp.optimization_config, MultiObjectiveOptimizationConfig ).objective_thresholds = [] trace = get_trace(exp) + trace_values = list(trace.values()) # With inferred thresholds via scaled nadir, check trace properties: # - All values should be non-negative - self.assertTrue(all(v >= 0.0 for v in trace)) + self.assertTrue(all(v >= 0.0 for v in trace_values)) # - Trace should be non-decreasing (cumulative best) - for i in range(1, len(trace)): - self.assertGreaterEqual(trace[i], trace[i - 1]) + for i in range(1, len(trace_values)): + self.assertGreaterEqual(trace_values[i], trace_values[i - 1]) # - Final value should be positive (non-trivial HV) - self.assertGreater(trace[-1], 0.0) + self.assertGreater(trace_values[-1], 0.0) # Multi-objective w/ constraints. exp = get_experiment_with_observations( observations=[[-1, 1, 1], [1, 2, 1], [3, 3, -1], [2, 4, 1], [2, 1, 1]], constrained=True, ) - self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8]) + self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8}) # W/ relative constraints & status quo. exp.status_quo = Arm(parameters={"x": 0.5, "y": 0.5}, name="status_quo") @@ -149,17 +153,17 @@ def test_get_trace(self) -> None: ] status_quo_data = Data(df=pd.DataFrame.from_records(df_dict)) exp.attach_data(data=status_quo_data) - self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8]) + self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8}) # W/ first objective being minimized. exp = get_experiment_with_observations( observations=[[1, 1], [-1, 2], [3, 3], [-2, 4], [2, 1]], minimize=True ) - self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8]) + self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8}) # W/ empty data. exp = get_experiment_with_trial() - self.assertEqual(get_trace(exp), []) + self.assertEqual(get_trace(exp), {}) # test batch trial exp = get_experiment_with_batch_trial(with_status_quo=False) @@ -191,7 +195,7 @@ def test_get_trace(self) -> None: ] ) exp.attach_data(Data(df=pd.DataFrame.from_records(df_dict))) - self.assertEqual(get_trace(exp), [2.0]) + self.assertEqual(get_trace(exp), {0: 2.0}) # test that there is performance metric in the trace for each # completed/early-stopped trial trial1 = assert_is_instance(trial, BatchTrial).clone_to(include_sq=False) @@ -214,7 +218,7 @@ def test_get_trace(self) -> None: ] ) exp.attach_data(Data(df=pd.DataFrame.from_records(df_dict2))) - self.assertEqual(get_trace(exp), [2.0, 2.0, 20.0]) + self.assertEqual(get_trace(exp), {0: 2.0, 2: 20.0}) def test_get_trace_with_non_completed_trials(self) -> None: with self.subTest("minimize with abandoned trial"): @@ -224,12 +228,11 @@ def test_get_trace_with_non_completed_trials(self) -> None: # Mark trial 2 (value=9) as abandoned exp.trials[2].mark_abandoned(unsafe=True) - # Abandoned trial carries forward the last best value + # Abandoned trial is excluded from trace trace = get_trace(exp) - self.assertEqual(len(trace), 5) - # Trial 0: 11, Trial 1: 10, Trial 2 (abandoned): carry forward 10 + # Trial 0: 11, Trial 1: 10, Trial 2 (abandoned): excluded # Trial 3: 10 (15 > 10), Trial 4: 5 - self.assertEqual(trace, [11, 10, 10, 10, 5]) + self.assertEqual(trace, {0: 11, 1: 10, 3: 10, 4: 5}) with self.subTest("maximize with abandoned trial"): exp = get_experiment_with_observations( @@ -238,12 +241,11 @@ def test_get_trace_with_non_completed_trials(self) -> None: # Mark trial 1 (value=3) as abandoned exp.trials[1].mark_abandoned(unsafe=True) - # Abandoned trial carries forward the last best value + # Abandoned trial is excluded from trace trace = get_trace(exp) - self.assertEqual(len(trace), 5) - # Trial 0: 1, Trial 1 (abandoned): carry forward 1, + # Trial 0: 1, Trial 1 (abandoned): excluded, # Trial 2: 2, Trial 3: 5, Trial 4: 5 - self.assertEqual(trace, [1, 1, 2, 5, 5]) + self.assertEqual(trace, {0: 1, 2: 2, 3: 5, 4: 5}) with self.subTest("minimize with failed trial"): exp = get_experiment_with_observations( @@ -252,12 +254,11 @@ def test_get_trace_with_non_completed_trials(self) -> None: # Mark trial 2 (value=9) as failed exp.trials[2].mark_failed(unsafe=True) - # Failed trial carries forward the last best value + # Failed trial is excluded from trace trace = get_trace(exp) - self.assertEqual(len(trace), 5) - # Trial 0: 11, Trial 1: 10, Trial 2 (failed): carry forward 10 + # Trial 0: 11, Trial 1: 10, Trial 2 (failed): excluded # Trial 3: 10 (15 > 10), Trial 4: 5 - self.assertEqual(trace, [11, 10, 10, 10, 5]) + self.assertEqual(trace, {0: 11, 1: 10, 3: 10, 4: 5}) def test_get_trace_with_include_status_quo(self) -> None: with self.subTest("Multi-objective: status quo dominates in some trials"): @@ -347,9 +348,11 @@ def test_get_trace_with_include_status_quo(self) -> None: # The last value MUST differ because status quo dominates # Without status quo, only poor arms contribute (low hypervolume) # With status quo, excellent values contribute (high hypervolume) + last_without = list(trace_without_sq.values())[-1] + last_with = list(trace_with_sq.values())[-1] self.assertGreater( - trace_with_sq[-1], - trace_without_sq[-1], + last_with, + last_without, f"Status quo dominates in trial 3, so trace with SQ should be higher. " f"Without SQ: {trace_without_sq}, With SQ: {trace_with_sq}", ) @@ -418,9 +421,11 @@ def test_get_trace_with_include_status_quo(self) -> None: # The last value MUST differ because status quo is best # Without status quo: best in trial 3 is 15.0, cumulative min is 9 # With status quo: best in trial 3 is 5.0, cumulative min is 5 + last_without = list(trace_without_sq.values())[-1] + last_with = list(trace_with_sq.values())[-1] self.assertLess( - trace_with_sq[-1], - trace_without_sq[-1], + last_with, + last_without, f"Status quo is best in trial 3, so trace with SQ should be " f"lower (minimize). Without SQ: {trace_without_sq}, " f"With SQ: {trace_with_sq}", @@ -502,19 +507,20 @@ def _make_pref_opt_config(self, profile_name: str) -> PreferenceOptimizationConf preference_profile_name=profile_name, ) - def _assert_valid_trace(self, trace: list[float], expected_len: int) -> None: + def _assert_valid_trace(self, trace: dict[int, float], expected_len: int) -> None: """Assert trace has expected length, contains floats, is non-decreasing and has more than one unique value.""" self.assertEqual(len(trace), expected_len) - for value in trace: + trace_values = list(trace.values()) + for value in trace_values: self.assertIsInstance(value, float) - for i in range(1, len(trace)): + for i in range(1, len(trace_values)): self.assertGreaterEqual( - trace[i], - trace[i - 1], + trace_values[i], + trace_values[i - 1], msg=f"Trace not monotonically increasing at index {i}: {trace}", ) - unique_values = set(trace) + unique_values = set(trace_values) self.assertGreater( len(unique_values), 1, diff --git a/ax/service/utils/best_point.py b/ax/service/utils/best_point.py index 0b43001a4a5..40fbebf861c 100644 --- a/ax/service/utils/best_point.py +++ b/ax/service/utils/best_point.py @@ -45,6 +45,7 @@ from ax.core.outcome_constraint import _op_to_str, OutcomeConstraint from ax.core.trial import Trial from ax.core.types import ComparisonOp, TModelPredictArm, TParameterization +from ax.core.utils import compute_metric_availability, MetricAvailability from ax.exceptions.core import DataRequiredError, UnsupportedError, UserInputError from ax.generation_strategy.generation_strategy import GenerationStrategy from ax.generators.torch_base import TorchGenerator @@ -1212,7 +1213,7 @@ def get_trace( experiment: Experiment, optimization_config: OptimizationConfig | None = None, include_status_quo: bool = False, -) -> list[float]: +) -> dict[int, float]: """ Compute the optimization trace at each iteration. Given an experiment and an optimization config, compute the performance @@ -1229,10 +1230,10 @@ def get_trace( improvements in the optimization trace. If the first trial(s) are infeasible, the trace can start at inf or -inf. - An iteration here refers to a completed or early-stopped (batch) trial. - There will be one performance metric in the trace for each iteration. - Trials without data (e.g. abandoned or failed) carry forward the last - best value. + An iteration here refers to a completed or early-stopped (batch) trial + with complete metric data for all metrics in the optimization config. + Trials with incomplete data, or trials that are abandoned or failed, are + excluded from the trace. Args: experiment: The experiment to get the trace for. @@ -1243,14 +1244,15 @@ def get_trace( behavior. Returns: - A list of performance values at each iteration. + A dict mapping trial index to performance value, ordered by trial + index. Only trials with complete metric data are included. """ optimization_config = optimization_config or none_throws( experiment.optimization_config ) df = experiment.lookup_data().df if len(df) == 0: - return [] + return {} # Get the names of the metrics in optimization config. metric_names = set(optimization_config.objective.metric_names) @@ -1271,7 +1273,22 @@ def get_trace( idx &= df["arm_name"] != status_quo.name df = df.loc[idx, :] if len(df) == 0: - return [] + return {} + + # Filter to trials with complete metric data. + availability = compute_metric_availability( + experiment=experiment, + trial_indices=df["trial_index"].unique().tolist(), + optimization_config=optimization_config, + ) + complete_trials = { + idx + for idx, avail in availability.items() + if avail == MetricAvailability.COMPLETE + } + df = df[df["trial_index"].isin(complete_trials)] + if len(df) == 0: + return {} # Derelativize the optimization config only if needed (i.e., if there are # relative constraints). This avoids unnecessary data pivoting that can @@ -1289,7 +1306,7 @@ def get_trace( use_cumulative_best=True, experiment=experiment, ) - # Aggregate by trial, then. compute cumulative best + # Aggregate by trial, then compute cumulative best objective = optimization_config.objective maximize = ( isinstance(optimization_config, MultiObjectiveOptimizationConfig) @@ -1303,32 +1320,7 @@ def get_trace( keep_order=False, # sort by trial index ) - compact_trace = cumulative_value.tolist() - - # Expand trace to include trials without data (e.g. ABANDONED, FAILED) - # with carry-forward values. - data_trial_indices = set(cumulative_value.index) - expanded_trace = [] - compact_idx = 0 - last_best_value = -float("inf") if maximize else float("inf") - - for trial_index in sorted(experiment.trials.keys()): - trial = experiment.trials[trial_index] - if trial_index in data_trial_indices: - # Trial has data in compact trace - if compact_idx < len(compact_trace): - value = compact_trace[compact_idx] - expanded_trace.append(value) - last_best_value = value - compact_idx += 1 - else: - # Should not happen, but handle gracefully - expanded_trace.append(last_best_value) - elif trial.status in (TrialStatus.ABANDONED, TrialStatus.FAILED): - # Trial has no data; carry forward the last best value. - expanded_trace.append(last_best_value) - - return expanded_trace + return {int(k): float(v) for k, v in cumulative_value.items()} def get_tensor_converter_adapter(