diff --git a/ax/analysis/plotly/tests/test_utility_progression.py b/ax/analysis/plotly/tests/test_utility_progression.py index cfaf90e214e..d2fe59ecd7c 100644 --- a/ax/analysis/plotly/tests/test_utility_progression.py +++ b/ax/analysis/plotly/tests/test_utility_progression.py @@ -36,7 +36,7 @@ def _assert_valid_utility_card( """Assert that a card has valid structure for utility progression.""" self.assertIsInstance(card, PlotlyAnalysisCard) self.assertEqual(card.name, "UtilityProgressionAnalysis") - self.assertIn("trace_index", card.df.columns) + self.assertIn("trial_index", card.df.columns) self.assertIn("utility", card.df.columns) def test_utility_progression_soo(self) -> None: @@ -211,7 +211,7 @@ def test_all_infeasible_points_raises_error(self) -> None: with ( patch( "ax.analysis.plotly.utility_progression.get_trace", - return_value=[math.inf, -math.inf, math.inf], + return_value={0: math.inf, 1: -math.inf, 2: math.inf}, ), self.assertRaises(ExperimentNotReadyError) as cm, ): diff --git a/ax/analysis/plotly/utility_progression.py b/ax/analysis/plotly/utility_progression.py index 5ffc765d761..ee738d3845c 100644 --- a/ax/analysis/plotly/utility_progression.py +++ b/ax/analysis/plotly/utility_progression.py @@ -28,11 +28,9 @@ _UTILITY_PROGRESSION_TITLE = "Utility Progression" _TRACE_INDEX_EXPLANATION = ( - "The x-axis shows trace index, which counts completed or early-stopped trials " - "sequentially (1, 2, 3, ...). This differs from trial index, which may have " - "gaps if some trials failed or were abandoned. For example, if trials 0, 2, " - "and 5 completed while trials 1, 3, and 4 failed, the trace indices would be " - "1, 2, 3 corresponding to trial indices 0, 2, 5." + "The x-axis shows trial index. Only completed or early-stopped trials with " + "complete metric data are included, so there may be gaps if some trials " + "failed, were abandoned, or have incomplete data." 
) _CUMULATIVE_BEST_EXPLANATION = ( @@ -57,7 +55,8 @@ class UtilityProgressionAnalysis(Analysis): The DataFrame computed will contain one row per completed trial and the following columns: - - trace_index: Sequential index of completed/early-stopped trials (1, 2, 3, ...) + - trial_index: The trial index of each completed/early-stopped trial + that has complete metric availability. - utility: The cumulative best utility value at that trial @@ -114,7 +113,7 @@ def compute( ) # Check if all points are infeasible (inf or -inf values) - if all(np.isinf(value) for value in trace): + if all(np.isinf(value) for value in trace.values()): raise ExperimentNotReadyError( "All trials in the utility trace are infeasible i.e., they violate " "outcome constraints, so there are no feasible points to plot. During " @@ -125,12 +124,11 @@ "space, or (2) relaxing outcome constraints." ) - # Create DataFrame with 1-based trace index for user-friendly display - # (1st completed trial, 2nd completed trial, etc.
instead of 0-indexed) + # Create DataFrame with trial indices from the trace df = pd.DataFrame( { - "trace_index": list(range(1, len(trace) + 1)), - "utility": trace, + "trial_index": list(trace.keys()), + "utility": list(trace.values()), } ) @@ -185,14 +183,14 @@ def compute( # Create the plot fig = px.line( data_frame=df, - x="trace_index", + x="trial_index", y="utility", markers=True, color_discrete_sequence=[AX_BLUE], ) # Update axis labels and format x-axis to show integers only - fig.update_xaxes(title_text="Trace Index", dtick=1, rangemode="nonnegative") + fig.update_xaxes(title_text="Trial Index", dtick=1, rangemode="nonnegative") fig.update_yaxes(title_text=y_label) return create_plotly_analysis_card( diff --git a/ax/benchmark/benchmark.py b/ax/benchmark/benchmark.py index bd257c92d9a..2b066442183 100644 --- a/ax/benchmark/benchmark.py +++ b/ax/benchmark/benchmark.py @@ -320,9 +320,10 @@ def _get_oracle_value_of_params( dummy_experiment = get_oracle_experiment_from_params( problem=problem, dict_of_dict_of_params={0: {"0_0": params}} ) - (inference_value,) = get_trace( + trace = get_trace( experiment=dummy_experiment, optimization_config=problem.optimization_config ) + inference_value = next(iter(trace.values())) return inference_value @@ -510,12 +511,27 @@ def get_benchmark_result_from_experiment_and_gs( dict_of_dict_of_params=dict_of_dict_of_params, trial_statuses=trial_statuses, ) - oracle_trace = np.array( - get_trace( - experiment=actual_params_oracle_dummy_experiment, - optimization_config=problem.optimization_config, - ) + oracle_trace_dict = get_trace( + experiment=actual_params_oracle_dummy_experiment, + optimization_config=problem.optimization_config, + ) + # Expand trace dict to a positional array aligned with all trials, + # carry-forwarding the last best value for trials without data (e.g., + # failed or abandoned trials preserved via trial_statuses). 
+ maximize = ( + isinstance(problem.optimization_config, MultiObjectiveOptimizationConfig) + or problem.optimization_config.objective.is_scalarized_objective + or not problem.optimization_config.objective.minimize ) + all_trial_indices = sorted(actual_params_oracle_dummy_experiment.trials.keys()) + last_best = -float("inf") if maximize else float("inf") + oracle_trace_list: list[float] = [] + for idx in all_trial_indices: + if idx in oracle_trace_dict: + last_best = oracle_trace_dict[idx] + oracle_trace_list.append(last_best) + oracle_trace = np.array(oracle_trace_list) + is_feasible_trace = np.array( get_is_feasible_trace( experiment=actual_params_oracle_dummy_experiment, diff --git a/ax/plot/pareto_frontier.py b/ax/plot/pareto_frontier.py index fec10dd7fac..5d11638f4e5 100644 --- a/ax/plot/pareto_frontier.py +++ b/ax/plot/pareto_frontier.py @@ -61,8 +61,8 @@ def scatter_plot_with_hypervolume_trace_plotly(experiment: Experiment) -> go.Fig df = pd.DataFrame( { - "hypervolume": hypervolume_trace, - "trial_index": [*range(len(hypervolume_trace))], + "trial_index": list(hypervolume_trace.keys()), + "hypervolume": list(hypervolume_trace.values()), } ) diff --git a/ax/service/tests/test_best_point.py b/ax/service/tests/test_best_point.py index 03393107221..e4a14ebfba5 100644 --- a/ax/service/tests/test_best_point.py +++ b/ax/service/tests/test_best_point.py @@ -57,7 +57,7 @@ def test_get_trace(self) -> None: exp = get_experiment_with_observations( observations=[[11], [10], [9], [15], [5]], minimize=True ) - self.assertEqual(get_trace(exp), [11, 10, 9, 9, 5]) + self.assertEqual(get_trace(exp), {0: 11, 1: 10, 2: 9, 3: 9, 4: 5}) # Same experiment with maximize via new optimization config. 
opt_conf = none_throws(exp.optimization_config).clone() @@ -67,7 +67,7 @@ def test_get_trace(self) -> None: opt_conf.objective.metric_names[0]: opt_conf.objective.metric_names[0] }, ) - self.assertEqual(get_trace(exp, opt_conf), [11, 11, 11, 15, 15]) + self.assertEqual(get_trace(exp, opt_conf), {0: 11, 1: 11, 2: 11, 3: 15, 4: 15}) with self.subTest("Single objective with constraints"): # The second metric is the constraint and needs to be >= 0 @@ -76,48 +76,52 @@ def test_get_trace(self) -> None: minimize=False, constrained=True, ) - self.assertEqual(get_trace(exp), [float("-inf"), 10, 10, 10, 11]) + self.assertEqual( + get_trace(exp), + {0: float("-inf"), 1: 10, 2: 10, 3: 10, 4: 11}, + ) exp = get_experiment_with_observations( observations=[[11, -1], [10, 1], [9, 1], [15, -1], [11, 1]], minimize=True, constrained=True, ) - self.assertEqual(get_trace(exp), [float("inf"), 10, 9, 9, 9]) + self.assertEqual(get_trace(exp), {0: float("inf"), 1: 10, 2: 9, 3: 9, 4: 9}) # Scalarized. exp = get_experiment_with_observations( observations=[[1, 1], [2, 2], [3, 3]], scalarized=True, ) - self.assertEqual(get_trace(exp), [2, 4, 6]) + self.assertEqual(get_trace(exp), {0: 2, 1: 4, 2: 6}) # Multi objective. 
exp = get_experiment_with_observations( observations=[[1, 1], [-1, 100], [1, 2], [3, 3], [2, 4], [2, 1]], ) - self.assertEqual(get_trace(exp), [1, 1, 2, 9, 11, 11]) + self.assertEqual(get_trace(exp), {0: 1, 1: 1, 2: 2, 3: 9, 4: 11, 5: 11}) # W/o ObjectiveThresholds (inferring ObjectiveThresholds from scaled nadir) assert_is_instance( exp.optimization_config, MultiObjectiveOptimizationConfig ).objective_thresholds = [] trace = get_trace(exp) + trace_values = list(trace.values()) # With inferred thresholds via scaled nadir, check trace properties: # - All values should be non-negative - self.assertTrue(all(v >= 0.0 for v in trace)) + self.assertTrue(all(v >= 0.0 for v in trace_values)) # - Trace should be non-decreasing (cumulative best) - for i in range(1, len(trace)): - self.assertGreaterEqual(trace[i], trace[i - 1]) + for i in range(1, len(trace_values)): + self.assertGreaterEqual(trace_values[i], trace_values[i - 1]) # - Final value should be positive (non-trivial HV) - self.assertGreater(trace[-1], 0.0) + self.assertGreater(trace_values[-1], 0.0) # Multi-objective w/ constraints. exp = get_experiment_with_observations( observations=[[-1, 1, 1], [1, 2, 1], [3, 3, -1], [2, 4, 1], [2, 1, 1]], constrained=True, ) - self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8]) + self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8}) # W/ relative constraints & status quo. exp.status_quo = Arm(parameters={"x": 0.5, "y": 0.5}, name="status_quo") @@ -149,17 +153,17 @@ def test_get_trace(self) -> None: ] status_quo_data = Data(df=pd.DataFrame.from_records(df_dict)) exp.attach_data(data=status_quo_data) - self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8]) + self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8}) # W/ first objective being minimized. 
exp = get_experiment_with_observations( observations=[[1, 1], [-1, 2], [3, 3], [-2, 4], [2, 1]], minimize=True ) - self.assertEqual(get_trace(exp), [0, 2, 2, 8, 8]) + self.assertEqual(get_trace(exp), {0: 0, 1: 2, 2: 2, 3: 8, 4: 8}) # W/ empty data. exp = get_experiment_with_trial() - self.assertEqual(get_trace(exp), []) + self.assertEqual(get_trace(exp), {}) # test batch trial exp = get_experiment_with_batch_trial(with_status_quo=False) @@ -191,7 +195,7 @@ def test_get_trace(self) -> None: ] ) exp.attach_data(Data(df=pd.DataFrame.from_records(df_dict))) - self.assertEqual(get_trace(exp), [2.0]) + self.assertEqual(get_trace(exp), {0: 2.0}) # test that there is performance metric in the trace for each # completed/early-stopped trial trial1 = assert_is_instance(trial, BatchTrial).clone_to(include_sq=False) @@ -214,7 +218,7 @@ def test_get_trace(self) -> None: ] ) exp.attach_data(Data(df=pd.DataFrame.from_records(df_dict2))) - self.assertEqual(get_trace(exp), [2.0, 2.0, 20.0]) + self.assertEqual(get_trace(exp), {0: 2.0, 2: 20.0}) def test_get_trace_with_non_completed_trials(self) -> None: with self.subTest("minimize with abandoned trial"): @@ -224,12 +228,11 @@ def test_get_trace_with_non_completed_trials(self) -> None: # Mark trial 2 (value=9) as abandoned exp.trials[2].mark_abandoned(unsafe=True) - # Abandoned trial carries forward the last best value + # Abandoned trial is excluded from trace trace = get_trace(exp) - self.assertEqual(len(trace), 5) - # Trial 0: 11, Trial 1: 10, Trial 2 (abandoned): carry forward 10 + # Trial 0: 11, Trial 1: 10, Trial 2 (abandoned): excluded # Trial 3: 10 (15 > 10), Trial 4: 5 - self.assertEqual(trace, [11, 10, 10, 10, 5]) + self.assertEqual(trace, {0: 11, 1: 10, 3: 10, 4: 5}) with self.subTest("maximize with abandoned trial"): exp = get_experiment_with_observations( @@ -238,12 +241,11 @@ def test_get_trace_with_non_completed_trials(self) -> None: # Mark trial 1 (value=3) as abandoned exp.trials[1].mark_abandoned(unsafe=True) - # 
Abandoned trial carries forward the last best value + # Abandoned trial is excluded from trace trace = get_trace(exp) - self.assertEqual(len(trace), 5) - # Trial 0: 1, Trial 1 (abandoned): carry forward 1, + # Trial 0: 1, Trial 1 (abandoned): excluded, # Trial 2: 2, Trial 3: 5, Trial 4: 5 - self.assertEqual(trace, [1, 1, 2, 5, 5]) + self.assertEqual(trace, {0: 1, 2: 2, 3: 5, 4: 5}) with self.subTest("minimize with failed trial"): exp = get_experiment_with_observations( @@ -252,12 +254,11 @@ def test_get_trace_with_non_completed_trials(self) -> None: # Mark trial 2 (value=9) as failed exp.trials[2].mark_failed(unsafe=True) - # Failed trial carries forward the last best value + # Failed trial is excluded from trace trace = get_trace(exp) - self.assertEqual(len(trace), 5) - # Trial 0: 11, Trial 1: 10, Trial 2 (failed): carry forward 10 + # Trial 0: 11, Trial 1: 10, Trial 2 (failed): excluded # Trial 3: 10 (15 > 10), Trial 4: 5 - self.assertEqual(trace, [11, 10, 10, 10, 5]) + self.assertEqual(trace, {0: 11, 1: 10, 3: 10, 4: 5}) def test_get_trace_with_include_status_quo(self) -> None: with self.subTest("Multi-objective: status quo dominates in some trials"): @@ -347,9 +348,11 @@ def test_get_trace_with_include_status_quo(self) -> None: # The last value MUST differ because status quo dominates # Without status quo, only poor arms contribute (low hypervolume) # With status quo, excellent values contribute (high hypervolume) + last_without = list(trace_without_sq.values())[-1] + last_with = list(trace_with_sq.values())[-1] self.assertGreater( - trace_with_sq[-1], - trace_without_sq[-1], + last_with, + last_without, f"Status quo dominates in trial 3, so trace with SQ should be higher. 
" f"Without SQ: {trace_without_sq}, With SQ: {trace_with_sq}", ) @@ -418,9 +421,11 @@ def test_get_trace_with_include_status_quo(self) -> None: # The last value MUST differ because status quo is best # Without status quo: best in trial 3 is 15.0, cumulative min is 9 # With status quo: best in trial 3 is 5.0, cumulative min is 5 + last_without = list(trace_without_sq.values())[-1] + last_with = list(trace_with_sq.values())[-1] self.assertLess( - trace_with_sq[-1], - trace_without_sq[-1], + last_with, + last_without, f"Status quo is best in trial 3, so trace with SQ should be " f"lower (minimize). Without SQ: {trace_without_sq}, " f"With SQ: {trace_with_sq}", @@ -502,19 +507,20 @@ def _make_pref_opt_config(self, profile_name: str) -> PreferenceOptimizationConf preference_profile_name=profile_name, ) - def _assert_valid_trace(self, trace: list[float], expected_len: int) -> None: + def _assert_valid_trace(self, trace: dict[int, float], expected_len: int) -> None: """Assert trace has expected length, contains floats, is non-decreasing and has more than one unique value.""" self.assertEqual(len(trace), expected_len) - for value in trace: + trace_values = list(trace.values()) + for value in trace_values: self.assertIsInstance(value, float) - for i in range(1, len(trace)): + for i in range(1, len(trace_values)): self.assertGreaterEqual( - trace[i], - trace[i - 1], + trace_values[i], + trace_values[i - 1], msg=f"Trace not monotonically increasing at index {i}: {trace}", ) - unique_values = set(trace) + unique_values = set(trace_values) self.assertGreater( len(unique_values), 1, diff --git a/ax/service/utils/best_point.py b/ax/service/utils/best_point.py index 0b43001a4a5..40fbebf861c 100644 --- a/ax/service/utils/best_point.py +++ b/ax/service/utils/best_point.py @@ -45,6 +45,7 @@ from ax.core.outcome_constraint import _op_to_str, OutcomeConstraint from ax.core.trial import Trial from ax.core.types import ComparisonOp, TModelPredictArm, TParameterization +from ax.core.utils 
import compute_metric_availability, MetricAvailability from ax.exceptions.core import DataRequiredError, UnsupportedError, UserInputError from ax.generation_strategy.generation_strategy import GenerationStrategy from ax.generators.torch_base import TorchGenerator @@ -1212,7 +1213,7 @@ def get_trace( experiment: Experiment, optimization_config: OptimizationConfig | None = None, include_status_quo: bool = False, -) -> list[float]: +) -> dict[int, float]: """ Compute the optimization trace at each iteration. Given an experiment and an optimization config, compute the performance @@ -1229,10 +1230,10 @@ def get_trace( improvements in the optimization trace. If the first trial(s) are infeasible, the trace can start at inf or -inf. - An iteration here refers to a completed or early-stopped (batch) trial. - There will be one performance metric in the trace for each iteration. - Trials without data (e.g. abandoned or failed) carry forward the last - best value. + An iteration here refers to a completed or early-stopped (batch) trial + with complete metric data for all metrics in the optimization config. + Trials with incomplete data, or trials that are abandoned or failed, are + excluded from the trace. Args: experiment: The experiment to get the trace for. @@ -1243,14 +1244,15 @@ def get_trace( behavior. Returns: - A list of performance values at each iteration. + A dict mapping trial index to performance value, ordered by trial + index. Only trials with complete metric data are included. """ optimization_config = optimization_config or none_throws( experiment.optimization_config ) df = experiment.lookup_data().df if len(df) == 0: - return [] + return {} # Get the names of the metrics in optimization config. metric_names = set(optimization_config.objective.metric_names) @@ -1271,7 +1273,22 @@ def get_trace( idx &= df["arm_name"] != status_quo.name df = df.loc[idx, :] if len(df) == 0: - return [] + return {} + + # Filter to trials with complete metric data. 
+ availability = compute_metric_availability( + experiment=experiment, + trial_indices=df["trial_index"].unique().tolist(), + optimization_config=optimization_config, + ) + complete_trials = { + idx + for idx, avail in availability.items() + if avail == MetricAvailability.COMPLETE + } + df = df[df["trial_index"].isin(complete_trials)] + if len(df) == 0: + return {} # Derelativize the optimization config only if needed (i.e., if there are # relative constraints). This avoids unnecessary data pivoting that can @@ -1289,7 +1306,7 @@ def get_trace( use_cumulative_best=True, experiment=experiment, ) - # Aggregate by trial, then. compute cumulative best + # Aggregate by trial, then compute cumulative best objective = optimization_config.objective maximize = ( isinstance(optimization_config, MultiObjectiveOptimizationConfig) @@ -1303,32 +1320,7 @@ def get_trace( keep_order=False, # sort by trial index ) - compact_trace = cumulative_value.tolist() - - # Expand trace to include trials without data (e.g. ABANDONED, FAILED) - # with carry-forward values. - data_trial_indices = set(cumulative_value.index) - expanded_trace = [] - compact_idx = 0 - last_best_value = -float("inf") if maximize else float("inf") - - for trial_index in sorted(experiment.trials.keys()): - trial = experiment.trials[trial_index] - if trial_index in data_trial_indices: - # Trial has data in compact trace - if compact_idx < len(compact_trace): - value = compact_trace[compact_idx] - expanded_trace.append(value) - last_best_value = value - compact_idx += 1 - else: - # Should not happen, but handle gracefully - expanded_trace.append(last_best_value) - elif trial.status in (TrialStatus.ABANDONED, TrialStatus.FAILED): - # Trial has no data; carry forward the last best value. - expanded_trace.append(last_best_value) - - return expanded_trace + return {int(k): float(v) for k, v in cumulative_value.items()} def get_tensor_converter_adapter(