From a9273291f6003f74b387df05202590d870c9ee9b Mon Sep 17 00:00:00 2001 From: Mohamed Bilel Besbes Date: Tue, 31 Mar 2026 19:30:47 -0400 Subject: [PATCH] Alerts verbosity fix --- tests/perfalert/test_methods_alerts.py | 2 +- treeherder/perf/alerts.py | 112 +++++++++--------- ...performancealerttesting_unique_together.py | 24 ++++ treeherder/perf/models.py | 1 + 4 files changed, 85 insertions(+), 54 deletions(-) create mode 100644 treeherder/perf/migrations/0069_alter_performancealerttesting_unique_together.py diff --git a/tests/perfalert/test_methods_alerts.py b/tests/perfalert/test_methods_alerts.py index 09644380dd1..a6541543406 100644 --- a/tests/perfalert/test_methods_alerts.py +++ b/tests/perfalert/test_methods_alerts.py @@ -586,7 +586,7 @@ def vote_without_guard( """Guard-free version: collects detections without deduplication, then calls create_alert.""" detections = [] detection_method_naming = name_voting_strategy( - "equal", min_method_agreement, detection_index_tolerance, False, None + "equal", min_method_agreement, detection_index_tolerance, False ) for i in range(1, len(analyzed_series)): methods_detecting_data = get_methods_detecting_at_index( diff --git a/treeherder/perf/alerts.py b/treeherder/perf/alerts.py index 05f71650132..ce56d03f1f1 100644 --- a/treeherder/perf/alerts.py +++ b/treeherder/perf/alerts.py @@ -13,7 +13,6 @@ from treeherder.perf.email import AlertNotificationWriter from treeherder.perf.methods.CramerVonMisesDetector import CramerVonMisesDetector from treeherder.perf.methods.KolmogorovSmirnovDetector import KolmogorovSmirnovDetector -from treeherder.perf.methods.LeveneDetector import LeveneDetector from treeherder.perf.methods.MannWhitneyUDetector import MannWhitneyUDetector from treeherder.perf.methods.StudentDetector import StudentDetector from treeherder.perf.methods.WelchDetector import WelchDetector @@ -228,6 +227,17 @@ def generate_new_alerts_in_series(signature): def build_cpd_methods(): + """ + Upon doing the initial hyper-parameter tuning of the methods (each individually), we experimented + with multiple hyper parameters including the min/max back windows and the foreward window in order + to have the best possible performance. For each method, we select the best-performing hyper-parameter + configuration upoon evaluating the methods individually. However, upon incorporating those into the + voting system, we need to select a set of hyper-parameter configuration that has a fixed forward and + back windows across all methods. This is because having different windows values for different methods + makes them fundamentally not evaluate the same set of data, which is inconsistent. Therefore, we + choose to fix the back and forward windows values by using the existing values for the Student T Test + and do the tuning of only the confidence and magnitude of check hyper parameters. + """ student = StudentDetector( name="student", min_back_window=12, @@ -243,9 +253,9 @@ def build_cpd_methods(): min_back_window=12, max_back_window=24, fore_window=12, - alert_threshold=2.0, - confidence_threshold=0.05, - mag_check=False, + alert_threshold=3.0, + confidence_threshold=0.005, + mag_check=True, above_threshold_is_anomaly=False, ) ks = KolmogorovSmirnovDetector( @@ -253,9 +263,9 @@ def build_cpd_methods(): min_back_window=12, max_back_window=24, fore_window=12, - alert_threshold=2.0, - confidence_threshold=0.05, - mag_check=False, + alert_threshold=3.0, + confidence_threshold=0.005, + mag_check=True, above_threshold_is_anomaly=False, ) welch = WelchDetector( @@ -263,29 +273,40 @@ def build_cpd_methods(): min_back_window=12, max_back_window=24, fore_window=12, - alert_threshold=2.0, - confidence_threshold=0.05, - mag_check=False, + alert_threshold=3.0, + confidence_threshold=0.005, + mag_check=True, above_threshold_is_anomaly=False, ) + """ + Levene is currently excluded from the voting ensemble because in practice we've observed it + to be degrading the quality of the voting as it gives more false positives and false negatives + compared to the other methods. This is reflected in the evaluation results we found earlier as + the precision and recall values of this method is significantly lower than the other methods. + We may consider re-adding it to the ensemble in the future if more tuning is done to improve + its performance or if we want to increase the diversity of the methods in the ensemble, but + for now we have decided to exclude it to maximize the overall quality of the voting ensemble. + """ + """ levene = LeveneDetector( name="levene", min_back_window=12, max_back_window=24, fore_window=12, - alert_threshold=2.0, - confidence_threshold=0.05, - mag_check=False, + alert_threshold=3.0, + confidence_threshold=0.035, + mag_check=True, above_threshold_is_anomaly=False, ) + """ mwu = MannWhitneyUDetector( name="mwu", min_back_window=12, max_back_window=24, fore_window=12, - alert_threshold=2.0, - confidence_threshold=0.05, - mag_check=False, + alert_threshold=3.0, + confidence_threshold=0.005, + mag_check=True, above_threshold_is_anomaly=False, ) methods = { @@ -293,7 +314,6 @@ def build_cpd_methods(): "cvm": cvm, "ks": ks, "welch": welch, - "levene": levene, "mwu": mwu, } return methods @@ -304,14 +324,11 @@ def name_voting_strategy( min_method_agreement, detection_index_tolerance, replicates_enabled, - existing_name=None, ): """ Builds a string label encoding the active voting configuration, used to tag alerts with the strategy that produced them. """ - if existing_name is not None: - return existing_name suffix = "replicates_enabled" if replicates_enabled else "replicates_not_enabled" voting_strategy_naming = ( @@ -339,8 +356,8 @@ def vote( analyzed_series, voting_strategy="equal", min_method_agreement=3, - detection_index_tolerance=2, - replicates_enabled=False, + detection_index_tolerance=1, + detection_method_name=None, ): """ Apply voting logic to determine which alerts to create based on multiple detection methods. @@ -349,18 +366,16 @@ def vote( alert is created per agreed-upon change point regardless of which voting strategy is used. """ if voting_strategy == "equal": - detections, detection_method_naming = equal_voting_strategy( + detections = equal_voting_strategy( analyzed_series=analyzed_series, min_method_agreement=min_method_agreement, detection_index_tolerance=detection_index_tolerance, - replicates_enabled=replicates_enabled, ) elif voting_strategy == "priority": - detections, detection_method_naming = priority_voting_strategy( + detections = priority_voting_strategy( analyzed_series=analyzed_series, min_method_agreement=min_method_agreement, detection_index_tolerance=detection_index_tolerance, - replicates_enabled=replicates_enabled, ) else: raise ValueError(f"Unknown voting strategy: {voting_strategy}") @@ -375,7 +390,7 @@ def vote( cur, weighted_index, methods_data, - detection_method_naming, + detection_method_name, ) @@ -444,20 +459,13 @@ def get_weighted_average_push(analyzed_series, methods, start_idx, end_idx): return weighted_avg_index, prev_index -def priority_voting_strategy( - analyzed_series, min_method_agreement=3, detection_index_tolerance=1, replicates_enabled=False -): +def priority_voting_strategy(analyzed_series, min_method_agreement=3, detection_index_tolerance=1): """ Priority voting strategy where student method has voting priority. Returns a list of (weighted_index, prev_index, methods_data) tuples and a naming string. """ if not analyzed_series or len(analyzed_series) < 2: - return [], name_voting_strategy( - "priority", min_method_agreement, detection_index_tolerance, replicates_enabled - ) - detection_method_naming = name_voting_strategy( - "priority", min_method_agreement, detection_index_tolerance, replicates_enabled - ) + return [] detections = [] # Track which indices we've already added detections for (to avoid duplicates @@ -487,16 +495,15 @@ def priority_voting_strategy( # Phase 2: Fall back to equal voting strategy for indices not caught by Student # Student won't influence the vote here since change_detected["student"] # is False for all remaining candidates - equal_detections, _ = equal_voting_strategy( + equal_detections = equal_voting_strategy( analyzed_series=analyzed_series, min_method_agreement=min_method_agreement, detection_index_tolerance=detection_index_tolerance, alerted_indices=alerted_indices, - replicates_enabled=replicates_enabled, ) detections.extend(equal_detections) - return detections, detection_method_naming + return detections def equal_voting_strategy( @@ -504,22 +511,13 @@ def equal_voting_strategy( min_method_agreement=3, detection_index_tolerance=1, alerted_indices=None, - detection_method_naming=None, - replicates_enabled=False, ): """ Equal voting strategy where all methods have equal weight. Returns a list of (weighted_index, prev_index, methods_data) tuples and a naming string. """ - detection_method_naming = name_voting_strategy( - "equal", - min_method_agreement, - detection_index_tolerance, - replicates_enabled, - detection_method_naming, - ) if not analyzed_series or len(analyzed_series) < 2: - return [], detection_method_naming + return [] alerted_indices = alerted_indices if alerted_indices is not None else set() detections = [] @@ -549,7 +547,7 @@ def equal_voting_strategy( detections.append((weighted_index, prev_index, methods_detecting_data)) alerted_indices.add(weighted_index) - return detections, detection_method_naming + return detections def create_alert( @@ -646,6 +644,7 @@ def create_alert( summary=summary, series_signature=signature, telemetry_series_signature=telemetry_sig, + detection_method=detection_method_naming, defaults={ "noise_profile": noise_profile, "is_regression": alert_properties.is_regression, @@ -654,7 +653,6 @@ def create_alert( "prev_value": prev_value, "new_value": new_value, "t_value": student_confidence, # Student's confidence for backwards compatibility - "detection_method": detection_method_naming, "confidences": confidences, "sheriffed": not signature.monitor, "prev_median": 0, @@ -678,6 +676,12 @@ def generate_new_test_alerts_in_series( detection_index_tolerance=DETECTION_INDEX_TOLERANCE, replicates_enabled=REPLICATES, ): + detection_method_name = name_voting_strategy( + voting_strategy, + min_method_agreement, + detection_index_tolerance, + replicates_enabled, + ) # get series data starting from either: # (1) the last alert, if there is one # (2) the alerts max age @@ -688,7 +692,9 @@ def generate_new_test_alerts_in_series( signature=signature, push_timestamp__gte=max_alert_age ) latest_alert_timestamp = ( - PerformanceAlertTesting.objects.filter(series_signature=signature) + PerformanceAlertTesting.objects.filter( + series_signature=signature, detection_method=detection_method_name + ) .select_related("summary__push__time") .order_by("-summary__push__time") .values_list("summary__push__time", flat=True)[:1] @@ -743,5 +749,5 @@ def generate_new_test_alerts_in_series( voting_strategy=voting_strategy, min_method_agreement=min_method_agreement, detection_index_tolerance=detection_index_tolerance, - replicates_enabled=replicates_enabled, + detection_method_name=detection_method_name, ) diff --git a/treeherder/perf/migrations/0069_alter_performancealerttesting_unique_together.py b/treeherder/perf/migrations/0069_alter_performancealerttesting_unique_together.py new file mode 100644 index 00000000000..7962aceb098 --- /dev/null +++ b/treeherder/perf/migrations/0069_alter_performancealerttesting_unique_together.py @@ -0,0 +1,24 @@ +# Generated by Django 5.1.15 on 2026-03-31 20:47 + +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ( + "perf", + "0064_performancealerttesting_confidences_squashed_0068_remove_performancealerttesting_detected_changes_and_more", + ), + ] + + operations = [ + migrations.AlterUniqueTogether( + name="performancealerttesting", + unique_together={ + ("summary", "detection_method"), + ("summary", "series_signature"), + ("summary", "telemetry_series_signature"), + }, + ), + ] diff --git a/treeherder/perf/models.py b/treeherder/perf/models.py index b596ecfde57..2a3e771c668 100644 --- a/treeherder/perf/models.py +++ b/treeherder/perf/models.py @@ -821,6 +821,7 @@ class Meta: unique_together = ( ("summary", "series_signature"), ("summary", "telemetry_series_signature"), + ("summary", "detection_method"), )