mozilla · MohamedBilelBesbes · Mar 31, 2026
@@ -586,7 +586,7 @@ def vote_without_guard(
         """Guard-free version: collects detections without deduplication, then calls create_alert."""
         detections = []
         detection_method_naming = name_voting_strategy(
-            "equal", min_method_agreement, detection_index_tolerance, False, None
+            "equal", min_method_agreement, detection_index_tolerance, False
         )
         for i in range(1, len(analyzed_series)):
             methods_detecting_data = get_methods_detecting_at_index(

@@ -13,7 +13,6 @@
 from treeherder.perf.email import AlertNotificationWriter
 from treeherder.perf.methods.CramerVonMisesDetector import CramerVonMisesDetector
 from treeherder.perf.methods.KolmogorovSmirnovDetector import KolmogorovSmirnovDetector
-from treeherder.perf.methods.LeveneDetector import LeveneDetector
 from treeherder.perf.methods.MannWhitneyUDetector import MannWhitneyUDetector
 from treeherder.perf.methods.StudentDetector import StudentDetector
 from treeherder.perf.methods.WelchDetector import WelchDetector
@@ -228,6 +227,17 @@ def generate_new_alerts_in_series(signature):
 
 
 def build_cpd_methods():
+    """
+    Upon doing the initial hyper-parameter tuning of the methods (each individually), we experimented
+    with multiple hyper-parameters, including the min/max back windows and the forward window, in order
+    to have the best possible performance. For each method, we select the best-performing hyper-parameter
+    configuration upon evaluating the methods individually. However, upon incorporating those into the
+    voting system, we need to select a hyper-parameter configuration that has fixed forward and
+    back windows across all methods. This is because having different window values for different methods
+    means they fundamentally do not evaluate the same set of data, which is inconsistent. Therefore, we
+    choose to fix the back and forward window values by using the existing values for the Student T Test
+    and to tune only the confidence and magnitude-check hyper-parameters.
+    """
     student = StudentDetector(
         name="student",
         min_back_window=12,
@@ -243,57 +253,67 @@ def build_cpd_methods():
         min_back_window=12,
         max_back_window=24,
         fore_window=12,
-        alert_threshold=2.0,
-        confidence_threshold=0.05,
-        mag_check=False,
+        alert_threshold=3.0,
+        confidence_threshold=0.005,
+        mag_check=True,
         above_threshold_is_anomaly=False,
     )
     ks = KolmogorovSmirnovDetector(
         name="ks",
         min_back_window=12,
         max_back_window=24,
         fore_window=12,
-        alert_threshold=2.0,
-        confidence_threshold=0.05,
-        mag_check=False,
+        alert_threshold=3.0,
+        confidence_threshold=0.005,
+        mag_check=True,
         above_threshold_is_anomaly=False,
     )
     welch = WelchDetector(
         name="welch",
         min_back_window=12,
         max_back_window=24,
         fore_window=12,
-        alert_threshold=2.0,
-        confidence_threshold=0.05,
-        mag_check=False,
+        alert_threshold=3.0,
+        confidence_threshold=0.005,
+        mag_check=True,
         above_threshold_is_anomaly=False,
     )
+    """
+    Levene is currently excluded from the voting ensemble because in practice we've observed that it
+    degrades the quality of the voting, as it gives more false positives and false negatives
+    compared to the other methods. This is reflected in the evaluation results we found earlier, as
+    the precision and recall values of this method are significantly lower than the other methods.
+    We may consider re-adding it to the ensemble in the future if more tuning is done to improve
+    its performance or if we want to increase the diversity of the methods in the ensemble, but
+    for now we have decided to exclude it to maximize the overall quality of the voting ensemble.
+    """
+    """
     levene = LeveneDetector(
         name="levene",
         min_back_window=12,
         max_back_window=24,
         fore_window=12,
-        alert_threshold=2.0,
-        confidence_threshold=0.05,
-        mag_check=False,
+        alert_threshold=3.0,
+        confidence_threshold=0.035,
+        mag_check=True,
         above_threshold_is_anomaly=False,
     )
+    """
     mwu = MannWhitneyUDetector(
         name="mwu",
         min_back_window=12,
         max_back_window=24,
         fore_window=12,
-        alert_threshold=2.0,
-        confidence_threshold=0.05,
-        mag_check=False,
+        alert_threshold=3.0,
+        confidence_threshold=0.005,
+        mag_check=True,
         above_threshold_is_anomaly=False,
     )
     methods = {
         "student": student,
         "cvm": cvm,
         "ks": ks,
         "welch": welch,
-        "levene": levene,
         "mwu": mwu,
     }
     return methods
@@ -304,14 +324,11 @@ def name_voting_strategy(
     min_method_agreement,
     detection_index_tolerance,
     replicates_enabled,
-    existing_name=None,
 ):
     """
     Builds a string label encoding the active voting configuration, used to tag
     alerts with the strategy that produced them.
     """
-    if existing_name is not None:
-        return existing_name
     suffix = "replicates_enabled" if replicates_enabled else "replicates_not_enabled"
 
     voting_strategy_naming = (
@@ -339,8 +356,8 @@ def vote(
     analyzed_series,
     voting_strategy="equal",
     min_method_agreement=3,
-    detection_index_tolerance=2,
-    replicates_enabled=False,
+    detection_index_tolerance=1,
+    detection_method_name=None,
 ):
     """
     Apply voting logic to determine which alerts to create based on multiple detection methods.
@@ -349,18 +366,16 @@ def vote(
     alert is created per agreed-upon change point regardless of which voting strategy is used.
     """
     if voting_strategy == "equal":
-        detections, detection_method_naming = equal_voting_strategy(
+        detections = equal_voting_strategy(
             analyzed_series=analyzed_series,
             min_method_agreement=min_method_agreement,
             detection_index_tolerance=detection_index_tolerance,
-            replicates_enabled=replicates_enabled,
         )
     elif voting_strategy == "priority":
-        detections, detection_method_naming = priority_voting_strategy(
+        detections = priority_voting_strategy(
             analyzed_series=analyzed_series,
             min_method_agreement=min_method_agreement,
             detection_index_tolerance=detection_index_tolerance,
-            replicates_enabled=replicates_enabled,
         )
     else:
         raise ValueError(f"Unknown voting strategy: {voting_strategy}")
@@ -375,7 +390,7 @@ def vote(
             cur,
             weighted_index,
             methods_data,
-            detection_method_naming,
+            detection_method_name,
         )
 
 
@@ -444,20 +459,13 @@ def get_weighted_average_push(analyzed_series, methods, start_idx, end_idx):
     return weighted_avg_index, prev_index
 
 
-def priority_voting_strategy(
-    analyzed_series, min_method_agreement=3, detection_index_tolerance=1, replicates_enabled=False
-):
+def priority_voting_strategy(analyzed_series, min_method_agreement=3, detection_index_tolerance=1):
     """
     Priority voting strategy where student method has voting priority.
     Returns a list of (weighted_index, prev_index, methods_data) tuples.
     """
     if not analyzed_series or len(analyzed_series) < 2:
-        return [], name_voting_strategy(
-            "priority", min_method_agreement, detection_index_tolerance, replicates_enabled
-        )
-    detection_method_naming = name_voting_strategy(
-        "priority", min_method_agreement, detection_index_tolerance, replicates_enabled
-    )
+        return []
 
     detections = []
     # Track which indices we've already added detections for (to avoid duplicates
@@ -487,39 +495,29 @@ def priority_voting_strategy(
     # Phase 2: Fall back to equal voting strategy for indices not caught by Student
     # Student won't influence the vote here since change_detected["student"]
     # is False for all remaining candidates
-    equal_detections, _ = equal_voting_strategy(
+    equal_detections = equal_voting_strategy(
         analyzed_series=analyzed_series,
         min_method_agreement=min_method_agreement,
         detection_index_tolerance=detection_index_tolerance,
         alerted_indices=alerted_indices,
-        replicates_enabled=replicates_enabled,
     )
     detections.extend(equal_detections)
 
-    return detections, detection_method_naming
+    return detections
 
 
 def equal_voting_strategy(
     analyzed_series,
     min_method_agreement=3,
     detection_index_tolerance=1,
     alerted_indices=None,
-    detection_method_naming=None,
-    replicates_enabled=False,
 ):
     """
     Equal voting strategy where all methods have equal weight.
     Returns a list of (weighted_index, prev_index, methods_data) tuples.
     """
-    detection_method_naming = name_voting_strategy(
-        "equal",
-        min_method_agreement,
-        detection_index_tolerance,
-        replicates_enabled,
-        detection_method_naming,
-    )
     if not analyzed_series or len(analyzed_series) < 2:
-        return [], detection_method_naming
+        return []
 
     alerted_indices = alerted_indices if alerted_indices is not None else set()
     detections = []
@@ -549,7 +547,7 @@ def equal_voting_strategy(
                 detections.append((weighted_index, prev_index, methods_detecting_data))
                 alerted_indices.add(weighted_index)
 
-    return detections, detection_method_naming
+    return detections
 
 
 def create_alert(
@@ -646,6 +644,7 @@ def create_alert(
         summary=summary,
         series_signature=signature,
         telemetry_series_signature=telemetry_sig,
+        detection_method=detection_method_naming,
         defaults={
             "noise_profile": noise_profile,
             "is_regression": alert_properties.is_regression,
@@ -654,7 +653,6 @@ def create_alert(
             "prev_value": prev_value,
             "new_value": new_value,
             "t_value": student_confidence,  # Student's confidence for backwards compatibility
-            "detection_method": detection_method_naming,
             "confidences": confidences,
             "sheriffed": not signature.monitor,
             "prev_median": 0,
@@ -678,6 +676,12 @@ def generate_new_test_alerts_in_series(
     detection_index_tolerance=DETECTION_INDEX_TOLERANCE,
     replicates_enabled=REPLICATES,
 ):
+    detection_method_name = name_voting_strategy(
+        voting_strategy,
+        min_method_agreement,
+        detection_index_tolerance,
+        replicates_enabled,
+    )
     # get series data starting from either:
     # (1) the last alert, if there is one
     # (2) the alerts max age
@@ -688,7 +692,9 @@ def generate_new_test_alerts_in_series(
             signature=signature, push_timestamp__gte=max_alert_age
         )
         latest_alert_timestamp = (
-            PerformanceAlertTesting.objects.filter(series_signature=signature)
+            PerformanceAlertTesting.objects.filter(
+                series_signature=signature, detection_method=detection_method_name
+            )
             .select_related("summary__push__time")
             .order_by("-summary__push__time")
             .values_list("summary__push__time", flat=True)[:1]
@@ -743,5 +749,5 @@ def generate_new_test_alerts_in_series(
             voting_strategy=voting_strategy,
             min_method_agreement=min_method_agreement,
             detection_index_tolerance=detection_index_tolerance,
-            replicates_enabled=replicates_enabled,
+            detection_method_name=detection_method_name,
         )
@@ -0,0 +1,24 @@
+# Generated by Django 5.1.15 on 2026-03-31 20:47
+
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        (
+            "perf",
+            "0064_performancealerttesting_confidences_squashed_0068_remove_performancealerttesting_detected_changes_and_more",
+        ),
+    ]
+
+    operations = [
+        migrations.AlterUniqueTogether(
+            name="performancealerttesting",
+            unique_together={
+                ("summary", "detection_method"),
+                ("summary", "series_signature"),
+                ("summary", "telemetry_series_signature"),
+            },
+        ),
+    ]
@@ -821,6 +821,7 @@ class Meta:
         unique_together = (
             ("summary", "series_signature"),
             ("summary", "telemetry_series_signature"),
+            ("summary", "detection_method"),
         )