Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/perfalert/test_methods_alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ def vote_without_guard(
"""Guard-free version: collects detections without deduplication, then calls create_alert."""
detections = []
detection_method_naming = name_voting_strategy(
"equal", min_method_agreement, detection_index_tolerance, False, None
"equal", min_method_agreement, detection_index_tolerance, False
)
for i in range(1, len(analyzed_series)):
methods_detecting_data = get_methods_detecting_at_index(
Expand Down
112 changes: 59 additions & 53 deletions treeherder/perf/alerts.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
from treeherder.perf.email import AlertNotificationWriter
from treeherder.perf.methods.CramerVonMisesDetector import CramerVonMisesDetector
from treeherder.perf.methods.KolmogorovSmirnovDetector import KolmogorovSmirnovDetector
from treeherder.perf.methods.LeveneDetector import LeveneDetector
from treeherder.perf.methods.MannWhitneyUDetector import MannWhitneyUDetector
from treeherder.perf.methods.StudentDetector import StudentDetector
from treeherder.perf.methods.WelchDetector import WelchDetector
Expand Down Expand Up @@ -228,6 +227,17 @@ def generate_new_alerts_in_series(signature):


def build_cpd_methods():
"""
Upon doing the initial hyper-parameter tuning of the methods (each individually), we experimented
with multiple hyper parameters including the min/max back windows and the foreward window in order
to have the best possible performance. For each method, we select the best-performing hyper-parameter
configuration upoon evaluating the methods individually. However, upon incorporating those into the
voting system, we need to select a set of hyper-parameter configuration that has a fixed forward and
back windows across all methods. This is because having different windows values for different methods
makes them fundamentally not evaluate the same set of data, which is inconsistent. Therefore, we
choose to fix the back and forward windows values by using the existing values for the Student T Test
and do the tuning of only the confidence and magnitude of check hyper parameters.
"""
student = StudentDetector(
name="student",
min_back_window=12,
Expand All @@ -243,57 +253,67 @@ def build_cpd_methods():
min_back_window=12,
max_back_window=24,
fore_window=12,
alert_threshold=2.0,
confidence_threshold=0.05,
mag_check=False,
alert_threshold=3.0,
confidence_threshold=0.005,
mag_check=True,
above_threshold_is_anomaly=False,
)
ks = KolmogorovSmirnovDetector(
name="ks",
min_back_window=12,
max_back_window=24,
fore_window=12,
alert_threshold=2.0,
confidence_threshold=0.05,
mag_check=False,
alert_threshold=3.0,
confidence_threshold=0.005,
mag_check=True,
above_threshold_is_anomaly=False,
)
welch = WelchDetector(
name="welch",
min_back_window=12,
max_back_window=24,
fore_window=12,
alert_threshold=2.0,
confidence_threshold=0.05,
mag_check=False,
alert_threshold=3.0,
confidence_threshold=0.005,
mag_check=True,
above_threshold_is_anomaly=False,
)
"""
Levene is currently excluded from the voting ensemble because in practice we've observed it
to be degrading the quality of the voting as it gives more false positives and false negatives
compared to the other methods. This is reflected in the evaluation results we found earlier as
the precision and recall values of this method is significantly lower than the other methods.
We may consider re-adding it to the ensemble in the future if more tuning is done to improve
its performance or if we want to increase the diversity of the methods in the ensemble, but
for now we have decided to exclude it to maximize the overall quality of the voting ensemble.
"""
"""
levene = LeveneDetector(
name="levene",
min_back_window=12,
max_back_window=24,
fore_window=12,
alert_threshold=2.0,
confidence_threshold=0.05,
mag_check=False,
alert_threshold=3.0,
confidence_threshold=0.035,
mag_check=True,
above_threshold_is_anomaly=False,
)
"""
mwu = MannWhitneyUDetector(
name="mwu",
min_back_window=12,
max_back_window=24,
fore_window=12,
alert_threshold=2.0,
confidence_threshold=0.05,
mag_check=False,
alert_threshold=3.0,
confidence_threshold=0.005,
mag_check=True,
above_threshold_is_anomaly=False,
)
methods = {
"student": student,
"cvm": cvm,
"ks": ks,
"welch": welch,
"levene": levene,
"mwu": mwu,
}
return methods
Expand All @@ -304,14 +324,11 @@ def name_voting_strategy(
min_method_agreement,
detection_index_tolerance,
replicates_enabled,
existing_name=None,
):
"""
Builds a string label encoding the active voting configuration, used to tag
alerts with the strategy that produced them.
"""
if existing_name is not None:
return existing_name
suffix = "replicates_enabled" if replicates_enabled else "replicates_not_enabled"

voting_strategy_naming = (
Expand Down Expand Up @@ -339,8 +356,8 @@ def vote(
analyzed_series,
voting_strategy="equal",
min_method_agreement=3,
detection_index_tolerance=2,
replicates_enabled=False,
detection_index_tolerance=1,
detection_method_name=None,
):
"""
Apply voting logic to determine which alerts to create based on multiple detection methods.
Expand All @@ -349,18 +366,16 @@ def vote(
alert is created per agreed-upon change point regardless of which voting strategy is used.
"""
if voting_strategy == "equal":
detections, detection_method_naming = equal_voting_strategy(
detections = equal_voting_strategy(
analyzed_series=analyzed_series,
min_method_agreement=min_method_agreement,
detection_index_tolerance=detection_index_tolerance,
replicates_enabled=replicates_enabled,
)
elif voting_strategy == "priority":
detections, detection_method_naming = priority_voting_strategy(
detections = priority_voting_strategy(
analyzed_series=analyzed_series,
min_method_agreement=min_method_agreement,
detection_index_tolerance=detection_index_tolerance,
replicates_enabled=replicates_enabled,
)
else:
raise ValueError(f"Unknown voting strategy: {voting_strategy}")
Expand All @@ -375,7 +390,7 @@ def vote(
cur,
weighted_index,
methods_data,
detection_method_naming,
detection_method_name,
)


Expand Down Expand Up @@ -444,20 +459,13 @@ def get_weighted_average_push(analyzed_series, methods, start_idx, end_idx):
return weighted_avg_index, prev_index


def priority_voting_strategy(
analyzed_series, min_method_agreement=3, detection_index_tolerance=1, replicates_enabled=False
):
def priority_voting_strategy(analyzed_series, min_method_agreement=3, detection_index_tolerance=1):
"""
Priority voting strategy where student method has voting priority.
Returns a list of (weighted_index, prev_index, methods_data) tuples and a naming string.
"""
if not analyzed_series or len(analyzed_series) < 2:
return [], name_voting_strategy(
"priority", min_method_agreement, detection_index_tolerance, replicates_enabled
)
detection_method_naming = name_voting_strategy(
"priority", min_method_agreement, detection_index_tolerance, replicates_enabled
)
return []

detections = []
# Track which indices we've already added detections for (to avoid duplicates
Expand Down Expand Up @@ -487,39 +495,29 @@ def priority_voting_strategy(
# Phase 2: Fall back to equal voting strategy for indices not caught by Student
# Student won't influence the vote here since change_detected["student"]
# is False for all remaining candidates
equal_detections, _ = equal_voting_strategy(
equal_detections = equal_voting_strategy(
analyzed_series=analyzed_series,
min_method_agreement=min_method_agreement,
detection_index_tolerance=detection_index_tolerance,
alerted_indices=alerted_indices,
replicates_enabled=replicates_enabled,
)
detections.extend(equal_detections)

return detections, detection_method_naming
return detections


def equal_voting_strategy(
analyzed_series,
min_method_agreement=3,
detection_index_tolerance=1,
alerted_indices=None,
detection_method_naming=None,
replicates_enabled=False,
):
"""
Equal voting strategy where all methods have equal weight.
Returns a list of (weighted_index, prev_index, methods_data) tuples and a naming string.
"""
detection_method_naming = name_voting_strategy(
"equal",
min_method_agreement,
detection_index_tolerance,
replicates_enabled,
detection_method_naming,
)
if not analyzed_series or len(analyzed_series) < 2:
return [], detection_method_naming
return []

alerted_indices = alerted_indices if alerted_indices is not None else set()
detections = []
Expand Down Expand Up @@ -549,7 +547,7 @@ def equal_voting_strategy(
detections.append((weighted_index, prev_index, methods_detecting_data))
alerted_indices.add(weighted_index)

return detections, detection_method_naming
return detections


def create_alert(
Expand Down Expand Up @@ -646,6 +644,7 @@ def create_alert(
summary=summary,
series_signature=signature,
telemetry_series_signature=telemetry_sig,
detection_method=detection_method_naming,
defaults={
"noise_profile": noise_profile,
"is_regression": alert_properties.is_regression,
Expand All @@ -654,7 +653,6 @@ def create_alert(
"prev_value": prev_value,
"new_value": new_value,
"t_value": student_confidence, # Student's confidence for backwards compatibility
"detection_method": detection_method_naming,
"confidences": confidences,
"sheriffed": not signature.monitor,
"prev_median": 0,
Expand All @@ -678,6 +676,12 @@ def generate_new_test_alerts_in_series(
detection_index_tolerance=DETECTION_INDEX_TOLERANCE,
replicates_enabled=REPLICATES,
):
detection_method_name = name_voting_strategy(
voting_strategy,
min_method_agreement,
detection_index_tolerance,
replicates_enabled,
)
# get series data starting from either:
# (1) the last alert, if there is one
# (2) the alerts max age
Expand All @@ -688,7 +692,9 @@ def generate_new_test_alerts_in_series(
signature=signature, push_timestamp__gte=max_alert_age
)
latest_alert_timestamp = (
PerformanceAlertTesting.objects.filter(series_signature=signature)
PerformanceAlertTesting.objects.filter(
series_signature=signature, detection_method=detection_method_name
)
.select_related("summary__push__time")
.order_by("-summary__push__time")
.values_list("summary__push__time", flat=True)[:1]
Expand Down Expand Up @@ -743,5 +749,5 @@ def generate_new_test_alerts_in_series(
voting_strategy=voting_strategy,
min_method_agreement=min_method_agreement,
detection_index_tolerance=detection_index_tolerance,
replicates_enabled=replicates_enabled,
detection_method_name=detection_method_name,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 5.1.15 on 2026-03-31 20:47

from django.db import migrations


class Migration(migrations.Migration):

dependencies = [
(
"perf",
"0064_performancealerttesting_confidences_squashed_0068_remove_performancealerttesting_detected_changes_and_more",
),
]

operations = [
migrations.AlterUniqueTogether(
name="performancealerttesting",
unique_together={
("summary", "detection_method"),
("summary", "series_signature"),
("summary", "telemetry_series_signature"),
},
),
]
1 change: 1 addition & 0 deletions treeherder/perf/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,7 @@ class Meta:
unique_together = (
("summary", "series_signature"),
("summary", "telemetry_series_signature"),
("summary", "detection_method"),
)


Expand Down