diff --git a/tests/unit/score/test_console_scorer_printer.py b/tests/unit/score/test_console_scorer_printer.py
new file mode 100644
index 000000000..fc7d1e64f
--- /dev/null
+++ b/tests/unit/score/test_console_scorer_printer.py
@@ -0,0 +1,366 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+from colorama import Fore, Style
+
+from pyrit.identifiers import ComponentIdentifier
+from pyrit.score.printer.console_scorer_printer import ConsoleScorerPrinter
+from pyrit.score.scorer_evaluation.scorer_metrics import (
+    HarmScorerMetrics,
+    ObjectiveScorerMetrics,
+)
+
+
+def _make_scorer_identifier(  # Test helper: builds a ComponentIdentifier describing a scorer.
+    *,
+    class_name: str = "TestScorer",
+    params: dict | None = None,
+    children: dict | None = None,
+) -> ComponentIdentifier:
+    return ComponentIdentifier(
+        class_name=class_name,
+        class_module="pyrit.score.test_scorer",  # fixed module path used for every identifier built here
+        params=params or {},  # None (or any falsy value) falls back to a fresh empty dict
+        children=children or {},  # same fallback as params
+    )
+
+
+def _make_objective_metrics(**overrides) -> ObjectiveScorerMetrics:  # Helper: default metrics, overridable.
+    defaults = {  # baseline field values; a test overrides only the fields it cares about
+        "num_responses": 100,
+        "num_human_raters": 3,
+        "accuracy": 0.92,
+        "accuracy_standard_error": 0.02,
+        "f1_score": 0.91,
+        "precision": 0.93,
+        "recall": 0.90,
+        "average_score_time_seconds": 0.3,
+    }
+    defaults.update(overrides)  # keyword overrides win over the baseline values
+    return ObjectiveScorerMetrics(**defaults)
+
+
+def _make_harm_metrics(**overrides) -> HarmScorerMetrics:  # Helper: default harm metrics, overridable.
+    defaults = {  # baseline field values; a test overrides only the fields it cares about
+        "num_responses": 50,
+        "num_human_raters": 2,
+        "mean_absolute_error": 0.08,
+        "mae_standard_error": 0.01,
+        "t_statistic": 1.5,
+        "p_value": 0.13,
+        "krippendorff_alpha_combined": 0.85,
+        "krippendorff_alpha_model": 0.82,
+        "average_score_time_seconds": 0.8,
+    }
+    defaults.update(overrides)  # keyword overrides win over the baseline values
+    return HarmScorerMetrics(**defaults)
+
+
+# --- __init__ tests ---
+
+
+def test_init_default_values():  # A no-argument printer is expected to use a two-space indent with colors on.
+    printer = ConsoleScorerPrinter()
+    assert printer._indent == "  "
+    assert printer._enable_colors is True
+
+
+def test_init_custom_indent():  # indent_size=4 is expected to yield a four-space indent string.
+    printer = ConsoleScorerPrinter(indent_size=4)
+    assert printer._indent == "    "
+
+
+def test_init_zero_indent():  # indent_size=0 is accepted and yields an empty indent string.
+    printer = ConsoleScorerPrinter(indent_size=0)
+    assert printer._indent == ""
+
+
+def test_init_negative_indent_raises():  # A negative indent_size must be rejected with ValueError.
+    with pytest.raises(ValueError, match="indent_size must be non-negative"):  # regex-searched in the message
+        ConsoleScorerPrinter(indent_size=-1)
+
+
+def test_init_colors_disabled():  # enable_colors=False is expected to be stored on the instance.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    assert printer._enable_colors is False
+
+
+# --- _print_colored tests ---
+
+
+def test_print_colored_with_colors_enabled(capsys):  # With colors on, output has the text and a reset code.
+    printer = ConsoleScorerPrinter(enable_colors=True)
+    printer._print_colored("hello", Fore.GREEN)
+    captured = capsys.readouterr()  # capsys is pytest's stdout/stderr capture fixture
+    assert "hello" in captured.out
+    assert Style.RESET_ALL in captured.out  # only the reset sequence is checked, not the green code itself
+
+
+def test_print_colored_with_colors_disabled(capsys):  # With colors off, only the plain text is printed.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    printer._print_colored("hello", Fore.GREEN)  # a color is passed but is expected to be ignored
+    captured = capsys.readouterr()
+    assert captured.out.strip() == "hello"
+    assert Style.RESET_ALL not in captured.out
+
+
+def test_print_colored_no_colors_arg(capsys):  # With no color arguments, the text is printed unstyled.
+    printer = ConsoleScorerPrinter(enable_colors=True)  # colors are enabled, but none are supplied below
+    printer._print_colored("plain text")
+    captured = capsys.readouterr()
+    assert captured.out.strip() == "plain text"
+
+
+# --- _get_quality_color tests ---
+
+
+def test_quality_color_higher_is_better_good():  # 0.95 is above the 0.9 good threshold: green expected.
+    printer = ConsoleScorerPrinter()
+    color = printer._get_quality_color(0.95, higher_is_better=True, good_threshold=0.9, bad_threshold=0.7)
+    assert color == Fore.GREEN
+
+
+def test_quality_color_higher_is_better_bad():  # 0.5 is below the 0.7 bad threshold: red expected.
+    printer = ConsoleScorerPrinter()
+    color = printer._get_quality_color(0.5, higher_is_better=True, good_threshold=0.9, bad_threshold=0.7)
+    assert color == Fore.RED
+
+
+def test_quality_color_higher_is_better_middle():  # 0.8 lies between the two thresholds: cyan expected.
+    printer = ConsoleScorerPrinter()
+    color = printer._get_quality_color(0.8, higher_is_better=True, good_threshold=0.9, bad_threshold=0.7)
+    assert color == Fore.CYAN
+
+
+def test_quality_color_lower_is_better_good():  # 0.05 is below the 0.1 good threshold: green expected.
+    printer = ConsoleScorerPrinter()
+    color = printer._get_quality_color(0.05, higher_is_better=False, good_threshold=0.1, bad_threshold=0.25)
+    assert color == Fore.GREEN
+
+
+def test_quality_color_lower_is_better_bad():  # 0.3 is above the 0.25 bad threshold: red expected.
+    printer = ConsoleScorerPrinter()
+    color = printer._get_quality_color(0.3, higher_is_better=False, good_threshold=0.1, bad_threshold=0.25)
+    assert color == Fore.RED
+
+
+def test_quality_color_lower_is_better_middle():  # 0.15 lies between the two thresholds: cyan expected.
+    printer = ConsoleScorerPrinter()
+    color = printer._get_quality_color(0.15, higher_is_better=False, good_threshold=0.1, bad_threshold=0.25)
+    assert color == Fore.CYAN
+
+
+# --- _print_scorer_info tests ---
+
+
+def test_print_scorer_info_basic(capsys):  # The scorer's class name is expected in the printed info.
+    printer = ConsoleScorerPrinter(enable_colors=False)  # colors off keeps the captured text free of escape codes
+    identifier = _make_scorer_identifier(class_name="SelfAskScaleScorer")
+    printer._print_scorer_info(identifier, indent_level=2)
+    output = capsys.readouterr().out
+    assert "SelfAskScaleScorer" in output
+
+
+def test_print_scorer_info_with_display_params(capsys):  # Only some identifier params are expected in the output.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    identifier = _make_scorer_identifier(
+        class_name="TestScorer",
+        params={"scorer_type": "likert", "score_aggregator": "mean", "hidden_param": "ignore"},
+    )
+    printer._print_scorer_info(identifier, indent_level=2)
+    output = capsys.readouterr().out
+    assert "scorer_type" in output
+    assert "score_aggregator" in output
+    assert "hidden_param" not in output  # this param is expected to be left out of the printed info
+
+
+def test_print_scorer_info_with_prompt_target_child(capsys):  # A prompt_target child's model name is printed.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    target_id = ComponentIdentifier(
+        class_name="OpenAIChatTarget",
+        class_module="pyrit.prompt_target",
+        params={"model_name": "gpt-4", "temperature": "0.0", "extra": "skip"},
+    )
+    identifier = _make_scorer_identifier(
+        children={"prompt_target": target_id},  # the target is attached under the "prompt_target" child key
+    )
+    printer._print_scorer_info(identifier, indent_level=2)
+    output = capsys.readouterr().out
+    assert "gpt-4" in output
+    assert "extra" not in output  # the "extra" target param is expected to be left out
+
+
+def test_print_scorer_info_with_sub_scorers(capsys):  # Sub-scorers are summarized and listed by class name.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    sub1 = _make_scorer_identifier(class_name="SubScorer1")
+    sub2 = _make_scorer_identifier(class_name="SubScorer2")
+    identifier = _make_scorer_identifier(
+        class_name="CompositeScorer",
+        children={"sub_scorers": [sub1, sub2]},  # a list of identifiers under the "sub_scorers" child key
+    )
+    printer._print_scorer_info(identifier, indent_level=2)
+    output = capsys.readouterr().out
+    assert "Composite of 2 scorer(s)" in output  # the count matches the two sub-scorers above
+    assert "SubScorer1" in output
+    assert "SubScorer2" in output
+
+
+# --- _print_objective_metrics tests ---
+
+
+def test_print_objective_metrics_none(capsys):  # Passing None prints the "not been run yet" notice.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    printer._print_objective_metrics(None)
+    output = capsys.readouterr().out
+    assert "Official evaluation has not been run yet" in output
+
+
+def test_print_objective_metrics_full(capsys):  # With every field set, each metric label is expected.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    metrics = _make_objective_metrics()  # helper defaults populate every optional field
+    printer._print_objective_metrics(metrics)
+    output = capsys.readouterr().out
+    assert "Accuracy" in output
+    assert "F1 Score" in output
+    assert "Precision" in output
+    assert "Recall" in output
+    assert "Average Score Time" in output
+
+
+def test_print_objective_metrics_optional_fields_none(capsys):  # Fields set to None are expected to be skipped.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    metrics = _make_objective_metrics(
+        accuracy_standard_error=None,
+        f1_score=None,
+        precision=None,
+        recall=None,
+        average_score_time_seconds=None,
+    )
+    printer._print_objective_metrics(metrics)
+    output = capsys.readouterr().out
+    assert "Accuracy" in output  # accuracy itself keeps its default value, so its label still appears
+    assert "F1 Score" not in output
+    assert "Precision" not in output
+    assert "Recall" not in output
+    assert "Average Score Time" not in output
+
+
+# --- _print_harm_metrics tests ---
+
+
+def test_print_harm_metrics_none(capsys):  # Passing None prints the "not been run yet" notice.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    printer._print_harm_metrics(None)
+    output = capsys.readouterr().out
+    assert "Official evaluation has not been run yet" in output
+
+
+def test_print_harm_metrics_full(capsys):  # With every field set, each harm metric label is expected.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    metrics = _make_harm_metrics()  # helper defaults populate every optional field
+    printer._print_harm_metrics(metrics)
+    output = capsys.readouterr().out
+    assert "Mean Absolute Error" in output
+    assert "Krippendorff Alpha (Combined)" in output
+    assert "Krippendorff Alpha (Model)" in output
+    assert "Average Score Time" in output
+
+
+def test_print_harm_metrics_optional_fields_none(capsys):  # Fields set to None are expected to be skipped.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    metrics = _make_harm_metrics(
+        mae_standard_error=None,
+        krippendorff_alpha_combined=None,
+        krippendorff_alpha_model=None,
+        average_score_time_seconds=None,
+    )
+    printer._print_harm_metrics(metrics)
+    output = capsys.readouterr().out
+    assert "Mean Absolute Error" in output  # mean_absolute_error keeps its default, so its label still appears
+    assert "MAE Std Error" not in output
+    assert "Krippendorff Alpha (Combined)" not in output
+    assert "Krippendorff Alpha (Model)" not in output
+    assert "Average Score Time" not in output
+
+
+# --- print_objective_scorer tests ---
+
+
+@patch("pyrit.score.scorer_evaluation.scorer_metrics_io.find_objective_metrics_by_eval_hash")  # -> mock_find
+@patch("pyrit.identifiers.evaluation_identifier.ScorerEvaluationIdentifier")  # -> mock_eval_id_cls (applied first)
+def test_print_objective_scorer_with_metrics(mock_eval_id_cls, mock_find, capsys):  # Info and metrics both print.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    identifier = _make_scorer_identifier(class_name="MyScorer")
+    metrics = _make_objective_metrics()
+
+    mock_eval_instance = MagicMock()
+    mock_eval_instance.eval_hash = "abc123"
+    mock_eval_id_cls.return_value = mock_eval_instance  # constructing the patched class yields this stand-in
+    mock_find.return_value = metrics
+
+    printer.print_objective_scorer(scorer_identifier=identifier)
+    output = capsys.readouterr().out
+
+    assert "Scorer Information" in output
+    assert "MyScorer" in output
+    assert "Accuracy" in output
+    mock_find.assert_called_once_with(eval_hash="abc123")  # the lookup must use the stand-in's eval_hash
+
+
+@patch("pyrit.score.scorer_evaluation.scorer_metrics_io.find_objective_metrics_by_eval_hash")  # -> mock_find
+@patch("pyrit.identifiers.evaluation_identifier.ScorerEvaluationIdentifier")  # -> mock_eval_id_cls (applied first)
+def test_print_objective_scorer_no_metrics(mock_eval_id_cls, mock_find, capsys):  # No stored metrics: notice shown.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    identifier = _make_scorer_identifier()
+
+    mock_eval_instance = MagicMock()
+    mock_eval_instance.eval_hash = "xyz"
+    mock_eval_id_cls.return_value = mock_eval_instance  # constructing the patched class yields this stand-in
+    mock_find.return_value = None  # simulate a lookup that finds nothing
+
+    printer.print_objective_scorer(scorer_identifier=identifier)
+    output = capsys.readouterr().out
+    assert "Official evaluation has not been run yet" in output
+
+
+# --- print_harm_scorer tests ---
+
+
+@patch("pyrit.score.scorer_evaluation.scorer_metrics_io.find_harm_metrics_by_eval_hash")  # -> mock_find
+@patch("pyrit.identifiers.evaluation_identifier.ScorerEvaluationIdentifier")  # -> mock_eval_id_cls (applied first)
+def test_print_harm_scorer_with_metrics(mock_eval_id_cls, mock_find, capsys):  # Info and harm metrics both print.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    identifier = _make_scorer_identifier(class_name="HarmScorer")
+    metrics = _make_harm_metrics()
+
+    mock_eval_instance = MagicMock()
+    mock_eval_instance.eval_hash = "harm_hash"
+    mock_eval_id_cls.return_value = mock_eval_instance  # constructing the patched class yields this stand-in
+    mock_find.return_value = metrics
+
+    printer.print_harm_scorer(identifier, harm_category="hate_speech")
+    output = capsys.readouterr().out
+
+    assert "Scorer Information" in output
+    assert "HarmScorer" in output
+    assert "Mean Absolute Error" in output
+    mock_find.assert_called_once_with(eval_hash="harm_hash", harm_category="hate_speech")  # both are forwarded
+
+
+@patch("pyrit.score.scorer_evaluation.scorer_metrics_io.find_harm_metrics_by_eval_hash")  # -> mock_find
+@patch("pyrit.identifiers.evaluation_identifier.ScorerEvaluationIdentifier")  # -> mock_eval_id_cls (applied first)
+def test_print_harm_scorer_no_metrics(mock_eval_id_cls, mock_find, capsys):  # No stored metrics: notice shown.
+    printer = ConsoleScorerPrinter(enable_colors=False)
+    identifier = _make_scorer_identifier()
+
+    mock_eval_instance = MagicMock()
+    mock_eval_instance.eval_hash = "no_data"
+    mock_eval_id_cls.return_value = mock_eval_instance  # constructing the patched class yields this stand-in
+    mock_find.return_value = None  # simulate a lookup that finds nothing
+
+    printer.print_harm_scorer(identifier, harm_category="violence")
+    output = capsys.readouterr().out
+    assert "Official evaluation has not been run yet" in output
diff --git a/tests/unit/score/test_metrics_type.py b/tests/unit/score/test_metrics_type.py
new file mode 100644
index 000000000..53276fc4a
--- /dev/null
+++ b/tests/unit/score/test_metrics_type.py
@@ -0,0 +1,47 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from pyrit.score.scorer_evaluation.metrics_type import MetricsType, RegistryUpdateBehavior
+
+
+def test_metrics_type_harm_value():  # MetricsType.HARM carries the string value "harm".
+    assert MetricsType.HARM.value == "harm"
+
+
+def test_metrics_type_objective_value():  # MetricsType.OBJECTIVE carries the string value "objective".
+    assert MetricsType.OBJECTIVE.value == "objective"
+
+
+def test_metrics_type_members():  # MetricsType has exactly two members: HARM and OBJECTIVE.
+    members = list(MetricsType)  # iterating an Enum class yields its members
+    assert len(members) == 2  # fails if a member is added without updating this test
+    assert MetricsType.HARM in members
+    assert MetricsType.OBJECTIVE in members
+
+
+def test_metrics_type_from_value():  # Looking a member up by its value returns that same member.
+    assert MetricsType("harm") is MetricsType.HARM
+    assert MetricsType("objective") is MetricsType.OBJECTIVE
+
+
+def test_registry_update_behavior_skip_if_exists():  # SKIP_IF_EXISTS carries the value "skip_if_exists".
+    assert RegistryUpdateBehavior.SKIP_IF_EXISTS.value == "skip_if_exists"
+
+
+def test_registry_update_behavior_always_update():  # ALWAYS_UPDATE carries the value "always_update".
+    assert RegistryUpdateBehavior.ALWAYS_UPDATE.value == "always_update"
+
+
+def test_registry_update_behavior_never_update():  # NEVER_UPDATE carries the value "never_update".
+    assert RegistryUpdateBehavior.NEVER_UPDATE.value == "never_update"
+
+
+def test_registry_update_behavior_members():  # RegistryUpdateBehavior has exactly three members.
+    members = list(RegistryUpdateBehavior)  # iterating an Enum class yields its members
+    assert len(members) == 3  # fails if a member is added without updating this test
+
+
+def test_registry_update_behavior_from_value():  # Looking a member up by its value returns that same member.
+    assert RegistryUpdateBehavior("skip_if_exists") is RegistryUpdateBehavior.SKIP_IF_EXISTS
+    assert RegistryUpdateBehavior("always_update") is RegistryUpdateBehavior.ALWAYS_UPDATE
+    assert RegistryUpdateBehavior("never_update") is RegistryUpdateBehavior.NEVER_UPDATE
diff --git a/tests/unit/score/test_score_aggregator_result.py b/tests/unit/score/test_score_aggregator_result.py
new file mode 100644
index 000000000..6212fc42e
--- /dev/null
+++ b/tests/unit/score/test_score_aggregator_result.py
@@ -0,0 +1,110 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+from dataclasses import FrozenInstanceError
+
+import pytest
+
+from pyrit.score.score_aggregator_result import ScoreAggregatorResult
+
+
+def test_init_with_bool_value():  # A bool value and every other field are stored exactly as passed.
+    result = ScoreAggregatorResult(
+        value=True,
+        description="All passed",
+        rationale="All scores were true",
+        category=["safety"],
+        metadata={"count": 3},
+    )
+    assert result.value is True  # identity check: the stored value must be the bool True itself
+    assert result.description == "All passed"
+    assert result.rationale == "All scores were true"
+    assert result.category == ["safety"]
+    assert result.metadata == {"count": 3}
+
+
+def test_init_with_float_value():  # A float value is accepted and the fields are stored as passed.
+    result = ScoreAggregatorResult(
+        value=0.75,
+        description="High score",
+        rationale="Average was above threshold",
+        category=["harm", "violence"],
+        metadata={"mean": 0.75, "std": 0.1},
+    )
+    assert result.value == 0.75
+    assert result.description == "High score"
+    assert result.category == ["harm", "violence"]
+    assert result.metadata == {"mean": 0.75, "std": 0.1}
+
+
+def test_init_with_empty_category_and_metadata():  # Empty list, dict and string are stored unchanged.
+    result = ScoreAggregatorResult(
+        value=False,
+        description="No matches",
+        rationale="",
+        category=[],
+        metadata={},
+    )
+    assert result.category == []
+    assert result.metadata == {}
+    assert result.rationale == ""
+
+
+def test_frozen_cannot_set_value():  # Assigning to value after construction must raise FrozenInstanceError.
+    result = ScoreAggregatorResult(
+        value=True,
+        description="test",
+        rationale="test",
+        category=[],
+        metadata={},
+    )
+    with pytest.raises(FrozenInstanceError):  # the exception dataclasses raise for writes to frozen instances
+        result.value = False  # type: ignore[misc]
+
+
+def test_frozen_cannot_set_description():  # Assigning to description must raise FrozenInstanceError.
+    result = ScoreAggregatorResult(
+        value=0.5,
+        description="original",
+        rationale="test",
+        category=[],
+        metadata={},
+    )
+    with pytest.raises(FrozenInstanceError):  # the exception dataclasses raise for writes to frozen instances
+        result.description = "changed"  # type: ignore[misc]
+
+
+def test_equality_same_values():  # Two results built from equal field values compare equal.
+    r1 = ScoreAggregatorResult(value=True, description="d", rationale="r", category=["c"], metadata={"k": 1})
+    r2 = ScoreAggregatorResult(value=True, description="d", rationale="r", category=["c"], metadata={"k": 1})
+    assert r1 == r2
+
+
+def test_inequality_different_values():  # Results that differ only in value compare unequal.
+    r1 = ScoreAggregatorResult(value=True, description="d", rationale="r", category=[], metadata={})
+    r2 = ScoreAggregatorResult(value=False, description="d", rationale="r", category=[], metadata={})
+    assert r1 != r2
+
+
+def test_inequality_different_description():  # Results that differ only in description compare unequal.
+    r1 = ScoreAggregatorResult(value=0.5, description="a", rationale="r", category=[], metadata={})
+    r2 = ScoreAggregatorResult(value=0.5, description="b", rationale="r", category=[], metadata={})
+    assert r1 != r2
+
+
+def test_slots_no_dict():  # Instances are expected to have no per-instance __dict__.
+    result = ScoreAggregatorResult(value=True, description="d", rationale="r", category=[], metadata={})
+    assert not hasattr(result, "__dict__")  # presumably because the class declares slots - confirm in the class
+
+
+def test_metadata_with_mixed_types():  # Metadata may mix str, int and float values and reads back unchanged.
+    result = ScoreAggregatorResult(
+        value=0.9,
+        description="mixed",
+        rationale="test",
+        category=["a"],
+        metadata={"name": "scorer1", "count": 5, "threshold": 0.8},
+    )
+    assert result.metadata["name"] == "scorer1"
+    assert result.metadata["count"] == 5
+    assert result.metadata["threshold"] == 0.8
diff --git a/tests/unit/score/test_scorer_metrics_io.py b/tests/unit/score/test_scorer_metrics_io.py
new file mode 100644
index 000000000..9ed4e4138
--- /dev/null
+++ b/tests/unit/score/test_scorer_metrics_io.py
@@ -0,0 +1,414 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import json
+from pathlib import Path
+from unittest.mock import patch
+
+from pyrit.identifiers import ComponentIdentifier
+from pyrit.score.scorer_evaluation.scorer_metrics import (
+    HarmScorerMetrics,
+    ObjectiveScorerMetrics,
+    ScorerMetricsWithIdentity,
+)
+from pyrit.score.scorer_evaluation.scorer_metrics_io import (
+    _append_jsonl_entry,
+    _load_jsonl,
+    _metrics_to_registry_dict,
+    add_evaluation_results,
+    find_harm_metrics_by_eval_hash,
+    find_objective_metrics_by_eval_hash,
+    get_all_harm_metrics,
+    get_all_objective_metrics,
+    replace_evaluation_results,
+)
+
+
+def _make_identifier(*, class_name: str = "TestScorer") -> ComponentIdentifier:  # Helper: scorer identifier.
+    return ComponentIdentifier(
+        class_name=class_name,
+        class_module="pyrit.score.test",  # fixed module path used for every identifier built here
+        params={"model_name": "gpt-4"},  # fixed params; only class_name varies between tests
+    )
+
+
+def _make_objective_metrics(**overrides) -> ObjectiveScorerMetrics:  # Helper: default metrics, overridable.
+    defaults = {  # baseline field values; a test overrides only the fields it cares about
+        "num_responses": 100,
+        "num_human_raters": 3,
+        "accuracy": 0.92,
+        "accuracy_standard_error": 0.02,
+        "f1_score": 0.91,
+        "precision": 0.93,
+        "recall": 0.90,
+    }
+    defaults.update(overrides)  # keyword overrides win and may add fields absent from the baseline
+    return ObjectiveScorerMetrics(**defaults)
+
+
+def _make_harm_metrics(**overrides) -> HarmScorerMetrics:  # Helper: default harm metrics, overridable.
+    defaults = {  # baseline field values; a test overrides only the fields it cares about
+        "num_responses": 50,
+        "num_human_raters": 2,
+        "mean_absolute_error": 0.08,
+        "mae_standard_error": 0.01,
+        "t_statistic": 1.5,
+        "p_value": 0.13,
+        "krippendorff_alpha_combined": 0.85,
+    }
+    defaults.update(overrides)  # keyword overrides win and may add fields absent from the baseline
+    return HarmScorerMetrics(**defaults)
+
+
+def _write_jsonl(path: Path, entries: list[dict]) -> None:  # Helper: write entries as one JSON object per line.
+    path.parent.mkdir(parents=True, exist_ok=True)  # create missing parent directories first
+    with open(path, "w", encoding="utf-8") as f:  # "w" truncates any existing file
+        for entry in entries:  # an empty list leaves an empty file behind
+            f.write(json.dumps(entry) + "\n")
+
+
+# --- _load_jsonl tests ---
+
+
+def test_load_jsonl_file_not_found(tmp_path):  # A missing file yields an empty list rather than an error.
+    result = _load_jsonl(tmp_path / "missing.jsonl")  # tmp_path is pytest's per-test temporary directory
+    assert result == []
+
+
+def test_load_jsonl_valid_entries(tmp_path):  # Entries written one per line are read back in the same order.
+    path = tmp_path / "data.jsonl"
+    entries = [{"a": 1}, {"b": 2}]
+    _write_jsonl(path, entries)
+    result = _load_jsonl(path)
+    assert result == entries
+
+
+def test_load_jsonl_skips_invalid_json(tmp_path):  # A malformed line is dropped; valid lines are kept.
+    path = tmp_path / "data.jsonl"
+    path.write_text('{"valid": true}\nnot json\n{"also_valid": true}\n', encoding="utf-8")  # line 2 is not JSON
+    result = _load_jsonl(path)
+    assert len(result) == 2
+    assert result[0] == {"valid": True}
+    assert result[1] == {"also_valid": True}
+
+
+def test_load_jsonl_skips_blank_lines(tmp_path):  # Blank lines between entries are ignored.
+    path = tmp_path / "data.jsonl"
+    path.write_text('{"a": 1}\n\n\n{"b": 2}\n', encoding="utf-8")  # two empty lines sit between the entries
+    result = _load_jsonl(path)
+    assert len(result) == 2
+
+
+# --- _append_jsonl_entry tests ---
+
+
+def test_append_jsonl_entry_creates_file(tmp_path):  # Appending to a missing path creates the file.
+    import threading
+
+    path = tmp_path / "subdir" / "out.jsonl"  # "subdir" does not exist yet inside the fresh tmp_path
+    lock = threading.Lock()
+    entry = {"key": "value"}
+    _append_jsonl_entry(file_path=path, lock=lock, entry=entry)
+
+    assert path.exists()
+    lines = path.read_text(encoding="utf-8").strip().split("\n")
+    assert len(lines) == 1
+    assert json.loads(lines[0]) == entry
+
+
+def test_append_jsonl_entry_appends(tmp_path):  # Appending keeps the existing line and adds one more.
+    import threading
+
+    path = tmp_path / "out.jsonl"
+    _write_jsonl(path, [{"first": 1}])  # seed the file with one entry
+    lock = threading.Lock()
+    _append_jsonl_entry(file_path=path, lock=lock, entry={"second": 2})
+
+    lines = path.read_text(encoding="utf-8").strip().split("\n")
+    assert len(lines) == 2  # only the count is checked, not the content of each line
+
+
+# --- _metrics_to_registry_dict tests ---
+
+
+def test_metrics_to_registry_dict_excludes_trial_scores():  # "trial_scores" must not appear in the dict.
+    metrics = _make_objective_metrics()
+    result = _metrics_to_registry_dict(metrics)
+    assert "trial_scores" not in result
+
+
+def test_metrics_to_registry_dict_excludes_none_values():  # Fields whose value is None are dropped.
+    metrics = _make_objective_metrics(average_score_time_seconds=None, dataset_name=None)
+    result = _metrics_to_registry_dict(metrics)
+    assert "average_score_time_seconds" not in result
+    assert "dataset_name" not in result
+
+
+def test_metrics_to_registry_dict_excludes_private_fields():  # "_harm_definition_obj" must not be exported.
+    metrics = _make_harm_metrics()
+    result = _metrics_to_registry_dict(metrics)
+    assert "_harm_definition_obj" not in result
+
+
+def test_metrics_to_registry_dict_includes_values():  # Populated fields are exported with their values.
+    metrics = _make_objective_metrics()
+    result = _metrics_to_registry_dict(metrics)
+    assert result["accuracy"] == 0.92  # values below are the helper's baseline defaults
+    assert result["f1_score"] == 0.91
+    assert result["num_responses"] == 100
+
+
+# --- find_objective_metrics_by_eval_hash tests ---
+
+
+def test_find_objective_metrics_by_eval_hash_found(tmp_path):  # A stored entry is found by its eval_hash.
+    identifier = _make_identifier()
+    entry = identifier.to_dict()  # registry entry = identifier fields plus "eval_hash" and "metrics" keys
+    entry["eval_hash"] = "hash_abc"
+    entry["metrics"] = _metrics_to_registry_dict(_make_objective_metrics(accuracy=0.88))
+    path = tmp_path / "objective_achieved_metrics.jsonl"
+    _write_jsonl(path, [entry])
+
+    result = find_objective_metrics_by_eval_hash(eval_hash="hash_abc", file_path=path)
+    assert result is not None
+    assert result.accuracy == 0.88  # the overridden accuracy survives the round trip through the file
+
+
+def test_find_objective_metrics_by_eval_hash_not_found(tmp_path):  # An empty registry file yields None.
+    path = tmp_path / "objective_achieved_metrics.jsonl"
+    _write_jsonl(path, [])  # creates the file with no entries
+    result = find_objective_metrics_by_eval_hash(eval_hash="missing", file_path=path)
+    assert result is None
+
+
+def test_find_objective_metrics_by_eval_hash_missing_file(tmp_path):  # A nonexistent file yields None.
+    result = find_objective_metrics_by_eval_hash(eval_hash="nope", file_path=tmp_path / "nonexistent.jsonl")
+    assert result is None
+
+
+def test_find_objective_metrics_default_path():  # Without file_path, the objective registry file is loaded.
+    with patch("pyrit.score.scorer_evaluation.scorer_metrics_io._load_jsonl", return_value=[]) as mock_load:
+        result = find_objective_metrics_by_eval_hash(eval_hash="test_hash")
+        assert result is None
+        call_args = mock_load.call_args[0][0]  # first positional argument of the recorded _load_jsonl call
+        assert "objective" in str(call_args)
+        assert "objective_achieved_metrics.jsonl" in str(call_args)
+
+
+# --- find_harm_metrics_by_eval_hash tests ---
+
+
+def test_find_harm_metrics_by_eval_hash_found():  # A matching entry returned by the loader is found.
+    identifier = _make_identifier()
+    entry = identifier.to_dict()  # registry entry = identifier fields plus "eval_hash" and "metrics" keys
+    entry["eval_hash"] = "harm_hash"
+    entry["metrics"] = _metrics_to_registry_dict(_make_harm_metrics(mean_absolute_error=0.12))
+
+    with patch("pyrit.score.scorer_evaluation.scorer_metrics_io._load_jsonl") as mock_load:  # no file is read
+        mock_load.return_value = [entry]
+        result = find_harm_metrics_by_eval_hash(eval_hash="harm_hash", harm_category="hate_speech")
+    assert result is not None
+    assert result.mean_absolute_error == 0.12
+
+
+def test_find_harm_metrics_by_eval_hash_not_found():  # With no loaded entries the lookup yields None.
+    with patch("pyrit.score.scorer_evaluation.scorer_metrics_io._load_jsonl", return_value=[]):
+        result = find_harm_metrics_by_eval_hash(eval_hash="missing", harm_category="violence")
+    assert result is None
+
+
+# --- get_all_objective_metrics tests ---
+
+
+def test_get_all_objective_metrics_from_file(tmp_path):  # A stored entry loads as metrics plus identifier.
+    identifier = _make_identifier(class_name="Scorer1")
+    metrics = _make_objective_metrics()
+    entry = identifier.to_dict()  # registry entry = identifier fields plus "eval_hash" and "metrics" keys
+    entry["eval_hash"] = "h1"
+    entry["metrics"] = _metrics_to_registry_dict(metrics)
+    path = tmp_path / "objective_achieved_metrics.jsonl"
+    _write_jsonl(path, [entry])
+
+    results = get_all_objective_metrics(file_path=path)
+    assert len(results) == 1
+    assert isinstance(results[0], ScorerMetricsWithIdentity)
+    assert results[0].metrics.accuracy == 0.92
+    assert results[0].scorer_identifier.class_name == "Scorer1"
+
+
+def test_get_all_objective_metrics_empty_file(tmp_path):  # An empty registry file yields an empty list.
+    path = tmp_path / "empty.jsonl"
+    _write_jsonl(path, [])  # creates the file with no entries
+    results = get_all_objective_metrics(file_path=path)
+    assert results == []
+
+
+def test_get_all_objective_metrics_default_path():  # Without file_path, the objective registry file is used.
+    with patch("pyrit.score.scorer_evaluation.scorer_metrics_io._load_metrics_from_file", return_value=[]) as mock_load:
+        results = get_all_objective_metrics()
+        assert results == []
+        call_path = mock_load.call_args[1]["file_path"]  # call_args[1] holds the recorded keyword arguments
+        assert "objective_achieved_metrics.jsonl" in str(call_path)
+
+
+# --- get_all_harm_metrics tests ---
+
+
+def test_get_all_harm_metrics():  # Entries returned by the loader come back as harm metrics objects.
+    identifier = _make_identifier()
+    metrics = _make_harm_metrics()
+    entry = identifier.to_dict()  # note: no "eval_hash" key is set on this entry
+    entry["metrics"] = _metrics_to_registry_dict(metrics)
+
+    with patch("pyrit.score.scorer_evaluation.scorer_metrics_io._load_jsonl") as mock_load:  # no file is read
+        mock_load.return_value = [entry]
+        results = get_all_harm_metrics(harm_category="hate_speech")
+    assert len(results) == 1
+    assert results[0].metrics.mean_absolute_error == 0.08
+
+
+# --- add_evaluation_results tests ---
+
+
+def test_add_evaluation_results_creates_entry(tmp_path):  # Adding results writes one registry entry.
+    import pyrit.score.scorer_evaluation.scorer_metrics_io as sio
+
+    original_locks = sio._file_write_locks.copy()  # snapshot of the module-level container, restored below
+    try:
+        path = tmp_path / "objective" / "test_metrics.jsonl"
+        identifier = _make_identifier()
+        metrics = _make_objective_metrics()
+
+        add_evaluation_results(
+            file_path=path,
+            scorer_identifier=identifier,
+            eval_hash="eval_abc",
+            metrics=metrics,
+        )
+
+        assert path.exists()
+        entries = _load_jsonl(path)
+        assert len(entries) == 1
+        assert entries[0]["eval_hash"] == "eval_abc"
+        assert entries[0]["metrics"]["accuracy"] == 0.92
+        assert entries[0]["class_name"] == "TestScorer"  # identifier fields sit at the top level of the entry
+    finally:
+        sio._file_write_locks = original_locks  # restore the snapshot even if an assertion failed
+
+
+def test_add_evaluation_results_appends_multiple(tmp_path):  # Successive adds append entries in call order.
+    import pyrit.score.scorer_evaluation.scorer_metrics_io as sio
+
+    original_locks = sio._file_write_locks.copy()  # snapshot of the module-level container, restored below
+    try:
+        path = tmp_path / "test_metrics.jsonl"
+
+        add_evaluation_results(
+            file_path=path,
+            scorer_identifier=_make_identifier(class_name="Scorer1"),
+            eval_hash="h1",
+            metrics=_make_objective_metrics(accuracy=0.80),
+        )
+        add_evaluation_results(
+            file_path=path,
+            scorer_identifier=_make_identifier(class_name="Scorer2"),
+            eval_hash="h2",
+            metrics=_make_objective_metrics(accuracy=0.90),
+        )
+
+        entries = _load_jsonl(path)
+        assert len(entries) == 2
+        assert entries[0]["eval_hash"] == "h1"  # first add comes first in the file
+        assert entries[1]["eval_hash"] == "h2"
+    finally:
+        sio._file_write_locks = original_locks  # restore the snapshot even if an assertion failed
+
+
+# --- replace_evaluation_results tests ---
+
+
+def test_replace_evaluation_results_replaces_existing(tmp_path):  # Same eval_hash: the entry is overwritten.
+    import pyrit.score.scorer_evaluation.scorer_metrics_io as sio
+
+    original_locks = sio._file_write_locks.copy()  # snapshot of the module-level container, restored below
+    try:
+        path = tmp_path / "test_metrics.jsonl"
+        identifier = _make_identifier()
+
+        add_evaluation_results(
+            file_path=path,
+            scorer_identifier=identifier,
+            eval_hash="h1",
+            metrics=_make_objective_metrics(accuracy=0.80),
+        )
+
+        replace_evaluation_results(
+            file_path=path,
+            scorer_identifier=identifier,
+            eval_hash="h1",  # same hash as the entry added above
+            metrics=_make_objective_metrics(accuracy=0.95),
+        )
+
+        entries = _load_jsonl(path)
+        assert len(entries) == 1  # replaced in place rather than appended
+        assert entries[0]["metrics"]["accuracy"] == 0.95
+    finally:
+        sio._file_write_locks = original_locks  # restore the snapshot even if an assertion failed
+
+
+def test_replace_evaluation_results_adds_when_not_exists(tmp_path):  # Replace on a missing file adds the entry.
+    import pyrit.score.scorer_evaluation.scorer_metrics_io as sio
+
+    original_locks = sio._file_write_locks.copy()  # snapshot of the module-level container, restored below
+    try:
+        path = tmp_path / "test_metrics.jsonl"  # never created before the call below
+
+        replace_evaluation_results(
+            file_path=path,
+            scorer_identifier=_make_identifier(),
+            eval_hash="new_hash",
+            metrics=_make_objective_metrics(accuracy=0.85),
+        )
+
+        entries = _load_jsonl(path)
+        assert len(entries) == 1
+        assert entries[0]["eval_hash"] == "new_hash"
+    finally:
+        sio._file_write_locks = original_locks  # restore the snapshot even if an assertion failed
+
+
+def test_replace_evaluation_results_preserves_other_entries(tmp_path):  # Only the matching entry is replaced.
+    import pyrit.score.scorer_evaluation.scorer_metrics_io as sio
+
+    original_locks = sio._file_write_locks.copy()  # snapshot of the module-level container, restored below
+    try:
+        path = tmp_path / "test_metrics.jsonl"
+
+        add_evaluation_results(
+            file_path=path,
+            scorer_identifier=_make_identifier(class_name="A"),
+            eval_hash="keep_me",
+            metrics=_make_objective_metrics(accuracy=0.70),
+        )
+        add_evaluation_results(
+            file_path=path,
+            scorer_identifier=_make_identifier(class_name="B"),
+            eval_hash="replace_me",
+            metrics=_make_objective_metrics(accuracy=0.80),
+        )
+
+        replace_evaluation_results(
+            file_path=path,
+            scorer_identifier=_make_identifier(class_name="B_new"),  # different identifier, same eval_hash
+            eval_hash="replace_me",
+            metrics=_make_objective_metrics(accuracy=0.99),
+        )
+
+        entries = _load_jsonl(path)
+        assert len(entries) == 2
+        hashes = {e["eval_hash"] for e in entries}  # compared as a set, so entry order is not asserted
+        assert hashes == {"keep_me", "replace_me"}
+        replaced = [e for e in entries if e["eval_hash"] == "replace_me"][0]
+        assert replaced["metrics"]["accuracy"] == 0.99
+    finally:
+        sio._file_write_locks = original_locks  # restore the snapshot even if an assertion failed
diff --git a/tests/unit/score/test_scorer_printer.py b/tests/unit/score/test_scorer_printer.py
new file mode 100644
index 000000000..edd8b6a26
--- /dev/null
+++ b/tests/unit/score/test_scorer_printer.py
@@ -0,0 +1,42 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import pytest
+
+from pyrit.identifiers import ComponentIdentifier
+from pyrit.score.printer.scorer_printer import ScorerPrinter
+
+
+def test_scorer_printer_cannot_be_instantiated():  # The abstract base class itself cannot be constructed.
+    with pytest.raises(TypeError, match="Can't instantiate abstract class"):  # regex-searched in the message
+        ScorerPrinter()  # type: ignore[abstract]
+
+
+def test_scorer_printer_subclass_must_implement_print_objective_scorer():  # Partial subclass stays abstract.
+    class IncompletePrinter(ScorerPrinter):  # deliberately omits print_objective_scorer
+        def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_category: str) -> None:
+            pass
+
+    with pytest.raises(TypeError, match="Can't instantiate abstract class"):  # regex-searched in the message
+        IncompletePrinter()  # type: ignore[abstract]
+
+
+def test_scorer_printer_subclass_must_implement_print_harm_scorer():  # Partial subclass stays abstract.
+    class IncompletePrinter(ScorerPrinter):  # deliberately omits print_harm_scorer
+        def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> None:
+            pass
+
+    with pytest.raises(TypeError, match="Can't instantiate abstract class"):  # regex-searched in the message
+        IncompletePrinter()  # type: ignore[abstract]
+
+
+def test_scorer_printer_complete_subclass_can_be_instantiated():  # Implementing both methods allows construction.
+    class CompletePrinter(ScorerPrinter):  # overrides both print_objective_scorer and print_harm_scorer
+        def print_objective_scorer(self, *, scorer_identifier: ComponentIdentifier) -> None:
+            pass
+
+        def print_harm_scorer(self, scorer_identifier: ComponentIdentifier, *, harm_category: str) -> None:
+            pass
+
+    printer = CompletePrinter()  # must not raise TypeError
+    assert isinstance(printer, ScorerPrinter)