Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Lint
name: Lint & Unit Tests

on:
pull_request:
Expand All @@ -17,7 +17,7 @@ jobs:
python-version: "3.12"

- name: Install lint tools
run: pip install flake8 black isort
run: pip install flake8 black isort mypy

- name: Run flake8
run: flake8
Expand All @@ -27,3 +27,23 @@ jobs:

- name: Check import sorting
run: isort --check-only .

- name: Run type checking
run: mypy documentdb_tests/ --no-site-packages

unit-tests:
name: Unit Tests
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v6

- uses: actions/setup-python@v6
with:
python-version: "3.12"

- name: Install dependencies
run: pip install -r requirements.txt

- name: Run unit tests
run: pytest documentdb_tests/compatibility/result_analyzer/test_analyzer.py -v
62 changes: 36 additions & 26 deletions documentdb_tests/compatibility/result_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,29 +9,10 @@
import re
from collections import defaultdict
from pathlib import Path
from typing import Any, Dict, List
from typing import Any, Dict, List, Optional

# Module-level constants
INFRA_EXCEPTIONS = {
# Python built-in connection errors
"ConnectionError",
"ConnectionRefusedError",
"ConnectionResetError",
"ConnectionAbortedError",
# Python timeout errors
"TimeoutError",
"socket.timeout",
"socket.error",
# PyMongo connection errors
"pymongo.errors.ConnectionFailure",
"pymongo.errors.ServerSelectionTimeoutError",
"pymongo.errors.NetworkTimeout",
"pymongo.errors.AutoReconnect",
"pymongo.errors.ExecutionTimeout",
# Generic network/OS errors
"OSError",
}

from documentdb_tests.framework.infra_exceptions import INFRA_EXCEPTION_NAMES as INFRA_EXCEPTIONS

# Mapping from TestOutcome to counter key names
OUTCOME_TO_KEY = {
Expand Down Expand Up @@ -95,6 +76,30 @@ def extract_exception_type(crash_message: str) -> str:
return ""


def extract_failure_tag(test_result: Dict[str, Any]) -> str:
    """
    Extract failure tag (e.g. RESULT_MISMATCH) from assertion message.

    The framework assertions prefix errors with tags like:
    [RESULT_MISMATCH], [UNEXPECTED_ERROR], [UNEXPECTED_SUCCESS],
    [ERROR_MISMATCH], [TEST_EXCEPTION]

    Args:
        test_result: Full test result dict from pytest JSON

    Returns:
        Tag string without brackets, or empty string if not found
    """
    # Walk the nested pytest-JSON structure defensively; any missing
    # level collapses to an empty message.
    message = test_result.get("call", {}).get("crash", {}).get("message", "")

    # Tag is the first bracketed ALL_CAPS token; it may be preceded by an
    # exception-type prefix such as "AssertionError: ", so no ^ anchor.
    found = re.search(r"\[([A-Z_]+)\]", message)
    return found.group(1) if found else ""


def is_infrastructure_error(test_result: Dict[str, Any]) -> bool:
"""
Check if error is infrastructure-related based on exception type.
Expand Down Expand Up @@ -189,15 +194,17 @@ class ResultAnalyzer:
results = analyzer.analyze_results("report.json")
"""

def __init__(self, pytest_ini_path: str = "pytest.ini"):
_DEFAULT_PYTEST_INI = str(Path(__file__).resolve().parent.parent.parent / "pytest.ini")

def __init__(self, pytest_ini_path: str = _DEFAULT_PYTEST_INI):
"""
Initialize the result analyzer.

Args:
pytest_ini_path: Path to pytest.ini file (default: "pytest.ini")
pytest_ini_path: Path to pytest.ini file (default: documentdb_tests/pytest.ini)
"""
self.pytest_ini_path = pytest_ini_path
self._markers_cache: set = None
self._markers_cache: Optional[set] = None

def _get_registered_markers(self) -> set:
"""
Expand Down Expand Up @@ -330,11 +337,14 @@ def analyze_results(self, json_report_path: str) -> Dict[str, Any]:
"tags": tags,
}

# Add error information and infra error flag for failed tests
# Add error information for failed tests
if test_outcome == TestOutcome.FAIL:
call_info = test.get("call", {})
test_detail["error"] = call_info.get("longrepr", "")
test_detail["is_infra_error"] = is_infrastructure_error(test)
if is_infrastructure_error(test):
test_detail["failure_type"] = "INFRA_ERROR"
else:
test_detail["failure_type"] = extract_failure_tag(test) or "UNKNOWN"

tests_details.append(test_detail)

Expand Down
12 changes: 0 additions & 12 deletions documentdb_tests/compatibility/result_analyzer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,18 +95,6 @@ def main():
if not args.quiet:
print(f"\nReport saved to: {args.output}")

# If no output file and quiet mode, print to stdout
elif not args.quiet:
print("\nResults by Tag:")
print("-" * 60)
for tag, stats in sorted(
analysis["by_tag"].items(), key=lambda x: x[1]["pass_rate"], reverse=True
):
passed = stats["passed"]
total = stats["total"]
rate = stats["pass_rate"]
print(f"{tag:30s} | {passed:3d}/{total:3d} passed ({rate:5.1f}%)")

# Return exit code based on test results
if analysis["summary"]["failed"] > 0:
return 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,39 @@ def generate_json_report(analysis: Dict[str, Any], output_path: str):
json.dump(report, f, indent=2)


def _format_by_tag(analysis: Dict[str, Any]) -> list:
"""Format by-tag results as lines. Shared by both report functions."""
lines = []
by_tag = analysis.get("by_tag", {})
if by_tag:
sorted_tags = sorted(by_tag.items(), key=lambda x: x[1]["pass_rate"])
for tag, stats in sorted_tags:
lines.append(
{
"tag": tag,
"passed": stats["passed"],
"total": stats["total"],
"failed": stats["failed"],
"skipped": stats["skipped"],
"pass_rate": stats["pass_rate"],
}
)
return lines


def _categorize_failures(analysis: Dict[str, Any]) -> Dict[str, list]:
"""Group failed tests by failure_type. Shared by both report functions."""
failed_tests = [t for t in analysis["tests"] if t["outcome"] == "FAIL"]
grouped: Dict[str, list] = {}
for test in failed_tests:
ft = test.get("failure_type", "UNKNOWN")
grouped.setdefault(ft, []).append(test)
return grouped


def generate_text_report(analysis: Dict[str, Any], output_path: str):
"""
Generate a human-readable text report.
Generate a detailed human-readable text report to file.

Args:
analysis: Analysis results from analyze_results()
Expand Down Expand Up @@ -76,39 +106,44 @@ def generate_text_report(analysis: Dict[str, Any], output_path: str):
# Results by tag
lines.append("RESULTS BY TAG")
lines.append("-" * 80)

if analysis["by_tag"]:
# Sort tags by pass rate (ascending) to highlight problematic areas
sorted_tags = sorted(analysis["by_tag"].items(), key=lambda x: x[1]["pass_rate"])

for tag, stats in sorted_tags:
lines.append(f"\n{tag}:")
lines.append(f" Total: {stats['total']}")
lines.append(f" Passed: {stats['passed']} ({stats['pass_rate']}%)")
lines.append(f" Failed: {stats['failed']}")
lines.append(f" Skipped: {stats['skipped']}")
tag_data = _format_by_tag(analysis)
if tag_data:
for t in tag_data:
lines.append(f"\n{t['tag']}:")
lines.append(f" Total: {t['total']}")
lines.append(f" Passed: {t['passed']} ({t['pass_rate']}%)")
lines.append(f" Failed: {t['failed']}")
lines.append(f" Skipped: {t['skipped']}")
else:
lines.append("No tags found in test results.")

lines.append("")

# Failed tests details
failed_tests = [t for t in analysis["tests"] if t["outcome"] == "FAIL"]
if failed_tests:
grouped = _categorize_failures(analysis)
if grouped:
lines.append("FAILED TESTS")
lines.append("-" * 80)
for test in failed_tests:
lines.append(f"\n{test['name']}")
lines.append(f" Tags: {', '.join(test['tags'])}")
lines.append(f" Duration: {test['duration']:.2f}s")
if "error" in test:
error_preview = test["error"][:200]
lines.append(f" Error: {error_preview}...")
for ft in sorted(grouped):
lines.append(f"\n {ft} ({len(grouped[ft])}):")
for test in grouped[ft]:
lines.append(f"\n {test['name']}")
lines.append(f" Tags: {', '.join(test['tags'])}")
lines.append(f" Duration: {test['duration']:.2f}s")
if "error" in test:
lines.append(f" Error: {test['error'][:200]}...")

# Skipped tests
skipped_tests = [t for t in analysis["tests"] if t["outcome"] == "SKIPPED"]
if skipped_tests:
lines.append("")
lines.append("SKIPPED TESTS")
lines.append("-" * 80)
for test in skipped_tests:
lines.append(f" {test['name']}")

lines.append("")
lines.append("=" * 80)

# Write report
with open(output_path, "w") as f:
f.write("\n".join(lines))

Expand All @@ -128,4 +163,15 @@ def print_summary(analysis: Dict[str, Any]):
print(f"Passed: {summary['passed']} ({summary['pass_rate']}%)")
print(f"Failed: {summary['failed']}")
print(f"Skipped: {summary['skipped']}")
print("=" * 60)

# Failed test counts by type
grouped = _categorize_failures(analysis)
if grouped:
total = sum(len(v) for v in grouped.values())
print(f"\nFailed Tests ({total}):")
print("-" * 60)
for ft in sorted(grouped):
print(f" {ft}: {len(grouped[ft])}")

print("=" * 60 + "\n")
104 changes: 104 additions & 0 deletions documentdb_tests/compatibility/result_analyzer/test_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Tests for failure extraction and categorization in the analyzer."""

from documentdb_tests.compatibility.result_analyzer.analyzer import (
extract_exception_type,
extract_failure_tag,
is_infrastructure_error,
)


def _make_test_result(crash_message: str) -> dict:
"""Helper to build a minimal test result dict with a crash message."""
return {"call": {"crash": {"message": crash_message}}}


# --- extract_failure_tag ---


class TestExtractFailureTag:
    """extract_failure_tag pulls the bracketed tag out of a crash message."""

    def test_result_mismatch(self):
        msg = "[RESULT_MISMATCH] Expected [1,2,3] but got [1,2]"
        assert extract_failure_tag(_make_test_result(msg)) == "RESULT_MISMATCH"

    def test_unexpected_error(self):
        msg = "[UNEXPECTED_ERROR] Expected success but got exception"
        assert extract_failure_tag(_make_test_result(msg)) == "UNEXPECTED_ERROR"

    def test_error_mismatch(self):
        msg = "[ERROR_MISMATCH] Expected code 11000 but got 26"
        assert extract_failure_tag(_make_test_result(msg)) == "ERROR_MISMATCH"

    def test_unexpected_success(self):
        msg = "[UNEXPECTED_SUCCESS] Expected error but got result"
        assert extract_failure_tag(_make_test_result(msg)) == "UNEXPECTED_SUCCESS"

    def test_test_exception(self):
        msg = "[TEST_EXCEPTION] Bad test setup"
        assert extract_failure_tag(_make_test_result(msg)) == "TEST_EXCEPTION"

    def test_no_tag(self):
        # No bracketed tag present -> empty string, not an error.
        msg = "AssertionError: values differ"
        assert extract_failure_tag(_make_test_result(msg)) == ""

    def test_empty_message(self):
        assert extract_failure_tag(_make_test_result("")) == ""

    def test_missing_call(self):
        # A result dict without a "call" section must be handled gracefully.
        assert extract_failure_tag({}) == ""


# --- extract_exception_type ---


class TestExtractExceptionType:
    """extract_exception_type takes the text before the first colon."""

    def test_simple_exception(self):
        got = extract_exception_type("ConnectionError: refused")
        assert got == "ConnectionError"

    def test_dotted_exception(self):
        got = extract_exception_type("pymongo.errors.OperationFailure: code 11000")
        assert got == "pymongo.errors.OperationFailure"

    def test_no_colon(self):
        # Messages without an exception prefix yield no type.
        assert extract_exception_type("just a message") == ""

    def test_empty(self):
        assert extract_exception_type("") == ""


# --- is_infrastructure_error ---


class TestIsInfrastructureError:
    """is_infrastructure_error flags connection/timeout exceptions only."""

    def test_connection_error(self):
        r = _make_test_result("ConnectionError: Cannot connect")
        assert is_infrastructure_error(r) is True

    def test_timeout_error(self):
        r = _make_test_result("TimeoutError: timed out")
        assert is_infrastructure_error(r) is True

    def test_pymongo_connection_failure(self):
        r = _make_test_result("pymongo.errors.ConnectionFailure: connection lost")
        assert is_infrastructure_error(r) is True

    def test_pymongo_server_selection(self):
        r = _make_test_result("pymongo.errors.ServerSelectionTimeoutError: no servers")
        assert is_infrastructure_error(r) is True

    def test_assertion_error_not_infra(self):
        # Tagged assertion failures are product mismatches, not infra issues.
        r = _make_test_result("AssertionError: [RESULT_MISMATCH] wrong value")
        assert is_infrastructure_error(r) is False

    def test_operation_failure_not_infra(self):
        # Server-side operation errors are real test outcomes, not infra.
        r = _make_test_result("pymongo.errors.OperationFailure: code 11000")
        assert is_infrastructure_error(r) is False

    def test_empty_message(self):
        assert is_infrastructure_error(_make_test_result("")) is False

    def test_missing_call(self):
        assert is_infrastructure_error({}) is False
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Collection management tests."""
Loading
Loading