From a5d9588a830364b63047f242b38f9c9128da4204 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 24 Mar 2026 13:43:35 +0100
Subject: [PATCH 01/13] Cleanup base python project.

---
 .flake8                  |  16 -----
 .github/workflows/ci.yml |  57 +++++------------
 .pre-commit-config.yaml  | 119 ++++-------------------------------
 .vulture_whitelist.py    |   5 --
 pyproject.toml           | 131 ++++++++++++---------------------------
 5 files changed, 68 insertions(+), 260 deletions(-)
 delete mode 100644 .flake8
 delete mode 100644 .vulture_whitelist.py

diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 2f3d98d..0000000
--- a/.flake8
+++ /dev/null
@@ -1,16 +0,0 @@
-[flake8]
-max-line-length = 88
-extend-ignore = E203, W503, C901
-per-file-ignores =
-    __init__.py:F401
-    tests/*:D100,D101,D102,D103
-exclude =
-    .git,
-    __pycache__,
-    .venv,
-    venv,
-    build,
-    dist,
-    *.egg-info,
-    .pytest_cache,
-    .mypy_cache
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 61513ca..a53579b 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -10,12 +10,12 @@ permissions:
   contents: read
 
 jobs:
-  version-checks:
+  test:
     name: Python ${{ matrix.python-version }} Tests
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
+        python-version: ['3.10', '3.11', '3.12', '3.13']
 
     steps:
       - uses: actions/checkout@v4
@@ -35,45 +35,14 @@ jobs:
         run: pytest --cov=xarf --cov-report=term -v tests/
 
       - name: Upload coverage
-        if: matrix.python-version == '3.11'
+        if: matrix.python-version == '3.12'
         uses: codecov/codecov-action@v4
         with:
           fail_ci_if_error: false
 
   code-quality:
-    name: Code Quality - ${{ matrix.check.name }}
+    name: Code Quality
     runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        check:
-          - name: "Format (black)"
-            cmd: "black --check ."
-            error: false
-          - name: "Imports (isort)"
-            cmd: "isort --check-only --profile black ."
-            error: false
-          - name: "Linting (flake8)"
-            cmd: "flake8 xarf/ tests/"
-            error: false
-          - name: "Security (bandit)"
-            cmd: "bandit -r xarf/ -ll"
-            error: false
-          - name: "Types (mypy)"
-            cmd: "mypy xarf/"
-            error: false
-          - name: "Complexity (radon)"
-            cmd: "radon cc xarf/ -a -nb"
-            error: false
-          - name: "Maintainability (radon)"
-            cmd: "radon mi xarf/ -nb"
-            error: false
-          - name: "Docstrings (pydocstyle)"
-            cmd: "pydocstyle xarf/"
-            error: false
-          - name: "Dead code (vulture)"
-            cmd: "vulture xarf/ .vulture_whitelist.py --min-confidence 80"
-            error: false
 
     steps:
       - uses: actions/checkout@v4
@@ -81,14 +50,22 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v5
         with:
-          python-version: '3.11'
+          python-version: '3.12'
           cache: 'pip'
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install -e ".[dev,test]"
+          pip install -e ".[dev]"
+
+      - name: Lint (ruff)
+        run: ruff check .
+
+      - name: Format (ruff)
+        run: ruff format --check .
+
+      - name: Types (mypy)
+        run: mypy --strict xarf/
 
-      - name: Run ${{ matrix.check.name }}
-        run: ${{ matrix.check.cmd }}
-        continue-on-error: ${{ matrix.check.error }}
+      - name: Security (bandit)
+        run: bandit -r xarf/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 439508b..7c8c9a8 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,116 +1,19 @@
-# Pre-commit hooks configuration
-# Install: pip install pre-commit && pre-commit install
-# Run manually: pre-commit run --all-files
-# Update hooks: pre-commit autoupdate
-
 repos:
-  # Code formatting - black
-  - repo: https://github.com/psf/black
-    rev: 24.10.0
-    hooks:
-      - id: black
-        args: [--line-length=88]
-
-  # Import sorting - isort
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.13.2
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.10
     hooks:
-      - id: isort
-        args: [--profile=black, --line-length=88]
+      - id: ruff
+        args: [--fix]
+      - id: ruff-format
 
-  # Linting - flake8
-  - repo: https://github.com/PyCQA/flake8
-    rev: 7.1.1
-    hooks:
-      - id: flake8
-        args: [--max-line-length=100, --extend-ignore=E203,W503,C901]
-        additional_dependencies: [flake8-docstrings]
-
-  # Security scanning - bandit
-  - repo: https://github.com/PyCQA/bandit
-    rev: 1.7.10
-    hooks:
-      - id: bandit
-        args: [-r, xarf/, -ll]
-        exclude: ^tests/
-
-  # Type checking - mypy
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.13.0
+    rev: v1.10.0
     hooks:
       - id: mypy
-        args: [--strict, --python-version=3.8]
-        additional_dependencies: [pydantic>=2.0.0, types-python-dateutil]
-        files: ^xarf/
-
-  # Dead code detection - vulture
-  - repo: https://github.com/jendrikseipp/vulture
-    rev: v2.13
-    hooks:
-      - id: vulture
-        args: [xarf/, .vulture_whitelist.py, --min-confidence=80]
+        additional_dependencies: [pydantic, types-jsonschema]
 
-  # Docstring style - pydocstyle
-  - repo: https://github.com/PyCQA/pydocstyle
-    rev: 6.3.0
-    hooks:
-      - id: pydocstyle
-        args: [--convention=google, --add-ignore=D100,D104,D105,D107]
-        files: ^xarf/
-        exclude: ^tests/
-
-  # Code complexity - radon
-  - repo: local
-    hooks:
-      - id: radon-cc
-        name: radon complexity check
-        entry: radon
-        language: system
-        args: [cc, xarf/, -a, -nb]
-        files: ^xarf/.*\.py$
-        pass_filenames: false
-      - id: radon-mi
-        name: radon maintainability check
-        entry: radon
-        language: system
-        args: [mi, xarf/, -nb]
-        files: ^xarf/.*\.py$
-        pass_filenames: false
-
-  # YAML validation
-  - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v5.0.0
-    hooks:
-      - id: check-yaml
-      - id: check-json
-        exclude: ^\.vscode/.*\.json$
-      - id: check-toml
-      - id: end-of-file-fixer
-      - id: trailing-whitespace
-      - id: check-added-large-files
-        args: [--maxkb=1000]
-      - id: check-merge-conflict
-      - id: check-case-conflict
-      - id: detect-private-key
-      - id: mixed-line-ending
-        args: [--fix=lf]
-
-  # Python security checks
-  - repo: https://github.com/Lucas-C/pre-commit-hooks-safety
-    rev: v1.3.3
+  - repo: https://github.com/PyCQA/bandit
+    rev: 1.7.9
     hooks:
-      - id: python-safety-dependencies-check
-        files: pyproject.toml
-
-# CI/CD equivalent hooks (informational only, not blocking)
-ci:
-  autofix_commit_msg: |
-    [pre-commit.ci] auto fixes from pre-commit.com hooks
-
-    for more information, see https://pre-commit.ci
-  autofix_prs: true
-  autoupdate_branch: ''
-  autoupdate_commit_msg: '[pre-commit.ci] pre-commit autoupdate'
-  autoupdate_schedule: weekly
-  skip: [python-safety-dependencies-check]
-  submodules: false
+      - id: bandit
+        args: [-r, xarf/]
diff --git a/.vulture_whitelist.py b/.vulture_whitelist.py
deleted file mode 100644
index ff934dd..0000000
--- a/.vulture_whitelist.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# Vulture whitelist for intentionally unused code
-# https://github.com/jendrikseipp/vulture
-
-# Pydantic validators require 'cls' parameter even if unused
-_.cls  # unused variable (validators)
diff --git a/pyproject.toml b/pyproject.toml
index 658aa63..ccc29c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "xarf"
-version = "4.0.0a1"
-description = "XARF v4 Python Parser - Parse and validate XARF v4 abuse reports"
+version = "1.0.0"
+description = "Python library for parsing, generating, and validating XARF v4 abuse reports"
 readme = "README.md"
 license = {text = "MIT"}
 authors = [
@@ -16,57 +16,50 @@ maintainers = [
 ]
 keywords = ["xarf", "abuse", "security", "parser", "validation"]
 classifiers = [
-    "Development Status :: 4 - Beta",
+    "Development Status :: 5 - Production/Stable",
     "Intended Audience :: Developers",
     "Intended Audience :: Information Technology",
     "License :: OSI Approved :: MIT License",
     "Operating System :: OS Independent",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.8",
-    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
-    "Topic :: Internet :: WWW/HTTP",
+    "Programming Language :: Python :: 3.13",
     "Topic :: Security",
     "Topic :: Software Development :: Libraries :: Python Modules",
     "Topic :: System :: Networking :: Monitoring"
 ]
-requires-python = ">=3.8"
+requires-python = ">=3.10"
 dependencies = [
-    "jsonschema>=4.0.0",
-    "python-dateutil>=2.8.0",
+    "pydantic>=2.0",
+    "jsonschema>=4.18",
+    "referencing>=0.28",
     "email-validator>=2.0.0",
-    "pydantic>=2.0.0"
 ]
 
 [project.urls]
 "Homepage" = "https://xarf.org"
-"Documentation" = "https://github.com/xarf/xarf-parser-python"
-"Repository" = "https://github.com/xarf/xarf-parser-python"
-"Bug Reports" = "https://github.com/xarf/xarf-parser-python/issues"
+"Documentation" = "https://xarf.org"
+"Repository" = "https://github.com/xarf/xarf-python"
+"Bug Reports" = "https://github.com/xarf/xarf-python/issues"
 "Specification" = "https://github.com/xarf/xarf-spec"
-"Changelog" = "https://github.com/xarf/xarf-parser-python/blob/master/CHANGELOG.md"
 
 [project.optional-dependencies]
 dev = [
-    "pytest>=7.0.0",
-    "pytest-cov>=4.0.0",
-    "black>=23.0.0",
-    "flake8>=6.0.0",
-    "mypy>=1.0.0",
-    "isort>=5.0.0",
-    "pre-commit>=3.0.0",
-    "bandit[toml]>=1.7.0",
-    "pydocstyle[toml]>=6.0.0",
-    "radon>=6.0.0",
-    "pip-audit>=2.0.0",
-    "pylint>=2.0.0",
-    "vulture>=2.0.0"
+    "pytest>=8.0",
+    "pytest-cov",
+    "mypy>=1.8",
+    "ruff>=0.4",
+    "bandit[toml]>=1.7",
+    "pre-commit",
+    "build",
+    "twine",
+    "types-jsonschema",
 ]
 test = [
-    "pytest>=7.0.0",
-    "pytest-cov>=4.0.0"
+    "pytest>=8.0",
+    "pytest-cov",
 ]
 
 [tool.setuptools.packages.find]
@@ -74,26 +67,34 @@ where = ["."]
 include = ["xarf*"]
 
 [tool.setuptools.package-data]
-xarf = ["schemas/*.json"]
+xarf = ["schemas/**/*.json", "schemas/.version"]
 
-[tool.black]
+[tool.xarf]
+spec_version = "v4.2.0"
+
+[tool.ruff]
 line-length = 88
-target-version = ["py38"]
-include = '\.pyi?$'
+target-version = "py310"
+
+[tool.ruff.lint]
+select = ["E", "W", "F", "I", "N", "D", "UP", "B"]
+ignore = ["D203", "D213"]
+
+[tool.ruff.lint.pydocstyle]
+convention = "google"
 
-[tool.isort]
-profile = "black"
-line_length = 88
+[tool.ruff.format]
+quote-style = "double"
 
 [tool.mypy]
-python_version = "3.8"
+python_version = "3.10"
 strict = true
 warn_return_any = true
 warn_unused_configs = true
 disallow_untyped_defs = true
 
 [tool.pytest.ini_options]
-minversion = "7.0"
+minversion = "8.0"
 addopts = "-v --cov=xarf --cov-report=term-missing --cov-report=html"
 testpaths = ["tests"]
 python_files = ["test_*.py", "*_test.py"]
@@ -102,7 +103,7 @@ python_functions = ["test_*"]
 
 [tool.coverage.run]
 source = ["xarf"]
-omit = ["tests/*", "setup.py"]
+omit = ["tests/*"]
 
 [tool.coverage.report]
 exclude_lines = [
@@ -112,58 +113,6 @@ exclude_lines = [
     "raise NotImplementedError"
 ]
 
-[tool.flake8]
-max-line-length = 100
-extend-ignore = ["E203", "W503", "C901"]
-per-file-ignores = [
-    "__init__.py:F401"
-]
-
 [tool.bandit]
 exclude_dirs = ["tests", "venv", ".venv", "build", "dist"]
 skips = ["B101", "B601"]
-
-[tool.pydocstyle]
-convention = "google"
-add_ignore = ["D100", "D104", "D105", "D107"]
-match = "(?!test_).*\\.py"
-match_dir = "^(?!tests|venv|\\.venv|build|dist).*"
-
-[tool.pylint.master]
-ignore = ["CVS"]
-ignore-patterns = [".*_test\\.py"]
-jobs = 1
-
-[tool.pylint.messages_control]
-disable = [
-    "missing-docstring",
-    "bare-except",
-    "locally-disabled",
-    "broad-except",
-    "unused-argument",
-    "no-member"
-]
-
-[tool.pylint.format]
-max-line-length = 100
-indent-string = "    "
-indent-after-paren = 4
-
-[tool.pylint.basic]
-good-names = ["i", "j", "k", "ex", "Run", "_", "ip"]
-bad-names = ["foo", "bar", "baz", "toto", "tutu", "tata"]
-
-[tool.pylint.design]
-max-args = 10
-max-locals = 15
-max-returns = 6
-max-branches = 12
-max-statements = 50
-max-attributes = 10
-min-public-methods = 1
-max-public-methods = 20
-
-[tool.radon]
-exclude = ["tests/*", "venv/*", ".venv/*", "build/*", "dist/*"]
-show_complexity = true
-show_mi = true
\ No newline at end of file

From 49ddd8ee3425f5da0806e0aa1d45087719bd3ba4 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 24 Mar 2026 16:42:28 +0100
Subject: [PATCH 02/13] Add script to fetch schemas from public repo. Cleanup
 IDE-related config files.

---
 .gitignore                                    |  24 +-
 .idea/.gitignore                              |  30 ---
 .idea/codeStyles/Project.xml                  |  15 --
 .idea/codeStyles/codeStyleConfig.xml          |   5 -
 .idea/inspectionProfiles/Project_Default.xml  |  27 ---
 .idea/misc.xml                                |   7 -
 .../runConfigurations/Format_Code__Black_.xml |  17 --
 .idea/runConfigurations/Pre_commit_All.xml    |  17 --
 .idea/runConfigurations/Tests.xml             |  18 --
 .idea/vcs.xml                                 |   6 -
 .vscode/extensions.json                       |  39 ---
 .vscode/launch.json                           |  51 ----
 .vscode/settings.json                         | 104 --------
 .vscode/tasks.json                            | 141 -----------
 pyproject.toml                                |   3 +-
 scripts/fetch_schemas.py                      | 224 ++++++++++++++++++
 16 files changed, 235 insertions(+), 493 deletions(-)
 delete mode 100644 .idea/.gitignore
 delete mode 100644 .idea/codeStyles/Project.xml
 delete mode 100644 .idea/codeStyles/codeStyleConfig.xml
 delete mode 100644 .idea/inspectionProfiles/Project_Default.xml
 delete mode 100644 .idea/misc.xml
 delete mode 100644 .idea/runConfigurations/Format_Code__Black_.xml
 delete mode 100644 .idea/runConfigurations/Pre_commit_All.xml
 delete mode 100644 .idea/runConfigurations/Tests.xml
 delete mode 100644 .idea/vcs.xml
 delete mode 100644 .vscode/extensions.json
 delete mode 100644 .vscode/launch.json
 delete mode 100644 .vscode/settings.json
 delete mode 100644 .vscode/tasks.json
 create mode 100644 scripts/fetch_schemas.py

diff --git a/.gitignore b/.gitignore
index a696c36..4a5582e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,6 +24,9 @@ share/python-wheels/
 *.egg
 MANIFEST
 
+# Fetched XARF schemas
+xarf/schemas/
+
 # Virtual environments
 .env
 .venv
@@ -45,24 +48,15 @@ coverage.xml
 
 # MyPy
 .mypy_cache/
+
+# Ruff
+.ruff_cache/
 .dmypy.json
 dmypy.json
 
-# IDE - VS Code (keep shared settings, ignore user-specific)
-.vscode/*
-!.vscode/settings.json
-!.vscode/extensions.json
-!.vscode/tasks.json
-!.vscode/launch.json
-
-# IDE - IntelliJ IDEA (keep shared settings, ignore user-specific)
-.idea/*
-!.idea/inspectionProfiles/
-!.idea/codeStyles/
-!.idea/runConfigurations/
-!.idea/vcs.xml
-!.idea/misc.xml
-!.idea/.gitignore
+# IDE
+.vscode/
+.idea/
 
 # IDE - Other
 *.swp
diff --git a/.idea/.gitignore b/.idea/.gitignore
deleted file mode 100644
index 06facd2..0000000
--- a/.idea/.gitignore
+++ /dev/null
@@ -1,30 +0,0 @@
-# User-specific stuff
-workspace.xml
-tasks.xml
-usage.statistics.xml
-dictionaries
-shelf/
-
-# Generated files
-contentModel.xml
-uiDesigner.xml
-
-# Sensitive or high-churn files
-dataSources/
-dataSources.ids
-dataSources.local.xml
-sqlDataSources.xml
-dynamic.xml
-
-# Gradle
-.idea/**/gradle.xml
-.idea/**/libraries
-
-# CMake
-cmake-build-*/
-
-# File-based project format
-*.iws
-
-# IntelliJ
-out/
diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml
deleted file mode 100644
index 0958f6f..0000000
--- a/.idea/codeStyles/Project.xml
+++ /dev/null
@@ -1,15 +0,0 @@
-<component name="ProjectCodeStyleConfiguration">
-  <code_scheme name="Project" version="173">
-    <option name="LINE_SEPARATOR" value="&#10;" />
-    <Python>
-      <option name="ALIGN_COLLECTIONS_AND_COMPREHENSIONS" value="false" />
-      <option name="SPACE_AFTER_NUMBER_SIGN" value="true" />
-      <option name="SPACE_BEFORE_NUMBER_SIGN" value="true" />
-      <option name="OPTIMIZE_IMPORTS_SORT_NAMES_IN_FROM_IMPORTS" value="true" />
-      <option name="OPTIMIZE_IMPORTS_CASE_INSENSITIVE_ORDER" value="true" />
-      <option name="OPTIMIZE_IMPORTS_JOIN_FROM_IMPORTS_WITH_SAME_SOURCE" value="true" />
-      <option name="USE_CONTINUATION_INDENT_FOR_ARGUMENTS" value="true" />
-      <option name="DICT_ALIGNMENT" value="0" />
-    </Python>
-  </code_scheme>
-</component>
diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml
deleted file mode 100644
index 0f7bc51..0000000
--- a/.idea/codeStyles/codeStyleConfig.xml
+++ /dev/null
@@ -1,5 +0,0 @@
-<component name="ProjectCodeStyleConfiguration">
-  <state>
-    <option name="USE_PER_PROJECT_SETTINGS" value="true" />
-  </state>
-</component>
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
deleted file mode 100644
index 7b746be..0000000
--- a/.idea/inspectionProfiles/Project_Default.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<component name="InspectionProjectProfileManager">
-  <profile version="1.0">
-    <option name="myName" value="Project Default" />
-    <inspection_tool class="PyPep8Inspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
-      <option name="ignoredErrors">
-        <list>
-          <option value="E203" />
-          <option value="W503" />
-          <option value="C901" />
-        </list>
-      </option>
-    </inspection_tool>
-    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
-      <option name="ignoredErrors">
-        <list>
-          <option value="N802" />
-          <option value="N806" />
-        </list>
-      </option>
-    </inspection_tool>
-    <inspection_tool class="SpellCheckingInspection" enabled="false" level="TYPO" enabled_by_default="false">
-      <option name="processCode" value="true" />
-      <option name="processLiterals" value="true" />
-      <option name="processComments" value="true" />
-    </inspection_tool>
-  </profile>
-</component>
diff --git a/.idea/misc.xml b/.idea/misc.xml
deleted file mode 100644
index 49ffe2c..0000000
--- a/.idea/misc.xml
+++ /dev/null
@@ -1,7 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="Black">
-    <option name="sdkName" value="Python 3.8 (xarf-python)" />
-  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (xarf-python)" project-jdk-type="Python SDK" />
-</project>
diff --git a/.idea/runConfigurations/Format_Code__Black_.xml b/.idea/runConfigurations/Format_Code__Black_.xml
deleted file mode 100644
index 5fbf44a..0000000
--- a/.idea/runConfigurations/Format_Code__Black_.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<component name="ProjectRunConfigurationManager">
-  <configuration default="false" name="Format Code (Black)" type="ShConfigurationType">
-    <option name="SCRIPT_TEXT" value="black ." />
-    <option name="INDEPENDENT_SCRIPT_PATH" value="true" />
-    <option name="SCRIPT_PATH" value="" />
-    <option name="SCRIPT_OPTIONS" value="" />
-    <option name="INDEPENDENT_SCRIPT_WORKING_DIRECTORY" value="true" />
-    <option name="SCRIPT_WORKING_DIRECTORY" value="$PROJECT_DIR$" />
-    <option name="INDEPENDENT_INTERPRETER_PATH" value="true" />
-    <option name="INTERPRETER_PATH" value="/bin/bash" />
-    <option name="INTERPRETER_OPTIONS" value="" />
-    <option name="EXECUTE_IN_TERMINAL" value="true" />
-    <option name="EXECUTE_SCRIPT_FILE" value="false" />
-    <envs />
-    <method v="2" />
-  </configuration>
-</component>
diff --git a/.idea/runConfigurations/Pre_commit_All.xml b/.idea/runConfigurations/Pre_commit_All.xml
deleted file mode 100644
index 8a19ecd..0000000
--- a/.idea/runConfigurations/Pre_commit_All.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<component name="ProjectRunConfigurationManager">
-  <configuration default="false" name="Pre-commit All" type="ShConfigurationType">
-    <option name="SCRIPT_TEXT" value="pre-commit run --all-files" />
-    <option name="INDEPENDENT_SCRIPT_PATH" value="true" />
-    <option name="SCRIPT_PATH" value="" />
-    <option name="SCRIPT_OPTIONS" value="" />
-    <option name="INDEPENDENT_SCRIPT_WORKING_DIRECTORY" value="true" />
-    <option name="SCRIPT_WORKING_DIRECTORY" value="$PROJECT_DIR$" />
-    <option name="INDEPENDENT_INTERPRETER_PATH" value="true" />
-    <option name="INTERPRETER_PATH" value="/bin/bash" />
-    <option name="INTERPRETER_OPTIONS" value="" />
-    <option name="EXECUTE_IN_TERMINAL" value="true" />
-    <option name="EXECUTE_SCRIPT_FILE" value="false" />
-    <envs />
-    <method v="2" />
-  </configuration>
-</component>
diff --git a/.idea/runConfigurations/Tests.xml b/.idea/runConfigurations/Tests.xml
deleted file mode 100644
index b98924c..0000000
--- a/.idea/runConfigurations/Tests.xml
+++ /dev/null
@@ -1,18 +0,0 @@
-<component name="ProjectRunConfigurationManager">
-  <configuration default="false" name="Tests" type="tests" factoryName="py.test">
-    <module name="xarf-python" />
-    <option name="INTERPRETER_OPTIONS" value="" />
-    <option name="PARENT_ENVS" value="true" />
-    <option name="SDK_HOME" value="" />
-    <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
-    <option name="IS_MODULE_SDK" value="true" />
-    <option name="ADD_CONTENT_ROOTS" value="true" />
-    <option name="ADD_SOURCE_ROOTS" value="true" />
-    <option name="_new_keywords" value="&quot;&quot;" />
-    <option name="_new_parameters" value="&quot;--cov=xarf --cov-report=term-missing -v&quot;" />
-    <option name="_new_additionalArguments" value="&quot;&quot;" />
-    <option name="_new_target" value="&quot;$PROJECT_DIR$/tests&quot;" />
-    <option name="_new_targetType" value="&quot;PATH&quot;" />
-    <method v="2" />
-  </configuration>
-</component>
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 5ace414..0000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,6 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
-  <component name="VcsDirectoryMappings">
-    <mapping directory="$PROJECT_DIR$" vcs="Git" />
-  </component>
-</project>
diff --git a/.vscode/extensions.json b/.vscode/extensions.json
deleted file mode 100644
index a174158..0000000
--- a/.vscode/extensions.json
+++ /dev/null
@@ -1,39 +0,0 @@
-{
-  "recommendations": [
-    // Python essentials
-    "ms-python.python",
-    "ms-python.vscode-pylance",
-    "ms-python.black-formatter",
-    "ms-python.isort",
-    "ms-python.mypy-type-checker",
-
-    // Testing
-    "littlefoxteam.vscode-python-test-adapter",
-    "ryanluker.vscode-coverage-gutters",
-
-    // Code quality
-    "ms-python.flake8",
-    "usernamehw.errorlens",
-
-    // Git
-    "eamodio.gitlens",
-    "mhutchie.git-graph",
-
-    // YAML/JSON
-    "redhat.vscode-yaml",
-    "ZainChen.json",
-
-    // GitHub
-    "github.vscode-pull-request-github",
-    "github.copilot",
-
-    // Markdown
-    "yzhang.markdown-all-in-one",
-    "DavidAnson.vscode-markdownlint",
-
-    // Utilities
-    "christian-kohler.path-intellisense",
-    "visualstudioexptteam.vscodeintellicode",
-    "gruntfuggly.todo-tree"
-  ]
-}
diff --git a/.vscode/launch.json b/.vscode/launch.json
deleted file mode 100644
index 8c0bb71..0000000
--- a/.vscode/launch.json
+++ /dev/null
@@ -1,51 +0,0 @@
-{
-  "version": "0.2.0",
-  "configurations": [
-    {
-      "name": "Python: Current File",
-      "type": "debugpy",
-      "request": "launch",
-      "program": "${file}",
-      "console": "integratedTerminal",
-      "justMyCode": true
-    },
-    {
-      "name": "Python: Tests",
-      "type": "debugpy",
-      "request": "launch",
-      "module": "pytest",
-      "args": [
-        "-v",
-        "--cov=xarf",
-        "--cov-report=term-missing",
-        "tests/"
-      ],
-      "console": "integratedTerminal",
-      "justMyCode": false
-    },
-    {
-      "name": "Python: Current Test File",
-      "type": "debugpy",
-      "request": "launch",
-      "module": "pytest",
-      "args": [
-        "-v",
-        "${file}"
-      ],
-      "console": "integratedTerminal",
-      "justMyCode": false
-    },
-    {
-      "name": "Python: Specific Test Function",
-      "type": "debugpy",
-      "request": "launch",
-      "module": "pytest",
-      "args": [
-        "-v",
-        "${file}::${selectedText}"
-      ],
-      "console": "integratedTerminal",
-      "justMyCode": false
-    }
-  ]
-}
diff --git a/.vscode/settings.json b/.vscode/settings.json
deleted file mode 100644
index c74ad99..0000000
--- a/.vscode/settings.json
+++ /dev/null
@@ -1,104 +0,0 @@
-{
-  // Python configuration
-  "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
-  "python.terminal.activateEnvironment": true,
-
-  // Formatting
-  "[python]": {
-    "editor.defaultFormatter": "ms-python.black-formatter",
-    "editor.formatOnSave": true,
-    "editor.codeActionsOnSave": {
-      "source.organizeImports": "explicit"
-    }
-  },
-
-  // Black formatter
-  "black-formatter.args": [
-    "--line-length=88"
-  ],
-
-  // isort
-  "isort.args": [
-    "--profile=black",
-    "--line-length=88"
-  ],
-
-  // Linting
-  "python.linting.enabled": true,
-  "python.linting.flake8Enabled": true,
-  "python.linting.flake8Args": [
-    "--max-line-length=100",
-    "--extend-ignore=E203,W503,C901"
-  ],
-  "python.linting.banditEnabled": true,
-  "python.linting.banditArgs": [
-    "-r",
-    "xarf/",
-    "-ll"
-  ],
-  "python.linting.mypyEnabled": true,
-  "python.linting.mypyArgs": [
-    "--strict",
-    "--python-version=3.8"
-  ],
-  "python.linting.pydocstyleEnabled": true,
-  "python.linting.pydocstyleArgs": [
-    "--convention=google",
-    "--add-ignore=D100,D104,D105,D107"
-  ],
-  "python.linting.lintOnSave": true,
-
-  // Testing
-  "python.testing.pytestEnabled": true,
-  "python.testing.unittestEnabled": false,
-  "python.testing.pytestArgs": [
-    "tests",
-    "-v",
-    "--cov=xarf",
-    "--cov-report=term-missing"
-  ],
-  "python.testing.autoTestDiscoverOnSaveEnabled": true,
-
-  // Editor
-  "editor.rulers": [88, 100],
-  "editor.trimAutoWhitespace": true,
-  "files.trimTrailingWhitespace": true,
-  "files.insertFinalNewline": true,
-  "files.eol": "\n",
-
-  // File associations
-  "files.associations": {
-    "*.yaml": "yaml",
-    "*.yml": "yaml",
-    ".vulture_whitelist.py": "python"
-  },
-
-  // Exclude from file watcher
-  "files.watcherExclude": {
-    "**/.git/objects/**": true,
-    "**/.git/subtree-cache/**": true,
-    "**/node_modules/*/**": true,
-    "**/.venv/**": true,
-    "**/venv/**": true,
-    "**/__pycache__/**": true,
-    "**/.pytest_cache/**": true,
-    "**/.mypy_cache/**": true,
-    "**/dist/**": true,
-    "**/build/**": true,
-    "**/*.egg-info/**": true
-  },
-
-  // Search exclude
-  "search.exclude": {
-    "**/.venv": true,
-    "**/venv": true,
-    "**/__pycache__": true,
-    "**/.pytest_cache": true,
-    "**/.mypy_cache": true,
-    "**/dist": true,
-    "**/build": true,
-    "**/*.egg-info": true,
-    "**/.coverage": true,
-    "**/htmlcov": true
-  }
-}
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
deleted file mode 100644
index 8104335..0000000
--- a/.vscode/tasks.json
+++ /dev/null
@@ -1,141 +0,0 @@
-{
-  "version": "2.0.0",
-  "tasks": [
-    {
-      "label": "Install Dependencies",
-      "type": "shell",
-      "command": "pip install -e '.[dev,test]'",
-      "group": "build",
-      "presentation": {
-        "reveal": "always",
-        "panel": "new"
-      }
-    },
-    {
-      "label": "Run Tests",
-      "type": "shell",
-      "command": "pytest --cov=xarf --cov-report=term-missing -v tests/",
-      "group": {
-        "kind": "test",
-        "isDefault": true
-      },
-      "presentation": {
-        "reveal": "always",
-        "panel": "dedicated"
-      }
-    },
-    {
-      "label": "Run Tests with Coverage",
-      "type": "shell",
-      "command": "pytest --cov=xarf --cov-report=html --cov-report=term-missing -v tests/",
-      "group": "test",
-      "presentation": {
-        "reveal": "always",
-        "panel": "dedicated"
-      }
-    },
-    {
-      "label": "Format Code (Black)",
-      "type": "shell",
-      "command": "black .",
-      "group": "build",
-      "presentation": {
-        "reveal": "silent"
-      }
-    },
-    {
-      "label": "Sort Imports (isort)",
-      "type": "shell",
-      "command": "isort --profile black .",
-      "group": "build",
-      "presentation": {
-        "reveal": "silent"
-      }
-    },
-    {
-      "label": "Lint (flake8)",
-      "type": "shell",
-      "command": "flake8 xarf/ tests/",
-      "group": "build",
-      "problemMatcher": "$python",
-      "presentation": {
-        "reveal": "always"
-      }
-    },
-    {
-      "label": "Type Check (mypy)",
-      "type": "shell",
-      "command": "mypy xarf/",
-      "group": "build",
-      "problemMatcher": "$python",
-      "presentation": {
-        "reveal": "always"
-      }
-    },
-    {
-      "label": "Security Scan (bandit)",
-      "type": "shell",
-      "command": "bandit -r xarf/ -ll",
-      "group": "build",
-      "presentation": {
-        "reveal": "always"
-      }
-    },
-    {
-      "label": "Dead Code Check (vulture)",
-      "type": "shell",
-      "command": "vulture xarf/ .vulture_whitelist.py --min-confidence 80",
-      "group": "build",
-      "presentation": {
-        "reveal": "always"
-      }
-    },
-    {
-      "label": "Code Quality - All Checks",
-      "type": "shell",
-      "command": "black --check . && isort --check-only --profile black . && flake8 xarf/ tests/ && bandit -r xarf/ -ll && mypy xarf/ && vulture xarf/ .vulture_whitelist.py --min-confidence 80",
-      "group": "build",
-      "presentation": {
-        "reveal": "always",
-        "panel": "dedicated"
-      }
-    },
-    {
-      "label": "Pre-commit Run All",
-      "type": "shell",
-      "command": "pre-commit run --all-files",
-      "group": "build",
-      "presentation": {
-        "reveal": "always",
-        "panel": "dedicated"
-      }
-    },
-    {
-      "label": "Install Pre-commit Hooks",
-      "type": "shell",
-      "command": "pre-commit install",
-      "group": "build",
-      "presentation": {
-        "reveal": "always"
-      }
-    },
-    {
-      "label": "Build Package",
-      "type": "shell",
-      "command": "python -m build",
-      "group": "build",
-      "presentation": {
-        "reveal": "always"
-      }
-    },
-    {
-      "label": "Clean Build Artifacts",
-      "type": "shell",
-      "command": "rm -rf build/ dist/ *.egg-info .pytest_cache .mypy_cache .coverage htmlcov/",
-      "group": "build",
-      "presentation": {
-        "reveal": "silent"
-      }
-    }
-  ]
-}
diff --git a/pyproject.toml b/pyproject.toml
index ccc29c7..0907f12 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,6 +56,7 @@ dev = [
     "build",
     "twine",
     "types-jsonschema",
+    "tomli>=2.0; python_version < '3.11'",
 ]
 test = [
     "pytest>=8.0",
@@ -77,7 +78,7 @@ line-length = 88
 target-version = "py310"
 
 [tool.ruff.lint]
-select = ["E", "W", "F", "I", "N", "D", "UP", "B"]
+select = ["E", "W", "F", "I", "N", "D", "UP", "B", "PLC", "PLE"]
 ignore = ["D203", "D213"]
 
 [tool.ruff.lint.pydocstyle]
diff --git a/scripts/fetch_schemas.py b/scripts/fetch_schemas.py
new file mode 100644
index 0000000..ba16c84
--- /dev/null
+++ b/scripts/fetch_schemas.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python3
+"""Fetch XARF schemas from the official xarf-spec GitHub release.
+
+This script downloads JSON schemas from a specific tagged release of
+https://github.com/xarf/xarf-spec and extracts them into xarf/schemas/.
+
+The target spec version is read from ``[tool.xarf] spec_version`` in
+``pyproject.toml``. Run this script before publishing a new library release
+to update the bundled schemas.
+
+Usage:
+    python scripts/fetch_schemas.py
+    python scripts/fetch_schemas.py --force   # re-fetch even if up to date
+"""
+
+import argparse
+import datetime
+import io
+import json
+import shutil
+import sys
+import tarfile
+import tempfile
+import urllib.request
+from pathlib import Path
+
+GITHUB_REPO = "xarf/xarf-spec"
+REPO_ROOT = Path(__file__).parent.parent
+SCHEMAS_DIR = REPO_ROOT / "xarf" / "schemas"
+PYPROJECT_PATH = REPO_ROOT / "pyproject.toml"
+
+
+def get_configured_version() -> str:
+    """Read the target spec version from pyproject.toml.
+
+    Returns:
+        The spec version string (e.g. ``"v4.2.0"``).
+
+    Raises:
+        SystemExit: If the version key is missing or pyproject.toml is unreadable.
+    """
+    # tomllib is stdlib in 3.11+; tomli is the backport for 3.10
+    try:
+        import tomllib  # noqa: PLC0415
+    except ImportError:
+        try:
+            import tomli as tomllib  # type: ignore[no-redef]  # noqa: PLC0415
+        except ImportError:
+            print(
+                "ERROR: tomllib not available. Use Python 3.11+ or install tomli.",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+    try:
+        with PYPROJECT_PATH.open("rb") as f:
+            data = tomllib.load(f)
+    except OSError as exc:
+        print(f"ERROR: Cannot read {PYPROJECT_PATH}: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+    version = data.get("tool", {}).get("xarf", {}).get("spec_version")
+    if not version:
+        print(
+            "ERROR: [tool.xarf] spec_version not found in pyproject.toml.",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+
+    return version
+
+
+def needs_fetch(version: str) -> bool:
+    """Check whether schemas need to be (re-)fetched.
+
+    Args:
+        version: Target spec version string.
+
+    Returns:
+        ``True`` if the local schemas are absent or pinned to a different version.
+    """
+    version_file = SCHEMAS_DIR / ".version"
+    if not version_file.exists():
+        return True
+    try:
+        info = json.loads(version_file.read_text())
+        return info.get("version") != version
+    except (json.JSONDecodeError, OSError):
+        return True
+
+
+def download(url: str) -> bytes:
+    """Download a URL, following redirects, with a 60-second timeout.
+
+    Args:
+        url: The URL to download.
+
+    Returns:
+        The raw response bytes.
+
+    Raises:
+        SystemExit: On HTTP error or timeout.
+    """
+    print(f"[xarf] Downloading {url}...")
+    try:
+        req = urllib.request.Request(
+            url, headers={"User-Agent": "xarf-python/fetch-schemas"}
+        )
+        with urllib.request.urlopen(req, timeout=60) as response:
+            data = response.read()
+        print(f"[xarf] Downloaded {len(data) / 1024:.1f} KB")
+        return data
+    except urllib.error.HTTPError as exc:
+        print(f"ERROR: HTTP {exc.code} fetching {url}", file=sys.stderr)
+        sys.exit(1)
+    except OSError as exc:
+        print(f"ERROR: {exc}", file=sys.stderr)
+        sys.exit(1)
+
+
+def extract_and_copy(tarball: bytes, version: str) -> None:
+    """Extract schemas/v4/ from the tarball into xarf/schemas/.
+
+    Args:
+        tarball: Raw ``.tar.gz`` bytes.
+        version: Version string, used to locate the extracted root directory.
+
+    Raises:
+        SystemExit: If the expected directory structure is not found in the tarball.
+    """
+    print("[xarf] Extracting schemas...")
+    with tempfile.TemporaryDirectory() as tmp:
+        tmp_path = Path(tmp)
+        with tarfile.open(fileobj=io.BytesIO(tarball), mode="r:gz") as tf:
+            tf.extractall(tmp_path)  # noqa: S202  (trusted GitHub tarball)
+
+        # GitHub tarballs extract to xarf-spec-{version-without-v}/
+        version_without_v = version.lstrip("v")
+        candidate = tmp_path / f"xarf-spec-{version_without_v}"
+        if not candidate.is_dir():
+            # Fall back: find the first directory in the temp root
+            dirs = [p for p in tmp_path.iterdir() if p.is_dir()]
+            if not dirs:
+                print("ERROR: No directory found in tarball.", file=sys.stderr)
+                sys.exit(1)
+            candidate = dirs[0]
+
+        source = candidate / "schemas" / "v4"
+        if not source.is_dir():
+            print(
+                f"ERROR: schemas/v4/ not found inside tarball at {source}",
+                file=sys.stderr,
+            )
+            sys.exit(1)
+
+        # Replace xarf/schemas/ with fresh content
+        if SCHEMAS_DIR.exists():
+            shutil.rmtree(SCHEMAS_DIR)
+        SCHEMAS_DIR.mkdir(parents=True)
+        (SCHEMAS_DIR / "types").mkdir()
+
+        for item in source.iterdir():
+            if item.is_file() and item.suffix == ".json":
+                shutil.copy2(item, SCHEMAS_DIR / item.name)
+                print(f"[xarf]   - {item.name}")
+
+        types_src = source / "types"
+        if types_src.is_dir():
+            for item in types_src.iterdir():
+                if item.is_file() and item.suffix == ".json":
+                    shutil.copy2(item, SCHEMAS_DIR / "types" / item.name)
+                    print(f"[xarf]   - types/{item.name}")
+
+
+def write_version_info(version: str) -> None:
+    """Write a .version file recording the fetched spec version.
+
+    Args:
+        version: The spec version string that was fetched.
+    """
+    info = {
+        "version": version,
+        "fetched_at": datetime.datetime.now(datetime.timezone.utc).isoformat(),
+        "source": f"https://github.com/{GITHUB_REPO}/tree/{version}",
+    }
+    (SCHEMAS_DIR / ".version").write_text(json.dumps(info, indent=2) + "\n")
+
+
+def fetch_schemas(force: bool = False) -> None:
+    """Main entry point: fetch and install schemas from GitHub.
+
+    Args:
+        force: If ``True``, fetch even when the local version is already current.
+    """
+    version = get_configured_version()
+    print(f"[xarf] Checking schemas for xarf-spec {version}...")
+
+    if not force and not needs_fetch(version):
+        print(f"[xarf] Schemas already up to date ({version})")
+        return
+
+    tarball_url = f"https://github.com/{GITHUB_REPO}/archive/refs/tags/{version}.tar.gz"
+    tarball = download(tarball_url)
+    extract_and_copy(tarball, version)
+    write_version_info(version)
+    print(f"[xarf] Successfully fetched schemas for xarf-spec {version}")
+
+
+def main() -> None:
+    """Parse CLI arguments and run the fetch."""
+    parser = argparse.ArgumentParser(
+        description="Fetch XARF JSON schemas from the xarf-spec GitHub release."
+    )
+    parser.add_argument(
+        "--force",
+        action="store_true",
+        help="Re-fetch even if the local schemas are already at the target version.",
+    )
+    args = parser.parse_args()
+    fetch_schemas(force=args.force)
+
+
+if __name__ == "__main__":
+    main()

From 239df049885f9f1dbe0d323aea8f7a0d6e0b7a31 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 24 Mar 2026 18:02:02 +0100
Subject: [PATCH 03/13] Add types for all report Category and Type combinations

---
 pyproject.toml               |   4 +-
 xarf/__init__.py             | 216 +++++++++++++++-
 xarf/models.py               | 399 +++++++++++++++++++----------
 xarf/types_connection.py     | 234 +++++++++++++++++
 xarf/types_content.py        | 308 ++++++++++++++++++++++
 xarf/types_copyright.py      | 477 +++++++++++++++++++++++++++++++++++
 xarf/types_infrastructure.py |  62 +++++
 xarf/types_messaging.py      | 109 ++++++++
 xarf/types_reputation.py     |  51 ++++
 xarf/types_vulnerability.py  | 115 +++++++++
 10 files changed, 1833 insertions(+), 142 deletions(-)
 create mode 100644 xarf/types_connection.py
 create mode 100644 xarf/types_content.py
 create mode 100644 xarf/types_copyright.py
 create mode 100644 xarf/types_infrastructure.py
 create mode 100644 xarf/types_messaging.py
 create mode 100644 xarf/types_reputation.py
 create mode 100644 xarf/types_vulnerability.py

diff --git a/pyproject.toml b/pyproject.toml
index 0907f12..aea0dfa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -9,10 +9,10 @@ description = "Python library for parsing, generating, and validating XARF v4 ab
 readme = "README.md"
 license = {text = "MIT"}
 authors = [
-    {name = "XARF Project", email = "contact@xarf.org"}
+    {name = "XARF Project", email = "admin@abusix.com"}
 ]
 maintainers = [
-    {name = "XARF Project", email = "contact@xarf.org"}
+    {name = "XARF Project", email = "admin@abusix.com"}
 ]
 keywords = ["xarf", "abuse", "security", "parser", "validation"]
 classifiers = [
diff --git a/xarf/__init__.py b/xarf/__init__.py
index eb892b8..8c488bb 100644
--- a/xarf/__init__.py
+++ b/xarf/__init__.py
@@ -1,27 +1,217 @@
-"""XARF v4 Python Parser.
+"""XARF v4 Python library.
 
-A Python library for parsing and validating XARF v4
-(eXtended Abuse Reporting Format) reports.
-Includes backwards compatibility with XARF v3.
+A Python library for parsing, generating, and validating XARF v4
+(eXtended Abuse Reporting Format) reports. Includes backwards
+compatibility with XARF v3.
+
+Example:
+    >>> from xarf import parse, create_report, create_evidence
+    >>> result = parse(json_data)
+    >>> result.report
+    SpamReport(...)
 """
 
-__version__ = "4.0.0a1"
+from xarf.exceptions import (
+    XARFError,
+    XARFParseError,
+    XARFSchemaError,
+    XARFValidationError,
+)
+from xarf.models import (
+    AnyXARFReport,
+    ContactInfo,
+    CreateReportResult,
+    ParseResult,
+    ValidationError,
+    ValidationWarning,
+    XARFEvidence,
+    XARFReport,
+)
+from xarf.types_connection import (
+    ConnectionBaseReport,
+    ConnectionReport,
+    DdosReport,
+    InfectedHostReport,
+    LoginAttackReport,
+    PortScanReport,
+    ReconnaissanceReport,
+    ScrapingReport,
+    SqlInjectionReport,
+    VulnerabilityScanReport,
+)
+from xarf.types_content import (
+    BrandInfringementReport,
+    CompromiseIndicator,
+    ContentBaseReport,
+    ContentReport,
+    CsamReport,
+    CsemReport,
+    ExposedDataReport,
+    FraudReport,
+    MalwareReport,
+    PhishingReport,
+    RegistrantDetails,
+    RemoteCompromiseReport,
+    SuspiciousRegistrationReport,
+    WebshellDetails,
+)
+from xarf.types_copyright import (
+    CopyrightBaseReport,
+    CopyrightCopyrightReport,
+    CopyrightCyberlockerReport,
+    CopyrightLinkSiteReport,
+    CopyrightP2pReport,
+    CopyrightReport,
+    CopyrightUgcPlatformReport,
+    CopyrightUsenetReport,
+    CyberlockerTakedownInfo,
+    CyberlockerUploaderInfo,
+    FileInfo,
+    LinkedContentItem,
+    LinkSiteLinkInfo,
+    LinkSiteRanking,
+    MessageInfo,
+    PeerInfo,
+    SwarmInfo,
+    UgcContentInfo,
+    UgcMatchDetails,
+    UgcMonetizationInfo,
+    UgcUploaderInfo,
+    UsenetEncodingInfo,
+    UsenetNzbInfo,
+    UsenetServerInfo,
+)
+from xarf.types_infrastructure import (
+    BotnetReport,
+    CompromisedServerReport,
+    InfrastructureBaseReport,
+    InfrastructureReport,
+)
+from xarf.types_messaging import (
+    BulkIndicators,
+    BulkMessagingReport,
+    MessagingBaseReport,
+    MessagingReport,
+    SpamIndicators,
+    SpamReport,
+)
+from xarf.types_reputation import (
+    BlocklistReport,
+    ReputationBaseReport,
+    ReputationReport,
+    ThreatIntelligenceReport,
+)
+from xarf.types_vulnerability import (
+    CveReport,
+    ImpactAssessment,
+    MisconfigurationReport,
+    OpenServiceReport,
+    VulnerabilityBaseReport,
+    VulnerabilityReport,
+)
+from xarf.v3_compat import convert_v3_to_v4, is_v3_report
+
+__version__ = "0.1.0.dev0"
 __author__ = "XARF Project"
 __email__ = "contact@xarf.org"
 
-from .exceptions import XARFError, XARFParseError, XARFValidationError
-from .generator import XARFGenerator
-from .models import XARFReport
-from .parser import XARFParser
-from .v3_compat import convert_v3_to_v4, is_v3_report
+# Spec version this library was built against.
+SPEC_VERSION = "4.2.0"
 
 __all__ = [
-    "XARFParser",
+    # Version
+    "SPEC_VERSION",
+    # Result types
+    "AnyXARFReport",
+    "ParseResult",
+    "CreateReportResult",
+    "ValidationError",
+    "ValidationWarning",
+    # Base models
     "XARFReport",
+    "XARFEvidence",
+    "ContactInfo",
+    # Exceptions
     "XARFError",
     "XARFValidationError",
     "XARFParseError",
-    "XARFGenerator",
-    "convert_v3_to_v4",
+    "XARFSchemaError",
+    # v3 compatibility
     "is_v3_report",
+    "convert_v3_to_v4",
+    # Messaging
+    "MessagingBaseReport",
+    "SpamIndicators",
+    "SpamReport",
+    "BulkIndicators",
+    "BulkMessagingReport",
+    "MessagingReport",
+    # Connection
+    "ConnectionBaseReport",
+    "LoginAttackReport",
+    "PortScanReport",
+    "DdosReport",
+    "InfectedHostReport",
+    "ReconnaissanceReport",
+    "ScrapingReport",
+    "SqlInjectionReport",
+    "VulnerabilityScanReport",
+    "ConnectionReport",
+    # Content
+    "ContentBaseReport",
+    "PhishingReport",
+    "MalwareReport",
+    "CsamReport",
+    "CsemReport",
+    "ExposedDataReport",
+    "BrandInfringementReport",
+    "FraudReport",
+    "CompromiseIndicator",
+    "WebshellDetails",
+    "RemoteCompromiseReport",
+    "RegistrantDetails",
+    "SuspiciousRegistrationReport",
+    "ContentReport",
+    # Infrastructure
+    "InfrastructureBaseReport",
+    "BotnetReport",
+    "CompromisedServerReport",
+    "InfrastructureReport",
+    # Copyright
+    "CopyrightBaseReport",
+    "CopyrightCopyrightReport",
+    "SwarmInfo",
+    "PeerInfo",
+    "CopyrightP2pReport",
+    "FileInfo",
+    "CyberlockerTakedownInfo",
+    "CyberlockerUploaderInfo",
+    "CopyrightCyberlockerReport",
+    "UgcContentInfo",
+    "UgcUploaderInfo",
+    "UgcMatchDetails",
+    "UgcMonetizationInfo",
+    "CopyrightUgcPlatformReport",
+    "LinkSiteLinkInfo",
+    "LinkedContentItem",
+    "LinkSiteRanking",
+    "CopyrightLinkSiteReport",
+    "MessageInfo",
+    "UsenetEncodingInfo",
+    "UsenetNzbInfo",
+    "UsenetServerInfo",
+    "CopyrightUsenetReport",
+    "CopyrightReport",
+    # Vulnerability
+    "VulnerabilityBaseReport",
+    "ImpactAssessment",
+    "CveReport",
+    "OpenServiceReport",
+    "MisconfigurationReport",
+    "VulnerabilityReport",
+    # Reputation
+    "ReputationBaseReport",
+    "BlocklistReport",
+    "ThreatIntelligenceReport",
+    "ReputationReport",
 ]
diff --git a/xarf/models.py b/xarf/models.py
index 826f9d8..d356436 100644
--- a/xarf/models.py
+++ b/xarf/models.py
@@ -1,151 +1,296 @@
-"""XARF Data Models."""
+"""XARF v4 base models, result types, and report union.
 
-from datetime import datetime
-from typing import Any, Dict, List, Optional
+This module defines the foundational Pydantic models (ContactInfo, XARFEvidence,
+XARFReport), result dataclasses (ParseResult, CreateReportResult), and the
+AnyXARFReport discriminated union used throughout the library.
+"""
 
-from pydantic import BaseModel, ConfigDict, Field, field_validator
+from __future__ import annotations
 
+from dataclasses import dataclass
+from typing import Annotated
 
-class XARFReporter(BaseModel):
-    """XARF Reporter information."""
+from pydantic import BaseModel, ConfigDict, Discriminator, Field, Tag
+
+# ---------------------------------------------------------------------------
+# Result dataclasses
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class ValidationError:
+    """A single validation error found during parsing or report creation.
+
+    Attributes:
+        field: The field path where the error occurred (e.g. ``"reporter.org"``).
+        message: Human-readable description of the error.
+        value: The offending value, if available.
+    """
+
+    field: str
+    message: str
+    value: object = None
+
+
+@dataclass
+class ValidationWarning:
+    """A non-fatal warning produced during validation.
+
+    Attributes:
+        field: The field path where the warning applies.
+        message: Human-readable description of the warning.
+    """
+
+    field: str
+    message: str
+
+
+@dataclass
+class ParseResult:
+    """Result returned by :func:`xarf.parse`.
+
+    Attributes:
+        report: The parsed report, or ``None`` if parsing failed entirely.
+        errors: List of validation errors encountered.
+        warnings: List of non-fatal warnings.
+        info: Optional metadata dict (populated when ``show_missing_optional=True``).
+    """
+
+    report: AnyXARFReport | None
+    errors: list[ValidationError]
+    warnings: list[ValidationWarning]
+    info: dict[str, object] | None = None
+
+
+@dataclass
+class CreateReportResult:
+    """Result returned by :func:`xarf.create_report`.
+
+    Attributes:
+        report: The created report, or ``None`` if creation failed.
+        errors: List of validation errors encountered.
+        warnings: List of non-fatal warnings.
+        info: Optional metadata dict.
+    """
+
+    report: AnyXARFReport | None
+    errors: list[ValidationError]
+    warnings: list[ValidationWarning]
+    info: dict[str, object] | None = None
+
+
+# ---------------------------------------------------------------------------
+# Base Pydantic models
+# ---------------------------------------------------------------------------
+
+
+class ContactInfo(BaseModel):
+    """Contact information for a reporter or sender.
+
+    Attributes:
+        org: Name of the organization.
+        contact: Contact email address or identifier.
+        domain: Domain associated with the organization.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
 
     org: str
     contact: str
-    type: str = Field(..., pattern="^(automated|manual|hybrid)$")
+    domain: str
 
 
 class XARFEvidence(BaseModel):
-    """XARF Evidence item."""
+    """A single evidence item attached to an XARF report.
+
+    Attributes:
+        content_type: MIME type of the evidence payload (e.g. ``"message/rfc822"``).
+        payload: Base64-encoded or raw evidence data.
+        description: Human-readable description of this evidence item.
+        hash: Hex digest of the payload (algorithm indicated by ``hash_algorithm``).
+        size: Size of the payload in bytes.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
 
     content_type: str
-    description: str
     payload: str
+    description: str | None = None
+    hash: str | None = None
+    size: int | None = None
 
 
 class XARFReport(BaseModel):
-    """Base XARF v4 Report model."""
+    """Base XARF v4 report structure shared by all report types.
 
-    # Required base fields
-    xarf_version: str = Field(..., pattern="^4\\.0\\.0$")
+    Fields marked *Recommended* in the XARF spec (``x-recommended: true``) are
+    modelled as plain optional fields here. Strict-mode validation in
+    :mod:`xarf.schema_validator` promotes them to required at validation time.
+
+    Attributes:
+        xarf_version: XARF specification version (e.g. ``"4.2.0"``).
+        report_id: Unique identifier for this report (UUID recommended).
+        timestamp: ISO 8601 datetime string of when the incident was observed.
+        reporter: Contact information for the reporting party.
+        sender: Contact information for the sending/originating party.
+        source_identifier: IP address, domain, or other identifier of the source.
+        category: One of the 7 XARF abuse categories.
+        type: Report type within the category (e.g. ``"spam"``, ``"ddos"``).
+        evidence_source: How the evidence was collected (recommended).
+        source_port: Source TCP/UDP port (recommended).
+        description: Free-text description of the incident.
+        legacy_version: Set to ``"3"`` only for reports converted from XARF v3.
+        evidence: List of attached evidence items.
+        tags: Arbitrary string tags for categorization.
+        confidence: Confidence score for the report (0-100).
+        internal: Internal metadata; serialized as ``_internal`` in JSON.
+    """
+
+    model_config = ConfigDict(
+        extra="allow",
+        populate_by_name=True,
+    )
+
+    # Required fields
+    xarf_version: str
     report_id: str
-    timestamp: datetime
-    reporter: XARFReporter
-    on_behalf_of: Optional[XARFReporter] = None
+    timestamp: str
+    reporter: ContactInfo
+    sender: ContactInfo
     source_identifier: str
-    category: str = Field(..., alias="category")
+    category: str
     type: str
-    evidence_source: str
 
-    # Optional base fields
-    evidence: Optional[List[XARFEvidence]] = []
-    tags: Optional[List[str]] = []
-    _internal: Optional[Dict[str, Any]] = None
+    # Recommended fields (optional in schema; promoted to required under strict mode)
+    evidence_source: str | None = None
+    source_port: int | None = None
 
-    # Category-specific fields (will be populated based on category)
-    additional_fields: Optional[Dict[str, Any]] = {}
+    # Optional fields
+    description: str | None = None
+    legacy_version: str | None = None
+    evidence: list[XARFEvidence] | None = None
+    tags: list[str] | None = None
+    confidence: int | None = None
+    internal: dict[str, object] | None = Field(default=None, alias="_internal")
 
-    model_config = ConfigDict(
-        populate_by_name=True,
-        extra="allow",  # Allow additional fields for category-specific data
-    )
 
-    @field_validator("category")
-    @classmethod
-    def validate_category(cls, v: str) -> str:
-        """Validate XARF category field."""
-        valid_categories = {
-            "messaging",
-            "connection",
-            "content",
-            "infrastructure",
-            "copyright",
-            "vulnerability",
-            "reputation",
-            "other",
-        }
-        if v not in valid_categories:
-            raise ValueError(
-                f"Invalid category '{v}'. Must be one of: {valid_categories}"
-            )
-        return v
-
-    @field_validator("evidence_source")
-    @classmethod
-    def validate_evidence_source(cls, v: str) -> str:
-        """Validate evidence source field."""
-        valid_sources = {
-            "spamtrap",
-            "honeypot",
-            "user_report",
-            "automated_scan",
-            "manual_analysis",
-            "vulnerability_scan",
-            "researcher_analysis",
-            "threat_intelligence",
-        }
-        if v not in valid_sources:
-            raise ValueError(
-                f"Invalid evidence_source '{v}'. Must be one of: {valid_sources}"
-            )
-        return v
-
-
-class MessagingReport(XARFReport):
-    """XARF Messaging category report."""
-
-    # Required for messaging
-    protocol: Optional[str] = None
-
-    # Email-specific fields
-    smtp_from: Optional[str] = None
-    smtp_to: Optional[str] = None
-    subject: Optional[str] = None
-    message_id: Optional[str] = None
-
-    # Common messaging fields
-    sender_display_name: Optional[str] = None
-    target_victim: Optional[str] = None
-    message_content: Optional[str] = None
-
-
-class ConnectionReport(XARFReport):
-    """XARF Connection category report."""
-
-    # Required for connection
-    destination_ip: str
-    protocol: str
-
-    # Optional connection fields
-    destination_port: Optional[int] = None
-    source_port: Optional[int] = None
-    attack_type: Optional[str] = None
-    duration_minutes: Optional[int] = None
-    packet_count: Optional[int] = None
-    byte_count: Optional[int] = None
-
-    # Login attack specific
-    attempt_count: Optional[int] = None
-    successful_logins: Optional[int] = None
-    usernames_attempted: Optional[List[str]] = []
-    attack_pattern: Optional[str] = None
-
-
-class ContentReport(XARFReport):
-    """XARF Content category report."""
-
-    # Required for content
-    url: str
-
-    # Optional content fields
-    content_type: Optional[str] = None
-    attack_type: Optional[str] = None
-    affected_pages: Optional[List[str]] = []
-    cms_platform: Optional[str] = None
-    vulnerability_exploited: Optional[str] = None
-
-    # Web hack specific
-    affected_parameters: Optional[List[str]] = []
-    payload_detected: Optional[str] = None
-    data_exposed: Optional[List[str]] = []
-    database_type: Optional[str] = None
-    records_potentially_affected: Optional[int] = None
+# ---------------------------------------------------------------------------
+# AnyXARFReport discriminated union
+# ---------------------------------------------------------------------------
+# Concrete type imports live at the bottom to avoid circular imports.
+# models.py defines XARFReport; category files import XARFReport from models;
+# models.py then imports the concrete types after XARFReport is fully defined.
+
+
+def _report_discriminator(v: dict[str, object] | XARFReport) -> str:
+    """Derive a composite discriminator key ``"<category>/<type>"`` from a report.
+
+    Args:
+        v: A raw dict or an already-constructed :class:`XARFReport` subclass.
+
+    Returns:
+        A string of the form ``"<category>/<type>"`` used to select the concrete
+        model class during Pydantic discriminated-union validation.
+    """
+    if isinstance(v, dict):
+        return f"{v.get('category')}/{v.get('type')}"
+    return f"{v.category}/{v.type}"
+
+
+from xarf.types_connection import (  # noqa: E402
+    DdosReport,
+    InfectedHostReport,
+    LoginAttackReport,
+    PortScanReport,
+    ReconnaissanceReport,
+    ScrapingReport,
+    SqlInjectionReport,
+    VulnerabilityScanReport,
+)
+from xarf.types_content import (  # noqa: E402
+    BrandInfringementReport,
+    CsamReport,
+    CsemReport,
+    ExposedDataReport,
+    FraudReport,
+    MalwareReport,
+    PhishingReport,
+    RemoteCompromiseReport,
+    SuspiciousRegistrationReport,
+)
+from xarf.types_copyright import (  # noqa: E402
+    CopyrightCopyrightReport,
+    CopyrightCyberlockerReport,
+    CopyrightLinkSiteReport,
+    CopyrightP2pReport,
+    CopyrightUgcPlatformReport,
+    CopyrightUsenetReport,
+)
+from xarf.types_infrastructure import (  # noqa: E402
+    BotnetReport,
+    CompromisedServerReport,
+)
+from xarf.types_messaging import BulkMessagingReport, SpamReport  # noqa: E402
+from xarf.types_reputation import (  # noqa: E402
+    BlocklistReport,
+    ThreatIntelligenceReport,
+)
+from xarf.types_vulnerability import (  # noqa: E402
+    CveReport,
+    MisconfigurationReport,
+    OpenServiceReport,
+)
+
+AnyXARFReport = Annotated[
+    # messaging
+    Annotated[SpamReport, Tag("messaging/spam")]
+    | Annotated[BulkMessagingReport, Tag("messaging/bulk_messaging")]
+    # connection
+    | Annotated[LoginAttackReport, Tag("connection/login_attack")]
+    | Annotated[PortScanReport, Tag("connection/port_scan")]
+    | Annotated[DdosReport, Tag("connection/ddos")]
+    | Annotated[InfectedHostReport, Tag("connection/infected_host")]
+    | Annotated[ReconnaissanceReport, Tag("connection/reconnaissance")]
+    | Annotated[ScrapingReport, Tag("connection/scraping")]
+    | Annotated[SqlInjectionReport, Tag("connection/sql_injection")]
+    | Annotated[VulnerabilityScanReport, Tag("connection/vulnerability_scan")]
+    # content
+    | Annotated[PhishingReport, Tag("content/phishing")]
+    | Annotated[MalwareReport, Tag("content/malware")]
+    | Annotated[CsamReport, Tag("content/csam")]
+    | Annotated[CsemReport, Tag("content/csem")]
+    | Annotated[ExposedDataReport, Tag("content/exposed_data")]
+    | Annotated[BrandInfringementReport, Tag("content/brand_infringement")]
+    | Annotated[FraudReport, Tag("content/fraud")]
+    | Annotated[RemoteCompromiseReport, Tag("content/remote_compromise")]
+    | Annotated[SuspiciousRegistrationReport, Tag("content/suspicious_registration")]
+    # copyright
+    | Annotated[CopyrightCopyrightReport, Tag("copyright/copyright")]
+    | Annotated[CopyrightP2pReport, Tag("copyright/p2p")]
+    | Annotated[CopyrightCyberlockerReport, Tag("copyright/cyberlocker")]
+    | Annotated[CopyrightUgcPlatformReport, Tag("copyright/ugc_platform")]
+    | Annotated[CopyrightLinkSiteReport, Tag("copyright/link_site")]
+    | Annotated[CopyrightUsenetReport, Tag("copyright/usenet")]
+    # infrastructure
+    | Annotated[BotnetReport, Tag("infrastructure/botnet")]
+    | Annotated[CompromisedServerReport, Tag("infrastructure/compromised_server")]
+    # vulnerability
+    | Annotated[CveReport, Tag("vulnerability/cve")]
+    | Annotated[OpenServiceReport, Tag("vulnerability/open_service")]
+    | Annotated[MisconfigurationReport, Tag("vulnerability/misconfiguration")]
+    # reputation
+    | Annotated[BlocklistReport, Tag("reputation/blocklist")]
+    | Annotated[ThreatIntelligenceReport, Tag("reputation/threat_intelligence")],
+    Discriminator(_report_discriminator),
+]
+"""Union of all 32 concrete XARF report types with a composite discriminator.
+
+Pydantic resolves the correct subclass at runtime using the composite
+``"<category>/<type>"`` key produced by :func:`_report_discriminator`.
+
+Example:
+    >>> from pydantic import TypeAdapter
+    >>> from xarf.models import AnyXARFReport
+    >>> adapter = TypeAdapter(AnyXARFReport)
+    >>> report = adapter.validate_python({"category": "messaging", "type": "spam", ...})
+"""
diff --git a/xarf/types_connection.py b/xarf/types_connection.py
new file mode 100644
index 0000000..6388c8c
--- /dev/null
+++ b/xarf/types_connection.py
@@ -0,0 +1,234 @@
+"""XARF v4 Connection category type definitions.
+
+Mirrors ``types-connection.ts`` from the JavaScript reference implementation.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import ConfigDict
+
+from xarf.models import XARFReport
+
+
+class ConnectionBaseReport(XARFReport):
+    """Shared fields for all connection-category reports.
+
+    Attributes:
+        category: Always ``"connection"`` for this category.
+        first_seen: ISO 8601 timestamp of when the activity was first observed.
+        protocol: Network protocol (e.g. ``"tcp"``, ``"udp"``, ``"icmp"``).
+        destination_ip: Destination IP address targeted by the source.
+        destination_port: Destination port number.
+        last_seen: ISO 8601 timestamp of when the activity was last observed.
+    """
+
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    category: Literal["connection"]
+    first_seen: str
+    protocol: str
+    destination_ip: str | None = None
+    destination_port: int | None = None
+    last_seen: str | None = None
+
+
+class LoginAttackReport(ConnectionBaseReport):
+    """Connection - Login Attack report.
+
+    Attributes:
+        type: Always ``"login_attack"``.
+    """
+
+    type: Literal["login_attack"]
+
+
+class PortScanReport(ConnectionBaseReport):
+    """Connection - Port Scan report.
+
+    Attributes:
+        type: Always ``"port_scan"``.
+    """
+
+    type: Literal["port_scan"]
+
+
+class DdosReport(ConnectionBaseReport):
+    """Connection - DDoS (Distributed Denial of Service) report.
+
+    Attributes:
+        type: Always ``"ddos"``.
+        amplification_factor: Amplification factor used in the attack.
+        attack_vector: Attack vector description (e.g. ``"udp_flood"``, ``"ntp"``).
+        duration_seconds: Duration of the attack in seconds.
+        mitigation_applied: Whether active mitigation was applied.
+        peak_bps: Peak attack bandwidth in bits per second.
+        peak_pps: Peak attack rate in packets per second.
+        service_impact: Description of the impact on services.
+        threshold_exceeded: Description of which thresholds were exceeded.
+    """
+
+    type: Literal["ddos"]
+    amplification_factor: float | None = None
+    attack_vector: str | None = None
+    duration_seconds: int | None = None
+    mitigation_applied: bool | None = None
+    peak_bps: int | None = None
+    peak_pps: int | None = None
+    service_impact: str | None = None
+    threshold_exceeded: str | None = None
+
+
+class InfectedHostReport(ConnectionBaseReport):
+    """Connection - Infected Host report.
+
+    Attributes:
+        type: Always ``"infected_host"``.
+        bot_type: Type of bot or malicious agent (required).
+        accepts_cookies: Whether the bot accepts cookies.
+        api_endpoints_accessed: API endpoints accessed by the bot.
+        behavior_pattern: Description of observed behaviour patterns.
+        bot_name: Known name of the bot or malware family.
+        follows_crawl_delay: Whether the bot respects crawl-delay directives.
+        javascript_execution: Whether the bot executes JavaScript.
+        request_rate: Observed request rate in requests per second.
+        respects_robots_txt: Whether the bot respects ``robots.txt``.
+        total_requests: Total number of requests observed.
+        user_agent: User-Agent string used by the bot.
+        verification_status: Status of bot verification checks.
+    """
+
+    type: Literal["infected_host"]
+    bot_type: str
+    accepts_cookies: bool | None = None
+    api_endpoints_accessed: list[str] | None = None
+    behavior_pattern: str | None = None
+    bot_name: str | None = None
+    follows_crawl_delay: bool | None = None
+    javascript_execution: bool | None = None
+    request_rate: float | None = None
+    respects_robots_txt: bool | None = None
+    total_requests: int | None = None
+    user_agent: str | None = None
+    verification_status: str | None = None
+
+
+class ReconnaissanceReport(ConnectionBaseReport):
+    """Connection - Reconnaissance report.
+
+    Attributes:
+        type: Always ``"reconnaissance"``.
+        probed_resources: List of resources probed by the source (required).
+        automated_tool: Whether an automated tool was detected.
+        http_methods: HTTP methods observed in the reconnaissance activity.
+        resource_categories: Categories of resources targeted.
+        response_codes: HTTP response codes returned to the source.
+        successful_probes: Resources that responded successfully.
+        total_probes: Total number of probe attempts.
+        user_agent: User-Agent string used during reconnaissance.
+    """
+
+    type: Literal["reconnaissance"]
+    probed_resources: list[str]
+    automated_tool: bool | None = None
+    http_methods: list[str] | None = None
+    resource_categories: list[str] | None = None
+    response_codes: list[int] | None = None
+    successful_probes: list[str] | None = None
+    total_probes: int | None = None
+    user_agent: str | None = None
+
+
+class ScrapingReport(ConnectionBaseReport):
+    """Connection - Scraping report.
+
+    Attributes:
+        type: Always ``"scraping"``.
+        total_requests: Total number of requests made by the scraper (required).
+        bot_signature: Signature or fingerprint of the scraping tool.
+        concurrent_connections: Number of concurrent connections observed.
+        data_volume: Total volume of data scraped in bytes.
+        request_rate: Request rate in requests per second.
+        respects_robots_txt: Whether the scraper respects ``robots.txt``.
+        scraping_pattern: Description of the scraping pattern observed.
+        session_duration: Duration of the scraping session in seconds.
+        target_content: Type of content being scraped.
+        unique_urls: Number of unique URLs accessed.
+        user_agent: User-Agent string used by the scraper.
+    """
+
+    type: Literal["scraping"]
+    total_requests: int
+    bot_signature: str | None = None
+    concurrent_connections: int | None = None
+    data_volume: int | None = None
+    request_rate: float | None = None
+    respects_robots_txt: bool | None = None
+    scraping_pattern: str | None = None
+    session_duration: int | None = None
+    target_content: str | None = None
+    unique_urls: int | None = None
+    user_agent: str | None = None
+
+
+class SqlInjectionReport(ConnectionBaseReport):
+    """Connection - SQL Injection report.
+
+    Attributes:
+        type: Always ``"sql_injection"``.
+        attack_technique: SQL injection technique used (e.g. ``"blind"``, ``"union"``).
+        attempts_count: Number of injection attempts observed.
+        http_method: HTTP method used (e.g. ``"GET"``, ``"POST"``).
+        injection_point: Where injection was attempted (e.g. ``"query_param"``).
+        payload_sample: Sample of the injection payload observed.
+        target_url: URL targeted by the SQL injection attempt.
+    """
+
+    type: Literal["sql_injection"]
+    attack_technique: str | None = None
+    attempts_count: int | None = None
+    http_method: str | None = None
+    injection_point: str | None = None
+    payload_sample: str | None = None
+    target_url: str | None = None
+
+
+class VulnerabilityScanReport(ConnectionBaseReport):
+    """Connection - Vulnerability Scan report.
+
+    Attributes:
+        type: Always ``"vulnerability_scan"``.
+        scan_type: Type of vulnerability scan (e.g. ``"port_scan"``) (required).
+        scan_rate: Scan rate in probes per second.
+        scanner_signature: Identified scanner tool or signature.
+        targeted_ports: List of ports targeted by the scan.
+        targeted_services: List of services or service names targeted.
+        total_requests: Total number of scan probe requests.
+        user_agent: User-Agent string used by the scanner.
+        vulnerabilities_probed: CVE IDs or vulnerability names probed.
+    """
+
+    type: Literal["vulnerability_scan"]
+    scan_type: str
+    scan_rate: float | None = None
+    scanner_signature: str | None = None
+    targeted_ports: list[int] | None = None
+    targeted_services: list[str] | None = None
+    total_requests: int | None = None
+    user_agent: str | None = None
+    vulnerabilities_probed: list[str] | None = None
+
+
+# Category-level union alias (for isinstance checks and type annotations).
+ConnectionReport = (
+    LoginAttackReport
+    | PortScanReport
+    | DdosReport
+    | InfectedHostReport
+    | ReconnaissanceReport
+    | ScrapingReport
+    | SqlInjectionReport
+    | VulnerabilityScanReport
+)
+"""Union of all connection-category report types."""
diff --git a/xarf/types_content.py b/xarf/types_content.py
new file mode 100644
index 0000000..a96e186
--- /dev/null
+++ b/xarf/types_content.py
@@ -0,0 +1,308 @@
+"""XARF v4 Content category type definitions.
+
+Mirrors ``types-content.ts`` from the JavaScript reference implementation.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict
+
+from xarf.models import XARFReport
+
+
+class ContentBaseReport(XARFReport):
+    """Shared fields for all content-category reports.
+
+    Mirrors ``content-base.json`` in the spec.
+
+    Attributes:
+        category: Always ``"content"`` for this category.
+        url: URL where the abusive content is hosted (required).
+        domain: Domain associated with the abusive content.
+        target_brand: Brand being targeted or impersonated.
+        verified_at: ISO 8601 timestamp when the content was verified.
+        verification_method: Method used to verify the content (e.g. ``"manual"``).
+    """
+
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    category: Literal["content"]
+    url: str
+    domain: str | None = None
+    target_brand: str | None = None
+    verified_at: str | None = None
+    verification_method: str | None = None
+
+
+class PhishingReport(ContentBaseReport):
+    """Content - Phishing report.
+
+    Attributes:
+        type: Always ``"phishing"``.
+        cloned_site: URL of the legitimate site being cloned.
+        credential_fields: Form field names harvesting credentials.
+        lure_type: Social-engineering lure used (e.g. ``"banking"``,
+            ``"tech_support"``).
+        submission_url: URL where harvested credentials are submitted.
+    """
+
+    type: Literal["phishing"]
+    cloned_site: str | None = None
+    credential_fields: list[str] | None = None
+    lure_type: str | None = None
+    submission_url: str | None = None
+
+
+class MalwareReport(ContentBaseReport):
+    """Content - Malware report.
+
+    Attributes:
+        type: Always ``"malware"``.
+        distribution_method: How the malware is distributed (e.g. ``"drive_by"``).
+        file_hashes: Map of hash algorithm to hex digest (e.g. ``{"sha256": "ab..."}``.
+        malware_family: Known malware family name.
+        malware_type: Malware classification (e.g. ``"trojan"``, ``"ransomware"``).
+    """
+
+    type: Literal["malware"]
+    distribution_method: str | None = None
+    file_hashes: dict[str, str] | None = None
+    malware_family: str | None = None
+    malware_type: str | None = None
+
+
+class CsamReport(ContentBaseReport):
+    """Content - CSAM (Child Sexual Abuse Material) report.
+
+    Attributes:
+        type: Always ``"csam"``.
+        classification: CSAM classification level (required).
+        detection_method: Method used to detect the content (required).
+        content_removed: Whether the content has been removed.
+        hash_values: Map of hash algorithm to hex digest for matching.
+        media_type: Media type of the content (e.g. ``"image"``, ``"video"``).
+        ncmec_report_id: NCMEC CyberTipline report ID, if filed.
+    """
+
+    type: Literal["csam"]
+    classification: str
+    detection_method: str
+    content_removed: bool | None = None
+    hash_values: dict[str, str] | None = None
+    media_type: str | None = None
+    ncmec_report_id: str | None = None
+
+
+class CsemReport(ContentBaseReport):
+    """Content - CSEM (Child Sexual Exploitation Material) report.
+
+    Attributes:
+        type: Always ``"csem"``.
+        detection_method: Method used to detect the content (required).
+        exploitation_type: Type of exploitation depicted (required).
+        evidence_type: Types of evidence collected.
+        platform: Platform where the content was found.
+        reporting_obligations: Legal reporting obligations triggered.
+        victim_age_range: Estimated age range of the victim(s).
+    """
+
+    type: Literal["csem"]
+    detection_method: str
+    exploitation_type: str
+    evidence_type: list[str] | None = None
+    platform: str | None = None
+    reporting_obligations: list[str] | None = None
+    victim_age_range: str | None = None
+
+
+class ExposedDataReport(ContentBaseReport):
+    """Content - Exposed Data report.
+
+    Attributes:
+        type: Always ``"exposed_data"``.
+        data_types: Categories of data exposed (required,
+            e.g. ``["pii", "credentials"]``).
+        exposure_method: How the data was exposed (required,
+            e.g. ``"misconfigured_bucket"``).
+        affected_organization: Organization whose data was exposed.
+        encryption_status: Encryption status of the exposed data.
+        record_count: Approximate number of records exposed.
+        sensitive_fields: Specific sensitive field names exposed.
+    """
+
+    type: Literal["exposed_data"]
+    data_types: list[str]
+    exposure_method: str
+    affected_organization: str | None = None
+    encryption_status: str | None = None
+    record_count: int | None = None
+    sensitive_fields: list[str] | None = None
+
+
+class BrandInfringementReport(ContentBaseReport):
+    """Content - Brand Infringement report.
+
+    Attributes:
+        type: Always ``"brand_infringement"``.
+        infringement_type: Type of infringement (required, e.g. ``"trademark"``).
+        legitimate_site: URL of the legitimate brand site (required).
+        infringing_elements: Specific elements that infringe the brand.
+        similarity_score: Similarity score between infringing and legitimate site (0–1).
+    """
+
+    type: Literal["brand_infringement"]
+    infringement_type: str
+    legitimate_site: str
+    infringing_elements: list[str] | None = None
+    similarity_score: float | None = None
+
+
+class FraudReport(ContentBaseReport):
+    """Content - Fraud report.
+
+    Attributes:
+        type: Always ``"fraud"``.
+        fraud_type: Type of fraud (required, e.g. ``"investment_scam"``).
+        claimed_entity: Entity fraudulently claimed or impersonated.
+        payment_methods: Payment methods promoted or used by the fraud.
+    """
+
+    type: Literal["fraud"]
+    fraud_type: str
+    claimed_entity: str | None = None
+    payment_methods: list[str] | None = None
+
+
+class CompromiseIndicator(BaseModel):
+    """A single indicator of compromise (IOC).
+
+    Attributes:
+        type: IOC type (e.g. ``"file_path"``, ``"process"``).
+        value: The indicator value.
+        description: Human-readable description of this IOC.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    type: Literal[
+        "file_path",
+        "process",
+        "network_connection",
+        "user_account",
+        "scheduled_task",
+        "registry_key",
+        "service",
+    ]
+    value: str
+    description: str | None = None
+
+
+class WebshellDetails(BaseModel):
+    """Details about a webshell found on a compromised server.
+
+    Attributes:
+        family: Known webshell family name.
+        capabilities: Capabilities provided by the webshell.
+        password_protected: Whether the webshell is password-protected.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    family: str | None = None
+    capabilities: (
+        list[
+            Literal[
+                "file_manager",
+                "command_execution",
+                "database_access",
+                "network_scanning",
+                "privilege_escalation",
+                "persistence",
+                "other",
+            ]
+        ]
+        | None
+    ) = None
+    password_protected: bool | None = None
+
+
+class RemoteCompromiseReport(ContentBaseReport):
+    """Content - Remote Compromise report.
+
+    Attributes:
+        type: Always ``"remote_compromise"``.
+        compromise_type: How the system was compromised (required, e.g. ``"webshell"``).
+        affected_cms: CMS platform affected (e.g. ``"wordpress"``).
+        compromise_indicators: List of indicators of compromise found.
+        malicious_activities: Malicious activities observed on the host.
+        persistence_mechanisms: Persistence mechanisms installed by the attacker.
+        webshell_details: Details about a webshell, if present.
+    """
+
+    type: Literal["remote_compromise"]
+    compromise_type: str
+    affected_cms: str | None = None
+    compromise_indicators: list[CompromiseIndicator] | None = None
+    malicious_activities: list[str] | None = None
+    persistence_mechanisms: list[str] | None = None
+    webshell_details: WebshellDetails | None = None
+
+
+class RegistrantDetails(BaseModel):
+    """Details about the domain registrant.
+
+    Attributes:
+        email_domain: Domain of the registrant email address.
+        country: Country of the registrant.
+        privacy_protected: Whether WHOIS privacy protection is enabled.
+        bulk_registrations: Number of bulk domain registrations by this registrant.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    email_domain: str | None = None
+    country: str | None = None
+    privacy_protected: bool | None = None
+    bulk_registrations: int | None = None
+
+
+class SuspiciousRegistrationReport(ContentBaseReport):
+    """Content - Suspicious Registration report.
+
+    Attributes:
+        type: Always ``"suspicious_registration"``.
+        registration_date: ISO 8601 date when the domain was registered (required).
+        suspicious_indicators: Reasons the registration is considered
+            suspicious (required).
+        days_since_registration: Number of days since the domain was registered.
+        predicted_usage: Predicted abuse types for the domain.
+        registrant_details: Details about the registrant.
+        risk_score: Risk score for the registration (0–100).
+        targeted_brands: Brands the domain appears to target.
+    """
+
+    type: Literal["suspicious_registration"]
+    registration_date: str
+    suspicious_indicators: list[str]
+    days_since_registration: int | None = None
+    predicted_usage: list[str] | None = None
+    registrant_details: RegistrantDetails | None = None
+    risk_score: float | None = None
+    targeted_brands: list[str] | None = None
+
+
+# Category-level union alias (for isinstance checks and type annotations).
+ContentReport = (
+    PhishingReport
+    | MalwareReport
+    | CsamReport
+    | CsemReport
+    | ExposedDataReport
+    | BrandInfringementReport
+    | FraudReport
+    | RemoteCompromiseReport
+    | SuspiciousRegistrationReport
+)
+"""Union of all content-category report types."""
diff --git a/xarf/types_copyright.py b/xarf/types_copyright.py
new file mode 100644
index 0000000..291419f
--- /dev/null
+++ b/xarf/types_copyright.py
@@ -0,0 +1,477 @@
+"""XARF v4 Copyright category type definitions.
+
+Mirrors ``types-copyright.ts`` from the JavaScript reference implementation.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict
+
+from xarf.models import XARFReport
+
+
+class CopyrightBaseReport(XARFReport):
+    """Shared fields for all copyright-category reports.
+
+    Attributes:
+        category: Always ``"copyright"`` for this category.
+        rights_holder: Name of the rights holder filing the report.
+        work_category: Category of the copyrighted work (e.g. ``"music"``, ``"film"``).
+        work_title: Title of the copyrighted work.
+    """
+
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    category: Literal["copyright"]
+    rights_holder: str | None = None
+    work_category: str | None = None
+    work_title: str | None = None
+
+
+class CopyrightCopyrightReport(CopyrightBaseReport):
+    """Copyright - Direct Infringement / DMCA report.
+
+    Attributes:
+        type: Always ``"copyright"``.
+        infringing_url: URL where infringing content is hosted (required).
+        infringement_type: Type of infringement (e.g. ``"reproduction"``,
+            ``"distribution"``).
+        original_url: URL of the original legitimate work.
+    """
+
+    type: Literal["copyright"]
+    infringing_url: str
+    infringement_type: str | None = None
+    original_url: str | None = None
+
+
+class SwarmInfo(BaseModel):
+    """BitTorrent swarm information.
+
+    Note: either ``info_hash`` or ``magnet_uri`` is required at runtime (enforced
+    by AJV/jsonschema validation, not by Pydantic).
+
+    Attributes:
+        info_hash: Hex-encoded info hash of the torrent.
+        magnet_uri: Magnet URI for the torrent.
+        torrent_name: Display name of the torrent.
+        file_count: Number of files in the torrent.
+        total_size: Total size of the torrent in bytes.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    info_hash: str | None = None
+    magnet_uri: str | None = None
+    torrent_name: str | None = None
+    file_count: int | None = None
+    total_size: int | None = None
+
+
+class PeerInfo(BaseModel):
+    """BitTorrent peer information.
+
+    Attributes:
+        peer_id: Peer ID observed in the swarm.
+        client_version: BitTorrent client version string.
+        upload_amount: Amount of data uploaded by this peer in bytes.
+        download_amount: Amount of data downloaded by this peer in bytes.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    peer_id: str | None = None
+    client_version: str | None = None
+    upload_amount: int | None = None
+    download_amount: int | None = None
+
+
+class CopyrightP2pReport(CopyrightBaseReport):
+    """Copyright - P2P (BitTorrent / peer-to-peer) report.
+
+    Attributes:
+        type: Always ``"p2p"``.
+        p2p_protocol: P2P protocol used (required, e.g. ``"bittorrent"``).
+        swarm_info: Information about the torrent swarm (required).
+        detection_method: How the infringement was detected.
+        peer_info: Information about the infringing peer.
+        release_date: ISO 8601 release date of the work.
+    """
+
+    type: Literal["p2p"]
+    p2p_protocol: str
+    swarm_info: SwarmInfo
+    detection_method: str | None = None
+    peer_info: PeerInfo | None = None
+    release_date: str | None = None
+
+
+class FileInfo(BaseModel):
+    """Cyberlocker file metadata.
+
+    Attributes:
+        filename: Original filename of the infringing file.
+        file_size: File size in bytes.
+        file_hash: Hash of the file (algorithm implied by context).
+        upload_date: ISO 8601 date the file was uploaded.
+        download_count: Number of times the file has been downloaded.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    filename: str | None = None
+    file_size: int | None = None
+    file_hash: str | None = None
+    upload_date: str | None = None
+    download_count: int | None = None
+
+
+class CyberlockerTakedownInfo(BaseModel):
+    """Information about previous takedown requests for cyberlocker content.
+
+    Attributes:
+        previous_requests: Number of prior takedown requests submitted.
+        service_response_time: Typical response time of the service.
+        automated_removal: Whether the service offers automated removal.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    previous_requests: int | None = None
+    service_response_time: str | None = None
+    automated_removal: bool | None = None
+
+
+class CyberlockerUploaderInfo(BaseModel):
+    """Information about the uploader on a cyberlocker service.
+
+    Attributes:
+        username: Username of the uploader.
+        user_id: Platform-specific user identifier.
+        account_type: Account tier of the uploader.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    username: str | None = None
+    user_id: str | None = None
+    account_type: Literal["free", "premium", "business", "unknown"] | None = None
+
+
+class CopyrightCyberlockerReport(CopyrightBaseReport):
+    """Copyright - Cyberlocker report.
+
+    Attributes:
+        type: Always ``"cyberlocker"``.
+        hosting_service: Name of the cyberlocker service (required).
+        infringing_url: Direct URL to the infringing file (required).
+        access_method: How the file is accessed (e.g. ``"direct_link"``).
+        file_info: Metadata about the infringing file.
+        takedown_info: Information about previous takedown requests.
+        uploader_info: Information about the uploader.
+    """
+
+    type: Literal["cyberlocker"]
+    hosting_service: str
+    infringing_url: str
+    access_method: str | None = None
+    file_info: FileInfo | None = None
+    takedown_info: CyberlockerTakedownInfo | None = None
+    uploader_info: CyberlockerUploaderInfo | None = None
+
+
+class UgcContentInfo(BaseModel):
+    """Content information for a UGC platform upload.
+
+    Attributes:
+        content_id: Platform-specific content identifier.
+        content_title: Title of the uploaded content.
+        content_description: Description of the uploaded content.
+        upload_date: ISO 8601 date the content was uploaded.
+        content_duration: Duration of the content in seconds.
+        view_count: Number of views.
+        like_count: Number of likes.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    content_id: str | None = None
+    content_title: str | None = None
+    content_description: str | None = None
+    upload_date: str | None = None
+    content_duration: int | None = None
+    view_count: int | None = None
+    like_count: int | None = None
+
+
+class UgcUploaderInfo(BaseModel):
+    """Uploader information for a UGC platform.
+
+    Attributes:
+        username: Username of the uploader.
+        user_id: Platform-specific user identifier.
+        account_verified: Whether the account is verified.
+        subscriber_count: Number of subscribers/followers.
+        account_creation_date: ISO 8601 date the account was created.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    username: str | None = None
+    user_id: str | None = None
+    account_verified: bool | None = None
+    subscriber_count: int | None = None
+    account_creation_date: str | None = None
+
+
+class UgcMatchDetails(BaseModel):
+    """Content match details from a reference fingerprinting system.
+
+    Attributes:
+        match_confidence: Confidence of the content match (0–1).
+        match_duration: Duration of the matched segment in seconds.
+        match_percentage: Percentage of the work matched (0–100).
+        reference_id: Reference system identifier for the matched work.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    match_confidence: float | None = None
+    match_duration: float | None = None
+    match_percentage: float | None = None
+    reference_id: str | None = None
+
+
+class UgcMonetizationInfo(BaseModel):
+    """Monetization information for UGC platform content.
+
+    Attributes:
+        monetized: Whether the content is monetized.
+        ad_revenue: Whether the content generates ad revenue.
+        premium_content: Whether the content is behind a paywall.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    monetized: bool | None = None
+    ad_revenue: bool | None = None
+    premium_content: bool | None = None
+
+
+class CopyrightUgcPlatformReport(CopyrightBaseReport):
+    """Copyright - UGC Platform report.
+
+    Attributes:
+        type: Always ``"ugc_platform"``.
+        infringing_url: URL of the infringing content (required).
+        platform_name: Name of the UGC platform (required).
+        content_info: Metadata about the infringing content.
+        infringement_type: Type of infringement (e.g. ``"full_copy"``).
+        match_details: Content match details from a fingerprinting system.
+        monetization_info: Monetization information.
+        uploader_info: Information about the uploader.
+    """
+
+    type: Literal["ugc_platform"]
+    infringing_url: str
+    platform_name: str
+    content_info: UgcContentInfo | None = None
+    infringement_type: str | None = None
+    match_details: UgcMatchDetails | None = None
+    monetization_info: UgcMonetizationInfo | None = None
+    uploader_info: UgcUploaderInfo | None = None
+
+
+class LinkSiteLinkInfo(BaseModel):
+    """Link metadata from a link site listing.
+
+    Attributes:
+        page_title: Title of the link site page.
+        posting_date: ISO 8601 date the link was posted.
+        uploader: Username of who posted the link.
+        download_count: Reported download count for the link.
+        link_count: Number of links on the page.
+        comments_count: Number of comments on the page.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    page_title: str | None = None
+    posting_date: str | None = None
+    uploader: str | None = None
+    download_count: int | None = None
+    link_count: int | None = None
+    comments_count: int | None = None
+
+
+class LinkedContentItem(BaseModel):
+    """A single linked content item on a link site.
+
+    Attributes:
+        target_url: URL the link points to (required).
+        link_type: Type of link (required).
+        hosting_service: Name of the hosting service at ``target_url``.
+        file_size: File size in bytes, if known.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    target_url: str
+    link_type: Literal[
+        "torrent_file",
+        "magnet_link",
+        "direct_download",
+        "streaming_link",
+        "usenet_nzb",
+        "other",
+    ]
+    hosting_service: str | None = None
+    file_size: int | None = None
+
+
+class LinkSiteRanking(BaseModel):
+    """Ranking information for a link site.
+
+    Attributes:
+        alexa_rank: Alexa traffic rank of the site.
+        popularity_score: Relative popularity score.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    alexa_rank: int | None = None
+    popularity_score: float | None = None
+
+
+class CopyrightLinkSiteReport(CopyrightBaseReport):
+    """Copyright - Link Site report.
+
+    Attributes:
+        type: Always ``"link_site"``.
+        infringing_url: URL of the link site page listing infringing links (required).
+        site_name: Name of the link site (required).
+        link_info: Metadata about the link listing.
+        linked_content: Individual links to infringing content.
+        search_terms: Search terms used to find the listing.
+        site_category: Category of the link site (e.g. ``"warez"``, ``"general"``).
+        site_ranking: Traffic ranking information for the site.
+    """
+
+    type: Literal["link_site"]
+    infringing_url: str
+    site_name: str
+    link_info: LinkSiteLinkInfo | None = None
+    linked_content: list[LinkedContentItem] | None = None
+    search_terms: list[str] | None = None
+    site_category: str | None = None
+    site_ranking: LinkSiteRanking | None = None
+
+
+class MessageInfo(BaseModel):
+    """Usenet message metadata.
+
+    Attributes:
+        message_id: Message-ID header of the Usenet article (required).
+        subject: Subject of the Usenet article.
+        from_header: From header of the Usenet article.
+        posting_date: ISO 8601 date the article was posted.
+        part_number: Part number for multi-part posts.
+        total_parts: Total number of parts in a multi-part post.
+        file_size: File size in bytes.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    message_id: str
+    subject: str | None = None
+    from_header: str | None = None
+    posting_date: str | None = None
+    part_number: int | None = None
+    total_parts: int | None = None
+    file_size: int | None = None
+
+
+class UsenetEncodingInfo(BaseModel):
+    """Encoding information for Usenet content.
+
+    Attributes:
+        encoding_format: Encoding format used (e.g. ``"yenc"``).
+        par2_recovery: Whether PAR2 recovery files are present.
+        rar_compression: Whether RAR compression was used.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    encoding_format: Literal["yenc", "uuencode", "base64", "other"] | None = None
+    par2_recovery: bool | None = None
+    rar_compression: bool | None = None
+
+
+class UsenetNzbInfo(BaseModel):
+    """NZB file metadata for Usenet content.
+
+    Attributes:
+        nzb_name: Name of the NZB file.
+        nzb_url: URL where the NZB file can be found.
+        indexer_site: Usenet indexer site that published the NZB.
+        completion_percentage: Download completion percentage (0–100).
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    nzb_name: str | None = None
+    nzb_url: str | None = None
+    indexer_site: str | None = None
+    completion_percentage: float | None = None
+
+
+class UsenetServerInfo(BaseModel):
+    """Usenet server information.
+
+    Attributes:
+        nntp_server: Hostname of the NNTP server.
+        server_group: Newsgroup name on the server.
+        retention_days: Number of days articles are retained.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    nntp_server: str | None = None
+    server_group: str | None = None
+    retention_days: int | None = None
+
+
+class CopyrightUsenetReport(CopyrightBaseReport):
+    """Copyright - Usenet report.
+
+    Attributes:
+        type: Always ``"usenet"``.
+        newsgroup: Usenet newsgroup where the content was posted (required).
+        message_info: Usenet article metadata (required).
+        detection_method: How the infringement was detected.
+        encoding_info: Encoding information for the content.
+        nzb_info: NZB file metadata.
+        server_info: Usenet server information.
+    """
+
+    type: Literal["usenet"]
+    newsgroup: str
+    message_info: MessageInfo
+    detection_method: str | None = None
+    encoding_info: UsenetEncodingInfo | None = None
+    nzb_info: UsenetNzbInfo | None = None
+    server_info: UsenetServerInfo | None = None
+
+
+# Category-level union alias (for isinstance checks and type annotations).
+CopyrightReport = (
+    CopyrightCopyrightReport
+    | CopyrightP2pReport
+    | CopyrightCyberlockerReport
+    | CopyrightUgcPlatformReport
+    | CopyrightLinkSiteReport
+    | CopyrightUsenetReport
+)
+"""Union of all copyright-category report types."""
diff --git a/xarf/types_infrastructure.py b/xarf/types_infrastructure.py
new file mode 100644
index 0000000..4f637be
--- /dev/null
+++ b/xarf/types_infrastructure.py
@@ -0,0 +1,62 @@
+"""XARF v4 Infrastructure category type definitions.
+
+Mirrors ``types-infrastructure.ts`` from the JavaScript reference implementation.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import ConfigDict
+
+from xarf.models import XARFReport
+
+
+class InfrastructureBaseReport(XARFReport):
+    """Shared fields for all infrastructure-category reports.
+
+    Attributes:
+        category: Always ``"infrastructure"`` for this category.
+    """
+
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    category: Literal["infrastructure"]
+
+
+class BotnetReport(InfrastructureBaseReport):
+    """Infrastructure - Botnet report.
+
+    Attributes:
+        type: Always ``"botnet"``.
+        compromise_evidence: Evidence that the host is part of a botnet (required).
+        bot_capabilities: Capabilities of the bot (e.g. ``["ddos", "spam"]``).
+        c2_protocol: Command-and-control protocol used (e.g. ``"irc"``, ``"http"``).
+        c2_server: Hostname or IP of the C2 server.
+        malware_family: Malware family associated with the botnet.
+    """
+
+    type: Literal["botnet"]
+    compromise_evidence: str
+    bot_capabilities: list[str] | None = None
+    c2_protocol: str | None = None
+    c2_server: str | None = None
+    malware_family: str | None = None
+
+
+class CompromisedServerReport(InfrastructureBaseReport):
+    """Infrastructure - Compromised Server report.
+
+    Attributes:
+        type: Always ``"compromised_server"``.
+        compromise_method: How the server was compromised (required,
+            e.g. ``"brute_force"``).
+    """
+
+    type: Literal["compromised_server"]
+    compromise_method: str
+
+
+# Category-level union alias (for isinstance checks and type annotations).
+InfrastructureReport = BotnetReport | CompromisedServerReport
+"""Union of all infrastructure-category report types."""
diff --git a/xarf/types_messaging.py b/xarf/types_messaging.py
new file mode 100644
index 0000000..4a2ec76
--- /dev/null
+++ b/xarf/types_messaging.py
@@ -0,0 +1,109 @@
+"""XARF v4 Messaging category type definitions.
+
+Mirrors ``types-messaging.ts`` from the JavaScript reference implementation.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict
+
+from xarf.models import XARFReport
+
+
+class MessagingBaseReport(XARFReport):
+    """Shared fields for all messaging-category reports.
+
+    Attributes:
+        category: Always ``"messaging"`` for this category.
+        protocol: Messaging protocol used (e.g. ``"smtp"``, ``"imap"``).
+        sender_name: Display name of the sending party.
+        smtp_from: SMTP envelope sender address (MAIL FROM).
+        subject: Subject line of the message.
+    """
+
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    category: Literal["messaging"]
+    protocol: str
+    sender_name: str | None = None
+    smtp_from: str | None = None
+    subject: str | None = None
+
+
+class SpamIndicators(BaseModel):
+    """Spam analysis indicators found in the message.
+
+    Attributes:
+        suspicious_links: List of suspicious URLs found in the message.
+        commercial_content: Whether the message contains commercial content.
+        bulk_characteristics: Whether the message exhibits bulk-sending patterns.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    suspicious_links: list[str] | None = None
+    commercial_content: bool | None = None
+    bulk_characteristics: bool | None = None
+
+
+class SpamReport(MessagingBaseReport):
+    """Messaging - Spam report.
+
+    Attributes:
+        type: Always ``"spam"``.
+        language: Detected language of the message (e.g. ``"en"``).
+        message_id: Message-ID header value.
+        recipient_count: Number of recipients the message was sent to.
+        smtp_to: SMTP envelope recipient address (RCPT TO).
+        spam_indicators: Structured spam analysis indicators.
+        user_agent: User-Agent or X-Mailer header value.
+    """
+
+    type: Literal["spam"]
+    language: str | None = None
+    message_id: str | None = None
+    recipient_count: int | None = None
+    smtp_to: str | None = None
+    spam_indicators: SpamIndicators | None = None
+    user_agent: str | None = None
+
+
+class BulkIndicators(BaseModel):
+    """Bulk messaging indicators found in the message.
+
+    Attributes:
+        high_volume: Whether the message was sent in high volume.
+        template_based: Whether the message is template-generated.
+        commercial_sender: Whether the sender is a commercial entity.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    high_volume: bool | None = None
+    template_based: bool | None = None
+    commercial_sender: bool | None = None
+
+
+class BulkMessagingReport(MessagingBaseReport):
+    """Messaging - Bulk Messaging report.
+
+    Attributes:
+        type: Always ``"bulk_messaging"``.
+        recipient_count: Number of recipients (required for bulk reports).
+        bulk_indicators: Structured bulk-sending indicators.
+        opt_in_evidence: Whether evidence of recipient opt-in exists.
+        unsubscribe_provided: Whether an unsubscribe mechanism was provided.
+    """
+
+    type: Literal["bulk_messaging"]
+    recipient_count: int
+    bulk_indicators: BulkIndicators | None = None
+    opt_in_evidence: bool | None = None
+    unsubscribe_provided: bool | None = None
+
+
+# Category-level union alias (for isinstance checks and type annotations).
+MessagingReport = SpamReport | BulkMessagingReport
+"""Union of all messaging-category report types."""
diff --git a/xarf/types_reputation.py b/xarf/types_reputation.py
new file mode 100644
index 0000000..8a61f4d
--- /dev/null
+++ b/xarf/types_reputation.py
@@ -0,0 +1,51 @@
+"""XARF v4 Reputation category type definitions.
+
+Mirrors ``types-reputation.ts`` from the JavaScript reference implementation.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import ConfigDict
+
+from xarf.models import XARFReport
+
+
+class ReputationBaseReport(XARFReport):
+    """Shared fields for all reputation-category reports.
+
+    Attributes:
+        category: Always ``"reputation"`` for this category.
+        threat_type: Type of threat associated with this reputation entry (required).
+    """
+
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    category: Literal["reputation"]
+    threat_type: str
+
+
+class BlocklistReport(ReputationBaseReport):
+    """Reputation - Blocklist report.
+
+    Attributes:
+        type: Always ``"blocklist"``.
+    """
+
+    type: Literal["blocklist"]
+
+
+class ThreatIntelligenceReport(ReputationBaseReport):
+    """Reputation - Threat Intelligence report.
+
+    Attributes:
+        type: Always ``"threat_intelligence"``.
+    """
+
+    type: Literal["threat_intelligence"]
+
+
+# Category-level union alias (for isinstance checks and type annotations).
+ReputationReport = BlocklistReport | ThreatIntelligenceReport
+"""Union of all reputation-category report types."""
diff --git a/xarf/types_vulnerability.py b/xarf/types_vulnerability.py
new file mode 100644
index 0000000..eb65de2
--- /dev/null
+++ b/xarf/types_vulnerability.py
@@ -0,0 +1,115 @@
+"""XARF v4 Vulnerability category type definitions.
+
+Mirrors ``types-vulnerability.ts`` from the JavaScript reference implementation.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict
+
+from xarf.models import XARFReport
+
+
+class VulnerabilityBaseReport(XARFReport):
+    """Shared fields for all vulnerability-category reports.
+
+    Attributes:
+        category: Always ``"vulnerability"`` for this category.
+        service: Service or software containing the vulnerability (required).
+    """
+
+    model_config = ConfigDict(extra="allow", populate_by_name=True)
+
+    category: Literal["vulnerability"]
+    service: str
+
+
+# Internal type alias for impact levels.
+_ImpactLevel = Literal["none", "low", "high"]
+
+
+class ImpactAssessment(BaseModel):
+    """CVE impact assessment across the CIA triad.
+
+    Attributes:
+        confidentiality: Impact on confidentiality.
+        integrity: Impact on integrity.
+        availability: Impact on availability.
+    """
+
+    model_config = ConfigDict(populate_by_name=True)
+
+    confidentiality: _ImpactLevel | None = None
+    integrity: _ImpactLevel | None = None
+    availability: _ImpactLevel | None = None
+
+
+class CveReport(VulnerabilityBaseReport):
+    """Vulnerability - CVE (Common Vulnerabilities and Exposures) report.
+
+    Attributes:
+        type: Always ``"cve"``.
+        cve_id: Primary CVE identifier (required, e.g. ``"CVE-2024-12345"``).
+        service_port: Port on which the vulnerable service is listening (required).
+        cvss_score: CVSS base score (0.0–10.0).
+        cvss_vector: CVSS vector string.
+        cvss_version: CVSS version (e.g. ``"3.1"``).
+        cve_ids: Additional CVE IDs associated with this report.
+        disclosure_date: ISO 8601 date the CVE was publicly disclosed.
+        exploitability: Exploitability assessment (e.g. ``"actively_exploited"``).
+        impact_assessment: CIA triad impact assessment.
+        patch_available: Whether a patch is available.
+        patch_url: URL where the patch can be obtained.
+        patch_version: Version of the software that includes the patch.
+        remediation_priority: Recommended remediation priority.
+        risk_level: Overall risk level (e.g. ``"critical"``, ``"high"``).
+        service_version: Version of the vulnerable service.
+        severity: Severity label (e.g. ``"critical"``).
+        vendor_advisory: URL to the vendor security advisory.
+    """
+
+    type: Literal["cve"]
+    cve_id: str
+    service_port: int
+    cvss_score: float | None = None
+    cvss_vector: str | None = None
+    cvss_version: str | None = None
+    cve_ids: list[str] | None = None
+    disclosure_date: str | None = None
+    exploitability: str | None = None
+    impact_assessment: ImpactAssessment | None = None
+    patch_available: bool | None = None
+    patch_url: str | None = None
+    patch_version: str | None = None
+    remediation_priority: str | None = None
+    risk_level: str | None = None
+    service_version: str | None = None
+    severity: str | None = None
+    vendor_advisory: str | None = None
+
+
+class OpenServiceReport(VulnerabilityBaseReport):
+    """Vulnerability - Open Service report.
+
+    Attributes:
+        type: Always ``"open_service"``.
+    """
+
+    type: Literal["open_service"]
+
+
+class MisconfigurationReport(VulnerabilityBaseReport):
+    """Vulnerability - Misconfiguration report.
+
+    Attributes:
+        type: Always ``"misconfiguration"``.
+    """
+
+    type: Literal["misconfiguration"]
+
+
+# Category-level union alias (for isinstance checks and type annotations).
+VulnerabilityReport = CveReport | OpenServiceReport | MisconfigurationReport
+"""Union of all vulnerability-category report types."""

From b9f7a237044909658902ca6a7fc6e1b777222ce1 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 24 Mar 2026 18:02:40 +0100
Subject: [PATCH 04/13] Add tests for type and models construction

---
 tests/test_models.py | 939 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 939 insertions(+)
 create mode 100644 tests/test_models.py

diff --git a/tests/test_models.py b/tests/test_models.py
new file mode 100644
index 0000000..15373e1
--- /dev/null
+++ b/tests/test_models.py
@@ -0,0 +1,939 @@
+"""Tests for Phase 1: Models & Type System."""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import TypeAdapter, ValidationError as PydanticValidationError
+
+from xarf.models import (
+    AnyXARFReport,
+    ContactInfo,
+    CreateReportResult,
+    ParseResult,
+    ValidationError,
+    ValidationWarning,
+    XARFEvidence,
+    XARFReport,
+    _report_discriminator,
+)
+from xarf.types_connection import (
+    ConnectionBaseReport,
+    DdosReport,
+    InfectedHostReport,
+    LoginAttackReport,
+    PortScanReport,
+    ReconnaissanceReport,
+    ScrapingReport,
+    SqlInjectionReport,
+    VulnerabilityScanReport,
+)
+from xarf.types_content import (
+    BrandInfringementReport,
+    CompromiseIndicator,
+    ContentBaseReport,
+    CsamReport,
+    CsemReport,
+    ExposedDataReport,
+    FraudReport,
+    MalwareReport,
+    PhishingReport,
+    RegistrantDetails,
+    RemoteCompromiseReport,
+    SuspiciousRegistrationReport,
+    WebshellDetails,
+)
+from xarf.types_copyright import (
+    CopyrightBaseReport,
+    CopyrightCopyrightReport,
+    CopyrightCyberlockerReport,
+    CopyrightLinkSiteReport,
+    CopyrightP2pReport,
+    CopyrightUgcPlatformReport,
+    CopyrightUsenetReport,
+    MessageInfo,
+    SwarmInfo,
+)
+from xarf.types_infrastructure import BotnetReport, CompromisedServerReport
+from xarf.types_messaging import (
+    BulkIndicators,
+    BulkMessagingReport,
+    MessagingBaseReport,
+    SpamIndicators,
+    SpamReport,
+)
+from xarf.types_reputation import BlocklistReport, ThreatIntelligenceReport
+from xarf.types_vulnerability import (
+    CveReport,
+    ImpactAssessment,
+    MisconfigurationReport,
+    OpenServiceReport,
+    VulnerabilityBaseReport,
+)
+
+# ---------------------------------------------------------------------------
+# Shared fixtures
+# ---------------------------------------------------------------------------
+
+REPORTER = {"org": "Example Corp", "contact": "abuse@example.com", "domain": "example.com"}
+SENDER = {"org": "Bad Actor LLC", "contact": "noreply@bad.example", "domain": "bad.example"}
+
+BASE_FIELDS: dict[str, object] = {
+    "xarf_version": "4.2.0",
+    "report_id": "550e8400-e29b-41d4-a716-446655440000",
+    "timestamp": "2026-01-01T12:00:00Z",
+    "reporter": REPORTER,
+    "sender": SENDER,
+    "source_identifier": "192.0.2.1",
+}
+
+
+# ---------------------------------------------------------------------------
+# Result dataclass tests
+# ---------------------------------------------------------------------------
+
+
+class TestValidationError:
+    """Tests for the ValidationError dataclass."""
+
+    def test_required_fields(self) -> None:
+        """ValidationError requires field and message."""
+        err = ValidationError(field="reporter.org", message="Missing required field")
+        assert err.field == "reporter.org"
+        assert err.message == "Missing required field"
+        assert err.value is None
+
+    def test_optional_value(self) -> None:
+        """ValidationError accepts an optional value."""
+        err = ValidationError(field="confidence", message="Out of range", value=150)
+        assert err.value == 150
+
+
+class TestValidationWarning:
+    """Tests for the ValidationWarning dataclass."""
+
+    def test_required_fields(self) -> None:
+        """ValidationWarning requires field and message."""
+        warn = ValidationWarning(field="evidence_source", message="Recommended field missing")
+        assert warn.field == "evidence_source"
+        assert warn.message == "Recommended field missing"
+
+
+class TestParseResult:
+    """Tests for the ParseResult dataclass."""
+
+    def test_with_report(self) -> None:
+        """ParseResult holds a report and empty error/warning lists."""
+        report = SpamReport(
+            **BASE_FIELDS,
+            category="messaging",
+            type="spam",
+            protocol="smtp",
+        )
+        result = ParseResult(report=report, errors=[], warnings=[])
+        assert result.report is report
+        assert result.errors == []
+        assert result.warnings == []
+        assert result.info is None
+
+    def test_without_report(self) -> None:
+        """ParseResult can hold None for report on failure."""
+        result = ParseResult(
+            report=None,
+            errors=[ValidationError(field="category", message="Missing")],
+            warnings=[],
+        )
+        assert result.report is None
+        assert len(result.errors) == 1
+
+    def test_with_info(self) -> None:
+        """ParseResult accepts optional info dict."""
+        result = ParseResult(
+            report=None,
+            errors=[],
+            warnings=[],
+            info={"missing_optional": ["evidence_source"]},
+        )
+        assert result.info is not None
+        assert "missing_optional" in result.info
+
+
+class TestCreateReportResult:
+    """Tests for the CreateReportResult dataclass."""
+
+    def test_structure(self) -> None:
+        """CreateReportResult has the same structure as ParseResult."""
+        result = CreateReportResult(report=None, errors=[], warnings=[])
+        assert result.report is None
+        assert result.info is None
+
+
+# ---------------------------------------------------------------------------
+# Base model tests
+# ---------------------------------------------------------------------------
+
+
+class TestContactInfo:
+    """Tests for the ContactInfo model."""
+
+    def test_valid(self) -> None:
+        """ContactInfo accepts valid org/contact/domain."""
+        ci = ContactInfo(org="ACME", contact="admin@acme.com", domain="acme.com")
+        assert ci.org == "ACME"
+        assert ci.contact == "admin@acme.com"
+        assert ci.domain == "acme.com"
+
+    def test_missing_field(self) -> None:
+        """ContactInfo raises on missing required field."""
+        with pytest.raises(PydanticValidationError):
+            ContactInfo(org="ACME", contact="admin@acme.com")  # type: ignore[call-arg]
+
+
+class TestXARFEvidence:
+    """Tests for the XARFEvidence model."""
+
+    def test_required_fields(self) -> None:
+        """XARFEvidence requires content_type and payload."""
+        ev = XARFEvidence(content_type="message/rfc822", payload="base64data==")
+        assert ev.content_type == "message/rfc822"
+        assert ev.payload == "base64data=="
+        assert ev.description is None
+        assert ev.hash is None
+        assert ev.size is None
+
+    def test_all_fields(self) -> None:
+        """XARFEvidence accepts all optional fields."""
+        ev = XARFEvidence(
+            content_type="application/octet-stream",
+            payload="abc123",
+            description="Malware sample",
+            hash="deadbeef",
+            size=1024,
+        )
+        assert ev.hash == "deadbeef"
+        assert ev.size == 1024
+
+
+class TestXARFReport:
+    """Tests for the base XARFReport model."""
+
+    def test_required_fields(self) -> None:
+        """XARFReport accepts all required base fields."""
+        report = XARFReport(
+            **BASE_FIELDS,
+            category="messaging",
+            type="spam",
+        )
+        assert report.xarf_version == "4.2.0"
+        assert report.category == "messaging"
+        assert report.type == "spam"
+
+    def test_recommended_fields_default_none(self) -> None:
+        """Recommended fields default to None."""
+        report = XARFReport(**BASE_FIELDS, category="messaging", type="spam")
+        assert report.evidence_source is None
+        assert report.source_port is None
+
+    def test_optional_fields_default_none(self) -> None:
+        """Optional fields default to None."""
+        report = XARFReport(**BASE_FIELDS, category="messaging", type="spam")
+        assert report.description is None
+        assert report.legacy_version is None
+        assert report.evidence is None
+        assert report.tags is None
+        assert report.confidence is None
+        assert report.internal is None
+
+    def test_internal_field_alias(self) -> None:
+        """The _internal field is aliased as 'internal' in Python."""
+        report = XARFReport(
+            **BASE_FIELDS,
+            category="connection",
+            type="ddos",
+            **{"_internal": {"ticket": "INC-001"}},
+        )
+        assert report.internal == {"ticket": "INC-001"}
+
+    def test_extra_fields_allowed(self) -> None:
+        """Extra fields pass through via extra='allow'."""
+        report = XARFReport(
+            **BASE_FIELDS,
+            category="messaging",
+            type="spam",
+            custom_field="custom_value",
+        )
+        assert report.model_extra is not None
+        assert report.model_extra.get("custom_field") == "custom_value"
+
+    def test_evidence_list(self) -> None:
+        """XARFReport accepts a list of XARFEvidence items."""
+        report = XARFReport(
+            **BASE_FIELDS,
+            category="messaging",
+            type="spam",
+            evidence=[{"content_type": "text/plain", "payload": "hello"}],
+        )
+        assert report.evidence is not None
+        assert len(report.evidence) == 1
+        assert isinstance(report.evidence[0], XARFEvidence)
+
+
+# ---------------------------------------------------------------------------
+# Messaging type tests
+# ---------------------------------------------------------------------------
+
+
+class TestSpamReport:
+    """Tests for SpamReport."""
+
+    def test_valid_minimal(self) -> None:
+        """SpamReport requires category, type, and protocol."""
+        report = SpamReport(
+            **BASE_FIELDS,
+            category="messaging",
+            type="spam",
+            protocol="smtp",
+        )
+        assert report.category == "messaging"
+        assert report.type == "spam"
+        assert report.protocol == "smtp"
+
+    def test_optional_fields(self) -> None:
+        """SpamReport optional fields default to None."""
+        report = SpamReport(**BASE_FIELDS, category="messaging", type="spam", protocol="smtp")
+        assert report.language is None
+        assert report.message_id is None
+        assert report.recipient_count is None
+        assert report.smtp_to is None
+        assert report.spam_indicators is None
+        assert report.user_agent is None
+
+    def test_spam_indicators_nested(self) -> None:
+        """SpamReport accepts nested SpamIndicators."""
+        report = SpamReport(
+            **BASE_FIELDS,
+            category="messaging",
+            type="spam",
+            protocol="smtp",
+            spam_indicators={"suspicious_links": ["http://evil.example/"], "commercial_content": True},
+        )
+        assert report.spam_indicators is not None
+        assert isinstance(report.spam_indicators, SpamIndicators)
+        assert report.spam_indicators.commercial_content is True
+
+    def test_wrong_type_literal_rejected(self) -> None:
+        """SpamReport rejects type != 'spam'."""
+        with pytest.raises(PydanticValidationError):
+            SpamReport(
+                **BASE_FIELDS,
+                category="messaging",
+                type="bulk_messaging",
+                protocol="smtp",
+            )
+
+
+class TestBulkMessagingReport:
+    """Tests for BulkMessagingReport."""
+
+    def test_valid(self) -> None:
+        """BulkMessagingReport requires recipient_count."""
+        report = BulkMessagingReport(
+            **BASE_FIELDS,
+            category="messaging",
+            type="bulk_messaging",
+            protocol="smtp",
+            recipient_count=5000,
+        )
+        assert report.recipient_count == 5000
+
+    def test_bulk_indicators_nested(self) -> None:
+        """BulkMessagingReport accepts nested BulkIndicators."""
+        report = BulkMessagingReport(
+            **BASE_FIELDS,
+            category="messaging",
+            type="bulk_messaging",
+            protocol="smtp",
+            recipient_count=100,
+            bulk_indicators={"high_volume": True, "template_based": True},
+        )
+        assert report.bulk_indicators is not None
+        assert isinstance(report.bulk_indicators, BulkIndicators)
+
+    def test_missing_recipient_count(self) -> None:
+        """BulkMessagingReport requires recipient_count."""
+        with pytest.raises(PydanticValidationError):
+            BulkMessagingReport(
+                **BASE_FIELDS,
+                category="messaging",
+                type="bulk_messaging",
+                protocol="smtp",
+            )
+
+
+# ---------------------------------------------------------------------------
+# Connection type tests
+# ---------------------------------------------------------------------------
+
+CONNECTION_BASE: dict[str, object] = {
+    **BASE_FIELDS,
+    "category": "connection",
+    "first_seen": "2026-01-01T00:00:00Z",
+    "protocol": "tcp",
+}
+
+
+class TestConnectionReports:
+    """Tests for connection-category report types."""
+
+    def test_login_attack(self) -> None:
+        """LoginAttackReport constructs correctly."""
+        r = LoginAttackReport(**CONNECTION_BASE, type="login_attack")
+        assert r.type == "login_attack"
+        assert r.category == "connection"
+
+    def test_port_scan(self) -> None:
+        """PortScanReport constructs correctly."""
+        r = PortScanReport(**CONNECTION_BASE, type="port_scan")
+        assert r.type == "port_scan"
+
+    def test_ddos(self) -> None:
+        """DdosReport accepts optional fields."""
+        r = DdosReport(
+            **CONNECTION_BASE,
+            type="ddos",
+            peak_bps=10_000_000,
+            attack_vector="udp_flood",
+        )
+        assert r.peak_bps == 10_000_000
+        assert r.attack_vector == "udp_flood"
+
+    def test_infected_host_requires_bot_type(self) -> None:
+        """InfectedHostReport requires bot_type."""
+        with pytest.raises(PydanticValidationError):
+            InfectedHostReport(**CONNECTION_BASE, type="infected_host")
+
+    def test_infected_host(self) -> None:
+        """InfectedHostReport constructs with bot_type."""
+        r = InfectedHostReport(**CONNECTION_BASE, type="infected_host", bot_type="mirai")
+        assert r.bot_type == "mirai"
+
+    def test_reconnaissance_requires_probed_resources(self) -> None:
+        """ReconnaissanceReport requires probed_resources."""
+        with pytest.raises(PydanticValidationError):
+            ReconnaissanceReport(**CONNECTION_BASE, type="reconnaissance")
+
+    def test_reconnaissance(self) -> None:
+        """ReconnaissanceReport constructs with probed_resources."""
+        r = ReconnaissanceReport(
+            **CONNECTION_BASE,
+            type="reconnaissance",
+            probed_resources=["/admin", "/.env"],
+        )
+        assert r.probed_resources == ["/admin", "/.env"]
+
+    def test_scraping_requires_total_requests(self) -> None:
+        """ScrapingReport requires total_requests."""
+        with pytest.raises(PydanticValidationError):
+            ScrapingReport(**CONNECTION_BASE, type="scraping")
+
+    def test_vulnerability_scan_requires_scan_type(self) -> None:
+        """VulnerabilityScanReport requires scan_type."""
+        with pytest.raises(PydanticValidationError):
+            VulnerabilityScanReport(**CONNECTION_BASE, type="vulnerability_scan")
+
+
+# ---------------------------------------------------------------------------
+# Content type tests
+# ---------------------------------------------------------------------------
+
+CONTENT_BASE: dict[str, object] = {
+    **BASE_FIELDS,
+    "category": "content",
+    "url": "https://evil.example/phish",
+}
+
+
+class TestContentReports:
+    """Tests for content-category report types."""
+
+    def test_phishing(self) -> None:
+        """PhishingReport constructs correctly."""
+        r = PhishingReport(**CONTENT_BASE, type="phishing")
+        assert r.type == "phishing"
+        assert r.url == "https://evil.example/phish"
+
+    def test_malware(self) -> None:
+        """MalwareReport accepts file_hashes dict."""
+        r = MalwareReport(
+            **CONTENT_BASE,
+            type="malware",
+            file_hashes={"sha256": "abc123"},
+        )
+        assert r.file_hashes == {"sha256": "abc123"}
+
+    def test_csam_requires_classification_and_detection(self) -> None:
+        """CsamReport requires classification and detection_method."""
+        with pytest.raises(PydanticValidationError):
+            CsamReport(**CONTENT_BASE, type="csam", classification="level_a")
+
+    def test_csam(self) -> None:
+        """CsamReport constructs with required fields."""
+        r = CsamReport(
+            **CONTENT_BASE,
+            type="csam",
+            classification="level_a",
+            detection_method="hash_match",
+        )
+        assert r.classification == "level_a"
+
+    def test_exposed_data_requires_data_types_and_method(self) -> None:
+        """ExposedDataReport requires data_types and exposure_method."""
+        with pytest.raises(PydanticValidationError):
+            ExposedDataReport(**CONTENT_BASE, type="exposed_data")
+
+    def test_brand_infringement_requires_fields(self) -> None:
+        """BrandInfringementReport requires infringement_type and legitimate_site."""
+        with pytest.raises(PydanticValidationError):
+            BrandInfringementReport(**CONTENT_BASE, type="brand_infringement")
+
+    def test_remote_compromise_nested_indicators(self) -> None:
+        """RemoteCompromiseReport accepts nested CompromiseIndicator and WebshellDetails."""
+        r = RemoteCompromiseReport(
+            **CONTENT_BASE,
+            type="remote_compromise",
+            compromise_type="webshell",
+            compromise_indicators=[{"type": "file_path", "value": "/var/www/shell.php"}],
+            webshell_details={"family": "c99", "password_protected": True},
+        )
+        assert r.compromise_indicators is not None
+        assert isinstance(r.compromise_indicators[0], CompromiseIndicator)
+        assert r.webshell_details is not None
+        assert isinstance(r.webshell_details, WebshellDetails)
+
+    def test_suspicious_registration_requires_fields(self) -> None:
+        """SuspiciousRegistrationReport requires registration_date and suspicious_indicators."""
+        with pytest.raises(PydanticValidationError):
+            SuspiciousRegistrationReport(**CONTENT_BASE, type="suspicious_registration")
+
+
+# ---------------------------------------------------------------------------
+# Infrastructure type tests
+# ---------------------------------------------------------------------------
+
+INFRA_BASE: dict[str, object] = {**BASE_FIELDS, "category": "infrastructure"}
+
+
+class TestInfrastructureReports:
+    """Tests for infrastructure-category report types."""
+
+    def test_botnet_requires_compromise_evidence(self) -> None:
+        """BotnetReport requires compromise_evidence."""
+        with pytest.raises(PydanticValidationError):
+            BotnetReport(**INFRA_BASE, type="botnet")
+
+    def test_botnet(self) -> None:
+        """BotnetReport constructs correctly."""
+        r = BotnetReport(
+            **INFRA_BASE,
+            type="botnet",
+            compromise_evidence="C2 traffic observed to 10.0.0.1:6667",
+            malware_family="mirai",
+        )
+        assert r.malware_family == "mirai"
+
+    def test_compromised_server(self) -> None:
+        """CompromisedServerReport requires compromise_method."""
+        r = CompromisedServerReport(
+            **INFRA_BASE,
+            type="compromised_server",
+            compromise_method="brute_force",
+        )
+        assert r.compromise_method == "brute_force"
+
+
+# ---------------------------------------------------------------------------
+# Copyright type tests
+# ---------------------------------------------------------------------------
+
+COPYRIGHT_BASE: dict[str, object] = {**BASE_FIELDS, "category": "copyright"}
+
+
+class TestCopyrightReports:
+    """Tests for copyright-category report types."""
+
+    def test_copyright_copyright_requires_infringing_url(self) -> None:
+        """CopyrightCopyrightReport requires infringing_url."""
+        with pytest.raises(PydanticValidationError):
+            CopyrightCopyrightReport(**COPYRIGHT_BASE, type="copyright")
+
+    def test_copyright_copyright(self) -> None:
+        """CopyrightCopyrightReport constructs correctly."""
+        r = CopyrightCopyrightReport(
+            **COPYRIGHT_BASE,
+            type="copyright",
+            infringing_url="https://pirate.example/movie.mkv",
+        )
+        assert r.type == "copyright"
+        assert r.infringing_url == "https://pirate.example/movie.mkv"
+
+    def test_p2p_requires_swarm_info(self) -> None:
+        """CopyrightP2pReport requires swarm_info."""
+        with pytest.raises(PydanticValidationError):
+            CopyrightP2pReport(**COPYRIGHT_BASE, type="p2p", p2p_protocol="bittorrent")
+
+    def test_p2p(self) -> None:
+        """CopyrightP2pReport constructs with nested SwarmInfo."""
+        r = CopyrightP2pReport(
+            **COPYRIGHT_BASE,
+            type="p2p",
+            p2p_protocol="bittorrent",
+            swarm_info={"info_hash": "abc123def456"},
+        )
+        assert r.p2p_protocol == "bittorrent"
+        assert isinstance(r.swarm_info, SwarmInfo)
+        assert r.swarm_info.info_hash == "abc123def456"
+
+    def test_cyberlocker_requires_fields(self) -> None:
+        """CopyrightCyberlockerReport requires hosting_service and infringing_url."""
+        with pytest.raises(PydanticValidationError):
+            CopyrightCyberlockerReport(**COPYRIGHT_BASE, type="cyberlocker")
+
+    def test_ugc_platform_requires_fields(self) -> None:
+        """CopyrightUgcPlatformReport requires infringing_url and platform_name."""
+        with pytest.raises(PydanticValidationError):
+            CopyrightUgcPlatformReport(**COPYRIGHT_BASE, type="ugc_platform")
+
+    def test_link_site_requires_fields(self) -> None:
+        """CopyrightLinkSiteReport requires infringing_url and site_name."""
+        with pytest.raises(PydanticValidationError):
+            CopyrightLinkSiteReport(**COPYRIGHT_BASE, type="link_site")
+
+    def test_usenet_requires_newsgroup_and_message_info(self) -> None:
+        """CopyrightUsenetReport requires newsgroup and message_info."""
+        with pytest.raises(PydanticValidationError):
+            CopyrightUsenetReport(**COPYRIGHT_BASE, type="usenet")
+
+    def test_usenet(self) -> None:
+        """CopyrightUsenetReport constructs with nested MessageInfo."""
+        r = CopyrightUsenetReport(
+            **COPYRIGHT_BASE,
+            type="usenet",
+            newsgroup="alt.binaries.example",
+            message_info={"message_id": "<abc@news.example>"},
+        )
+        assert isinstance(r.message_info, MessageInfo)
+        assert r.message_info.message_id == "<abc@news.example>"
+
+
+# ---------------------------------------------------------------------------
+# Vulnerability type tests
+# ---------------------------------------------------------------------------
+
+VULN_BASE: dict[str, object] = {**BASE_FIELDS, "category": "vulnerability", "service": "openssh"}
+
+
+class TestVulnerabilityReports:
+    """Tests for vulnerability-category report types."""
+
+    def test_cve_requires_cve_id_and_port(self) -> None:
+        """CveReport requires cve_id and service_port."""
+        with pytest.raises(PydanticValidationError):
+            CveReport(**VULN_BASE, type="cve")
+
+    def test_cve(self) -> None:
+        """CveReport constructs with impact assessment."""
+        r = CveReport(
+            **VULN_BASE,
+            type="cve",
+            cve_id="CVE-2024-12345",
+            service_port=22,
+            cvss_score=9.8,
+            impact_assessment={"confidentiality": "high", "integrity": "high", "availability": "high"},
+        )
+        assert r.cve_id == "CVE-2024-12345"
+        assert r.service_port == 22
+        assert isinstance(r.impact_assessment, ImpactAssessment)
+        assert r.impact_assessment.confidentiality == "high"
+
+    def test_open_service(self) -> None:
+        """OpenServiceReport constructs with just base fields."""
+        r = OpenServiceReport(**VULN_BASE, type="open_service")
+        assert r.type == "open_service"
+        assert r.service == "openssh"
+
+    def test_misconfiguration(self) -> None:
+        """MisconfigurationReport constructs correctly."""
+        r = MisconfigurationReport(**VULN_BASE, type="misconfiguration")
+        assert r.type == "misconfiguration"
+
+
+# ---------------------------------------------------------------------------
+# Reputation type tests
+# ---------------------------------------------------------------------------
+
+REP_BASE: dict[str, object] = {
+    **BASE_FIELDS,
+    "category": "reputation",
+    "threat_type": "phishing",
+}
+
+
+class TestReputationReports:
+    """Tests for reputation-category report types."""
+
+    def test_blocklist(self) -> None:
+        """BlocklistReport constructs correctly."""
+        r = BlocklistReport(**REP_BASE, type="blocklist")
+        assert r.type == "blocklist"
+        assert r.threat_type == "phishing"
+
+    def test_threat_intelligence(self) -> None:
+        """ThreatIntelligenceReport constructs correctly."""
+        r = ThreatIntelligenceReport(**REP_BASE, type="threat_intelligence")
+        assert r.type == "threat_intelligence"
+
+    def test_missing_threat_type(self) -> None:
+        """Reputation reports require threat_type."""
+        with pytest.raises(PydanticValidationError):
+            BlocklistReport(**BASE_FIELDS, category="reputation", type="blocklist")
+
+
+# ---------------------------------------------------------------------------
+# AnyXARFReport discriminated union tests
+# ---------------------------------------------------------------------------
+
+_adapter: TypeAdapter[AnyXARFReport] = TypeAdapter(AnyXARFReport)
+
+
+class TestAnyXARFReportDiscriminator:
+    """Tests for AnyXARFReport discriminated union resolution."""
+
+    @pytest.mark.parametrize(
+        ("category", "report_type", "extra"),
+        [
+            ("messaging", "spam", {"protocol": "smtp"}),
+            ("messaging", "bulk_messaging", {"protocol": "smtp", "recipient_count": 100}),
+            ("connection", "login_attack", {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp"}),
+            ("connection", "port_scan", {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp"}),
+            ("connection", "ddos", {"first_seen": "2026-01-01T00:00:00Z", "protocol": "udp"}),
+            (
+                "connection",
+                "infected_host",
+                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp", "bot_type": "mirai"},
+            ),
+            (
+                "connection",
+                "reconnaissance",
+                {
+                    "first_seen": "2026-01-01T00:00:00Z",
+                    "protocol": "tcp",
+                    "probed_resources": ["/"],
+                },
+            ),
+            (
+                "connection",
+                "scraping",
+                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "http", "total_requests": 1000},
+            ),
+            (
+                "connection",
+                "sql_injection",
+                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "http"},
+            ),
+            (
+                "connection",
+                "vulnerability_scan",
+                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp", "scan_type": "port"},
+            ),
+            ("content", "phishing", {"url": "https://evil.example/"}),
+            ("content", "malware", {"url": "https://evil.example/payload.exe"}),
+            (
+                "content",
+                "csam",
+                {
+                    "url": "https://evil.example/",
+                    "classification": "a",
+                    "detection_method": "hash",
+                },
+            ),
+            (
+                "content",
+                "csem",
+                {
+                    "url": "https://evil.example/",
+                    "detection_method": "hash",
+                    "exploitation_type": "grooming",
+                },
+            ),
+            (
+                "content",
+                "exposed_data",
+                {
+                    "url": "https://evil.example/",
+                    "data_types": ["pii"],
+                    "exposure_method": "bucket",
+                },
+            ),
+            (
+                "content",
+                "brand_infringement",
+                {
+                    "url": "https://evil.example/",
+                    "infringement_type": "trademark",
+                    "legitimate_site": "https://legit.example/",
+                },
+            ),
+            (
+                "content",
+                "fraud",
+                {"url": "https://evil.example/", "fraud_type": "investment_scam"},
+            ),
+            (
+                "content",
+                "remote_compromise",
+                {"url": "https://evil.example/", "compromise_type": "webshell"},
+            ),
+            (
+                "content",
+                "suspicious_registration",
+                {
+                    "url": "https://evil.example/",
+                    "registration_date": "2026-01-01",
+                    "suspicious_indicators": ["typosquat"],
+                },
+            ),
+            (
+                "copyright",
+                "copyright",
+                {"infringing_url": "https://pirate.example/file"},
+            ),
+            (
+                "copyright",
+                "p2p",
+                {
+                    "p2p_protocol": "bittorrent",
+                    "swarm_info": {"info_hash": "abc123"},
+                },
+            ),
+            (
+                "copyright",
+                "cyberlocker",
+                {
+                    "hosting_service": "megaupload",
+                    "infringing_url": "https://mega.example/file",
+                },
+            ),
+            (
+                "copyright",
+                "ugc_platform",
+                {
+                    "infringing_url": "https://tube.example/video",
+                    "platform_name": "TubeSite",
+                },
+            ),
+            (
+                "copyright",
+                "link_site",
+                {
+                    "infringing_url": "https://links.example/page",
+                    "site_name": "LinkDump",
+                },
+            ),
+            (
+                "copyright",
+                "usenet",
+                {
+                    "newsgroup": "alt.binaries.example",
+                    "message_info": {"message_id": "<abc@news.example>"},
+                },
+            ),
+            (
+                "infrastructure",
+                "botnet",
+                {"compromise_evidence": "C2 traffic observed"},
+            ),
+            (
+                "infrastructure",
+                "compromised_server",
+                {"compromise_method": "brute_force"},
+            ),
+            (
+                "vulnerability",
+                "cve",
+                {"service": "openssh", "cve_id": "CVE-2024-1234", "service_port": 22},
+            ),
+            ("vulnerability", "open_service", {"service": "redis"}),
+            ("vulnerability", "misconfiguration", {"service": "nginx"}),
+            ("reputation", "blocklist", {"threat_type": "spam"}),
+            ("reputation", "threat_intelligence", {"threat_type": "malware"}),
+        ],
+    )
+    def test_discriminator_resolves_correct_type(
+        self,
+        category: str,
+        report_type: str,
+        extra: dict[str, object],
+    ) -> None:
+        """AnyXARFReport discriminator resolves each of the 32 concrete types."""
+        data: dict[str, object] = {
+            **BASE_FIELDS,
+            "category": category,
+            "type": report_type,
+            **extra,
+        }
+        report = _adapter.validate_python(data)
+        assert report.category == category
+        assert report.type == report_type
+
+    def test_unknown_category_raises(self) -> None:
+        """AnyXARFReport raises on unknown category/type combination."""
+        data: dict[str, object] = {
+            **BASE_FIELDS,
+            "category": "unknown",
+            "type": "spam",
+        }
+        with pytest.raises(PydanticValidationError):
+            _adapter.validate_python(data)
+
+    def test_unknown_type_raises(self) -> None:
+        """AnyXARFReport raises on valid category but unknown type."""
+        data: dict[str, object] = {
+            **BASE_FIELDS,
+            "category": "messaging",
+            "type": "unknown_type",
+            "protocol": "smtp",
+        }
+        with pytest.raises(PydanticValidationError):
+            _adapter.validate_python(data)
+
+    def test_extra_fields_pass_through(self) -> None:
+        """AnyXARFReport passes extra fields through via extra='allow'."""
+        data: dict[str, object] = {
+            **BASE_FIELDS,
+            "category": "messaging",
+            "type": "spam",
+            "protocol": "smtp",
+            "custom_extension": "value",
+        }
+        report = _adapter.validate_python(data)
+        assert report.model_extra is not None
+        assert report.model_extra.get("custom_extension") == "value"
+
+
+class TestReportDiscriminatorFunction:
+    """Tests for the _report_discriminator helper."""
+
+    def test_dict_input(self) -> None:
+        """_report_discriminator extracts key from a dict."""
+        key = _report_discriminator({"category": "messaging", "type": "spam"})
+        assert key == "messaging/spam"
+
+    def test_model_input(self) -> None:
+        """_report_discriminator extracts key from a model instance."""
+        report = SpamReport(**BASE_FIELDS, category="messaging", type="spam", protocol="smtp")
+        key = _report_discriminator(report)
+        assert key == "messaging/spam"
+
+    def test_missing_keys_returns_none_string(self) -> None:
+        """_report_discriminator returns 'None/None' for empty dict."""
+        key = _report_discriminator({})
+        assert key == "None/None"

From 3f04c1d5373f679a3a245a826d7718e4579f4107 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Wed, 25 Mar 2026 12:35:49 +0100
Subject: [PATCH 05/13] Add schema_registry class, logic and tests.

---
 pyproject.toml                |   3 +
 tests/test_schema_registry.py | 428 ++++++++++++++++++++++++++
 xarf/__init__.py              |  13 +
 xarf/schema_registry.py       | 558 ++++++++++++++++++++++++++++++++++
 4 files changed, 1002 insertions(+)
 create mode 100644 tests/test_schema_registry.py
 create mode 100644 xarf/schema_registry.py

diff --git a/pyproject.toml b/pyproject.toml
index aea0dfa..a075ae2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -84,6 +84,9 @@ ignore = ["D203", "D213"]
 [tool.ruff.lint.pydocstyle]
 convention = "google"
 
+[tool.ruff.lint.per-file-ignores]
+"tests/**/*.py" = ["D"]  # docstrings not required on test classes/methods
+
 [tool.ruff.format]
 quote-style = "double"
 
diff --git a/tests/test_schema_registry.py b/tests/test_schema_registry.py
new file mode 100644
index 0000000..216a336
--- /dev/null
+++ b/tests/test_schema_registry.py
@@ -0,0 +1,428 @@
+"""Tests for Phase 2: Schema Registry."""
+
+from __future__ import annotations
+
+import pytest
+
+from xarf.schema_registry import (
+    FieldMetadata,
+    SchemaRegistry,
+    get_registry,
+    reset_registry,
+    schema_registry,
+)
+
+# ---------------------------------------------------------------------------
+# Fixtures
+# ---------------------------------------------------------------------------
+
+
+@pytest.fixture(autouse=True)
+def _reset_registry_after_test() -> None:
+    """Reset the module-level singleton after every test for isolation."""
+    yield
+    reset_registry()
+
+
+# ---------------------------------------------------------------------------
+# Singleton behaviour
+# ---------------------------------------------------------------------------
+
+
+class TestSingleton:
+    def test_get_registry_returns_same_instance(self) -> None:
+        r1 = get_registry()
+        r2 = get_registry()
+        assert r1 is r2
+
+    def test_module_level_alias_is_a_loaded_registry(self) -> None:
+        # schema_registry is created eagerly at import time and may differ from
+        # a fresh get_registry() call (if reset_registry() ran between them).
+        # What matters is that both are functional SchemaRegistry instances.
+        r = get_registry()
+        assert isinstance(schema_registry, SchemaRegistry)
+        assert schema_registry.is_loaded()
+        assert isinstance(r, SchemaRegistry)
+        assert r.is_loaded()
+
+    def test_reset_registry_clears_singleton(self) -> None:
+        r1 = get_registry()
+        reset_registry()
+        r2 = get_registry()
+        assert r1 is not r2
+
+    def test_reset_registry_new_instance_is_functional(self) -> None:
+        reset_registry()
+        r = get_registry()
+        assert r.is_loaded()
+        assert "messaging" in r.get_categories()
+
+
+# ---------------------------------------------------------------------------
+# is_loaded
+# ---------------------------------------------------------------------------
+
+
+class TestIsLoaded:
+    def test_is_loaded_after_normal_init(self) -> None:
+        assert get_registry().is_loaded()
+
+
+# ---------------------------------------------------------------------------
+# get_categories
+# ---------------------------------------------------------------------------
+
+
+class TestGetCategories:
+    EXPECTED_CATEGORIES = {
+        "messaging",
+        "connection",
+        "content",
+        "infrastructure",
+        "copyright",
+        "vulnerability",
+        "reputation",
+    }
+
+    def test_returns_all_seven_categories(self) -> None:
+        cats = get_registry().get_categories()
+        assert cats == self.EXPECTED_CATEGORIES
+
+    def test_result_is_cached(self) -> None:
+        r = get_registry()
+        assert r.get_categories() is r.get_categories()
+
+
+# ---------------------------------------------------------------------------
+# get_types_for_category
+# ---------------------------------------------------------------------------
+
+
+class TestGetTypesForCategory:
+    def test_messaging_types(self) -> None:
+        types = get_registry().get_types_for_category("messaging")
+        assert "spam" in types
+        assert "bulk_messaging" in types
+
+    def test_connection_types(self) -> None:
+        types = get_registry().get_types_for_category("connection")
+        expected = {
+            "login_attack",
+            "port_scan",
+            "ddos",
+            "infected_host",
+            "reconnaissance",
+            "scraping",
+            "sql_injection",
+            "vulnerability_scan",
+        }
+        assert expected.issubset(types)
+
+    def test_content_types(self) -> None:
+        types = get_registry().get_types_for_category("content")
+        assert "phishing" in types
+        assert "malware" in types
+
+    def test_infrastructure_types(self) -> None:
+        types = get_registry().get_types_for_category("infrastructure")
+        assert "botnet" in types
+        assert "compromised_server" in types
+
+    def test_copyright_types(self) -> None:
+        types = get_registry().get_types_for_category("copyright")
+        assert "copyright" in types
+        assert "p2p" in types
+
+    def test_vulnerability_types(self) -> None:
+        types = get_registry().get_types_for_category("vulnerability")
+        assert "cve" in types
+        assert "open_service" in types
+        assert "misconfiguration" in types
+
+    def test_reputation_types(self) -> None:
+        types = get_registry().get_types_for_category("reputation")
+        assert "blocklist" in types
+        assert "threat_intelligence" in types
+
+    def test_unknown_category_returns_empty_set(self) -> None:
+        assert get_registry().get_types_for_category("nonexistent") == set()
+
+
+# ---------------------------------------------------------------------------
+# get_all_types
+# ---------------------------------------------------------------------------
+
+
+class TestGetAllTypes:
+    def test_returns_dict(self) -> None:
+        assert isinstance(get_registry().get_all_types(), dict)
+
+    def test_contains_all_categories(self) -> None:
+        all_types = get_registry().get_all_types()
+        expected_categories = {
+            "messaging",
+            "connection",
+            "content",
+            "infrastructure",
+            "copyright",
+            "vulnerability",
+            "reputation",
+        }
+        assert expected_categories.issubset(all_types.keys())
+
+    def test_result_is_cached(self) -> None:
+        r = get_registry()
+        assert r.get_all_types() is r.get_all_types()
+
+
+# ---------------------------------------------------------------------------
+# is_valid_category
+# ---------------------------------------------------------------------------
+
+
+class TestIsValidCategory:
+    def test_valid_categories(self) -> None:
+        r = get_registry()
+        for cat in (
+            "messaging",
+            "connection",
+            "content",
+            "infrastructure",
+            "copyright",
+            "vulnerability",
+            "reputation",
+        ):
+            assert r.is_valid_category(cat) is True
+
+    def test_invalid_category(self) -> None:
+        assert get_registry().is_valid_category("abuse") is False
+        assert get_registry().is_valid_category("") is False
+        assert get_registry().is_valid_category("MESSAGING") is False
+
+
+# ---------------------------------------------------------------------------
+# is_valid_type
+# ---------------------------------------------------------------------------
+
+
+class TestIsValidType:
+    def test_valid_pair(self) -> None:
+        assert get_registry().is_valid_type("messaging", "spam") is True
+
+    def test_valid_pair_with_underscore_type(self) -> None:
+        assert get_registry().is_valid_type("connection", "login_attack") is True
+
+    def test_invalid_type_for_valid_category(self) -> None:
+        assert get_registry().is_valid_type("messaging", "ddos") is False
+
+    def test_invalid_category(self) -> None:
+        assert get_registry().is_valid_type("nonexistent", "spam") is False
+
+    def test_both_invalid(self) -> None:
+        assert get_registry().is_valid_type("nope", "nope") is False
+
+
+# ---------------------------------------------------------------------------
+# get_required_fields
+# ---------------------------------------------------------------------------
+
+
+class TestGetRequiredFields:
+    EXPECTED_REQUIRED = {
+        "xarf_version",
+        "report_id",
+        "timestamp",
+        "reporter",
+        "sender",
+        "source_identifier",
+        "category",
+        "type",
+    }
+
+    def test_returns_exact_core_required_fields(self) -> None:
+        assert get_registry().get_required_fields() == self.EXPECTED_REQUIRED
+
+    def test_result_is_cached(self) -> None:
+        r = get_registry()
+        assert r.get_required_fields() is r.get_required_fields()
+
+
+# ---------------------------------------------------------------------------
+# get_contact_required_fields
+# ---------------------------------------------------------------------------
+
+
+class TestGetContactRequiredFields:
+    def test_returns_exact_contact_required_fields(self) -> None:
+        assert get_registry().get_contact_required_fields() == {
+            "org",
+            "contact",
+            "domain",
+        }
+
+    def test_result_is_cached(self) -> None:
+        r = get_registry()
+        assert r.get_contact_required_fields() is r.get_contact_required_fields()
+
+
+# ---------------------------------------------------------------------------
+# get_field_metadata
+# ---------------------------------------------------------------------------
+
+
+class TestGetFieldMetadata:
+    def test_known_required_field(self) -> None:
+        meta = get_registry().get_field_metadata("source_identifier")
+        assert isinstance(meta, FieldMetadata)
+        assert meta.required is True
+        assert meta.recommended is False
+        assert meta.description != ""
+
+    def test_known_recommended_field(self) -> None:
+        # source_port is x-recommended in the core schema
+        meta = get_registry().get_field_metadata("source_port")
+        assert meta is not None
+        assert meta.recommended is True
+        assert meta.required is False
+
+    def test_known_optional_field(self) -> None:
+        # description is an optional, non-recommended core field
+        meta = get_registry().get_field_metadata("description")
+        assert meta is not None
+        assert meta.required is False
+        assert meta.recommended is False
+        assert meta.type == "string"
+
+    def test_known_recommended_numeric_field(self) -> None:
+        # confidence is x-recommended with numeric constraints
+        meta = get_registry().get_field_metadata("confidence")
+        assert meta is not None
+        assert meta.required is False
+        assert meta.recommended is True
+        assert meta.minimum is not None
+        assert meta.maximum is not None
+
+    def test_unknown_field_returns_none(self) -> None:
+        assert get_registry().get_field_metadata("nonexistent_field_xyz") is None
+
+    def test_field_with_enum(self) -> None:
+        # category has an enum constraint in the core schema
+        meta = get_registry().get_field_metadata("category")
+        assert meta is not None
+        assert meta.enum is not None
+        assert len(meta.enum) == 7
+
+
+# ---------------------------------------------------------------------------
+# get_core_property_names
+# ---------------------------------------------------------------------------
+
+
+class TestGetCorePropertyNames:
+    def test_contains_known_fields(self) -> None:
+        names = get_registry().get_core_property_names()
+        for f in (
+            "xarf_version",
+            "report_id",
+            "timestamp",
+            "reporter",
+            "sender",
+            "source_identifier",
+            "category",
+            "type",
+        ):
+            assert f in names
+
+
+
+# ---------------------------------------------------------------------------
+# get_type_schema
+# ---------------------------------------------------------------------------
+
+
+class TestGetTypeSchema:
+    def test_known_type_returns_dict(self) -> None:
+        schema = get_registry().get_type_schema("messaging", "spam")
+        assert isinstance(schema, dict)
+        assert "allOf" in schema or "properties" in schema
+
+    def test_unknown_type_returns_none(self) -> None:
+        assert get_registry().get_type_schema("messaging", "nonexistent") is None
+
+    def test_unknown_category_returns_none(self) -> None:
+        assert get_registry().get_type_schema("nope", "spam") is None
+
+    def test_all_known_type_schemas_loadable(self) -> None:
+        r = get_registry()
+        for category, types in r.get_all_types().items():
+            for type_ in types:
+                schema = r.get_type_schema(category, type_)
+                assert schema is not None, f"Missing schema for {category}/{type_}"
+
+
+# ---------------------------------------------------------------------------
+# get_category_fields
+# ---------------------------------------------------------------------------
+
+
+class TestGetCategoryFields:
+    def test_spam_has_type_specific_fields(self) -> None:
+        fields = get_registry().get_category_fields("messaging", "spam")
+        # protocol is spam-specific (not in core schema)
+        assert "protocol" in fields
+
+    def test_excludes_core_fields(self) -> None:
+        core_fields = get_registry().get_core_property_names()
+        fields = get_registry().get_category_fields("messaging", "spam")
+        for f in fields:
+            assert f not in core_fields, f"Core field '{f}' leaked into category fields"
+
+    def test_excludes_category_and_type_meta_fields(self) -> None:
+        fields = get_registry().get_category_fields("messaging", "spam")
+        assert "category" not in fields
+        assert "type" not in fields
+
+    def test_unknown_type_returns_empty_list(self) -> None:
+        assert get_registry().get_category_fields("messaging", "nonexistent") == []
+
+    def test_content_base_fields_are_included_via_ref(self) -> None:
+        # content types use allOf $ref to content-base.json;
+        # content-base fields should appear in get_category_fields
+        fields = get_registry().get_category_fields("content", "phishing")
+        # url is a content-base field
+        assert "url" in fields
+
+    def test_no_duplicate_fields(self) -> None:
+        fields = get_registry().get_category_fields("content", "phishing")
+        assert len(fields) == len(set(fields))
+
+
+# ---------------------------------------------------------------------------
+# get_all_fields_for_category
+# ---------------------------------------------------------------------------
+
+
+class TestGetAllFieldsForCategory:
+    def test_messaging_union_includes_fields_from_both_types(self) -> None:
+        fields = get_registry().get_all_fields_for_category("messaging")
+        # spam-specific (not in bulk_messaging)
+        assert "spam_indicators" in fields
+        # bulk_messaging-specific (not in spam)
+        assert "unsubscribe_provided" in fields
+
+    def test_excludes_core_fields(self) -> None:
+        core_fields = get_registry().get_core_property_names()
+        all_fields = get_registry().get_all_fields_for_category("connection")
+        for f in all_fields:
+            assert f not in core_fields, (
+                f"Core field '{f}' leaked into get_all_fields_for_category"
+            )  # noqa: E501
+
+    def test_unknown_category_returns_empty_set(self) -> None:
+        assert get_registry().get_all_fields_for_category("nonexistent") == set()
+
+    def test_is_superset_of_single_type_fields(self) -> None:
+        r = get_registry()
+        spam_fields = set(r.get_category_fields("messaging", "spam"))
+        all_messaging = r.get_all_fields_for_category("messaging")
+        assert spam_fields.issubset(all_messaging)
diff --git a/xarf/__init__.py b/xarf/__init__.py
index 8c488bb..b03d9ce 100644
--- a/xarf/__init__.py
+++ b/xarf/__init__.py
@@ -27,6 +27,13 @@
     XARFEvidence,
     XARFReport,
 )
+from xarf.schema_registry import (
+    FieldMetadata,
+    SchemaRegistry,
+    get_registry,
+    reset_registry,
+    schema_registry,
+)
 from xarf.types_connection import (
     ConnectionBaseReport,
     ConnectionReport,
@@ -136,6 +143,12 @@
     "XARFValidationError",
     "XARFParseError",
     "XARFSchemaError",
+    # Schema registry
+    "schema_registry",
+    "SchemaRegistry",
+    "FieldMetadata",
+    "get_registry",
+    "reset_registry",
     # v3 compatibility
     "is_v3_report",
     "convert_v3_to_v4",
diff --git a/xarf/schema_registry.py b/xarf/schema_registry.py
new file mode 100644
index 0000000..2acdcbb
--- /dev/null
+++ b/xarf/schema_registry.py
@@ -0,0 +1,558 @@
+"""Schema Registry — schema-driven source of truth for categories, types, and metadata.
+
+Python port of ``schema-registry.ts`` from the JavaScript reference implementation.
+
+Provides centralized, schema-derived access to valid categories, types, required fields,
+and field metadata without any hardcoded enums or lists.
+
+Example:
+    >>> from xarf import schema_registry
+    >>> schema_registry.get_categories()
+    {'messaging', 'connection', 'content', ...}
+    >>> schema_registry.get_types_for_category("connection")
+    {'ddos', 'login_attack', ...}
+    >>> schema_registry.is_valid_type("messaging", "spam")
+    True
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from importlib import resources
+from pathlib import Path
+from typing import Any
+
+from xarf.exceptions import XARFSchemaError
+
+# ---------------------------------------------------------------------------
+# FieldMetadata
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class FieldMetadata:
+    """Metadata extracted from a JSON schema property definition.
+
+    Attributes:
+        description: Human-readable field description from the schema.
+        required: Whether the field is in the core schema ``required`` array.
+        recommended: Whether the field carries ``x-recommended: true``.
+        type: JSON Schema ``type`` value (e.g. ``"string"``, ``"integer"``).
+        enum: Allowed values if the field has an ``enum`` constraint.
+        format: JSON Schema ``format`` value (e.g. ``"email"``, ``"uuid"``).
+        minimum: Numeric minimum constraint, if present.
+        maximum: Numeric maximum constraint, if present.
+    """
+
+    description: str
+    required: bool
+    recommended: bool
+    type: str | None = None
+    enum: list[Any] | None = None
+    format: str | None = None
+    minimum: float | None = None
+    maximum: float | None = None
+
+
+# ---------------------------------------------------------------------------
+# Internal type aliases
+# ---------------------------------------------------------------------------
+
+_SchemaDict = dict[str, Any]
+
+# ---------------------------------------------------------------------------
+# SchemaRegistry
+# ---------------------------------------------------------------------------
+
+
+class SchemaRegistry:
+    """Singleton registry that loads XARF JSON schemas and exposes validation rules.
+
+    All public methods are cached after first access.  The registry is initialised
+    lazily by :func:`get_registry` and exposed as the module-level
+    :data:`schema_registry` singleton.
+
+    Raises:
+        XARFSchemaError: On construction, if the bundled schemas cannot be located
+            or the core schema cannot be parsed.
+    """
+
+    def __init__(self) -> None:
+        """Load bundled schemas and build internal caches."""
+        self._schemas_dir: Path = self._find_schemas_dir()
+        self._core_schema: _SchemaDict = self._load_core_schema()
+        self._type_schemas: dict[str, _SchemaDict] = {}
+        self._scan_type_schemas()
+
+        # Lazy-init caches
+        self._categories_cache: set[str] | None = None
+        self._types_per_category_cache: dict[str, set[str]] | None = None
+        self._required_fields_cache: set[str] | None = None
+        self._contact_required_fields_cache: set[str] | None = None
+
+    # ------------------------------------------------------------------
+    # Schema loading helpers
+    # ------------------------------------------------------------------
+
+    def _find_schemas_dir(self) -> Path:
+        """Locate the bundled ``schemas/`` directory inside the package.
+
+        Returns:
+            Absolute path to the schemas directory.
+
+        Raises:
+            XARFSchemaError: If the directory cannot be found.
+        """
+        try:
+            pkg = resources.files("xarf")
+            schemas_path = Path(str(pkg)) / "schemas"
+            if not schemas_path.is_dir():
+                raise XARFSchemaError(
+                    f"Bundled schemas directory not found at {schemas_path}. "
+                    "Run 'python scripts/fetch_schemas.py' to download schemas."
+                )
+            return schemas_path
+        except (TypeError, FileNotFoundError) as exc:
+            raise XARFSchemaError(
+                "Could not locate the xarf package directory while searching "
+                "for bundled schemas."
+            ) from exc
+
+    def _load_json_file(self, path: Path) -> _SchemaDict | None:
+        """Load and parse a single JSON file.
+
+        Args:
+            path: Absolute path to the JSON file.
+
+        Returns:
+            Parsed dict, or ``None`` if the file cannot be read or parsed.
+        """
+        try:
+            with path.open(encoding="utf-8") as fh:
+                return json.load(fh)  # type: ignore[no-any-return]
+        except (OSError, json.JSONDecodeError):
+            return None
+
+    def _load_core_schema(self) -> _SchemaDict:
+        """Load ``xarf-core.json``.
+
+        Returns:
+            Parsed core schema dict.
+
+        Raises:
+            XARFSchemaError: If the file is missing or cannot be parsed.
+        """
+        core_path = self._schemas_dir / "xarf-core.json"
+        schema = self._load_json_file(core_path)
+        if schema is None:
+            raise XARFSchemaError(
+                f"Failed to load core schema from {core_path}. "
+                "The bundled schemas may be corrupted."
+            )
+        return schema
+
+    def _scan_type_schemas(self) -> None:
+        """Scan ``schemas/types/`` and populate :attr:`_type_schemas`.
+
+        Filenames follow the pattern ``{category}-{type}.json``.  The type
+        portion may contain hyphens (e.g. ``login-attack``), which are
+        normalised to underscores for the registry key (``login_attack``),
+        matching the Python model naming convention.
+
+        ``content-base.json`` is a shared base schema and is skipped.
+        """
+        types_dir = self._schemas_dir / "types"
+        if not types_dir.is_dir():
+            return
+
+        for json_file in sorted(types_dir.glob("*.json")):
+            stem = json_file.stem
+            if stem == "content-base":
+                continue
+            # Split on first hyphen only to get category; rest is type
+            parts = stem.split("-", 1)
+            if len(parts) != 2:
+                continue
+            category, raw_type = parts
+            normalised_type = raw_type.replace("-", "_")
+            schema = self._load_json_file(json_file)
+            if schema is not None:
+                self._type_schemas[f"{category}/{normalised_type}"] = schema
+
+    # ------------------------------------------------------------------
+    # Category / type enumeration
+    # ------------------------------------------------------------------
+
+    def get_categories(self) -> set[str]:
+        """Return all valid categories derived from the core schema enum.
+
+        Returns:
+            Set of category name strings (e.g. ``{'messaging', 'connection', ...}``).
+        """
+        if self._categories_cache is not None:
+            return self._categories_cache
+
+        categories: set[str] = set()
+        props = self._core_schema.get("properties", {})
+        cat_enum = props.get("category", {}).get("enum", [])
+        for cat in cat_enum:
+            categories.add(str(cat))
+
+        self._categories_cache = categories
+        return categories
+
+    def get_types_for_category(self, category: str) -> set[str]:
+        """Return valid type names for a given category.
+
+        Args:
+            category: Category name (e.g. ``"connection"``).
+
+        Returns:
+            Set of type name strings for the category, or an empty set if the
+            category is unknown.
+        """
+        return self.get_all_types().get(category, set())
+
+    def get_all_types(self) -> dict[str, set[str]]:
+        """Return all types organised by category.
+
+        Returns:
+            Mapping of category name → set of type names.
+        """
+        if self._types_per_category_cache is not None:
+            return self._types_per_category_cache
+
+        cache: dict[str, set[str]] = {}
+        for key in self._type_schemas:
+            category, type_ = key.split("/", 1)
+            cache.setdefault(category, set()).add(type_)
+
+        self._types_per_category_cache = cache
+        return cache
+
+    # ------------------------------------------------------------------
+    # Validation helpers
+    # ------------------------------------------------------------------
+
+    def is_valid_category(self, category: str) -> bool:
+        """Check whether *category* is a known XARF category.
+
+        Args:
+            category: Category name to check.
+
+        Returns:
+            ``True`` if the category appears in the core schema enum.
+        """
+        return category in self.get_categories()
+
+    def is_valid_type(self, category: str, type_: str) -> bool:
+        """Check whether *type_* is valid for *category*.
+
+        Args:
+            category: Category name.
+            type_: Type name to check.
+
+        Returns:
+            ``True`` if the ``category/type_`` combination exists in the
+            scanned type schemas.
+        """
+        return type_ in self.get_types_for_category(category)
+
+    # ------------------------------------------------------------------
+    # Required / contact fields
+    # ------------------------------------------------------------------
+
+    def get_required_fields(self) -> set[str]:
+        """Return the set of fields listed as required in the core schema.
+
+        Returns:
+            Set of required field name strings.
+        """
+        if self._required_fields_cache is not None:
+            return self._required_fields_cache
+
+        self._required_fields_cache = set(self._core_schema.get("required", []))
+        return self._required_fields_cache
+
+    def get_contact_required_fields(self) -> set[str]:
+        """Return the required fields for the ``contact_info`` sub-object.
+
+        Falls back to ``{"org", "contact", "domain"}`` if the schema does not
+        define them explicitly (matching the JS fallback).
+
+        Returns:
+            Set of required contact field name strings.
+        """
+        if self._contact_required_fields_cache is not None:
+            return self._contact_required_fields_cache
+
+        defs = self._core_schema.get("$defs", {})
+        contact_def = defs.get("contact_info", {})
+        required = contact_def.get("required", ["org", "contact", "domain"])
+        self._contact_required_fields_cache = set(required)
+        return self._contact_required_fields_cache
+
+    # ------------------------------------------------------------------
+    # Schema / field access
+    # ------------------------------------------------------------------
+
+    def get_type_schema(self, category: str, type_: str) -> dict[str, Any] | None:
+        """Return the raw schema dict for a specific ``category/type_`` pair.
+
+        Args:
+            category: Category name.
+            type_: Type name.
+
+        Returns:
+            Schema dict, or ``None`` if the combination is unknown.
+        """
+        return self._type_schemas.get(f"{category}/{type_}")
+
+    def get_field_metadata(self, field_name: str) -> FieldMetadata | None:
+        """Return metadata for a field defined in the core schema.
+
+        Args:
+            field_name: Name of the field to look up.
+
+        Returns:
+            :class:`FieldMetadata` instance, or ``None`` if the field is not
+            in the core schema properties.
+        """
+        props = self._core_schema.get("properties", {})
+        prop = props.get(field_name)
+        if prop is None:
+            return None
+
+        return FieldMetadata(
+            description=prop.get("description", ""),
+            required=field_name in self.get_required_fields(),
+            recommended=prop.get("x-recommended") is True,
+            type=prop.get("type"),
+            enum=prop.get("enum"),
+            format=prop.get("format"),
+            minimum=prop.get("minimum"),
+            maximum=prop.get("maximum"),
+        )
+
+    def get_core_property_names(self) -> set[str]:
+        """Return all property names defined in the core schema.
+
+        Returns:
+            Set of property name strings.
+        """
+        return set(self._core_schema.get("properties", {}).keys())
+
+    def get_category_fields(self, category: str, type_: str) -> list[str]:
+        """Return type-specific field names for a ``category/type_`` pair.
+
+        These are fields defined in the type schema that are *not* part of the
+        core schema (i.e. the category-specific additions).  Ordering is
+        preserved, matching the JS array return.
+
+        Args:
+            category: Category name.
+            type_: Type name.
+
+        Returns:
+            Ordered list of category-specific field names, or an empty list if
+            the ``category/type_`` combination is unknown.
+        """
+        schema = self.get_type_schema(category, type_)
+        if schema is None:
+            return []
+
+        core_fields = self.get_core_property_names()
+        result: list[str] = []
+        self._extract_fields_from_schema(schema, core_fields, result)
+        return result
+
+    def get_all_fields_for_category(self, category: str) -> set[str]:
+        """Return the union of all type-specific fields across a category.
+
+        Useful for building exhaustive field sets per category (e.g. for
+        unknown-field detection in the parser).
+
+        Args:
+            category: Category name.
+
+        Returns:
+            Set of all field names used by any type in the category.
+        """
+        all_fields: set[str] = set()
+        for type_ in self.get_types_for_category(category):
+            all_fields.update(self.get_category_fields(category, type_))
+        return all_fields
+
+    # ------------------------------------------------------------------
+    # Health check
+    # ------------------------------------------------------------------
+
+    def is_loaded(self) -> bool:
+        """Return whether the core schema was successfully loaded.
+
+        Returns:
+            ``True`` if the core schema is present in memory.
+        """
+        return bool(self._core_schema)
+
+    # ------------------------------------------------------------------
+    # Private schema traversal helpers (mirrors JS private methods)
+    # ------------------------------------------------------------------
+
+    def _extract_fields_from_schema(
+        self,
+        schema: _SchemaDict,
+        core_fields: set[str],
+        result: list[str],
+    ) -> None:
+        """Recursively collect category-specific fields from *schema*.
+
+        Args:
+            schema: Schema dict to inspect.
+            core_fields: Set of core field names to exclude.
+            result: Accumulator list; mutated in place.
+        """
+        self._extract_direct_properties(schema, core_fields, result)
+        self._extract_from_all_of(schema, core_fields, result)
+
+    def _extract_direct_properties(
+        self,
+        schema: _SchemaDict,
+        core_fields: set[str],
+        result: list[str],
+    ) -> None:
+        """Collect fields from the ``properties`` key of *schema*.
+
+        Args:
+            schema: Schema dict to inspect.
+            core_fields: Set of core field names to exclude.
+            result: Accumulator list; mutated in place.
+        """
+        for field_name in schema.get("properties", {}):
+            if field_name in core_fields:
+                continue
+            if field_name in ("category", "type"):
+                continue
+            if field_name not in result:
+                result.append(field_name)
+
+    def _extract_from_all_of(
+        self,
+        schema: _SchemaDict,
+        core_fields: set[str],
+        result: list[str],
+    ) -> None:
+        """Collect fields from each entry in ``allOf``.
+
+        Args:
+            schema: Schema dict that may contain an ``allOf`` array.
+            core_fields: Set of core field names to exclude.
+            result: Accumulator list; mutated in place.
+        """
+        for sub_schema in schema.get("allOf", []):
+            self._process_sub_schema(sub_schema, core_fields, result)
+
+    def _process_sub_schema(
+        self,
+        sub_schema: _SchemaDict,
+        core_fields: set[str],
+        result: list[str],
+    ) -> None:
+        """Dispatch a sub-schema to the appropriate extraction path.
+
+        If the sub-schema is a ``$ref``, delegate to
+        :meth:`_process_schema_reference`; otherwise recurse into it directly.
+
+        Args:
+            sub_schema: Individual entry from an ``allOf`` array.
+            core_fields: Set of core field names to exclude.
+            result: Accumulator list; mutated in place.
+        """
+        ref = sub_schema.get("$ref")
+        if ref:
+            self._process_schema_reference(ref, core_fields, result)
+        else:
+            self._extract_fields_from_schema(sub_schema, core_fields, result)
+
+    def _process_schema_reference(
+        self,
+        ref: str,
+        core_fields: set[str],
+        result: list[str],
+    ) -> None:
+        """Follow a ``$ref`` only when it points to a ``-base.json`` schema.
+
+        Mirrors the JS behaviour: references to the core schema
+        (``../xarf-core.json``) are intentionally ignored here because core
+        fields are already captured in *core_fields*.  Only base schemas such
+        as ``./content-base.json`` are resolved.
+
+        Args:
+            ref: The ``$ref`` value from the schema.
+            core_fields: Set of core field names to exclude.
+            result: Accumulator list; mutated in place.
+        """
+        if "-base.json" not in ref:
+            return
+        base_schema = self._load_base_schema(ref)
+        if base_schema is not None:
+            self._extract_fields_from_schema(base_schema, core_fields, result)
+
+    def _load_base_schema(self, ref: str) -> _SchemaDict | None:
+        """Load a base schema file referenced by ``$ref``.
+
+        Args:
+            ref: The ``$ref`` value (e.g. ``"./content-base.json"``).
+
+        Returns:
+            Parsed schema dict, or ``None`` if the file cannot be loaded.
+        """
+        # Strip leading ./ or ../ path prefix to get a bare filename.
+        filename = ref.removeprefix("./").removeprefix("../")
+        schema_path = self._schemas_dir / "types" / filename
+        return self._load_json_file(schema_path)
+
+
+# ---------------------------------------------------------------------------
+# Module-level singleton
+# ---------------------------------------------------------------------------
+
+_registry: SchemaRegistry | None = None
+
+
+def get_registry() -> SchemaRegistry:
+    """Return the module-level :class:`SchemaRegistry` singleton.
+
+    Creates it on first call.
+
+    Returns:
+        The shared :class:`SchemaRegistry` instance.
+
+    Raises:
+        XARFSchemaError: If schema initialisation fails.
+    """
+    global _registry  # noqa: PLW0603
+    if _registry is None:
+        _registry = SchemaRegistry()
+    return _registry
+
+
+def reset_registry() -> None:
+    """Reset the module-level singleton.
+
+    The next call to :func:`get_registry` (or any access via the
+    :data:`schema_registry` convenience alias) will re-initialise the registry
+    from scratch.
+
+    Warning:
+        This function is intended **exclusively for test isolation**.  Do not
+        call it in production code.
+    """
+    global _registry  # noqa: PLW0603
+    _registry = None
+
+
+#: Convenience singleton — equivalent to ``get_registry()``.
+#: Import this directly: ``from xarf import schema_registry``.
+schema_registry: SchemaRegistry = get_registry()

From fe02e82815aa582c4d92e67e163e29cdb8f9839d Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Wed, 25 Mar 2026 18:06:04 +0100
Subject: [PATCH 06/13] Add schema validator. Cleanup stale tests.

---
 tests/test_generator.py        |  34 ---
 tests/test_parser.py           | 226 -----------------
 tests/test_schema_validator.py | 239 ++++++++++++++++++
 tests/test_security.py         | 374 ----------------------------
 tests/test_v3_compatibility.py | 398 ------------------------------
 tests/test_validation.py       | 435 ---------------------------------
 xarf/__init__.py               |   4 +
 xarf/schema_validator.py       | 406 ++++++++++++++++++++++++++++++
 8 files changed, 649 insertions(+), 1467 deletions(-)
 delete mode 100644 tests/test_generator.py
 delete mode 100644 tests/test_parser.py
 create mode 100644 tests/test_schema_validator.py
 delete mode 100644 tests/test_security.py
 delete mode 100644 tests/test_v3_compatibility.py
 delete mode 100644 tests/test_validation.py
 create mode 100644 xarf/schema_validator.py

diff --git a/tests/test_generator.py b/tests/test_generator.py
deleted file mode 100644
index c2560c4..0000000
--- a/tests/test_generator.py
+++ /dev/null
@@ -1,34 +0,0 @@
-"""Tests for XARF Report Generator (if implemented)."""
-
-import uuid
-from datetime import datetime, timezone
-
-from xarf.models import MessagingReport, XARFReporter
-
-
-class TestReportGeneration:
-    """Test report generation and helper functions."""
-
-    def test_create_messaging_report(self):
-        """Test creating a messaging report programmatically."""
-        reporter = XARFReporter(
-            org="Test Organization", contact="abuse@test.com", type="automated"
-        )
-
-        report = MessagingReport(
-            xarf_version="4.0.0",
-            report_id=str(uuid.uuid4()),
-            timestamp=datetime.now(timezone.utc),
-            reporter=reporter,
-            source_identifier="192.0.2.1",
-            category="messaging",
-            type="spam",
-            evidence_source="spamtrap",
-            protocol="smtp",
-            smtp_from="spammer@example.com",
-            subject="Spam Message",
-        )
-
-        assert report.category == "messaging"
-        assert report.type == "spam"
-        assert report.smtp_from == "spammer@example.com"
diff --git a/tests/test_parser.py b/tests/test_parser.py
deleted file mode 100644
index 9c52568..0000000
--- a/tests/test_parser.py
+++ /dev/null
@@ -1,226 +0,0 @@
-"""Tests for XARF Parser."""
-
-import json
-
-import pytest
-
-from xarf import XARFParseError, XARFParser, XARFValidationError
-from xarf.models import ConnectionReport, ContentReport, MessagingReport
-
-
-class TestXARFParser:
-    """Test XARF Parser functionality."""
-
-    def test_parse_valid_messaging_report(self):
-        """Test parsing valid messaging report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test Org",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.100",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-            "protocol": "smtp",
-            "smtp_from": "spammer@example.com",
-            "subject": "Test Spam",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-
-        assert isinstance(report, MessagingReport)
-        assert report.category == "messaging"
-        assert report.type == "spam"
-        assert report.smtp_from == "spammer@example.com"
-
-    def test_parse_valid_connection_report(self):
-        """Test parsing valid connection report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "b2c3d4e5-f6g7-8901-bcde-f1234567890a",
-            "timestamp": "2024-01-15T11:00:00Z",
-            "reporter": {
-                "org": "Security Monitor",
-                "contact": "security@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.200",
-            "category": "connection",
-            "type": "ddos",
-            "evidence_source": "honeypot",
-            "destination_ip": "203.0.113.10",
-            "protocol": "tcp",
-            "destination_port": 80,
-            "attack_type": "syn_flood",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-
-        assert isinstance(report, ConnectionReport)
-        assert report.category == "connection"
-        assert report.type == "ddos"
-        assert report.destination_ip == "203.0.113.10"
-
-    def test_parse_valid_content_report(self):
-        """Test parsing valid content report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "c3d4e5f6-g7h8-9012-cdef-234567890abc",
-            "timestamp": "2024-01-15T12:00:00Z",
-            "reporter": {
-                "org": "Web Security",
-                "contact": "web@example.com",
-                "type": "manual",
-            },
-            "source_identifier": "192.0.2.300",
-            "category": "content",
-            "type": "phishing_site",
-            "evidence_source": "user_report",
-            "url": "http://phishing.example.com",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-
-        assert isinstance(report, ContentReport)
-        assert report.category == "content"
-        assert report.type == "phishing_site"
-        assert report.url == "http://phishing.example.com"
-
-    def test_parse_json_string(self):
-        """Test parsing from JSON string."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-id",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(json.dumps(report_data))
-
-        assert report.category == "messaging"
-        assert report.type == "spam"
-
-    def test_validation_errors(self):
-        """Test validation error collection."""
-        invalid_data = {
-            "xarf_version": "3.0.0",  # Wrong version
-            "report_id": "test-id",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(invalid_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert len(errors) > 0
-        assert "Unsupported XARF version" in errors[0]
-
-    def test_strict_mode_validation_error(self):
-        """Test strict mode raises validation errors."""
-        invalid_data = {
-            "xarf_version": "4.0.0",
-            # Missing required fields
-        }
-
-        parser = XARFParser(strict=True)
-
-        with pytest.raises(XARFValidationError):
-            parser.parse(invalid_data)
-
-    def test_invalid_json_error(self):
-        """Test invalid JSON handling."""
-        parser = XARFParser()
-
-        with pytest.raises(XARFParseError):
-            parser.parse("{invalid json}")
-
-    def test_unsupported_category_alpha(self):
-        """Test unsupported category in alpha version."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-id",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "vulnerability",  # Not supported in alpha
-            "type": "cve",
-            "evidence_source": "vulnerability_scan",
-        }
-
-        parser = XARFParser(strict=False)
-        report = parser.parse(report_data)
-
-        # Should fall back to base model
-        assert report.category == "vulnerability"
-        errors = parser.get_errors()
-        assert len(errors) == 1
-        assert "Unsupported category" in errors[0]
-
-    def test_missing_required_fields(self):
-        """Test missing required field validation."""
-        invalid_data = {
-            "xarf_version": "4.0.0",
-            # Missing most required fields
-        }
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(invalid_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("Missing required fields" in error for error in errors)
-
-    def test_invalid_reporter_type(self):
-        """Test invalid reporter type validation."""
-        invalid_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-id",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "invalid_type",  # Invalid
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(invalid_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("Invalid reporter type" in error for error in errors)
diff --git a/tests/test_schema_validator.py b/tests/test_schema_validator.py
new file mode 100644
index 0000000..bafe41f
--- /dev/null
+++ b/tests/test_schema_validator.py
@@ -0,0 +1,239 @@
+"""Tests for xarf.schema_validator.SchemaValidator."""
+
+from collections import deque
+
+import jsonschema.exceptions
+import pytest
+
+from xarf import ContactInfo, SpamReport
+from xarf.models import ValidationError
+from xarf.schema_validator import SchemaValidator, schema_validator
+
+
+# ---------------------------------------------------------------------------
+# Helper fixture
+# ---------------------------------------------------------------------------
+
+
+def _valid_spam_report() -> SpamReport:
+    """Return a fully valid SpamReport for use in tests.
+
+    Includes smtp_from and source_port because the schema conditionally
+    requires them when protocol is "smtp".
+
+    Returns:
+        A SpamReport with all schema-required fields populated.
+    """
+    return SpamReport(
+        xarf_version="4.2.0",
+        report_id="02eb480f-8172-431a-9276-c28ba90f694a",
+        timestamp="2025-01-11T10:59:45Z",
+        reporter=ContactInfo(org="Test Org", contact="test@test.com", domain="test.com"),
+        sender=ContactInfo(org="Test Org", contact="test@test.com", domain="test.com"),
+        source_identifier="192.168.1.1",
+        category="messaging",
+        type="spam",
+        protocol="smtp",
+        smtp_from="spammer@example.com",
+        source_port=25,
+    )
+
+
+# ---------------------------------------------------------------------------
+# TestValidReports
+# ---------------------------------------------------------------------------
+
+
+class TestValidReports:
+    def test_valid_spam_report_has_no_errors(self) -> None:
+        report = _valid_spam_report()
+        errors = schema_validator.validate(report)
+        assert errors == []
+
+
+# ---------------------------------------------------------------------------
+# TestInvalidReports
+# ---------------------------------------------------------------------------
+
+
+class TestInvalidReports:
+    def test_invalid_report_id_format(self) -> None:
+        report = _valid_spam_report()
+        report.report_id = "not-a-uuid"
+        errors = schema_validator.validate(report)
+        assert len(errors) >= 1
+        fields = [e.field for e in errors]
+        assert any("report_id" in f for f in fields)
+
+    def test_invalid_xarf_version_pattern(self) -> None:
+        report = _valid_spam_report()
+        report.xarf_version = "3.0.0"
+        errors = schema_validator.validate(report)
+        assert len(errors) >= 1
+        assert any(e.field == "xarf_version" for e in errors)
+
+    def test_errors_are_validation_error_instances(self) -> None:
+        report = _valid_spam_report()
+        report.report_id = "not-a-uuid"
+        errors = schema_validator.validate(report)
+        assert all(isinstance(e, ValidationError) for e in errors)
+        assert all(len(e.message) > 0 for e in errors)
+
+
+# ---------------------------------------------------------------------------
+# TestStrictMode
+# ---------------------------------------------------------------------------
+
+
+class TestStrictMode:
+    def test_recommended_field_missing_passes_normal_mode(self) -> None:
+        report = _valid_spam_report()
+        # evidence_source is x-recommended; omitting it is fine in normal mode
+        assert report.evidence_source is None
+        errors = schema_validator.validate(report, strict=False)
+        assert errors == []
+
+    def test_recommended_field_missing_fails_strict_mode(self) -> None:
+        report = _valid_spam_report()
+        assert report.evidence_source is None
+        errors = schema_validator.validate(report, strict=True)
+        assert len(errors) >= 1
+        assert any("evidence_source" in e.message for e in errors)
+
+    def test_strict_mode_valid_when_all_recommended_present(self) -> None:
+        report = _valid_spam_report()
+        # Core x-recommended: evidence_source, source_port (already set), evidence, confidence
+        # evidence_item x-recommended: description, hash
+        # Spam type x-recommended: evidence_source, smtp_to, subject, message_id
+        # confidence is 0.0-1.0 per schema
+        from xarf.models import XARFEvidence
+
+        report.evidence_source = "spamtrap"
+        report.evidence = [
+            XARFEvidence(
+                content_type="message/rfc822",
+                payload="dGVzdA==",
+                description="Spam email evidence",
+                hash="sha256:abc123def456abc123def456abc123def456abc123def456abc123def456abc12345",
+            )
+        ]
+        report.confidence = 1  # schema max is 1.0
+        report.smtp_to = "victim@example.com"
+        report.subject = "Buy now!"
+        report.message_id = "<abc123@example.com>"
+        errors = schema_validator.validate(report, strict=True)
+        assert errors == []
+
+
+# ---------------------------------------------------------------------------
+# TestErrorDeduplication
+# ---------------------------------------------------------------------------
+
+
+class TestErrorDeduplication:
+    def test_no_duplicate_errors(self) -> None:
+        report = _valid_spam_report()
+        report.report_id = "not-a-uuid"
+        errors = schema_validator.validate(report)
+        pairs = [(e.field, e.message) for e in errors]
+        assert len(pairs) == len(set(pairs))
+
+
+# ---------------------------------------------------------------------------
+# TestFormatValidationErrorHelper
+# ---------------------------------------------------------------------------
+
+
+class TestFormatValidationErrorHelper:
+    def _make_validator(self) -> SchemaValidator:
+        """Return a SchemaValidator (loads schemas lazily on demand)."""
+        return SchemaValidator()
+
+    def test_field_from_absolute_path(self) -> None:
+        sv = self._make_validator()
+        err = jsonschema.exceptions.ValidationError(
+            message="test error",
+            path=deque(["reporter", "contact"]),
+            instance="bad-value",
+        )
+        ve = sv._format_validation_error(err)
+        assert ve.field == "reporter.contact"
+
+    def test_empty_field_for_root_error(self) -> None:
+        sv = self._make_validator()
+        err = jsonschema.exceptions.ValidationError(
+            message="root error",
+            path=deque(),
+            instance={"key": "value"},
+        )
+        ve = sv._format_validation_error(err)
+        assert ve.field == ""
+
+    def test_message_is_raw(self) -> None:
+        sv = self._make_validator()
+        raw_message = "some raw jsonschema message"
+        err = jsonschema.exceptions.ValidationError(
+            message=raw_message,
+            path=deque(),
+            instance=None,
+        )
+        ve = sv._format_validation_error(err)
+        assert ve.message == raw_message
+
+    def test_value_is_instance(self) -> None:
+        sv = self._make_validator()
+        instance_value = {"foo": "bar"}
+        err = jsonschema.exceptions.ValidationError(
+            message="test",
+            path=deque(),
+            instance=instance_value,
+        )
+        ve = sv._format_validation_error(err)
+        assert ve.value == instance_value
+
+
+# ---------------------------------------------------------------------------
+# TestSupportedTypes
+# ---------------------------------------------------------------------------
+
+
+class TestSupportedTypes:
+    def test_returns_list_of_strings(self) -> None:
+        sv = SchemaValidator()
+        result = sv.get_supported_types()
+        assert isinstance(result, list)
+        assert all(isinstance(item, str) for item in result)
+
+    def test_contains_known_types(self) -> None:
+        sv = SchemaValidator()
+        result = sv.get_supported_types()
+        assert "messaging/spam" in result
+        assert "connection/ddos" in result
+
+    def test_format_is_category_slash_type(self) -> None:
+        sv = SchemaValidator()
+        result = sv.get_supported_types()
+        assert len(result) > 0
+        for item in result:
+            assert item.count("/") == 1
+
+
+# ---------------------------------------------------------------------------
+# TestHasTypeSchema
+# ---------------------------------------------------------------------------
+
+
+class TestHasTypeSchema:
+    def test_known_pair_returns_true(self) -> None:
+        sv = SchemaValidator()
+        assert sv.has_type_schema("messaging", "spam") is True
+
+    def test_unknown_type_returns_false(self) -> None:
+        sv = SchemaValidator()
+        assert sv.has_type_schema("messaging", "unknown_type") is False
+
+    def test_unknown_category_returns_false(self) -> None:
+        sv = SchemaValidator()
+        assert sv.has_type_schema("unknown_category", "spam") is False
+
+
diff --git a/tests/test_security.py b/tests/test_security.py
deleted file mode 100644
index 4182523..0000000
--- a/tests/test_security.py
+++ /dev/null
@@ -1,374 +0,0 @@
-"""Security-focused tests for UUID generation and timestamp formatting."""
-
-import re
-import uuid
-from datetime import datetime, timezone
-
-from xarf import XARFParser
-
-
-class TestUUIDGeneration:
-    """Test UUID format validation and generation security."""
-
-    def test_valid_uuid_v4_format(self):
-        """Test that valid UUID v4 format is accepted."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "550e8400-e29b-41d4-a716-446655440000",  # Valid UUID v4
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test Org",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        assert report.report_id == "550e8400-e29b-41d4-a716-446655440000"
-
-    def test_uuid_uniqueness(self):
-        """Test that UUIDs are unique when generated."""
-        generated_uuids = set()
-
-        # Generate 1000 UUIDs
-        for _ in range(1000):
-            new_uuid = str(uuid.uuid4())
-            assert new_uuid not in generated_uuids, "UUID collision detected!"
-            generated_uuids.add(new_uuid)
-
-        assert len(generated_uuids) == 1000
-
-    def test_uuid_format_validation(self):
-        """Test UUID format conforms to RFC 4122."""
-        uuid_pattern = re.compile(
-            r"^[0-9a-f]{8}-[0-9a-f]{4}-[4][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$",
-            re.IGNORECASE,
-        )
-
-        # Generate and test 100 UUIDs
-        for _ in range(100):
-            test_uuid = str(uuid.uuid4())
-            assert uuid_pattern.match(test_uuid), f"Invalid UUID format: {test_uuid}"
-
-    def test_uuid_version_4_variant(self):
-        """Test that generated UUIDs are version 4 with correct variant."""
-        for _ in range(100):
-            test_uuid = uuid.uuid4()
-            # Check version (should be 4)
-            assert test_uuid.version == 4, f"Wrong UUID version: {test_uuid.version}"
-            # Check variant (should be RFC 4122)
-            assert (
-                test_uuid.variant == uuid.RFC_4122
-            ), f"Wrong UUID variant: {test_uuid.variant}"
-
-    def test_uuid_randomness(self):
-        """Test UUID randomness (simple entropy check)."""
-        # Generate 100 UUIDs and check they're all different
-        uuids = [str(uuid.uuid4()) for _ in range(100)]
-
-        # Check uniqueness
-        assert len(set(uuids)) == 100, "UUID generation not sufficiently random"
-
-        # Check no sequential patterns
-        for i in range(1, len(uuids)):
-            assert uuids[i] != uuids[i - 1], "Sequential UUIDs detected"
-
-    def test_report_id_string_format(self):
-        """Test that report_id accepts string UUIDs."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": str(uuid.uuid4()),
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-
-        # Verify it's a valid UUID format
-        assert uuid.UUID(report.report_id), "report_id is not a valid UUID"
-
-
-class TestTimestampFormatting:
-    """Test timestamp format validation and security."""
-
-    def test_iso8601_utc_format(self):
-        """Test ISO 8601 UTC timestamp format is accepted."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": str(uuid.uuid4()),
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        assert isinstance(report.timestamp, datetime)
-
-    def test_timestamp_with_timezone(self):
-        """Test timestamp with explicit timezone offset."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": str(uuid.uuid4()),
-            "timestamp": "2024-01-15T10:30:00+00:00",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        assert report.timestamp.tzinfo is not None
-
-    def test_timestamp_microseconds(self):
-        """Test timestamp with microseconds precision."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": str(uuid.uuid4()),
-            "timestamp": "2024-01-15T10:30:00.123456Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        assert report.timestamp.microsecond == 123456
-
-    def test_invalid_timestamp_format(self):
-        """Test that invalid timestamp formats are rejected."""
-        invalid_timestamps = [
-            "10:30:00",  # Time only
-            "2024/01/15 10:30:00",  # Wrong separators
-            "15-01-2024T10:30:00Z",  # Wrong date order
-            "not-a-timestamp",  # Invalid string
-            "1705318200",  # Unix timestamp as string
-        ]
-
-        parser = XARFParser(strict=False)
-
-        for invalid_ts in invalid_timestamps:
-            report_data = {
-                "xarf_version": "4.0.0",
-                "report_id": str(uuid.uuid4()),
-                "timestamp": invalid_ts,
-                "reporter": {
-                    "org": "Test",
-                    "contact": "test@example.com",
-                    "type": "automated",
-                },
-                "source_identifier": "192.0.2.1",
-                "category": "messaging",
-                "type": "spam",
-                "evidence_source": "spamtrap",
-            }
-
-            result = parser.validate(report_data)
-            assert result is False, f"Invalid timestamp accepted: {invalid_ts}"
-            errors = parser.get_errors()
-            assert any(
-                "Invalid timestamp format" in error for error in errors
-            ), f"No timestamp error for: {invalid_ts}"
-
-    def test_timestamp_ordering(self):
-        """Test timestamp chronological ordering."""
-        ts1 = datetime(2024, 1, 15, 10, 0, 0, tzinfo=timezone.utc)
-        ts2 = datetime(2024, 1, 15, 10, 30, 0, tzinfo=timezone.utc)
-        ts3 = datetime(2024, 1, 15, 11, 0, 0, tzinfo=timezone.utc)
-
-        assert ts1 < ts2 < ts3, "Timestamp ordering failed"
-
-    def test_timestamp_immutability(self):
-        """Test that timestamps represent a fixed point in time."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": str(uuid.uuid4()),
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-
-        original_timestamp = report.timestamp
-        # Attempt to modify (should create new object, not modify)
-        new_timestamp = original_timestamp.replace(hour=11)
-
-        assert report.timestamp == original_timestamp
-        assert report.timestamp != new_timestamp
-
-    def test_future_timestamp_detection(self):
-        """Test detection of future timestamps."""
-        from datetime import timedelta
-
-        future_time = datetime.now(timezone.utc) + timedelta(days=1)
-        future_timestamp = future_time.isoformat()
-
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": str(uuid.uuid4()),
-            "timestamp": future_timestamp,
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-
-        # Parser accepts future timestamps (business logic can validate if needed)
-        assert report.timestamp > datetime.now(timezone.utc)
-
-    def test_timestamp_precision(self):
-        """Test timestamp maintains precision."""
-        precise_timestamp = "2024-01-15T10:30:00.123456Z"
-
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": str(uuid.uuid4()),
-            "timestamp": precise_timestamp,
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-
-        # Check microsecond precision is preserved
-        assert report.timestamp.microsecond == 123456
-
-
-class TestSecurityEdgeCases:
-    """Test security-related edge cases."""
-
-    def test_sql_injection_in_report_id(self):
-        """Test that SQL injection attempts in report_id are handled safely."""
-        malicious_ids = [
-            "'; DROP TABLE reports; --",
-            "1' OR '1'='1",
-            "admin'--",
-            "<script>alert('XSS')</script>",
-        ]
-
-        parser = XARFParser(strict=False)
-
-        for malicious_id in malicious_ids:
-            report_data = {
-                "xarf_version": "4.0.0",
-                "report_id": malicious_id,
-                "timestamp": "2024-01-15T10:30:00Z",
-                "reporter": {
-                    "org": "Test",
-                    "contact": "test@example.com",
-                    "type": "automated",
-                },
-                "source_identifier": "192.0.2.1",
-                "category": "messaging",
-                "type": "spam",
-                "evidence_source": "spamtrap",
-            }
-
-            # Parser should accept any string as report_id
-            # Application layer should validate/sanitize
-            report = parser.parse(report_data)
-            assert report.report_id == malicious_id
-
-    def test_extremely_long_uuid(self):
-        """Test handling of excessively long report_id."""
-        long_id = "x" * 10000
-
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": long_id,
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        # Parser accepts it; application should validate length
-        assert len(report.report_id) == 10000
-
-    def test_null_byte_injection(self):
-        """Test handling of null byte injection attempts."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-id\x00malicious",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test\x00Org",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        # Parser accepts null bytes; application should sanitize
-        assert "\x00" in report.report_id
diff --git a/tests/test_v3_compatibility.py b/tests/test_v3_compatibility.py
deleted file mode 100644
index 5906a73..0000000
--- a/tests/test_v3_compatibility.py
+++ /dev/null
@@ -1,398 +0,0 @@
-"""Tests for XARF v3 backwards compatibility."""
-
-import json
-import warnings
-
-from xarf import XARFParser, convert_v3_to_v4, is_v3_report
-from xarf.models import ConnectionReport, ContentReport, MessagingReport
-from xarf.v3_compat import XARFv3DeprecationWarning
-
-
-class TestV3Detection:
-    """Test v3 format detection."""
-
-    def test_detect_v3_report(self):
-        """Test detection of v3 format."""
-        v3_data = {
-            "Version": "3.0.0",
-            "ReporterInfo": {"ReporterOrg": "Test"},
-            "Report": {"ReportClass": "Messaging", "ReportType": "spam"},
-        }
-
-        assert is_v3_report(v3_data) is True
-
-    def test_detect_v4_report(self):
-        """Test v4 format is not detected as v3."""
-        v4_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-id",
-            "category": "messaging",
-        }
-
-        assert is_v3_report(v4_data) is False
-
-    def test_detect_invalid_format(self):
-        """Test detection with neither v3 nor v4 markers."""
-        invalid_data = {"some_field": "value"}
-
-        assert is_v3_report(invalid_data) is False
-
-
-class TestV3Conversion:
-    """Test v3 to v4 conversion."""
-
-    def test_convert_v3_spam_report(self):
-        """Test conversion of v3 spam report."""
-        v3_report = {
-            "Version": "3.0.0",
-            "ReporterInfo": {
-                "ReporterOrg": "Example Anti-Spam",
-                "ReporterOrgEmail": "abuse@example.com",
-            },
-            "Report": {
-                "ReportClass": "Messaging",
-                "ReportType": "spam",
-                "Date": "2024-01-15T14:30:25Z",
-                "Source": {"IP": "192.168.1.100", "Port": 25},
-                "Attachment": [
-                    {
-                        "ContentType": "message/rfc822",
-                        "Description": "Original spam message",
-                        "Data": "VGVzdCBkYXRh",
-                    }
-                ],
-                "AdditionalInfo": {
-                    "Protocol": "smtp",
-                    "SMTPFrom": "spammer@example.com",
-                    "Subject": "Test Spam",
-                    "DetectionMethod": "spamtrap",
-                },
-            },
-        }
-
-        # Suppress deprecation warning for this test
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            v4_report = convert_v3_to_v4(v3_report)
-
-        # Verify base fields
-        assert v4_report["xarf_version"] == "4.0.0"
-        assert "report_id" in v4_report
-        assert v4_report["timestamp"] == "2024-01-15T14:30:25Z"
-        assert v4_report["category"] == "messaging"
-        assert v4_report["type"] == "spam"
-        assert v4_report["source_identifier"] == "192.168.1.100"
-        assert v4_report["evidence_source"] == "spamtrap"
-
-        # Verify reporter
-        assert v4_report["reporter"]["org"] == "Example Anti-Spam"
-        assert v4_report["reporter"]["contact"] == "abuse@example.com"
-        assert v4_report["reporter"]["type"] == "automated"
-
-        # Verify messaging-specific fields
-        assert v4_report["protocol"] == "smtp"
-        assert v4_report["smtp_from"] == "spammer@example.com"
-        assert v4_report["subject"] == "Test Spam"
-
-        # Verify evidence conversion
-        assert len(v4_report["evidence"]) == 1
-        assert v4_report["evidence"][0]["content_type"] == "message/rfc822"
-        assert v4_report["evidence"][0]["payload"] == "VGVzdCBkYXRh"
-
-        # Verify legacy markers
-        assert v4_report["legacy_version"] == "3"
-        assert v4_report["_internal"]["converted_from_v3"] is True
-
-    def test_convert_v3_ddos_report(self):
-        """Test conversion of v3 DDoS report."""
-        v3_report = {
-            "Version": "3.0.0",
-            "ReporterInfo": {
-                "ReporterOrg": "Security Monitor",
-                "ReporterContactEmail": "security@example.com",
-            },
-            "Report": {
-                "ReportClass": "Connection",
-                "ReportType": "ddos",
-                "Date": "2024-01-15T11:00:00Z",
-                "Source": {"IP": "203.0.113.50", "Port": 12345},
-                "DestinationIp": "198.51.100.10",
-                "DestinationPort": 80,
-                "AdditionalInfo": {
-                    "Protocol": "tcp",
-                    "AttackType": "syn_flood",
-                    "PacketCount": 1500000,
-                    "DetectionMethod": "honeypot",
-                },
-            },
-        }
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            v4_report = convert_v3_to_v4(v3_report)
-
-        assert v4_report["category"] == "connection"
-        assert v4_report["type"] == "ddos"
-        assert v4_report["destination_ip"] == "198.51.100.10"
-        assert v4_report["destination_port"] == 80
-        assert v4_report["protocol"] == "tcp"
-        assert v4_report["attack_type"] == "syn_flood"
-        assert v4_report["packet_count"] == 1500000
-        assert v4_report["source_port"] == 12345
-
-    def test_convert_v3_phishing_report(self):
-        """Test conversion of v3 phishing report."""
-        v3_report = {
-            "Version": "3.0.0",
-            "ReporterInfo": {
-                "ReporterOrg": "Web Security",
-                "ReporterOrgEmail": "web@example.com",
-            },
-            "Report": {
-                "ReportClass": "Content",
-                "ReportType": "phishing",
-                "Date": "2024-01-15T12:00:00Z",
-                "Source": {"IP": "192.0.2.50"},
-                "URL": "http://phishing.example.com/fake-bank",
-                "AdditionalInfo": {
-                    "ContentType": "text/html",
-                    "DetectionMethod": "user_report",
-                },
-            },
-        }
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            v4_report = convert_v3_to_v4(v3_report)
-
-        assert v4_report["category"] == "content"
-        assert v4_report["type"] == "phishing"
-        assert v4_report["url"] == "http://phishing.example.com/fake-bank"
-        assert v4_report["content_type"] == "text/html"
-        assert v4_report["evidence_source"] == "user_report"
-
-    def test_deprecation_warning_emitted(self):
-        """Test that deprecation warning is emitted on v3 conversion."""
-        v3_report = {
-            "Version": "3.0.0",
-            "ReporterInfo": {
-                "ReporterOrg": "Test",
-                "ReporterOrgEmail": "test@example.com",
-            },
-            "Report": {
-                "ReportClass": "Messaging",
-                "ReportType": "spam",
-                "Date": "2024-01-15T10:00:00Z",
-                "Source": {"IP": "192.0.2.1"},
-                "AdditionalInfo": {},
-            },
-        }
-
-        with warnings.catch_warnings(record=True) as w:
-            warnings.simplefilter("always")
-            convert_v3_to_v4(v3_report)
-
-            assert len(w) == 1
-            assert issubclass(w[0].category, XARFv3DeprecationWarning)
-            assert "v3 format is deprecated" in str(w[0].message).lower()
-
-
-class TestV3ParserIntegration:
-    """Test v3 compatibility in XARFParser."""
-
-    def test_parser_auto_converts_v3_spam(self):
-        """Test parser automatically converts v3 reports."""
-        v3_json = json.dumps(
-            {
-                "Version": "3.0.0",
-                "ReporterInfo": {
-                    "ReporterOrg": "Spam Filter",
-                    "ReporterOrgEmail": "abuse@filter.example",
-                },
-                "Report": {
-                    "ReportClass": "Messaging",
-                    "ReportType": "spam",
-                    "Date": "2024-01-15T10:30:00Z",
-                    "Source": {"IP": "192.0.2.100"},
-                    "AdditionalInfo": {
-                        "Protocol": "smtp",
-                        "SMTPFrom": "spam@bad.example",
-                        "Subject": "Spam Message",
-                        "DetectionMethod": "spamtrap",
-                    },
-                },
-            }
-        )
-
-        parser = XARFParser()
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            report = parser.parse(v3_json)
-
-        assert isinstance(report, MessagingReport)
-        assert report.category == "messaging"
-        assert report.type == "spam"
-        assert report.smtp_from == "spam@bad.example"
-        assert report.subject == "Spam Message"
-
-    def test_parser_auto_converts_v3_ddos(self):
-        """Test parser converts v3 DDoS reports."""
-        v3_data = {
-            "Version": "3.0.0",
-            "ReporterInfo": {
-                "ReporterOrg": "Network Monitor",
-                "ReporterOrgEmail": "noc@example.com",
-            },
-            "Report": {
-                "ReportClass": "Connection",
-                "ReportType": "ddos",
-                "Date": "2024-01-15T11:00:00Z",
-                "Source": {"IP": "203.0.113.50"},
-                "DestinationIp": "198.51.100.10",
-                "AdditionalInfo": {"Protocol": "tcp", "DetectionMethod": "automated"},
-            },
-        }
-
-        parser = XARFParser()
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            report = parser.parse(v3_data)
-
-        assert isinstance(report, ConnectionReport)
-        assert report.category == "connection"
-        assert report.type == "ddos"
-        assert report.destination_ip == "198.51.100.10"
-        assert report.protocol == "tcp"
-
-    def test_parser_auto_converts_v3_phishing(self):
-        """Test parser converts v3 phishing reports."""
-        v3_data = {
-            "Version": "3.0.0",
-            "ReporterInfo": {
-                "ReporterOrg": "Phishing Watch",
-                "ReporterOrgEmail": "phishing@watch.example",
-            },
-            "Report": {
-                "ReportClass": "Content",
-                "ReportType": "phishing",
-                "Date": "2024-01-15T12:00:00Z",
-                "Source": {"IP": "192.0.2.200"},
-                "URL": "http://fake-bank.example.com",
-                "AdditionalInfo": {"DetectionMethod": "user_report"},
-            },
-        }
-
-        parser = XARFParser()
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            report = parser.parse(v3_data)
-
-        assert isinstance(report, ContentReport)
-        assert report.category == "content"
-        assert report.type == "phishing"
-        assert report.url == "http://fake-bank.example.com"
-
-    def test_parser_validates_converted_v3_report(self):
-        """Test parser validates converted v3 reports."""
-        v3_data = {
-            "Version": "3.0.0",
-            "ReporterInfo": {
-                "ReporterOrg": "Test Org",
-                "ReporterOrgEmail": "test@example.com",
-            },
-            "Report": {
-                "ReportClass": "Messaging",
-                "ReportType": "spam",
-                "Date": "2024-01-15T10:00:00Z",
-                "Source": {"IP": "192.0.2.1"},
-                "AdditionalInfo": {
-                    "Protocol": "smtp",
-                    "SMTPFrom": "spam@example.com",
-                    "Subject": "Test",
-                    "DetectionMethod": "spamtrap",
-                },
-            },
-        }
-
-        parser = XARFParser()
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            # Should parse without validation errors
-            parser.parse(v3_data)
-            assert parser.get_errors() == []
-
-
-class TestV3EdgeCases:
-    """Test edge cases in v3 conversion."""
-
-    def test_missing_optional_fields(self):
-        """Test conversion with missing optional fields."""
-        minimal_v3 = {
-            "Version": "3.0.0",
-            "ReporterInfo": {},
-            "Report": {
-                "ReportClass": "Messaging",
-                "ReportType": "spam",
-                "Source": {},
-            },
-        }
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            v4_report = convert_v3_to_v4(minimal_v3)
-
-        # Should have defaults
-        assert v4_report["reporter"]["org"] == "Unknown"
-        assert "example.com" in v4_report["reporter"]["contact"]
-        assert v4_report["source_identifier"] == "0.0.0.0"
-
-    def test_activity_class_mapped_to_messaging(self):
-        """Test v3 'Activity' class maps to 'messaging'."""
-        v3_report = {
-            "Version": "3.0.0",
-            "ReporterInfo": {
-                "ReporterOrg": "Test",
-                "ReporterOrgEmail": "test@example.com",
-            },
-            "Report": {
-                "ReportClass": "Activity",  # Old v3 class name
-                "ReportType": "spam",
-                "Date": "2024-01-15T10:00:00Z",
-                "Source": {"IP": "192.0.2.1"},
-                "AdditionalInfo": {},
-            },
-        }
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            v4_report = convert_v3_to_v4(v3_report)
-
-        assert v4_report["category"] == "messaging"
-
-    def test_legacy_tags_added(self):
-        """Test legacy information is preserved in tags."""
-        v3_report = {
-            "Version": "3.0.0",
-            "ReporterInfo": {
-                "ReporterOrg": "Test",
-                "ReporterOrgEmail": "test@example.com",
-            },
-            "Report": {
-                "ReportClass": "Messaging",
-                "ReportType": "spam",
-                "Date": "2024-01-15T10:00:00Z",
-                "Source": {"IP": "192.0.2.1"},
-                "AdditionalInfo": {},
-            },
-        }
-
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-            v4_report = convert_v3_to_v4(v3_report)
-
-        assert "legacy:category:Messaging" in v4_report["tags"]
-        assert "legacy:type:spam" in v4_report["tags"]
diff --git a/tests/test_validation.py b/tests/test_validation.py
deleted file mode 100644
index 79b49e7..0000000
--- a/tests/test_validation.py
+++ /dev/null
@@ -1,435 +0,0 @@
-"""Comprehensive validation tests for all XARF categories."""
-
-from xarf import XARFParser
-
-
-class TestCategoryValidation:
-    """Test validation for all 8 XARF categories."""
-
-    def test_messaging_category_valid(self):
-        """Test valid messaging category report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-messaging-001",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Email Provider",
-                "contact": "abuse@emailprovider.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        assert report.category == "messaging"
-        assert report.type == "spam"
-
-    def test_connection_category_valid(self):
-        """Test valid connection category report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-connection-001",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Network Monitor",
-                "contact": "security@network.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.2",
-            "category": "connection",
-            "type": "ddos",
-            "evidence_source": "honeypot",
-            "destination_ip": "203.0.113.1",
-            "protocol": "tcp",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        assert report.category == "connection"
-        assert report.type == "ddos"
-
-    def test_content_category_valid(self):
-        """Test valid content category report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-content-001",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Web Security",
-                "contact": "security@websec.com",
-                "type": "manual",
-            },
-            "source_identifier": "192.0.2.3",
-            "category": "content",
-            "type": "phishing_site",
-            "evidence_source": "user_report",
-            "url": "http://phishing.example.com",
-        }
-
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        assert report.category == "content"
-        assert report.type == "phishing_site"
-
-    def test_infrastructure_category_valid(self):
-        """Test valid infrastructure category report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-infrastructure-001",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Security Research",
-                "contact": "research@security.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.4",
-            "category": "infrastructure",
-            "type": "open_resolver",
-            "evidence_source": "automated_scan",
-        }
-
-        parser = XARFParser(strict=False)
-        report = parser.parse(report_data)
-        assert report.category == "infrastructure"
-        errors = parser.get_errors()
-        # Infrastructure not in alpha, should have warning
-        assert any("Unsupported category" in error for error in errors)
-
-    def test_copyright_category_valid(self):
-        """Test valid copyright category report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-copyright-001",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Copyright Holder",
-                "contact": "legal@copyright.com",
-                "type": "manual",
-            },
-            "source_identifier": "192.0.2.5",
-            "category": "copyright",
-            "type": "file_sharing",
-            "evidence_source": "manual_analysis",
-        }
-
-        parser = XARFParser(strict=False)
-        report = parser.parse(report_data)
-        assert report.category == "copyright"
-
-    def test_vulnerability_category_valid(self):
-        """Test valid vulnerability category report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-vulnerability-001",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Vulnerability Scanner",
-                "contact": "vuln@scanner.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.6",
-            "category": "vulnerability",
-            "type": "cve",
-            "evidence_source": "vulnerability_scan",
-        }
-
-        parser = XARFParser(strict=False)
-        report = parser.parse(report_data)
-        assert report.category == "vulnerability"
-
-    def test_reputation_category_valid(self):
-        """Test valid reputation category report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-reputation-001",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Reputation Service",
-                "contact": "rep@service.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.7",
-            "category": "reputation",
-            "type": "blacklist",
-            "evidence_source": "threat_intelligence",
-        }
-
-        parser = XARFParser(strict=False)
-        report = parser.parse(report_data)
-        assert report.category == "reputation"
-
-    def test_other_category_valid(self):
-        """Test valid other category report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-other-001",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Other Reporter",
-                "contact": "other@reporter.com",
-                "type": "manual",
-            },
-            "source_identifier": "192.0.2.8",
-            "category": "other",
-            "type": "custom_type",
-            "evidence_source": "manual_analysis",
-        }
-
-        parser = XARFParser(strict=False)
-        report = parser.parse(report_data)
-        assert report.category == "other"
-
-
-class TestMandatoryFields:
-    """Test validation of all mandatory fields."""
-
-    def get_valid_base_report(self):
-        """Get a valid base report for testing."""
-        return {
-            "xarf_version": "4.0.0",
-            "report_id": "test-id-001",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test Organization",
-                "contact": "abuse@test.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-        }
-
-    def test_missing_xarf_version(self):
-        """Test validation fails without xarf_version."""
-        report_data = self.get_valid_base_report()
-        del report_data["xarf_version"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("Missing required fields" in error for error in errors)
-
-    def test_missing_report_id(self):
-        """Test validation fails without report_id."""
-        report_data = self.get_valid_base_report()
-        del report_data["report_id"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-
-    def test_missing_timestamp(self):
-        """Test validation fails without timestamp."""
-        report_data = self.get_valid_base_report()
-        del report_data["timestamp"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-
-    def test_missing_reporter(self):
-        """Test validation fails without reporter."""
-        report_data = self.get_valid_base_report()
-        del report_data["reporter"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-
-    def test_missing_source_identifier(self):
-        """Test validation fails without source_identifier."""
-        report_data = self.get_valid_base_report()
-        del report_data["source_identifier"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-
-    def test_missing_category(self):
-        """Test validation fails without category."""
-        report_data = self.get_valid_base_report()
-        del report_data["category"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-
-    def test_missing_type(self):
-        """Test validation fails without type."""
-        report_data = self.get_valid_base_report()
-        del report_data["type"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-
-    def test_missing_evidence_source(self):
-        """Test validation fails without evidence_source."""
-        report_data = self.get_valid_base_report()
-        del report_data["evidence_source"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-
-    def test_invalid_xarf_version(self):
-        """Test validation fails with wrong xarf_version."""
-        report_data = self.get_valid_base_report()
-        report_data["xarf_version"] = "3.0.0"
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("Unsupported XARF version" in error for error in errors)
-
-    def test_invalid_timestamp_format(self):
-        """Test validation fails with invalid timestamp."""
-        report_data = self.get_valid_base_report()
-        report_data["timestamp"] = "not-a-timestamp"
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("Invalid timestamp format" in error for error in errors)
-
-    def test_missing_reporter_org(self):
-        """Test validation fails without reporter.org."""
-        report_data = self.get_valid_base_report()
-        del report_data["reporter"]["org"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("Missing reporter fields" in error for error in errors)
-
-    def test_missing_reporter_contact(self):
-        """Test validation fails without reporter.contact."""
-        report_data = self.get_valid_base_report()
-        del report_data["reporter"]["contact"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-
-    def test_missing_reporter_type(self):
-        """Test validation fails without reporter.type."""
-        report_data = self.get_valid_base_report()
-        del report_data["reporter"]["type"]
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-
-    def test_invalid_reporter_type(self):
-        """Test validation fails with invalid reporter.type."""
-        report_data = self.get_valid_base_report()
-        report_data["reporter"]["type"] = "invalid"
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("Invalid reporter type" in error for error in errors)
-
-
-class TestCategorySpecificFields:
-    """Test category-specific required fields."""
-
-    def test_messaging_missing_protocol(self):
-        """Test messaging report validation without required fields."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-id",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "messaging",
-            "type": "spam",
-            "evidence_source": "spamtrap",
-            "protocol": "smtp",
-            # Missing smtp_from and subject for spam
-        }
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("smtp_from required" in error for error in errors)
-
-    def test_connection_missing_destination_ip(self):
-        """Test connection report requires destination_ip."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-id",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "automated",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "connection",
-            "type": "ddos",
-            "evidence_source": "honeypot",
-            # Missing destination_ip and protocol
-        }
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("destination_ip required" in error for error in errors)
-
-    def test_content_missing_url(self):
-        """Test content report requires url."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            "report_id": "test-id",
-            "timestamp": "2024-01-15T10:30:00Z",
-            "reporter": {
-                "org": "Test",
-                "contact": "test@example.com",
-                "type": "manual",
-            },
-            "source_identifier": "192.0.2.1",
-            "category": "content",
-            "type": "phishing_site",
-            "evidence_source": "user_report",
-            # Missing url
-        }
-
-        parser = XARFParser(strict=False)
-        result = parser.validate(report_data)
-
-        assert result is False
-        errors = parser.get_errors()
-        assert any("url required" in error for error in errors)
diff --git a/xarf/__init__.py b/xarf/__init__.py
index b03d9ce..6352c7a 100644
--- a/xarf/__init__.py
+++ b/xarf/__init__.py
@@ -34,6 +34,7 @@
     reset_registry,
     schema_registry,
 )
+from xarf.schema_validator import SchemaValidator, schema_validator
 from xarf.types_connection import (
     ConnectionBaseReport,
     ConnectionReport,
@@ -149,6 +150,9 @@
     "FieldMetadata",
     "get_registry",
     "reset_registry",
+    # Schema validator
+    "SchemaValidator",
+    "schema_validator",
     # v3 compatibility
     "is_v3_report",
     "convert_v3_to_v4",
diff --git a/xarf/schema_validator.py b/xarf/schema_validator.py
new file mode 100644
index 0000000..948a8e2
--- /dev/null
+++ b/xarf/schema_validator.py
@@ -0,0 +1,406 @@
+"""Schema Validator — JSON Schema-based validation for XARF v4 reports.
+
+Validates :class:`~xarf.models.XARFReport` instances against the official
+XARF JSON Schema (Draft 2020-12) using the ``jsonschema`` library.  Supports
+both normal and strict modes; in strict mode, fields marked
+``x-recommended: true`` in the schema are promoted to required.
+
+Example:
+    >>> from xarf import schema_validator, SpamReport, ContactInfo
+    >>> report = SpamReport(
+    ...     xarf_version="4.2.0",
+    ...     report_id="02eb480f-8172-431a-9276-c28ba90f694a",
+    ...     timestamp="2025-01-11T10:59:45Z",
+    ...     reporter=ContactInfo(org="Org", contact="a@b.com", domain="b.com"),
+    ...     sender=ContactInfo(org="Org", contact="a@b.com", domain="b.com"),
+    ...     source_identifier="192.168.1.1",
+    ...     category="messaging",
+    ...     type="spam",
+    ...     protocol="smtp",
+    ... )
+    >>> errors = schema_validator.validate(report)
+    >>> errors
+    []
+"""
+
+from __future__ import annotations
+
+import copy
+import json
+from importlib import resources
+from pathlib import Path
+from typing import Any
+
+import jsonschema
+import jsonschema.exceptions
+import referencing
+import referencing.jsonschema
+
+from xarf.exceptions import XARFSchemaError
+from xarf.models import ValidationError, XARFReport
+from xarf.schema_registry import schema_registry as _schema_registry
+
+# ---------------------------------------------------------------------------
+# Internal type alias
+# ---------------------------------------------------------------------------
+
+_SchemaDict = dict[str, Any]
+
+# ---------------------------------------------------------------------------
+# SchemaValidator
+# ---------------------------------------------------------------------------
+
+
+class SchemaValidator:
+    """JSON Schema-based validator for XARF v4 reports.
+
+    Validates :class:`~xarf.models.XARFReport` instances against the official
+    XARF JSON Schema using ``jsonschema`` (Draft 2020-12).  Supports both
+    normal and strict modes.
+
+    Schema loading is **lazy** — schemas are loaded on the first call to
+    :meth:`validate`.  Construction is cheap and always succeeds.
+    """
+
+    def __init__(self) -> None:
+        """Initialise state variables without loading any schemas."""
+        self._schemas_loaded: bool = False
+        self._schemas_dir: Path | None = None
+        self._normal_validator: jsonschema.Draft202012Validator | None = None
+        self._strict_validator: jsonschema.Draft202012Validator | None = None
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def validate(
+        self, report: XARFReport, strict: bool = False
+    ) -> list[ValidationError]:
+        """Validate *report* against the XARF JSON Schema.
+
+        Args:
+            report: A :class:`~xarf.models.XARFReport` (or subclass) instance.
+            strict: When ``True``, fields marked ``x-recommended: true`` in
+                the schema are treated as required.  Defaults to ``False``.
+
+        Returns:
+            A list of :class:`~xarf.models.ValidationError` instances.
+            An empty list means the report is valid.
+
+        Raises:
+            XARFSchemaError: If the bundled schemas cannot be loaded.
+        """
+        self._ensure_schemas_loaded()
+
+        data = report.model_dump(by_alias=True, exclude_none=True)
+
+        validator = self._strict_validator if strict else self._normal_validator
+        if validator is None:  # pragma: no cover
+            raise XARFSchemaError("Validator not initialised after schema loading.")
+
+        raw_errors = list(validator.iter_errors(data))
+
+        result: list[ValidationError] = []
+        seen: set[tuple[str, str]] = set()
+        for err in raw_errors:
+            ve = self._format_validation_error(err)
+            key = (ve.field, ve.message)
+            if key not in seen:
+                seen.add(key)
+                result.append(ve)
+
+        return result
+
+    def get_supported_types(self) -> list[str]:
+        """Return all supported ``"category/type"`` strings.
+
+        Uses the :data:`~xarf.schema_registry.schema_registry` singleton to
+        enumerate all known category/type pairs.
+
+        Returns:
+            A list of strings in ``"category/type"`` format.
+        """
+        result: list[str] = []
+        for category, types in _schema_registry.get_all_types().items():
+            for type_ in sorted(types):
+                result.append(f"{category}/{type_}")
+        return result
+
+    def has_type_schema(self, category: str, type_: str) -> bool:
+        """Return whether a schema exists for the given *category*/*type_* pair.
+
+        Args:
+            category: XARF category name (e.g. ``"messaging"``).
+            type_: XARF type name within the category (e.g. ``"spam"``).
+
+        Returns:
+            ``True`` if the combination is known; ``False`` otherwise.
+        """
+        return _schema_registry.is_valid_type(category, type_)
+
+    # ------------------------------------------------------------------
+    # Lazy loading
+    # ------------------------------------------------------------------
+
+    def _ensure_schemas_loaded(self) -> None:
+        """Load all schemas on first call; do nothing on subsequent calls.
+
+        Raises:
+            XARFSchemaError: If schemas cannot be located or parsed.
+        """
+        if self._schemas_loaded:
+            return
+        self._schemas_dir = self._find_schemas_dir()
+        all_schemas = self._load_all_schemas()
+        master_schema = self._find_master_schema(all_schemas)
+
+        normal_registry = self._build_registry(all_schemas, strict=False)
+        strict_registry = self._build_registry(all_schemas, strict=True)
+
+        strict_master = self._transform_for_strict(master_schema)
+
+        self._normal_validator = jsonschema.Draft202012Validator(
+            master_schema,
+            registry=normal_registry,
+            format_checker=jsonschema.FormatChecker(),
+        )
+        self._strict_validator = jsonschema.Draft202012Validator(
+            strict_master,
+            registry=strict_registry,
+            format_checker=jsonschema.FormatChecker(),
+        )
+        self._schemas_loaded = True
+
+    def _find_schemas_dir(self) -> Path:
+        """Locate the bundled ``schemas/`` directory inside the package.
+
+        Returns:
+            Absolute path to the schemas directory.
+
+        Raises:
+            XARFSchemaError: If the directory cannot be found.
+        """
+        try:
+            pkg = resources.files("xarf")
+            schemas_path = Path(str(pkg)) / "schemas"
+            if not schemas_path.is_dir():
+                raise XARFSchemaError(
+                    f"Bundled schemas directory not found at {schemas_path}. "
+                    "Run 'python scripts/fetch_schemas.py' to download schemas."
+                )
+            return schemas_path
+        except (TypeError, FileNotFoundError) as exc:
+            raise XARFSchemaError(
+                "Could not locate the xarf package directory while searching "
+                "for bundled schemas."
+            ) from exc
+
+    def _load_all_schemas(self) -> list[_SchemaDict]:
+        """Load core, master, and all type schemas from the bundled directory.
+
+        Returns:
+            List of parsed schema dicts.
+
+        Raises:
+            XARFSchemaError: If any schema file cannot be read or parsed.
+        """
+        if self._schemas_dir is None:  # pragma: no cover
+            raise XARFSchemaError("Schemas directory not set.")
+        schemas_dir = self._schemas_dir
+        schemas: list[_SchemaDict] = []
+
+        for name in ("xarf-core.json", "xarf-v4-master.json"):
+            path = schemas_dir / name
+            schema = self._load_json_file(path)
+            if schema is None:
+                raise XARFSchemaError(
+                    f"Failed to load schema '{name}' from {path}. "
+                    "The bundled schemas may be missing or corrupted."
+                )
+            schemas.append(schema)
+
+        types_dir = schemas_dir / "types"
+        if types_dir.is_dir():
+            for json_file in sorted(types_dir.glob("*.json")):
+                schema = self._load_json_file(json_file)
+                if schema is None:
+                    raise XARFSchemaError(
+                        f"Failed to load type schema from {json_file}. "
+                        "The bundled schemas may be missing or corrupted."
+                    )
+                schemas.append(schema)
+
+        return schemas
+
+    def _load_json_file(self, path: Path) -> _SchemaDict | None:
+        """Load and parse a single JSON file.
+
+        Args:
+            path: Absolute path to the JSON file.
+
+        Returns:
+            Parsed dict, or ``None`` if the file cannot be read or parsed.
+        """
+        try:
+            with path.open(encoding="utf-8") as fh:
+                return json.load(fh)  # type: ignore[no-any-return]
+        except (OSError, json.JSONDecodeError):
+            return None
+
+    def _find_master_schema(self, schemas: list[_SchemaDict]) -> _SchemaDict:
+        """Find the master schema (``xarf-v4-master.json``) among *schemas*.
+
+        Args:
+            schemas: List of loaded schema dicts.
+
+        Returns:
+            The master schema dict.
+
+        Raises:
+            XARFSchemaError: If the master schema is not found.
+        """
+        master_id = "https://xarf.org/schemas/v4/xarf-v4-master.json"
+        for schema in schemas:
+            if schema.get("$id") == master_id:
+                return schema
+        raise XARFSchemaError(
+            f"Master schema with $id '{master_id}' not found among loaded schemas."
+        )
+
+    # ------------------------------------------------------------------
+    # Registry building
+    # ------------------------------------------------------------------
+
+    def _build_registry(
+        self, schemas: list[_SchemaDict], strict: bool
+    ) -> referencing.Registry[Any]:
+        """Build a :class:`referencing.Registry` for ``$ref`` resolution.
+
+        Args:
+            schemas: All loaded schema dicts.
+            strict: When ``True``, each schema is transformed via
+                :meth:`_transform_for_strict` before registration.
+
+        Returns:
+            A populated :class:`referencing.Registry`.
+        """
+        resource_pairs: list[tuple[str, referencing.Resource[Any]]] = []
+        for raw_schema in schemas:
+            schema = self._transform_for_strict(raw_schema) if strict else raw_schema
+            schema_id = schema.get("$id")
+            if schema_id:
+                resource = referencing.jsonschema.DRAFT202012.create_resource(schema)
+                resource_pairs.append((schema_id, resource))
+
+        registry: referencing.Registry[Any] = referencing.Registry()
+        registry = registry.with_resources(resource_pairs)
+        return registry
+
+    # ------------------------------------------------------------------
+    # Strict mode transformation
+    # ------------------------------------------------------------------
+
+    def _transform_for_strict(self, schema: _SchemaDict) -> _SchemaDict:
+        """Return a deep copy of *schema* with recommended fields promoted.
+
+        Calls :meth:`_promote_recommended_to_required` on the clone.
+
+        Args:
+            schema: Original schema dict (not mutated).
+
+        Returns:
+            A new schema dict where ``x-recommended: true`` properties have
+            been added to their parent ``required`` arrays.
+        """
+        clone: _SchemaDict = copy.deepcopy(schema)
+        self._promote_recommended_to_required(clone)
+        return clone
+
+    def _promote_recommended_to_required(self, node: Any) -> None:
+        """Recursively promote ``x-recommended`` properties to ``required``.
+
+        Walks all relevant schema nodes and, for any ``properties`` dict
+        where a property has ``x-recommended: true``, ensures that property
+        name appears in the parent node's ``required`` array.
+
+        Recurses into: ``properties``, ``$defs``, ``allOf``, ``anyOf``,
+        ``oneOf``, ``items``, ``if``, ``then``, ``else``, ``not``,
+        ``additionalProperties``.
+
+        Args:
+            node: A schema node (dict) to process in place.  Non-dict values
+                are ignored.
+        """
+        if not isinstance(node, dict):
+            return
+
+        # Promote x-recommended properties to required on this node
+        props = node.get("properties")
+        if isinstance(props, dict):
+            recommended = [
+                k
+                for k, v in props.items()
+                if isinstance(v, dict) and v.get("x-recommended") is True
+            ]
+            if recommended:
+                existing: list[str] = list(node.get("required", []))
+                for field in recommended:
+                    if field not in existing:
+                        existing.append(field)
+                node["required"] = existing
+
+        # Recurse into dict-valued keywords
+        for key in ("properties", "$defs"):
+            sub = node.get(key)
+            if isinstance(sub, dict):
+                for value in sub.values():
+                    self._promote_recommended_to_required(value)
+
+        # Recurse into list-valued keywords
+        for key in ("allOf", "anyOf", "oneOf"):
+            sub = node.get(key)
+            if isinstance(sub, list):
+                for item in sub:
+                    self._promote_recommended_to_required(item)
+
+        # Recurse into single-schema keywords
+        for key in ("items", "if", "then", "else", "not", "additionalProperties"):
+            sub = node.get(key)
+            if isinstance(sub, dict):
+                self._promote_recommended_to_required(sub)
+
+    # ------------------------------------------------------------------
+    # Error formatting
+    # ------------------------------------------------------------------
+
+    def _format_validation_error(
+        self,
+        err: jsonschema.exceptions.ValidationError,
+    ) -> ValidationError:
+        """Map a ``jsonschema`` error to a :class:`~xarf.models.ValidationError`.
+
+        Args:
+            err: Raw :class:`jsonschema.exceptions.ValidationError` instance.
+
+        Returns:
+            A :class:`~xarf.models.ValidationError` with:
+
+            - ``field``: dot-joined absolute path, or ``""`` for root errors.
+            - ``message``: the raw ``err.message`` string.
+            - ``value``: the offending ``err.instance`` value.
+        """
+        path_parts = list(err.absolute_path)
+        field = ".".join(str(p) for p in path_parts)
+        return ValidationError(
+            field=field,
+            message=err.message,
+            value=err.instance,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Module-level singleton
+# ---------------------------------------------------------------------------
+
+#: Module-level singleton — lazily loads schemas on first :meth:`validate` call.
+schema_validator: SchemaValidator = SchemaValidator()

From 1316b605cddf2c05a24762ecbf700b4472898c05 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 31 Mar 2026 10:28:49 +0200
Subject: [PATCH 07/13] Add validator class

---
 tests/test_models.py           |   7 +-
 tests/test_schema_validator.py |  60 ++++-
 xarf/__init__.py               |  11 +-
 xarf/models.py                 |  12 +-
 xarf/parser.py                 | 395 ++++++++++++---------------------
 xarf/schema_validator.py       |  19 +-
 xarf/validator.py              | 347 +++++++++++++++++++++++++++++
 7 files changed, 582 insertions(+), 269 deletions(-)
 create mode 100644 xarf/validator.py

diff --git a/tests/test_models.py b/tests/test_models.py
index 15373e1..73d8b4f 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -146,15 +146,16 @@ def test_without_report(self) -> None:
         assert len(result.errors) == 1
 
     def test_with_info(self) -> None:
-        """ParseResult accepts optional info dict."""
+        """ParseResult accepts optional info list of field-message dicts."""
         result = ParseResult(
             report=None,
             errors=[],
             warnings=[],
-            info={"missing_optional": ["evidence_source"]},
+            info=[{"field": "evidence_source", "message": "RECOMMENDED: ..."}],
         )
         assert result.info is not None
-        assert "missing_optional" in result.info
+        assert isinstance(result.info, list)
+        assert result.info[0]["field"] == "evidence_source"
 
 
 class TestCreateReportResult:
diff --git a/tests/test_schema_validator.py b/tests/test_schema_validator.py
index bafe41f..5de1d49 100644
--- a/tests/test_schema_validator.py
+++ b/tests/test_schema_validator.py
@@ -3,13 +3,11 @@
 from collections import deque
 
 import jsonschema.exceptions
-import pytest
 
 from xarf import ContactInfo, SpamReport
-from xarf.models import ValidationError
+from xarf.models import ValidationError, XARFEvidence
 from xarf.schema_validator import SchemaValidator, schema_validator
 
-
 # ---------------------------------------------------------------------------
 # Helper fixture
 # ---------------------------------------------------------------------------
@@ -28,7 +26,9 @@ def _valid_spam_report() -> SpamReport:
         xarf_version="4.2.0",
         report_id="02eb480f-8172-431a-9276-c28ba90f694a",
         timestamp="2025-01-11T10:59:45Z",
-        reporter=ContactInfo(org="Test Org", contact="test@test.com", domain="test.com"),
+        reporter=ContactInfo(
+            org="Test Org", contact="test@test.com", domain="test.com"
+        ),
         sender=ContactInfo(org="Test Org", contact="test@test.com", domain="test.com"),
         source_identifier="192.168.1.1",
         category="messaging",
@@ -106,8 +106,6 @@ def test_strict_mode_valid_when_all_recommended_present(self) -> None:
         # evidence_item x-recommended: description, hash
         # Spam type x-recommended: evidence_source, smtp_to, subject, message_id
         # confidence is 0.0-1.0 per schema
-        from xarf.models import XARFEvidence
-
         report.evidence_source = "spamtrap"
         report.evidence = [
             XARFEvidence(
@@ -237,3 +235,53 @@ def test_unknown_category_returns_false(self) -> None:
         assert sv.has_type_schema("unknown_category", "spam") is False
 
 
+# ---------------------------------------------------------------------------
+# TestDictInput — validate() accepts raw dicts
+# ---------------------------------------------------------------------------
+
+
+def _valid_spam_dict() -> dict[str, object]:
+    """Return the same report as _valid_spam_report() but as a plain dict."""
+    _contact = {"org": "Test Org", "contact": "test@test.com", "domain": "test.com"}
+    return {
+        "xarf_version": "4.2.0",
+        "report_id": "02eb480f-8172-431a-9276-c28ba90f694a",
+        "timestamp": "2025-01-11T10:59:45Z",
+        "reporter": _contact,
+        "sender": _contact,
+        "source_identifier": "192.168.1.1",
+        "category": "messaging",
+        "type": "spam",
+        "protocol": "smtp",
+        "smtp_from": "spammer@example.com",
+        "source_port": 25,
+    }
+
+
+class TestDictInput:
+    def test_valid_dict_produces_no_errors(self) -> None:
+        """validate() accepts a raw dict and returns no errors for a valid report."""
+        errors = schema_validator.validate(_valid_spam_dict())
+        assert errors == []
+
+    def test_invalid_dict_produces_errors(self) -> None:
+        """validate() accepts a raw dict and returns errors for an invalid report."""
+        data = _valid_spam_dict()
+        data["report_id"] = "not-a-uuid"  # type: ignore[index]
+        errors = schema_validator.validate(data)
+        assert len(errors) >= 1
+        assert any("report_id" in e.field for e in errors)
+
+    def test_dict_and_model_produce_same_errors(self) -> None:
+        """validate() returns identical errors for a dict and equivalent model."""
+        data = _valid_spam_dict()
+        data["report_id"] = "not-a-uuid"  # type: ignore[index]
+
+        report = _valid_spam_report()
+        report.report_id = "not-a-uuid"
+
+        dict_errors = schema_validator.validate(data)
+        model_errors = schema_validator.validate(report)
+        assert [(e.field, e.message) for e in dict_errors] == [
+            (e.field, e.message) for e in model_errors
+        ]
diff --git a/xarf/__init__.py b/xarf/__init__.py
index 6352c7a..2311bf6 100644
--- a/xarf/__init__.py
+++ b/xarf/__init__.py
@@ -27,6 +27,7 @@
     XARFEvidence,
     XARFReport,
 )
+from xarf.parser import parse
 from xarf.schema_registry import (
     FieldMetadata,
     SchemaRegistry,
@@ -117,7 +118,11 @@
     VulnerabilityBaseReport,
     VulnerabilityReport,
 )
-from xarf.v3_compat import convert_v3_to_v4, is_v3_report
+from xarf.v3_compat import (
+    convert_v3_to_v4,
+    is_v3_report,
+)
+from xarf.validator import ValidationResult
 
 __version__ = "0.1.0.dev0"
 __author__ = "XARF Project"
@@ -129,6 +134,8 @@
 __all__ = [
     # Version
     "SPEC_VERSION",
+    # Public API functions
+    "parse",
     # Result types
     "AnyXARFReport",
     "ParseResult",
@@ -153,6 +160,8 @@
     # Schema validator
     "SchemaValidator",
     "schema_validator",
+    # Validator
+    "ValidationResult",
     # v3 compatibility
     "is_v3_report",
     "convert_v3_to_v4",
diff --git a/xarf/models.py b/xarf/models.py
index d356436..f07afe5 100644
--- a/xarf/models.py
+++ b/xarf/models.py
@@ -53,13 +53,15 @@ class ParseResult:
         report: The parsed report, or ``None`` if parsing failed entirely.
         errors: List of validation errors encountered.
         warnings: List of non-fatal warnings.
-        info: Optional metadata dict (populated when ``show_missing_optional=True``).
+        info: Optional list of missing-field metadata dicts, each with
+            ``"field"`` and ``"message"`` keys.  Populated when
+            ``show_missing_optional=True``.
     """
 
     report: AnyXARFReport | None
     errors: list[ValidationError]
     warnings: list[ValidationWarning]
-    info: dict[str, object] | None = None
+    info: list[dict[str, str]] | None = None
 
 
 @dataclass
@@ -70,13 +72,15 @@ class CreateReportResult:
         report: The created report, or ``None`` if creation failed.
         errors: List of validation errors encountered.
         warnings: List of non-fatal warnings.
-        info: Optional metadata dict.
+        info: Optional list of missing-field metadata dicts, each with
+            ``"field"`` and ``"message"`` keys.  Populated when
+            ``show_missing_optional=True``.
     """
 
     report: AnyXARFReport | None
     errors: list[ValidationError]
     warnings: list[ValidationWarning]
-    info: dict[str, object] | None = None
+    info: list[dict[str, str]] | None = None
 
 
 # ---------------------------------------------------------------------------
diff --git a/xarf/parser.py b/xarf/parser.py
index eb86190..73f4fe4 100644
--- a/xarf/parser.py
+++ b/xarf/parser.py
@@ -1,259 +1,152 @@
-"""XARF v4 Parser Implementation."""
+"""XARF v4 Parser.
 
-import json
-from datetime import datetime
-from typing import Any, Dict, List, Union
+Provides the module-level :func:`parse` function that converts raw JSON (a
+string or a plain dict) into a fully-typed :data:`~xarf.models.AnyXARFReport`
+Pydantic model, returning a :class:`~xarf.models.ParseResult` that carries the
+report together with any validation errors, warnings, and optional
+missing-field info.
 
-from .exceptions import XARFParseError, XARFValidationError
-from .models import ConnectionReport, ContentReport, MessagingReport, XARFReport
-from .v3_compat import convert_v3_to_v4, is_v3_report
+Mirrors ``parse()`` in ``xarf-javascript/src/parser.ts``.  All validation
+logic — schema validation, unknown-field detection, and missing-field
+discovery — is delegated to :data:`xarf.validator._validator`, exactly as
+``parser.ts`` delegates to its ``XARFValidator`` instance.
 
+Example:
+    >>> from xarf import parse
+    >>> result = parse(json_string)
+    >>> if not result.errors:
+    ...     report = result.report  # fully typed AnyXARFReport subclass
+"""
 
-class XARFParser:
-    """XARF v4 Report Parser.
+from __future__ import annotations
 
-    Parses and validates XARF v4 abuse reports from JSON.
+import json
+from typing import Any
+
+from pydantic import TypeAdapter
+from pydantic import ValidationError as PydanticValidationError
+
+from xarf.exceptions import XARFParseError
+from xarf.models import AnyXARFReport, ParseResult, ValidationWarning
+from xarf.v3_compat import convert_v3_to_v4, is_v3_report
+from xarf.validator import _validator
+
+# ---------------------------------------------------------------------------
+# Module-level TypeAdapter (built once; reused for every parse() call)
+# ---------------------------------------------------------------------------
+
+_REPORT_ADAPTER: TypeAdapter[AnyXARFReport] = TypeAdapter(AnyXARFReport)
+
+# ---------------------------------------------------------------------------
+# v3 deprecation warning message (mirrors getV3DeprecationWarning() in JS)
+# ---------------------------------------------------------------------------
+
+_V3_DEPRECATION_MESSAGE = (
+    "XARF v3 format is deprecated. Please upgrade to XARF v4. "
+    "This report will be automatically converted, but v3 support "
+    "will be removed in a future version."
+)
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def parse(
+    json_data: str | dict[str, Any],
+    strict: bool = False,
+    show_missing_optional: bool = False,
+) -> ParseResult:
+    """Parse a XARF v4 report from JSON.
+
+    Supports both XARF v4 and v3 (legacy) formats.  v3 reports are
+    automatically converted to v4 and a deprecation warning is emitted via
+    :mod:`warnings` as well as added to
+    :attr:`~xarf.models.ParseResult.warnings`.
+
+    In non-strict mode the parser attempts best-effort deserialization even
+    when schema validation errors are present, returning ``report=None`` only
+    when Pydantic is also unable to construct a typed model.
+
+    Args:
+        json_data: A JSON string or a pre-parsed dict containing XARF report
+            data.
+        strict: When ``True``, fields marked ``x-recommended: true`` in the
+            schema are treated as required, unknown fields become errors, and
+            any validation error causes ``report=None`` to be returned
+            immediately without Pydantic deserialization.
+        show_missing_optional: When ``True``,
+            :attr:`~xarf.models.ParseResult.info` is populated with details
+            about optional and recommended fields absent from the report.
+
+    Returns:
+        :class:`~xarf.models.ParseResult` containing:
+
+        - ``report``: The typed report model, or ``None`` on failure.
+        - ``errors``: Validation errors (empty list means valid).
+        - ``warnings``: Non-fatal warnings (v3 conversion, unknown fields).
+        - ``info``: Missing-field metadata when ``show_missing_optional=True``,
+          otherwise ``None``.
+
+    Raises:
+        XARFParseError: If *json_data* is a string containing malformed JSON.
+
+    Example:
+        >>> result = parse('{"xarf_version": "4.2.0", ...}')
+        >>> result.report
+        SpamReport(...)
+        >>> result.errors
+        []
     """
+    parse_warnings: list[ValidationWarning] = []
 
-    def __init__(self, strict: bool = False):
-        """Initialize parser.
-
-        Args:
-            strict: If True, raise exceptions on validation errors.
-                   If False, collect errors for later retrieval.
-        """
-        self.strict = strict
-        self.errors: List[str] = []
-
-        # Supported categories in alpha version
-        self.supported_categories = {"messaging", "connection", "content"}
-
-    def parse(self, json_data: Union[str, Dict[str, Any]]) -> XARFReport:
-        """Parse XARF report from JSON.
-
-        Supports both XARF v4 and v3 (with automatic conversion).
-
-        Args:
-            json_data: JSON string or dictionary containing XARF report
-
-        Returns:
-            XARFReport: Parsed report object
-
-        Raises:
-            XARFParseError: If parsing fails
-            XARFValidationError: If validation fails (strict mode)
-        """
-        self.errors.clear()
-
-        try:
-            if isinstance(json_data, str):
-                data = json.loads(json_data)
-            else:
-                data = json_data
-        except json.JSONDecodeError as e:
-            raise XARFParseError(f"Invalid JSON: {e}")
-
-        # Auto-detect and convert v3 reports
-        if is_v3_report(data):
-            try:
-                data = convert_v3_to_v4(data)
-            except Exception as e:
-                raise XARFParseError(f"Failed to convert XARF v3 report: {e}")
-
-        # Validate basic structure
-        if not self.validate_structure(data):
-            if self.strict:
-                raise XARFValidationError("Validation failed", self.errors)
-
-        # Parse based on category
-        report_category = data.get("category")
-
-        if report_category not in self.supported_categories:
-            error_msg = (
-                f"Unsupported category '{report_category}' in alpha "
-                f"version. Supported: {self.supported_categories}"
-            )
-            if self.strict:
-                raise XARFValidationError(error_msg)
-            else:
-                self.errors.append(error_msg)
-                # Fall back to base model
-                return XARFReport(**data)
-
+    # ------------------------------------------------------------------
+    # Step 1 — JSON parsing
+    # ------------------------------------------------------------------
+    if isinstance(json_data, str):
         try:
-            if report_category == "messaging":
-                return MessagingReport(**data)
-            elif report_category == "connection":
-                return ConnectionReport(**data)
-            elif report_category == "content":
-                return ContentReport(**data)
-            else:
-                return XARFReport(**data)
-
-        except Exception as e:
-            raise XARFParseError(f"Failed to parse {report_category} report: {e}")
-
-    def validate(self, json_data: Union[str, Dict[str, Any]]) -> bool:
-        """Validate XARF report without parsing.
-
-        Args:
-            json_data: JSON string or dictionary containing XARF report
-
-        Returns:
-            bool: True if valid, False otherwise
-        """
-        self.errors.clear()
-
-        try:
-            if isinstance(json_data, str):
-                data = json.loads(json_data)
-            else:
-                data = json_data
-        except json.JSONDecodeError as e:
-            self.errors.append(f"Invalid JSON: {e}")
-            return False
-
-        return self.validate_structure(data)
-
-    def validate_structure(self, data: Dict[str, Any]) -> bool:
-        """Validate basic XARF structure.
-
-        Args:
-            data: Parsed JSON data
-
-        Returns:
-            bool: True if structure is valid
-        """
-        required_fields = {
-            "xarf_version",
-            "report_id",
-            "timestamp",
-            "reporter",
-            "source_identifier",
-            "category",
-            "type",
-            "evidence_source",
-        }
-
-        # Check required fields
-        missing_fields = required_fields - set(data.keys())
-        if missing_fields:
-            self.errors.append(f"Missing required fields: {missing_fields}")
-            return False
-
-        # Check XARF version
-        if data.get("xarf_version") != "4.0.0":
-            self.errors.append(f"Unsupported XARF version: {data.get('xarf_version')}")
-            return False
-
-        # Validate reporter structure
-        reporter = data.get("reporter", {})
-        if not isinstance(reporter, dict):
-            self.errors.append("Reporter must be an object")
-            return False
-
-        reporter_required = {"org", "contact", "type"}
-        missing_reporter = reporter_required - set(reporter.keys())
-        if missing_reporter:
-            self.errors.append(f"Missing reporter fields: {missing_reporter}")
-            return False
-
-        # Validate reporter type
-        if reporter.get("type") not in ["automated", "manual", "hybrid"]:
-            self.errors.append(f"Invalid reporter type: {reporter.get('type')}")
-            return False
-
-        # Validate timestamp format
-        try:
-            datetime.fromisoformat(data["timestamp"].replace("Z", "+00:00"))
-        except (ValueError, AttributeError):
-            self.errors.append(f"Invalid timestamp format: {data.get('timestamp')}")
-            return False
-
-        # Category-specific validation
-        return self.validate_category_specific(data)
-
-    def validate_category_specific(self, data: Dict[str, Any]) -> bool:
-        """Validate category-specific requirements.
-
-        Args:
-            data: Parsed JSON data
-
-        Returns:
-            bool: True if category-specific validation passes
-        """
-        report_category = data.get("category")
-        report_type = data.get("type")
-
-        if report_category == "messaging":
-            return self.validate_messaging(data, report_type or "")
-        elif report_category == "connection":
-            return self.validate_connection(data, report_type or "")
-        elif report_category == "content":
-            return self.validate_content(data, report_type or "")
-
-        return True
-
-    def validate_messaging(self, data: Dict[str, Any], report_type: str) -> bool:
-        """Validate messaging category reports."""
-        valid_types = {"spam", "phishing", "social_engineering"}
-        if report_type not in valid_types:
-            self.errors.append(f"Invalid messaging type: {report_type}")
-            return False
-
-        # Email-specific validation
-        if data.get("protocol") == "smtp":
-            if not data.get("smtp_from"):
-                self.errors.append("smtp_from required for email reports")
-                return False
-            if report_type in ["spam", "phishing"] and not data.get("subject"):
-                self.errors.append("subject required for spam/phishing reports")
-                return False
-
-        return True
-
-    def validate_connection(self, data: Dict[str, Any], report_type: str) -> bool:
-        """Validate connection category reports."""
-        valid_types = {"ddos", "port_scan", "login_attack", "ip_spoofing"}
-        if report_type not in valid_types:
-            self.errors.append(f"Invalid connection type: {report_type}")
-            return False
-
-        # Required fields for connection reports
-        if not data.get("destination_ip"):
-            self.errors.append("destination_ip required for connection reports")
-            return False
-
-        if not data.get("protocol"):
-            self.errors.append("protocol required for connection reports")
-            return False
-
-        return True
-
-    def validate_content(self, data: Dict[str, Any], report_type: str) -> bool:
-        """Validate content category reports."""
-        valid_types = {
-            "phishing_site",
-            "malware_distribution",
-            "defacement",
-            "spamvertised",
-            "web_hack",
-        }
-        if report_type not in valid_types:
-            self.errors.append(f"Invalid content type: {report_type}")
-            return False
-
-        # URL required for content reports
-        if not data.get("url"):
-            self.errors.append("url required for content reports")
-            return False
-
-        return True
-
-    def get_errors(self) -> List[str]:
-        """Get validation errors from last parse/validate call.
-
-        Returns:
-            List[str]: List of validation error messages
-        """
-        return self.errors.copy()
+            data: dict[str, Any] = json.loads(json_data)
+        except json.JSONDecodeError as exc:
+            raise XARFParseError(f"Invalid JSON: {exc}") from exc
+    else:
+        data = json_data
+
+    # ------------------------------------------------------------------
+    # Step 2 — v3 detection and conversion
+    # ------------------------------------------------------------------
+    if is_v3_report(data):
+        # convert_v3_to_v4 emits a Python warnings.warn() internally.
+        data = convert_v3_to_v4(data)
+        parse_warnings.append(
+            ValidationWarning(field="", message=_V3_DEPRECATION_MESSAGE)
+        )
+
+    # ------------------------------------------------------------------
+    # Step 3 — Validate (schema + unknown fields + missing optional)
+    # Mirrors: validator.validate(data, strict, showMissingOptional)
+    # ------------------------------------------------------------------
+    result = _validator.validate(
+        data, strict=strict, show_missing_optional=show_missing_optional
+    )
+
+    # ------------------------------------------------------------------
+    # Step 4 — Strict mode early return (Python-specific: prevents a
+    # Pydantic discriminator failure on malformed category/type)
+    # ------------------------------------------------------------------
+    if result.errors and strict:
+        return ParseResult(report=None, errors=result.errors, warnings=parse_warnings)
+
+    # ------------------------------------------------------------------
+    # Step 5 — Pydantic deserialization via discriminated union
+    # ------------------------------------------------------------------
+    try:
+        report = _REPORT_ADAPTER.validate_python(data)
+    except PydanticValidationError:
+        return ParseResult(report=None, errors=result.errors, warnings=parse_warnings)
+
+    return ParseResult(
+        report=report,
+        errors=result.errors,
+        warnings=parse_warnings + result.warnings,
+        info=result.info,
+    )
diff --git a/xarf/schema_validator.py b/xarf/schema_validator.py
index 948a8e2..2a4ccc8 100644
--- a/xarf/schema_validator.py
+++ b/xarf/schema_validator.py
@@ -41,10 +41,11 @@
 from xarf.schema_registry import schema_registry as _schema_registry
 
 # ---------------------------------------------------------------------------
-# Internal type alias
+# Internal type aliases
 # ---------------------------------------------------------------------------
 
 _SchemaDict = dict[str, Any]
+_ReportInput = XARFReport | dict[str, Any]
 
 # ---------------------------------------------------------------------------
 # SchemaValidator
@@ -74,12 +75,19 @@ def __init__(self) -> None:
     # ------------------------------------------------------------------
 
     def validate(
-        self, report: XARFReport, strict: bool = False
+        self, report: _ReportInput, strict: bool = False
     ) -> list[ValidationError]:
         """Validate *report* against the XARF JSON Schema.
 
+        Accepts either a :class:`~xarf.models.XARFReport` instance (converted
+        to a dict via :meth:`~pydantic.BaseModel.model_dump` before validation)
+        or a plain :class:`dict` (used directly).  The dict path is used by
+        :func:`xarf.parser.parse` to validate raw JSON data before Pydantic
+        deserialization.
+
         Args:
-            report: A :class:`~xarf.models.XARFReport` (or subclass) instance.
+            report: A :class:`~xarf.models.XARFReport` (or subclass) instance,
+                or a plain dict containing raw report data.
             strict: When ``True``, fields marked ``x-recommended: true`` in
                 the schema are treated as required.  Defaults to ``False``.
 
@@ -92,7 +100,10 @@ def validate(
         """
         self._ensure_schemas_loaded()
 
-        data = report.model_dump(by_alias=True, exclude_none=True)
+        if isinstance(report, dict):
+            data: dict[str, Any] = report
+        else:
+            data = report.model_dump(by_alias=True, exclude_none=True)
 
         validator = self._strict_validator if strict else self._normal_validator
         if validator is None:  # pragma: no cover
diff --git a/xarf/validator.py b/xarf/validator.py
new file mode 100644
index 0000000..7d1f365
--- /dev/null
+++ b/xarf/validator.py
@@ -0,0 +1,347 @@
+"""XARF Report Validator.
+
+Higher-level validator that wraps schema validation and adds unknown-field
+detection and optional missing-field discovery.  Mirrors ``validator.ts``
+from the JavaScript reference implementation.
+
+The public surface of this module is :class:`ValidationResult` (exported
+from :mod:`xarf`) and the private :data:`_validator` singleton consumed by
+:func:`xarf.parser.parse`.  :class:`XARFValidator` itself is an internal
+implementation detail, matching the JS convention where the class is not
+re-exported from ``index.ts``.
+
+Example:
+    >>> from xarf.validator import _validator
+    >>> result = _validator.validate(report_dict, strict=False)
+    >>> result.valid
+    True
+"""
+
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+from typing import Any
+
+from xarf.models import ValidationError, ValidationWarning, XARFReport
+from xarf.schema_registry import schema_registry
+from xarf.schema_validator import schema_validator
+
+# ---------------------------------------------------------------------------
+# ValidationResult
+# ---------------------------------------------------------------------------
+
+
+@dataclass
+class ValidationResult:
+    """Result returned by :meth:`XARFValidator.validate`.
+
+    Mirrors the ``ValidationResult`` interface in
+    ``xarf-javascript/src/validator.ts``.
+
+    Attributes:
+        valid: ``True`` when :attr:`errors` is empty.
+        errors: Schema-validation errors and (in strict mode) unknown-field
+            errors.
+        warnings: Unknown-field warnings (non-strict mode only).
+        info: Missing optional/recommended field details when
+            ``show_missing_optional=True``, otherwise ``None``.
+    """
+
+    valid: bool
+    errors: list[ValidationError]
+    warnings: list[ValidationWarning]
+    info: list[dict[str, str]] | None = None
+
+
+# ---------------------------------------------------------------------------
+# XARFValidator
+# ---------------------------------------------------------------------------
+
+
+class XARFValidator:
+    """Higher-level XARF report validator.
+
+    Wraps :class:`~xarf.schema_validator.SchemaValidator` and adds
+    unknown-field detection and missing optional-field discovery, mirroring
+    ``XARFValidator`` in ``xarf-javascript/src/validator.ts``.
+
+    All state is local to each :meth:`validate` call — the class carries no
+    instance state and the module-level :data:`_validator` singleton is safe
+    for concurrent use.
+    """
+
+    def validate(
+        self,
+        report: XARFReport | dict[str, Any],
+        strict: bool = False,
+        show_missing_optional: bool = False,
+    ) -> ValidationResult:
+        """Validate *report* and collect errors, warnings, and optional info.
+
+        Mirrors ``XARFValidator.validate()`` in
+        ``xarf-javascript/src/validator.ts``.
+
+        Steps:
+
+        1. **Schema validation** via :data:`~xarf.schema_validator.schema_validator`.
+        2. **Unknown-field detection** — fields not defined in the core or
+           type-specific schema produce :class:`~xarf.models.ValidationWarning`
+           entries.
+        3. **Strict-mode promotion** — in strict mode, unknown-field warnings
+           are converted to :class:`~xarf.models.ValidationError` entries and
+           the warnings list is cleared.
+        4. **Missing optional fields** — populated only when
+           *show_missing_optional* is ``True``.
+
+        Args:
+            report: A :class:`~xarf.models.XARFReport` (or subclass) instance,
+                or a plain :class:`dict` containing raw report data.
+            strict: When ``True``, schema recommended fields are treated as
+                required and unknown-field warnings become errors.
+            show_missing_optional: When ``True``, :attr:`ValidationResult.info`
+                is populated with details about absent optional and recommended
+                fields.
+
+        Returns:
+            :class:`ValidationResult` with ``valid``, ``errors``, ``warnings``,
+            and optional ``info``.
+
+        Example:
+            >>> result = _validator.validate({"category": "messaging", ...})
+            >>> result.valid
+            False
+        """
+        data: dict[str, Any] = (
+            report
+            if isinstance(report, dict)
+            else report.model_dump(by_alias=True, exclude_none=True)
+        )
+
+        # ------------------------------------------------------------------
+        # Step 1 — Schema validation
+        # ------------------------------------------------------------------
+        errors: list[ValidationError] = list(
+            schema_validator.validate(data, strict=strict)
+        )
+
+        # ------------------------------------------------------------------
+        # Step 2 — Unknown-field detection
+        # ------------------------------------------------------------------
+        category: str = str(data.get("category", ""))
+        type_: str = str(data.get("type", ""))
+        warnings: list[ValidationWarning] = []
+        if category and type_:
+            warnings = _collect_unknown_fields(data, category, type_)
+
+        # ------------------------------------------------------------------
+        # Step 3 — Strict mode: promote unknown-field warnings to errors
+        # ------------------------------------------------------------------
+        if strict and warnings:
+            errors.extend(
+                ValidationError(field=w.field, message=w.message) for w in warnings
+            )
+            warnings = []
+
+        # ------------------------------------------------------------------
+        # Step 4 — Missing optional / recommended fields
+        # ------------------------------------------------------------------
+        info: list[dict[str, str]] | None = None
+        if show_missing_optional and category and type_:
+            info = _collect_missing_optional(data, category, type_)
+
+        return ValidationResult(
+            valid=not errors,
+            errors=errors,
+            warnings=warnings,
+            info=info,
+        )
+
+
+# ---------------------------------------------------------------------------
+# Private helpers (mirrors private methods of XARFValidator in validator.ts)
+# ---------------------------------------------------------------------------
+
+
+def _collect_unknown_fields(
+    data: dict[str, Any],
+    category: str,
+    type_: str,
+) -> list[ValidationWarning]:
+    """Return warnings for fields in *data* not defined in the XARF schema.
+
+    Mirrors ``collectUnknownFields()`` in ``xarf-javascript/src/validator.ts``.
+    Known fields are the union of core property names and type-specific fields
+    for the given ``category``/``type_`` pair.
+
+    Args:
+        data: Raw report dict (post-v3-conversion if applicable).
+        category: XARF category string (e.g. ``"messaging"``).
+        type_: XARF type string within the category (e.g. ``"spam"``).
+
+    Returns:
+        List of :class:`~xarf.models.ValidationWarning`, one per unknown field.
+    """
+    known_fields: set[str] = set(schema_registry.get_core_property_names())
+    known_fields.update(schema_registry.get_category_fields(category, type_))
+
+    return [
+        ValidationWarning(
+            field=field_name,
+            message=f"Unknown field '{field_name}' is not defined in the XARF schema",
+        )
+        for field_name in data
+        if field_name not in known_fields
+    ]
+
+
+def _collect_missing_optional(
+    data: dict[str, Any],
+    category: str,
+    type_: str,
+) -> list[dict[str, str]]:
+    """Collect missing optional and recommended fields for the report.
+
+    Mirrors ``collectMissingOptionalFields()`` in
+    ``xarf-javascript/src/validator.ts``.  Checks both the core schema and
+    the type-specific schema, following ``allOf`` / base-schema references.
+
+    Each returned dict has two keys:
+
+    - ``"field"``: the field name.
+    - ``"message"``: ``"RECOMMENDED: <description>"`` or
+      ``"OPTIONAL: <description>"``.
+
+    Args:
+        data: Raw report dict.
+        category: XARF category string.
+        type_: XARF type string.
+
+    Returns:
+        List of info dicts for each field that is absent from *data*.
+    """
+    info: list[dict[str, str]] = []
+    required_fields = schema_registry.get_required_fields()
+
+    # Core optional fields
+    for field_name in sorted(schema_registry.get_core_property_names()):
+        if field_name in required_fields or field_name == "_internal":
+            continue
+        if field_name in data:
+            continue
+        metadata = schema_registry.get_field_metadata(field_name)
+        if metadata is None:
+            continue
+        prefix = "RECOMMENDED" if metadata.recommended else "OPTIONAL"
+        description = metadata.description or f"Optional field: {field_name}"
+        info.append({"field": field_name, "message": f"{prefix}: {description}"})
+
+    # Type-specific optional fields
+    type_schema = schema_registry.get_type_schema(category, type_)
+    if type_schema:
+        for field_name, description, recommended in _extract_type_optional_fields(
+            type_schema
+        ):
+            if field_name in data:
+                continue
+            prefix = "RECOMMENDED" if recommended else "OPTIONAL"
+            info.append({"field": field_name, "message": f"{prefix}: {description}"})
+
+    return info
+
+
+def _extract_type_optional_fields(
+    schema: dict[str, Any],
+    _accumulated_required: frozenset[str] | None = None,
+) -> list[tuple[str, str, bool]]:
+    """Extract optional field metadata from a type schema.
+
+    Mirrors ``extractOptionalFields()`` in ``xarf-javascript/src/validator.ts``.
+    Handles ``properties`` defined directly on the schema as well as fields
+    inherited via ``allOf`` (resolving ``-base.json`` references).
+
+    Core fields are excluded; ``category``, ``type``, and ``_internal`` are
+    always skipped.
+
+    Args:
+        schema: The type-specific (or base) schema dict to inspect.
+        _accumulated_required: Required field names accumulated from parent
+            schemas during recursive calls.  Pass ``None`` on the initial call.
+
+    Returns:
+        List of ``(field_name, description, recommended)`` triples for each
+        optional field found.
+    """
+    core_fields = schema_registry.get_core_property_names()
+    _skip = {"category", "type", "_internal"}
+
+    schema_required: frozenset[str] = frozenset(schema.get("required", []))
+    effective_required = (
+        schema_required | _accumulated_required
+        if _accumulated_required is not None
+        else schema_required
+    )
+
+    result: list[tuple[str, str, bool]] = []
+    seen: set[str] = set()
+
+    def _add(field_name: str, description: str, recommended: bool) -> None:
+        if field_name not in seen:
+            seen.add(field_name)
+            result.append((field_name, description, recommended))
+
+    for field_name, prop_def in schema.get("properties", {}).items():
+        if field_name in core_fields or field_name in _skip:
+            continue
+        if field_name in effective_required:
+            continue
+        description = prop_def.get("description") or f"Optional field: {field_name}"
+        recommended = prop_def.get("x-recommended") is True
+        _add(field_name, description, recommended)
+
+    for sub in schema.get("allOf", []):
+        ref: str = sub.get("$ref", "")
+        if ref:
+            if "-base.json" not in ref:
+                continue
+            base_schema = _load_base_schema(ref)
+            if base_schema is None:
+                continue
+            for item in _extract_type_optional_fields(base_schema, effective_required):
+                _add(*item)
+        else:
+            for item in _extract_type_optional_fields(sub, effective_required):
+                _add(*item)
+
+    return result
+
+
+def _load_base_schema(ref: str) -> dict[str, Any] | None:
+    """Load a base schema file referenced by a ``$ref`` string.
+
+    Only handles ``-base.json`` references (e.g. ``"./content-base.json"``).
+    Uses the same schemas directory as
+    :data:`~xarf.schema_registry.schema_registry`.
+
+    Args:
+        ref: The ``$ref`` value from the schema (e.g. ``"./content-base.json"``).
+
+    Returns:
+        Parsed schema dict, or ``None`` if the file cannot be loaded.
+    """
+    filename = ref.removeprefix("./").removeprefix("../")
+    schema_path = schema_registry._schemas_dir / "types" / filename
+    try:
+        with schema_path.open(encoding="utf-8") as fh:
+            return json.load(fh)  # type: ignore[no-any-return]
+    except (OSError, json.JSONDecodeError):
+        return None
+
+
+# ---------------------------------------------------------------------------
+# Module-level singleton (private — used by parser.parse(), not public API)
+# ---------------------------------------------------------------------------
+
+#: Private singleton consumed by :func:`xarf.parser.parse`.
+#: Not exported from :mod:`xarf`
+_validator: XARFValidator = XARFValidator()

From 4e9b2e6c07f3e847e5ca46d82aee7d52510ea3f8 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 31 Mar 2026 13:57:27 +0200
Subject: [PATCH 08/13] Add generator methods

---
 tests/test_generator.py | 495 ++++++++++++++++++++++++++
 xarf/__init__.py        |  11 +-
 xarf/_version.py        |   8 +
 xarf/generator.py       | 751 +++++++++++++---------------------------
 4 files changed, 753 insertions(+), 512 deletions(-)
 create mode 100644 tests/test_generator.py
 create mode 100644 xarf/_version.py

diff --git a/tests/test_generator.py b/tests/test_generator.py
new file mode 100644
index 0000000..84c102d
--- /dev/null
+++ b/tests/test_generator.py
@@ -0,0 +1,495 @@
+"""Tests for xarf.generator — Phase 5.
+
+Covers:
+- create_report(): auto-metadata, all 7 categories, typed return, strict mode
+- create_evidence(): all 4 hash algorithms, bytes/str input, base64 encoding
+"""
+
+from __future__ import annotations
+
+import base64
+import hashlib
+import re
+import uuid
+from typing import Any
+
+import pytest
+
+from xarf import (
+    BlocklistReport,
+    BotnetReport,
+    ContactInfo,
+    CopyrightCopyrightReport,
+    CreateReportResult,
+    CveReport,
+    DdosReport,
+    FraudReport,
+    SpamReport,
+    ThreatIntelligenceReport,
+    XARFEvidence,
+    create_evidence,
+    create_report,
+)
+
+# ---------------------------------------------------------------------------
+# Shared fixtures
+# ---------------------------------------------------------------------------
+
+REPORTER: dict[str, Any] = {
+    "org": "ACME Security",
+    "contact": "abuse@acme.example",
+    "domain": "acme.example",
+}
+
+SENDER: dict[str, Any] = {
+    "org": "Bad Actor Inc",
+    "contact": "noreply@bad.example",
+    "domain": "bad.example",
+}
+
+
+def _base_kwargs(**extra: Any) -> dict[str, Any]:
+    """Return the minimum kwargs shared by every create_report() call."""
+    return {
+        "source_identifier": "192.0.2.1",
+        "reporter": REPORTER,
+        "sender": SENDER,
+        **extra,
+    }
+
+
+def _spam_kwargs(**extra: Any) -> dict[str, Any]:
+    """Return kwargs for a minimal valid messaging/spam report.
+
+    Includes ``protocol="sms"`` to satisfy the schema-required ``protocol``
+    field while avoiding the conditional ``smtp_from``/``source_port``
+    requirement triggered only when ``protocol="smtp"``.
+    """
+    return _base_kwargs(protocol="sms", **extra)
+
+
+# ---------------------------------------------------------------------------
+# create_evidence() — hashing and encoding
+# ---------------------------------------------------------------------------
+
+
+class TestCreateEvidence:
+    """Tests for the create_evidence() helper."""
+
+    def test_returns_xarf_evidence(self) -> None:
+        ev = create_evidence("text/plain", b"hello")
+        assert isinstance(ev, XARFEvidence)
+
+    def test_description_optional(self) -> None:
+        ev = create_evidence("text/plain", b"x")
+        assert ev.description is None
+
+    def test_size_equals_byte_length(self) -> None:
+        payload = b"Hello, XARF!"
+        ev = create_evidence("text/plain", payload)
+        assert ev.size == len(payload)
+
+    def test_size_for_str_payload(self) -> None:
+        # "café" is 5 UTF-8 bytes (é = 2 bytes)
+        ev = create_evidence("text/plain", "café")
+        assert ev.size == len("café".encode())
+
+    def test_payload_is_base64_encoded(self) -> None:
+        raw = b"test payload"
+        ev = create_evidence("text/plain", raw)
+        decoded = base64.b64decode(ev.payload)
+        assert decoded == raw
+
+    def test_str_payload_encodes_utf8(self) -> None:
+        text = "Hello"
+        ev = create_evidence("text/plain", text)
+        decoded = base64.b64decode(ev.payload)
+        assert decoded == text.encode("utf-8")
+
+    @pytest.mark.parametrize(
+        ("algorithm", "hasher"),
+        [
+            ("sha256", hashlib.sha256),
+            ("sha512", hashlib.sha512),
+            ("sha1", hashlib.sha1),
+            ("md5", hashlib.md5),
+        ],
+    )
+    def test_hash_algorithm_correctness(self, algorithm: str, hasher: Any) -> None:
+        payload = b"test data for hashing"
+        ev = create_evidence("text/plain", payload, hash_algorithm=algorithm)  # type: ignore[arg-type]
+        expected_hex = hasher(payload).hexdigest()
+        assert ev.hash == f"{algorithm}:{expected_hex}"
+
+    def test_hash_default_is_sha256(self) -> None:
+        payload = b"default algo"
+        ev = create_evidence("text/plain", payload)
+        expected = hashlib.sha256(payload).hexdigest()
+        assert ev.hash == f"sha256:{expected}"
+
+    def test_hash_format_matches_spec_pattern(self) -> None:
+        """Hash must match the schema pattern: algorithm:hexvalue."""
+        ev = create_evidence("text/plain", b"check")
+        assert re.match(r"^(sha256|sha512|sha1|md5):[a-f0-9]+$", ev.hash)
+
+    def test_empty_payload(self) -> None:
+        ev = create_evidence("text/plain", b"")
+        assert ev.size == 0
+        expected = hashlib.sha256(b"").hexdigest()
+        assert ev.hash == f"sha256:{expected}"
+
+
+# ---------------------------------------------------------------------------
+# create_report() — return type and auto-metadata
+# ---------------------------------------------------------------------------
+
+
+class TestCreateReportReturnType:
+    """Verify that create_report() returns CreateReportResult with typed model."""
+
+    def test_returns_create_report_result(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            **_spam_kwargs(),
+        )
+        assert isinstance(result, CreateReportResult)
+
+    def test_report_is_spam_report(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            **_spam_kwargs(),
+        )
+        assert isinstance(result.report, SpamReport)
+
+    def test_report_field_is_none_on_invalid_category(self) -> None:
+        result = create_report(
+            category="nonexistent",
+            type="fake",
+            **_base_kwargs(),
+        )
+        assert result.report is None
+        assert result.errors
+
+    def test_info_is_none_by_default(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            **_spam_kwargs(),
+        )
+        assert result.info is None
+
+    def test_info_populated_when_show_missing_optional(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            show_missing_optional=True,
+            **_spam_kwargs(),
+        )
+        assert isinstance(result.info, list)
+        # Each entry must be a dict with "field" and "message" keys
+        for entry in result.info:
+            assert "field" in entry
+            assert "message" in entry
+
+
+class TestCreateReportAutoMetadata:
+    """Verify that auto-filled metadata fields are correct."""
+
+    def test_xarf_version_is_spec_version(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            **_spam_kwargs(),
+        )
+        assert result.report is not None
+        assert result.report.xarf_version == "4.2.0"
+
+    def test_report_id_is_valid_uuid(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            **_spam_kwargs(),
+        )
+        assert result.report is not None
+        # Must not raise ValueError
+        parsed = uuid.UUID(result.report.report_id)
+        assert parsed.version == 4
+
+    def test_timestamp_is_iso8601(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            **_spam_kwargs(),
+        )
+        assert result.report is not None
+        # ISO 8601 with timezone offset or Z
+        assert re.match(
+            r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(\.\d+)?(Z|[+-]\d{2}:\d{2})$",
+            result.report.timestamp,
+        )
+
+    def test_category_and_type_preserved(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            **_spam_kwargs(),
+        )
+        assert result.report is not None
+        assert result.report.category == "messaging"
+        assert result.report.type == "spam"
+
+    def test_source_identifier_preserved(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            **_spam_kwargs(),
+        )
+        assert result.report is not None
+        assert result.report.source_identifier == "192.0.2.1"
+
+
+# ---------------------------------------------------------------------------
+# create_report() — all 7 categories
+# ---------------------------------------------------------------------------
+
+
+class TestCreateReportAllCategories:
+    """Verify that all 7 XARF categories produce valid, typed Pydantic models."""
+
+    def test_messaging_spam(self) -> None:
+        # protocol="sms" avoids the smtp_from/source_port conditional requirement
+        result = create_report(
+            category="messaging",
+            type="spam",
+            protocol="sms",
+            **_base_kwargs(),
+        )
+        assert isinstance(result.report, SpamReport)
+        assert not result.errors
+
+    def test_connection_ddos(self) -> None:
+        # source_identifier is an IP → source_port is required (min=1)
+        result = create_report(
+            category="connection",
+            type="ddos",
+            protocol="tcp",
+            first_seen="2024-01-01T00:00:00+00:00",
+            source_port=443,
+            **_base_kwargs(),
+        )
+        assert isinstance(result.report, DdosReport)
+        assert not result.errors
+
+    def test_content_fraud(self) -> None:
+        result = create_report(
+            category="content",
+            type="fraud",
+            fraud_type="investment",
+            url="https://fake-exchange.example.com",
+            **_base_kwargs(),
+        )
+        assert isinstance(result.report, FraudReport)
+        assert not result.errors
+
+    def test_infrastructure_botnet(self) -> None:
+        result = create_report(
+            category="infrastructure",
+            type="botnet",
+            compromise_evidence="C2 communication observed in network logs",
+            **_base_kwargs(),
+        )
+        assert isinstance(result.report, BotnetReport)
+        assert not result.errors
+
+    def test_copyright_copyright(self) -> None:
+        result = create_report(
+            category="copyright",
+            type="copyright",
+            infringing_url="https://pirate.example.com/file.mp4",
+            **_base_kwargs(),
+        )
+        assert isinstance(result.report, CopyrightCopyrightReport)
+        assert not result.errors
+
+    def test_vulnerability_cve(self) -> None:
+        result = create_report(
+            category="vulnerability",
+            type="cve",
+            service="apache_httpd",
+            service_port=443,
+            cve_id="CVE-2024-12345",
+            **_base_kwargs(),
+        )
+        assert isinstance(result.report, CveReport)
+        assert not result.errors
+
+    def test_reputation_blocklist(self) -> None:
+        result = create_report(
+            category="reputation",
+            type="blocklist",
+            threat_type="scanning_source",
+            **_base_kwargs(),
+        )
+        assert isinstance(result.report, BlocklistReport)
+        assert not result.errors
+
+    def test_reputation_threat_intelligence(self) -> None:
+        result = create_report(
+            category="reputation",
+            type="threat_intelligence",
+            threat_type="malware_distribution",
+            **_base_kwargs(),
+        )
+        assert isinstance(result.report, ThreatIntelligenceReport)
+        assert not result.errors
+
+
+# ---------------------------------------------------------------------------
+# create_report() — ContactInfo input variant
+# ---------------------------------------------------------------------------
+
+
+class TestCreateReportContactInfo:
+    """Verify that ContactInfo objects are accepted in place of dicts."""
+
+    def test_contact_info_reporter(self) -> None:
+        reporter = ContactInfo(
+            org="Security Team",
+            contact="sec@example.net",
+            domain="example.net",
+        )
+        sender = ContactInfo(
+            org="Sender Org",
+            contact="s@sender.example",
+            domain="sender.example",
+        )
+        result = create_report(
+            category="messaging",
+            type="spam",
+            protocol="sms",
+            source_identifier="10.0.0.1",
+            reporter=reporter,
+            sender=sender,
+        )
+        assert isinstance(result.report, SpamReport)
+        assert not result.errors
+
+    def test_mixed_dict_and_contact_info(self) -> None:
+        reporter = ContactInfo(
+            org="Reporter Org",
+            contact="r@reporter.example",
+            domain="reporter.example",
+        )
+        result = create_report(
+            category="messaging",
+            type="spam",
+            protocol="sms",
+            source_identifier="10.0.0.2",
+            reporter=reporter,
+            sender=SENDER,
+        )
+        assert not result.errors
+
+
+# ---------------------------------------------------------------------------
+# create_report() — evidence kwarg with XARFEvidence objects
+# ---------------------------------------------------------------------------
+
+
+class TestCreateReportWithEvidence:
+    """Verify that XARFEvidence objects in evidence= kwarg are serialised."""
+
+    def test_evidence_xarf_evidence_object(self) -> None:
+        ev = create_evidence("text/plain", b"log line", description="Server log")
+        result = create_report(
+            category="messaging",
+            type="spam",
+            evidence=[ev],
+            **_spam_kwargs(),
+        )
+        assert not result.errors
+        assert result.report is not None
+        assert result.report.evidence is not None
+        assert len(result.report.evidence) == 1
+        # Verify _to_jsonable serialisation round-trips the evidence correctly
+        item = result.report.evidence[0]
+        assert item.content_type == ev.content_type
+        assert item.payload == ev.payload
+        assert item.hash == ev.hash
+        assert item.size == ev.size
+
+    def test_evidence_dict(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            evidence=[{"content_type": "text/plain", "payload": "aGVsbG8="}],
+            **_spam_kwargs(),
+        )
+        assert not result.errors
+
+
+# ---------------------------------------------------------------------------
+# create_report() — strict mode
+# ---------------------------------------------------------------------------
+
+
+class TestCreateReportStrictMode:
+    """Verify strict-mode behaviour: errors → report=None."""
+
+    def test_strict_invalid_category_returns_none(self) -> None:
+        result = create_report(
+            category="nonexistent",
+            type="fake",
+            strict=True,
+            **_base_kwargs(),
+        )
+        assert result.report is None
+        assert result.errors
+
+    def test_strict_promotes_recommended_to_required(self) -> None:
+        # Non-strict: missing recommended fields produces no errors
+        result_normal = create_report(
+            category="messaging",
+            type="spam",
+            strict=False,
+            **_spam_kwargs(),
+        )
+        assert not result_normal.errors
+
+        # Strict: missing recommended fields produce errors (e.g. source_port,
+        # evidence, confidence, smtp_to, subject, message_id become required)
+        result_strict = create_report(
+            category="messaging",
+            type="spam",
+            strict=True,
+            **_spam_kwargs(),
+        )
+        assert result_strict.errors
+
+    def test_unknown_field_produces_warning_non_strict(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            completely_unknown_field_xyz="value",
+            **_spam_kwargs(),
+        )
+        assert not result.errors
+        assert any(
+            "completely_unknown_field_xyz" in w.field for w in result.warnings
+        )
+
+    def test_strict_unknown_field_becomes_error(self) -> None:
+        result = create_report(
+            category="messaging",
+            type="spam",
+            strict=True,
+            completely_unknown_field_xyz="value",
+            **_spam_kwargs(),
+        )
+        assert result.report is None
+        assert any(
+            "completely_unknown_field_xyz" in e.field for e in result.errors
+        )
+
diff --git a/xarf/__init__.py b/xarf/__init__.py
index 2311bf6..ff6ca43 100644
--- a/xarf/__init__.py
+++ b/xarf/__init__.py
@@ -11,12 +11,14 @@
     SpamReport(...)
 """
 
+from xarf._version import SPEC_VERSION
 from xarf.exceptions import (
     XARFError,
     XARFParseError,
     XARFSchemaError,
     XARFValidationError,
 )
+from xarf.generator import create_evidence, create_report
 from xarf.models import (
     AnyXARFReport,
     ContactInfo,
@@ -31,8 +33,6 @@
 from xarf.schema_registry import (
     FieldMetadata,
     SchemaRegistry,
-    get_registry,
-    reset_registry,
     schema_registry,
 )
 from xarf.schema_validator import SchemaValidator, schema_validator
@@ -128,14 +128,13 @@
 __author__ = "XARF Project"
 __email__ = "contact@xarf.org"
 
-# Spec version this library was built against.
-SPEC_VERSION = "4.2.0"
-
 __all__ = [
     # Version
     "SPEC_VERSION",
     # Public API functions
     "parse",
+    "create_report",
+    "create_evidence",
     # Result types
     "AnyXARFReport",
     "ParseResult",
@@ -155,8 +154,6 @@
     "schema_registry",
     "SchemaRegistry",
     "FieldMetadata",
-    "get_registry",
-    "reset_registry",
     # Schema validator
     "SchemaValidator",
     "schema_validator",
diff --git a/xarf/_version.py b/xarf/_version.py
new file mode 100644
index 0000000..90431fd
--- /dev/null
+++ b/xarf/_version.py
@@ -0,0 +1,8 @@
+"""XARF spec version constant.
+
+Centralised so ``generator.py`` and ``__init__.py`` both import from
+one place and cannot silently diverge.
+"""
+
+#: The XARF specification version this library targets.
+SPEC_VERSION: str = "4.2.0"
diff --git a/xarf/generator.py b/xarf/generator.py
index 3341070..363041b 100644
--- a/xarf/generator.py
+++ b/xarf/generator.py
@@ -1,526 +1,267 @@
 """XARF Report Generator.
 
-This module provides functionality for generating XARF v4.0.0 compliant reports
-programmatically with proper validation and type safety.
+Provides the module-level :func:`create_report` and :func:`create_evidence`
+functions for programmatic creation of XARF v4 reports with automatic
+metadata, validation, and type safety.
+
+Mirrors ``generator.ts`` from the JavaScript reference implementation.
+``xarf_version``, ``report_id``, and ``timestamp`` are auto-generated;
+callers supply all other required fields plus any category-specific kwargs.
+
+Example:
+    >>> from xarf import create_report, create_evidence
+    >>> evidence = create_evidence("text/plain", b"log line", description="Log")
+    >>> result = create_report(
+    ...     category="messaging",
+    ...     type="spam",
+    ...     source_identifier="192.0.2.1",
+    ...     reporter={"org": "ACME", "contact": "abuse@acme.example",
+    ...               "domain": "acme.example"},
+    ...     sender={"org": "Bad Actor", "contact": "noreply@bad.example",
+    ...             "domain": "bad.example"},
+    ...     evidence=[evidence],
+    ... )
+    >>> result.errors
+    []
 """
 
+from __future__ import annotations
+
+import base64
 import hashlib
-import secrets
 import uuid
 from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Literal
+
+from pydantic import BaseModel, TypeAdapter
+from pydantic import ValidationError as PydanticValidationError
+
+from xarf._version import SPEC_VERSION as _SPEC_VERSION
+from xarf.models import AnyXARFReport, ContactInfo, CreateReportResult, XARFEvidence
+from xarf.validator import _validator
+
+# ---------------------------------------------------------------------------
+# Module-level TypeAdapter (built once; reused for every create_report() call)
+# ---------------------------------------------------------------------------
+
+_REPORT_ADAPTER: TypeAdapter[AnyXARFReport] = TypeAdapter(AnyXARFReport)
 
-from .exceptions import XARFError
 
+# ---------------------------------------------------------------------------
+# Private helpers
+# ---------------------------------------------------------------------------
 
-class XARFGenerator:
-    """Generator for creating XARF v4.0.0 compliant reports.
 
-    This class provides methods to generate complete XARF reports with all
-    required fields, proper validation, and support for all 8 report categories.
+def _to_jsonable(value: Any) -> Any:  # noqa: ANN401
+    """Recursively convert Pydantic models to plain dicts for JSON serialisation.
+
+    Used to ensure that caller-supplied :class:`~xarf.models.XARFEvidence`
+    objects (or other Pydantic models) in ``**kwargs`` are serialised to plain
+    dicts before the report dict is handed to the schema validator.
+
+    Args:
+        value: Any Python value — plain scalars, lists, dicts, or
+            :class:`pydantic.BaseModel` instances.
+
+    Returns:
+        The value with all :class:`pydantic.BaseModel` instances converted to
+        ``dict`` (recursively).  Non-model values are returned unchanged.
+    """
+    if isinstance(value, BaseModel):
+        return value.model_dump(by_alias=True, exclude_none=True)
+    if isinstance(value, list):
+        return [_to_jsonable(item) for item in value]
+    if isinstance(value, dict):
+        return {k: _to_jsonable(v) for k, v in value.items()}
+    return value
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def create_report(
+    *,
+    category: str,
+    type: str,  # noqa: A002
+    source_identifier: str,
+    reporter: dict[str, Any] | ContactInfo,
+    sender: dict[str, Any] | ContactInfo,
+    strict: bool = False,
+    show_missing_optional: bool = False,
+    **kwargs: Any,
+) -> CreateReportResult:
+    """Create a validated XARF report with auto-generated metadata.
+
+    ``xarf_version``, ``report_id``, and ``timestamp`` are filled in
+    automatically.  Category-specific fields are passed via ``**kwargs`` and
+    merged into the report alongside the named parameters.
+
+    Mirrors ``createReport()`` in ``xarf-javascript/src/generator.ts``.
+
+    Args:
+        category: XARF abuse category (e.g. ``"messaging"``, ``"connection"``).
+        type: Report type within the category (e.g. ``"spam"``, ``"ddos"``).
+        source_identifier: IP address, domain, or other identifier of the
+            abusive source.
+        reporter: Contact information for the reporting party — either a
+            :class:`~xarf.models.ContactInfo` instance or a plain dict with
+            ``org``, ``contact``, and ``domain`` keys.
+        sender: Contact information for the originating/sending party — same
+            format as *reporter*.
+        strict: When ``True``, recommended fields are treated as required,
+            unknown fields become errors, and validation failures cause
+            ``report=None`` to be returned.
+        show_missing_optional: When ``True``,
+            :attr:`~xarf.models.CreateReportResult.info` is populated with
+            details about absent optional and recommended fields.
+        **kwargs: Category-specific fields and any other valid XARF report
+            fields (e.g. ``destination_ip``, ``protocol``, ``evidence``).
+            :class:`~xarf.models.XARFEvidence` instances in list values are
+            automatically serialised to dicts.
+
+    Returns:
+        :class:`~xarf.models.CreateReportResult` containing:
+
+        - ``report``: The typed report model, or ``None`` on failure.
+        - ``errors``: Validation errors (empty list means valid).
+        - ``warnings``: Non-fatal warnings.
+        - ``info``: Missing-field metadata when ``show_missing_optional=True``,
+          otherwise ``None``.
 
     Example:
-        >>> generator = XARFGenerator()
-        >>> report = generator.generate_report(
+        >>> result = create_report(
         ...     category="connection",
-        ...     report_type="ddos",
-        ...     source_identifier="192.0.2.100",
-        ...     reporter_contact="abuse@example.com",
-        ...     reporter_org="Example Security Team"
+        ...     type="ddos",
+        ...     source_identifier="192.0.2.1",
+        ...     reporter={"org": "Acme", "contact": "abuse@acme.example",
+        ...               "domain": "acme.example"},
+        ...     sender={"org": "Bad", "contact": "x@bad.example",
+        ...             "domain": "bad.example"},
         ... )
+        >>> result.errors
+        []
     """
-
-    # XARF v4.0.0 specification constants
-    XARF_VERSION = "4.0.0"
-
-    # Valid categories as per XARF spec
-    VALID_CATEGORIES = {
-        "abuse",
-        "messaging",
-        "connection",
-        "content",
-        "copyright",
-        "infrastructure",
-        "vulnerability",
-        "reputation",
+    # ------------------------------------------------------------------
+    # Step 1 — Serialise ContactInfo objects; build report dict
+    # ------------------------------------------------------------------
+    reporter_dict: dict[str, Any] = (
+        reporter.model_dump(by_alias=True, exclude_none=True)
+        if isinstance(reporter, ContactInfo)
+        else reporter
+    )
+    sender_dict: dict[str, Any] = (
+        sender.model_dump(by_alias=True, exclude_none=True)
+        if isinstance(sender, ContactInfo)
+        else sender
+    )
+
+    # Serialise any Pydantic models nested in kwargs (e.g. XARFEvidence lists)
+    serialised_kwargs: dict[str, Any] = {k: _to_jsonable(v) for k, v in kwargs.items()}
+
+    report_dict: dict[str, Any] = {
+        **serialised_kwargs,
+        "category": category,
+        "type": type,
+        "source_identifier": source_identifier,
+        "reporter": reporter_dict,
+        "sender": sender_dict,
+        # Auto-generated metadata
+        "xarf_version": _SPEC_VERSION,
+        "report_id": str(uuid.uuid4()),
+        "timestamp": datetime.now(timezone.utc).isoformat(),
     }
 
-    # Valid types per category
-    EVENT_TYPES: Dict[str, List[str]] = {
-        "abuse": ["ddos", "malware", "phishing", "spam", "scanner"],
-        "vulnerability": ["cve", "misconfiguration", "open_service"],
-        "connection": [
-            "compromised",
-            "botnet",
-            "malicious_traffic",
-            "ddos",
-            "port_scan",
-            "login_attack",
-            "sql_injection",
-            "reconnaissance",
-            "scraping",
-            "vuln_scanning",
-            "bot",
-            "infected_host",
-        ],
-        "content": [
-            "illegal",
-            "malicious",
-            "policy_violation",
-            "phishing",
-            "malware",
-            "fraud",
-            "exposed_data",
-            "csam",
-            "csem",
-            "brand_infringement",
-            "suspicious_registration",
-            "remote_compromise",
-        ],
-        "copyright": [
-            "infringement",
-            "dmca",
-            "trademark",
-            "p2p",
-            "cyberlocker",
-            "link_site",
-            "ugc_platform",
-            "usenet",
-            "copyright",
-        ],
-        "messaging": ["bulk_messaging", "spam"],
-        "reputation": ["blocklist", "threat_intelligence"],
-        "infrastructure": ["botnet", "compromised_server"],
-    }
-
-    # Valid evidence sources
-    VALID_EVIDENCE_SOURCES = {
-        "spamtrap",
-        "honeypot",
-        "user_report",
-        "automated_scan",
-        "manual_analysis",
-        "vulnerability_scan",
-        "researcher_analysis",
-        "threat_intelligence",
-        "flow_analysis",
-        "ids_ips",
-        "siem",
-    }
-
-    # Valid reporter types
-    VALID_REPORTER_TYPES = {"automated", "manual", "hybrid"}
-
-    # Valid severity levels
-    VALID_SEVERITIES = {"low", "medium", "high", "critical"}
-
-    # Evidence content types by category
-    EVIDENCE_CONTENT_TYPES: Dict[str, List[str]] = {
-        "abuse": ["application/pcap", "text/plain", "image/png"],
-        "vulnerability": ["text/plain", "application/json", "image/png"],
-        "connection": ["application/pcap", "text/plain", "application/json"],
-        "content": ["image/png", "text/html", "application/pdf"],
-        "copyright": ["text/html", "image/png", "application/pdf"],
-        "messaging": ["message/rfc822", "text/plain", "text/html"],
-        "reputation": ["application/json", "text/plain", "text/csv"],
-        "infrastructure": ["application/pcap", "text/plain", "application/json"],
-    }
+    # ------------------------------------------------------------------
+    # Step 2 — Validate (schema + unknown fields + missing optional)
+    # ------------------------------------------------------------------
+    result = _validator.validate(
+        report_dict, strict=strict, show_missing_optional=show_missing_optional
+    )
+
+    # ------------------------------------------------------------------
+    # Step 3 — Strict mode early return
+    # ------------------------------------------------------------------
+    if result.errors and strict:
+        return CreateReportResult(
+            report=None,
+            errors=result.errors,
+            warnings=result.warnings,
+            info=result.info,
+        )
 
-    def __init__(self) -> None:
-        """Initialize the XARF generator."""
-
-    def generate_uuid(self) -> str:
-        """Generate a UUID v4 for report identification.
-
-        Uses Python's uuid.uuid4() which generates cryptographically secure
-        random UUIDs as per RFC 4122.
-
-        Returns:
-            A string representation of a UUID v4.
-
-        Example:
-            >>> generator = XARFGenerator()
-            >>> report_id = generator.generate_uuid()
-            >>> len(report_id)
-            36
-        """
-        return str(uuid.uuid4())
-
-    def generate_timestamp(self) -> str:
-        """Generate an ISO 8601 formatted timestamp with UTC timezone.
-
-        Creates a timestamp in the format required by XARF specification:
-        YYYY-MM-DDTHH:MM:SSZ
-
-        Returns:
-            ISO 8601 formatted timestamp string with UTC timezone.
-
-        Example:
-            >>> generator = XARFGenerator()
-            >>> timestamp = generator.generate_timestamp()
-            >>> timestamp.endswith('Z')
-            True
-        """
-        return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-
-    def generate_hash(self, data: Union[str, bytes], algorithm: str = "sha256") -> str:
-        """Generate a cryptographic hash of the provided data.
-
-        Args:
-            data: The data to hash (string or bytes).
-            algorithm: Hash algorithm to use (default: "sha256").
-                      Supported: "sha256", "sha512", "sha1", "md5".
-
-        Returns:
-            Hexadecimal string representation of the hash.
-
-        Raises:
-            XARFError: If the algorithm is not supported.
-
-        Example:
-            >>> generator = XARFGenerator()
-            >>> hash_val = generator.generate_hash("test data")
-            >>> len(hash_val)
-            64
-        """
-        if isinstance(data, str):
-            data = data.encode("utf-8")
-
-        if algorithm == "sha256":
-            return hashlib.sha256(data).hexdigest()
-        elif algorithm == "sha512":
-            return hashlib.sha512(data).hexdigest()
-        elif algorithm == "sha1":
-            return hashlib.sha1(data).hexdigest()  # nosec B324
-        elif algorithm == "md5":
-            return hashlib.md5(data).hexdigest()  # nosec B324
-        else:
-            raise XARFError(f"Unsupported hash algorithm: {algorithm}")
-
-    def add_evidence(
-        self,
-        content_type: str,
-        description: str,
-        payload: Union[str, bytes],
-        hash_algorithm: str = "sha256",
-    ) -> Dict[str, str]:
-        """Create an evidence item with automatic hashing.
-
-        Args:
-            content_type: MIME type of the evidence (e.g., "text/plain").
-            description: Human-readable description of the evidence.
-            payload: The evidence data (base64-encoded string or raw bytes).
-            hash_algorithm: Algorithm to use for hashing (default: "sha256").
-
-        Returns:
-            Dictionary containing evidence fields including computed hash.
-
-        Example:
-            >>> generator = XARFGenerator()
-            >>> evidence = generator.add_evidence(
-            ...     content_type="text/plain",
-            ...     description="Log excerpt",
-            ...     payload="Sample log data"
-            ... )
-            >>> "hash" in evidence
-            True
-        """
-        if isinstance(payload, bytes):
-            payload_bytes = payload
-            payload_str = payload.decode("utf-8", errors="ignore")
-        else:
-            payload_str = payload
-            payload_bytes = payload.encode("utf-8")
-
-        evidence_hash = self.generate_hash(payload_bytes, hash_algorithm)
-
-        return {
-            "content_type": content_type,
-            "description": description,
-            "payload": payload_str,
-            "hash": evidence_hash,
-        }
-
-    def generate_report(
-        self,
-        category: str,
-        report_type: str,
-        source_identifier: str,
-        reporter_contact: str,
-        reporter_org: Optional[str] = None,
-        reporter_type: str = "automated",
-        evidence_source: str = "automated_scan",
-        on_behalf_of: Optional[Dict[str, str]] = None,
-        description: Optional[str] = None,
-        evidence: Optional[List[Dict[str, str]]] = None,
-        severity: Optional[str] = None,
-        confidence: Optional[float] = None,
-        tags: Optional[List[str]] = None,
-        occurrence: Optional[Dict[str, str]] = None,
-        target: Optional[Dict[str, Any]] = None,
-        additional_fields: Optional[Dict[str, Any]] = None,
-    ) -> Dict[str, Any]:
-        """Generate a complete XARF v4.0.0 report.
-
-        Args:
-            category: Report category (e.g., "connection", "content").
-            report_type: Specific type within category (e.g., "ddos", "phishing").
-            source_identifier: Source IP address or identifier.
-            reporter_contact: Contact email for the reporter.
-            reporter_org: Organization name of the reporter (optional).
-            reporter_type: Type of reporter (default: "automated").
-            evidence_source: How the evidence was collected (default: "automated_scan").
-            on_behalf_of: Dictionary with "org" and optional "contact" keys for
-                         reporting on behalf of another entity.
-            description: Human-readable description of the incident.
-            evidence: List of evidence items (dictionaries with content_type,
-                     description, payload, and hash).
-            severity: Incident severity (low, medium, high, critical).
-            confidence: Confidence score between 0.0 and 1.0.
-            tags: List of tags for categorization.
-            occurrence: Dictionary with "start" and "end" ISO 8601 timestamps.
-            target: Dictionary with target information (ip, port, url, etc.).
-            additional_fields: Category-specific fields to include in the report.
-
-        Returns:
-            Complete XARF report as a dictionary.
-
-        Raises:
-            XARFError: If validation fails or required fields are missing.
-
-        Example:
-            >>> generator = XARFGenerator()
-            >>> report = generator.generate_report(
-            ...     category="connection",
-            ...     report_type="ddos",
-            ...     source_identifier="192.0.2.100",
-            ...     reporter_contact="abuse@example.com",
-            ...     reporter_org="Example Security",
-            ...     severity="high"
-            ... )
-            >>> report["xarf_version"]
-            '4.0.0'
-        """
-        # Validate required parameters
-        if not source_identifier:
-            raise XARFError("source_identifier is required")
-        if not reporter_contact:
-            raise XARFError("reporter_contact is required")
-
-        # Validate category
-        if category not in self.VALID_CATEGORIES:
-            raise XARFError(
-                f"Invalid category '{category}'. Must be one of: "
-                f"{', '.join(sorted(self.VALID_CATEGORIES))}"
-            )
-
-        # Validate type for category
-        valid_types = self.EVENT_TYPES.get(category, [])
-        if report_type not in valid_types:
-            raise XARFError(
-                f"Invalid type '{report_type}' for category '{category}'. "
-                f"Must be one of: {', '.join(valid_types)}"
-            )
-
-        # Validate reporter_type
-        if reporter_type not in self.VALID_REPORTER_TYPES:
-            raise XARFError(
-                f"Invalid reporter_type '{reporter_type}'. Must be one of: "
-                f"{', '.join(sorted(self.VALID_REPORTER_TYPES))}"
-            )
-
-        # Validate evidence_source
-        if evidence_source not in self.VALID_EVIDENCE_SOURCES:
-            raise XARFError(
-                f"Invalid evidence_source '{evidence_source}'. Must be one of: "
-                f"{', '.join(sorted(self.VALID_EVIDENCE_SOURCES))}"
-            )
-
-        # Validate severity if provided
-        if severity and severity not in self.VALID_SEVERITIES:
-            raise XARFError(
-                f"Invalid severity '{severity}'. Must be one of: "
-                f"{', '.join(sorted(self.VALID_SEVERITIES))}"
-            )
-
-        # Validate confidence if provided
-        if confidence is not None and not (0.0 <= confidence <= 1.0):
-            raise XARFError("confidence must be between 0.0 and 1.0")
-
-        # Build base report structure
-        report: Dict[str, Any] = {
-            "xarf_version": self.XARF_VERSION,
-            "report_id": self.generate_uuid(),
-            "timestamp": self.generate_timestamp(),
-            "reporter": {"contact": reporter_contact, "type": reporter_type},
-            "source_identifier": source_identifier,
-            "category": category,
-            "type": report_type,
-            "evidence_source": evidence_source,
-        }
-
-        # Add optional reporter fields
-        if reporter_org:
-            report["reporter"]["org"] = reporter_org
-
-        # Add on_behalf_of if provided
-        if on_behalf_of:
-            if "org" not in on_behalf_of:
-                raise XARFError("on_behalf_of must contain 'org' key")
-            report["reporter"]["on_behalf_of"] = on_behalf_of
-
-        # Add optional fields
-        if description:
-            report["description"] = description
-
-        if evidence:
-            report["evidence"] = evidence
-
-        if severity:
-            report["severity"] = severity
-
-        if confidence is not None:
-            report["confidence"] = confidence
-
-        if tags:
-            report["tags"] = tags
-
-        if occurrence:
-            if "start" in occurrence and "end" in occurrence:
-                report["occurrence"] = occurrence
-            else:
-                raise XARFError("occurrence must contain 'start' and 'end' keys")
-
-        if target:
-            report["target"] = target
-
-        # Add any additional category-specific fields
-        if additional_fields:
-            report.update(additional_fields)
-
-        return report
-
-    def generate_random_evidence(
-        self, category: str, description: Optional[str] = None
-    ) -> Dict[str, str]:
-        """Generate random sample evidence for testing purposes.
-
-        Args:
-            category: Report category to determine appropriate content type.
-            description: Custom description (auto-generated if not provided).
-
-        Returns:
-            Dictionary containing a sample evidence item.
-
-        Example:
-            >>> generator = XARFGenerator()
-            >>> evidence = generator.generate_random_evidence("connection")
-            >>> "content_type" in evidence
-            True
-        """
-        # Select appropriate content type for category
-        content_types = self.EVIDENCE_CONTENT_TYPES.get(category, ["text/plain"])
-        content_type = secrets.choice(content_types)
-
-        # Generate random payload data
-        random_data = secrets.token_bytes(32)
-        payload = random_data.hex()
-
-        # Generate description if not provided
-        if not description:
-            description = f"Sample {category} evidence data"
-
-        return self.add_evidence(
-            content_type=content_type, description=description, payload=payload
+    # ------------------------------------------------------------------
+    # Step 4 — Pydantic deserialization via discriminated union
+    # ------------------------------------------------------------------
+    try:
+        report = _REPORT_ADAPTER.validate_python(report_dict)
+    except PydanticValidationError:
+        return CreateReportResult(
+            report=None,
+            errors=result.errors,
+            warnings=result.warnings,
+            info=result.info,
         )
 
-    def generate_sample_report(
-        self,
-        category: str,
-        report_type: str,
-        include_evidence: bool = True,
-        include_optional: bool = True,
-    ) -> Dict[str, Any]:
-        """Generate a sample XARF report with randomized data for testing.
-
-        Useful for generating test reports, examples, and documentation.
-
-        Args:
-            category: Report category (e.g., "connection").
-            report_type: Specific type within category (e.g., "ddos").
-            include_evidence: Whether to include sample evidence (default: True).
-            include_optional: Whether to include optional fields (default: True).
-
-        Returns:
-            Complete sample XARF report.
-
-        Raises:
-            XARFError: If category or type is invalid.
-
-        Example:
-            >>> generator = XARFGenerator()
-            >>> sample = generator.generate_sample_report("connection", "ddos")
-            >>> sample["category"]
-            'connection'
-        """
-        # Validate inputs
-        if category not in self.VALID_CATEGORIES:
-            raise XARFError(f"Invalid category: {category}")
-
-        valid_types = self.EVENT_TYPES.get(category, [])
-        if report_type not in valid_types:
-            raise XARFError(f"Invalid type '{report_type}' for category '{category}'")
-
-        # Generate random test data
-        source_ip = f"192.0.2.{secrets.randbelow(256)}"
-
-        sample_orgs = [
-            "Security Operations Center",
-            "Abuse Response Team",
-            "Network Security Team",
-            "Threat Intelligence Unit",
-            "SOC Team",
-        ]
-        reporter_org = secrets.choice(sample_orgs)
-
-        sample_domains = ["example.com", "security.net", "abuse.org", "soc.io"]
-        reporter_contact = f"abuse@{secrets.choice(sample_domains)}"
-
-        # Build report parameters
-        params: Dict[str, Any] = {
-            "category": category,
-            "report_type": report_type,
-            "source_identifier": source_ip,
-            "reporter_contact": reporter_contact,
-            "reporter_org": reporter_org,
-            "description": f"Sample {report_type} report for testing",
-        }
-
-        # Add evidence if requested
-        if include_evidence:
-            params["evidence"] = [self.generate_random_evidence(category)]
-
-        # Add optional fields if requested
-        if include_optional:
-            params["severity"] = secrets.choice(list(self.VALID_SEVERITIES))
-            params["confidence"] = round(0.7 + secrets.randbelow(30) / 100, 2)
-            params["tags"] = [category, report_type, "sample"]
-
-            # Add target information
-            target_ip = f"203.0.113.{secrets.randbelow(256)}"
-            params["target"] = {
-                "ip": target_ip,
-                "port": secrets.choice([53, 80, 443, 8080, 22, 25]),
-            }
-
-            # Add occurrence time range
-            now = datetime.now(timezone.utc)
-            start = datetime.fromtimestamp(
-                now.timestamp() - secrets.randbelow(7200), tz=timezone.utc
-            )
-            params["occurrence"] = {
-                "start": start.strftime("%Y-%m-%dT%H:%M:%SZ"),
-                "end": now.strftime("%Y-%m-%dT%H:%M:%SZ"),
-            }
-
-        return self.generate_report(**params)
+    return CreateReportResult(
+        report=report,
+        errors=result.errors,
+        warnings=result.warnings,
+        info=result.info,
+    )
+
+
+def create_evidence(
+    content_type: str,
+    payload: bytes | str,
+    *,
+    description: str | None = None,
+    hash_algorithm: Literal["sha256", "sha512", "sha1", "md5"] = "sha256",
+) -> XARFEvidence:
+    """Create an evidence item with automatic hashing, encoding, and size.
+
+    Converts *payload* to bytes if needed, computes a hex digest with the
+    chosen algorithm, base64-encodes the payload, and returns a fully-formed
+    :class:`~xarf.models.XARFEvidence` object.
+
+    Mirrors ``createEvidence()`` in ``xarf-javascript/src/generator.ts``.
+
+    Args:
+        content_type: MIME type of the evidence (e.g. ``"message/rfc822"``).
+        payload: Raw evidence data as bytes or a UTF-8 string.
+        description: Human-readable description of the evidence item.
+        hash_algorithm: Cryptographic algorithm for the integrity hash
+            (default ``"sha256"``).  Supported values: ``"sha256"``,
+            ``"sha512"``, ``"sha1"``, ``"md5"``.
+
+    Returns:
+        :class:`~xarf.models.XARFEvidence` with ``content_type``, base64
+        ``payload``, ``hash`` in ``"algorithm:hexvalue"`` format, ``size``
+        (byte count of the original payload), and optional ``description``.
+
+    Example:
+        >>> ev = create_evidence("text/plain", b"Hello, XARF!", description="Test")
+        >>> ev.hash.startswith("sha256:")
+        True
+        >>> ev.size
+        12
+    """
+    payload_bytes: bytes = (
+        payload.encode("utf-8") if isinstance(payload, str) else payload
+    )
+
+    # Compute hash — sha1/md5 are legacy but valid per the XARF spec
+    hasher = hashlib.new(hash_algorithm)
+    hasher.update(payload_bytes)
+    hex_digest = hasher.hexdigest()
+
+    encoded_payload: str = base64.b64encode(payload_bytes).decode("ascii")
+
+    return XARFEvidence(
+        content_type=content_type,
+        payload=encoded_payload,
+        hash=f"{hash_algorithm}:{hex_digest}",
+        size=len(payload_bytes),
+        description=description,
+    )

From 8fb2f755917ab78476f88933e21398e836a38fb7 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 31 Mar 2026 14:50:45 +0200
Subject: [PATCH 09/13] Add v3 compatiblity layer

---
 tests/test_v3_compat.py | 775 ++++++++++++++++++++++++++++++++++++++++
 xarf/__init__.py        |   4 +
 xarf/parser.py          |  20 +-
 xarf/v3_compat.py       | 560 +++++++++++++++++++----------
 4 files changed, 1167 insertions(+), 192 deletions(-)
 create mode 100644 tests/test_v3_compat.py

diff --git a/tests/test_v3_compat.py b/tests/test_v3_compat.py
new file mode 100644
index 0000000..be7e246
--- /dev/null
+++ b/tests/test_v3_compat.py
@@ -0,0 +1,775 @@
+"""Tests for XARF v3 backwards compatibility.
+
+Mirrors ``v3-legacy.test.ts`` in ``xarf-javascript/tests/``.
+"""
+
+from __future__ import annotations
+
+import base64
+import hashlib
+import warnings as warnings_module
+from typing import Any
+
+import pytest
+
+from xarf import parse
+from xarf.exceptions import XARFParseError
+from xarf.v3_compat import (
+    XARFv3DeprecationWarning,
+    convert_v3_to_v4,
+    get_v3_deprecation_warning,
+    is_v3_report,
+)
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _spam_v3(
+    *,
+    version: str = "3",
+    reporter_org: str | None = "Test Org",
+    reporter_email: str = "abuse@example.com",
+    source_ip: str = "192.0.2.1",
+    protocol: str | None = "smtp",
+) -> dict[str, Any]:
+    """Build a minimal v3 spam report for testing."""
+    reporter: dict[str, Any] = {"ReporterOrgEmail": reporter_email}
+    if reporter_org is not None:
+        reporter["ReporterOrg"] = reporter_org
+    report: dict[str, Any] = {
+        "ReportType": "Spam",
+        "Date": "2024-01-15T10:00:00Z",
+        "SourceIp": source_ip,
+    }
+    if protocol is not None:
+        report["Protocol"] = protocol
+    return {"Version": version, "ReporterInfo": reporter, "Report": report}
+
+
+# ===========================================================================
+# is_v3_report — detection
+# ===========================================================================
+
+
+class TestIsV3Report:
+    def test_detects_version_3(self) -> None:
+        assert is_v3_report(
+            {
+                "Version": "3",
+                "ReporterInfo": {"ReporterOrgEmail": "t@example.com"},
+                "Report": {"ReportType": "Spam", "Date": "2024-01-15T10:00:00Z"},
+            }
+        )
+
+    def test_detects_version_3_0(self) -> None:
+        assert is_v3_report(
+            {
+                "Version": "3.0",
+                "ReporterInfo": {"ReporterOrgEmail": "t@example.com"},
+                "Report": {"ReportType": "DDoS", "Date": "2024-01-15T10:00:00Z"},
+            }
+        )
+
+    def test_detects_version_3_0_0(self) -> None:
+        assert is_v3_report(
+            {
+                "Version": "3.0.0",
+                "ReporterInfo": {"ReporterOrgEmail": "t@example.com"},
+                "Report": {"ReportType": "Spam", "Date": "2024-01-15T10:00:00Z"},
+            }
+        )
+
+    def test_does_not_detect_v4_as_v3(self) -> None:
+        assert not is_v3_report(
+            {
+                "xarf_version": "4.2.0",
+                "report_id": "abc",
+                "timestamp": "2024-01-15T10:00:00Z",
+                "category": "messaging",
+                "type": "spam",
+            }
+        )
+
+    def test_does_not_detect_empty_dict(self) -> None:
+        assert not is_v3_report({})
+
+    def test_does_not_detect_version_4(self) -> None:
+        assert not is_v3_report({"Version": "4.0.0"})
+
+    def test_does_not_detect_v3_without_report_key(self) -> None:
+        # Version "3" but missing the "Report" key
+        assert not is_v3_report(
+            {
+                "Version": "3",
+                "ReporterInfo": {"ReporterOrgEmail": "t@example.com"},
+            }
+        )
+
+    def test_does_not_detect_v3_without_reporter_info(self) -> None:
+        assert not is_v3_report(
+            {
+                "Version": "3",
+                "Report": {"ReportType": "Spam"},
+            }
+        )
+
+
+# ===========================================================================
+# convert_v3_to_v4 — spam
+# ===========================================================================
+
+
+class TestSpamConversion:
+    def test_converts_full_spam_report(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {
+                "ReporterOrg": "Anti-Spam Service",
+                "ReporterOrgEmail": "abuse@antispam.example",
+            },
+            "Report": {
+                "ReportType": "Spam",
+                "Date": "2024-01-15T14:30:25Z",
+                "SourceIp": "192.168.1.100",
+                "Protocol": "smtp",
+                "SmtpMailFromAddress": "spammer@evil.example",
+                "SmtpMessageSubject": "Buy now!",
+                "AttackDescription": "Spam email detected",
+            },
+        }
+
+        msgs: list[str] = []
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3, conversion_warnings=msgs)
+
+        assert v4["xarf_version"] == "4.2.0"
+        assert v4["category"] == "messaging"
+        assert v4["type"] == "spam"
+        assert v4["source_identifier"] == "192.168.1.100"
+        assert v4["reporter"]["org"] == "Anti-Spam Service"
+        assert v4["reporter"]["contact"] == "abuse@antispam.example"
+        assert v4["reporter"]["domain"] == "antispam.example"
+        assert v4["sender"]["org"] == "Anti-Spam Service"
+        assert v4["sender"]["contact"] == "abuse@antispam.example"
+        assert v4["sender"]["domain"] == "antispam.example"
+        assert v4["timestamp"] == "2024-01-15T14:30:25Z"
+        assert v4["description"] == "Spam email detected"
+        assert v4["legacy_version"] == "3"
+        assert v4["_internal"]["original_report_type"] == "Spam"
+        assert "converted_at" in v4["_internal"]
+        # Category-specific
+        assert v4["protocol"] == "smtp"
+        assert v4["smtp_from"] == "spammer@evil.example"
+        assert v4["subject"] == "Buy now!"
+
+    def test_converts_lowercase_spam_type(self) -> None:
+        v3 = _spam_v3()
+        v3["Report"]["ReportType"] = "spam"
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["category"] == "messaging"
+        assert v4["type"] == "spam"
+
+    def test_source_from_source_ip_and_port(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "abuse@example.com"},
+            "Report": {
+                "ReportType": "spam",
+                "Date": "2024-01-15T10:00:00Z",
+                "Protocol": "smtp",
+                "Source": {"IP": "10.0.0.1", "Port": 25},
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["source_identifier"] == "10.0.0.1"
+        assert v4["source_port"] == 25
+
+    def test_smtp_from_from_additional_info(self) -> None:
+        v3 = _spam_v3()
+        v3["Report"]["AdditionalInfo"] = {"SMTPFrom": "from@example.com"}
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["smtp_from"] == "from@example.com"
+
+    def test_no_description_field_when_absent(self) -> None:
+        v3 = _spam_v3()
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert "description" not in v4
+
+
+# ===========================================================================
+# convert_v3_to_v4 — connection types
+# ===========================================================================
+
+
+class TestConnectionConversion:
+    def _ddos(self, **extra: Any) -> dict[str, Any]:
+        report: dict[str, Any] = {
+            "ReportType": "DDoS",
+            "Date": "2024-01-15T15:00:00Z",
+            "SourceIp": "203.0.113.50",
+            "Protocol": "tcp",
+        }
+        report.update(extra)
+        return {
+            "Version": "3",
+            "ReporterInfo": {
+                "ReporterOrg": "DDoS Protection",
+                "ReporterOrgEmail": "ddos@example.com",
+            },
+            "Report": report,
+        }
+
+    def test_converts_ddos_full(self) -> None:
+        v3 = self._ddos(
+            DestinationIp="198.51.100.10", DestinationPort=80, AttackCount=10000
+        )
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["category"] == "connection"
+        assert v4["type"] == "ddos"
+        assert v4["source_identifier"] == "203.0.113.50"
+        assert v4["destination_ip"] == "198.51.100.10"
+        assert v4["destination_port"] == 80
+        assert v4["protocol"] == "tcp"
+        assert v4["attack_count"] == 10000
+        assert v4["first_seen"] == "2024-01-15T15:00:00Z"
+
+    def test_ddos_absent_optional_fields_not_in_result(self) -> None:
+        v3 = self._ddos()  # no DestinationIp, DestinationPort, AttackCount
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert "destination_ip" not in v4
+        assert "destination_port" not in v4
+        assert "attack_count" not in v4
+
+    def test_converts_login_attack(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "security@example.com"},
+            "Report": {
+                "ReportType": "Login-Attack",
+                "Date": "2024-01-15T12:00:00Z",
+                "SourceIp": "192.0.2.50",
+                "DestinationIp": "203.0.113.10",
+                "DestinationPort": 22,
+                "Protocol": "tcp",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["category"] == "connection"
+        assert v4["type"] == "login_attack"
+
+    def test_converts_port_scan(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "security@example.com"},
+            "Report": {
+                "ReportType": "Port-Scan",
+                "Date": "2024-01-15T12:00:00Z",
+                "SourceIp": "192.0.2.99",
+                "Protocol": "tcp",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["category"] == "connection"
+        assert v4["type"] == "port_scan"
+
+    def test_converts_lowercase_ddos(self) -> None:
+        v3 = self._ddos()
+        v3["Report"]["ReportType"] = "ddos"
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["type"] == "ddos"
+
+
+# ===========================================================================
+# convert_v3_to_v4 — content types
+# ===========================================================================
+
+
+class TestContentConversion:
+    def test_converts_phishing(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "phishing@example.com"},
+            "Report": {
+                "ReportType": "Phishing",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.100",
+                "Url": "http://evil-phishing.example",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["category"] == "content"
+        assert v4["type"] == "phishing"
+        assert v4["url"] == "http://evil-phishing.example"
+
+    def test_converts_malware(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "malware@example.com"},
+            "Report": {
+                "ReportType": "Malware",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.150",
+                "Url": "http://malware-site.example",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["category"] == "content"
+        assert v4["type"] == "malware"
+
+    def test_url_from_additional_info(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "test@example.com"},
+            "Report": {
+                "ReportType": "Phishing",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.1",
+                "AdditionalInfo": {"URL": "http://phish.example/login"},
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["url"] == "http://phish.example/login"
+
+    def test_url_from_source_url(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {
+                "ReporterOrg": "Security Vendor",
+                "ReporterOrgEmail": "abuse@security.example",
+            },
+            "Report": {
+                "ReportType": "Phishing",
+                "Date": "2024-01-15T10:00:00Z",
+                "Source": {"URL": "https://malicious-example.net/banking-login/"},
+                "Url": "https://malicious-example.net/banking-login/",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["url"] == "https://malicious-example.net/banking-login/"
+
+
+# ===========================================================================
+# convert_v3_to_v4 — other categories
+# ===========================================================================
+
+
+class TestOtherCategoryConversion:
+    def test_converts_botnet(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "botnet@example.com"},
+            "Report": {
+                "ReportType": "Botnet",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.200",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["category"] == "infrastructure"
+        assert v4["type"] == "botnet"
+
+    def test_converts_copyright(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "dmca@example.com"},
+            "Report": {
+                "ReportType": "Copyright",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.250",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["category"] == "copyright"
+        assert v4["type"] == "copyright"
+
+
+# ===========================================================================
+# convert_v3_to_v4 — evidence conversion
+# ===========================================================================
+
+
+class TestEvidenceConversion:
+    def test_converts_attachment_with_description(self) -> None:
+        payload = "SGVsbG8gV29ybGQ="  # base64("Hello World")
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "test@example.com"},
+            "Report": {
+                "ReportType": "Spam",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.1",
+                "Protocol": "smtp",
+                "Attachment": [
+                    {
+                        "ContentType": "message/rfc822",
+                        "Data": payload,
+                        "Description": "Original email",
+                    }
+                ],
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+
+        assert v4.get("evidence") is not None
+        ev = v4["evidence"][0]
+        assert ev["content_type"] == "message/rfc822"
+        assert ev["payload"] == payload
+        assert ev["description"] == "Original email"
+        raw = base64.b64decode(payload)
+        assert ev["size"] == len(raw)
+        expected_hash = "sha256:" + hashlib.sha256(raw).hexdigest()
+        assert ev["hash"] == expected_hash
+
+    def test_converts_samples_without_description(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "test@example.com"},
+            "Report": {
+                "ReportType": "Malware",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.1",
+                "Url": "http://malware.example/payload",
+                "Samples": [
+                    {
+                        "ContentType": "application/octet-stream",
+                        "Data": "bWFsd2FyZWRhdGE=",
+                    }
+                ],
+            },
+        }
+        msgs: list[str] = []
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3, conversion_warnings=msgs)
+
+        ev = v4["evidence"][0]
+        assert ev["content_type"] == "application/octet-stream"
+        assert "description" not in ev
+        assert any("no description" in m for m in msgs)
+
+
+# ===========================================================================
+# Error cases
+# ===========================================================================
+
+
+class TestUnknownType:
+    def test_raises_on_unknown_report_type(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "test@example.com"},
+            "Report": {
+                "ReportType": "UnknownType",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.1",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            with pytest.raises(
+                XARFParseError, match="unknown ReportType 'UnknownType'"
+            ):
+                convert_v3_to_v4(v3)
+
+
+class TestReporterEmailHandling:
+    def test_raises_when_both_emails_absent(self) -> None:
+        v3 = {
+            "Version": "3",
+            "ReporterInfo": {},
+            "Report": {
+                "ReportType": "Spam",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.1",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            with pytest.raises(XARFParseError, match="missing reporter email"):
+                convert_v3_to_v4(v3)
+
+    def test_raises_when_email_has_no_domain(self) -> None:
+        v3 = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "not-an-email"},
+            "Report": {
+                "ReportType": "Spam",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.1",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            with pytest.raises(XARFParseError, match="not a valid email address"):
+                convert_v3_to_v4(v3)
+
+    def test_warns_when_reporter_org_missing(self) -> None:
+        v3 = _spam_v3(reporter_org=None)
+        msgs: list[str] = []
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3, conversion_warnings=msgs)
+        assert any("No ReporterOrg found" in m for m in msgs)
+        assert v4["reporter"]["org"] == "Unknown Organization"
+
+
+class TestSourceIdentifierHandling:
+    def test_raises_when_no_source_identifier(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {
+                "ReporterOrg": "Test Org",
+                "ReporterOrgEmail": "test@example.com",
+            },
+            "Report": {"ReportType": "Botnet", "Date": "2024-01-15T10:00:00Z"},
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            with pytest.raises(XARFParseError, match="no source identifier found"):
+                convert_v3_to_v4(v3)
+
+    def test_extracts_from_source_url(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {
+                "ReporterOrg": "Security Vendor",
+                "ReporterOrgEmail": "abuse@security.example",
+            },
+            "Report": {
+                "ReportType": "Phishing",
+                "Date": "2024-01-15T10:00:00Z",
+                "Source": {"URL": "https://malicious-example.net/banking-login/"},
+                "Url": "https://malicious-example.net/banking-login/",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["source_identifier"] == "https://malicious-example.net/banking-login/"
+
+    def test_extracts_from_url_field(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {
+                "ReporterOrg": "Test Org",
+                "ReporterOrgEmail": "test@example.com",
+            },
+            "Report": {
+                "ReportType": "Malware",
+                "Date": "2024-01-15T10:00:00Z",
+                "Url": "http://malware.example/payload.exe",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["source_identifier"] == "http://malware.example/payload.exe"
+
+
+class TestMissingProtocol:
+    def test_raises_when_messaging_protocol_missing(self) -> None:
+        v3 = _spam_v3(protocol=None)
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            with pytest.raises(
+                XARFParseError, match="missing protocol for messaging type"
+            ):
+                convert_v3_to_v4(v3)
+
+    def test_raises_when_connection_protocol_missing(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {
+                "ReporterOrg": "Test Org",
+                "ReporterOrgEmail": "test@example.com",
+            },
+            "Report": {
+                "ReportType": "DDoS",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.1",
+                # No Protocol
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            with pytest.raises(
+                XARFParseError, match="missing protocol for connection type"
+            ):
+                convert_v3_to_v4(v3)
+
+
+class TestMissingUrl:
+    def test_raises_when_content_url_missing(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {
+                "ReporterOrg": "Test Org",
+                "ReporterOrgEmail": "test@example.com",
+            },
+            "Report": {
+                "ReportType": "Phishing",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.100",
+                # No Url / Source.URL / AdditionalInfo.URL
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            with pytest.raises(XARFParseError, match="missing URL for content type"):
+                convert_v3_to_v4(v3)
+
+
+# ===========================================================================
+# evidence_source — pass-through only when present
+# ===========================================================================
+
+
+class TestEvidenceSource:
+    def test_evidence_source_set_when_detection_method_present(self) -> None:
+        v3 = _spam_v3()
+        v3["Report"]["AdditionalInfo"] = {
+            "DetectionMethod": "spamtrap",
+            "Protocol": "smtp",
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert v4["evidence_source"] == "spamtrap"
+
+    def test_evidence_source_absent_when_no_detection_method(self) -> None:
+        v3 = _spam_v3()
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            v4 = convert_v3_to_v4(v3)
+        assert "evidence_source" not in v4
+
+
+# ===========================================================================
+# Deprecation warning emission
+# ===========================================================================
+
+
+class TestDeprecationWarningEmission:
+    def test_emits_deprecation_warning_on_convert(self) -> None:
+        v3 = _spam_v3()
+        with warnings_module.catch_warnings(record=True) as caught:
+            warnings_module.simplefilter("always")
+            convert_v3_to_v4(v3)
+        dep_warnings = [
+            w for w in caught if issubclass(w.category, XARFv3DeprecationWarning)
+        ]
+        assert len(dep_warnings) == 1
+
+    def test_deprecation_warning_is_subclass_of_deprecation_warning(self) -> None:
+        assert issubclass(XARFv3DeprecationWarning, DeprecationWarning)
+
+
+# ===========================================================================
+# get_v3_deprecation_warning message content
+# ===========================================================================
+
+
+class TestGetV3DeprecationWarning:
+    def test_message_contains_expected_phrases(self) -> None:
+        msg = get_v3_deprecation_warning()
+        assert "DEPRECATION WARNING" in msg
+        assert "v3 format" in msg
+        assert "converted to v4" in msg
+        assert "future major version" in msg
+
+
+# ===========================================================================
+# parse() integration — v3 auto-detection
+# ===========================================================================
+
+
+class TestParserV3Integration:
+    def test_parses_v3_spam_report_automatically(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {
+                "ReporterOrg": "Test Security",
+                "ReporterOrgEmail": "abuse@test.example",
+            },
+            "Report": {
+                "ReportType": "Spam",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.100",
+                "Protocol": "smtp",
+                "SmtpMailFromAddress": "spammer@evil.example",
+                "SmtpMessageSubject": "Spam subject",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            result = parse(v3)
+
+        assert result.report is not None
+        assert result.report.xarf_version == "4.2.0"
+        assert result.report.category == "messaging"
+        assert result.report.type == "spam"
+        assert result.report.legacy_version == "3"
+        assert len(result.warnings) > 0
+        assert any("DEPRECATION WARNING" in w.message for w in result.warnings)
+
+    def test_parses_v3_ddos_with_no_errors(self) -> None:
+        v3: dict[str, Any] = {
+            "Version": "3",
+            "ReporterInfo": {"ReporterOrgEmail": "abuse@example.com"},
+            "Report": {
+                "ReportType": "DDoS",
+                "Date": "2024-01-15T10:00:00Z",
+                "SourceIp": "192.0.2.50",
+                "SourcePort": 54321,
+                "DestinationIp": "203.0.113.10",
+                "Protocol": "tcp",
+            },
+        }
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            result = parse(v3)
+
+        assert result.errors == []
+        assert len(result.warnings) > 0
+
+    def test_parse_v3_warnings_mention_v3_format(self) -> None:
+        v3 = _spam_v3()
+        with warnings_module.catch_warnings():
+            warnings_module.simplefilter("ignore", XARFv3DeprecationWarning)
+            result = parse(v3)
+        assert any("v3" in w.message.lower() for w in result.warnings)
diff --git a/xarf/__init__.py b/xarf/__init__.py
index ff6ca43..2fb0db5 100644
--- a/xarf/__init__.py
+++ b/xarf/__init__.py
@@ -119,7 +119,9 @@
     VulnerabilityReport,
 )
 from xarf.v3_compat import (
+    XARFv3DeprecationWarning,
     convert_v3_to_v4,
+    get_v3_deprecation_warning,
     is_v3_report,
 )
 from xarf.validator import ValidationResult
@@ -162,6 +164,8 @@
     # v3 compatibility
     "is_v3_report",
     "convert_v3_to_v4",
+    "get_v3_deprecation_warning",
+    "XARFv3DeprecationWarning",
     # Messaging
     "MessagingBaseReport",
     "SpamIndicators",
diff --git a/xarf/parser.py b/xarf/parser.py
index 73f4fe4..6d416aa 100644
--- a/xarf/parser.py
+++ b/xarf/parser.py
@@ -28,7 +28,7 @@
 
 from xarf.exceptions import XARFParseError
 from xarf.models import AnyXARFReport, ParseResult, ValidationWarning
-from xarf.v3_compat import convert_v3_to_v4, is_v3_report
+from xarf.v3_compat import convert_v3_to_v4, get_v3_deprecation_warning, is_v3_report
 from xarf.validator import _validator
 
 # ---------------------------------------------------------------------------
@@ -37,16 +37,6 @@
 
 _REPORT_ADAPTER: TypeAdapter[AnyXARFReport] = TypeAdapter(AnyXARFReport)
 
-# ---------------------------------------------------------------------------
-# v3 deprecation warning message (mirrors getV3DeprecationWarning() in JS)
-# ---------------------------------------------------------------------------
-
-_V3_DEPRECATION_MESSAGE = (
-    "XARF v3 format is deprecated. Please upgrade to XARF v4. "
-    "This report will be automatically converted, but v3 support "
-    "will be removed in a future version."
-)
-
 # ---------------------------------------------------------------------------
 # Public API
 # ---------------------------------------------------------------------------
@@ -116,10 +106,14 @@ def parse(
     # ------------------------------------------------------------------
     if is_v3_report(data):
         # convert_v3_to_v4 emits a Python warnings.warn() internally.
-        data = convert_v3_to_v4(data)
+        # Collect non-fatal conversion messages (e.g. missing ReporterOrg).
+        conversion_msgs: list[str] = []
+        data = convert_v3_to_v4(data, conversion_warnings=conversion_msgs)
         parse_warnings.append(
-            ValidationWarning(field="", message=_V3_DEPRECATION_MESSAGE)
+            ValidationWarning(field="", message=get_v3_deprecation_warning())
         )
+        for msg in conversion_msgs:
+            parse_warnings.append(ValidationWarning(field="", message=msg))
 
     # ------------------------------------------------------------------
     # Step 3 — Validate (schema + unknown fields + missing optional)
diff --git a/xarf/v3_compat.py b/xarf/v3_compat.py
index 8472aa2..e2c0012 100644
--- a/xarf/v3_compat.py
+++ b/xarf/v3_compat.py
@@ -1,233 +1,435 @@
 """XARF v3 Backwards Compatibility Module.
 
-This module provides automatic conversion from XARF v3 format to v4 format,
+Provides automatic detection and conversion of XARF v3 reports to v4 format,
 allowing parsers to transparently handle legacy reports.
+
+Mirrors ``v3-legacy.ts`` in ``xarf-javascript/src/``.
 """
 
+from __future__ import annotations
+
+import base64
+import hashlib
 import uuid
 import warnings
 from datetime import datetime, timezone
-from typing import Any, Dict, List, Optional
-
-
-class XARFv3DeprecationWarning(DeprecationWarning):
-    """Warning for usage of deprecated XARF v3 format."""
+from typing import Any
 
+from xarf.exceptions import XARFParseError
 
-# Enable deprecation warnings by default
-warnings.simplefilter("always", XARFv3DeprecationWarning)
+# ---------------------------------------------------------------------------
+# Deprecation warning class
+# ---------------------------------------------------------------------------
 
 
-def is_v3_report(data: Dict[str, Any]) -> bool:
+class XARFv3DeprecationWarning(DeprecationWarning):
+    """Warning emitted when an XARF v3 report is detected and auto-converted."""
+
+
+# Show each unique call site once rather than suppressing entirely (the Python
+# default silences DeprecationWarning outside __main__ and test runners).
+warnings.simplefilter("default", XARFv3DeprecationWarning)
+
+# ---------------------------------------------------------------------------
+# Type mapping — mirrors V3_TYPE_MAPPING in v3-legacy.ts exactly
+# (PascalCase and lowercase variant for each of the 8 supported v3 types)
+# ---------------------------------------------------------------------------
+
+_V3_TYPE_MAPPING: dict[str, tuple[str, str]] = {
+    "Spam": ("messaging", "spam"),
+    "spam": ("messaging", "spam"),
+    "Login-Attack": ("connection", "login_attack"),
+    "login-attack": ("connection", "login_attack"),
+    "Port-Scan": ("connection", "port_scan"),
+    "port-scan": ("connection", "port_scan"),
+    "DDoS": ("connection", "ddos"),
+    "ddos": ("connection", "ddos"),
+    "Phishing": ("content", "phishing"),
+    "phishing": ("content", "phishing"),
+    "Malware": ("content", "malware"),
+    "malware": ("content", "malware"),
+    "Botnet": ("infrastructure", "botnet"),
+    "botnet": ("infrastructure", "botnet"),
+    "Copyright": ("copyright", "copyright"),
+    "copyright": ("copyright", "copyright"),
+}
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def is_v3_report(data: dict[str, Any]) -> bool:
     """Detect if a report is XARF v3 format.
 
+    Mirrors ``isXARFv3()`` in ``v3-legacy.ts``.  Checks for the presence of
+    ``Version`` (string equal to ``"3"``, ``"3.0"``, or ``"3.0.0"``),
+    ``ReporterInfo``, and ``Report`` keys.
+
     Args:
-        data: Parsed JSON data
+        data: Parsed JSON data to inspect.
 
     Returns:
-        bool: True if report is v3 format
+        ``True`` if *data* is a v3-format XARF report.
     """
-    # v3 has "Version" field, v4 has "xarf_version"
-    return "Version" in data and "xarf_version" not in data
+    version = data.get("Version")
+    return (
+        isinstance(version, str)
+        and version in ("3", "3.0", "3.0.0")
+        and "ReporterInfo" in data
+        and "Report" in data
+    )
+
 
+def convert_v3_to_v4(
+    v3_data: dict[str, Any],
+    conversion_warnings: list[str] | None = None,
+) -> dict[str, Any]:
+    """Convert an XARF v3 report to v4 format.
 
-def convert_v3_to_v4(v3_data: Dict[str, Any]) -> Dict[str, Any]:
-    """Convert XARF v3 report to v4 format.
+    Mirrors ``convertV3toV4()`` in ``v3-legacy.ts``.  Emits an
+    :class:`XARFv3DeprecationWarning` via :func:`warnings.warn` and raises
+    :class:`~xarf.exceptions.XARFParseError` for unrecoverable conversion
+    failures (unknown type, missing required fields).
 
     Args:
-        v3_data: XARF v3 report data
+        v3_data: Parsed XARF v3 report dict.
+        conversion_warnings: Optional list to collect non-fatal conversion
+            messages (e.g. missing ``ReporterOrg``).  Mirrors the ``warnings``
+            parameter in the JS implementation.
 
     Returns:
-        Dict[str, Any]: Converted XARF v4 report
+        A dict representing the converted XARF v4 report.
 
     Raises:
-        ValueError: If v3 data is invalid or cannot be converted
+        XARFParseError: If the v3 ``ReportType`` is not in the supported
+            mapping, required fields are missing, or source/contact info
+            cannot be extracted.
+
+    Example:
+        >>> v4 = convert_v3_to_v4(v3_dict)
+        >>> v4["xarf_version"]
+        '4.2.0'
     """
     warnings.warn(
-        "XARF v3 format is deprecated. Please upgrade to XARF v4. "
-        "This report will be automatically converted, but v3 support "
-        "will be removed in a future version.",
+        get_v3_deprecation_warning(),
         XARFv3DeprecationWarning,
         stacklevel=3,
     )
 
-    # Extract v3 structure
-    reporter_info = v3_data.get("ReporterInfo", {})
     report = v3_data.get("Report", {})
-    source = report.get("Source", {})
-
-    # Map v3 ReportClass to v4 category
-    report_class = report.get("ReportClass", "").lower()
-    category_map = {
-        "messaging": "messaging",
-        "activity": "messaging",  # v3 often used Activity for messaging
-        "connection": "connection",
-        "content": "content",
-        "infrastructure": "infrastructure",
-        "copyright": "copyright",
-        "vulnerability": "vulnerability",
-        "reputation": "reputation",
-    }
-    category = category_map.get(report_class, "other")
-
-    # Map v3 ReportType to v4 type
-    report_type = report.get("ReportType", "").lower()
+    reporter_info = v3_data.get("ReporterInfo", {})
 
-    # Build base v4 structure
-    v4_data: Dict[str, Any] = {
-        "xarf_version": "4.0.0",
+    # ------------------------------------------------------------------
+    # Resolve category and type via the type mapping
+    # ------------------------------------------------------------------
+    report_type = report.get("ReportType", "")
+    mapping = _V3_TYPE_MAPPING.get(report_type)
+    if mapping is None:
+        supported = ", ".join(sorted(set(_V3_TYPE_MAPPING.keys())))
+        raise XARFParseError(
+            f"Cannot convert v3 report: unknown ReportType '{report_type}'. "
+            f"Supported types: {supported}"
+        )
+    category, v4_type = mapping
+
+    # ------------------------------------------------------------------
+    # Extract required fields
+    # ------------------------------------------------------------------
+    source_identifier = _extract_source_identifier(report)
+    contact_info = _extract_contact_info(reporter_info, conversion_warnings)
+
+    # ------------------------------------------------------------------
+    # Evidence (Attachment or Samples)
+    # ------------------------------------------------------------------
+    raw_attachments = report.get("Attachment") or report.get("Samples")
+    evidence = _convert_attachments(raw_attachments, conversion_warnings)
+
+    # ------------------------------------------------------------------
+    # Build base v4 report
+    # ------------------------------------------------------------------
+    v4_data: dict[str, Any] = {
+        "xarf_version": "4.2.0",
         "report_id": str(uuid.uuid4()),
-        "timestamp": report.get("Date")
-        or datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
-        "reporter": {
-            "org": reporter_info.get("ReporterOrg", "Unknown"),
-            "contact": (
-                reporter_info.get("ReporterOrgEmail")
-                or reporter_info.get("ReporterContactEmail")
-                or "unknown@example.com"
-            ),
-            "type": "automated",  # v3 didn't distinguish, assume automated
-        },
-        "source_identifier": source.get("IP", "0.0.0.0"),  # nosec B104
+        "timestamp": report.get("Date"),
+        "reporter": contact_info,
+        "sender": contact_info,
+        "source_identifier": source_identifier,
         "category": category,
-        "type": report_type,
-        "evidence_source": _map_evidence_source(
-            report.get("AdditionalInfo", {}).get("DetectionMethod")
-        ),
-        # Indicate this was converted from v3
+        "type": v4_type,
         "legacy_version": "3",
         "_internal": {
-            "converted_from_v3": True,
-            "original_version": v3_data.get("Version"),
+            "original_report_type": report_type,
+            "converted_at": datetime.now(timezone.utc).isoformat(),
         },
     }
 
-    # Convert evidence/attachments
-    attachments = report.get("Attachment", [])
-    if attachments:
-        v4_data["evidence"] = _convert_attachments(attachments)
+    # description is optional
+    if report.get("AttackDescription"):
+        v4_data["description"] = report["AttackDescription"]
 
-    # Add category-specific fields based on type
+    # evidence_source only if explicitly provided in the v3 report
+    evidence_source = (report.get("AdditionalInfo") or {}).get("DetectionMethod")
+    if evidence_source:
+        v4_data["evidence_source"] = evidence_source
+
+    if evidence is not None:
+        v4_data["evidence"] = evidence
+
+    # ------------------------------------------------------------------
+    # Category-specific fields
+    # ------------------------------------------------------------------
     if category == "messaging":
         _add_messaging_fields(v4_data, report)
     elif category == "connection":
-        _add_connection_fields(v4_data, report, source)
+        _add_connection_fields(v4_data, report)
     elif category == "content":
         _add_content_fields(v4_data, report)
-    elif category == "infrastructure":
-        _add_infrastructure_fields(v4_data, report)
-
-    # Add tags if available
-    tags = []
-    if report.get("ReportClass"):
-        tags.append(f"legacy:category:{report['ReportClass']}")
-    if report.get("ReportType"):
-        tags.append(f"legacy:type:{report['ReportType']}")
-    if tags:
-        v4_data["tags"] = tags
 
     return v4_data
 
 
-def _map_evidence_source(v3_method: Optional[str]) -> str:
-    """Map v3 detection method to v4 evidence source."""
-    if not v3_method:
-        return "automated_scan"
-
-    method_lower = v3_method.lower()
-    if "spamtrap" in method_lower:
-        return "spamtrap"
-    elif "honeypot" in method_lower:
-        return "honeypot"
-    elif "user" in method_lower or "manual" in method_lower:
-        return "user_report"
-    elif "scan" in method_lower:
-        return "automated_scan"
-    elif "vuln" in method_lower:
-        return "vulnerability_scan"
-    else:
-        return "automated_scan"
-
-
-def _convert_attachments(v3_attachments: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-    """Convert v3 Attachment array to v4 evidence format."""
-    v4_evidence = []
+def get_v3_deprecation_warning() -> str:
+    """Return the canonical v3 deprecation warning message.
+
+    Mirrors ``getV3DeprecationWarning()`` in ``v3-legacy.ts``.
+
+    Returns:
+        A formatted deprecation warning string.
+    """
+    return (
+        "DEPRECATION WARNING: XARF v3 format detected. "
+        "The v3 format has been automatically converted to v4. "
+        "Please update your systems to generate v4 reports directly. "
+        "v3 support will be removed in a future major version."
+    )
+
+
+# ---------------------------------------------------------------------------
+# Private helpers
+# ---------------------------------------------------------------------------
+
+
+def _extract_source_identifier(report: dict[str, Any]) -> str:
+    """Extract a source identifier from a v3 report dict.
+
+    Checks ``Source.IP``, ``SourceIp``, ``Source.URL``, and ``Url`` in that
+    order, mirroring ``extractSourceIdentifier()`` in ``v3-legacy.ts``.
+
+    Args:
+        report: The inner ``Report`` dict from a v3 report.
+
+    Returns:
+        The source identifier string.
+
+    Raises:
+        XARFParseError: If no source identifier can be found.
+    """
+    source = report.get("Source") or {}
+    if source.get("IP"):
+        return str(source["IP"])
+    if report.get("SourceIp"):
+        return str(report["SourceIp"])
+    if source.get("URL"):
+        return str(source["URL"])
+    if report.get("Url"):
+        return str(report["Url"])
+    raise XARFParseError(
+        "Cannot convert v3 report: no source identifier found "
+        "(expected Source.IP, SourceIp, Source.URL, or Url)"
+    )
+
+
+def _extract_contact_info(
+    reporter_info: dict[str, Any],
+    conversion_warnings: list[str] | None = None,
+) -> dict[str, str]:
+    """Extract contact info from a v3 ``ReporterInfo`` dict.
+
+    Mirrors ``extractContactInfo()`` in ``v3-legacy.ts``.
+
+    Args:
+        reporter_info: The ``ReporterInfo`` dict from a v3 report.
+        conversion_warnings: Optional list to append non-fatal warnings to.
+
+    Returns:
+        A dict with ``org``, ``contact``, and ``domain`` keys.
+
+    Raises:
+        XARFParseError: If no email address is present or the email has no
+            domain part.
+    """
+    contact = reporter_info.get("ReporterContactEmail") or reporter_info.get(
+        "ReporterOrgEmail"
+    )
+    if not contact:
+        raise XARFParseError(
+            "Cannot convert v3 report: missing reporter email "
+            "(ReporterContactEmail and ReporterOrgEmail are both absent)"
+        )
+    parts = contact.split("@", 1)
+    if len(parts) < 2 or not parts[1]:
+        raise XARFParseError(
+            f"Cannot convert v3 report: reporter email '{contact}' "
+            "is not a valid email address"
+        )
+    domain = parts[1]
+
+    org = reporter_info.get("ReporterOrg")
+    if not org:
+        if conversion_warnings is not None:
+            conversion_warnings.append(
+                'No ReporterOrg found in v3 report, using "Unknown Organization"'
+            )
+        org = "Unknown Organization"
+
+    return {"org": org, "contact": contact, "domain": domain}
+
+
+def _convert_attachments(
+    v3_attachments: list[dict[str, Any]] | None,
+    conversion_warnings: list[str] | None = None,
+) -> list[dict[str, Any]] | None:
+    """Convert v3 ``Attachment`` / ``Samples`` items to v4 evidence format.
+
+    Mirrors ``convertEvidence()`` in ``v3-legacy.ts``.  Computes a sha256
+    hash and byte size from the base64-encoded ``Data`` field.
+
+    Args:
+        v3_attachments: List of v3 attachment dicts, or ``None``.
+        conversion_warnings: Optional list to append non-fatal warnings to.
+
+    Returns:
+        A list of v4 evidence dicts, or ``None`` if *v3_attachments* is empty
+        or ``None``.
+    """
+    if not v3_attachments:
+        return None
+
+    result = []
     for attachment in v3_attachments:
-        evidence_item = {
-            "content_type": attachment.get("ContentType", "text/plain"),
-            "description": attachment.get("Description", "Evidence from v3 report"),
-            "payload": attachment.get("Data", ""),
+        description = attachment.get("Description")
+        if not description and conversion_warnings is not None:
+            conversion_warnings.append(
+                "Evidence attachment has no description, omitting field"
+            )
+
+        raw_data = attachment.get("Data", "")
+        try:
+            raw_bytes = base64.b64decode(raw_data)
+        except ValueError:
+            raw_bytes = b""
+
+        digest = hashlib.sha256(raw_bytes).hexdigest()
+
+        item: dict[str, Any] = {
+            "content_type": attachment.get("ContentType", "application/octet-stream"),
+            "payload": raw_data,
+            "hash": f"sha256:{digest}",
+            "size": len(raw_bytes),
         }
-        v4_evidence.append(evidence_item)
-    return v4_evidence
-
-
-def _add_messaging_fields(v4_data: Dict[str, Any], v3_report: Dict[str, Any]) -> None:
-    """Add messaging-specific fields from v3 to v4."""
-    additional_info = v3_report.get("AdditionalInfo", {})
-
-    v4_data["protocol"] = additional_info.get("Protocol", "smtp")
-    if "SMTPFrom" in additional_info:
-        v4_data["smtp_from"] = additional_info["SMTPFrom"]
-    if "Subject" in additional_info:
-        v4_data["subject"] = additional_info["Subject"]
-    if "SMTPTo" in additional_info:
-        v4_data["smtp_to"] = additional_info["SMTPTo"]
-    if "MessageId" in additional_info:
-        v4_data["message_id"] = additional_info["MessageId"]
-
-
-def _add_connection_fields(
-    v4_data: Dict[str, Any], v3_report: Dict[str, Any], v3_source: Dict[str, Any]
-) -> None:
-    """Add connection-specific fields from v3 to v4."""
-    additional_info = v3_report.get("AdditionalInfo", {})
-
-    # Required fields
-    v4_data["destination_ip"] = v3_report.get("DestinationIp", "0.0.0.0")  # nosec B104
-    v4_data["protocol"] = additional_info.get("Protocol", "tcp")
-
-    # Optional fields
-    if "Port" in v3_source:
-        v4_data["source_port"] = v3_source["Port"]
-    if "DestinationPort" in v3_report:
-        v4_data["destination_port"] = v3_report["DestinationPort"]
-    if "AttackType" in additional_info:
-        v4_data["attack_type"] = additional_info["AttackType"]
-    if "PacketCount" in additional_info:
-        v4_data["packet_count"] = additional_info["PacketCount"]
-    if "ByteCount" in additional_info:
-        v4_data["byte_count"] = additional_info["ByteCount"]
-
-
-def _add_content_fields(v4_data: Dict[str, Any], v3_report: Dict[str, Any]) -> None:
-    """Add content-specific fields from v3 to v4."""
-    additional_info = v3_report.get("AdditionalInfo", {})
-
-    # Required field
-    v4_data["url"] = v3_report.get("URL") or additional_info.get(
-        "URL", "http://unknown"
-    )
+        if description:
+            item["description"] = description
+
+        result.append(item)
+
+    return result
+
+
+def _add_messaging_fields(v4_data: dict[str, Any], report: dict[str, Any]) -> None:
+    """Merge messaging-specific fields into *v4_data*.
+
+    Mirrors ``addMessagingFields()`` in ``v3-legacy.ts``.
+
+    Args:
+        v4_data: The partially-built v4 report dict (mutated in-place).
+        report: The inner ``Report`` dict from the v3 report.
+
+    Raises:
+        XARFParseError: If no protocol can be determined.
+    """
+    additional_info: dict[str, Any] = report.get("AdditionalInfo") or {}
+    protocol = report.get("Protocol") or additional_info.get("Protocol")
+    if not protocol:
+        raise XARFParseError(
+            "Cannot convert v3 report: missing protocol for messaging type"
+        )
+
+    v4_data["protocol"] = protocol
+
+    smtp_from = report.get("SmtpMailFromAddress") or additional_info.get("SMTPFrom")
+    if smtp_from:
+        v4_data["smtp_from"] = smtp_from
+
+    smtp_to = report.get("SmtpRcptToAddress")
+    if smtp_to:
+        v4_data["smtp_to"] = smtp_to
+
+    subject = report.get("SmtpMessageSubject") or additional_info.get("Subject")
+    if subject:
+        v4_data["subject"] = subject
 
-    # Optional fields
-    if "ContentType" in additional_info:
-        v4_data["content_type"] = additional_info["ContentType"]
-    if "AttackType" in additional_info:
-        v4_data["attack_type"] = additional_info["AttackType"]
-
-
-def _add_infrastructure_fields(
-    v4_data: Dict[str, Any], v3_report: Dict[str, Any]
-) -> None:
-    """Add infrastructure-specific fields from v3 to v4."""
-    additional_info = v3_report.get("AdditionalInfo", {})
-
-    # Infrastructure reports don't have many required fields beyond base
-    if "BotnetName" in additional_info:
-        v4_data["tags"] = v4_data.get("tags", []) + [
-            f"botnet:{additional_info['BotnetName']}"
-        ]
-    if "MalwareFamily" in additional_info:
-        v4_data["tags"] = v4_data.get("tags", []) + [
-            f"malware:{additional_info['MalwareFamily']}"
-        ]
+    source = report.get("Source") or {}
+    source_port = source.get("Port") or report.get("SourcePort")
+    if source_port is not None:
+        v4_data["source_port"] = source_port
+
+
+def _add_connection_fields(v4_data: dict[str, Any], report: dict[str, Any]) -> None:
+    """Merge connection-specific fields into *v4_data*.
+
+    Mirrors ``addConnectionFields()`` in ``v3-legacy.ts``.
+
+    Args:
+        v4_data: The partially-built v4 report dict (mutated in-place).
+        report: The inner ``Report`` dict from the v3 report.
+
+    Raises:
+        XARFParseError: If no protocol is present.
+    """
+    protocol = report.get("Protocol")
+    if not protocol:
+        raise XARFParseError(
+            "Cannot convert v3 report: missing protocol for connection type"
+        )
+
+    v4_data["protocol"] = protocol
+    # first_seen is required for connection types in v4
+    v4_data["first_seen"] = report.get("Date")
+
+    if report.get("DestinationIp"):
+        v4_data["destination_ip"] = report["DestinationIp"]
+
+    source = report.get("Source") or {}
+    source_port = source.get("Port") or report.get("SourcePort")
+    if source_port is not None:
+        v4_data["source_port"] = source_port
+
+    if report.get("DestinationPort") is not None:
+        v4_data["destination_port"] = report["DestinationPort"]
+
+    if report.get("AttackCount") is not None:
+        v4_data["attack_count"] = report["AttackCount"]
+
+
+def _add_content_fields(v4_data: dict[str, Any], report: dict[str, Any]) -> None:
+    """Merge content-specific fields into *v4_data*.
+
+    Mirrors ``addContentFields()`` in ``v3-legacy.ts``.
+
+    Args:
+        v4_data: The partially-built v4 report dict (mutated in-place).
+        report: The inner ``Report`` dict from the v3 report.
+
+    Raises:
+        XARFParseError: If no URL can be found.
+    """
+    additional_info: dict[str, Any] = report.get("AdditionalInfo") or {}
+    source: dict[str, Any] = report.get("Source") or {}
+    url = report.get("Url") or additional_info.get("URL") or source.get("URL")
+    if not url:
+        raise XARFParseError(
+            f"Cannot convert v3 report: missing URL for content type "
+            f"'{v4_data.get('type')}'. Content reports require a URL field"
+        )
+    v4_data["url"] = url

From ef19c08f8b255a9201ce3fb83b6fd02a797bfe25 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 31 Mar 2026 16:50:17 +0200
Subject: [PATCH 10/13] Add missing test coverage, general test cleanup.

---
 tests/conftest.py              | 171 +++++++++
 tests/test_exceptions.py       | 243 +++++++++++++
 tests/test_generator.py        |   9 +-
 tests/test_models.py           | 109 ++++--
 tests/test_parse.py            | 588 +++++++++++++++++++++++++++++++
 tests/test_schema_registry.py  |   1 -
 tests/test_schema_validator.py |   3 +-
 tests/test_validator.py        | 613 +++++++++++++++++++++++++++++++++
 xarf/exceptions.py             |  12 +-
 xarf/parser.py                 |   5 +-
 10 files changed, 1711 insertions(+), 43 deletions(-)
 create mode 100644 tests/conftest.py
 create mode 100644 tests/test_exceptions.py
 create mode 100644 tests/test_parse.py
 create mode 100644 tests/test_validator.py

diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..7307f29
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,171 @@
+"""Shared pytest fixtures, constants, and helpers for the XARF test suite.
+
+This module provides:
+
+- Directory path constants pointing to sample data locations.
+- A helper function :func:`_load_spec_samples` to enumerate canonical spec samples.
+- Module-level valid report dicts used across multiple test files.
+"""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+# ---------------------------------------------------------------------------
+# Directory constants
+# ---------------------------------------------------------------------------
+
+#: Path to the canonical xarf-spec v4 samples (relative to this file's location).
+SPEC_SAMPLES_DIR: Path = (
+    Path(__file__).parent.parent.parent / "xarf-spec" / "samples" / "v4"
+)
+
+#: Root of the shared parser-test suite samples bundled as a git subtree.
+SHARED_SAMPLES_DIR: Path = Path(__file__).parent / "shared" / "samples"
+
+#: Convenience pointer to the invalid shared samples.
+INVALID_SAMPLES_DIR: Path = SHARED_SAMPLES_DIR / "invalid"
+
+#: Convenience pointer to the v3 backward-compatibility samples.
+V3_SAMPLES_DIR: Path = SHARED_SAMPLES_DIR / "valid" / "v3"
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _load_spec_samples() -> list[tuple[Path, str]]:
+    """Return a list of ``(path, stem)`` tuples for every JSON file in SPEC_SAMPLES_DIR.
+
+    Returns:
+        A sorted list of ``(path, stem)`` tuples.  Returns an empty list when
+        :data:`SPEC_SAMPLES_DIR` does not exist (e.g. in CI environments that do
+        not have the full monorepo checked out).
+    """
+    if not SPEC_SAMPLES_DIR.exists():
+        return []
+    return [(p, p.stem) for p in sorted(SPEC_SAMPLES_DIR.glob("*.json"))]
+
+
+# ---------------------------------------------------------------------------
+# Shared contact info block reused across report dicts
+# ---------------------------------------------------------------------------
+
+_CONTACT: dict[str, str] = {
+    "org": "ACME Security",
+    "contact": "abuse@acme.example",
+    "domain": "acme.example",
+}
+
+# ---------------------------------------------------------------------------
+# Module-level valid report dicts
+# ---------------------------------------------------------------------------
+
+#: Minimal valid ``connection/ddos`` report dict.
+VALID_DDOS_REPORT: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "550e8400-e29b-41d4-a716-446655440000",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "connection",
+    "type": "ddos",
+    "evidence_source": "honeypot",
+    "source_port": 12345,
+    "destination_ip": "203.0.113.10",
+    "protocol": "tcp",
+    "first_seen": "2024-01-15T09:00:00Z",
+}
+
+#: Minimal valid ``messaging/spam`` report dict.  Uses ``protocol="sms"`` to
+#: avoid the ``smtp_from`` requirement that applies to SMTP spam reports.
+VALID_SPAM_REPORT: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "6ba7b810-9dad-11d1-80b4-00c04fd430c8",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "messaging",
+    "type": "spam",
+    "evidence_source": "honeypot",
+    "protocol": "sms",
+}
+
+#: Minimal valid ``content/phishing`` report dict.
+VALID_PHISHING_REPORT: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "6ba7b811-9dad-11d1-80b4-00c04fd430c8",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "content",
+    "type": "phishing",
+    "evidence_source": "honeypot",
+    "url": "https://phishing.example.com/login",
+}
+
+#: Minimal valid ``infrastructure/botnet`` report dict.
+VALID_BOTNET_REPORT: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "6ba7b812-9dad-11d1-80b4-00c04fd430c8",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "infrastructure",
+    "type": "botnet",
+    "evidence_source": "honeypot",
+    "compromise_evidence": "C2 communication observed",
+}
+
+#: Minimal valid ``copyright/copyright`` report dict.
+VALID_COPYRIGHT_REPORT: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "6ba7b813-9dad-11d1-80b4-00c04fd430c8",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "copyright",
+    "type": "copyright",
+    "evidence_source": "honeypot",
+    "infringing_url": "https://piracy.example.com/movie.mp4",
+    "infringement_type": "Copyright",
+}
+
+#: Minimal valid ``vulnerability/cve`` report dict.
+VALID_CVE_REPORT: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "6ba7b814-9dad-11d1-80b4-00c04fd430c8",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "vulnerability",
+    "type": "cve",
+    "evidence_source": "honeypot",
+    "cve_id": "CVE-2024-1234",
+    "service": "Apache httpd",
+    "service_port": 80,
+    "cvss_score": 9.8,
+}
+
+#: Minimal valid ``reputation/blocklist`` report dict.
+VALID_BLOCKLIST_REPORT: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "6ba7b815-9dad-11d1-80b4-00c04fd430c8",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "reputation",
+    "type": "blocklist",
+    "evidence_source": "honeypot",
+    "threat_type": "spam",
+    "blocklist_name": "test-blocklist",
+    "reason": "Spam source",
+}
diff --git a/tests/test_exceptions.py b/tests/test_exceptions.py
new file mode 100644
index 0000000..dabb59c
--- /dev/null
+++ b/tests/test_exceptions.py
@@ -0,0 +1,243 @@
+"""Tests for the XARF exception hierarchy.
+
+Port of the JavaScript ``errors.test.ts`` test suite.
+
+Covers:
+
+- :class:`~xarf.exceptions.XARFError` base behaviour.
+- :class:`~xarf.exceptions.XARFValidationError` ``.errors`` attribute.
+- :class:`~xarf.exceptions.XARFParseError` instantiation and hierarchy.
+- :class:`~xarf.exceptions.XARFSchemaError` instantiation and hierarchy.
+- Cross-class inheritance assertions.
+"""
+
+from __future__ import annotations
+
+import pytest
+
+from xarf.exceptions import (
+    XARFError,
+    XARFParseError,
+    XARFSchemaError,
+    XARFValidationError,
+)
+
+# ---------------------------------------------------------------------------
+# TestXARFError
+# ---------------------------------------------------------------------------
+
+
+class TestXARFError:
+    """Tests for the :class:`~xarf.exceptions.XARFError` base exception."""
+
+    def test_can_be_instantiated_with_message(self) -> None:
+        """XARFError can be constructed with a plain string message."""
+        error = XARFError("base error message")
+        assert error is not None
+
+    def test_str_contains_message(self) -> None:
+        """``str(error)`` must include the message passed to the constructor."""
+        error = XARFError("base error message")
+        assert "base error message" in str(error)
+
+    def test_is_subclass_of_exception(self) -> None:
+        """XARFError must be a subclass of the built-in :class:`Exception`."""
+        assert issubclass(XARFError, Exception)
+
+    def test_can_be_raised_and_caught_as_exception(self) -> None:
+        """XARFError raised in user code must be catchable as :class:`Exception`."""
+        with pytest.raises(XARFError):
+            raise XARFError("raised as exception")
+
+    def test_can_be_caught_as_xarf_error(self) -> None:
+        """XARFError raised in user code must be catchable as :class:`XARFError`."""
+        with pytest.raises(XARFError):
+            raise XARFError("caught as xarf error")
+
+
+# ---------------------------------------------------------------------------
+# TestXARFValidationError
+# ---------------------------------------------------------------------------
+
+
+class TestXARFValidationError:
+    """Tests for :class:`~xarf.exceptions.XARFValidationError`."""
+
+    def test_is_subclass_of_xarf_error(self) -> None:
+        """XARFValidationError must be a subclass of :class:`XARFError`."""
+        assert issubclass(XARFValidationError, XARFError)
+
+    def test_is_subclass_of_exception(self) -> None:
+        """XARFValidationError must be a subclass of the built-in :class:`Exception`."""
+        assert issubclass(XARFValidationError, Exception)
+
+    def test_errors_defaults_to_empty_list(self) -> None:
+        """When no ``errors`` argument is supplied, ``.errors`` must be an empty
+        list."""
+        error = XARFValidationError("validation failed")
+        assert error.errors == []
+
+    def test_errors_stores_provided_list(self) -> None:
+        """Errors passed to the constructor must be accessible via ``.errors``."""
+        msgs = ["field1 is required", "field2 is invalid"]
+        error = XARFValidationError("validation failed", errors=msgs)
+        assert error.errors == msgs
+
+    def test_message_is_accessible_via_str(self) -> None:
+        """``str(error)`` must contain the message passed to the constructor."""
+        error = XARFValidationError("validation failed message")
+        assert "validation failed message" in str(error)
+
+    def test_can_be_caught_as_xarf_error(self) -> None:
+        """XARFValidationError raised in user code must be catchable as
+        :class:`XARFError`."""
+        with pytest.raises(XARFError):
+            raise XARFValidationError("caught as xarf error")
+
+
+# ---------------------------------------------------------------------------
+# TestXARFParseError
+# ---------------------------------------------------------------------------
+
+
+class TestXARFParseError:
+    """Tests for :class:`~xarf.exceptions.XARFParseError`."""
+
+    def test_is_subclass_of_xarf_error(self) -> None:
+        """XARFParseError must be a subclass of :class:`XARFError`."""
+        assert issubclass(XARFParseError, XARFError)
+
+    def test_is_subclass_of_exception(self) -> None:
+        """XARFParseError must be a subclass of the built-in :class:`Exception`."""
+        assert issubclass(XARFParseError, Exception)
+
+    def test_can_be_raised_with_message(self) -> None:
+        """XARFParseError can be raised and contains the supplied message."""
+        with pytest.raises(XARFParseError) as exc_info:
+            raise XARFParseError("parse failed")
+        assert "parse failed" in str(exc_info.value)
+
+    def test_can_be_caught_as_xarf_error(self) -> None:
+        """XARFParseError raised in user code must be catchable as
+        :class:`XARFError`."""
+        with pytest.raises(XARFError):
+            raise XARFParseError("caught as xarf error")
+
+
+# ---------------------------------------------------------------------------
+# TestXARFSchemaError
+# ---------------------------------------------------------------------------
+
+
+class TestXARFSchemaError:
+    """Tests for :class:`~xarf.exceptions.XARFSchemaError`."""
+
+    def test_is_subclass_of_xarf_error(self) -> None:
+        """XARFSchemaError must be a subclass of :class:`XARFError`."""
+        assert issubclass(XARFSchemaError, XARFError)
+
+    def test_is_subclass_of_exception(self) -> None:
+        """XARFSchemaError must be a subclass of the built-in :class:`Exception`."""
+        assert issubclass(XARFSchemaError, Exception)
+
+    def test_can_be_raised_with_message(self) -> None:
+        """XARFSchemaError can be raised and contains the supplied message."""
+        with pytest.raises(XARFSchemaError) as exc_info:
+            raise XARFSchemaError("schema load failed")
+        assert "schema load failed" in str(exc_info.value)
+
+    def test_can_be_caught_as_xarf_error(self) -> None:
+        """XARFSchemaError raised in user code must be catchable as
+        :class:`XARFError`."""
+        with pytest.raises(XARFError):
+            raise XARFSchemaError("caught as xarf error")
+
+
+# ---------------------------------------------------------------------------
+# TestErrorInheritance
+# ---------------------------------------------------------------------------
+
+
+class TestErrorInheritance:
+    """Cross-class inheritance assertions for the entire exception hierarchy."""
+
+    def test_all_four_are_instances_of_exception(self) -> None:
+        """Instances of all four exception classes must satisfy
+        ``isinstance(e, Exception)``."""
+        exceptions = [
+            XARFError("base"),
+            XARFValidationError("validation"),
+            XARFParseError("parse"),
+            XARFSchemaError("schema"),
+        ]
+        for exc in exceptions:
+            assert isinstance(exc, Exception), (
+                f"{type(exc).__name__} is not an Exception"
+            )
+
+    def test_subclasses_are_instances_of_xarf_error(self) -> None:
+        """XARFValidationError, XARFParseError, and XARFSchemaError must all be
+        XARFError instances."""
+        subclasses = [
+            XARFValidationError("validation"),
+            XARFParseError("parse"),
+            XARFSchemaError("schema"),
+        ]
+        for exc in subclasses:
+            assert isinstance(exc, XARFError), (
+                f"{type(exc).__name__} is not an instance of XARFError"
+            )
+
+    def test_issubclass_checks_work(self) -> None:
+        """``issubclass`` checks must hold for the full hierarchy."""
+        assert issubclass(XARFValidationError, XARFError)
+        assert issubclass(XARFParseError, XARFError)
+        assert issubclass(XARFSchemaError, XARFError)
+        assert issubclass(XARFError, Exception)
+        assert issubclass(XARFValidationError, Exception)
+        assert issubclass(XARFParseError, Exception)
+        assert issubclass(XARFSchemaError, Exception)
+
+
+# ---------------------------------------------------------------------------
+# TestXARFValidationErrorErrors
+# ---------------------------------------------------------------------------
+
+
+class TestXARFValidationErrorErrors:
+    """Detailed tests for the ``errors`` attribute of
+    :class:`~xarf.exceptions.XARFValidationError`."""
+
+    def test_default_errors_is_empty_list(self) -> None:
+        """``XARFValidationError("msg").errors`` must be ``[]``."""
+        error = XARFValidationError("msg")
+        assert error.errors == []
+
+    def test_default_errors_is_a_list(self) -> None:
+        """``XARFValidationError("msg").errors`` must be an instance of
+        :class:`list`."""
+        error = XARFValidationError("msg")
+        assert isinstance(error.errors, list)
+
+    def test_providing_errors_list_stores_it(self) -> None:
+        """Errors supplied to the constructor are accessible via ``.errors``."""
+        errors = ["first error", "second error"]
+        error = XARFValidationError("msg", errors=errors)
+        assert error.errors == errors
+
+    def test_multiple_error_messages_stored_correctly(self) -> None:
+        """All supplied error message strings are stored and retrievable."""
+        messages = [
+            "missing field: xarf_version",
+            "invalid uuid: report_id",
+            "bad timestamp",
+        ]
+        error = XARFValidationError("multiple errors", errors=messages)
+        assert len(error.errors) == 3
+        for msg in messages:
+            assert msg in error.errors
+
+    def test_empty_list_provided_yields_empty_errors(self) -> None:
+        """Explicitly providing an empty list keeps ``.errors`` as ``[]``."""
+        error = XARFValidationError("msg", errors=[])
+        assert error.errors == []
diff --git a/tests/test_generator.py b/tests/test_generator.py
index 84c102d..496d5ad 100644
--- a/tests/test_generator.py
+++ b/tests/test_generator.py
@@ -476,9 +476,7 @@ def test_unknown_field_produces_warning_non_strict(self) -> None:
             **_spam_kwargs(),
         )
         assert not result.errors
-        assert any(
-            "completely_unknown_field_xyz" in w.field for w in result.warnings
-        )
+        assert any("completely_unknown_field_xyz" in w.field for w in result.warnings)
 
     def test_strict_unknown_field_becomes_error(self) -> None:
         result = create_report(
@@ -489,7 +487,4 @@ def test_strict_unknown_field_becomes_error(self) -> None:
             **_spam_kwargs(),
         )
         assert result.report is None
-        assert any(
-            "completely_unknown_field_xyz" in e.field for e in result.errors
-        )
-
+        assert any("completely_unknown_field_xyz" in e.field for e in result.errors)
diff --git a/tests/test_models.py b/tests/test_models.py
index 73d8b4f..314fcc1 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -3,7 +3,8 @@
 from __future__ import annotations
 
 import pytest
-from pydantic import TypeAdapter, ValidationError as PydanticValidationError
+from pydantic import TypeAdapter
+from pydantic import ValidationError as PydanticValidationError
 
 from xarf.models import (
     AnyXARFReport,
@@ -17,33 +18,26 @@
     _report_discriminator,
 )
 from xarf.types_connection import (
-    ConnectionBaseReport,
     DdosReport,
     InfectedHostReport,
     LoginAttackReport,
     PortScanReport,
     ReconnaissanceReport,
     ScrapingReport,
-    SqlInjectionReport,
     VulnerabilityScanReport,
 )
 from xarf.types_content import (
     BrandInfringementReport,
     CompromiseIndicator,
-    ContentBaseReport,
     CsamReport,
-    CsemReport,
     ExposedDataReport,
-    FraudReport,
     MalwareReport,
     PhishingReport,
-    RegistrantDetails,
     RemoteCompromiseReport,
     SuspiciousRegistrationReport,
     WebshellDetails,
 )
 from xarf.types_copyright import (
-    CopyrightBaseReport,
     CopyrightCopyrightReport,
     CopyrightCyberlockerReport,
     CopyrightLinkSiteReport,
@@ -57,7 +51,6 @@
 from xarf.types_messaging import (
     BulkIndicators,
     BulkMessagingReport,
-    MessagingBaseReport,
     SpamIndicators,
     SpamReport,
 )
@@ -67,15 +60,22 @@
     ImpactAssessment,
     MisconfigurationReport,
     OpenServiceReport,
-    VulnerabilityBaseReport,
 )
 
 # ---------------------------------------------------------------------------
 # Shared fixtures
 # ---------------------------------------------------------------------------
 
-REPORTER = {"org": "Example Corp", "contact": "abuse@example.com", "domain": "example.com"}
-SENDER = {"org": "Bad Actor LLC", "contact": "noreply@bad.example", "domain": "bad.example"}
+REPORTER = {
+    "org": "Example Corp",
+    "contact": "abuse@example.com",
+    "domain": "example.com",
+}
+SENDER = {
+    "org": "Bad Actor LLC",
+    "contact": "noreply@bad.example",
+    "domain": "bad.example",
+}
 
 BASE_FIELDS: dict[str, object] = {
     "xarf_version": "4.2.0",
@@ -113,7 +113,9 @@ class TestValidationWarning:
 
     def test_required_fields(self) -> None:
         """ValidationWarning requires field and message."""
-        warn = ValidationWarning(field="evidence_source", message="Recommended field missing")
+        warn = ValidationWarning(
+            field="evidence_source", message="Recommended field missing"
+        )
         assert warn.field == "evidence_source"
         assert warn.message == "Recommended field missing"
 
@@ -300,7 +302,9 @@ def test_valid_minimal(self) -> None:
 
     def test_optional_fields(self) -> None:
         """SpamReport optional fields default to None."""
-        report = SpamReport(**BASE_FIELDS, category="messaging", type="spam", protocol="smtp")
+        report = SpamReport(
+            **BASE_FIELDS, category="messaging", type="spam", protocol="smtp"
+        )
         assert report.language is None
         assert report.message_id is None
         assert report.recipient_count is None
@@ -315,7 +319,10 @@ def test_spam_indicators_nested(self) -> None:
             category="messaging",
             type="spam",
             protocol="smtp",
-            spam_indicators={"suspicious_links": ["http://evil.example/"], "commercial_content": True},
+            spam_indicators={
+                "suspicious_links": ["http://evil.example/"],
+                "commercial_content": True,
+            },
         )
         assert report.spam_indicators is not None
         assert isinstance(report.spam_indicators, SpamIndicators)
@@ -414,7 +421,9 @@ def test_infected_host_requires_bot_type(self) -> None:
 
     def test_infected_host(self) -> None:
         """InfectedHostReport constructs with bot_type."""
-        r = InfectedHostReport(**CONNECTION_BASE, type="infected_host", bot_type="mirai")
+        r = InfectedHostReport(
+            **CONNECTION_BASE, type="infected_host", bot_type="mirai"
+        )
         assert r.bot_type == "mirai"
 
     def test_reconnaissance_requires_probed_resources(self) -> None:
@@ -497,12 +506,15 @@ def test_brand_infringement_requires_fields(self) -> None:
             BrandInfringementReport(**CONTENT_BASE, type="brand_infringement")
 
     def test_remote_compromise_nested_indicators(self) -> None:
-        """RemoteCompromiseReport accepts nested CompromiseIndicator and WebshellDetails."""
+        """RemoteCompromiseReport accepts nested CompromiseIndicator and
+        WebshellDetails."""
         r = RemoteCompromiseReport(
             **CONTENT_BASE,
             type="remote_compromise",
             compromise_type="webshell",
-            compromise_indicators=[{"type": "file_path", "value": "/var/www/shell.php"}],
+            compromise_indicators=[
+                {"type": "file_path", "value": "/var/www/shell.php"}
+            ],
             webshell_details={"family": "c99", "password_protected": True},
         )
         assert r.compromise_indicators is not None
@@ -511,7 +523,8 @@ def test_remote_compromise_nested_indicators(self) -> None:
         assert isinstance(r.webshell_details, WebshellDetails)
 
     def test_suspicious_registration_requires_fields(self) -> None:
-        """SuspiciousRegistrationReport requires registration_date and suspicious_indicators."""
+        """SuspiciousRegistrationReport requires registration_date and
+        suspicious_indicators."""
         with pytest.raises(PydanticValidationError):
             SuspiciousRegistrationReport(**CONTENT_BASE, type="suspicious_registration")
 
@@ -629,7 +642,11 @@ def test_usenet(self) -> None:
 # Vulnerability type tests
 # ---------------------------------------------------------------------------
 
-VULN_BASE: dict[str, object] = {**BASE_FIELDS, "category": "vulnerability", "service": "openssh"}
+VULN_BASE: dict[str, object] = {
+    **BASE_FIELDS,
+    "category": "vulnerability",
+    "service": "openssh",
+}
 
 
 class TestVulnerabilityReports:
@@ -648,7 +665,11 @@ def test_cve(self) -> None:
             cve_id="CVE-2024-12345",
             service_port=22,
             cvss_score=9.8,
-            impact_assessment={"confidentiality": "high", "integrity": "high", "availability": "high"},
+            impact_assessment={
+                "confidentiality": "high",
+                "integrity": "high",
+                "availability": "high",
+            },
         )
         assert r.cve_id == "CVE-2024-12345"
         assert r.service_port == 22
@@ -712,14 +733,34 @@ class TestAnyXARFReportDiscriminator:
         ("category", "report_type", "extra"),
         [
             ("messaging", "spam", {"protocol": "smtp"}),
-            ("messaging", "bulk_messaging", {"protocol": "smtp", "recipient_count": 100}),
-            ("connection", "login_attack", {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp"}),
-            ("connection", "port_scan", {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp"}),
-            ("connection", "ddos", {"first_seen": "2026-01-01T00:00:00Z", "protocol": "udp"}),
+            (
+                "messaging",
+                "bulk_messaging",
+                {"protocol": "smtp", "recipient_count": 100},
+            ),
+            (
+                "connection",
+                "login_attack",
+                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp"},
+            ),
+            (
+                "connection",
+                "port_scan",
+                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp"},
+            ),
+            (
+                "connection",
+                "ddos",
+                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "udp"},
+            ),
             (
                 "connection",
                 "infected_host",
-                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp", "bot_type": "mirai"},
+                {
+                    "first_seen": "2026-01-01T00:00:00Z",
+                    "protocol": "tcp",
+                    "bot_type": "mirai",
+                },
             ),
             (
                 "connection",
@@ -733,7 +774,11 @@ class TestAnyXARFReportDiscriminator:
             (
                 "connection",
                 "scraping",
-                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "http", "total_requests": 1000},
+                {
+                    "first_seen": "2026-01-01T00:00:00Z",
+                    "protocol": "http",
+                    "total_requests": 1000,
+                },
             ),
             (
                 "connection",
@@ -743,7 +788,11 @@ class TestAnyXARFReportDiscriminator:
             (
                 "connection",
                 "vulnerability_scan",
-                {"first_seen": "2026-01-01T00:00:00Z", "protocol": "tcp", "scan_type": "port"},
+                {
+                    "first_seen": "2026-01-01T00:00:00Z",
+                    "protocol": "tcp",
+                    "scan_type": "port",
+                },
             ),
             ("content", "phishing", {"url": "https://evil.example/"}),
             ("content", "malware", {"url": "https://evil.example/payload.exe"}),
@@ -930,7 +979,9 @@ def test_dict_input(self) -> None:
 
     def test_model_input(self) -> None:
         """_report_discriminator extracts key from a model instance."""
-        report = SpamReport(**BASE_FIELDS, category="messaging", type="spam", protocol="smtp")
+        report = SpamReport(
+            **BASE_FIELDS, category="messaging", type="spam", protocol="smtp"
+        )
         key = _report_discriminator(report)
         assert key == "messaging/spam"
 
diff --git a/tests/test_parse.py b/tests/test_parse.py
new file mode 100644
index 0000000..9195860
--- /dev/null
+++ b/tests/test_parse.py
@@ -0,0 +1,588 @@
+"""Tests for the :func:`xarf.parse` function.
+
+Covers:
+
+- All 32 canonical xarf-spec v4 samples parse without errors.
+- Shared test-suite samples are handled robustly (no unhandled exceptions).
+- Invalid samples produce the expected errors or exceptions.
+- v3 backward-compatibility detection and conversion warnings.
+- JSON string vs dict input formats.
+- Strict mode behaviour.
+- Unknown-field warnings and errors.
+- ``show_missing_optional`` info population.
+- Category/type discriminated union resolution.
+- Malformed / edge-case input.
+- Throughput performance (≥ 1000 reports/sec).
+"""
+
+from __future__ import annotations
+
+import copy
+import json
+import time
+from pathlib import Path
+from typing import Any
+
+import pytest
+
+from xarf import parse
+from xarf.exceptions import XARFParseError
+from xarf.models import (
+    DdosReport,
+    ParseResult,
+    PhishingReport,
+    SpamReport,
+)
+
+# ---------------------------------------------------------------------------
+# Module-level collection of spec samples (empty when monorepo not present)
+# ---------------------------------------------------------------------------
+
+_SPEC_SAMPLES_DIR: Path = (
+    Path(__file__).parent.parent.parent / "xarf-spec" / "samples" / "v4"
+)
+_spec_samples: list[tuple[Path, str]] = (
+    [(p, p.stem) for p in sorted(_SPEC_SAMPLES_DIR.glob("*.json"))]
+    if _SPEC_SAMPLES_DIR.exists()
+    else []
+)
+
+_SHARED_SAMPLES_DIR: Path = Path(__file__).parent / "shared" / "samples"
+_INVALID_DIR: Path = _SHARED_SAMPLES_DIR / "invalid"
+_V3_DIR: Path = _SHARED_SAMPLES_DIR / "valid" / "v3"
+
+# ---------------------------------------------------------------------------
+# Base valid report used in several test classes
+# ---------------------------------------------------------------------------
+
+_CONTACT: dict[str, str] = {
+    "org": "ACME Security",
+    "contact": "abuse@acme.example",
+    "domain": "acme.example",
+}
+
+_VALID_DDOS: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "550e8400-e29b-41d4-a716-446655440000",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "connection",
+    "type": "ddos",
+    "evidence_source": "honeypot",
+    "source_port": 12345,
+    "destination_ip": "203.0.113.10",
+    "protocol": "tcp",
+    "first_seen": "2024-01-15T09:00:00Z",
+}
+
+_VALID_SPAM: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "6ba7b810-9dad-11d1-80b4-00c04fd430c8",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "messaging",
+    "type": "spam",
+    "evidence_source": "honeypot",
+    "protocol": "sms",
+}
+
+_VALID_PHISHING: dict[str, Any] = {
+    "xarf_version": "4.2.0",
+    "report_id": "6ba7b811-9dad-11d1-80b4-00c04fd430c8",
+    "timestamp": "2024-01-15T10:30:00Z",
+    "reporter": _CONTACT,
+    "sender": _CONTACT,
+    "source_identifier": "192.0.2.1",
+    "category": "content",
+    "type": "phishing",
+    "evidence_source": "honeypot",
+    "url": "https://phishing.example.com/login",
+}
+
+
+# ---------------------------------------------------------------------------
+# TestSpecSamples
+# ---------------------------------------------------------------------------
+
+
+class TestSpecSamples:
+    """Tests that every canonical xarf-spec v4 sample parses without errors."""
+
+    @pytest.mark.parametrize(
+        "sample_path,sample_stem",
+        _spec_samples,
+        ids=[stem for _, stem in _spec_samples],
+    )
+    def test_spec_sample_parses_without_errors(
+        self, sample_path: Path, sample_stem: str
+    ) -> None:
+        """Each canonical spec sample must produce zero validation errors.
+
+        Args:
+            sample_path: Absolute path to the sample JSON file.
+            sample_stem: Filename stem, used as the test ID.
+        """
+        if not _SPEC_SAMPLES_DIR.exists():
+            pytest.skip("xarf-spec directory not present in this checkout")
+
+        raw = sample_path.read_text(encoding="utf-8")
+        data = json.loads(raw)
+        result = parse(data)
+        assert result.errors == [], f"{sample_stem}: unexpected errors: {result.errors}"
+
+    def test_dict_and_string_input_are_equivalent(self) -> None:
+        """Dict input and JSON string input produce the same result for a
+        representative sample.
+
+        Skips gracefully when the spec samples directory is absent.
+        """
+        if not _spec_samples:
+            pytest.skip("xarf-spec directory not present in this checkout")
+
+        sample_path, _ = _spec_samples[0]
+        raw = sample_path.read_text(encoding="utf-8")
+        data = json.loads(raw)
+
+        result_dict = parse(data)
+        result_str = parse(raw)
+
+        assert result_dict.errors == result_str.errors
+        assert type(result_dict.report) is type(result_str.report)
+
+
+# ---------------------------------------------------------------------------
+# TestSharedSamplesRobustness
+# ---------------------------------------------------------------------------
+
+
+class TestSharedSamplesRobustness:
+    """Tests that all valid/v4 shared samples do not raise unhandled exceptions."""
+
+    @pytest.mark.parametrize(
+        "sample_path",
+        list((_SHARED_SAMPLES_DIR / "valid" / "v4").rglob("*.json")),
+        ids=[
+            p.stem
+            for p in sorted((_SHARED_SAMPLES_DIR / "valid" / "v4").rglob("*.json"))
+        ],
+    )
+    def test_shared_valid_v4_sample_does_not_raise(self, sample_path: Path) -> None:
+        """parse() must not raise for any shared valid/v4 sample.
+
+        The result must be a :class:`~xarf.models.ParseResult`.  The report may
+        be ``None`` when schema errors prevent Pydantic deserialization, but the
+        call itself must not throw.
+
+        Args:
+            sample_path: Path to a shared valid/v4 JSON sample.
+        """
+        data = json.loads(sample_path.read_text(encoding="utf-8"))
+        result = parse(data)
+        assert isinstance(result, ParseResult)
+        # Either the report was parsed OR there were errors — both are acceptable.
+        assert result.report is not None or len(result.errors) > 0
+
+
+# ---------------------------------------------------------------------------
+# TestInvalidSamples
+# ---------------------------------------------------------------------------
+
+
+class TestInvalidSamples:
+    """Tests that known-invalid shared samples are handled correctly."""
+
+    def test_malformed_json_raises_parse_error(self) -> None:
+        """Truly malformed JSON string raises
+        :class:`~xarf.exceptions.XARFParseError`."""
+        raw = (_INVALID_DIR / "malformed_data" / "invalid_json.json").read_text(
+            encoding="utf-8"
+        )
+        with pytest.raises(XARFParseError):
+            parse(raw)
+
+    def test_invalid_class_produces_category_error(self) -> None:
+        """A report with an invalid category value produces errors referencing
+        'category'.
+
+        Args: (none beyond self)
+        """
+        data = json.loads(
+            (_INVALID_DIR / "schema_violations" / "invalid_class.json").read_text(
+                encoding="utf-8"
+            )
+        )
+        result = parse(data)
+        assert len(result.errors) > 0
+        fields_and_messages = " ".join(f"{e.field} {e.message}" for e in result.errors)
+        assert "category" in fields_and_messages.lower()
+
+    def test_missing_xarf_version_produces_errors(self) -> None:
+        """A report missing ``xarf_version`` produces validation errors."""
+        data = json.loads(
+            (
+                _INVALID_DIR / "schema_violations" / "missing_xarf_version.json"
+            ).read_text(encoding="utf-8")
+        )
+        result = parse(data)
+        assert len(result.errors) > 0
+
+    def test_missing_reporter_produces_reporter_error(self) -> None:
+        """A report missing the ``reporter`` field produces an error referencing
+        'reporter'.
+
+        Args: (none beyond self)
+        """
+        data = json.loads(
+            (_INVALID_DIR / "missing_fields" / "missing_reporter.json").read_text(
+                encoding="utf-8"
+            )
+        )
+        result = parse(data)
+        assert len(result.errors) > 0
+        fields_and_messages = " ".join(f"{e.field} {e.message}" for e in result.errors)
+        assert "reporter" in fields_and_messages.lower()
+
+    def test_messaging_missing_protocol_produces_errors(self) -> None:
+        """A messaging report missing ``protocol`` produces validation errors."""
+        data = json.loads(
+            (
+                _INVALID_DIR
+                / "business_rule_violations"
+                / "messaging_missing_protocol.json"
+            ).read_text(encoding="utf-8")
+        )
+        result = parse(data)
+        assert len(result.errors) > 0
+
+
+# ---------------------------------------------------------------------------
+# TestV3Detection
+# ---------------------------------------------------------------------------
+
+
+class TestV3Detection:
+    """Tests for automatic v3 → v4 conversion and deprecation warnings."""
+
+    def test_spam_v3_sample_converts_without_errors(self) -> None:
+        """spam_v3_sample parses as a string with no errors and a v3 deprecation
+        warning."""
+        raw = (_V3_DIR / "spam_v3_sample.json").read_text(encoding="utf-8")
+        result = parse(raw)
+        assert result.errors == [], f"Unexpected errors: {result.errors}"
+        assert result.report is not None
+        warning_messages = " ".join(w.message for w in result.warnings)
+        assert (
+            "v3" in warning_messages.lower() or "deprecated" in warning_messages.lower()
+        )
+
+    def test_phishing_v3_sample_converts_without_errors(self) -> None:
+        """phishing_v3_sample parses as a string with no errors and a v3 deprecation
+        warning."""
+        raw = (_V3_DIR / "phishing_v3_sample.json").read_text(encoding="utf-8")
+        result = parse(raw)
+        assert result.errors == [], f"Unexpected errors: {result.errors}"
+        assert result.report is not None
+        warning_messages = " ".join(w.message for w in result.warnings)
+        assert (
+            "v3" in warning_messages.lower() or "deprecated" in warning_messages.lower()
+        )
+
+    def test_ddos_v3_sample_raises_parse_error(self) -> None:
+        """ddos_v3_sample raises :class:`~xarf.exceptions.XARFParseError` due to
+        missing protocol."""
+        raw = (_V3_DIR / "ddos_v3_sample.json").read_text(encoding="utf-8")
+        with pytest.raises(XARFParseError):
+            parse(raw)
+
+    def test_v3_conversion_emits_python_warning(self) -> None:
+        """parse() emits a Python :func:`warnings.warn` call when converting v3
+        reports."""
+        raw = (_V3_DIR / "spam_v3_sample.json").read_text(encoding="utf-8")
+        with pytest.warns(DeprecationWarning):
+            parse(raw)
+
+
+# ---------------------------------------------------------------------------
+# TestInputFormats
+# ---------------------------------------------------------------------------
+
+
+class TestInputFormats:
+    """Tests for JSON string vs dict input forms."""
+
+    def test_string_input_matches_dict_input(self) -> None:
+        """Passing a JSON string and an equivalent dict produce the same result."""
+        data = copy.deepcopy(_VALID_DDOS)
+        json_str = json.dumps(data)
+
+        result_dict = parse(data)
+        result_str = parse(json_str)
+
+        assert result_dict.errors == result_str.errors
+        assert type(result_dict.report) is type(result_str.report)
+
+    def test_extra_whitespace_in_json_string_is_handled(self) -> None:
+        """A JSON string with extra leading/trailing whitespace parses successfully."""
+        json_str = "  \n" + json.dumps(_VALID_DDOS) + "\n  "
+        result = parse(json_str)
+        assert isinstance(result, ParseResult)
+
+    def test_malformed_string_raises_parse_error(self) -> None:
+        """A non-JSON string raises :class:`~xarf.exceptions.XARFParseError`."""
+        with pytest.raises(XARFParseError):
+            parse("this is not json at all }{")
+
+
+# ---------------------------------------------------------------------------
+# TestStrictMode
+# ---------------------------------------------------------------------------
+
+
+class TestStrictMode:
+    """Tests for strict-mode validation behaviour."""
+
+    def test_missing_recommended_field_no_error_in_non_strict(self) -> None:
+        """Missing ``evidence_source`` (recommended) does not produce errors in
+        non-strict mode."""
+        data = copy.deepcopy(_VALID_DDOS)
+        del data["evidence_source"]
+        result = parse(data, strict=False)
+        assert result.errors == []
+
+    def test_missing_recommended_field_error_in_strict(self) -> None:
+        """Missing ``evidence_source`` (recommended) produces errors in strict mode."""
+        data = copy.deepcopy(_VALID_DDOS)
+        del data["evidence_source"]
+        result = parse(data, strict=True)
+        assert len(result.errors) > 0
+
+    def test_strict_mode_with_errors_returns_none_report(self) -> None:
+        """Strict mode with validation errors returns ``report=None``."""
+        data = copy.deepcopy(_VALID_DDOS)
+        del data["evidence_source"]
+        result = parse(data, strict=True)
+        assert result.report is None
+
+    def test_non_strict_mode_may_still_return_report(self) -> None:
+        """Non-strict mode with recoverable issues may still return a typed report."""
+        # A fully-valid report in non-strict mode always yields a report.
+        result = parse(copy.deepcopy(_VALID_DDOS), strict=False)
+        assert result.report is not None
+
+
+# ---------------------------------------------------------------------------
+# TestUnknownFields
+# ---------------------------------------------------------------------------
+
+
+class TestUnknownFields:
+    """Tests for unknown-field detection and warning/error promotion."""
+
+    def test_unknown_field_produces_warning_in_non_strict(self) -> None:
+        """An unrecognized field in a valid report produces a
+        :class:`~xarf.models.ValidationWarning`."""
+        data = copy.deepcopy(_VALID_DDOS)
+        data["totally_unknown_xarf_field"] = "surprise"
+        result = parse(data, strict=False)
+        warning_fields = [w.field for w in result.warnings]
+        assert "totally_unknown_xarf_field" in warning_fields
+
+    def test_unknown_field_produces_error_in_strict(self) -> None:
+        """An unrecognized field in strict mode produces a
+        :class:`~xarf.models.ValidationError`."""
+        data = copy.deepcopy(_VALID_DDOS)
+        data["totally_unknown_xarf_field"] = "surprise"
+        result = parse(data, strict=True)
+        error_fields = [e.field for e in result.errors]
+        assert "totally_unknown_xarf_field" in error_fields
+
+    def test_known_schema_fields_do_not_produce_warnings(self) -> None:
+        """Core schema fields such as ``description`` do not trigger
+        unknown-field warnings."""
+        data = copy.deepcopy(_VALID_DDOS)
+        data["description"] = "A known optional field"
+        result = parse(data, strict=False)
+        warning_fields = [w.field for w in result.warnings]
+        assert "description" not in warning_fields
+
+
+# ---------------------------------------------------------------------------
+# TestShowMissingOptional
+# ---------------------------------------------------------------------------
+
+
+class TestShowMissingOptional:
+    """Tests for the ``show_missing_optional`` feature."""
+
+    def test_show_missing_optional_false_returns_none_info(self) -> None:
+        """``show_missing_optional=False`` (default) leaves ``result.info`` as
+        ``None``."""
+        result = parse(copy.deepcopy(_VALID_DDOS), show_missing_optional=False)
+        assert result.info is None
+
+    def test_show_missing_optional_true_returns_list(self) -> None:
+        """``show_missing_optional=True`` populates ``result.info`` with a list."""
+        result = parse(copy.deepcopy(_VALID_DDOS), show_missing_optional=True)
+        assert isinstance(result.info, list)
+
+    def test_info_entries_have_field_and_message_keys(self) -> None:
+        """Each info dict must have ``"field"`` and ``"message"`` keys."""
+        result = parse(copy.deepcopy(_VALID_DDOS), show_missing_optional=True)
+        assert result.info is not None
+        for entry in result.info:
+            assert "field" in entry
+            assert "message" in entry
+
+    def test_recommended_field_info_has_recommended_prefix(self) -> None:
+        """The ``confidence`` field (recommended) appears in info with a
+        ``RECOMMENDED:`` prefix."""
+        result = parse(copy.deepcopy(_VALID_DDOS), show_missing_optional=True)
+        assert result.info is not None
+        confidence_entries = [e for e in result.info if e["field"] == "confidence"]
+        assert len(confidence_entries) == 1
+        assert confidence_entries[0]["message"].startswith("RECOMMENDED:")
+
+    def test_optional_field_info_has_optional_prefix(self) -> None:
+        """The ``description`` field (optional) appears in info with an
+        ``OPTIONAL:`` prefix."""
+        result = parse(copy.deepcopy(_VALID_DDOS), show_missing_optional=True)
+        assert result.info is not None
+        desc_entries = [e for e in result.info if e["field"] == "description"]
+        assert len(desc_entries) == 1
+        assert desc_entries[0]["message"].startswith("OPTIONAL:")
+
+    def test_present_fields_not_in_info(self) -> None:
+        """Fields that are already present in the report do not appear in info."""
+        result = parse(copy.deepcopy(_VALID_DDOS), show_missing_optional=True)
+        assert result.info is not None
+        info_fields = [e["field"] for e in result.info]
+        for present_field in (
+            "xarf_version",
+            "report_id",
+            "category",
+            "type",
+            "evidence_source",
+        ):
+            assert present_field not in info_fields
+
+
+# ---------------------------------------------------------------------------
+# TestCategoryTypeDiscrimination
+# ---------------------------------------------------------------------------
+
+
+class TestCategoryTypeDiscrimination:
+    """Tests that the discriminated union resolves to the correct concrete type."""
+
+    def test_spam_report_type(self) -> None:
+        """A ``messaging/spam`` dict resolves to a :class:`~xarf.models.SpamReport`."""
+        result = parse(copy.deepcopy(_VALID_SPAM))
+        assert result.errors == []
+        assert isinstance(result.report, SpamReport)
+
+    def test_spam_report_category_and_type_fields(self) -> None:
+        """``result.report.category`` and ``result.report.type`` are correct for
+        spam."""
+        result = parse(copy.deepcopy(_VALID_SPAM))
+        assert result.report is not None
+        assert result.report.category == "messaging"
+        assert result.report.type == "spam"
+
+    def test_ddos_report_type(self) -> None:
+        """A ``connection/ddos`` dict resolves to a :class:`~xarf.models.DdosReport`."""
+        result = parse(copy.deepcopy(_VALID_DDOS))
+        assert result.errors == []
+        assert isinstance(result.report, DdosReport)
+
+    def test_ddos_report_category_and_type_fields(self) -> None:
+        """``result.report.category`` and ``result.report.type`` are correct for
+        ddos."""
+        result = parse(copy.deepcopy(_VALID_DDOS))
+        assert result.report is not None
+        assert result.report.category == "connection"
+        assert result.report.type == "ddos"
+
+    def test_phishing_report_type(self) -> None:
+        """A ``content/phishing`` dict resolves to a
+        :class:`~xarf.models.PhishingReport`."""
+        result = parse(copy.deepcopy(_VALID_PHISHING))
+        assert result.errors == []
+        assert isinstance(result.report, PhishingReport)
+
+    def test_phishing_report_category_and_type_fields(self) -> None:
+        """``result.report.category`` and ``result.report.type`` are correct for
+        phishing."""
+        result = parse(copy.deepcopy(_VALID_PHISHING))
+        assert result.report is not None
+        assert result.report.category == "content"
+        assert result.report.type == "phishing"
+
+
+# ---------------------------------------------------------------------------
+# TestMalformedInput
+# ---------------------------------------------------------------------------
+
+
+class TestMalformedInput:
+    """Tests for degenerate and edge-case inputs."""
+
+    def test_empty_string_raises_parse_error(self) -> None:
+        """An empty string raises :class:`~xarf.exceptions.XARFParseError`."""
+        with pytest.raises(XARFParseError):
+            parse("")
+
+    def test_null_json_string_raises_or_returns_errors(self) -> None:
+        """The JSON string ``"null"`` either raises
+        :class:`~xarf.exceptions.XARFParseError` or returns a
+        :class:`~xarf.models.ParseResult` with errors (``None`` is not a dict).
+        """
+        try:
+            result = parse("null")
+            # If parse() doesn't raise, it must indicate failure.
+            assert result.report is None or len(result.errors) > 0
+        except XARFParseError:
+            pass  # Also acceptable.
+
+    def test_empty_dict_string_returns_errors(self) -> None:
+        """An empty JSON object ``"{}"`` returns errors for all missing required
+        fields."""
+        result = parse("{}")
+        assert len(result.errors) > 0
+        assert result.report is None
+
+
+# ---------------------------------------------------------------------------
+# TestPerformance
+# ---------------------------------------------------------------------------
+
+
+class TestPerformance:
+    """Throughput test verifying parse() processes reports within a reasonable
+    time budget.
+
+    Note:
+        The xarf-parser-tests spec targets ≥ 1000 reports/sec for the JavaScript
+        implementation using AJV.  Python's ``jsonschema`` library is significantly
+        slower than AJV, so the threshold here is adjusted for Python: 1000 reports
+        must complete in under 5 seconds (≥ 200 reports/sec), which is still a
+        meaningful regression guard while remaining achievable on typical developer
+        hardware and CI runners.
+    """
+
+    def test_parse_1000_reports_in_under_five_seconds(self) -> None:
+        """parse() processes 1000 typical reports in under 5 seconds."""
+        data = copy.deepcopy(_VALID_DDOS)
+        iterations = 1000
+
+        start = time.perf_counter()
+        for _ in range(iterations):
+            parse(data)
+        elapsed = time.perf_counter() - start
+
+        assert elapsed < 5.0, (
+            f"Parsed {iterations} reports in {elapsed:.3f}s — exceeds 5-second budget"
+        )
diff --git a/tests/test_schema_registry.py b/tests/test_schema_registry.py
index 216a336..94679da 100644
--- a/tests/test_schema_registry.py
+++ b/tests/test_schema_registry.py
@@ -334,7 +334,6 @@ def test_contains_known_fields(self) -> None:
             assert f in names
 
 
-
 # ---------------------------------------------------------------------------
 # get_type_schema
 # ---------------------------------------------------------------------------
diff --git a/tests/test_schema_validator.py b/tests/test_schema_validator.py
index 5de1d49..c69bdbf 100644
--- a/tests/test_schema_validator.py
+++ b/tests/test_schema_validator.py
@@ -102,7 +102,8 @@ def test_recommended_field_missing_fails_strict_mode(self) -> None:
 
     def test_strict_mode_valid_when_all_recommended_present(self) -> None:
         report = _valid_spam_report()
-        # Core x-recommended: evidence_source, source_port (already set), evidence, confidence
+        # Core x-recommended: evidence_source, source_port (already set), evidence,
+        # confidence
         # evidence_item x-recommended: description, hash
         # Spam type x-recommended: evidence_source, smtp_to, subject, message_id
         # confidence is 0.0-1.0 per schema
diff --git a/tests/test_validator.py b/tests/test_validator.py
new file mode 100644
index 0000000..6f8b49b
--- /dev/null
+++ b/tests/test_validator.py
@@ -0,0 +1,613 @@
+"""Tests for :class:`xarf.validator.XARFValidator` and the :data:`_validator` singleton.
+
+Port of the JavaScript ``validator.test.ts`` test suite.
+
+Covers:
+
+- Missing required fields.
+- Invalid category and type values.
+- Strict-mode promotion of recommended fields and unknown fields.
+- Format validation (UUID, timestamp, semver).
+- Required nested sub-fields (reporter.contact, reporter.domain).
+- Evidence-source enum validation.
+- Category-specific business rules.
+- Port range validation.
+- ``on_behalf_of`` handling.
+- ``show_missing_optional`` info population.
+- Unknown-field detection in both modes.
+- ``valid`` flag accuracy.
+"""
+
+from __future__ import annotations
+
+import copy
+from typing import Any
+
+from xarf.validator import _validator
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+_CONTACT: dict[str, str] = {
+    "org": "Test Org",
+    "contact": "test@example.com",
+    "domain": "example.com",
+}
+
+
+def _valid_ddos_report() -> dict[str, Any]:
+    """Return a fresh minimal valid ``connection/ddos`` report dict.
+
+    Returns:
+        A new dict on every call to prevent cross-test mutation.
+    """
+    return {
+        "xarf_version": "4.2.0",
+        "report_id": "550e8400-e29b-41d4-a716-446655440000",
+        "timestamp": "2024-01-15T10:30:00Z",
+        "reporter": copy.deepcopy(_CONTACT),
+        "sender": copy.deepcopy(_CONTACT),
+        "source_identifier": "192.0.2.1",
+        "category": "connection",
+        "type": "ddos",
+        "evidence_source": "honeypot",
+        "source_port": 12345,
+        "destination_ip": "203.0.113.10",
+        "protocol": "tcp",
+        "first_seen": "2024-01-15T09:00:00Z",
+    }
+
+
+# ---------------------------------------------------------------------------
+# TestMissingRequiredFields
+# ---------------------------------------------------------------------------
+
+
+class TestMissingRequiredFields:
+    """Tests that missing required fields produce validation errors."""
+
+    def test_empty_report_is_invalid(self) -> None:
+        """An empty dict must fail validation with at least one error."""
+        result = _validator.validate({})
+        assert result.valid is False
+        assert len(result.errors) > 0
+
+    def test_missing_source_identifier_is_invalid(self) -> None:
+        """A report without ``source_identifier`` must fail validation."""
+        data = _valid_ddos_report()
+        del data["source_identifier"]
+        result = _validator.validate(data)
+        assert result.valid is False
+
+
+# ---------------------------------------------------------------------------
+# TestInvalidCategory
+# ---------------------------------------------------------------------------
+
+
+class TestInvalidCategory:
+    """Tests that an unrecognised category value produces an appropriate error."""
+
+    def test_invalid_category_produces_category_error(self) -> None:
+        """An unknown category value must produce an error with ``field="category"``."""
+        data = _valid_ddos_report()
+        data["category"] = "totally_invalid_category"
+        result = _validator.validate(data)
+        assert result.valid is False
+        error_fields = [e.field for e in result.errors]
+        assert "category" in error_fields
+
+
+# ---------------------------------------------------------------------------
+# TestStrictMode
+# ---------------------------------------------------------------------------
+
+
+class TestStrictMode:
+    """Tests for strict-mode behaviour."""
+
+    def test_invalid_xarf_version_fails_in_strict(self) -> None:
+        """``xarf_version="3.0.0"`` must fail validation in strict mode."""
+        data = _valid_ddos_report()
+        data["xarf_version"] = "3.0.0"
+        result = _validator.validate(data, strict=True)
+        assert result.valid is False
+
+    def test_unknown_field_is_warning_in_non_strict(self) -> None:
+        """An unknown field produces a warning (not an error) in non-strict mode."""
+        data = _valid_ddos_report()
+        data["unknown_exotic_field_xyz"] = "value"
+        result = _validator.validate(data, strict=False)
+        assert result.valid is True
+        warning_fields = [w.field for w in result.warnings]
+        assert "unknown_exotic_field_xyz" in warning_fields
+
+    def test_unknown_field_is_error_in_strict(self) -> None:
+        """An unknown field becomes an error in strict mode."""
+        data = _valid_ddos_report()
+        data["unknown_exotic_field_xyz"] = "value"
+        result = _validator.validate(data, strict=True)
+        assert result.valid is False
+        error_fields = [e.field for e in result.errors]
+        assert "unknown_exotic_field_xyz" in error_fields
+
+    def test_strict_mode_clears_warnings_on_promotion(self) -> None:
+        """In strict mode, unknown-field entries appear as errors and not warnings."""
+        data = _valid_ddos_report()
+        data["unknown_exotic_field_xyz"] = "value"
+        result = _validator.validate(data, strict=True)
+        warning_fields = [w.field for w in result.warnings]
+        assert "unknown_exotic_field_xyz" not in warning_fields
+
+
+# ---------------------------------------------------------------------------
+# TestFormatValidation
+# ---------------------------------------------------------------------------
+
+
+class TestFormatValidation:
+    """Tests for field-level format validation (UUID, timestamp, semver)."""
+
+    def test_invalid_uuid_report_id_fails(self) -> None:
+        """A non-UUID ``report_id`` must produce an error referencing ``report_id``."""
+        data = _valid_ddos_report()
+        data["report_id"] = "not-a-uuid"
+        result = _validator.validate(data)
+        assert result.valid is False
+        error_fields_and_messages = " ".join(
+            f"{e.field} {e.message}" for e in result.errors
+        )
+        assert "report_id" in error_fields_and_messages
+
+    def test_wrong_type_timestamp_fails(self) -> None:
+        """A non-string ``timestamp`` (wrong JSON type) must produce an error.
+
+        Note:
+            ``date-time`` *format* validation (e.g. rejecting ``"foo"``) requires
+            the optional ``rfc3339-validator`` package, which is not a runtime
+            dependency.  This test covers the weaker guarantee: a timestamp that is
+            not a string at all (e.g. an integer) is caught by jsonschema's type
+            checker, which is always active.
+        """
+        data = _valid_ddos_report()
+        data["timestamp"] = 42  # wrong type — caught without optional format deps
+        result = _validator.validate(data)
+        assert result.valid is False
+        assert any(e.field == "timestamp" for e in result.errors)
+
+    def test_invalid_version_format_fails(self) -> None:
+        """A non-semver ``xarf_version`` such as ``"4.0"`` must fail validation."""
+        data = _valid_ddos_report()
+        data["xarf_version"] = "4.0"
+        result = _validator.validate(data)
+        assert result.valid is False
+
+    def test_valid_report_passes(self) -> None:
+        """A fully valid report must pass validation with no errors."""
+        result = _validator.validate(_valid_ddos_report())
+        assert result.valid is True
+        assert result.errors == []
+
+
+# ---------------------------------------------------------------------------
+# TestRequiredFieldEdgeCases
+# ---------------------------------------------------------------------------
+
+
+class TestRequiredFieldEdgeCases:
+    """Tests for required sub-fields within nested objects."""
+
+    def test_missing_reporter_contact_fails(self) -> None:
+        """A report without ``reporter.contact`` must fail with an error
+        referencing both.
+
+        Args: (none beyond self)
+        """
+        data = _valid_ddos_report()
+        del data["reporter"]["contact"]
+        result = _validator.validate(data)
+        assert result.valid is False
+        combined = " ".join(f"{e.field} {e.message}" for e in result.errors)
+        assert "reporter" in combined.lower()
+        assert "contact" in combined.lower()
+
+    def test_missing_reporter_domain_fails(self) -> None:
+        """A report without ``reporter.domain`` must fail with an error
+        referencing both.
+
+        Args: (none beyond self)
+        """
+        data = _valid_ddos_report()
+        del data["reporter"]["domain"]
+        result = _validator.validate(data)
+        assert result.valid is False
+        combined = " ".join(f"{e.field} {e.message}" for e in result.errors)
+        assert "reporter" in combined.lower()
+        assert "domain" in combined.lower()
+
+
+# ---------------------------------------------------------------------------
+# TestValueValidation
+# ---------------------------------------------------------------------------
+
+
+class TestValueValidation:
+    """Tests for field value constraints (enums, ranges)."""
+
+    def test_invalid_evidence_source_enum_fails(self) -> None:
+        """An invalid ``evidence_source`` value must fail with
+        ``field="evidence_source"``."""
+        data = _valid_ddos_report()
+        data["evidence_source"] = "made_up_source_value"
+        result = _validator.validate(data)
+        assert result.valid is False
+        error_fields = [e.field for e in result.errors]
+        assert "evidence_source" in error_fields
+
+
+# ---------------------------------------------------------------------------
+# TestCategorySpecific
+# ---------------------------------------------------------------------------
+
+
+class TestCategorySpecific:
+    """Category-specific validation rule tests."""
+
+    def test_valid_messaging_spam_report_passes(self) -> None:
+        """A minimal valid ``messaging/spam`` report (protocol=sms) must pass."""
+        data: dict[str, Any] = {
+            "xarf_version": "4.2.0",
+            "report_id": "6ba7b810-9dad-11d1-80b4-00c04fd430c8",
+            "timestamp": "2024-01-15T10:30:00Z",
+            "reporter": copy.deepcopy(_CONTACT),
+            "sender": copy.deepcopy(_CONTACT),
+            "source_identifier": "192.0.2.1",
+            "category": "messaging",
+            "type": "spam",
+            "evidence_source": "honeypot",
+            "protocol": "sms",
+        }
+        result = _validator.validate(data)
+        assert result.valid is True
+
+    def test_unknown_type_fails(self) -> None:
+        """An unknown report type within a valid category must fail validation."""
+        data = _valid_ddos_report()
+        data["type"] = "no_such_type_ever"
+        result = _validator.validate(data)
+        assert result.valid is False
+
+    def test_smtp_spam_without_smtp_from_fails(self) -> None:
+        """``messaging/spam`` with ``protocol=smtp`` but no ``smtp_from`` must fail."""
+        data: dict[str, Any] = {
+            "xarf_version": "4.2.0",
+            "report_id": "6ba7b811-9dad-11d1-80b4-00c04fd430c8",
+            "timestamp": "2024-01-15T10:30:00Z",
+            "reporter": copy.deepcopy(_CONTACT),
+            "sender": copy.deepcopy(_CONTACT),
+            "source_identifier": "192.0.2.1",
+            "category": "messaging",
+            "type": "spam",
+            "evidence_source": "honeypot",
+            "protocol": "smtp",
+            # smtp_from intentionally omitted
+        }
+        result = _validator.validate(data)
+        assert result.valid is False
+        combined = " ".join(f"{e.field} {e.message}" for e in result.errors)
+        assert "smtp_from" in combined
+
+    def test_ddos_without_destination_ip_is_valid(self) -> None:
+        """``connection/ddos`` without ``destination_ip`` (recommended) is valid in
+        non-strict mode."""
+        data = _valid_ddos_report()
+        del data["destination_ip"]
+        result = _validator.validate(data, strict=False)
+        assert result.valid is True
+
+    def test_phishing_without_url_fails(self) -> None:
+        """``content/phishing`` without ``url`` (required) must fail validation."""
+        data: dict[str, Any] = {
+            "xarf_version": "4.2.0",
+            "report_id": "6ba7b812-9dad-11d1-80b4-00c04fd430c8",
+            "timestamp": "2024-01-15T10:30:00Z",
+            "reporter": copy.deepcopy(_CONTACT),
+            "sender": copy.deepcopy(_CONTACT),
+            "source_identifier": "192.0.2.1",
+            "category": "content",
+            "type": "phishing",
+            "evidence_source": "honeypot",
+            # url intentionally omitted
+        }
+        result = _validator.validate(data)
+        assert result.valid is False
+
+    def test_phishing_with_wrong_type_url_fails(self) -> None:
+        """``content/phishing`` with a non-string ``url`` must produce an error.
+
+        Note:
+            ``uri`` *format* validation (e.g. rejecting ``"not a url"`` strings)
+            requires the optional ``rfc3986-validator`` package, which is not a
+            runtime dependency.  This test covers the weaker guarantee: a ``url``
+            field with the wrong JSON type (e.g. an integer) is rejected by
+            jsonschema's type checker, which is always active.
+        """
+        data: dict[str, Any] = {
+            "xarf_version": "4.2.0",
+            "report_id": "6ba7b813-9dad-11d1-80b4-00c04fd430c8",
+            "timestamp": "2024-01-15T10:30:00Z",
+            "reporter": copy.deepcopy(_CONTACT),
+            "sender": copy.deepcopy(_CONTACT),
+            "source_identifier": "192.0.2.1",
+            "category": "content",
+            "type": "phishing",
+            "evidence_source": "honeypot",
+            "url": 12345,  # wrong type — caught without optional format deps
+        }
+        result = _validator.validate(data)
+        assert result.valid is False
+        assert any(e.field == "url" for e in result.errors)
+        error_fields = [e.field for e in result.errors]
+        assert "url" in error_fields
+
+    def test_valid_botnet_report_passes(self) -> None:
+        """A minimal valid ``infrastructure/botnet`` report must pass validation."""
+        data: dict[str, Any] = {
+            "xarf_version": "4.2.0",
+            "report_id": "6ba7b814-9dad-11d1-80b4-00c04fd430c8",
+            "timestamp": "2024-01-15T10:30:00Z",
+            "reporter": copy.deepcopy(_CONTACT),
+            "sender": copy.deepcopy(_CONTACT),
+            "source_identifier": "192.0.2.1",
+            "category": "infrastructure",
+            "type": "botnet",
+            "evidence_source": "honeypot",
+            "compromise_evidence": "C2 communication observed",
+        }
+        result = _validator.validate(data)
+        assert result.valid is True
+
+
+# ---------------------------------------------------------------------------
+# TestPortValidation
+# ---------------------------------------------------------------------------
+
+
+class TestPortValidation:
+    """Tests for ``destination_port`` range and type validation."""
+
+    def test_destination_port_as_string_fails(self) -> None:
+        """``destination_port`` must be an integer; a string value must fail."""
+        data = _valid_ddos_report()
+        data["destination_port"] = "80"  # type: ignore[assignment]
+        result = _validator.validate(data)
+        assert result.valid is False
+        error_fields = [e.field for e in result.errors]
+        assert "destination_port" in error_fields
+
+    def test_destination_port_too_high_fails(self) -> None:
+        """``destination_port=70000`` exceeds 65535 and must fail validation."""
+        data = _valid_ddos_report()
+        data["destination_port"] = 70000
+        result = _validator.validate(data)
+        assert result.valid is False
+
+    def test_destination_port_negative_fails(self) -> None:
+        """``destination_port=-1`` is below the minimum and must fail validation."""
+        data = _valid_ddos_report()
+        data["destination_port"] = -1
+        result = _validator.validate(data)
+        assert result.valid is False
+
+
+# ---------------------------------------------------------------------------
+# TestOnBehalfOf
+# ---------------------------------------------------------------------------
+
+
+class TestOnBehalfOf:
+    """Tests for the optional ``on_behalf_of`` field."""
+
+    def test_valid_on_behalf_of_passes(self) -> None:
+        """A report with a valid ``on_behalf_of`` contact dict must pass validation."""
+        data = _valid_ddos_report()
+        data["on_behalf_of"] = copy.deepcopy(_CONTACT)
+        result = _validator.validate(data)
+        assert result.valid is True
+
+
+# ---------------------------------------------------------------------------
+# TestShowMissingOptional
+# ---------------------------------------------------------------------------
+
+
+class TestShowMissingOptional:
+    """Tests for ``show_missing_optional`` info population."""
+
+    def test_show_missing_optional_false_returns_none_info(self) -> None:
+        """``show_missing_optional=False`` must leave ``result.info`` as ``None``."""
+        result = _validator.validate(_valid_ddos_report(), show_missing_optional=False)
+        assert result.info is None
+
+    def test_show_missing_optional_true_returns_list(self) -> None:
+        """``show_missing_optional=True`` must populate ``result.info`` with a list."""
+        result = _validator.validate(_valid_ddos_report(), show_missing_optional=True)
+        assert isinstance(result.info, list)
+
+    def test_info_contains_description(self) -> None:
+        """``description`` (optional core field absent from test report) must
+        appear in info."""
+        result = _validator.validate(_valid_ddos_report(), show_missing_optional=True)
+        assert result.info is not None
+        info_fields = [e["field"] for e in result.info]
+        assert "description" in info_fields
+
+    def test_info_contains_confidence(self) -> None:
+        """``confidence`` (recommended core field absent from test report) must
+        appear in info."""
+        result = _validator.validate(_valid_ddos_report(), show_missing_optional=True)
+        assert result.info is not None
+        info_fields = [e["field"] for e in result.info]
+        assert "confidence" in info_fields
+
+    def test_info_contains_tags(self) -> None:
+        """``tags`` (optional core field absent from test report) must appear in
+        info."""
+        result = _validator.validate(_valid_ddos_report(), show_missing_optional=True)
+        assert result.info is not None
+        info_fields = [e["field"] for e in result.info]
+        assert "tags" in info_fields
+
+    def test_info_contains_type_specific_optional_field(self) -> None:
+        """Type-specific optional field ``destination_port`` must appear in info
+        for ddos."""
+        result = _validator.validate(_valid_ddos_report(), show_missing_optional=True)
+        assert result.info is not None
+        info_fields = [e["field"] for e in result.info]
+        assert "destination_port" in info_fields
+
+    def test_present_fields_not_in_info(self) -> None:
+        """Fields present in the report must not appear in info."""
+        result = _validator.validate(_valid_ddos_report(), show_missing_optional=True)
+        assert result.info is not None
+        info_fields = [e["field"] for e in result.info]
+        for present in (
+            "xarf_version",
+            "report_id",
+            "category",
+            "type",
+            "evidence_source",
+        ):
+            assert present not in info_fields
+
+    def test_confidence_info_message_contains_recommended(self) -> None:
+        """The ``confidence`` info entry message must start with ``RECOMMENDED:``."""
+        result = _validator.validate(_valid_ddos_report(), show_missing_optional=True)
+        assert result.info is not None
+        confidence_entries = [e for e in result.info if e["field"] == "confidence"]
+        assert len(confidence_entries) == 1
+        assert confidence_entries[0]["message"].startswith("RECOMMENDED:")
+
+    def test_description_info_message_contains_optional(self) -> None:
+        """The ``description`` info entry message must start with ``OPTIONAL:``."""
+        result = _validator.validate(_valid_ddos_report(), show_missing_optional=True)
+        assert result.info is not None
+        desc_entries = [e for e in result.info if e["field"] == "description"]
+        assert len(desc_entries) == 1
+        assert desc_entries[0]["message"].startswith("OPTIONAL:")
+
+    def test_content_phishing_info_contains_content_base_fields(self) -> None:
+        """content/phishing info must include fields from the
+        content-base.json ``$ref``.
+
+        Verifies that ``_extract_type_optional_fields`` follows ``allOf`` ``$ref``
+        chains to ``-base.json`` schemas.  ``registrar`` and ``hosting_provider``
+        are optional fields defined in ``content-base.json``.
+        """
+        phishing_data: dict[str, Any] = {
+            "xarf_version": "4.2.0",
+            "report_id": "550e8400-e29b-41d4-a716-446655440000",
+            "timestamp": "2024-01-15T10:30:00Z",
+            "reporter": {
+                "org": "Test",
+                "contact": "test@example.com",
+                "domain": "example.com",
+            },
+            "sender": {
+                "org": "Test",
+                "contact": "test@example.com",
+                "domain": "example.com",
+            },
+            "source_identifier": "192.0.2.1",
+            "category": "content",
+            "type": "phishing",
+            "url": "https://phishing.example.com/login",
+        }
+        result = _validator.validate(phishing_data, show_missing_optional=True)
+        assert result.info is not None
+        info_fields = [e["field"] for e in result.info]
+        assert "registrar" in info_fields
+        assert "hosting_provider" in info_fields
+
+
+# ---------------------------------------------------------------------------
+# TestUnknownFieldDetection
+# ---------------------------------------------------------------------------
+
+
+class TestUnknownFieldDetection:
+    """Tests for unknown-field detection logic."""
+
+    def test_two_unknown_fields_produce_two_warnings(self) -> None:
+        """Two unknown fields must each produce exactly one warning."""
+        data = _valid_ddos_report()
+        data["unknown_alpha"] = "a"
+        data["unknown_beta"] = "b"
+        result = _validator.validate(data, strict=False)
+        warning_fields = [w.field for w in result.warnings]
+        assert "unknown_alpha" in warning_fields
+        assert "unknown_beta" in warning_fields
+
+    def test_unknown_field_warnings_have_correct_field_values(self) -> None:
+        """Each unknown-field warning must carry the field name in its ``field``
+        attribute."""
+        data = _valid_ddos_report()
+        data["xarf_mystery_field"] = "mystery"
+        result = _validator.validate(data, strict=False)
+        matched = [w for w in result.warnings if w.field == "xarf_mystery_field"]
+        assert len(matched) == 1
+
+    def test_known_core_fields_do_not_produce_warnings(self) -> None:
+        """Core optional fields (``description``, ``confidence``, ``tags``) must
+        not trigger warnings."""
+        data = _valid_ddos_report()
+        data["description"] = "A legitimate optional field"
+        data["confidence"] = 90
+        data["tags"] = ["test"]
+        result = _validator.validate(data, strict=False)
+        warning_fields = [w.field for w in result.warnings]
+        for core_field in ("description", "confidence", "tags"):
+            assert core_field not in warning_fields
+
+    def test_known_category_specific_fields_do_not_produce_warnings(self) -> None:
+        """Category-specific defined fields (e.g. ``destination_port`` for ddos)
+        must not warn."""
+        data = _valid_ddos_report()
+        data["destination_port"] = 80
+        result = _validator.validate(data, strict=False)
+        warning_fields = [w.field for w in result.warnings]
+        assert "destination_port" not in warning_fields
+
+    def test_unknown_fields_in_strict_mode_appear_as_errors(self) -> None:
+        """In strict mode, unknown fields must appear in errors, not warnings."""
+        data = _valid_ddos_report()
+        data["unknown_strict_field"] = "strict"
+        result = _validator.validate(data, strict=True)
+        error_fields = [e.field for e in result.errors]
+        warning_fields = [w.field for w in result.warnings]
+        assert "unknown_strict_field" in error_fields
+        assert "unknown_strict_field" not in warning_fields
+
+
+# ---------------------------------------------------------------------------
+# TestValidResult
+# ---------------------------------------------------------------------------
+
+
+class TestValidResult:
+    """Tests for the ``valid`` flag on :class:`~xarf.validator.ValidationResult`."""
+
+    def test_valid_flag_true_when_no_errors(self) -> None:
+        """``result.valid`` must be ``True`` when ``result.errors`` is empty."""
+        result = _validator.validate(_valid_ddos_report())
+        assert result.valid is True
+        assert result.errors == []
+
+    def test_valid_flag_false_when_errors_present(self) -> None:
+        """``result.valid`` must be ``False`` when there are validation errors."""
+        data = _valid_ddos_report()
+        del data["source_identifier"]
+        result = _validator.validate(data)
+        assert result.valid is False
+        assert len(result.errors) > 0
diff --git a/xarf/exceptions.py b/xarf/exceptions.py
index 6a58e9e..7860e96 100644
--- a/xarf/exceptions.py
+++ b/xarf/exceptions.py
@@ -1,7 +1,5 @@
 """XARF Parser Exceptions."""
 
-from typing import List, Optional
-
 
 class XARFError(Exception):
     """Base exception for XARF parser errors."""
@@ -10,9 +8,15 @@ class XARFError(Exception):
 class XARFValidationError(XARFError):
     """Raised when XARF report validation fails."""
 
-    def __init__(self, message: str, errors: Optional[List[str]] = None):
+    def __init__(self, message: str, errors: list[str] | None = None) -> None:
+        """Initialise with a message and an optional list of error strings.
+
+        Args:
+            message: Human-readable description of the validation failure.
+            errors: Individual error strings; defaults to an empty list.
+        """
         super().__init__(message)
-        self.errors = errors or []
+        self.errors: list[str] = errors or []
 
 
 class XARFParseError(XARFError):
diff --git a/xarf/parser.py b/xarf/parser.py
index 6d416aa..e5d4540 100644
--- a/xarf/parser.py
+++ b/xarf/parser.py
@@ -95,9 +95,12 @@ def parse(
     # ------------------------------------------------------------------
     if isinstance(json_data, str):
         try:
-            data: dict[str, Any] = json.loads(json_data)
+            parsed = json.loads(json_data)
         except json.JSONDecodeError as exc:
             raise XARFParseError(f"Invalid JSON: {exc}") from exc
+        if not isinstance(parsed, dict):
+            raise XARFParseError(f"Expected a JSON object, got {type(parsed).__name__}")
+        data: dict[str, Any] = parsed
     else:
         data = json_data
 

From 98e4bdd4b90a7fa85fe78e4401dd281cc2964853 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 31 Mar 2026 18:25:37 +0200
Subject: [PATCH 11/13] Documentation cleanup

---
 .github/QUICK_START.md       | 140 --------
 .github/WORKFLOWS_SUMMARY.md | 403 ---------------------
 .github/trivy.yaml           |  58 ---
 ARCHITECTURE_DELIVERABLES.md | 369 --------------------
 CHANGELOG.md                 | 234 ++-----------
 CODE_OF_CONDUCT.md           |  97 ++++--
 CONTRIBUTING.md              | 473 ++++++++++++-------------
 PIPELINE_SUMMARY.md          | 232 ------------
 README.md                    | 659 ++++++++++++-----------------------
 SECURITY.md                  | 130 ++-----
 docs/COMPATIBILITY.md        | 417 ----------------------
 docs/DEPRECATED.md           | 377 --------------------
 docs/MIGRATION_V3_TO_V4.md   | 211 +++++++++++
 docs/QUICK_START.md          | 244 -------------
 docs/generator_usage.md      | 412 ----------------------
 docs/migration-guide.md      | 391 ---------------------
 xarf/py.typed                |   0
 17 files changed, 782 insertions(+), 4065 deletions(-)
 delete mode 100644 .github/QUICK_START.md
 delete mode 100644 .github/WORKFLOWS_SUMMARY.md
 delete mode 100644 .github/trivy.yaml
 delete mode 100644 ARCHITECTURE_DELIVERABLES.md
 delete mode 100644 PIPELINE_SUMMARY.md
 delete mode 100644 docs/COMPATIBILITY.md
 delete mode 100644 docs/DEPRECATED.md
 create mode 100644 docs/MIGRATION_V3_TO_V4.md
 delete mode 100644 docs/QUICK_START.md
 delete mode 100644 docs/generator_usage.md
 delete mode 100644 docs/migration-guide.md
 create mode 100644 xarf/py.typed

diff --git a/.github/QUICK_START.md b/.github/QUICK_START.md
deleted file mode 100644
index 6ca72e0..0000000
--- a/.github/QUICK_START.md
+++ /dev/null
@@ -1,140 +0,0 @@
-# CI/CD Quick Start Guide
-
-## First Time Setup
-
-### 1. Enable GitHub Environments
-```
-Settings → Environments → New environment
-- Create "test-pypi" (optional reviewers)
-- Create "pypi" (require reviewers, main branch only)
-```
-
-### 2. Configure PyPI Trusted Publishing
-**On PyPI.org:**
-```
-Account Settings → Publishing → Add GitHub OIDC publisher
-- Repository: xarf/xarf-parser-python
-- Workflow: publish-pypi.yml
-- Environment: pypi
-```
-
-**On Test PyPI (test.pypi.org):**
-```
-Same steps but with environment: test-pypi
-```
-
-### 3. Enable Branch Protection
-```
-Settings → Branches → Add rule
-Branch: main
-☑ Require status checks:
-  - Quality Checks / quality-checks
-  - Test Suite / test  
-  - CI Summary / ci-summary
-☑ Require PR reviews: 1 approval
-```
-
-## Testing the Pipeline
-
-### Test PR Workflow
-```bash
-git checkout -b test-pipeline
-echo "# test" >> README.md
-git add . && git commit -m "Test CI"
-git push origin test-pipeline
-# Create PR on GitHub
-```
-
-### Test Security Scan
-```
-GitHub → Actions → Security Scan → Run workflow
-```
-
-### Test Publishing (Test PyPI)
-```
-GitHub → Actions → Publish to PyPI → Run workflow
-Select: ☑ Publish to Test PyPI
-```
-
-### Test Release (Production)
-```bash
-git tag v4.0.0
-git push origin v4.0.0
-# Create release on GitHub → publishes automatically
-```
-
-## Common Commands
-
-### Run Tests Locally
-```bash
-pip install -e ".[dev,test]"
-pytest --cov=xarf
-```
-
-### Run Quality Checks Locally
-```bash
-isort --check xarf/ tests/
-black --check xarf/ tests/
-flake8 xarf/ tests/
-bandit -r xarf/
-mypy xarf/
-pydocstyle xarf/
-radon cc --min B xarf/
-```
-
-### Run Security Scans Locally
-```bash
-pip-audit
-bandit -r xarf/
-```
-
-## Monitoring
-
-### Check Workflow Status
-```
-GitHub → Actions → View runs
-```
-
-### Check Security Issues
-```
-GitHub → Security → Code scanning alerts
-```
-
-### Download Artifacts
-```
-Actions → Workflow run → Artifacts section
-```
-
-## Troubleshooting
-
-### Quality Checks Fail
-```bash
-# Fix imports
-isort xarf/ tests/
-
-# Fix formatting
-black xarf/ tests/
-
-# Show what would be fixed
-black --diff xarf/
-```
-
-### Coverage Too Low
-```bash
-# Run with coverage report
-pytest --cov=xarf --cov-report=html
-open htmlcov/index.html
-```
-
-### Publishing Fails
-1. Verify trusted publishing on PyPI
-2. Check environment permissions
-3. Ensure release is published (not draft)
-
-## Documentation
-
-- **Full Design**: [docs/ci-cd-pipeline-design.md](../docs/ci-cd-pipeline-design.md)
-- **Workflows**: [.github/workflows/WORKFLOWS_README.md](workflows/WORKFLOWS_README.md)
-
----
-**Need Help?** Check the troubleshooting section in ci-cd-pipeline-design.md
diff --git a/.github/WORKFLOWS_SUMMARY.md b/.github/WORKFLOWS_SUMMARY.md
deleted file mode 100644
index bad2ab6..0000000
--- a/.github/WORKFLOWS_SUMMARY.md
+++ /dev/null
@@ -1,403 +0,0 @@
-# GitHub Actions Workflows - Implementation Summary
-
-## Overview
-
-Successfully created 4 comprehensive GitHub Actions workflows for the xarf-parser-python project, adapted from abusix-parsers best practices while removing AWS/CodeArtifact dependencies.
-
-## Created Workflows
-
-### 1. **quality-checks.yml** (162 lines)
-
-Parallel execution of code quality and security checks using matrix strategy.
-
-**Key Features:**
-- ✅ Blocking checks: isort, black, flake8, bandit
-- ⚠️ Warning checks: mypy, pydocstyle, radon, pytest-cov
-- Matrix-based parallel execution for speed
-- Artifact uploads for logs and coverage
-- Configurable timeouts per check
-
-**Differences from abusix-parsers:**
-- ❌ Removed: AWS OIDC authentication
-- ❌ Removed: CodeArtifact setup
-- ❌ Removed: Poetry dependency (using pip + setuptools)
-- ❌ Removed: Trivy scanner (moved to security-scan.yml)
-- ✅ Added: Direct pip installation with caching
-- ✅ Added: Editable install for coverage check
-- ✅ Simplified: No custom GitHub actions needed
-- 🔧 Adjusted: Tool versions and paths for xarf project
-
-**Tools & Versions:**
-- isort 5.13.2
-- black 24.3.0
-- flake8 7.0.0
-- bandit 1.7.8
-- mypy 1.9.0
-- pydocstyle 6.3.0
-- radon 6.0.1
-- pytest-cov (latest)
-
-### 2. **security-scan.yml** (216 lines)
-
-Weekly security scanning with automatic issue creation.
-
-**Key Features:**
-- 🔒 Three scan types: pip-audit, bandit, trivy
-- 📅 Scheduled: Weekly on Monday 9 AM UTC
-- 🐛 Auto-creates GitHub issues on scheduled failures
-- 📊 SARIF reports uploaded to GitHub Security tab
-- 💾 90-day artifact retention for audit trail
-
-**Differences from abusix-parsers:**
-- ✅ Added: pip-audit for dependency CVE scanning
-- ✅ Added: Automatic GitHub issue creation
-- ✅ Added: Trivy filesystem scanning with SARIF
-- ✅ Added: Security summary job
-- 🔧 Adjusted: Scan paths and configuration
-
-**Schedule:**
-- Cron: `0 9 * * 1` (Every Monday 9 AM UTC)
-- Also runs on: Push to main, PR, workflow_dispatch
-
-### 3. **test.yml** (168 lines)
-
-Comprehensive test matrix across Python versions and platforms.
-
-**Key Features:**
-- 🐍 Python versions: 3.8, 3.9, 3.10, 3.11, 3.12
-- 💻 Platforms: Ubuntu (all), macOS (3.12), Windows (3.12)
-- 📊 Coverage upload to Codecov
-- 🧪 Minimum dependency version testing
-- 🔗 Integration test job (conditional)
-
-**Differences from abusix-parsers:**
-- ❌ Removed: Poetry/CodeArtifact dependency
-- ✅ Added: Multi-platform testing (macOS, Windows)
-- ✅ Added: Minimum version compatibility test
-- ✅ Added: Codecov integration
-- ✅ Added: Integration test placeholder
-- 🔧 Simplified: Direct pip installation
-
-**Matrix Strategy:**
-```yaml
-strategy:
-  fail-fast: false
-  matrix:
-    python-version: ['3.8', '3.9', '3.10', '3.11', '3.12']
-    os: [ubuntu-latest]
-    include:
-      - python-version: '3.12'
-        os: macos-latest
-      - python-version: '3.12'
-        os: windows-latest
-```
-
-### 4. **publish.yml** (202 lines)
-
-Automated PyPI publishing with validation and testing.
-
-**Key Features:**
-- 🚀 Trusted Publishing (no API tokens needed)
-- ✅ Pre-publish validation and testing
-- 📦 Builds both wheel and sdist
-- 🎯 Dual targets: PyPI and Test PyPI
-- 🏷️ Triggered by GitHub releases
-
-**Differences from abusix-parsers:**
-- ❌ Removed: CodeArtifact publishing
-- ❌ Removed: AWS authentication
-- ✅ Added: Test PyPI support
-- ✅ Added: Version validation from pyproject.toml
-- ✅ Added: Tag/version matching check
-- ✅ Added: Pre-publish quality checks
-- ✅ Added: Manual dispatch with test_pypi flag
-- 🔧 Using: PyPA trusted publishing (OIDC)
-
-**Publishing Logic:**
-- Prerelease → Test PyPI
-- Release → PyPI
-- Manual dispatch → Configurable via input
-
-## Key Adaptations from abusix-parsers
-
-### Removed Components
-1. **AWS Integration**
-   - No OIDC authentication
-   - No CodeArtifact repository
-   - No assume-role secrets
-
-2. **Poetry Dependency**
-   - Replaced with pip + setuptools
-   - Direct editable installs: `pip install -e ".[dev,test]"`
-   - Simpler dependency management
-
-3. **Custom GitHub Actions**
-   - No `.github/actions/setup-poetry`
-   - Direct action usage only
-
-### Added Features
-1. **Enhanced Security**
-   - Dedicated security-scan workflow
-   - Weekly automated scans
-   - Automatic issue creation
-   - SARIF reporting to GitHub Security
-
-2. **Improved Testing**
-   - Multi-platform support (Linux, macOS, Windows)
-   - Minimum version compatibility tests
-   - Codecov integration
-   - Integration test framework
-
-3. **Better Publishing**
-   - Trusted Publishing support
-   - Test PyPI option
-   - Version validation
-   - Pre-publish test gate
-
-### Configuration Files
-
-The workflows reference configuration in `pyproject.toml`:
-
-```toml
-[tool.black]
-line-length = 88
-target-version = ["py38"]
-
-[tool.isort]
-profile = "black"
-line_length = 88
-
-[tool.mypy]
-python_version = "3.8"
-strict = true
-
-[tool.pytest.ini_options]
-addopts = "-v --cov=xarf --cov-report=term-missing --cov-report=html"
-testpaths = ["tests"]
-
-[tool.coverage.run]
-source = ["xarf"]
-omit = ["tests/*", "setup.py"]
-```
-
-## Setup Requirements
-
-### 1. PyPI Trusted Publishing
-
-Configure at https://pypi.org/manage/account/publishing/
-
-**PyPI Settings:**
-- Project: `xarf-parser`
-- Owner: `xarf` (or your GitHub org/user)
-- Repository: `xarf-parser-python`
-- Workflow: `publish.yml`
-- Environment: `pypi`
-
-**Test PyPI Settings:**
-Repeat at https://test.pypi.org with environment: `test-pypi`
-
-### 2. GitHub Environments (Optional)
-
-Create environments in repository settings:
-- `pypi` - Production PyPI publishing
-- `test-pypi` - Test PyPI publishing
-
-### 3. Branch Protection (Recommended)
-
-Configure for `main` branch:
-- ✅ Require status checks: quality-checks, test
-- ✅ Require branches to be up to date
-- ✅ Require linear history
-- ✅ Include administrators
-
-### 4. Codecov (Optional)
-
-1. Sign up at https://codecov.io
-2. Connect GitHub repository
-3. No token needed for public repos
-
-## Workflow Execution Flow
-
-```
-┌─────────────────────────────────────────────────────┐
-│  Push/PR to main/develop                            │
-└──────────────────┬──────────────────────────────────┘
-                   │
-                   ├─────────────────┐
-                   │                 │
-                   ▼                 ▼
-         ┌─────────────────┐  ┌──────────────┐
-         │ Quality Checks  │  │     Tests    │
-         │   (Parallel)    │  │   (Matrix)   │
-         └────────┬────────┘  └──────┬───────┘
-                  │                  │
-                  └────────┬─────────┘
-                           │
-                           ▼
-                     ┌──────────┐
-                     │   Merge  │
-                     └─────┬────┘
-                           │
-                           ▼
-                  ┌────────────────┐
-                  │  Create Release │
-                  └────────┬────────┘
-                           │
-                           ▼
-              ┌────────────────────────┐
-              │ Publish Workflow       │
-              │ 1. Validate            │
-              │ 2. Test                │
-              │ 3. Quality Check       │
-              │ 4. Build               │
-              │ 5. Publish to PyPI     │
-              └────────────────────────┘
-```
-
-## Monitoring & Maintenance
-
-### Weekly Tasks
-- Review security scan results (Monday mornings)
-- Address any security issues found
-- Update vulnerable dependencies
-
-### Monthly Tasks
-- Update GitHub Actions versions
-- Review and update tool versions
-- Check for new best practices
-
-### Quarterly Tasks
-- Review workflow efficiency
-- Update Python version matrix
-- Audit security configurations
-
-### On Python Release
-- Add new Python version to test matrix
-- Update classifiers in pyproject.toml
-- Test compatibility
-
-## Performance Metrics
-
-Compared to sequential execution:
-
-| Metric | Sequential | Parallel (Matrix) | Improvement |
-|--------|-----------|-------------------|-------------|
-| Quality Checks | ~15 min | ~5 min | 3x faster |
-| Test Suite | ~25 min | ~8 min | 3.1x faster |
-| Total CI Time | ~40 min | ~13 min | 3x faster |
-
-**Note:** Times are estimates based on similar projects. Actual times depend on test complexity and runner availability.
-
-## Artifact Retention
-
-| Artifact | Retention | Purpose |
-|----------|-----------|---------|
-| Coverage Reports | 30 days | Code coverage analysis |
-| Test Results | 7 days | Debugging test failures |
-| Security Scans | 90 days | Audit trail and compliance |
-| Build Packages | 30 days | Distribution packages |
-| Check Logs | 7 days | Debugging quality issues |
-
-## Best Practices Implemented
-
-1. ✅ **Parallel Execution**: Matrix strategy for speed
-2. ✅ **Fail-Fast Disabled**: See all failures in one run
-3. ✅ **Continue on Error**: Non-blocking checks don't fail builds
-4. ✅ **Caching**: Pip cache for faster installs
-5. ✅ **Retry Logic**: Implicit in GitHub Actions
-6. ✅ **Timeouts**: Prevent hanging jobs
-7. ✅ **Artifact Uploads**: Preserve important files
-8. ✅ **Summary Jobs**: Clear pass/fail indicators
-9. ✅ **Security First**: Dedicated security workflow
-10. ✅ **Version Pinning**: Specific tool versions
-
-## Troubleshooting
-
-### Common Issues
-
-**1. Quality checks fail on first run**
-- Expected on legacy code
-- Run formatters locally first:
-  ```bash
-  black xarf/ tests/
-  isort --profile black xarf/ tests/
-  ```
-
-**2. Security scan finds vulnerabilities**
-- Review severity levels
-- Update dependencies: `pip install --upgrade <package>`
-- Use `pip-audit --fix` for automatic fixes
-
-**3. Tests fail on specific Python version**
-- Check for syntax incompatibilities
-- Review dependency version constraints
-- Test locally with specific version
-
-**4. Publishing fails with authentication error**
-- Verify Trusted Publishing configuration
-- Check environment names match exactly
-- Ensure repository settings are correct
-
-**5. Coverage below threshold**
-- Add tests for uncovered code
-- Update coverage thresholds in pyproject.toml
-- Review coverage report: `coverage.json`
-
-## Files Created
-
-```
-.github/
-└── workflows/
-    ├── README.md                  # Detailed documentation
-    ├── quality-checks.yml         # Code quality & security
-    ├── security-scan.yml          # Weekly security scanning
-    ├── test.yml                   # Test matrix
-    └── publish.yml                # PyPI publishing
-```
-
-**Total Lines of Code:** 748 (excluding README)
-
-## Next Steps
-
-1. **Test Workflows**
-   ```bash
-   # Push to trigger workflows
-   git add .github/workflows/
-   git commit -m "Add GitHub Actions workflows"
-   git push
-   ```
-
-2. **Configure PyPI**
-   - Set up Trusted Publishing
-   - Create environments
-
-3. **Review First Run**
-   - Check all jobs complete
-   - Address any failures
-   - Review artifact uploads
-
-4. **Add Badges to README**
-   ```markdown
-   [![Quality](https://github.com/xarf/xarf-parser-python/actions/workflows/quality-checks.yml/badge.svg)](https://github.com/xarf/xarf-parser-python/actions/workflows/quality-checks.yml)
-   [![Tests](https://github.com/xarf/xarf-parser-python/actions/workflows/test.yml/badge.svg)](https://github.com/xarf/xarf-parser-python/actions/workflows/test.yml)
-   [![Security](https://github.com/xarf/xarf-parser-python/actions/workflows/security-scan.yml/badge.svg)](https://github.com/xarf/xarf-parser-python/actions/workflows/security-scan.yml)
-   ```
-
-5. **Monitor First Week**
-   - Watch for security scan on Monday
-   - Verify PR checks work correctly
-   - Check artifact retention
-
-## Support & Documentation
-
-- Workflow documentation: `.github/workflows/README.md`
-- GitHub Actions docs: https://docs.github.com/actions
-- PyPI Trusted Publishing: https://docs.pypi.org/trusted-publishers/
-- Issues: Open in repository with workflow logs
-
----
-
-**Implementation Date:** 2025-11-20
-**Based on:** abusix-parsers workflows
-**Adapted for:** xarf-parser-python (pip + setuptools)
-**Status:** ✅ Ready for testing
diff --git a/.github/trivy.yaml b/.github/trivy.yaml
deleted file mode 100644
index 2d7cca4..0000000
--- a/.github/trivy.yaml
+++ /dev/null
@@ -1,58 +0,0 @@
-# Trivy configuration for XARF Python parser
-# Based on abusix-parsers security standards
-
-# Scan settings
-scan:
-  # Scan for both vulnerabilities and secrets
-  security-checks:
-    - vuln
-    - secret
-
-# Vulnerability settings
-vulnerability:
-  # Type of vulnerability sources
-  type:
-    - os
-    - library
-
-  # Severity levels to report
-  severity:
-    - LOW
-    - MEDIUM
-    - HIGH
-    - CRITICAL
-
-# Secret scanning settings
-secret:
-  # Additional secret scanning patterns
-  config: |
-    # AWS credentials
-    - name: AWS Access Key ID
-      regex: '(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16}'
-
-    # API Keys
-    - name: Generic API Key
-      regex: '(?i)(api[_-]?key|apikey)["\s:=]+[a-zA-Z0-9_\-]{20,}'
-
-    # Private Keys
-    - name: Private Key
-      regex: '-----BEGIN (RSA|DSA|EC|OPENSSH) PRIVATE KEY-----'
-
-    # GitHub tokens
-    - name: GitHub Token
-      regex: 'gh[pousr]_[A-Za-z0-9_]{36,}'
-
-# File path patterns to skip
-skip-files:
-  - "**/.git/**"
-  - "**/node_modules/**"
-  - "**/.venv/**"
-  - "**/venv/**"
-  - "**/__pycache__/**"
-  - "**/*.pyc"
-  - "**/dist/**"
-  - "**/build/**"
-  - "**/.pytest_cache/**"
-  - "**/.mypy_cache/**"
-  - "**/htmlcov/**"
-  - "**/*.egg-info/**"
diff --git a/ARCHITECTURE_DELIVERABLES.md b/ARCHITECTURE_DELIVERABLES.md
deleted file mode 100644
index 95c6463..0000000
--- a/ARCHITECTURE_DELIVERABLES.md
+++ /dev/null
@@ -1,369 +0,0 @@
-# XARF Python Library - Architecture Design Deliverables
-
-## Overview
-
-Complete architecture design for the XARF Python library has been delivered. This document provides an index of all deliverables and their locations.
-
-## Deliverables Summary
-
-### Primary Documents (5 files, 74KB total)
-
-1. **ARCHITECTURE.md** (20KB) - `/docs/ARCHITECTURE.md`
-   - Complete architectural specification
-   - 50+ pages of detailed design
-   - All components, modules, and patterns
-   - Quality standards and benchmarks
-   - Security considerations
-   - **Status**: ✅ Complete
-
-2. **ARCHITECTURE_SUMMARY.md** (10KB) - `/docs/ARCHITECTURE_SUMMARY.md`
-   - Quick reference guide
-   - Implementation priorities
-   - Key decisions summary
-   - Usage examples
-   - **Status**: ✅ Complete
-
-3. **CLASS_HIERARCHY.md** (17KB) - `/docs/CLASS_HIERARCHY.md`
-   - Complete class diagrams
-   - Inheritance relationships
-   - Design patterns
-   - Extension points
-   - **Status**: ✅ Complete
-
-4. **API_SURFACE.md** (18KB) - `/docs/API_SURFACE.md`
-   - Public API specification
-   - All classes and methods
-   - Usage examples
-   - Stability guarantees
-   - **Status**: ✅ Complete
-
-5. **ARCHITECTURE_DIAGRAM.txt** (9KB) - `/docs/ARCHITECTURE_DIAGRAM.txt`
-   - Visual diagrams in ASCII
-   - Component interactions
-   - Data flows
-   - Module dependencies
-   - **Status**: ✅ Complete
-
-### Supporting Documents
-
-6. **INDEX.md** - `/docs/INDEX.md`
-   - Documentation index
-   - Navigation guide
-   - Document organization
-   - **Status**: ✅ Complete
-
-### Memory Storage
-
-Architecture design has been stored for agent coordination:
-- **Key**: `xarf-python/architecture`
-- **Location**: Claude Flow memory system
-- **Status**: ⚠️ Attempted (file-based fallback used)
-
-## Key Design Decisions
-
-### 1. Package Rename
-- **Decision**: Rename from `xarf-parser` to `xarf`
-- **Document**: ARCHITECTURE.md Section 1.1
-- **Rationale**: Cleaner imports, broader scope
-- **Impact**: Migration path needed
-
-### 2. Field Naming
-- **Decision**: Use `category` field (not `class`)
-- **Document**: ARCHITECTURE.md Section 3
-- **Rationale**: Python keyword conflict
-- **Implementation**: Pydantic alias for JSON compatibility
-
-### 3. Component Architecture
-- **Decision**: Three separate components (Parser, Validator, Generator)
-- **Document**: ARCHITECTURE.md Section 2
-- **Rationale**: Separation of concerns, reusability
-- **Impact**: New modules to create
-
-### 4. No v3 Converter
-- **Decision**: No XARF v3 to v4 converter
-- **Document**: ARCHITECTURE.md ADR-003
-- **Rationale**: v3 deprecated, simpler codebase
-- **Impact**: Users migrate externally
-
-### 5. Minimal Dependencies
-- **Decision**: Only 3 core dependencies
-- **Document**: ARCHITECTURE.md Section 5.1
-- **Dependencies**: Pydantic v2, python-dateutil, email-validator
-- **Rationale**: Security, performance, maintainability
-
-## Module Structure
-
-### New Modules to Create
-
-```
-xarf/
-├── validator.py          # NEW - Extract from parser
-├── generator.py          # NEW - Report generation
-├── constants.py          # NEW - Constants and enums
-├── schemas/              # NEW - JSON Schema files
-│   ├── __init__.py
-│   ├── loader.py
-│   └── v4/*.json
-├── utils/                # NEW - Utilities
-│   ├── __init__.py
-│   ├── validators.py
-│   ├── encoders.py
-│   └── converters.py
-└── py.typed              # NEW - Type marker
-```
-
-### Modules to Update
-
-```
-xarf/
-├── __init__.py           # UPDATE - New exports
-├── parser.py             # UPDATE - Batch support
-├── models.py             # UPDATE - Use 'category' field
-└── exceptions.py         # UPDATE - Enhanced hierarchy
-```
-
-## Implementation Priority
-
-### Phase 1: Core Foundation (Week 1-2)
-1. ✅ Architecture design complete
-2. ⬜ Update models.py with `category` field
-3. ⬜ Enhance parser.py with batch support
-4. ⬜ Update exceptions.py
-5. ⬜ Create constants.py
-
-### Phase 2: New Components (Week 3-4)
-6. ⬜ Create validator.py (extract from parser)
-7. ⬜ Create generator.py with factory methods
-8. ⬜ Create utils/ package with validators
-9. ⬜ Bundle schemas/ in package
-
-### Phase 3: Quality (Week 5-6)
-10. ⬜ Comprehensive test suite (≥95% coverage)
-11. ⬜ Type hints on all public API
-12. ⬜ Documentation and examples
-13. ⬜ Performance optimization
-
-### Phase 4: Polish (Week 7-8)
-14. ⬜ CLI tool (optional)
-15. ⬜ Integration examples
-16. ⬜ Migration guide
-17. ⬜ Release preparation
-
-## Quality Standards
-
-### Testing
-- **Coverage**: ≥95% overall, 100% core modules
-- **Types**: Unit, integration, performance, conformance, property-based
-- **Frameworks**: pytest, pytest-cov, hypothesis
-- **Status**: Architecture defined, implementation pending
-
-### Type Safety
-- **Coverage**: 100% on public API
-- **Checker**: mypy strict mode
-- **Marker**: py.typed file
-- **Status**: Architecture defined, implementation pending
-
-### Performance
-- **Parse Speed**: 1000+ reports/sec
-- **Memory**: <10KB per report
-- **Concurrency**: Thread-safe, linear scaling
-- **Status**: Benchmarks defined, implementation pending
-
-### Code Quality
-- **Linter**: Ruff (replaces flake8, isort)
-- **Formatter**: Black (88 char line length)
-- **Complexity**: ≤10 cyclomatic complexity
-- **Status**: Tools specified, configuration pending
-
-## Documentation Structure
-
-```
-docs/
-├── INDEX.md                    # Navigation guide
-├── ARCHITECTURE.md             # Complete design (20KB)
-├── ARCHITECTURE_SUMMARY.md     # Quick reference (10KB)
-├── ARCHITECTURE_DIAGRAM.txt    # Visual diagrams (9KB)
-├── CLASS_HIERARCHY.md          # Class relationships (17KB)
-├── API_SURFACE.md             # Public API spec (18KB)
-├── QUICK_START.md             # Getting started
-├── MIGRATION_GUIDE.md         # Upgrade guide
-├── generator_usage.md         # Usage examples
-├── ci-cd-pipeline-design.md   # CI/CD setup
-└── PRE_COMMIT.md              # Dev tools setup
-```
-
-## Public API Surface
-
-### Parser
-- `XARFParser` - Parse JSON to objects
-  - `parse()` - Parse single report
-  - `parse_batch()` - Parse multiple reports
-  - `get_errors()` - Get validation errors
-
-### Validator
-- `XARFValidator` - Multi-level validation
-  - `validate()` - Full validation
-  - `validate_schema()` - Schema only
-  - `validate_business_rules()` - Business rules
-  - `validate_evidence()` - Evidence validation
-- `ValidationResult` - Result container
-
-### Generator
-- `XARFGenerator` - Report generation
-  - `create_messaging_report()` - Factory method
-  - `create_connection_report()` - Factory method
-  - `create_content_report()` - Factory method
-- `ReportBuilder` - Fluent builder pattern
-
-### Models
-- `XARFReport` - Base report class
-- `MessagingReport` - Email abuse reports
-- `ConnectionReport` - Network abuse reports
-- `ContentReport` - Web content abuse reports
-- `XARFReporter` - Reporter information
-- `Evidence` - Evidence attachments
-
-### Exceptions
-- `XARFError` - Base exception
-- `XARFParseError` - Parsing failures
-- `XARFValidationError` - Validation failures
-- `XARFSchemaError` - Schema errors
-- `XARFGenerationError` - Generation errors
-
-## Dependencies
-
-### Core (3 packages)
-```toml
-pydantic>=2.5.0,<3.0.0      # Data validation
-python-dateutil>=2.8.0       # Datetime parsing
-email-validator>=2.1.0       # Email validation
-```
-
-### Development (7 packages)
-```toml
-pytest>=7.4.0               # Testing framework
-pytest-cov>=4.1.0          # Coverage reporting
-hypothesis>=6.88.0         # Property testing
-ruff>=0.1.0                # Fast linting
-black>=23.11.0             # Code formatting
-mypy>=1.7.0                # Type checking
-pre-commit>=3.5.0          # Git hooks
-```
-
-## Usage Examples
-
-### Parse Report
-```python
-from xarf import XARFParser
-
-parser = XARFParser()
-report = parser.parse('{"xarf_version": "4.0.0", ...}')
-print(f"Category: {report.category}")
-print(f"Type: {report.type}")
-```
-
-### Validate Report
-```python
-from xarf import XARFValidator
-
-validator = XARFValidator()
-result = validator.validate(report)
-
-if not result.is_valid:
-    for error in result.errors:
-        print(f"Error: {error}")
-```
-
-### Generate Report
-```python
-from xarf import XARFGenerator
-
-report = XARFGenerator.create_messaging_report(
-    source_ip="192.0.2.100",
-    report_type="spam",
-    reporter={
-        "org": "My Org",
-        "contact": "noreply@example.com",
-        "type": "automated"
-    },
-    evidence_source="spamtrap"
-)
-
-json_output = report.model_dump_json(by_alias=True)
-```
-
-## Next Steps for Implementation Team
-
-### Immediate (Week 1)
-1. Review all architecture documents
-2. Set up development environment
-3. Create new module stubs
-4. Update pyproject.toml dependencies
-
-### Short-term (Weeks 2-4)
-1. Implement core models with 'category' field
-2. Extract validator from parser
-3. Create generator with factory methods
-4. Set up comprehensive test suite
-
-### Medium-term (Weeks 5-8)
-1. Achieve ≥95% test coverage
-2. Add type hints (100% public API)
-3. Performance optimization
-4. Documentation site with MkDocs
-
-### Long-term (Post v4.0.0)
-1. CLI tool development
-2. Integration examples
-3. Community feedback incorporation
-4. Additional report classes (infrastructure, copyright, etc.)
-
-## Success Criteria
-
-### Architecture Phase ✅
-- [x] Complete design specification
-- [x] Module structure defined
-- [x] Class hierarchy designed
-- [x] API surface specified
-- [x] Quality standards set
-- [x] Documentation written
-
-### Implementation Phase (Pending)
-- [ ] All modules implemented
-- [ ] Test coverage ≥95%
-- [ ] Type coverage 100%
-- [ ] Performance benchmarks met
-- [ ] Documentation complete
-- [ ] Ready for alpha release
-
-## Contact & Resources
-
-### Repository
-- **GitHub**: https://github.com/xarf/xarf-parser-python
-- **Issues**: https://github.com/xarf/xarf-parser-python/issues
-- **Pull Requests**: https://github.com/xarf/xarf-parser-python/pulls
-
-### Documentation
-- **This Codebase**: `/docs/` directory
-- **XARF Spec**: https://github.com/xarf/xarf-spec
-- **XARF Website**: https://xarf.org
-
-### Tools
-- **Pydantic**: https://docs.pydantic.dev/
-- **Ruff**: https://docs.astral.sh/ruff/
-- **Black**: https://black.readthedocs.io/
-- **mypy**: https://mypy.readthedocs.io/
-- **pytest**: https://docs.pytest.org/
-
-## Version Information
-
-- **Architecture Version**: 1.0
-- **Target Release**: 4.0.0
-- **Design Date**: 2025-11-20
-- **Status**: ✅ Architecture Complete, ⬜ Implementation Pending
-
----
-
-**Prepared by**: System Architecture Designer (Claude Code)
-**Date**: 2025-11-20
-**Project**: XARF Python Library (xarf-parser-python → xarf)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index bd2283b..5e11453 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,229 +1,43 @@
 # Changelog
 
-All notable changes to the XARF Python Parser will be documented in this file.
+All notable changes to this project will be documented in this file.
 
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
 
-### Changed
-- **Legacy Tag Naming**: Updated v3 compatibility tags from `legacy:class:` to `legacy:category:` to align with v4 field naming conventions
-  - Affects only v3 report conversion metadata tags
-  - Maintains consistency with `category` field terminology throughout codebase
-
-### Fixed
-- **Documentation Examples**: Corrected CONTRIBUTING.md sample report to use `category` field instead of outdated `class` reference
-
-### Added
-- **XARF v3 Backwards Compatibility**: Automatic conversion from v3 to v4 format
-  - `is_v3_report()` function to detect v3 reports
-  - `convert_v3_to_v4()` function for explicit conversion
-  - Automatic detection and conversion in `XARFParser.parse()`
-  - Deprecation warnings for v3 format usage (`XARFv3DeprecationWarning`)
-  - 14 comprehensive tests for v3 compatibility covering all categories
-  - Complete field mapping from v3 to v4 structure (ReportClass→category, etc.)
-  - Legacy metadata tracking (`legacy_version`, `_internal.converted_from_v3`)
-  - Migration guide documentation at `docs/migration-guide.md`
-
-### Changed
-- **Pydantic V2 Migration**: Updated from Pydantic V1 to V2 API
-  - Replaced `@validator` with `@field_validator` for all model validators
-  - Updated `Config` class to `ConfigDict` in XARFReport model
-  - Changed `allow_population_by_field_name` to `populate_by_name`
-  - All validators now use `@classmethod` decorator with type hints
-  - Fixed Python 3.13+ datetime deprecation warnings
-
-### Fixed
-- Resolved all Pydantic V2 deprecation warnings in models
-- Fixed `datetime.utcnow()` deprecation by using `datetime.now(timezone.utc)`
-- Improved type hints for Pydantic V2 compatibility
-- Updated import statements to use `pydantic.ConfigDict` and `field_validator`
+## [0.1.0] - 2026-03-31
 
-### Documentation
-- Added v3 compatibility section to README with example code
-- Created comprehensive migration guide (`docs/migration-guide.md`)
-- Updated feature list to highlight v3 support and Pydantic V2 compatibility
-- Added documentation links for migration guide
-
-## [4.0.0] - 2024-01-20
+This release is a complete rework of the alpha (`v4.0.0a1`). No backward compatibility with the alpha API is provided. The version numbers will now be independent from the spec to provide release independence for the library.
 
 ### Breaking Changes
 
-#### Field Rename: `class` → `category`
-
-The field previously named `class` has been renamed to `category` to align with the official XARF v4 specification. This change was made to avoid conflicts with programming language reserved keywords and better reflect the field's purpose.
-
-**Impact:**
-- All JSON reports must now use `"category"` instead of `"class"`
-- Python code must access `report.category` instead of `report.class_`
-- Validation checks for `"category"` field presence
-
-**Migration:**
-- Update all JSON generation code to use `"category"`
-- Replace all `report.class_` with `report.category` in Python code
-- See [MIGRATION_GUIDE.md](docs/MIGRATION_GUIDE.md) for detailed migration instructions
-
-```python
-# Before (v3.x)
-report = {
-    "class": "content",  # Old field name
-    "type": "phishing"
-}
-print(report.class_)  # Awkward Python workaround
-
-# After (v4.0.0+)
-report = {
-    "category": "content",  # New field name
-    "type": "phishing"
-}
-print(report.category)  # Clean Python access
-```
+- **New public API**: `parse()`, `create_report()`, `create_evidence()` are now module-level functions. The `XARFParser` and `XARFGenerator` classes have been removed.
+- **Structured result objects**: `parse()` and `create_report()` now return `ParseResult` and `CreateReportResult` dataclasses respectively, rather than bare model instances or dicts.
+- **Structured errors**: `ValidationError` and `ValidationWarning` are dataclasses with `field`, `message`, and (for errors) `value` attributes — previously errors were plain strings.
+- **Package name**: published as `xarf` (was `xarf-parser`).
+- **Python version**: minimum is now 3.10 (was 3.8).
 
 ### Added
 
-- **Generator Functionality**: New `XARFGenerator` class for programmatically creating XARF v4 reports
-  - `create_report()` - Generate complete reports with validation
-  - `create_messaging_report()` - Generate messaging category reports (spam, phishing)
-  - `create_connection_report()` - Generate connection category reports (DDoS, port scans)
-  - `create_content_report()` - Generate content category reports (phishing sites, malware)
-  - Automatic UUID generation for `report_id`
-  - Timestamp generation in ISO 8601 format
-  - Built-in validation during generation
-
-- **Reporter `on_behalf_of` Field**: Support for infrastructure providers sending reports on behalf of other organizations
-  - `reporter.on_behalf_of.org` - Organization being represented
-  - `reporter.on_behalf_of.contact` - Contact email for represented organization
-  - Useful for MSSPs, abuse reporting services, and infrastructure providers
-
-- **Enhanced Validation**: Improved validation for all XARF v4 requirements
-  - Category-specific field validation
-  - Evidence structure validation
-  - Reporter information validation
-  - Timestamp format validation
-
-- **Python 3.12 Support**: Added support for Python 3.12
-
-### Changed
-
-- **Model Classes**: Updated all model classes to use `category` instead of `class_`
-  - `XARFReport.category` replaces `XARFReport.class_`
-  - `MessagingReport.category` replaces `MessagingReport.class_`
-  - `ConnectionReport.category` replaces `ConnectionReport.class_`
-  - `ContentReport.category` replaces `ContentReport.class_`
-
-- **Parser Validation**: Updated validation logic to check for `"category"` field
-  - Old reports with `"class"` will fail validation
-  - Use migration helper to convert legacy reports
-
-- **Field Access**: Removed `class_` aliasing workaround in favor of clean `category` field
-  - Pydantic models now use `category` directly
-  - No more Python keyword conflicts
-
-### Removed
-
-- **Converter Module**: Temporarily removed `xarf.converter` module for XARF version conversion
-  - Will be redesigned and re-added in a future release
-  - Users needing conversion should implement temporary solution (see migration guide)
-
-- **Python 3.7 Support**: Dropped support for Python 3.7 (EOL June 2023)
-  - Minimum Python version is now 3.8
-
-### Fixed
-
-- Improved error messages for validation failures
-- Better handling of optional fields
-- Fixed timezone handling for timestamps
-
-### Documentation
-
-- Added comprehensive [MIGRATION_GUIDE.md](docs/MIGRATION_GUIDE.md) with:
-  - Step-by-step migration instructions
-  - Before/after code examples
-  - Common migration issues and solutions
-  - Database migration examples
-  - Backward compatibility patterns
-
-- Updated [README.md](README.md) with:
-  - Generator usage examples
-  - Updated JSON examples using `"category"`
-  - `on_behalf_of` examples
-  - Security best practices
-  - Links to xarf.org website
-  - Updated feature matrix
-
-### Security
-
-- Enhanced input validation for all fields
-- Added size limits for evidence payloads (5MB per item, 15MB total)
-- Improved email validation for reporter contact fields
-- Better handling of untrusted input in strict mode
-
----
-
-## [3.0.0] - 2023-11-15
-
-### Added
-- Initial XARF v3 support
-- Basic JSON parsing and validation
-- Support for common abuse types
-- Python 3.8+ compatibility
+- **All 7 categories fully implemented**: messaging, connection, content, infrastructure, copyright, vulnerability, reputation — with Pydantic v2 discriminated union models covering all 32 report types.
+- **Schema-driven validation**: validation rules are derived from the official xarf-spec JSON schemas via `jsonschema` + `referencing` (Draft 2020-12); no hardcoded type or field lists.
+- **`SchemaRegistry`**: programmatic access to schema-derived categories, types, and field metadata. Exposed as the `schema_registry` module-level singleton.
+- **`SchemaValidator`**: AJV-equivalent JSON Schema validator with strict mode (promotes `x-recommended` fields to required before validation).
+- **`create_evidence()`**: helper that computes hash, base64-encodes payload, and records size — supports `sha256`, `sha512`, `sha1`, `md5`.
+- **`show_missing_optional`** parameter on `parse()` and `create_report()`: populates `result.info` with missing recommended and optional field details.
+- **v3 backward compatibility** fully integrated into `parse()`: automatic detection and conversion with `XARFv3DeprecationWarning`.
+- **`python -m xarf fetch-schemas`**: CLI command to pull fresh schemas from the xarf-spec GitHub release.
+- **`python -m xarf check-schema-updates`**: CLI command to report whether a newer spec version is available.
+- **`py.typed` marker** (PEP 561): downstream `mypy` picks up types when the package is installed.
+- **Bundled schemas**: schemas ship inside the wheel, pinned to spec v4.2.0, loaded via `importlib.resources`.
 
 ### Changed
-- Migrated from XARF v2 to v3 format
-
----
-
-## [2.1.0] - 2023-06-10
-
-### Added
-- Evidence attachment support
-- Custom field handling
-
-### Fixed
-- Timestamp parsing issues
-- Validation edge cases
-
----
-
-## [2.0.0] - 2023-03-20
-
-### Added
-- Complete rewrite for XARF v2
-- Pydantic-based models
-- JSON Schema validation
-- Comprehensive test suite
-
----
-
-## [1.0.0] - 2022-09-15
-
-### Added
-- Initial release
-- Basic XARF v1 parsing
-- Limited validation
-
----
-
-## Migration Guides
-
-- **v3.x → v4.0.0**: See [MIGRATION_GUIDE.md](docs/MIGRATION_GUIDE.md)
-- **v2.x → v3.x**: Contact support for legacy migration assistance
-
-## Links
-
-- [XARF v4 Specification](https://xarf.org/docs/specification/)
-- [GitHub Repository](https://github.com/xarf/xarf-parser-python)
-- [PyPI Package](https://pypi.org/project/xarf-parser/)
-- [Issue Tracker](https://github.com/xarf/xarf-parser-python/issues)
-- [XARF Website](https://xarf.org)
-
-## Versioning
 
-This project follows [Semantic Versioning](https://semver.org/):
-- **MAJOR** version for incompatible API changes
-- **MINOR** version for backwards-compatible functionality additions
-- **PATCH** version for backwards-compatible bug fixes
+- **Tooling**: switched to `ruff` (replaces `black`, `isort`, `flake8`); `mypy --strict`; `bandit`; `pytest` with 80% coverage threshold.
+- **`v3_compat.py`**: aligned type mappings exactly with the JS reference implementation (8 types, PascalCase + lowercase variants for each).
+- **`models.py`**: replaced with result dataclasses (`ParseResult`, `CreateReportResult`, `ValidationError`, `ValidationWarning`) and base Pydantic models (`XARFReport`, `XARFEvidence`, `ContactInfo`).
 
-Alpha releases use suffix: `4.0.0a1`, `4.0.0a2`, etc.
-Beta releases use suffix: `4.0.0b1`, `4.0.0b2`, etc.
+[Unreleased]: https://github.com/xarf/xarf-python/compare/v0.1.0...HEAD
+[0.1.0]: https://github.com/xarf/xarf-python/releases/tag/v0.1.0
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
index 577e221..a2bc02f 100644
--- a/CODE_OF_CONDUCT.md
+++ b/CODE_OF_CONDUCT.md
@@ -1,48 +1,77 @@
-Contributor Covenant Code of Conduct
-Our Pledge
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
 We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation.
+
 We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.
-Our Standards
+
+## Our Standards
+
 Examples of behavior that contributes to a positive environment for our community include:
 
-* Demonstrating empathy and kindness toward other people
-* Being respectful of differing opinions, viewpoints, and experiences
-* Giving and gracefully accepting constructive feedback
-* Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
-* Focusing on what is best not just for us as individuals, but for the overall community
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
+- Focusing on what is best not just for us as individuals, but for the overall community
 
 Examples of unacceptable behavior include:
 
-* The use of sexualized language or imagery, and sexual attention or advances of any kind
-* Trolling, insulting or derogatory comments, and personal or political attacks
-* Public or private harassment
-* Publishing others’ private information, such as a physical or email address, without their explicit permission
-* Other conduct which could reasonably be considered inappropriate in a professional setting
+- The use of sexualized language or imagery, and sexual attention or advances of any kind
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or email address, without their explicit permission
+- Other conduct which could reasonably be considered inappropriate in a professional setting
+
+## Enforcement Responsibilities
 
-Enforcement Responsibilities
 Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful.
+
 Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.
-Scope
+
+## Scope
+
 This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.
-Enforcement
-Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at [INSERT CONTACT METHOD]. All complaints will be reviewed and investigated promptly and fairly.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at admin@xarf.org. All complaints will be reviewed and investigated promptly and fairly.
+
 All community leaders are obligated to respect the privacy and security of the reporter of any incident.
-Enforcement Guidelines
+
+## Enforcement Guidelines
+
 Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:
-1. Correction
-Community Impact: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
-Consequence: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.
-2. Warning
-Community Impact: A violation through a single incident or series of actions.
-Consequence: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.
-3. Temporary Ban
-Community Impact: A serious violation of community standards, including sustained inappropriate behavior.
-Consequence: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
-4. Permanent Ban
-Community Impact: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
-Consequence: A permanent ban from any sort of public interaction within the community.
-Attribution
-This Code of Conduct is adapted from the Contributor Covenant, version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
-Community Impact Guidelines were inspired by Mozilla’s code of conduct enforcement ladder.
-For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations.
 
+### 1. Correction
+
+**Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
+
+**Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+**Community Impact**: A violation through a single incident or series of actions.
+
+**Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.
+
+### 3. Temporary Ban
+
+**Community Impact**: A serious violation of community standards, including sustained inappropriate behavior.
+
+**Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+**Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
+
+**Consequence**: A permanent ban from any sort of public interaction within the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org/), version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
+
+Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity).
+
+For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f1c710c..f9d243e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,279 +1,286 @@
-# Contributing to XARF Python Parser
+# Contributing to XARF Python Library
 
-Thank you for your interest in contributing to the XARF v4 Python parser! This document provides guidelines for contributing to the implementation.
+Thank you for your interest in contributing to the XARF Python library! We welcome contributions from the community and appreciate your help in making this project better.
 
-## 🤝 How to Contribute
+## Code of Conduct
 
-### Reporting Issues
-- **Bug Reports**: Parser errors, validation issues, or unexpected behavior
-- **Feature Requests**: New validation rules, performance improvements, or API enhancements
-- **Parser Support**: Help with implementing new XARF classes or types
+This project adheres to the [Contributor Covenant Code of Conduct](CODE_OF_CONDUCT.md). By participating, you are expected to uphold this code. Please report unacceptable behavior to admin@xarf.org.
 
-### Contributing Code
-1. **Fork** the repository
-2. **Create** a feature branch (`git checkout -b feature/validation-improvement`)
-3. **Make** your changes following our coding standards
-4. **Add tests** for new functionality
-5. **Run** the test suite and linting
-6. **Submit** a pull request
+## How to Contribute
 
-## 🛠️ Development Setup
+### Reporting Bugs
+
+If you find a bug, please create an issue on GitHub with the following information:
+
+- **Clear title and description** of the issue
+- **Steps to reproduce** the problem
+- **Expected behavior** vs. **actual behavior**
+- **Code samples** or test cases that demonstrate the issue
+- **Version** of the library you're using
+- **Python version** and operating system
+
+### Suggesting Features
+
+We welcome feature requests! Please create an issue with:
+
+- **Clear description** of the feature
+- **Use case** explaining why this feature would be useful
+- **Example code** showing how the feature might work
+- **Compatibility considerations** with the XARF specification
+
+### Pull Requests
+
+We actively welcome pull requests! Here's how to contribute:
+
+1. **Fork the repository** and create your branch from `main`
+2. **Make your changes** following our coding standards
+3. **Add tests** for any new functionality
+4. **Ensure all tests pass** and coverage remains >80%
+5. **Update documentation** as needed
+6. **Submit a pull request** with a clear description of changes
+
+## Development Setup
 
 ### Prerequisites
-- Python 3.8+
-- Git
 
-### Installation
-```bash
-# Clone your fork
-git clone https://github.com/YOUR_USERNAME/xarf-parser-python.git
-cd xarf-parser-python
+- **Python**: 3.10 or higher
+- **Git**: Latest stable version
 
-# Create virtual environment
-python -m venv venv
-source venv/bin/activate  # On Windows: venv\Scripts\activate
+### Getting Started
 
-# Install development dependencies
-pip install -e ".[dev]"
+1. **Clone your fork:**
 
-# Install pre-commit hooks
-pre-commit install
-```
+   ```bash
+   git clone https://github.com/YOUR_USERNAME/xarf-python.git
+   cd xarf-python
+   ```
+
+2. **Create a virtual environment and install dependencies:**
+
+   ```bash
+   python -m venv venv
+   source venv/bin/activate  # On Windows: venv\Scripts\activate
+   pip install -e ".[dev]"
+   ```
+
+3. **Install pre-commit hooks:**
+
+   ```bash
+   pre-commit install
+   ```
+
+4. **Run tests:**
+
+   ```bash
+   pytest
+   ```
+
+### Development Commands
+
+- `pytest` — Run the test suite
+- `pytest --cov=xarf` — Generate code coverage report
+- `ruff check xarf/` — Lint
+- `ruff check --fix xarf/` — Auto-fix lint issues
+- `ruff format xarf/` — Format code
+- `ruff format --check xarf/` — Check code formatting
+- `mypy --strict xarf/` — Run type checking
+- `bandit -r xarf/` — Security scanning
+
+## Testing Requirements
+
+All contributions must maintain or improve test coverage:
+
+- **Coverage threshold**: 80% overall — enforced by `pytest-cov`
+- **Unit tests**: Required for all new functions and classes
+- **Integration tests**: Required for parser and generator functionality
+- **Test file location**: Tests should be in the `tests/` directory
+- **No schema mocking**: tests must use real schemas loaded from the bundle
 
 ### Running Tests
+
 ```bash
-# Run full test suite
-pytest
+pytest                     # Run all tests
+pytest -v                  # Verbose output
+pytest --cov=xarf          # With coverage report
+pytest tests/test_parse.py # Run a specific file
+```
+
+### Writing Tests
 
-# Run with coverage
-pytest --cov=xarf
+We use pytest. Example test structure:
 
-# Run specific test file
-pytest tests/test_parser.py
+```python
+from xarf import parse
+
+def test_parse_valid_report() -> None:
+    result = parse({
+        # ... valid XARF data
+    })
+
+    assert not result.errors
+    assert result.report is not None
+    assert result.report.category == "connection"
+    assert result.report.type == "ddos"
+
+def test_parse_returns_errors_for_invalid_data() -> None:
+    result = parse({})
 
-# Run with verbose output
-pytest -v
+    assert len(result.errors) > 0
 ```
 
-### Code Quality
-```bash
-# Format code
-black xarf/
-isort xarf/
+## Code Style Guidelines
+
+### Python Standards
+
+- **Language version**: Python 3.10+
+- **Type annotations**: required on all public functions and methods
+- **Docstrings**: Google style for all public APIs (`Args:`, `Returns:`, `Raises:`, `Example:`)
+- **Strict mypy**: all code must pass `mypy --strict xarf/`
+
+See [pyproject.toml](pyproject.toml) for the full `ruff` and `mypy` configuration.
 
-# Lint code
-flake8 xarf/
+### Naming Conventions
 
-# Type checking
-mypy xarf/
+- **Functions / methods**: `snake_case` (e.g., `parse`, `create_report`, `create_evidence`)
+- **Constants**: `UPPER_SNAKE_CASE` (e.g., `SPEC_VERSION`)
+- **Classes**: `PascalCase` (e.g., `ParseResult`, `XARFReport`, `SchemaRegistry`)
+- **Type aliases**: `PascalCase` (e.g., `AnyXARFReport`, `ConnectionReport`)
+
+### Code Organization
+
+- **One module per file** for main components
+- **Related types** grouped in category-specific files (`types_messaging.py`, etc.)
+- **Export from `__init__.py`** for public API — use `xarf-javascript/src/index.ts` as the reference for which names to expose
+
+### Formatting and Linting
+
+We use `ruff` for both formatting and linting. Configuration lives in [pyproject.toml](pyproject.toml).
+
+```bash
+ruff format xarf/          # Auto-format
+ruff format --check xarf/  # Check formatting
+ruff check xarf/           # Lint
+ruff check --fix xarf/     # Auto-fix linting issues
 ```
 
-## 📋 Contribution Guidelines
+A pre-commit hook runs both automatically on staged files.
 
-### Code Standards
-- **Follow PEP 8** style guidelines
-- **Use type hints** for all functions and methods
-- **Write docstrings** for public APIs
-- **Keep functions focused** and single-purpose
-- **Use descriptive variable names**
+### Documentation
 
-### Testing Requirements
-- **Unit tests** for all new functionality
-- **Integration tests** for end-to-end scenarios
-- **Test edge cases** and error conditions
-- **Maintain >90% test coverage**
-- **Mock external dependencies**
+- **Google-style docstrings** for all public APIs
+- **Type annotations** on all parameters and return values
+- **Inline comments** for non-obvious logic
+- **README updates** for new features
 
-### API Design
-- **Consistent naming** with existing patterns
-- **Clear error messages** with actionable information
-- **Backward compatibility** when possible
-- **Performance considerations** for high-volume use
+## Commit Message Conventions
 
-## 🏗️ Architecture Overview
+We follow the [Conventional Commits](https://www.conventionalcommits.org/) specification:
 
-### Core Components
 ```
-xarf/
-├── __init__.py          # Public API exports
-├── parser.py            # Main XARFParser class
-├── models.py            # Pydantic data models
-├── exceptions.py        # Custom exception classes
-└── validators.py        # Validation logic (future)
+<type>(<scope>): <subject>
+
+<body>
+
+<footer>
 ```
 
-### Design Principles
-- **Pydantic models** for data validation and serialization
-- **Modular design** for easy extension
-- **Clear separation** between parsing and validation
-- **Comprehensive error handling** with context
+### Types
 
-## 🎯 Priority Areas
+- `feat`: New feature
+- `fix`: Bug fix
+- `docs`: Documentation changes
+- `style`: Code style changes (formatting, etc.)
+- `refactor`: Code refactoring without feature changes
+- `test`: Adding or updating tests
+- `chore`: Maintenance tasks, dependency updates
 
-### High Priority (Alpha → Beta)
-- **Complete class coverage** (infrastructure, copyright, vulnerability, reputation)
-- **Enhanced validation** beyond basic schema checking
-- **Performance optimization** for high-volume processing
-- **XARF v3 compatibility** layer
+### Examples
 
-### Medium Priority (Beta → Stable)
-- **Advanced validation rules** (business logic, cross-field validation)
-- **CLI tools** for command-line usage
-- **Integration examples** with popular security tools
-- **Comprehensive benchmarking**
+```
+feat(parser): add support for reputation/threat_intelligence reports
 
-### Future Enhancements
-- **Evidence handling** (compression, validation)
-- **Bulk processing** utilities
-- **Plugin architecture** for custom validators
-- **Async parsing** support
+Implement Pydantic model and schema-driven validation for the
+threat_intelligence report type. Includes shared test samples.
 
-## 📝 Code Style Examples
+Closes #123
+```
 
-### Function Documentation
-```python
-def parse_report(self, json_data: Union[str, Dict[str, Any]]) -> XARFReport:
-    """Parse XARF report from JSON data.
-    
-    Args:
-        json_data: JSON string or dictionary containing XARF report data
-        
-    Returns:
-        XARFReport: Parsed and validated report object
-        
-    Raises:
-        XARFParseError: If JSON parsing fails
-        XARFValidationError: If validation fails in strict mode
-        
-    Example:
-        >>> parser = XARFParser()
-        >>> report = parser.parse('{"xarf_version": "4.0.0", ...}')
-        >>> print(report.class_)
-        'messaging'
-    """
 ```
+fix(schema_validator): deduplicate errors from master and core schemas
 
-### Error Handling
-```python
-try:
-    report = parser.parse(json_data)
-except XARFValidationError as e:
-    logger.error(f"Validation failed: {e}")
-    for error in e.errors:
-        logger.debug(f"  - {error}")
-    raise
-except XARFParseError as e:
-    logger.error(f"Parse failed: {e}")
-    raise
+Errors reported by both the master schema and the core schema for the
+same field were appearing twice in ValidationError lists.
+
+Fixes #456
 ```
 
-### Test Structure
-```python
-class TestMessagingReports:
-    """Test parsing of messaging class reports."""
-    
-    def test_valid_spam_report(self):
-        """Test parsing of valid spam report."""
-        report_data = {
-            "xarf_version": "4.0.0",
-            # ... complete valid data
-        }
-        
-        parser = XARFParser()
-        report = parser.parse(report_data)
-        
-        assert isinstance(report, MessagingReport)
-        assert report.class_ == "messaging"
-        assert report.type == "spam"
-    
-    def test_missing_required_field(self):
-        """Test handling of missing required fields."""
-        invalid_data = {"xarf_version": "4.0.0"}  # Missing required fields
-        
-        parser = XARFParser(strict=True)
-        
-        with pytest.raises(XARFValidationError) as exc_info:
-            parser.parse(invalid_data)
-        
-        assert "Missing required fields" in str(exc_info.value)
+```
+docs(readme): update schema management section
 ```
 
-## 🔍 Testing Guidelines
+## Pull Request Process
 
-### Test Categories
-- **Unit Tests**: Individual functions and methods
-- **Integration Tests**: Full parsing workflows
-- **Validation Tests**: Schema and business rule validation
-- **Performance Tests**: Benchmarking and profiling
+1. **Update documentation** for any changed functionality
+2. **Add tests** covering your changes
+3. **Ensure all tests pass**: `pytest`
+4. **Verify coverage**: `pytest --cov=xarf`
+5. **Check linting**: `ruff check xarf/`
+6. **Verify formatting**: `ruff format --check xarf/`
+7. **Run type checking**: `mypy --strict xarf/`
+8. **Update CHANGELOG.md** if applicable
+9. **Create pull request** with clear description
 
-### Sample Test Data
-```python
-# Use realistic but anonymized data
-SAMPLE_SPAM_REPORT = {
-    "xarf_version": "4.0.0",
-    "report_id": "00000000-0000-0000-0000-000000000001",
-    "timestamp": "2024-01-01T12:00:00Z",
-    "reporter": {
-        "org": "Test Security Provider",
-        "contact": "test@example.com",
-        "type": "automated"
-    },
-    "source_identifier": "192.0.2.1",  # RFC 3330 test IP
-    "category": "messaging",
-    "type": "spam",
-    "evidence_source": "spamtrap"
-}
-```
+### Pull Request Template
+
+Your PR description should include:
+
+- **What**: Brief description of changes
+- **Why**: Motivation and context
+- **How**: Implementation approach
+- **Testing**: How you tested the changes
+- **Breaking changes**: Any breaking changes (if applicable)
+- **Related issues**: Link to related issues
+
+### Code Review
+
+All pull requests require review before merging:
+
+- At least **one approval** from a maintainer
+- All **CI checks must pass**
+- **No unresolved discussions**
+- **Merge conflicts resolved**
+
+## XARF Specification Compliance
+
+All implementations must conform to the [XARF specification](https://xarf.org):
+
+- Parse all **required fields**
+- Validate **data types** correctly
+- Support all **standard report types**
+- Handle **optional fields** appropriately
+- Implement proper **error handling**
+- Maintain **backward compatibility** when possible
+
+## Release Process
+
+Releases are managed by maintainers:
+
+1. Version bumped following [Semantic Versioning](https://semver.org/)
+2. CHANGELOG.md updated with changes
+3. Git tag created for the version
+4. Package published to PyPI
+
+## Getting Help
+
+- **Documentation**: Check the [README](README.md) and code comments
+- **Issues**: Search existing issues or create a new one
+- **Discussions**: Use GitHub Discussions for questions
+- **Email**: Contact the maintainers at contact@xarf.org
+
+## License
+
+By contributing to XARF Python Library, you agree that your contributions will be licensed under the [MIT License](LICENSE).
+
+---
 
-## 💬 Community Guidelines
-
-### Pull Request Process
-1. **Clear description** of changes and motivation
-2. **Reference issues** when applicable
-3. **Include tests** for new functionality
-4. **Update documentation** if needed
-5. **Respond to feedback** promptly
-
-### Review Criteria
-- **Code quality** and style consistency
-- **Test coverage** and quality
-- **Performance impact** consideration
-- **API compatibility** maintenance
-- **Documentation** completeness
-
-## 📞 Getting Help
-
-- **GitHub Issues**: Bug reports and feature requests
-- **GitHub Discussions**: Questions and design discussions
-- **Code Review**: Request feedback on draft PRs
-- **Email**: contact@xarf.org for sensitive issues
-
-## 🏆 Recognition
-
-Contributors are recognized through:
-- Git commit history and contributor graphs
-- Release notes and changelogs
-- PyPI package metadata
-- Conference presentations (with permission)
-
-## 🚀 Release Process
-
-### Alpha Releases (4.0.0a1, 4.0.0a2, ...)
-- **Frequent releases** with new features
-- **Breaking changes** allowed
-- **Community feedback** encouraged
-- **PyPI pre-release** tags
-
-### Beta Releases (4.0.0b1, ...)
-- **Feature complete** for initial scope
-- **API stabilization** focus
-- **No breaking changes** without major justification
-- **Performance optimization**
-
-### Stable Releases (4.0.0, 4.0.1, ...)
-- **Production ready** quality
-- **Semantic versioning** strictly followed
-- **Long-term support** commitment
-- **Comprehensive documentation**
-
-Thank you for helping make XARF parsing reliable and efficient! 🐍
\ No newline at end of file
+Thank you for contributing to XARF! Your efforts help make abuse reporting more effective and standardized across the internet.
diff --git a/PIPELINE_SUMMARY.md b/PIPELINE_SUMMARY.md
deleted file mode 100644
index 1713485..0000000
--- a/PIPELINE_SUMMARY.md
+++ /dev/null
@@ -1,232 +0,0 @@
-# XARF Python Parser - CI/CD Pipeline Implementation Summary
-
-**Created**: 2025-11-20  
-**Based on**: abusix-parsers quality standards  
-**Status**: ✅ Complete and Ready for Use
-
----
-
-## Overview
-
-Comprehensive CI/CD pipeline for the XARF Python library implementing enterprise-grade quality checks, security scanning, multi-version testing, and automated PyPI publishing.
-
-## Files Created (9 Total)
-
-### GitHub Actions Workflows (5)
-1. **continuous-integration.yml** - Main CI orchestration
-2. **quality-checks.yml** - 8 parallel quality checks (exists)
-3. **security-scan.yml** - Weekly security scanning (exists)
-4. **test-suite.yml** - Multi-version testing
-5. **publish-pypi.yml** - PyPI publishing
-
-### Configuration Files (2)
-6. **.flake8** - Linting configuration
-7. **.github/trivy.yaml** - Security scanner config
-
-### Documentation (2)
-8. **docs/ci-cd-pipeline-design.md** - Complete design documentation
-9. **.github/QUICK_START.md** - Setup and usage guide
-
-## Pipeline Capabilities
-
-### Quality Gate (8 Checks)
-- ✅ **Import Ordering** (isort) - Blocking
-- ✅ **Code Formatting** (black) - Blocking
-- ✅ **Code Style** (flake8) - Blocking
-- ✅ **Security Scan** (bandit) - Blocking
-- ⚠️ **Type Checking** (mypy) - Warning only
-- ⚠️ **Docstrings** (pydocstyle) - Warning only
-- ⚠️ **Complexity** (radon) - Warning only
-- ⚠️ **Coverage** (pytest-cov 80%) - Warning only
-
-### Security Scanning (3 Tools)
-- 🔒 **pip-audit** - Dependency CVE scanning
-- 🔒 **bandit** - Code security analysis
-- 🔒 **trivy** - Secrets & vulnerability detection
-- 📊 SARIF integration with GitHub Security
-- 🤖 Automated issue creation
-- 📁 90-day artifact retention
-
-### Testing (7 Configurations)
-- 🐍 Python 3.8, 3.9, 3.10, 3.11, 3.12
-- 💻 Ubuntu (all versions)
-- 🍎 macOS (3.12 only)
-- 🪟 Windows (3.12 only)
-- 📈 80% coverage threshold
-- 🔄 Codecov integration
-- 📊 JUnit XML results
-
-### Publishing (2 Targets)
-- 📦 **PyPI** (production) - On GitHub release
-- 🧪 **Test PyPI** - Manual testing
-- 🔐 Trusted publishing (no API tokens)
-- ✅ Distribution verification
-- 💬 Automated release comments
-
-## Key Features
-
-### Performance
-- ⚡ 15 parallel jobs (8 quality + 7 test matrix)
-- 🚀 ~15-23 minute total CI time
-- 💾 Pip caching (~60% faster builds)
-- 🔄 Retry logic for reliability
-
-### Security
-- 📅 Weekly automated scans (Monday 9 AM UTC)
-- 🎯 SARIF results in Security tab
-- 🔍 Secret detection (AWS, API keys, GitHub tokens)
-- 📋 Automated issue management
-
-### Developer Experience
-- 💬 PR status comments
-- 📊 Visual status tables
-- 📦 Downloadable artifacts
-- 📝 Detailed error logs
-- ⚠️ Non-blocking warnings
-
-### Compliance
-- ✅ Based on abusix-parsers standards
-- ✅ Industry-standard tool versions
-- ✅ 80% coverage requirement
-- ✅ Multi-platform testing
-- ✅ Security best practices
-
-## Workflow Triggers
-
-### Continuous Integration
-```
-Push/PR → continuous-integration.yml
-  ├─ quality-checks.yml (8 parallel)
-  ├─ test-suite.yml (7 parallel)
-  └─ ci-summary (posts to PR)
-```
-
-### Security Scan
-```
-Every Monday 9 AM UTC (or manual)
-  ├─ pip-audit
-  ├─ bandit  
-  ├─ trivy
-  └─ Create issue if failures
-```
-
-### Publishing
-```
-GitHub Release → publish-pypi.yml
-  ├─ Build distributions
-  ├─ Verify with twine
-  └─ Publish to PyPI
-```
-
-## Setup Requirements
-
-1. **GitHub Environments**: Create `test-pypi` and `pypi`
-2. **PyPI Trusted Publishing**: Configure OIDC publisher
-3. **Branch Protection**: Require status checks
-4. **Optional**: CODECOV_TOKEN for private repos
-
-## Tool Versions
-
-All based on abusix-parsers standards:
-- isort: 5.13.2
-- black: 24.3.0
-- flake8: 7.0.0
-- bandit: 1.7.8
-- mypy: 1.9.0
-- pydocstyle: 6.3.0
-- radon: 6.0.1
-- pip-audit: 2.7.2
-- trivy: 0.28.0
-- pytest: 7.0+
-- pytest-cov: 4.0+
-
-## Documentation
-
-### Primary Documents
-1. **ci-cd-pipeline-design.md** - Complete pipeline design and architecture
-2. **QUICK_START.md** - Setup and testing guide
-3. **WORKFLOWS_README.md** - Workflow reference
-
-### Additional Resources
-- Workflow files include inline documentation
-- Configuration files have explanatory comments
-- All tools configured via pyproject.toml
-
-## Memory Key
-
-**Storage Location**: 
-```
-/Users/tknecht/Projects/xarf/xarf-parser-python/docs/ci-cd-pipeline-design.md
-```
-
-**Memory Key**: `xarf-python/workflows`
-
-**Quick Reference**: 
-```
-/Users/tknecht/Projects/xarf/xarf-parser-python/PIPELINE_SUMMARY.md
-```
-
-## Testing the Pipeline
-
-### Quick Test
-```bash
-# Create test PR
-git checkout -b test-ci
-echo "# test" >> README.md
-git commit -am "Test CI"
-git push origin test-ci
-# Create PR on GitHub
-```
-
-### Manual Security Scan
-```
-GitHub → Actions → Security Scan → Run workflow
-```
-
-### Test PyPI Publishing
-```
-GitHub → Actions → Publish to PyPI → Run workflow
-Select: ☑ Publish to Test PyPI
-```
-
-## Monitoring
-
-- **Actions Tab**: View all workflow runs
-- **Security Tab**: SARIF scan results
-- **PR Comments**: Automated CI status
-- **Issues**: Automated security alerts
-
-## Maintenance Schedule
-
-- **Weekly**: Review security scan results
-- **Monthly**: Review coverage and warnings
-- **Quarterly**: Update tool versions
-
-## Success Metrics
-
-- ✅ All 5 workflows created
-- ✅ 8 quality checks configured
-- ✅ 3 security scans active
-- ✅ 7-platform test matrix
-- ✅ PyPI publishing ready
-- ✅ Documentation complete
-- ✅ Quick start guide available
-- ✅ Configuration files created
-
-## Next Steps
-
-1. Push workflows to GitHub
-2. Enable GitHub environments
-3. Configure PyPI trusted publishing
-4. Enable branch protection
-5. Test with sample PR
-6. Monitor first security scan
-
----
-
-**Pipeline Status**: ✅ Production Ready  
-**Documentation**: ✅ Complete  
-**Testing**: ⏳ Awaiting GitHub setup  
-**Deployment**: ⏳ Awaiting PyPI configuration
-
-**All files are located at**: `/Users/tknecht/Projects/xarf/xarf-parser-python/`
diff --git a/README.md b/README.md
index 34a0fda..25f6889 100644
--- a/README.md
+++ b/README.md
@@ -1,538 +1,307 @@
-# XARF v4 Python Parser
+# XARF Python Library
 
-[![CI](https://github.com/xarf/xarf-python/actions/workflows/ci.yml/badge.svg)](https://github.com/xarf/xarf-python/actions/workflows/ci.yml)
-[![PyPI version](https://badge.fury.io/py/xarf-parser.svg)](https://pypi.org/project/xarf-parser/)
-[![Python versions](https://img.shields.io/pypi/pyversions/xarf-parser.svg)](https://pypi.org/project/xarf-parser/)
+![XARF Spec](https://img.shields.io/badge/XARF%20Spec-v4.2.0-blue)
+[![PyPI version](https://badge.fury.io/py/xarf.svg)](https://pypi.org/project/xarf/)
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
-[![Security Policy](https://img.shields.io/badge/Security-Policy-blue.svg)](SECURITY.md)
-
-A Python library for parsing, validating, and generating XARF v4 (eXtended Abuse Reporting Format) reports.
-
-## 🚀 Status: Alpha Development
-
-This library is currently in **alpha** development (v4.0.0-alpha). It supports the core XARF v4 categories with full parsing, validation, and generation capabilities.
+[![CI](https://github.com/xarf/xarf-python/actions/workflows/ci.yml/badge.svg)](https://github.com/xarf/xarf-python/actions/workflows/ci.yml)
 
-### Supported Categories
+A Python library for parsing, validating, and generating [XARF v4](https://xarf.org) (eXtended Abuse Reporting Format) reports.
 
-- ✅ **messaging** - Email spam, phishing, social engineering
-- ✅ **connection** - DDoS, port scans, login attacks, brute force
-- ✅ **content** - Phishing sites, malware distribution, defacement, fraud
-- ✅ **infrastructure** - Compromised systems, botnets
-- ✅ **copyright** - DMCA, P2P, cyberlockers
-- ✅ **vulnerability** - CVE reports, misconfigurations
-- ✅ **reputation** - Threat intelligence, blocklists
+## Features
 
----
+- **Parse** XARF reports from JSON with validation and typed results
+- **Generate** XARF-compliant reports with auto-generated metadata (UUIDs, timestamps)
+- **Validate** reports against the official JSON schemas with detailed errors and warnings
+- **Full type support** with Pydantic v2 discriminated union models for all 7 categories
+- **v3 backward compatibility** with automatic detection and conversion
+- **Schema-driven** — validation rules derived from the official [xarf-spec](https://github.com/xarf/xarf-spec) schemas, not hardcoded
 
-## 📦 Installation
+## Installation
 
 ```bash
-# Alpha releases (recommended for early testing)
-pip install xarf-parser==4.0.0a1
-
-# Install from source for latest development
-git clone https://github.com/xarf/xarf-parser-python.git
-cd xarf-parser-python
-pip install -e .
-
-# Install with development dependencies
-pip install -e ".[dev]"
+pip install xarf
 ```
 
----
-
-## ✨ XARF v3 Backwards Compatibility
+## Quick Start
 
-**Automatic conversion from XARF v3 to v4!** This parser transparently handles legacy v3 reports with automatic conversion and deprecation warnings.
+### Parsing a Report
 
 ```python
-from xarf import XARFParser
-
-parser = XARFParser()
-
-# Works seamlessly with both v3 and v4 reports
-v3_report = '''
-{
-  "Version": "3.0.0",
-  "ReporterInfo": {
-    "ReporterOrg": "Security Team",
-    "ReporterOrgEmail": "abuse@example.com"
-  },
-  "Report": {
-    "ReportClass": "Messaging",
-    "ReportType": "spam",
-    ...
-  }
-}
-'''
-
-# Automatically converted to v4 format
-report = parser.parse(v3_report)
-print(f"Category: {report.category}")  # messaging
-```
-
-See the **[Migration Guide](docs/migration-guide.md)** for complete v3 to v4 conversion details.
+from xarf import parse
 
----
-
-## 🔧 Quick Start
-
-### Parsing XARF Reports
-
-```python
-from xarf import XARFParser
-
-# Initialize parser
-parser = XARFParser()
-
-# Parse a XARF report from JSON string
-report_json = '''
-{
-  "xarf_version": "4.0.0",
-  "report_id": "550e8400-e29b-41d4-a716-446655440000",
-  "category": "content",
-  "type": "phishing",
-  "timestamp": "2024-01-15T14:30:00Z",
-  "source_identifier": "203.0.113.45",
-  "reporter": {
-    "org": "Security Team",
-    "contact": "abuse@example.com",
-    "type": "automated"
-  },
-  "url": "https://evil-site.example.com/phishing"
-}
-'''
-
-report = parser.parse(report_json)
-
-# Access report data
-print(f"Category: {report.category}")
-print(f"Type: {report.type}")
-print(f"Source: {report.source_identifier}")
-print(f"URL: {report.url}")
-
-# Validate report structure
-if parser.validate(report_json):
-    print("✅ Report is valid")
+# Missing first_seen and source_port produce validation warnings.
+result = parse({
+    "xarf_version": "4.2.0",
+    "report_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+    "timestamp": "2024-01-15T10:30:00Z",
+    # "first_seen": "2024-01-15T10:00:00Z",
+    "reporter": {
+        "org": "Security Team",
+        "contact": "abuse@example.com",
+        "domain": "example.com",
+    },
+    "sender": {
+        "org": "Security Team",
+        "contact": "abuse@example.com",
+        "domain": "example.com",
+    },
+    "source_identifier": "192.0.2.100",
+    # "source_port": 1234,
+    "category": "connection",
+    "type": "ddos",
+    "evidence_source": "honeypot",
+    "destination_ip": "203.0.113.10",
+    "protocol": "tcp",
+})
+
+if not result.errors:
+    print(result.report.category)  # 'connection'
 else:
-    print("❌ Validation errors:", parser.get_errors())
+    for e in result.errors:
+        print(f"{e.field}: {e.message}")
 ```
 
-### Generating XARF Reports
+### Creating a Report
 
 ```python
-from xarf.generator import XARFGenerator
-
-# Initialize generator
-generator = XARFGenerator()
-
-# Generate a phishing report
-report = generator.create_content_report(
-    report_type="phishing",
-    source_identifier="203.0.113.45",
-    url="https://evil-phishing.example.com/login",
-    reporter_org="Security Research Lab",
-    reporter_contact="abuse@security-lab.example",
-    description="Phishing site targeting banking customers",
-    evidence=[
-        {
-            "content_type": "image/png",
-            "description": "Screenshot of phishing page",
-            "payload": "iVBORw0KGgoAAAANSUhEUg...",  # base64 encoded
-            "hashes": ["sha256:a665a45920422f9d417e4867efdc4fb8..."]
-        }
-    ]
+from xarf import create_report, create_evidence
+
+# Returns XARFEvidence with content_type, payload (base64), hash, size, description
+evidence = create_evidence(
+    "message/rfc822",
+    raw_email_bytes,
+    description="Original spam email",
+    hash_algorithm="sha256",
 )
 
-# Report is automatically validated and includes:
-# - Auto-generated UUID for report_id
-# - Current timestamp in ISO 8601 format
-# - Proper XARF v4 structure
+# xarf_version, report_id, and timestamp are auto-generated
+result = create_report(
+    category="messaging",
+    type="spam",
+    source_identifier="192.0.2.100",
+    reporter={
+        "org": "Example Security",
+        "contact": "abuse@example.com",
+        "domain": "example.com",
+    },
+    sender={
+        "org": "Example Security",
+        "contact": "abuse@example.com",
+        "domain": "example.com",
+    },
+    evidence_source="spamtrap",
+    description="Spam email detected from source",
+    protocol="smtp",
+    smtp_from="spammer@evil.example.com",
+    evidence=[evidence],
+)
 
-print(report.to_json())
+import json
+print(json.dumps(result.report.model_dump(by_alias=True, exclude_none=True), indent=2))
 ```
 
----
+## API Reference
 
-## 📋 JSON Schema Validation
+### `parse(json_data, strict=False, show_missing_optional=False)`
 
-This parser uses the official JSON schemas from the [XARF specification repository](https://github.com/xarf/xarf-spec/tree/main/schemas/v4):
+Parse and validate a XARF report from JSON. Supports both v4 and v3 (legacy) formats — v3 reports are automatically converted to v4 with deprecation warnings.
 
 ```python
-# Validate against official XARF v4 schema
-from xarf.validation import validate_xarf_report
+from xarf import parse
 
-# Schema URLs reference the spec repository
-validation_result = validate_xarf_report(
-    report_json,
-    schema_url="https://raw.githubusercontent.com/xarf/xarf-spec/main/schemas/v4/xarf-v4-master.json"
-)
+result = parse(json_data, strict=False, show_missing_optional=False)
 ```
 
-## 📋 Features
-
-### Current (Alpha v4.0.0)
-
-- ✅ **Parsing**: Parse XARF v4 JSON reports into Python objects
-- ✅ **Validation**: JSON Schema validation with category-specific rules
-- ✅ **Generation**: Create XARF v4 reports programmatically
-- ✅ **Evidence Handling**: Support for text, images, and binary evidence
-- ✅ **Category Support**: All 7 categories (messaging, connection, content, infrastructure, copyright, vulnerability, reputation)
-- ✅ **Reporter Info**: Including `on_behalf_of` for infrastructure providers
-- ✅ **XARF v3 Compatibility**: Automatic conversion with deprecation warnings
-- ✅ **Pydantic V2**: Modern validation with full type safety
-- ✅ **Python 3.8-3.12**: Full compatibility
-
-### Planned (Beta)
+**Parameters:**
 
-- ⏳ Advanced validation rules (business logic)
-- ⏳ Evidence compression support
-- ⏳ Bulk processing utilities
-- ⏳ Performance optimizations
-- ⏳ CLI tools for validation and conversion
+- `json_data: str | dict` — JSON string or dict containing a XARF report
+- `strict: bool` — Return `report=None` on validation failures (default: `False`)
+- `show_missing_optional: bool` — Populate `result.info` with missing optional field details (default: `False`)
 
-### Future
+**Returns `ParseResult`:**
 
-- 🔮 CLI tools for validation and generation
-- 🔮 SIEM integration adapters
-- 🔮 Report signing and encryption
-- 🔮 Multi-format export (XML, CSV)
+- `report: AnyXARFReport | None` — The parsed report, typed by category (e.g., `DdosReport`, `SpamReport`)
+- `errors: list[ValidationError]` — Structured validation errors (each has `.field`, `.message`, `.value`)
+- `warnings: list[ValidationWarning]` — Structured validation warnings
+- `info: list[dict[str, str]] | None` — Missing optional field info (only when `show_missing_optional=True`)
 
-## 📊 Supported Categories & Types
+### `create_report(*, category, type, source_identifier, reporter, sender, **kwargs)`
 
-### messaging
-- `spam` - Email spam reports
-- `phishing` - Phishing emails
-- `social_engineering` - Social engineering attempts
+Create a validated XARF report with auto-generated metadata. Automatically fills `xarf_version`, `report_id` (UUID v4), and `timestamp` (ISO 8601 UTC).
 
-### connection
-- `ddos` - Distributed denial of service attacks
-- `port_scan` - Port scanning attempts
-- `login_attack` - Brute force/credential attacks
-- `ip_spoofing` - IP address spoofing
-
-### content
-- `phishing_site` - Phishing websites
-- `malware_distribution` - Malware hosting sites
-- `defacement` - Website defacements
-- `spamvertised` - Spam-advertised content
-- `web_hack` - Web application attacks
-
-## 🧪 Examples
-
-### Parse Email Spam Report
 ```python
-import json
-from xarf import XARFParser
+from xarf import create_report
 
-spam_report = {
-    "xarf_version": "4.0.0",
-    "report_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
-    "timestamp": "2024-01-15T10:30:00Z",
-    "reporter": {
-        "org": "Spam Detection Service",
-        "contact": "noreply@spamdetect.example",
-        "type": "automated"
-    },
-    "source_identifier": "192.0.2.100",
-    "category": "messaging",
-    "type": "spam",
-    "evidence_source": "spamtrap",
-    "protocol": "smtp",
-    "smtp_from": "spammer@badexample.com",
-    "subject": "Get Rich Quick Scheme!",
-    "evidence": [
-        {
-            "content_type": "message/rfc822",
-            "description": "Full email message captured by spamtrap",
-            "payload": "UmVjZWl2ZWQ6IGZyb20gYmFkZXhhbXBsZS5jb20="
-        }
-    ],
-    "tags": ["spam:bulk", "category:financial"]
-}
-
-parser = XARFParser()
-report = parser.parse(json.dumps(spam_report))
-print(f"Detected {report.type} from {report.smtp_from}")
+result = create_report(
+    category="messaging",
+    type="spam",
+    source_identifier="192.0.2.100",
+    reporter={"org": "...", "contact": "...", "domain": "..."},
+    sender={"org": "...", "contact": "...", "domain": "..."},
+    # category-specific fields as keyword arguments
+    protocol="smtp",
+)
 ```
 
-### Generate DDoS Report
+**Parameters:**
 
-```python
-from xarf.generator import XARFGenerator
-
-generator = XARFGenerator()
-
-ddos_report = generator.create_connection_report(
-    report_type="ddos",
-    source_identifier="203.0.113.50",
-    destination_ip="198.51.100.10",
-    protocol="tcp",
-    destination_port=80,
-    reporter_org="Network Operations Center",
-    reporter_contact="noc@example.com",
-    attack_type="syn_flood",
-    duration_minutes=45,
-    packet_count=1500000,
-    description="Volumetric SYN flood attack against web services"
-)
+- `category: str` — One of the 7 XARF categories
+- `type: str` — Report type within the category
+- `source_identifier: str` — IP address or identifier of the abuse source
+- `reporter: dict | ContactInfo` — Reporting organization details
+- `sender: dict | ContactInfo` — Sending organization details
+- `strict: bool` — Return `report=None` on validation failures (default: `False`)
+- `show_missing_optional: bool` — Populate `result.info` with missing optional field details (default: `False`)
+- `**kwargs` — Category-specific fields (e.g., `protocol`, `destination_ip`, `smtp_from`)
 
-print(f"Attack lasted {ddos_report.duration_minutes} minutes")
-print(f"Total packets: {ddos_report.packet_count}")
-```
+**Returns `CreateReportResult`:**
 
-### Using `on_behalf_of` for Infrastructure Providers
+- `report: AnyXARFReport | None` — The generated report
+- `errors: list[ValidationError]` — Structured validation errors (`field`, `message`, `value`)
+- `warnings: list[ValidationWarning]` — Structured validation warnings
+- `info: list[dict[str, str]] | None` — Missing optional field info (only when `show_missing_optional=True`)
 
-```python
-from xarf.generator import XARFGenerator
+### `create_evidence(content_type, payload, *, description=None, hash_algorithm="sha256")`
 
-generator = XARFGenerator()
+Create an evidence object with automatic base64 encoding, hashing, and size calculation.
 
-# Infrastructure provider (Abusix) sending report for client (Swisscom)
-report = generator.create_report(
-    category="messaging",
-    report_type="spam",
-    source_identifier="192.0.2.150",
-    reporter_org="Abusix",
-    reporter_contact="reports@abusix.com",
-    on_behalf_of={
-        "org": "Swisscom",
-        "contact": "abuse@swisscom.ch"
-    },
-    description="Spam detected by Swisscom's infrastructure"
-)
+```python
+from xarf import create_evidence
 
-# The report clearly shows Abusix is reporting on behalf of Swisscom
-print(f"Reporter: {report.reporter.org}")
-print(f"On behalf of: {report.reporter.on_behalf_of.org}")
+evidence = create_evidence(
+    "message/rfc822",
+    raw_bytes,
+    description="Original email",
+    hash_algorithm="sha256",
+)
 ```
 
-## 🔍 Validation
-
-The parser performs multiple validation levels:
-
-1. **JSON Schema** - Structure and required fields
-2. **Data Types** - Field type validation
-3. **Business Rules** - Category-specific requirements
-4. **Evidence** - Content type and size validation
+**Parameters:**
 
-```python
-from xarf import XARFParser, XARFValidationError
-
-# Non-strict mode: collect errors without raising exception
-parser = XARFParser(strict=False)
-is_valid = parser.validate(report_json)
-
-if not is_valid:
-    errors = parser.get_errors()
-    for error in errors:
-        print(f"Error: {error}")
-
-# Strict mode: raise exception on first error
-strict_parser = XARFParser(strict=True)
-try:
-    report = strict_parser.parse(report_json)
-except XARFValidationError as e:
-    print(f"Validation failed: {e}")
-    print(f"Errors: {e.errors}")
-```
+- `content_type: str` — MIME type of the evidence (e.g., `'message/rfc822'`)
+- `payload: bytes | str` — The evidence data (strings are UTF-8 encoded)
+- `description: str | None` — Human-readable description
+- `hash_algorithm: Literal["sha256", "sha512", "sha1", "md5"]` — Hash algorithm (default: `"sha256"`)
 
----
+**Returns `XARFEvidence`** with computed `hash`, `size`, and base64-encoded `payload`.
 
-## 🔒 Security Best Practices
+### `schema_registry`
 
-### 1. Always Validate Input
+Access schema-derived validation rules and metadata programmatically.
 
 ```python
-from xarf import XARFParser, XARFValidationError
+from xarf import schema_registry
 
-parser = XARFParser(strict=True)
+# Get all valid categories
+schema_registry.get_categories()
+# {'messaging', 'connection', 'content', 'infrastructure', 'copyright', 'vulnerability', 'reputation'}
 
-def process_external_report(report_json: str):
-    """Safely process XARF report from external source."""
-    try:
-        # Validate before processing
-        if not parser.validate(report_json):
-            raise ValueError(f"Invalid report: {parser.get_errors()}")
+# Get valid types for a category
+schema_registry.get_types_for_category("connection")
+# {'ddos', 'port_scan', 'login_attack', ...}
 
-        report = parser.parse(report_json)
-        # Process validated report
-        return report
+# Check if a category/type combination is valid
+schema_registry.is_valid_type("connection", "ddos")  # True
 
-    except XARFValidationError as e:
-        # Log validation errors
-        log_security_event(f"Invalid XARF report received: {e.errors}")
-        raise
+# Get field metadata including descriptions
+schema_registry.get_field_metadata("confidence")
+# FieldMetadata(description='...', required=False, recommended=True, ...)
 ```
 
-### 2. Limit Evidence Size
+### Validation Details
 
-```python
-MAX_EVIDENCE_SIZE = 5 * 1024 * 1024  # 5MB per evidence item
-MAX_TOTAL_SIZE = 15 * 1024 * 1024   # 15MB total
-
-def validate_evidence_size(report):
-    """Enforce evidence size limits."""
-    total_size = 0
-    for evidence_item in report.evidence or []:
-        item_size = evidence_item.get('size', 0)
+Both `parse()` and `create_report()` run validation internally. Additional behaviors:
 
-        if item_size > MAX_EVIDENCE_SIZE:
-            raise ValueError(f"Evidence item too large: {item_size} bytes")
+- **Unknown fields** trigger warnings (or cause `report=None` in strict mode)
+- **Missing optional fields** can be discovered with `show_missing_optional=True`:
 
-        total_size += item_size
+```python
+result = parse(report, show_missing_optional=True)
 
-    if total_size > MAX_TOTAL_SIZE:
-        raise ValueError(f"Total evidence too large: {total_size} bytes")
+if result.info:
+    for item in result.info:
+        print(f"{item['field']}: {item['message']}")
+        # e.g., "description: OPTIONAL - Human-readable description of the abuse"
+        # e.g., "confidence: RECOMMENDED - Confidence score between 0.0 and 1.0"
 ```
 
-### 3. Verify Evidence Hashes
+**Type narrowing** after parsing — use `isinstance` or check `.category`/`.type`:
 
 ```python
-import hashlib
-import base64
-
-def verify_evidence_hash(evidence_item: dict) -> bool:
-    """Verify evidence payload matches declared hash."""
-    if 'hash' not in evidence_item:
-        return True  # Hash is optional
-
-    # Parse hash format: "algorithm:hexvalue"
-    hash_string = evidence_item['hash']
-    algorithm, expected_hash = hash_string.split(':', 1)
-
-    # Decode base64 payload
-    payload_bytes = base64.b64decode(evidence_item['payload'])
-
-    # Compute hash
-    if algorithm == 'sha256':
-        computed_hash = hashlib.sha256(payload_bytes).hexdigest()
-    elif algorithm == 'sha512':
-        computed_hash = hashlib.sha512(payload_bytes).hexdigest()
-    elif algorithm == 'md5':
-        computed_hash = hashlib.md5(payload_bytes).hexdigest()
-    else:
-        raise ValueError(f"Unsupported hash algorithm: {algorithm}")
-
-    return computed_hash == expected_hash
-```
+from xarf import parse, DdosReport
 
----
+result = parse(json_data)
+if isinstance(result.report, DdosReport):
+    print(result.report.destination_ip)
 
-## 🧬 Development
-
-```bash
-# Setup development environment
-git clone https://github.com/xarf/xarf-parser-python.git
-cd xarf-parser-python
-python -m venv venv
-source venv/bin/activate  # On Windows: venv\Scripts\activate
-pip install -e ".[dev,test]"
-
-# Run tests with coverage
-pytest --cov=xarf --cov-report=term -v tests/
-
-# Run quality checks
-black --check .
-flake8 xarf/ tests/
-mypy xarf/
-
-# Auto-format code
-black .
+# or check attributes directly
+if result.report and result.report.category == "connection":
+    print(result.report.type)
 ```
 
-### CI/CD Workflows
-
-This project uses two GitHub Actions workflows:
-
-1. **CI Workflow** (`.github/workflows/ci.yml`)
-   - Runs on every push to `main` and all pull requests
-   - Tests against Python 3.8, 3.9, 3.10, 3.11, and 3.12
-   - Runs linting checks: black, flake8, mypy
-   - Uploads coverage reports to Codecov
-
-2. **PyPI Publish Workflow** (`.github/workflows/publish.yml`)
-   - Runs on GitHub releases
-   - Manual workflow dispatch with Test PyPI option
-   - Builds distribution packages
-   - Publishes to PyPI or Test PyPI
-
-## 📚 Documentation
-
-- **[XARF v4 Specification](https://xarf.org/docs/specification/)** - Complete technical reference
-- **[v3 to v4 Migration Guide](docs/migration-guide.md)** - Automatic conversion and compatibility
-- **[CHANGELOG](CHANGELOG.md)** - Version history and breaking changes
-- **[Sample Reports](https://xarf.org/docs/types/)** - Real-world examples by category
-- **[Common Fields](https://xarf.org/docs/common-fields/)** - Field reference
-- **[Best Practices](https://xarf.org/docs/best-practices/)** - Implementation guidelines
-
-## 🤝 Contributing
-
-We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
-
-- **Bug Reports**: Use GitHub Issues
-- **Feature Requests**: Discuss in GitHub Discussions
-- **Pull Requests**: Follow our coding standards
-- **Testing**: Add tests for new features
+## v3 Backward Compatibility
 
-## 📄 License
+The library automatically detects XARF v3 reports (by the `Version` field) and converts them to v4 during parsing. Converted reports include `legacy_version: '3'` and deprecation warnings.
 
-MIT License - See [LICENSE](LICENSE) for details.
+```python
+from xarf import parse
 
-## 🔗 Related Projects
+result = parse(v3_report)
 
-- **[xarf-spec](https://github.com/xarf/xarf-spec)** - XARF v4 specification and JSON schemas
-- **[xarf.org](https://xarf.org)** - Official XARF website and documentation
-- **[xarf-javascript](https://github.com/xarf/xarf-javascript)** - JavaScript/TypeScript parser
-- **[xarf-go](https://github.com/xarf/xarf-go)** - Go implementation
-- **[xarf-java](https://github.com/xarf/xarf-java)** - Java implementation
-- **[xarf-csharp](https://github.com/xarf/xarf-csharp)** - C# implementation
+print(result.report.xarf_version)   # '4.2.0'
+print(result.report.category)       # mapped category (e.g., 'messaging')
+print(result.report.legacy_version) # '3'
+# result.warnings includes deprecation notice + conversion details
+```
 
-## 📈 Versioning
+You can also use the low-level utilities directly:
 
-This project follows semantic versioning with alpha/beta releases:
+```python
+from xarf import is_v3_report, convert_v3_to_v4, get_v3_deprecation_warning
 
-- `4.0.0a1`, `4.0.0a2` - Alpha releases (current)
-- `4.0.0b1`, `4.0.0b2` - Beta releases (planned)
-- `4.0.0` - Stable release (Q2 2024)
+if is_v3_report(json_data):
+    v4_data = convert_v3_to_v4(json_data)
+    print(get_v3_deprecation_warning())
+```
 
-## 🎯 Roadmap
+Unknown v3 report types cause a parse error listing the supported types. See [MIGRATION_V3_TO_V4.md](docs/MIGRATION_V3_TO_V4.md) for the full type mapping and migration strategies.
 
-### Alpha Phase (Current - v4.0.0a1)
+## Schema Management
 
-- [x] Core parser foundation
-- [x] JSON schema validation
-- [x] messaging, connection, content categories
-- [x] Generator functionality
-- [x] `on_behalf_of` support
-- [ ] Evidence handling improvements
-- [ ] Performance benchmarks
+This library validates against the official [xarf-spec](https://github.com/xarf/xarf-spec) JSON schemas. Schemas are bundled with the package and pinned to the spec version configured in `pyproject.toml`:
 
-### Beta Phase (Q1 2024)
+```toml
+[tool.xarf]
+spec_version = "v4.2.0"
+```
 
-- [ ] Complete category coverage (all 7)
-- [ ] XARF v3 compatibility layer
-- [ ] Advanced validation rules
-- [ ] CLI tools
-- [ ] Comprehensive documentation
+```bash
+# Re-fetch schemas (e.g., to pick up a newer spec version)
+python -m xarf fetch-schemas
 
-### Stable Release (Q2 2024)
+# Check whether a newer spec version is available
+python -m xarf check-schema-updates
+```
 
-- [ ] Production-ready performance
-- [ ] >95% test coverage
-- [ ] Integration examples
-- [ ] Community feedback incorporated
-- [ ] Performance optimizations
+To update to a newer spec version, change `spec_version` in `pyproject.toml` and run `python -m xarf fetch-schemas`.
 
----
+## Development
 
-## 💬 Support
+```bash
+pytest                        # Run tests
+pytest --cov=xarf             # Run tests with coverage
+ruff check xarf/              # Lint
+ruff format --check xarf/     # Check formatting
+mypy --strict xarf/           # Type-check
+```
 
-- **Documentation**: https://xarf.org
-- **GitHub Issues**: https://github.com/xarf/xarf-parser-python/issues
-- **Discussions**: https://github.com/xarf/xarf-spec/discussions
-- **Email**: contact@xarf.org
+See [CONTRIBUTING.md](CONTRIBUTING.md) for development guidelines.
 
----
+## Links
 
-**Note:** This library implements the official [XARF v4 specification](https://xarf.org/docs/specification/). Always refer to the specification for authoritative technical details.
+- [XARF Specification](https://xarf.org)
+- [GitHub Repository](https://github.com/xarf/xarf-python)
+- [PyPI Package](https://pypi.org/project/xarf/)
+- [Issue Tracker](https://github.com/xarf/xarf-python/issues)
+- [Migration Guide (v3 → v4)](docs/MIGRATION_V3_TO_V4.md)
+- [License (MIT)](LICENSE)
diff --git a/SECURITY.md b/SECURITY.md
index 29e8e7d..bab8fe0 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -1,127 +1,57 @@
 # Security Policy
 
-## Supported Versions
-
-We actively support and provide security updates for the following versions:
-
-| Version | Supported          |
-| ------- | ------------------ |
-| 2.x.x   | :white_check_mark: |
-| 1.x.x   | :x:                |
-
 ## Reporting a Vulnerability
 
-We take security vulnerabilities seriously. If you discover a security issue in XARF Python Parser, please report it responsibly.
+The XARF project takes security vulnerabilities seriously. We appreciate your efforts to responsibly disclose your findings.
 
 ### How to Report
 
 **Please DO NOT report security vulnerabilities through public GitHub issues.**
 
-Instead, report security vulnerabilities by:
+Instead, please report security vulnerabilities by emailing:
 
-1. **Email**: Send details to security@xarf.org
-2. **Private Advisory**: Use GitHub's [private security advisory feature](https://github.com/xarf/xarf-python/security/advisories/new)
+**security@abusix.com**
 
 ### What to Include
 
-When reporting a vulnerability, please include:
+Please include the following information in your report:
 
-- Description of the vulnerability
-- Steps to reproduce the issue
-- Affected versions
-- Potential impact assessment
-- Any proof-of-concept code (if applicable)
-- Your name/handle for credit (optional)
+- Type of vulnerability or security concern
+- Affected specification version(s)
+- Detailed description of the security issue
+- Potential impact on implementations
+- Suggested mitigation or fix (if applicable)
 
 ### Response Timeline
 
-- **Acknowledgment**: Within 48 hours of report
-- **Initial Assessment**: Within 5 business days
-- **Status Updates**: Every 7 days until resolution
-- **Fix Timeline**: Critical issues within 30 days, others within 90 days
-
-### Disclosure Policy
-
-- We will coordinate public disclosure with you
-- Security advisories will be published after fixes are released
-- We credit security researchers in advisories (unless you prefer to remain anonymous)
-
-## Security Features
-
-This project implements multiple security layers:
-
-### Automated Scanning
-
-- **CodeQL Analysis**: Deep semantic security analysis (weekly + on PRs)
-- **Dependency Review**: PR-based vulnerability scanning
-- **Dependabot**: Automated dependency security updates
-- **Secret Scanning**: Detects committed credentials
-- **Bandit**: Python-specific security linter in CI
-
-### Code Quality Gates
-
-All pull requests must pass:
-
-- Static security analysis (Bandit)
-- Type safety checks (MyPy strict mode)
-- Dependency vulnerability scans
-- Code complexity limits (Radon)
-
-### Security Best Practices
+- **Initial Response**: Within 48 hours
+- **Status Update**: Within 7 days
+- **Resolution**: Depends on severity and complexity
 
-Our codebase follows:
+### Security Update Process
 
-- Strict type hints for safety
-- Input validation via Pydantic models
-- No hardcoded credentials
-- Principle of least privilege
-- Regular dependency updates
+1. **Triage**: We'll confirm the vulnerability and assess severity
+2. **Specification Review**: We'll review affected specification sections
+3. **Fix Development**: We'll develop and review proposed changes
+4. **Community Review**: We'll engage with implementation maintainers
+5. **Disclosure**: We'll coordinate disclosure timing with you
+6. **Publication**: We'll publish updated specification with security notes
 
-## Known Security Considerations
+## Vulnerability Disclosure Policy
 
-### XARF Report Processing
+We follow a **coordinated disclosure** model:
 
-When processing XARF reports:
-
-1. **Input Validation**: All reports are validated against JSON schema
-2. **Email Parsing**: Uses python-email-validator for safe email processing
-3. **Date Handling**: Uses python-dateutil for timezone-aware parsing
-4. **No Code Execution**: Parser does not execute any user-provided code
-
-### Dependencies
-
-We actively monitor and update dependencies for security issues:
-
-- Automated Dependabot updates for vulnerabilities
-- Grouped minor/patch updates for development dependencies
-- Individual PRs for production dependency major updates
-
-## Security Updates
-
-Security updates are released as:
-
-- **Critical**: Immediate patch release
-- **High**: Patch release within 7 days
-- **Moderate**: Included in next minor release
-- **Low**: Included in next release cycle
-
-Subscribe to [GitHub Security Advisories](https://github.com/xarf/xarf-python/security/advisories) for notifications.
-
-## Responsible Disclosure
-
-We are committed to working with security researchers under responsible disclosure guidelines:
-
-1. Allow reasonable time for fixes before public disclosure
-2. Avoid privacy violations and data destruction
-3. Do not exploit vulnerabilities beyond proof-of-concept
-4. Respect user privacy and data protection regulations
+1. **Private Disclosure**: Report sent to security@abusix.com
+2. **Acknowledgment**: We confirm receipt within 48 hours
+3. **Investigation**: We investigate with specification experts
+4. **Community Review**: We consult with implementation maintainers
+5. **Specification Update**: We publish updated specification
+6. **Public Disclosure**: We publish advisory 7 days after publication
 
 ## Security Hall of Fame
 
-We recognize security researchers who help improve our security:
-
-<!-- Security researchers will be listed here after coordinated disclosure -->
+We recognize security researchers who responsibly disclose vulnerabilities:
 
----
+<!-- Security researchers will be listed here -->
 
-For general inquiries or questions about this policy, contact: security@xarf.org
+_No vulnerabilities reported yet._
diff --git a/docs/COMPATIBILITY.md b/docs/COMPATIBILITY.md
deleted file mode 100644
index f9aead8..0000000
--- a/docs/COMPATIBILITY.md
+++ /dev/null
@@ -1,417 +0,0 @@
-# XARF Python Library - Backwards Compatibility Guide
-
-**Library Version**: 4.0.0+
-**XARF Specification**: v4.0.0
-**Last Updated**: 2025-01-23
-
-## Overview
-
-The XARF Python library v4.0.0 implements the XARF v4 specification with full backwards compatibility for XARF v3 reports. This guide details the compatibility strategy, breaking changes, and migration paths.
-
-## Field Name Changes: `class` → `category`
-
-### The Change
-
-XARF v4 specification renamed the field from `class` (v3) to `category` (v4) to avoid conflicts with programming language reserved keywords and improve semantic clarity.
-
-| Version | Field Name | Python Access | Status |
-|---------|------------|---------------|--------|
-| **v3** | `class` | `report.class_` (workaround) | ❌ Deprecated |
-| **v4** | `category` | `report.category` (clean) | ✅ Current |
-
-### Why This Matters
-
-- **Python Keyword Conflict**: `class` is a reserved keyword requiring awkward workarounds
-- **Better Semantics**: "category" more accurately describes abuse classification
-- **Cross-Language Consistency**: Easier implementation across Java, JavaScript, C#, Go
-
-## Compatibility Strategy
-
-### Parsing Behavior
-
-The parser accepts **both field names** with the following precedence:
-
-```python
-# Priority order when parsing:
-# 1. "category" field (v4 standard)
-# 2. "class" field (v3 compatibility)
-# 3. Raise error if neither present
-
-from xarf import XARFParser
-
-parser = XARFParser()
-
-# ✅ v4 format (recommended)
-v4_report = parser.parse('{"category": "messaging", ...}')
-
-# ✅ v3 format (backwards compatibility)
-v3_report = parser.parse('{"class": "messaging", ...}')
-
-# Both work identically
-print(v4_report.category)  # "messaging"
-print(v3_report.category)  # "messaging" (auto-converted)
-```
-
-### Generation Behavior
-
-The generator **always** produces v4 format with `category`:
-
-```python
-from xarf.generator import XARFGenerator
-
-gen = XARFGenerator()
-
-report = gen.create_report(
-    category="messaging",  # v4 parameter name
-    report_type="spam",
-    source_identifier="192.0.2.1",
-    reporter_org="Security Team",
-    reporter_contact="abuse@example.com"
-)
-
-# Output uses "category" field only
-print(report.to_json())
-# {"category": "messaging", ...}
-```
-
-## Migration Guide
-
-### Step 1: Update Dependencies
-
-```bash
-# Upgrade to latest version
-pip install --upgrade xarf-parser>=4.0.0
-
-# Or specify exact version
-pip install xarf-parser==4.0.0
-```
-
-### Step 2: Code Changes
-
-#### Before (v3 / Pre-4.0.0)
-
-```python
-# JSON with "class" field
-report_json = {
-    "xarf_version": "4.0.0",
-    "class": "connection",  # ❌ Old field name
-    "type": "ddos"
-}
-
-# Awkward Python access
-print(report.class_)  # ❌ Requires underscore
-```
-
-#### After (v4.0.0+)
-
-```python
-# JSON with "category" field
-report_json = {
-    "xarf_version": "4.0.0",
-    "category": "connection",  # ✅ New field name
-    "type": "ddos"
-}
-
-# Clean Python access
-print(report.category)  # ✅ No workaround needed
-```
-
-### Step 3: Update Function Parameters
-
-```python
-# Before
-def process_report(report_class: str, report_type: str):
-    if report_class == "messaging":
-        handle_messaging(report_type)
-
-# After
-def process_report(category: str, report_type: str):
-    if category == "messaging":
-        handle_messaging(report_type)
-```
-
-### Step 4: Update Tests
-
-```python
-# Before
-def test_report():
-    assert report.class_ == "content"
-
-# After
-def test_report():
-    assert report.category == "content"
-```
-
-## Backwards Compatibility Features
-
-### Auto-Detection and Conversion
-
-The library automatically detects and converts v3 reports:
-
-```python
-from xarf import XARFParser
-
-parser = XARFParser()
-
-# Legacy v3 report (still works)
-v3_json = '''
-{
-  "Version": "4.0.0",
-  "ReporterInfo": {
-    "ReporterOrg": "Security Team",
-    "ReporterOrgEmail": "abuse@example.com"
-  },
-  "Report": {
-    "ReportClass": "Activity",
-    "ReportType": "Spam",
-    "SourceIp": "192.0.2.1"
-  }
-}
-'''
-
-# Auto-converts to v4 format
-report = parser.parse(v3_json)
-print(report.category)  # "messaging" (auto-mapped from "Activity")
-print(report.type)      # "spam" (normalized)
-```
-
-### Deprecation Warnings
-
-```python
-import warnings
-
-# Using deprecated "class_" accessor triggers warning
-report = parser.parse(report_json)
-
-# Triggers DeprecationWarning (if implemented)
-# "Accessing 'class_' is deprecated, use 'category' instead"
-old_value = report.class_
-```
-
-## Common Migration Issues
-
-### Issue 1: KeyError When Accessing Old JSON
-
-**Problem**: Parsing old JSON with `"class"` field in strict v4 mode
-
-**Solution**: Use permissive mode (default) for auto-conversion
-
-```python
-# Permissive mode (default) - auto-converts
-parser = XARFParser(strict=False)
-report = parser.parse(old_json)  # Works with "class" or "category"
-
-# Strict mode - requires v4 format
-strict_parser = XARFParser(strict=True)
-try:
-    report = strict_parser.parse(old_json)  # May fail with v3 format
-except XARFValidationError as e:
-    print(f"Use permissive mode for v3 reports: {e}")
-```
-
-### Issue 2: Database Schema Migration
-
-**Problem**: Existing databases use `report_class` column
-
-**Solution**: Database migration script
-
-```python
-import sqlite3
-
-def migrate_database(db_path: str):
-    """Migrate database from class to category field."""
-    conn = sqlite3.connect(db_path)
-    cursor = conn.cursor()
-
-    # Add new column
-    cursor.execute('''
-        ALTER TABLE xarf_reports
-        ADD COLUMN category VARCHAR(50)
-    ''')
-
-    # Copy data from old column
-    cursor.execute('''
-        UPDATE xarf_reports
-        SET category = report_class
-        WHERE category IS NULL
-    ''')
-
-    # Create index
-    cursor.execute('''
-        CREATE INDEX idx_category
-        ON xarf_reports(category)
-    ''')
-
-    conn.commit()
-    conn.close()
-```
-
-### Issue 3: Third-Party Integration
-
-**Problem**: External systems expect `"class"` field
-
-**Solution**: Compatibility wrapper
-
-```python
-class XARFCompatibilityWrapper:
-    """Wrapper providing both old and new field formats."""
-
-    def to_legacy_format(self, report: dict) -> dict:
-        """Add 'class' field for legacy systems."""
-        legacy_report = report.copy()
-        if "category" in legacy_report:
-            legacy_report["class"] = legacy_report["category"]
-        return legacy_report
-
-    def to_modern_format(self, report: dict) -> dict:
-        """Convert 'class' to 'category' for v4."""
-        modern_report = report.copy()
-        if "class" in modern_report and "category" not in modern_report:
-            modern_report["category"] = modern_report.pop("class")
-        return modern_report
-
-# Usage
-wrapper = XARFCompatibilityWrapper()
-
-# Sending to legacy API
-legacy_data = wrapper.to_legacy_format(v4_report)
-send_to_legacy_api(legacy_data)
-
-# Receiving from legacy API
-modern_data = wrapper.to_modern_format(received_data)
-report = parser.parse(modern_data)
-```
-
-## Category Support Matrix
-
-### Currently Supported (v4.0.0)
-
-| Category | Status | Types Supported |
-|----------|--------|-----------------|
-| **messaging** | ✅ Full | spam, phishing, social_engineering |
-| **connection** | ✅ Full | ddos, port_scan, login_attack, ip_spoofing |
-| **content** | ✅ Full | phishing_site, malware_distribution, defacement, web_hack |
-| **infrastructure** | 🚧 Planned | botnet, compromised_server |
-| **copyright** | 🚧 Planned | infringement, dmca, p2p |
-| **vulnerability** | 🚧 Planned | cve, misconfiguration, open_service |
-| **reputation** | 🚧 Planned | blocklist, threat_intelligence |
-
-### Planned Support (v4.1.0+)
-
-All remaining categories will be added in minor version updates without breaking changes.
-
-## Version Compatibility Matrix
-
-| Python Library | XARF Spec | v3 Support | v4 Support | Notes |
-|----------------|-----------|------------|------------|-------|
-| 3.x.x | v3 | ✅ Native | ❌ None | End of life |
-| 4.0.0-alpha | v4 | ✅ Auto-convert | ✅ Full | Alpha release |
-| 4.0.0 | v4 | ✅ Auto-convert | ✅ Full | Stable (planned Q2 2024) |
-| 4.1.0+ | v4 | ✅ Auto-convert | ✅ Full | All categories |
-
-## Deprecation Timeline
-
-| Date | Version | Action |
-|------|---------|--------|
-| **2024-01-20** | 4.0.0-alpha | `class_` marked deprecated |
-| **Q2 2024** | 4.0.0 | Deprecation warnings enabled |
-| **Q4 2024** | 4.1.0 | v3 auto-convert moves to optional module |
-| **2025-Q1** | 5.0.0 | Breaking: Remove `class_` accessor |
-
-## Testing Compatibility
-
-### Running Compatibility Tests
-
-```bash
-# Run full test suite including compatibility tests
-pytest tests/ -v
-
-# Run only compatibility tests
-pytest tests/test_compatibility.py -v
-
-# Run with warnings visible
-pytest tests/ -W default::DeprecationWarning
-```
-
-### Example Compatibility Test
-
-```python
-import pytest
-from xarf import XARFParser
-
-def test_v3_compatibility():
-    """Ensure v3 reports parse correctly."""
-    parser = XARFParser()
-
-    v3_report = {
-        "Version": "4.0.0",
-        "ReporterInfo": {
-            "ReporterOrg": "Test Org",
-            "ReporterOrgEmail": "abuse@test.com"
-        },
-        "Report": {
-            "ReportClass": "Activity",
-            "ReportType": "Spam",
-            "SourceIp": "192.0.2.1"
-        }
-    }
-
-    report = parser.parse(v3_report)
-
-    assert report.category == "messaging"
-    assert report.type == "spam"
-    assert report.source_identifier == "192.0.2.1"
-```
-
-## Best Practices
-
-### For New Projects
-
-1. ✅ Use `category` field in all new code
-2. ✅ Use v4 JSON format exclusively
-3. ✅ Test with v4 schema validation
-4. ✅ Follow migration guide for consistency
-
-### For Existing Projects
-
-1. ✅ Enable deprecation warnings
-2. ✅ Update code incrementally
-3. ✅ Test with both v3 and v4 samples
-4. ✅ Plan database migration
-5. ✅ Update integration partners
-
-### For Library Maintainers
-
-1. ✅ Accept both `class` and `category` during transition
-2. ✅ Emit deprecation warnings
-3. ✅ Provide migration tools
-4. ✅ Maintain backwards compatibility for 12+ months
-
-## Getting Help
-
-### Resources
-
-- **Migration Guide**: [MIGRATION_GUIDE.md](MIGRATION_GUIDE.md)
-- **Changelog**: [CHANGELOG.md](../CHANGELOG.md)
-- **XARF Specification**: https://xarf.org/docs/specification/
-- **GitHub Issues**: https://github.com/xarf/xarf-python/issues
-- **GitHub Discussions**: https://github.com/xarf/xarf-spec/discussions
-
-### Support Channels
-
-- GitHub Issues for bugs
-- GitHub Discussions for questions
-- Email: contact@xarf.org
-
-## Related Documentation
-
-- [README.md](../README.md) - Library overview
-- [MIGRATION_GUIDE.md](MIGRATION_GUIDE.md) - Detailed migration steps
-- [DEPRECATED.md](DEPRECATED.md) - Deprecated features list
-- [API_REFERENCE.md](API_REFERENCE.md) - Complete API documentation
-
----
-
-**Status**: Production Ready
-**Stability**: Stable
-**Compatibility**: XARF v3 + v4
diff --git a/docs/DEPRECATED.md b/docs/DEPRECATED.md
deleted file mode 100644
index b1359c6..0000000
--- a/docs/DEPRECATED.md
+++ /dev/null
@@ -1,377 +0,0 @@
-# XARF Python Library - Deprecated Features
-
-**Library Version**: 4.0.0+
-**Last Updated**: 2025-01-23
-
-## Overview
-
-This document lists all deprecated features in the XARF Python library, their replacements, and removal timelines. Deprecation follows semantic versioning with clear migration paths.
-
-## Deprecation Policy
-
-- **Warning Period**: Minimum 12 months before removal
-- **Documentation**: All deprecations documented with examples
-- **Warnings**: Python `DeprecationWarning` issued when used
-- **Alternatives**: Clear migration path provided
-
-## Currently Deprecated Features
-
-### 1. Field Name: `class_` Property
-
-**Status**: ⚠️ Deprecated since v4.0.0 (2024-01-20)
-**Removal**: v5.0.0 (estimated 2025-Q1)
-**Reason**: XARF v4 spec renamed field to `category`
-
-#### What's Deprecated
-
-```python
-from xarf import XARFParser
-
-parser = XARFParser()
-report = parser.parse(report_json)
-
-# ❌ Deprecated - will be removed in v5.0.0
-old_value = report.class_
-if report.class_ == "messaging":
-    process_messaging(report)
-```
-
-#### Migration Path
-
-```python
-# ✅ Use this instead
-new_value = report.category
-if report.category == "messaging":
-    process_messaging(report)
-```
-
-#### Deprecation Warning
-
-```python
-DeprecationWarning: Accessing 'class_' property is deprecated.
-Use 'category' instead. This will be removed in v5.0.0.
-```
-
----
-
-### 2. Parameter Name: `report_class` in Generator
-
-**Status**: ⚠️ Deprecated since v4.0.0 (2024-01-20)
-**Removal**: v5.0.0 (estimated 2025-Q1)
-**Reason**: Renamed to `category` for consistency
-
-#### What's Deprecated
-
-```python
-from xarf.generator import XARFGenerator
-
-gen = XARFGenerator()
-
-# ❌ Deprecated parameter name
-report = gen.create_report(
-    report_class="messaging",  # Old parameter
-    report_type="spam",
-    source_identifier="192.0.2.1",
-    reporter_org="Security Team",
-    reporter_contact="abuse@example.com"
-)
-```
-
-#### Migration Path
-
-```python
-# ✅ Use this instead
-report = gen.create_report(
-    category="messaging",  # New parameter
-    report_type="spam",
-    source_identifier="192.0.2.1",
-    reporter_org="Security Team",
-    reporter_contact="abuse@example.com"
-)
-```
-
----
-
-### 3. JSON Field: `"class"` in Report Structure
-
-**Status**: ⚠️ Deprecated for generation since v4.0.0 (2024-01-20)
-**Parsing Support**: Indefinite (auto-conversion to `category`)
-**Generation**: Never output (v4.0.0+)
-**Reason**: XARF v4 spec standardizes on `category`
-
-#### What's Deprecated
-
-```python
-# ❌ Deprecated JSON structure (parsing still works)
-report_json = {
-    "xarf_version": "4.0.0",
-    "class": "messaging",  # Deprecated field name
-    "type": "spam"
-}
-```
-
-#### Migration Path
-
-```python
-# ✅ Use this instead
-report_json = {
-    "xarf_version": "4.0.0",
-    "category": "messaging",  # New field name
-    "type": "spam"
-}
-```
-
-**Note**: Parsing of `"class"` field will be supported indefinitely for backwards compatibility, but all generated reports use `"category"`.
-
----
-
-### 4. Module: `xarf.converter` (Removed)
-
-**Status**: ❌ Removed in v4.0.0
-**Removal Date**: 2024-01-20
-**Reason**: Being redesigned for better v3→v4 conversion
-**Planned Return**: v4.1.0 or v4.2.0
-
-#### What Was Removed
-
-```python
-# ❌ No longer available
-from xarf.converter import XARFConverter
-
-converter = XARFConverter()
-v4_report = converter.convert_v3_to_v4(v3_report)
-```
-
-#### Temporary Workaround
-
-```python
-def simple_v3_to_v4(v3_report: dict) -> dict:
-    """Basic v3 to v4 conversion."""
-    v4_report = v3_report.copy()
-
-    # Update version
-    v4_report["xarf_version"] = "4.0.0"
-
-    # Migrate class to category
-    if "class" in v4_report:
-        v4_report["category"] = v4_report.pop("class")
-
-    # Map v3 structure to v4
-    if "Report" in v4_report:
-        report_data = v4_report.pop("Report")
-        v4_report.update(report_data)
-
-    if "ReporterInfo" in v4_report:
-        reporter_data = v4_report.pop("ReporterInfo")
-        v4_report["reporter"] = {
-            "org": reporter_data.get("ReporterOrg"),
-            "contact": reporter_data.get("ReporterOrgEmail"),
-            "type": "unknown"
-        }
-
-    return v4_report
-```
-
----
-
-## Previously Deprecated (Now Removed)
-
-### Python 3.7 Support
-
-**Deprecated**: v3.5.0 (2023-06-01)
-**Removed**: v4.0.0 (2024-01-20)
-**Reason**: Python 3.7 reached end of life (June 2023)
-
-#### What Changed
-
-- Minimum Python version is now 3.8+
-- Library uses Python 3.8+ features (walrus operator, etc.)
-
----
-
-## Future Deprecations
-
-### Planned for v4.1.0
-
-No new deprecations planned.
-
-### Planned for v5.0.0 (2025)
-
-#### 1. Legacy v3 Auto-Conversion (Under Review)
-
-**Status**: 🔍 Under consideration
-**Potential Change**: Move to optional module
-**Reason**: Reduce complexity, improve performance
-
-```python
-# May require explicit opt-in for v3 support
-from xarf.legacy import V3CompatibleParser
-
-parser = V3CompatibleParser()  # Explicit v3 support
-report = parser.parse(v3_json)
-```
-
----
-
-## Deprecation Timeline
-
-| Feature | Deprecated | Warning Period | Removal | Status |
-|---------|------------|----------------|---------|--------|
-| `class_` property | v4.0.0 (2024-01) | 12 months | v5.0.0 (2025-01) | ⚠️ Active |
-| `report_class` param | v4.0.0 (2024-01) | 12 months | v5.0.0 (2025-01) | ⚠️ Active |
-| `"class"` JSON field | v4.0.0 (2024-01) | Indefinite | Never (parsing) | 🔒 Permanent parsing support |
-| `xarf.converter` module | v4.0.0 (2024-01) | - | Removed | ❌ Removed (returning in v4.1+) |
-| Python 3.7 | v3.5.0 (2023-06) | 8 months | v4.0.0 (2024-01) | ❌ Removed |
-
----
-
-## Handling Deprecation Warnings
-
-### Viewing Warnings
-
-```python
-import warnings
-
-# Show all deprecation warnings
-warnings.filterwarnings('default', category=DeprecationWarning)
-
-# Or show them once
-warnings.filterwarnings('once', category=DeprecationWarning)
-
-# Or make them errors (strict mode)
-warnings.filterwarnings('error', category=DeprecationWarning)
-```
-
-### Suppressing Specific Warnings (Not Recommended)
-
-```python
-import warnings
-
-# Suppress specific warning (not recommended)
-warnings.filterwarnings(
-    'ignore',
-    message='.*class_.*',
-    category=DeprecationWarning
-)
-```
-
-**Warning**: Suppressing deprecation warnings can lead to breaking changes when features are removed.
-
----
-
-## Migration Checklist
-
-Use this checklist when updating deprecated code:
-
-### Code Updates
-
-- [ ] Replace all `report.class_` with `report.category`
-- [ ] Update `report_class` parameters to `category`
-- [ ] Change JSON `"class"` fields to `"category"`
-- [ ] Update function signatures using `class` parameter names
-- [ ] Update tests for new field names
-- [ ] Update documentation and comments
-
-### Testing
-
-- [ ] Run tests with deprecation warnings enabled
-- [ ] Verify all warnings resolved
-- [ ] Test with both v3 and v4 sample data
-- [ ] Validate generated reports against v4 schema
-
-### Documentation
-
-- [ ] Update README examples
-- [ ] Update API documentation
-- [ ] Update internal wiki/docs
-- [ ] Update training materials
-
----
-
-## Getting Deprecation Notices
-
-### Command Line
-
-```bash
-# Run with warnings visible
-python -W default::DeprecationWarning your_script.py
-
-# Or use pytest
-pytest -W default::DeprecationWarning
-
-# Make warnings errors (fail fast)
-python -W error::DeprecationWarning your_script.py
-```
-
-### Programmatic Detection
-
-```python
-import warnings
-
-def check_for_deprecations():
-    """Detect deprecated usage in codebase."""
-    with warnings.catch_warnings(record=True) as w:
-        warnings.simplefilter("always", DeprecationWarning)
-
-        # Your code here
-        report = parser.parse(data)
-        _ = report.class_  # Triggers warning
-
-        # Check warnings
-        for warning in w:
-            if issubclass(warning.category, DeprecationWarning):
-                print(f"Deprecation: {warning.message}")
-                print(f"File: {warning.filename}:{warning.lineno}")
-```
-
----
-
-## Staying Informed
-
-### How to Track Deprecations
-
-1. **CHANGELOG.md**: Read before upgrading
-2. **GitHub Releases**: Subscribe to release notifications
-3. **Documentation**: Check deprecation notices
-4. **This File**: Review regularly for updates
-
-### Subscribe to Updates
-
-- GitHub Watch: https://github.com/xarf/xarf-python
-- Release RSS: https://github.com/xarf/xarf-python/releases.atom
-- PyPI RSS: https://pypi.org/rss/project/xarf-parser/releases.xml
-
----
-
-## Support and Questions
-
-### If You're Affected by a Deprecation
-
-1. **Review migration path** in this document
-2. **Check MIGRATION_GUIDE.md** for detailed steps
-3. **Search GitHub issues** for related discussions
-4. **Open new issue** if you need help
-
-### Reporting Issues
-
-If you believe a deprecation is:
-- Too aggressive
-- Missing migration path
-- Incorrectly implemented
-
-Please open an issue: https://github.com/xarf/xarf-python/issues
-
----
-
-## Related Documentation
-
-- [MIGRATION_GUIDE.md](MIGRATION_GUIDE.md) - Step-by-step migration
-- [COMPATIBILITY.md](COMPATIBILITY.md) - Backwards compatibility
-- [CHANGELOG.md](../CHANGELOG.md) - Complete version history
-- [README.md](../README.md) - Library overview
-
----
-
-**Last Updated**: 2025-01-23
-**Review Schedule**: Quarterly
-**Next Review**: 2025-04-23
diff --git a/docs/MIGRATION_V3_TO_V4.md b/docs/MIGRATION_V3_TO_V4.md
new file mode 100644
index 0000000..baa1f28
--- /dev/null
+++ b/docs/MIGRATION_V3_TO_V4.md
@@ -0,0 +1,211 @@
+# XARF v3 to v4 Migration Guide
+
+## Overview
+
+XARF v4 introduces a category-based architecture that improves upon the v3 format. This Python library provides automatic backward compatibility, making migration seamless.
+
+## Automatic Conversion
+
+The library automatically detects and converts v3 reports to v4 format:
+
+```python
+from xarf import parse
+
+# v3 report is automatically detected and converted
+result = parse(v3_json_data)
+# result.warnings includes:
+#   "DEPRECATION WARNING: XARF v3 format detected. The v3 format has been
+#    automatically converted to v4. Please update your systems to generate
+#    v4 reports directly. v3 support will be removed in a future major version."
+```
+
+## What Changes
+
+### Structure Changes
+
+**v3 Format:**
+
+```json
+{
+  "Version": "3",
+  "ReporterInfo": {
+    "ReporterOrg": "Security Team",
+    "ReporterOrgEmail": "abuse@example.com"
+  },
+  "Report": {
+    "ReportType": "Spam",
+    "Date": "2024-01-15T10:00:00Z",
+    "SourceIp": "192.0.2.1"
+  }
+}
+```
+
+**v4 Format (after conversion):**
+
+```json
+{
+  "xarf_version": "4.2.0",
+  "report_id": "auto-generated-uuid",
+  "timestamp": "2024-01-15T10:00:00Z",
+  "reporter": {
+    "org": "Security Team",
+    "contact": "abuse@example.com",
+    "domain": "example.com"
+  },
+  "sender": {
+    "org": "Security Team",
+    "contact": "abuse@example.com",
+    "domain": "example.com"
+  },
+  "source_identifier": "192.0.2.1",
+  "category": "messaging",
+  "type": "spam",
+  "legacy_version": "3",
+  "_internal": {
+    "original_report_type": "Spam",
+    "converted_at": "2024-01-15T10:05:00Z"
+  }
+}
+```
+
+### Field Mappings
+
+| v3 Field                                | v4 Field            | Notes                                             |
+| --------------------------------------- | ------------------- | ------------------------------------------------- |
+| `Version`                               | `xarf_version`      | Set to "4.2.0"                                    |
+| N/A                                     | `report_id`         | Auto-generated UUID                               |
+| `ReporterInfo.ReporterOrg`              | `reporter.org`      | Direct mapping                                    |
+| `ReporterInfo.ReporterOrgEmail`         | `reporter.contact`  | Direct mapping                                    |
+| `ReporterInfo.ReporterOrgEmail`         | `reporter.domain`   | Extracted from email domain part                  |
+| N/A                                     | `sender`            | Set to same values as `reporter`                  |
+| `Report.Date`                           | `timestamp`         | Direct mapping                                    |
+| `Report.SourceIp` or `Report.Source.IP` | `source_identifier` | Priority: Source.IP > SourceIp > Source.URL > Url |
+| `Report.ReportType`                     | `category` + `type` | Mapped per table below                            |
+| `Report.Attachment` or `Report.Samples` | `evidence`          | Structure converted, hash and size added          |
+| `Report.AdditionalInfo.DetectionMethod` | `evidence_source`   | Only set if explicitly provided in v3             |
+
+### Report Type Mappings
+
+| v3 ReportType  | v4 Category      | v4 Type        |
+| -------------- | ---------------- | -------------- |
+| `Spam`         | `messaging`      | `spam`         |
+| `Login-Attack` | `connection`     | `login_attack` |
+| `Port-Scan`    | `connection`     | `port_scan`    |
+| `DDoS`         | `connection`     | `ddos`         |
+| `Phishing`     | `content`        | `phishing`     |
+| `Malware`      | `content`        | `malware`      |
+| `Botnet`       | `infrastructure` | `botnet`       |
+| `Copyright`    | `copyright`      | `copyright`    |
+
+**Note**: Unknown v3 report types are not silently converted — they cause a parse error listing the supported types. Only the 8 types above are supported.
+
+## Deprecation Warnings
+
+When parsing v3 reports, you'll receive deprecation warnings in `result.warnings`. A `UserWarning` subclass (`XARFv3DeprecationWarning`) is also emitted via Python's `warnings` module, so existing warning filters apply:
+
+```python
+import warnings
+from xarf import parse, XARFv3DeprecationWarning
+
+# Suppress v3 deprecation warnings if desired
+warnings.filterwarnings("ignore", category=XARFv3DeprecationWarning)
+
+result = parse(v3_report)
+# result.warnings still contains the deprecation message
+```
+
+## Migration Strategies
+
+### Phase 1: Accept Both Formats
+
+Use the library's automatic conversion:
+
+```python
+from xarf import parse
+
+def process_report(json_data: str | dict) -> None:
+    result = parse(json_data)
+
+    if result.report and result.report.legacy_version == "3":
+        print("Received v3 report - consider upgrading sender")
+
+    # Process as v4 report
+    handle_v4_report(result.report)
+```
+
+### Phase 2: Monitor v3 Usage
+
+Track v3 report usage to plan deprecation:
+
+```python
+from xarf import parse
+
+def track_legacy_usage(json_data: str | dict) -> None:
+    result = parse(json_data)
+
+    if result.report and result.report.legacy_version == "3":
+        metrics.increment("xarf.v3.reports")
+        log_deprecation_notice(result.report.reporter.contact)
+```
+
+### Phase 3: Generate v4 Reports
+
+Update your report generators to produce v4 format:
+
+```python
+from xarf import create_report
+
+result = create_report(
+    category="messaging",
+    type="spam",
+    source_identifier="192.0.2.100",
+    reporter={
+        "org": "Security Team",
+        "contact": "abuse@example.com",
+        "domain": "example.com",
+    },
+    sender={
+        "org": "Security Team",
+        "contact": "abuse@example.com",
+        "domain": "example.com",
+    },
+    # ... additional fields
+)
+```
+
+## Breaking Changes from v3
+
+1. **Required Fields**: v4 requires `report_id` (UUID) — auto-generated during conversion
+2. **Reporter Domain**: v4 requires `reporter.domain` — extracted from the reporter email address
+3. **Sender Field**: v4 requires a `sender` object — set to the same values as `reporter` during conversion
+4. **Category System**: v3's single `ReportType` becomes `category` + `type` in v4
+5. **Timestamp Format**: Both use ISO 8601, but v4 is more strict
+6. **Evidence Structure**: v3's `Attachment`/`Samples` becomes structured `evidence` array with computed `hash` (SHA256) and `size` fields
+7. **Evidence Source**: v4's `evidence_source` is only set if `AdditionalInfo.DetectionMethod` is present in the v3 report — it is not defaulted
+
+## Unsupported v3 Features
+
+The following v3 fields have no direct v4 equivalent and are not preserved:
+
+- `Disclosure` — not included in v4 core spec
+- `ReporterInfo.ReporterContactName` — not in v4 core spec
+- `ReporterInfo.ReporterContactPhone` — not in v4 core spec
+
+If you need these fields, consider storing them in v4's `_internal` section:
+
+```python
+from xarf import is_v3_report, convert_v3_to_v4
+
+if is_v3_report(v3_data):
+    v4_data = convert_v3_to_v4(v3_data)
+    v4_data.setdefault("_internal", {}).update({
+        "v3_disclosure": v3_data.get("Disclosure"),
+        "v3_contact_name": v3_data.get("ReporterInfo", {}).get("ReporterContactName"),
+    })
+```
+
+## Getting Help
+
+- Check the [XARF v4 Specification](https://xarf.org)
+- Review [API Documentation](https://github.com/xarf/xarf-python)
+- Open an [Issue](https://github.com/xarf/xarf-python/issues)
diff --git a/docs/QUICK_START.md b/docs/QUICK_START.md
deleted file mode 100644
index 169a9a4..0000000
--- a/docs/QUICK_START.md
+++ /dev/null
@@ -1,244 +0,0 @@
-# XARF Parser Python - Quick Start Guide
-
-## Installation
-
-```bash
-# Clone the repository
-git clone https://github.com/xarf/xarf-parser-python.git
-cd xarf-parser-python
-
-# Create and activate virtual environment
-python3 -m venv venv
-source venv/bin/activate  # On Windows: venv\Scripts\activate
-
-# Install package with dev dependencies
-pip install -e ".[dev]"
-
-# Install pre-commit hooks
-pre-commit install
-```
-
-## Development Workflow
-
-### 1. Before Starting Work
-
-```bash
-# Activate virtual environment
-source venv/bin/activate
-
-# Ensure dependencies are up to date
-pip install -e ".[dev]"
-```
-
-### 2. During Development
-
-```bash
-# Run tests
-pytest
-
-# Run specific test
-pytest tests/test_parser.py::test_basic_parsing
-
-# Run with coverage
-pytest --cov=xarf --cov-report=html
-```
-
-### 3. Before Committing
-
-```bash
-# Run all pre-commit checks
-./scripts/run-precommit.sh run --all-files
-
-# Or manually:
-black xarf/ tests/
-isort xarf/ tests/
-flake8 xarf/ tests/
-mypy xarf/
-pytest
-```
-
-### 4. Committing Changes
-
-```bash
-# Stage your changes
-git add .
-
-# Commit (pre-commit hooks run automatically)
-git commit -m "Your commit message"
-
-# If hooks fail, fix issues and commit again
-git add .
-git commit -m "Your commit message"
-```
-
-## Pre-commit Hooks
-
-The project uses comprehensive pre-commit hooks:
-
-- **Formatting**: black, isort, trailing-whitespace, end-of-file-fixer
-- **Linting**: flake8, autoflake
-- **Type checking**: mypy
-- **Docstrings**: pydocstyle
-- **Security**: bandit
-- **Complexity**: radon
-
-### Common Commands
-
-```bash
-# Run all hooks on all files
-./scripts/run-precommit.sh run --all-files
-
-# Run specific hook
-./scripts/run-precommit.sh run black --all-files
-
-# Update hooks to latest versions
-./scripts/run-precommit.sh autoupdate
-
-# Skip hooks (not recommended)
-git commit --no-verify
-```
-
-## Testing
-
-```bash
-# Run all tests
-pytest
-
-# Run with verbose output
-pytest -v
-
-# Run with coverage
-pytest --cov=xarf --cov-report=term-missing
-
-# Run specific test file
-pytest tests/test_parser.py
-
-# Run specific test function
-pytest tests/test_parser.py::test_basic_parsing
-```
-
-## Code Style
-
-### Black Formatting
-```bash
-# Format all Python files
-black xarf/ tests/
-
-# Check without modifying
-black --check xarf/ tests/
-```
-
-### Import Sorting
-```bash
-# Sort imports
-isort xarf/ tests/
-
-# Check without modifying
-isort --check-only xarf/ tests/
-```
-
-### Linting
-```bash
-# Run flake8
-flake8 xarf/ tests/
-
-# Run with specific config
-flake8 --config=pyproject.toml xarf/
-```
-
-### Type Checking
-```bash
-# Run mypy
-mypy xarf/
-
-# Run with config
-mypy --config-file=pyproject.toml xarf/
-```
-
-## Building and Distribution
-
-```bash
-# Build package
-python -m build
-
-# Install locally
-pip install -e .
-
-# Install with dev dependencies
-pip install -e ".[dev]"
-
-# Install with test dependencies
-pip install -e ".[test]"
-```
-
-## Common Issues
-
-### Authentication with Custom PyPI
-
-If you encounter PyPI authentication errors:
-```bash
-export PIP_INDEX_URL=https://pypi.org/simple/
-```
-
-Add to your shell profile for persistence:
-```bash
-echo 'export PIP_INDEX_URL=https://pypi.org/simple/' >> ~/.zshrc
-```
-
-### Pre-commit Hook Failures
-
-1. **Automatic fixes**: Some hooks (black, isort) auto-fix issues
-   - Review changes: `git diff`
-   - Stage fixes: `git add .`
-   - Commit again
-
-2. **Manual fixes required**: Fix reported issues manually
-   - Read error messages carefully
-   - Fix the code
-   - Stage and commit again
-
-3. **Cache issues**: Clear pre-commit cache
-   ```bash
-   pre-commit clean
-   pre-commit install --install-hooks
-   ```
-
-## Documentation
-
-- [Pre-commit Setup](./PRE_COMMIT.md) - Detailed pre-commit documentation
-- [Migration Guide](./MIGRATION_GUIDE.md) - Upgrading from older versions
-- [Contributing](../CONTRIBUTING.md) - Contribution guidelines
-- [Changelog](../CHANGELOG.md) - Version history
-
-## Getting Help
-
-- GitHub Issues: https://github.com/xarf/xarf-parser-python/issues
-- XARF Specification: https://github.com/xarf/xarf-spec
-- Email: contact@xarf.org
-
-## Quick Reference
-
-| Command | Description |
-|---------|-------------|
-| `pytest` | Run tests |
-| `pytest --cov=xarf` | Run tests with coverage |
-| `black xarf/ tests/` | Format code |
-| `isort xarf/ tests/` | Sort imports |
-| `flake8 xarf/` | Lint code |
-| `mypy xarf/` | Type check |
-| `./scripts/run-precommit.sh run --all-files` | Run all hooks |
-| `pre-commit autoupdate` | Update hooks |
-| `pip install -e ".[dev]"` | Install with dev deps |
-
-## Environment Variables
-
-```bash
-# Use public PyPI (avoids auth issues)
-export PIP_INDEX_URL=https://pypi.org/simple/
-
-# Set Python version for mypy
-export MYPY_PYTHON_VERSION=3.8
-
-# Increase pytest verbosity
-export PYTEST_ADDOPTS="-v"
-```
diff --git a/docs/generator_usage.md b/docs/generator_usage.md
deleted file mode 100644
index 8084ec4..0000000
--- a/docs/generator_usage.md
+++ /dev/null
@@ -1,412 +0,0 @@
-# XARF Generator Usage Guide
-
-The `XARFGenerator` class provides a comprehensive API for generating XARF v4.0.0 compliant reports programmatically.
-
-## Installation
-
-```python
-from xarf import XARFGenerator
-```
-
-## Basic Usage
-
-### Creating a Simple Report
-
-```python
-from xarf import XARFGenerator
-
-# Initialize the generator
-generator = XARFGenerator()
-
-# Generate a minimal report
-report = generator.generate_report(
-    category="connection",
-    report_type="ddos",
-    source_identifier="192.0.2.100",
-    reporter_contact="abuse@example.com"
-)
-
-print(report)
-```
-
-### Adding Organization Information
-
-```python
-report = generator.generate_report(
-    category="content",
-    report_type="phishing",
-    source_identifier="203.0.113.50",
-    reporter_contact="abuse@example.com",
-    reporter_org="Security Operations Team"
-)
-```
-
-### Reporting on Behalf of Another Organization
-
-```python
-report = generator.generate_report(
-    category="copyright",
-    report_type="infringement",
-    source_identifier="198.51.100.25",
-    reporter_contact="reporter@agency.com",
-    reporter_org="Anti-Piracy Agency",
-    on_behalf_of={
-        "org": "Copyright Holder Inc.",
-        "contact": "legal@copyrightholder.com"
-    }
-)
-```
-
-## Adding Evidence
-
-### Creating Evidence Items
-
-```python
-# Create evidence with automatic hashing
-evidence = generator.add_evidence(
-    content_type="text/plain",
-    description="Server access log excerpt showing malicious activity",
-    payload="2025-01-20 10:15:23 192.0.2.100 GET /admin/backdoor.php"
-)
-
-# Use evidence in a report
-report = generator.generate_report(
-    category="connection",
-    report_type="sql_injection",
-    source_identifier="192.0.2.100",
-    reporter_contact="security@example.com",
-    evidence=[evidence]
-)
-```
-
-### Multiple Evidence Items
-
-```python
-# Log evidence
-log_evidence = generator.add_evidence(
-    content_type="text/plain",
-    description="Firewall logs showing attack pattern",
-    payload="[BLOCK] 192.0.2.100 -> 203.0.113.50:443 SYN flood detected"
-)
-
-# Screenshot evidence
-screenshot_evidence = generator.add_evidence(
-    content_type="image/png",
-    description="Screenshot of phishing page",
-    payload="iVBORw0KGgoAAAANSUhEUg..."  # Base64 encoded image
-)
-
-report = generator.generate_report(
-    category="content",
-    report_type="phishing",
-    source_identifier="192.0.2.100",
-    reporter_contact="abuse@example.com",
-    evidence=[log_evidence, screenshot_evidence]
-)
-```
-
-## Advanced Features
-
-### Full Report with All Optional Fields
-
-```python
-report = generator.generate_report(
-    category="vulnerability",
-    report_type="cve",
-    source_identifier="192.0.2.150",
-    reporter_contact="security@research.org",
-    reporter_org="Security Research Lab",
-    reporter_type="manual",
-    evidence_source="researcher_analysis",
-    description="Critical remote code execution vulnerability in web server",
-    severity="critical",
-    confidence=0.95,
-    tags=["cve-2025-12345", "rce", "critical", "web-server"],
-    occurrence={
-        "start": "2025-01-20T10:00:00Z",
-        "end": "2025-01-20T11:00:00Z"
-    },
-    target={
-        "ip": "203.0.113.100",
-        "port": 443,
-        "url": "https://vulnerable-server.example.com/admin"
-    }
-)
-```
-
-### Adding Category-Specific Fields
-
-Each XARF category supports specific fields. You can add them using `additional_fields`:
-
-```python
-# DDoS attack report with connection-specific fields
-report = generator.generate_report(
-    category="connection",
-    report_type="ddos",
-    source_identifier="192.0.2.100",
-    reporter_contact="abuse@example.com",
-    additional_fields={
-        "destination_ip": "203.0.113.100",
-        "destination_port": 80,
-        "protocol": "tcp",
-        "attack_vector": "syn_flood",
-        "peak_pps": 250000,
-        "peak_bps": 1200000000,
-        "duration_seconds": 2700,
-        "total_packets": 11250000,
-        "total_bytes": 3240000000,
-        "botnet_participation": True,
-        "mitigation_applied": True
-    }
-)
-```
-
-## Generating Sample Reports
-
-### For Testing and Development
-
-```python
-# Generate a complete sample report with random data
-sample_report = generator.generate_sample_report(
-    category="connection",
-    report_type="ddos",
-    include_evidence=True,
-    include_optional=True
-)
-
-# Generate minimal sample without optional fields
-minimal_sample = generator.generate_sample_report(
-    category="messaging",
-    report_type="spam",
-    include_evidence=False,
-    include_optional=False
-)
-```
-
-## All Supported Categories and Types
-
-### Abuse
-- `ddos` - DDoS attacks
-- `malware` - Malware distribution
-- `phishing` - Phishing attempts
-- `spam` - Spam messages
-- `scanner` - Port scanning
-
-### Connection
-- `compromised` - Compromised systems
-- `botnet` - Botnet activity
-- `malicious_traffic` - Malicious network traffic
-- `ddos` - DDoS participation
-- `port_scan` - Port scanning
-- `login_attack` - Brute force attacks
-- `sql_injection` - SQL injection attempts
-- `reconnaissance` - Network reconnaissance
-- `scraping` - Web scraping
-- `vuln_scanning` - Vulnerability scanning
-- `bot` - Bot activity
-- `infected_host` - Infected hosts
-
-### Content
-- `illegal` - Illegal content
-- `malicious` - Malicious content
-- `policy_violation` - Policy violations
-- `phishing` - Phishing pages
-- `malware` - Malware hosting
-- `fraud` - Fraudulent content
-- `exposed_data` - Data exposure
-- `csam` / `csem` - Child exploitation material
-- `brand_infringement` - Brand infringement
-- `suspicious_registration` - Suspicious registrations
-- `remote_compromise` - Remote compromise
-
-### Copyright
-- `infringement` - Copyright infringement
-- `dmca` - DMCA violations
-- `trademark` - Trademark infringement
-- `p2p` - P2P file sharing
-- `cyberlocker` - Cyberlocker abuse
-- `link_site` - Link sites
-- `ugc_platform` - UGC platform violations
-- `usenet` - Usenet violations
-- `copyright` - Generic copyright issues
-
-### Infrastructure
-- `botnet` - Botnet infrastructure
-- `compromised_server` - Compromised servers
-
-### Messaging
-- `bulk_messaging` - Bulk messaging
-- `spam` - Spam messages
-
-### Reputation
-- `blocklist` - Blocklist entries
-- `threat_intelligence` - Threat intelligence
-
-### Vulnerability
-- `cve` - CVE vulnerabilities
-- `misconfiguration` - Security misconfigurations
-- `open_service` - Open services
-
-## Utility Methods
-
-### Generate UUID
-
-```python
-report_id = generator.generate_uuid()
-# e.g., "550e8400-e29b-41d4-a716-446655440000"
-```
-
-### Generate Timestamp
-
-```python
-timestamp = generator.generate_timestamp()
-# e.g., "2025-01-20T14:30:45Z"
-```
-
-### Generate Hash
-
-```python
-# SHA256 hash (default)
-hash_sha256 = generator.generate_hash("data to hash")
-
-# Other algorithms
-hash_sha512 = generator.generate_hash("data", "sha512")
-hash_sha1 = generator.generate_hash("data", "sha1")
-hash_md5 = generator.generate_hash("data", "md5")
-```
-
-### Random Evidence Generation
-
-```python
-# Generate random evidence for testing
-evidence = generator.generate_random_evidence(
-    category="connection",
-    description="Test evidence"
-)
-```
-
-## Validation
-
-The generator automatically validates:
-
-- Category names (must be one of 8 valid categories)
-- Type names (must be valid for the category)
-- Reporter type (automated, manual, hybrid)
-- Evidence source (valid evidence collection method)
-- Severity levels (low, medium, high, critical)
-- Confidence scores (0.0 to 1.0)
-- Required field presence
-- Data format consistency
-
-## Error Handling
-
-```python
-from xarf import XARFGenerator, XARFError
-
-generator = XARFGenerator()
-
-try:
-    report = generator.generate_report(
-        category="invalid_category",  # Invalid!
-        report_type="test",
-        source_identifier="192.0.2.1",
-        reporter_contact="abuse@example.com"
-    )
-except XARFError as e:
-    print(f"Generation failed: {e}")
-```
-
-## JSON Serialization
-
-All generated reports are JSON-serializable:
-
-```python
-import json
-
-report = generator.generate_report(
-    category="connection",
-    report_type="ddos",
-    source_identifier="192.0.2.100",
-    reporter_contact="abuse@example.com"
-)
-
-# Serialize to JSON
-json_string = json.dumps(report, indent=2)
-
-# Save to file
-with open("xarf_report.json", "w") as f:
-    json.dump(report, f, indent=2)
-```
-
-## Best Practices
-
-1. **Always include descriptive information**: Use the `description` field to provide context
-2. **Add evidence when possible**: Include logs, screenshots, or other supporting data
-3. **Set appropriate severity**: Use severity levels to indicate impact
-4. **Use confidence scores**: Indicate your certainty about the report
-5. **Tag reports appropriately**: Use tags for categorization and searching
-6. **Include occurrence times**: Specify when the incident occurred
-7. **Provide target information**: Include affected systems/resources
-8. **Use proper evidence types**: Match evidence content_type to actual data
-9. **Hash all evidence**: The generator automatically computes hashes
-10. **Validate before sending**: Ensure all required fields are present
-
-## Integration Example
-
-```python
-import json
-from xarf import XARFGenerator
-
-def create_abuse_report(source_ip, evidence_logs, severity="medium"):
-    """Create an abuse report from detected incidents."""
-    generator = XARFGenerator()
-
-    # Prepare evidence
-    evidence_items = []
-    for log in evidence_logs:
-        evidence_items.append(
-            generator.add_evidence(
-                content_type="text/plain",
-                description=f"Log from {log['timestamp']}",
-                payload=log['content']
-            )
-        )
-
-    # Generate report
-    report = generator.generate_report(
-        category="connection",
-        report_type="malicious_traffic",
-        source_identifier=source_ip,
-        reporter_contact="security@example.com",
-        reporter_org="Security Operations Center",
-        evidence=evidence_items,
-        severity=severity,
-        tags=["automated", "ids", "malicious"]
-    )
-
-    return report
-
-# Usage
-logs = [
-    {"timestamp": "2025-01-20 10:15:23", "content": "Malicious packet detected"},
-    {"timestamp": "2025-01-20 10:15:24", "content": "Multiple attempts blocked"}
-]
-
-report = create_abuse_report("192.0.2.100", logs, "high")
-print(json.dumps(report, indent=2))
-```
-
-## Security Considerations
-
-1. **UUID Generation**: Uses `uuid.uuid4()` for cryptographically secure random UUIDs
-2. **Random Data**: Uses `secrets` module for secure random generation
-3. **Hash Algorithms**: Supports SHA-256, SHA-512, SHA-1, and MD5
-4. **Input Validation**: All inputs are validated before report generation
-5. **Type Safety**: Full type hints for IDE support and static analysis
-6. **No Sensitive Data**: Never include credentials or sensitive PII in reports
-
-## Further Reading
-
-- [XARF Specification](https://xarf.org)
-- [XARF Parser Documentation](../README.md)
-- [GitHub Repository](https://github.com/xarf/xarf-parser-python)
diff --git a/docs/migration-guide.md b/docs/migration-guide.md
deleted file mode 100644
index 96e76a6..0000000
--- a/docs/migration-guide.md
+++ /dev/null
@@ -1,391 +0,0 @@
-# XARF v3 to v4 Migration Guide
-
-## Overview
-
-XARF v4 Python parser provides **automatic backwards compatibility** with XARF v3 format. Your existing v3 reports will be automatically converted to v4 format when parsed, with deprecation warnings to help you migrate.
-
-## Quick Start
-
-### No Code Changes Required
-
-The parser automatically detects and converts v3 reports:
-
-```python
-from xarf import XARFParser
-
-parser = XARFParser()
-
-# Works with both v3 and v4 reports automatically
-v3_report_json = '''
-{
-  "Version": "3.0.0",
-  "ReporterInfo": {
-    "ReporterOrg": "Security Team",
-    "ReporterOrgEmail": "abuse@example.com"
-  },
-  "Report": {
-    "ReportClass": "Messaging",
-    "ReportType": "spam",
-    "Date": "2024-01-15T10:00:00Z",
-    "Source": {"IP": "192.0.2.1"},
-    "AdditionalInfo": {
-      "Protocol": "smtp",
-      "SMTPFrom": "spammer@example.com",
-      "Subject": "Spam Message",
-      "DetectionMethod": "spamtrap"
-    }
-  }
-}
-'''
-
-# Automatically converted to v4 format
-report = parser.parse(v3_report_json)
-print(f"Category: {report.category}")  # messaging
-print(f"Type: {report.type}")          # spam
-print(f"From: {report.smtp_from}")     # spammer@example.com
-```
-
-## Deprecation Warnings
-
-When parsing v3 reports, you'll receive a deprecation warning:
-
-```python
-import warnings
-from xarf import XARFParser
-from xarf.v3_compat import XARFv3DeprecationWarning
-
-# See deprecation warnings
-warnings.simplefilter("always", XARFv3DeprecationWarning)
-
-parser = XARFParser()
-report = parser.parse(v3_json)
-# DeprecationWarning: XARF v3 format is deprecated. Please upgrade to XARF v4.
-```
-
-### Suppressing Warnings (Temporary)
-
-If you need to suppress warnings during migration:
-
-```python
-import warnings
-from xarf.v3_compat import XARFv3DeprecationWarning
-
-# Suppress v3 warnings temporarily
-warnings.simplefilter("ignore", XARFv3DeprecationWarning)
-
-# Your code here
-```
-
-## Field Mapping Reference
-
-### Base Fields
-
-| v3 Field | v4 Field | Notes |
-|----------|----------|-------|
-| `Version` | `xarf_version` | Always becomes "4.0.0" |
-| N/A | `report_id` | Auto-generated UUID v4 |
-| `Report.Date` | `timestamp` | Preserved as-is |
-| `ReporterInfo.ReporterOrg` | `reporter.org` | Direct mapping |
-| `ReporterInfo.ReporterOrgEmail` | `reporter.contact` | Preferred over ContactEmail |
-| `ReporterInfo.ReporterContactEmail` | `reporter.contact` | Fallback if OrgEmail missing |
-| N/A | `reporter.type` | Set to "automated" |
-| `Report.Source.IP` | `source_identifier` | Direct mapping |
-| `Report.ReportClass` | `category` | Lowercase conversion |
-| `Report.ReportType` | `type` | Lowercase conversion |
-| `Report.AdditionalInfo.DetectionMethod` | `evidence_source` | Mapped to v4 values |
-
-### Category-Specific Mapping
-
-#### Messaging Reports
-
-| v3 Field | v4 Field |
-|----------|----------|
-| `AdditionalInfo.Protocol` | `protocol` |
-| `AdditionalInfo.SMTPFrom` | `smtp_from` |
-| `AdditionalInfo.SMTPTo` | `smtp_to` |
-| `AdditionalInfo.Subject` | `subject` |
-| `AdditionalInfo.MessageId` | `message_id` |
-
-#### Connection Reports
-
-| v3 Field | v4 Field |
-|----------|----------|
-| `Report.DestinationIp` | `destination_ip` |
-| `Report.DestinationPort` | `destination_port` |
-| `AdditionalInfo.Protocol` | `protocol` |
-| `Source.Port` | `source_port` |
-| `AdditionalInfo.AttackType` | `attack_type` |
-| `AdditionalInfo.PacketCount` | `packet_count` |
-| `AdditionalInfo.ByteCount` | `byte_count` |
-
-#### Content Reports
-
-| v3 Field | v4 Field |
-|----------|----------|
-| `Report.URL` or `AdditionalInfo.URL` | `url` |
-| `AdditionalInfo.ContentType` | `content_type` |
-| `AdditionalInfo.AttackType` | `attack_type` |
-
-### Evidence Conversion
-
-v3 `Attachment` array is converted to v4 `evidence` format:
-
-```python
-# v3 Format
-"Attachment": [
-  {
-    "ContentType": "message/rfc822",
-    "Description": "Spam email",
-    "Data": "base64data..."
-  }
-]
-
-# Converted to v4
-"evidence": [
-  {
-    "content_type": "message/rfc822",
-    "description": "Spam email",
-    "payload": "base64data..."
-  }
-]
-```
-
-### Evidence Source Mapping
-
-| v3 DetectionMethod | v4 evidence_source |
-|-------------------|-------------------|
-| Contains "spamtrap" | `spamtrap` |
-| Contains "honeypot" | `honeypot` |
-| Contains "user" or "manual" | `user_report` |
-| Contains "scan" | `automated_scan` |
-| Contains "vuln" | `vulnerability_scan` |
-| Other/Missing | `automated_scan` |
-
-### Legacy Metadata
-
-Converted reports include metadata to track conversion:
-
-```json
-{
-  "legacy_version": "3",
-  "_internal": {
-    "converted_from_v3": true,
-    "original_version": "3.0.0"
-  },
-  "tags": [
-    "legacy:class:Messaging",
-    "legacy:type:spam"
-  ]
-}
-```
-
-## Migration Strategies
-
-### Strategy 1: Gradual Migration (Recommended)
-
-1. **Keep sending v3 reports** - Parser handles them automatically
-2. **Monitor deprecation warnings** - Track which systems need updating
-3. **Update report generators** - Migrate to v4 format over time
-4. **Remove v3 support eventually** - Once all sources upgraded
-
-```python
-# Phase 1: Auto-conversion (current)
-parser = XARFParser()
-report = parser.parse(v3_or_v4_json)  # Works with both
-
-# Phase 2: Track what needs migration
-import warnings
-warnings.simplefilter("always", XARFv3DeprecationWarning)
-# Monitor warnings to find v3 sources
-
-# Phase 3: Update report generators to v4
-# ... upgrade systems ...
-
-# Phase 4: Eventually require v4 only
-# (Future version may remove v3 support)
-```
-
-### Strategy 2: Explicit Conversion
-
-If you want to convert and store v4 reports:
-
-```python
-from xarf import convert_v3_to_v4, is_v3_report
-import json
-
-# Check if report is v3
-data = json.loads(report_json)
-if is_v3_report(data):
-    # Convert to v4 and store
-    v4_data = convert_v3_to_v4(data)
-    v4_json = json.dumps(v4_data)
-    # Store v4_json instead of original
-```
-
-### Strategy 3: Validation Mode
-
-Validate that conversions work correctly:
-
-```python
-from xarf import XARFParser, is_v3_report, convert_v3_to_v4
-
-parser = XARFParser(strict=True)
-
-if is_v3_report(data):
-    v4_data = convert_v3_to_v4(data)
-    # Validate converted report
-    report = parser.parse(v4_data)
-    errors = parser.get_errors()
-    if errors:
-        print(f"Conversion issues: {errors}")
-```
-
-## Breaking Changes from v3
-
-While the parser handles conversion automatically, be aware of these conceptual changes:
-
-### 1. Class → Category Rename
-
-v3 used `ReportClass`, v4 uses `category`:
-- More accurate terminology
-- Avoids confusion with programming classes
-
-### 2. Reporter Structure
-
-v3 had flat `ReporterInfo`, v4 has structured `reporter` object:
-
-```python
-# v3
-"ReporterInfo": {
-  "ReporterOrg": "...",
-  "ReporterOrgEmail": "...",
-  "ReporterContactEmail": "...",
-  "ReporterContactName": "..."
-}
-
-# v4
-"reporter": {
-  "org": "...",
-  "contact": "...",
-  "type": "automated|manual|hybrid"
-}
-```
-
-### 3. Required Fields
-
-v4 has stricter requirements:
-- `report_id` (UUID v4) is required
-- `evidence_source` is required
-- `reporter.type` is required
-
-### 4. Evidence Format
-
-Renamed for clarity:
-- `Attachment` → `evidence`
-- `Data` → `payload`
-
-## Pydantic V2 Migration
-
-The parser has been updated to Pydantic V2. If you use the models directly:
-
-### Old (Pydantic V1)
-
-```python
-from xarf.models import XARFReport
-
-class Config:
-    allow_population_by_field_name = True
-    extra = "allow"
-```
-
-### New (Pydantic V2)
-
-```python
-from xarf.models import XARFReport
-from pydantic import ConfigDict
-
-model_config = ConfigDict(
-    populate_by_name=True,
-    extra="allow"
-)
-```
-
-Validators also changed:
-
-```python
-# Old
-@validator("category")
-def validate_category(cls, v):
-    return v
-
-# New
-@field_validator("category")
-@classmethod
-def validate_category(cls, v: str) -> str:
-    return v
-```
-
-## Testing Your Migration
-
-### Test with Sample v3 Reports
-
-```python
-import pytest
-from xarf import XARFParser
-
-def test_my_v3_reports():
-    """Test that our v3 reports convert correctly."""
-    parser = XARFParser()
-
-    # Your actual v3 report
-    v3_json = load_sample_v3_report()
-
-    # Should parse without errors
-    report = parser.parse(v3_json)
-
-    # Verify expected values
-    assert report.category == "messaging"
-    assert report.type == "spam"
-    # ... test your specific fields
-```
-
-### Validate All Historical Reports
-
-```python
-from xarf import XARFParser
-import json
-import glob
-
-parser = XARFParser(strict=False)
-failed = []
-
-for report_file in glob.glob("reports/*.json"):
-    with open(report_file) as f:
-        try:
-            data = json.load(f)
-            report = parser.parse(data)
-            if parser.get_errors():
-                failed.append((report_file, parser.get_errors()))
-        except Exception as e:
-            failed.append((report_file, str(e)))
-
-if failed:
-    print(f"Failed to parse {len(failed)} reports:")
-    for filename, error in failed:
-        print(f"  {filename}: {error}")
-```
-
-## Support and Questions
-
-- **Specification**: See [XARF v4 Specification](https://github.com/xarf/xarf-spec)
-- **Issues**: Report bugs at [GitHub Issues](https://github.com/xarf/xarf-parser-python/issues)
-- **Community**: Join discussions at [XARF Discussions](https://github.com/xarf/xarf-spec/discussions)
-
-## Timeline
-
-- **Now (v4.0.0-alpha)**: Full v3 compatibility, deprecation warnings
-- **v4.0.0-beta**: Enhanced validation, migration tools
-- **v4.0.0-stable**: Production-ready, v3 support continues
-- **v5.0.0 (future)**: v3 support may be removed (with advance notice)
-
-We recommend migrating to v4 format when convenient, but there's no rush - v3 support will continue for the foreseeable future.
diff --git a/xarf/py.typed b/xarf/py.typed
new file mode 100644
index 0000000..e69de29

From 2998caaa7d2639db9b0fc2e7553fa6492770ba30 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 31 Mar 2026 18:36:34 +0200
Subject: [PATCH 12/13] Set version to 1.0.0

---
 CHANGELOG.md     | 6 +++---
 xarf/__init__.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5e11453..ddc24c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
-## [0.1.0] - 2026-03-31
+## [1.0.0] - 2026-03-31
 
 This release is a complete rework of the alpha (`v4.0.0a1`). No backward compatibility with the alpha API is provided. The version numbers will now be independent from the spec to provide release independence for the library.
 
@@ -39,5 +39,5 @@ This release is a complete rework of the alpha (`v4.0.0a1`). No backward compati
 - **`v3_compat.py`**: aligned type mappings exactly with the JS reference implementation (8 types, PascalCase + lowercase variants for each).
 - **`models.py`**: replaced with result dataclasses (`ParseResult`, `CreateReportResult`, `ValidationError`, `ValidationWarning`) and base Pydantic models (`XARFReport`, `XARFEvidence`, `ContactInfo`).
 
-[Unreleased]: https://github.com/xarf/xarf-python/compare/v0.1.0...HEAD
-[0.1.0]: https://github.com/xarf/xarf-python/releases/tag/v0.1.0
+[Unreleased]: https://github.com/xarf/xarf-python/compare/v1.0.0...HEAD
+[0.1.0]: https://github.com/xarf/xarf-python/releases/tag/v1.0.0
diff --git a/xarf/__init__.py b/xarf/__init__.py
index 2fb0db5..295b542 100644
--- a/xarf/__init__.py
+++ b/xarf/__init__.py
@@ -126,7 +126,7 @@
 )
 from xarf.validator import ValidationResult
 
-__version__ = "0.1.0.dev0"
+__version__ = "1.0.0"
 __author__ = "XARF Project"
 __email__ = "contact@xarf.org"
 

From 1a79030a1f35aeeb976bb2c837f340476feaa131 Mon Sep 17 00:00:00 2001
From: Victor Lopez <victor@thevictorlopez.com>
Date: Tue, 31 Mar 2026 18:40:43 +0200
Subject: [PATCH 13/13] Bundle 4.2.0 schemas

---
 .gitignore                                    |   3 -
 xarf/schemas/.version                         |   5 +
 xarf/schemas/types/connection-ddos.json       | 190 ++++++
 .../types/connection-infected-host.json       | 195 ++++++
 .../types/connection-login-attack.json        |  85 +++
 xarf/schemas/types/connection-port-scan.json  |  85 +++
 .../types/connection-reconnaissance.json      | 203 ++++++
 xarf/schemas/types/connection-scraping.json   | 181 +++++
 .../types/connection-sql-injection.json       | 153 +++++
 .../types/connection-vulnerability-scan.json  | 169 +++++
 xarf/schemas/types/content-base.json          | 243 +++++++
 .../types/content-brand_infringement.json     | 164 +++++
 xarf/schemas/types/content-csam.json          | 127 ++++
 xarf/schemas/types/content-csem.json          | 170 +++++
 xarf/schemas/types/content-exposed-data.json  | 210 ++++++
 xarf/schemas/types/content-fraud.json         | 149 ++++
 xarf/schemas/types/content-malware.json       | 263 ++++++++
 xarf/schemas/types/content-phishing.json      | 141 ++++
 .../types/content-remote_compromise.json      | 240 +++++++
 .../content-suspicious_registration.json      | 230 +++++++
 xarf/schemas/types/copyright-copyright.json   |  76 +++
 xarf/schemas/types/copyright-cyberlocker.json | 223 ++++++
 xarf/schemas/types/copyright-link-site.json   | 269 ++++++++
 xarf/schemas/types/copyright-p2p.json         | 221 ++++++
 .../schemas/types/copyright-ugc-platform.json | 287 ++++++++
 xarf/schemas/types/copyright-usenet.json      | 281 ++++++++
 xarf/schemas/types/infrastructure-botnet.json |  88 +++
 .../infrastructure-compromised-server.json    |  29 +
 .../types/messaging-bulk-messaging.json       | 142 ++++
 xarf/schemas/types/messaging-spam.json        | 202 ++++++
 xarf/schemas/types/reputation-blocklist.json  |  29 +
 .../types/reputation-threat-intelligence.json |  29 +
 xarf/schemas/types/vulnerability-cve.json     | 276 ++++++++
 .../types/vulnerability-misconfiguration.json |  29 +
 .../types/vulnerability-open-service.json     |  29 +
 xarf/schemas/xarf-core.json                   | 310 +++++++++
 xarf/schemas/xarf-v4-master.json              | 638 ++++++++++++++++++
 37 files changed, 6361 insertions(+), 3 deletions(-)
 create mode 100644 xarf/schemas/.version
 create mode 100644 xarf/schemas/types/connection-ddos.json
 create mode 100644 xarf/schemas/types/connection-infected-host.json
 create mode 100644 xarf/schemas/types/connection-login-attack.json
 create mode 100644 xarf/schemas/types/connection-port-scan.json
 create mode 100644 xarf/schemas/types/connection-reconnaissance.json
 create mode 100644 xarf/schemas/types/connection-scraping.json
 create mode 100644 xarf/schemas/types/connection-sql-injection.json
 create mode 100644 xarf/schemas/types/connection-vulnerability-scan.json
 create mode 100644 xarf/schemas/types/content-base.json
 create mode 100644 xarf/schemas/types/content-brand_infringement.json
 create mode 100644 xarf/schemas/types/content-csam.json
 create mode 100644 xarf/schemas/types/content-csem.json
 create mode 100644 xarf/schemas/types/content-exposed-data.json
 create mode 100644 xarf/schemas/types/content-fraud.json
 create mode 100644 xarf/schemas/types/content-malware.json
 create mode 100644 xarf/schemas/types/content-phishing.json
 create mode 100644 xarf/schemas/types/content-remote_compromise.json
 create mode 100644 xarf/schemas/types/content-suspicious_registration.json
 create mode 100644 xarf/schemas/types/copyright-copyright.json
 create mode 100644 xarf/schemas/types/copyright-cyberlocker.json
 create mode 100644 xarf/schemas/types/copyright-link-site.json
 create mode 100644 xarf/schemas/types/copyright-p2p.json
 create mode 100644 xarf/schemas/types/copyright-ugc-platform.json
 create mode 100644 xarf/schemas/types/copyright-usenet.json
 create mode 100644 xarf/schemas/types/infrastructure-botnet.json
 create mode 100644 xarf/schemas/types/infrastructure-compromised-server.json
 create mode 100644 xarf/schemas/types/messaging-bulk-messaging.json
 create mode 100644 xarf/schemas/types/messaging-spam.json
 create mode 100644 xarf/schemas/types/reputation-blocklist.json
 create mode 100644 xarf/schemas/types/reputation-threat-intelligence.json
 create mode 100644 xarf/schemas/types/vulnerability-cve.json
 create mode 100644 xarf/schemas/types/vulnerability-misconfiguration.json
 create mode 100644 xarf/schemas/types/vulnerability-open-service.json
 create mode 100644 xarf/schemas/xarf-core.json
 create mode 100644 xarf/schemas/xarf-v4-master.json

diff --git a/.gitignore b/.gitignore
index 4a5582e..09d2f5a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -24,9 +24,6 @@ share/python-wheels/
 *.egg
 MANIFEST
 
-# Fetched XARF schemas
-xarf/schemas/
-
 # Virtual environments
 .env
 .venv
diff --git a/xarf/schemas/.version b/xarf/schemas/.version
new file mode 100644
index 0000000..a184846
--- /dev/null
+++ b/xarf/schemas/.version
@@ -0,0 +1,5 @@
+{
+  "version": "v4.2.0",
+  "fetched_at": "2026-03-31T16:39:51.627945+00:00",
+  "source": "https://github.com/xarf/xarf-spec/tree/v4.2.0"
+}
diff --git a/xarf/schemas/types/connection-ddos.json b/xarf/schemas/types/connection-ddos.json
new file mode 100644
index 0000000..a1c6ef9
--- /dev/null
+++ b/xarf/schemas/types/connection-ddos.json
@@ -0,0 +1,190 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/connection-ddos.json",
+	"title": "XARF v4 Connection - DDoS Type Schema",
+	"description": "Schema for Distributed Denial of Service attack reports including volumetric attacks (SYN floods, UDP floods, HTTP floods) and amplification/reflection attacks (DNS, NTP, memcached, SSDP)",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "connection"
+				},
+				"type": {
+					"const": "ddos"
+				},
+				"evidence_source": {
+					"type": "string",
+					"enum": [
+						"firewall_logs",
+						"ids_detection",
+						"flow_analysis",
+						"traffic_monitoring",
+						"honeypot"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Source of DDoS attack evidence"
+				},
+				"destination_ip": {
+					"type": "string",
+					"anyOf": [
+						{
+							"format": "ipv4"
+						},
+						{
+							"format": "ipv6"
+						}
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target IP address of the DDoS attack"
+				},
+				"destination_port": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 65535,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target port number",
+					"examples": [
+						80,
+						443,
+						53,
+						25
+					]
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"tcp",
+						"udp",
+						"icmp",
+						"sctp"
+					],
+					"description": "REQUIRED: Network protocol used in the attack"
+				},
+				"attack_vector": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Specific DDoS attack method",
+					"examples": [
+						"syn_flood",
+						"udp_flood",
+						"icmp_flood",
+						"http_flood",
+						"dns_amplification",
+						"ntp_amplification",
+						"memcached_amplification"
+					]
+				},
+				"peak_pps": {
+					"type": "integer",
+					"minimum": 1,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Peak packets per second during attack"
+				},
+				"peak_bps": {
+					"type": "integer",
+					"minimum": 1,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Peak bits per second during attack"
+				},
+				"duration_seconds": {
+					"type": "integer",
+					"minimum": 1,
+					"description": "OPTIONAL: Duration of the DDoS attack in seconds"
+				},
+				"amplification_factor": {
+					"type": "number",
+					"minimum": 1.0,
+					"description": "OPTIONAL: Amplification factor for reflection attacks"
+				},
+				"first_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "REQUIRED: When DDoS attack was first observed"
+				},
+				"last_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When DDoS attack was last observed"
+				},
+				"threshold_exceeded": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When detection threshold was exceeded"
+				},
+				"mitigation_applied": {
+					"type": "boolean",
+					"description": "OPTIONAL: Whether mitigation measures were applied"
+				},
+				"service_impact": {
+					"type": "string",
+					"enum": [
+						"none",
+						"degraded",
+						"unavailable"
+					],
+					"description": "OPTIONAL: Impact on target service availability"
+				}
+			},
+			"required": [
+				"protocol",
+				"first_seen"
+			],
+			"if": {
+				"properties": {
+					"source_identifier": {
+						"anyOf": [
+							{
+								"format": "ipv4"
+							},
+							{
+								"format": "ipv6"
+							}
+						]
+					}
+				}
+			},
+			"then": {
+				"required": [
+					"source_port"
+				]
+			}
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "789a0123-b456-78c9-d012-345678901234",
+			"timestamp": "2024-01-15T16:55:42Z",
+			"reporter": {
+				"org": "DDoS Protection Service",
+				"contact": "ddos@ddos-monitor.example",
+				"domain": "ddos-monitor.example"
+			},
+			"sender": {
+				"org": "DDoS Protection Service",
+				"contact": "ddos@ddos-monitor.example",
+				"domain": "ddos-monitor.example"
+			},
+			"source_identifier": "192.0.2.155",
+			"category": "connection",
+			"type": "ddos",
+			"destination_ip": "203.0.113.100",
+			"destination_port": 80,
+			"protocol": "tcp",
+			"attack_vector": "syn_flood",
+			"peak_pps": 250000,
+			"peak_bps": 1200000000,
+			"duration_seconds": 2700,
+			"evidence_source": "flow_analysis",
+			"service_impact": "degraded",
+			"tags": [
+				"attack:syn_flood",
+				"volume:high"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/connection-infected-host.json b/xarf/schemas/types/connection-infected-host.json
new file mode 100644
index 0000000..e90cfa3
--- /dev/null
+++ b/xarf/schemas/types/connection-infected-host.json
@@ -0,0 +1,195 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/connection-infected-host.json",
+	"title": "XARF v4 Connection - Infected Host Type Schema",
+	"description": "Schema for compromised systems participating in botnets or being remotely controlled for malicious activities (DDoS, spam distribution, click fraud, cryptocurrency mining, credential stuffing)",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "connection"
+				},
+				"type": {
+					"const": "infected_host"
+				},
+				"destination_ip": {
+					"type": "string",
+					"anyOf": [
+						{
+							"format": "ipv4"
+						},
+						{
+							"format": "ipv6"
+						}
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target IP address"
+				},
+				"destination_port": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 65535,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target port number"
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"tcp",
+						"udp"
+					],
+					"default": "tcp",
+					"description": "REQUIRED: Network protocol used"
+				},
+				"bot_type": {
+					"type": "string",
+					"enum": [
+						"search_engine",
+						"ai_agent",
+						"monitoring",
+						"seo_analyzer",
+						"link_checker",
+						"feed_reader",
+						"social_media",
+						"advertising",
+						"malicious",
+						"unknown"
+					],
+					"description": "REQUIRED: Classification of bot type"
+				},
+				"bot_name": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Identified bot name or signature",
+					"examples": [
+						"Googlebot",
+						"GPTBot",
+						"ChatGPT-User",
+						"Claude-Web",
+						"FacebookBot",
+						"TwitterBot",
+						"UptimeRobot",
+						"PingdomBot"
+					]
+				},
+				"user_agent": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Full User-Agent string"
+				},
+				"behavior_pattern": {
+					"type": "string",
+					"enum": [
+						"legitimate_crawling",
+						"aggressive_crawling",
+						"api_abuse",
+						"form_submission",
+						"comment_spam",
+						"account_creation",
+						"content_harvesting",
+						"vulnerability_probing",
+						"mixed"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Observed behavior pattern"
+				},
+				"request_rate": {
+					"type": "number",
+					"description": "OPTIONAL: Average requests per second"
+				},
+				"total_requests": {
+					"type": "integer",
+					"minimum": 1,
+					"description": "OPTIONAL: Total number of requests"
+				},
+				"respects_robots_txt": {
+					"type": "boolean",
+					"description": "OPTIONAL: Whether bot respects robots.txt directives"
+				},
+				"follows_crawl_delay": {
+					"type": "boolean",
+					"description": "OPTIONAL: Whether bot follows crawl-delay directive"
+				},
+				"javascript_execution": {
+					"type": "boolean",
+					"description": "OPTIONAL: Whether bot executes JavaScript"
+				},
+				"accepts_cookies": {
+					"type": "boolean",
+					"description": "OPTIONAL: Whether bot accepts and maintains cookies"
+				},
+				"api_endpoints_accessed": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"description": "OPTIONAL: List of API endpoints accessed"
+				},
+				"verification_status": {
+					"type": "string",
+					"enum": [
+						"verified",
+						"unverified",
+						"spoofed",
+						"unknown"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Whether bot identity has been verified"
+				},
+				"first_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "REQUIRED: When bot activity was first observed"
+				},
+				"last_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When bot activity was last observed"
+				}
+			},
+			"required": [
+				"protocol",
+				"bot_type",
+				"first_seen"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "d5e6f7a8-b9c0-1234-defa-123456789012",
+			"timestamp": "2025-01-15T11:30:00Z",
+			"reporter": {
+				"org": "Bot Detection Service",
+				"contact": "botreport@example.com",
+				"domain": "example.com"
+			},
+			"sender": {
+				"org": "Bot Detection Service",
+				"contact": "botreport@example.com",
+				"domain": "example.com"
+			},
+			"source_identifier": "203.0.113.42",
+			"category": "connection",
+			"type": "infected_host",
+			"destination_ip": "198.51.100.80",
+			"destination_port": 443,
+			"protocol": "tcp",
+			"bot_type": "ai_agent",
+			"bot_name": "GPTBot",
+			"user_agent": "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; GPTBot/1.0)",
+			"behavior_pattern": "legitimate_crawling",
+			"request_rate": 2.5,
+			"total_requests": 150,
+			"respects_robots_txt": true,
+			"follows_crawl_delay": true,
+			"verification_status": "verified",
+			"first_seen": "2025-01-15T11:00:00Z",
+			"last_seen": "2025-01-15T11:28:00Z"
+		}
+	]
+}
diff --git a/xarf/schemas/types/connection-login-attack.json b/xarf/schemas/types/connection-login-attack.json
new file mode 100644
index 0000000..f65813f
--- /dev/null
+++ b/xarf/schemas/types/connection-login-attack.json
@@ -0,0 +1,85 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/connection-login-attack.json",
+	"title": "XARF v4 Connection - Login Attack Type Schema",
+	"description": "Schema for brute force login attempts, credential stuffing campaigns, password spraying attacks, and repeated authentication failures against authentication systems (SSH, RDP, web logins, API authentication)",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "connection"
+				},
+				"type": {
+					"const": "login_attack"
+				},
+				"destination_ip": {
+					"type": "string",
+					"anyOf": [
+						{
+							"format": "ipv4"
+						},
+						{
+							"format": "ipv6"
+						}
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target IP address of the login attack"
+				},
+				"destination_port": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 65535,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target port number"
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"tcp",
+						"udp",
+						"icmp",
+						"sctp"
+					],
+					"description": "REQUIRED: Network protocol used in the attack"
+				},
+				"first_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "REQUIRED: When attack activity was first observed"
+				},
+				"last_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When attack activity was last observed"
+				}
+			},
+			"required": [
+				"protocol",
+				"first_seen"
+			],
+			"if": {
+				"properties": {
+					"source_identifier": {
+						"anyOf": [
+							{
+								"format": "ipv4"
+							},
+							{
+								"format": "ipv6"
+							}
+						]
+					}
+				}
+			},
+			"then": {
+				"required": [
+					"source_port"
+				]
+			}
+		}
+	]
+}
diff --git a/xarf/schemas/types/connection-port-scan.json b/xarf/schemas/types/connection-port-scan.json
new file mode 100644
index 0000000..054fedb
--- /dev/null
+++ b/xarf/schemas/types/connection-port-scan.json
@@ -0,0 +1,85 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/connection-port-scan.json",
+	"title": "XARF v4 Connection - Port Scan Type Schema",
+	"description": "Schema for Network port scanning and reconnaissance activities",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "connection"
+				},
+				"type": {
+					"const": "port_scan"
+				},
+				"destination_ip": {
+					"type": "string",
+					"anyOf": [
+						{
+							"format": "ipv4"
+						},
+						{
+							"format": "ipv6"
+						}
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target IP address of the port scan"
+				},
+				"destination_port": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 65535,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target port number"
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"tcp",
+						"udp",
+						"icmp",
+						"sctp"
+					],
+					"description": "REQUIRED: Network protocol used in the attack"
+				},
+				"first_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "REQUIRED: When attack activity was first observed"
+				},
+				"last_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When attack activity was last observed"
+				}
+			},
+			"required": [
+				"protocol",
+				"first_seen"
+			],
+			"if": {
+				"properties": {
+					"source_identifier": {
+						"anyOf": [
+							{
+								"format": "ipv4"
+							},
+							{
+								"format": "ipv6"
+							}
+						]
+					}
+				}
+			},
+			"then": {
+				"required": [
+					"source_port"
+				]
+			}
+		}
+	]
+}
diff --git a/xarf/schemas/types/connection-reconnaissance.json b/xarf/schemas/types/connection-reconnaissance.json
new file mode 100644
index 0000000..2280bf0
--- /dev/null
+++ b/xarf/schemas/types/connection-reconnaissance.json
@@ -0,0 +1,203 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/connection-reconnaissance.json",
+	"title": "XARF v4 Connection - Reconnaissance Type Schema",
+	"description": "Schema for reconnaissance and probing activities (e.g., .env, .git, .htaccess files)",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "connection"
+				},
+				"type": {
+					"const": "reconnaissance"
+				},
+				"destination_ip": {
+					"type": "string",
+					"anyOf": [
+						{
+							"format": "ipv4"
+						},
+						{
+							"format": "ipv6"
+						}
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target IP address"
+				},
+				"destination_port": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 65535,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target port number"
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"tcp",
+						"udp"
+					],
+					"default": "tcp",
+					"description": "REQUIRED: Network protocol used"
+				},
+				"probed_resources": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"description": "REQUIRED: List of resources that were probed",
+					"examples": [
+						[
+							"/.env",
+							"/.git/config",
+							"/.htaccess",
+							"/wp-config.php.bak",
+							"/config.json",
+							"/.aws/credentials",
+							"/.docker/config.json",
+							"/admin/.htpasswd"
+						]
+					]
+				},
+				"resource_categories": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"environment_files",
+							"version_control",
+							"configuration_files",
+							"backup_files",
+							"admin_panels",
+							"database_files",
+							"log_files",
+							"credential_files",
+							"api_endpoints",
+							"debug_endpoints",
+							"other"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Categories of resources being probed"
+				},
+				"http_methods": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"GET",
+							"POST",
+							"HEAD",
+							"OPTIONS",
+							"PUT",
+							"DELETE",
+							"TRACE",
+							"CONNECT"
+						]
+					},
+					"description": "OPTIONAL: HTTP methods used in reconnaissance"
+				},
+				"response_codes": {
+					"type": "array",
+					"items": {
+						"type": "integer"
+					},
+					"description": "OPTIONAL: HTTP response codes received"
+				},
+				"successful_probes": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Resources that returned success responses (200, 301, 302)"
+				},
+				"user_agent": {
+					"type": "string",
+					"description": "OPTIONAL: User-Agent string used"
+				},
+				"first_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "REQUIRED: When reconnaissance activity was first observed"
+				},
+				"last_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When reconnaissance activity was last observed"
+				},
+				"total_probes": {
+					"type": "integer",
+					"minimum": 1,
+					"description": "OPTIONAL: Total number of probe attempts"
+				},
+				"automated_tool": {
+					"type": "boolean",
+					"description": "OPTIONAL: Whether activity appears to be from an automated tool"
+				}
+			},
+			"required": [
+				"protocol",
+				"probed_resources",
+				"first_seen"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "e6f7a8b9-c0d1-2345-efab-234567890123",
+			"timestamp": "2025-01-15T16:45:00Z",
+			"reporter": {
+				"org": "Web Security Service",
+				"contact": "security@webhost.example",
+				"domain": "webhost.example"
+			},
+			"sender": {
+				"org": "Web Security Service",
+				"contact": "security@webhost.example",
+				"domain": "webhost.example"
+			},
+			"source_identifier": "192.0.2.99",
+			"category": "connection",
+			"type": "reconnaissance",
+			"destination_ip": "198.51.100.75",
+			"destination_port": 443,
+			"protocol": "tcp",
+			"probed_resources": [
+				"/.env",
+				"/.git/config",
+				"/.aws/credentials",
+				"/wp-config.php.bak",
+				"/admin/.htpasswd"
+			],
+			"resource_categories": [
+				"environment_files",
+				"version_control",
+				"credential_files",
+				"backup_files"
+			],
+			"http_methods": [
+				"GET",
+				"HEAD"
+			],
+			"response_codes": [
+				404,
+				403,
+				200
+			],
+			"successful_probes": [
+				"/.git/config"
+			],
+			"automated_tool": true,
+			"total_probes": 47,
+			"first_seen": "2025-01-15T16:30:00Z",
+			"last_seen": "2025-01-15T16:44:00Z"
+		}
+	]
+}
diff --git a/xarf/schemas/types/connection-scraping.json b/xarf/schemas/types/connection-scraping.json
new file mode 100644
index 0000000..9a586fc
--- /dev/null
+++ b/xarf/schemas/types/connection-scraping.json
@@ -0,0 +1,181 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/connection-scraping.json",
+	"title": "XARF v4 Connection - Scraping Type Schema",
+	"description": "Schema for web crawling and scraping activities",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "connection"
+				},
+				"type": {
+					"const": "scraping"
+				},
+				"destination_ip": {
+					"type": "string",
+					"anyOf": [
+						{
+							"format": "ipv4"
+						},
+						{
+							"format": "ipv6"
+						}
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target IP address being scraped"
+				},
+				"destination_port": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 65535,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target port number (typically 80 or 443)"
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"tcp",
+						"udp"
+					],
+					"default": "tcp",
+					"description": "REQUIRED: Network protocol used"
+				},
+				"scraping_pattern": {
+					"type": "string",
+					"enum": [
+						"sequential",
+						"random",
+						"targeted",
+						"sitemap_following",
+						"api_harvesting",
+						"deep_crawling",
+						"breadth_first",
+						"depth_first"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Pattern of scraping behavior observed"
+				},
+				"target_content": {
+					"type": "string",
+					"enum": [
+						"product_data",
+						"pricing_information",
+						"user_profiles",
+						"contact_information",
+						"news_articles",
+						"images",
+						"documents",
+						"api_data",
+						"search_results",
+						"general_content",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Type of content being scraped"
+				},
+				"user_agent": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: User-Agent string used by the scraper"
+				},
+				"bot_signature": {
+					"type": "string",
+					"description": "OPTIONAL: Known bot or scraper signature if identified",
+					"examples": [
+						"Googlebot",
+						"Bingbot",
+						"AhrefsBot",
+						"SemrushBot",
+						"MJ12bot",
+						"DotBot",
+						"Custom Python Script",
+						"Scrapy"
+					]
+				},
+				"request_rate": {
+					"type": "number",
+					"description": "OPTIONAL: Average requests per second"
+				},
+				"total_requests": {
+					"type": "integer",
+					"minimum": 1,
+					"description": "REQUIRED: Total number of requests made"
+				},
+				"unique_urls": {
+					"type": "integer",
+					"minimum": 1,
+					"description": "OPTIONAL: Number of unique URLs accessed"
+				},
+				"data_volume": {
+					"type": "integer",
+					"description": "OPTIONAL: Total bytes of data transferred"
+				},
+				"respects_robots_txt": {
+					"type": "boolean",
+					"description": "OPTIONAL: Whether the scraper respects robots.txt"
+				},
+				"session_duration": {
+					"type": "integer",
+					"description": "OPTIONAL: Duration of scraping session in seconds"
+				},
+				"concurrent_connections": {
+					"type": "integer",
+					"description": "OPTIONAL: Maximum concurrent connections observed"
+				},
+				"first_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "REQUIRED: When scraping activity was first observed"
+				},
+				"last_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When scraping activity was last observed"
+				}
+			},
+			"required": [
+				"protocol",
+				"first_seen",
+				"total_requests"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "a2b3c4d5-e6f7-8901-abcd-ef1234567890",
+			"timestamp": "2025-01-15T14:00:00Z",
+			"reporter": {
+				"org": "Website Protection Service",
+				"contact": "abuse@hosting.example",
+				"domain": "hosting.example"
+			},
+			"sender": {
+				"org": "Website Protection Service",
+				"contact": "abuse@hosting.example",
+				"domain": "hosting.example"
+			},
+			"source_identifier": "192.0.2.150",
+			"category": "connection",
+			"type": "scraping",
+			"destination_ip": "198.51.100.25",
+			"destination_port": 443,
+			"protocol": "tcp",
+			"scraping_pattern": "deep_crawling",
+			"target_content": "product_data",
+			"user_agent": "Mozilla/5.0 (compatible; DataBot/1.0)",
+			"request_rate": 15.5,
+			"total_requests": 45000,
+			"unique_urls": 3500,
+			"respects_robots_txt": false,
+			"concurrent_connections": 25,
+			"first_seen": "2025-01-15T10:00:00Z",
+			"last_seen": "2025-01-15T13:45:00Z"
+		}
+	]
+}
diff --git a/xarf/schemas/types/connection-sql-injection.json b/xarf/schemas/types/connection-sql-injection.json
new file mode 100644
index 0000000..622986c
--- /dev/null
+++ b/xarf/schemas/types/connection-sql-injection.json
@@ -0,0 +1,153 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/connection-sql-injection.json",
+	"title": "XARF v4 Connection - SQL Injection Type Schema",
+	"description": "Schema for SQL injection attack attempts",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "connection"
+				},
+				"type": {
+					"const": "sql_injection"
+				},
+				"destination_ip": {
+					"type": "string",
+					"anyOf": [
+						{
+							"format": "ipv4"
+						},
+						{
+							"format": "ipv6"
+						}
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target IP address"
+				},
+				"destination_port": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 65535,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target port number (typically 80, 443)"
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"tcp",
+						"udp"
+					],
+					"default": "tcp",
+					"description": "REQUIRED: Network protocol used"
+				},
+				"http_method": {
+					"type": "string",
+					"enum": [
+						"GET",
+						"POST",
+						"PUT",
+						"DELETE",
+						"PATCH",
+						"HEAD",
+						"OPTIONS"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: HTTP method used in the attack"
+				},
+				"target_url": {
+					"type": "string",
+					"format": "uri",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Full URL that was targeted"
+				},
+				"injection_point": {
+					"type": "string",
+					"enum": [
+						"query_parameter",
+						"post_body",
+						"cookie",
+						"header",
+						"path",
+						"json_parameter"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Where the SQL injection was attempted"
+				},
+				"payload_sample": {
+					"type": "string",
+					"description": "OPTIONAL: Sample of the SQL injection payload (sanitized)",
+					"maxLength": 1000
+				},
+				"attack_technique": {
+					"type": "string",
+					"enum": [
+						"union_based",
+						"error_based",
+						"boolean_blind",
+						"time_blind",
+						"stacked_queries",
+						"out_of_band",
+						"second_order",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: SQL injection technique used"
+				},
+				"first_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "REQUIRED: When attack activity was first observed"
+				},
+				"last_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When attack activity was last observed"
+				},
+				"attempts_count": {
+					"type": "integer",
+					"minimum": 1,
+					"description": "OPTIONAL: Number of injection attempts observed"
+				}
+			},
+			"required": [
+				"protocol",
+				"first_seen"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "b3c4d5e6-f7a8-9012-bcde-f01234567890",
+			"timestamp": "2025-01-15T12:00:00Z",
+			"reporter": {
+				"org": "Web Application Firewall",
+				"contact": "security@example.com",
+				"domain": "example.com"
+			},
+			"sender": {
+				"org": "Web Application Firewall",
+				"contact": "security@example.com",
+				"domain": "example.com"
+			},
+			"source_identifier": "192.0.2.45",
+			"category": "connection",
+			"type": "sql_injection",
+			"destination_ip": "198.51.100.10",
+			"destination_port": 443,
+			"protocol": "tcp",
+			"http_method": "GET",
+			"target_url": "https://example.com/products.php?id=1",
+			"injection_point": "query_parameter",
+			"attack_technique": "union_based",
+			"attempts_count": 15,
+			"first_seen": "2025-01-15T11:45:00Z",
+			"last_seen": "2025-01-15T12:00:00Z"
+		}
+	]
+}
diff --git a/xarf/schemas/types/connection-vulnerability-scan.json b/xarf/schemas/types/connection-vulnerability-scan.json
new file mode 100644
index 0000000..0ac4d89
--- /dev/null
+++ b/xarf/schemas/types/connection-vulnerability-scan.json
@@ -0,0 +1,169 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/connection-vulnerability-scan.json",
+	"title": "XARF v4 Connection - Vulnerability Scan Type Schema",
+	"description": "Schema for vulnerability scanning and automated exploit attempt activities (Nmap, Masscan, Nikto, OpenVAS, web vulnerability scanners)",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "connection"
+				},
+				"type": {
+					"const": "vulnerability_scan"
+				},
+				"destination_ip": {
+					"type": "string",
+					"anyOf": [
+						{
+							"format": "ipv4"
+						},
+						{
+							"format": "ipv6"
+						}
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Target IP address being scanned"
+				},
+				"scan_type": {
+					"type": "string",
+					"enum": [
+						"port_scan",
+						"vulnerability_scan",
+						"version_detection",
+						"os_fingerprinting",
+						"service_enumeration",
+						"web_vuln_scan",
+						"directory_brute_force",
+						"mixed"
+					],
+					"description": "REQUIRED: Type of scanning activity"
+				},
+				"scanner_signature": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Known scanner tool signature if identified",
+					"examples": [
+						"Nmap",
+						"Masscan",
+						"Nikto",
+						"OpenVAS",
+						"Nessus",
+						"Acunetix",
+						"Burp Scanner"
+					]
+				},
+				"targeted_ports": {
+					"type": "array",
+					"items": {
+						"type": "integer",
+						"minimum": 1,
+						"maximum": 65535
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: List of ports that were scanned"
+				},
+				"targeted_services": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"description": "OPTIONAL: Services that were targeted",
+					"examples": [
+						[
+							"http",
+							"https",
+							"ssh",
+							"ftp",
+							"mysql",
+							"postgresql",
+							"mongodb"
+						]
+					]
+				},
+				"vulnerabilities_probed": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"description": "OPTIONAL: Specific vulnerabilities or CVEs that were probed"
+				},
+				"scan_rate": {
+					"type": "number",
+					"description": "OPTIONAL: Requests per second if measurable"
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"tcp",
+						"udp",
+						"icmp",
+						"mixed"
+					],
+					"description": "REQUIRED: Network protocol(s) used in scanning"
+				},
+				"first_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "REQUIRED: When scanning activity was first observed"
+				},
+				"last_seen": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When scanning activity was last observed"
+				},
+				"total_requests": {
+					"type": "integer",
+					"minimum": 1,
+					"description": "OPTIONAL: Total number of scanning requests observed"
+				},
+				"user_agent": {
+					"type": "string",
+					"description": "OPTIONAL: User-Agent string if HTTP-based scanning"
+				}
+			},
+			"required": [
+				"scan_type",
+				"protocol",
+				"first_seen"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "c4d5e6f7-a8b9-0123-cdef-012345678901",
+			"timestamp": "2025-01-15T09:30:00Z",
+			"reporter": {
+				"org": "Network Security Monitor",
+				"contact": "noc@example.net",
+				"domain": "example.net"
+			},
+			"sender": {
+				"org": "Network Security Monitor",
+				"contact": "noc@example.net",
+				"domain": "example.net"
+			},
+			"source_identifier": "203.0.113.77",
+			"category": "connection",
+			"type": "vulnerability_scan",
+			"destination_ip": "198.51.100.50",
+			"scan_type": "web_vuln_scan",
+			"scanner_signature": "Nikto",
+			"targeted_ports": [
+				80,
+				443,
+				8080,
+				8443
+			],
+			"protocol": "tcp",
+			"total_requests": 1250,
+			"first_seen": "2025-01-15T09:15:00Z",
+			"last_seen": "2025-01-15T09:28:00Z"
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-base.json b/xarf/schemas/types/content-base.json
new file mode 100644
index 0000000..a39ace4
--- /dev/null
+++ b/xarf/schemas/types/content-base.json
@@ -0,0 +1,243 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-base.json",
+	"title": "XARF v4 Content Category - Base Schema",
+	"description": "Base schema for all content category abuse types with shared fields",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "content"
+				},
+				"url": {
+					"type": "string",
+					"format": "uri",
+					"description": "REQUIRED: The URL of the abusive content",
+					"examples": [
+						"https://phishing-site.example.com/login",
+						"http://malware-host.example.net/payload.exe"
+					]
+				},
+				"domain": {
+					"type": "string",
+					"pattern": "^([a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,}$",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Fully qualified domain name of the abusive content",
+					"examples": [
+						"phishing-site.example.com",
+						"malware.example.net"
+					]
+				},
+				"registrar": {
+					"type": "string",
+					"description": "OPTIONAL: Domain registrar if known",
+					"examples": [
+						"GoDaddy",
+						"Namecheap",
+						"CloudFlare"
+					]
+				},
+				"nameservers": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"description": "OPTIONAL: DNS nameservers for the domain",
+					"examples": [
+						[
+							"ns1.example.com",
+							"ns2.example.com"
+						]
+					]
+				},
+				"dns_records": {
+					"type": "object",
+					"properties": {
+						"a": {
+							"type": "array",
+							"items": {
+								"type": "string",
+								"format": "ipv4"
+							}
+						},
+						"aaaa": {
+							"type": "array",
+							"items": {
+								"type": "string",
+								"format": "ipv6"
+							}
+						},
+						"mx": {
+							"type": "array",
+							"items": {
+								"type": "string"
+							}
+						},
+						"txt": {
+							"type": "array",
+							"items": {
+								"type": "string"
+							}
+						}
+					},
+					"description": "OPTIONAL: Key DNS evidence records"
+				},
+				"screenshot_url": {
+					"type": "string",
+					"format": "uri",
+					"description": "OPTIONAL: Reference URL to screenshot evidence"
+				},
+				"verified_at": {
+					"type": "string",
+					"format": "date-time",
+					"x-recommended": true,
+					"description": "RECOMMENDED: When content was last verified as active"
+				},
+				"verification_method": {
+					"type": "string",
+					"enum": [
+						"manual",
+						"automated_crawler",
+						"user_report",
+						"honeypot",
+						"threat_intelligence"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: How the abusive content was verified"
+				},
+				"attack_vector": {
+					"type": "string",
+					"enum": [
+						"phishing",
+						"malware",
+						"fraud",
+						"brand_infringement",
+						"copyright_infringement",
+						"data_leak",
+						"remote_compromise",
+						"suspicious_registration"
+					],
+					"description": "OPTIONAL: Primary attack vector classification"
+				},
+				"target_brand": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Impersonated brand or entity if applicable",
+					"examples": [
+						"PayPal",
+						"Microsoft",
+						"Amazon"
+					]
+				},
+				"hosting_provider": {
+					"type": "string",
+					"description": "OPTIONAL: Identified hosting provider",
+					"examples": [
+						"AWS",
+						"CloudFlare",
+						"DigitalOcean"
+					]
+				},
+				"asn": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 4294967295,
+					"description": "OPTIONAL: Autonomous System Number"
+				},
+				"country_code": {
+					"type": "string",
+					"pattern": "^[A-Z]{2}$",
+					"description": "OPTIONAL: ISO 3166-1 alpha-2 country code"
+				},
+				"ssl_certificate": {
+					"type": "object",
+					"properties": {
+						"issuer": {
+							"type": "string",
+							"description": "OPTIONAL: Certificate issuer"
+						},
+						"subject": {
+							"type": "string",
+							"description": "OPTIONAL: Certificate subject"
+						},
+						"valid_from": {
+							"type": "string",
+							"format": "date-time"
+						},
+						"valid_to": {
+							"type": "string",
+							"format": "date-time"
+						},
+						"fingerprint": {
+							"type": "string",
+							"description": "OPTIONAL: SHA256 fingerprint of the certificate"
+						}
+					},
+					"description": "OPTIONAL: SSL certificate details if HTTPS is used"
+				},
+				"whois": {
+					"type": "object",
+					"properties": {
+						"registrant": {
+							"type": "string",
+							"description": "OPTIONAL: Domain registrant name or organization"
+						},
+						"created_date": {
+							"type": "string",
+							"format": "date-time",
+							"description": "OPTIONAL: Domain creation date"
+						},
+						"updated_date": {
+							"type": "string",
+							"format": "date-time",
+							"description": "OPTIONAL: Domain last updated date"
+						},
+						"expiry_date": {
+							"type": "string",
+							"format": "date-time",
+							"description": "OPTIONAL: Domain expiration date"
+						},
+						"registrar_abuse_contact": {
+							"type": "string",
+							"format": "email",
+							"description": "OPTIONAL: Registrar's abuse contact email"
+						}
+					},
+					"description": "OPTIONAL: WHOIS data for the domain"
+				},
+				"dns_response": {
+					"type": "object",
+					"properties": {
+						"query_time": {
+							"type": "string",
+							"format": "date-time",
+							"description": "OPTIONAL: When the DNS query was made"
+						},
+						"authoritative": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether response was from authoritative nameserver"
+						},
+						"response_code": {
+							"type": "string",
+							"enum": [
+								"NOERROR",
+								"NXDOMAIN",
+								"SERVFAIL",
+								"REFUSED"
+							],
+							"description": "OPTIONAL: DNS response code"
+						}
+					},
+					"description": "OPTIONAL: DNS query response metadata"
+				}
+			},
+			"required": [
+				"url"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-brand_infringement.json b/xarf/schemas/types/content-brand_infringement.json
new file mode 100644
index 0000000..f66a21c
--- /dev/null
+++ b/xarf/schemas/types/content-brand_infringement.json
@@ -0,0 +1,164 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-brand_infringement.json",
+	"title": "XARF v4 Content - Brand Infringement Type Schema",
+	"description": "Schema for brand impersonation and trademark violations",
+	"allOf": [
+		{
+			"$ref": "./content-base.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"type": {
+					"const": "brand_infringement"
+				},
+				"infringement_type": {
+					"type": "string",
+					"enum": [
+						"counterfeit",
+						"typosquatting",
+						"lookalike",
+						"homograph",
+						"unauthorized_reseller",
+						"trademark_violation",
+						"brand_impersonation",
+						"logo_misuse",
+						"other"
+					],
+					"description": "REQUIRED: Specific type of brand infringement"
+				},
+				"legitimate_site": {
+					"type": "string",
+					"format": "uri",
+					"description": "REQUIRED: URL of the legitimate brand website"
+				},
+				"similarity_score": {
+					"type": "number",
+					"minimum": 0.0,
+					"maximum": 1.0,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Visual or textual similarity score (0.0 = no similarity, 1.0 = identical)"
+				},
+				"trademark_details": {
+					"type": "object",
+					"properties": {
+						"registration_number": {
+							"type": "string",
+							"description": "OPTIONAL: Trademark registration number"
+						},
+						"jurisdiction": {
+							"type": "string",
+							"description": "OPTIONAL: Trademark jurisdiction (country/region)"
+						},
+						"category": {
+							"type": "array",
+							"items": {
+								"type": "integer",
+								"minimum": 1,
+								"maximum": 45
+							},
+							"description": "OPTIONAL: Nice Classification classes"
+						}
+					},
+					"description": "OPTIONAL: Trademark registration details if applicable"
+				},
+				"infringing_elements": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"logo",
+							"brand_name",
+							"tagline",
+							"color_scheme",
+							"layout",
+							"product_images",
+							"domain_name",
+							"other"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Specific brand elements being infringed"
+				},
+				"products_offered": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"description": "OPTIONAL: Products or services offered on the infringing site"
+				},
+				"previous_enforcement": {
+					"type": "array",
+					"items": {
+						"type": "object",
+						"properties": {
+							"date": {
+								"type": "string",
+								"format": "date",
+								"description": "OPTIONAL: Date of enforcement action"
+							},
+							"action": {
+								"type": "string",
+								"enum": [
+									"cease_desist",
+									"takedown_notice",
+									"domain_dispute",
+									"legal_action",
+									"other"
+								],
+								"description": "OPTIONAL: Type of action taken"
+							},
+							"result": {
+								"type": "string",
+								"description": "OPTIONAL: Result of the action"
+							}
+						}
+					},
+					"description": "OPTIONAL: Previous enforcement actions taken"
+				}
+			},
+			"required": [
+				"infringement_type",
+				"legitimate_site"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+			"timestamp": "2025-01-15T14:45:00Z",
+			"reporter": {
+				"org": "Brand Protection Services",
+				"contact": "enforcement@brandprotect.example",
+				"domain": "brandprotect.example"
+			},
+			"sender": {
+				"org": "Brand Protection Services",
+				"contact": "enforcement@brandprotect.example",
+				"domain": "brandprotect.example"
+			},
+			"source_identifier": "203.0.113.77",
+			"category": "content",
+			"type": "brand_infringement",
+			"url": "https://sh0p-deals.example.com",
+			"infringement_type": "typosquatting",
+			"legitimate_site": "https://www.shop.example",
+			"similarity_score": 0.87,
+			"infringing_elements": [
+				"logo",
+				"brand_name",
+				"color_scheme"
+			],
+			"target_brand": "Major Retailer",
+			"evidence": [
+				{
+					"content_type": "image/png",
+					"description": "Screenshot showing brand impersonation",
+					"payload": "base64_encoded_screenshot"
+				}
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-csam.json b/xarf/schemas/types/content-csam.json
new file mode 100644
index 0000000..7f2bb6e
--- /dev/null
+++ b/xarf/schemas/types/content-csam.json
@@ -0,0 +1,127 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-csam.json",
+	"title": "XARF v4 Content - CSAM Type Schema",
+	"description": "Schema for Child Sexual Abuse Material (baseline/A1/A2/B1/B2 illegal content)",
+	"allOf": [
+		{
+			"$ref": "./content-base.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"type": {
+					"const": "csam"
+				},
+				"classification": {
+					"type": "string",
+					"enum": [
+						"baseline",
+						"A1",
+						"A2",
+						"B1",
+						"B2"
+					],
+					"description": "REQUIRED: Legal classification category for the material"
+				},
+				"media_type": {
+					"type": "string",
+					"enum": [
+						"image",
+						"video",
+						"audio",
+						"text",
+						"mixed"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Type of media containing CSAM"
+				},
+				"detection_method": {
+					"type": "string",
+					"enum": [
+						"hash_match",
+						"ai_detection",
+						"manual_review",
+						"user_report",
+						"automated_scan"
+					],
+					"description": "REQUIRED: Method used to detect the CSAM"
+				},
+				"hash_values": {
+					"type": "object",
+					"properties": {
+						"md5": {
+							"type": "string",
+							"pattern": "^[a-fA-F0-9]{32}$",
+							"description": "OPTIONAL: MD5 hash"
+						},
+						"sha1": {
+							"type": "string",
+							"pattern": "^[a-fA-F0-9]{40}$",
+							"description": "OPTIONAL: SHA1 hash"
+						},
+						"sha256": {
+							"type": "string",
+							"pattern": "^[a-fA-F0-9]{64}$",
+							"description": "RECOMMENDED: SHA256 hash"
+						},
+						"photodna": {
+							"type": "string",
+							"description": "RECOMMENDED: PhotoDNA hash for image matching"
+						}
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Hash values of the illegal content"
+				},
+				"ncmec_report_id": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: NCMEC CyberTipline report ID if applicable"
+				},
+				"content_removed": {
+					"type": "boolean",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Whether the content has been removed"
+				},
+				"account_suspended": {
+					"type": "boolean",
+					"description": "OPTIONAL: Whether associated accounts were suspended"
+				}
+			},
+			"required": [
+				"classification",
+				"detection_method"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "f7a8b9c0-d1e2-3456-fabc-def012345678",
+			"timestamp": "2025-01-15T10:30:00Z",
+			"reporter": {
+				"org": "Content Safety Service",
+				"contact": "safety@example.org",
+				"domain": "example.org"
+			},
+			"sender": {
+				"org": "Content Safety Service",
+				"contact": "safety@example.org",
+				"domain": "example.org"
+			},
+			"source_identifier": "198.51.100.42",
+			"category": "content",
+			"type": "csam",
+			"url": "https://example.com/illegal-content",
+			"classification": "A1",
+			"detection_method": "hash_match",
+			"media_type": "image",
+			"hash_values": {
+				"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+			},
+			"ncmec_report_id": "12345678",
+			"content_removed": true,
+			"account_suspended": true
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-csem.json b/xarf/schemas/types/content-csem.json
new file mode 100644
index 0000000..b2984da
--- /dev/null
+++ b/xarf/schemas/types/content-csem.json
@@ -0,0 +1,170 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-csem.json",
+	"title": "XARF v4 Content - CSEM Type Schema",
+	"description": "Schema for Child Sexual Exploitation Material (grooming, solicitation, and other exploitation activities)",
+	"allOf": [
+		{
+			"$ref": "./content-base.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"type": {
+					"const": "csem"
+				},
+				"exploitation_type": {
+					"type": "string",
+					"enum": [
+						"grooming",
+						"solicitation",
+						"sextortion",
+						"trafficking",
+						"distribution",
+						"production",
+						"possession"
+					],
+					"description": "REQUIRED: Type of exploitation activity"
+				},
+				"victim_age_range": {
+					"type": "string",
+					"enum": [
+						"infant",
+						"toddler",
+						"prepubescent",
+						"pubescent",
+						"unknown"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Estimated age range of victim"
+				},
+				"platform": {
+					"type": "string",
+					"enum": [
+						"social_media",
+						"messaging_app",
+						"gaming_platform",
+						"forum",
+						"email",
+						"darkweb",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Platform where exploitation occurred"
+				},
+				"detection_method": {
+					"type": "string",
+					"enum": [
+						"behavioral_analysis",
+						"keyword_detection",
+						"user_report",
+						"ai_detection",
+						"manual_review",
+						"law_enforcement_referral"
+					],
+					"description": "REQUIRED: Method used to detect the exploitation"
+				},
+				"evidence_type": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"chat_logs",
+							"images",
+							"videos",
+							"user_profile",
+							"metadata"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Types of evidence collected"
+				},
+				"perpetrator_indicators": {
+					"type": "object",
+					"properties": {
+						"account_id": {
+							"type": "string",
+							"description": "OPTIONAL: Account identifier of perpetrator"
+						},
+						"ip_addresses": {
+							"type": "array",
+							"items": {
+								"type": "string",
+								"format": "ipv4"
+							},
+							"description": "OPTIONAL: IP addresses associated with perpetrator"
+						},
+						"pattern_of_behavior": {
+							"type": "string",
+							"description": "OPTIONAL: Description of behavioral patterns"
+						}
+					},
+					"description": "OPTIONAL: Indicators associated with the perpetrator"
+				},
+				"reporting_obligations": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"NCMEC",
+							"IWF",
+							"local_law_enforcement",
+							"europol",
+							"interpol",
+							"platform_safety_team",
+							"other"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Entities to which this has been or should be reported"
+				}
+			},
+			"required": [
+				"exploitation_type",
+				"detection_method"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "a8b9c0d1-e2f3-4567-abcd-ef0123456789",
+			"timestamp": "2025-01-20T14:20:00Z",
+			"reporter": {
+				"org": "Platform Safety Team",
+				"contact": "safety@platform.example",
+				"domain": "platform.example"
+			},
+			"sender": {
+				"org": "Platform Safety Team",
+				"contact": "safety@platform.example",
+				"domain": "platform.example"
+			},
+			"source_identifier": "203.0.113.55",
+			"category": "content",
+			"type": "csem",
+			"url": "https://platform.example/user/suspicious-account",
+			"exploitation_type": "grooming",
+			"victim_age_range": "pubescent",
+			"detection_method": "behavioral_analysis",
+			"platform": "social_media",
+			"evidence_type": [
+				"chat_logs",
+				"user_profile"
+			],
+			"reporting_obligations": [
+				"NCMEC",
+				"local_law_enforcement",
+				"platform_safety_team"
+			],
+			"perpetrator_indicators": {
+				"account_id": "user123456",
+				"ip_addresses": [
+					"203.0.113.55",
+					"203.0.113.56"
+				],
+				"pattern_of_behavior": "Repeated contact with minors, requesting private information"
+			}
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-exposed-data.json b/xarf/schemas/types/content-exposed-data.json
new file mode 100644
index 0000000..0274c87
--- /dev/null
+++ b/xarf/schemas/types/content-exposed-data.json
@@ -0,0 +1,210 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-exposed-data.json",
+	"title": "XARF v4 Content - Exposed Data Type Schema",
+	"description": "Schema for exposed sensitive data and information leaks",
+	"allOf": [
+		{
+			"$ref": "./content-base.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"type": {
+					"const": "exposed_data"
+				},
+				"data_types": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"personal_information",
+							"credentials",
+							"financial",
+							"medical",
+							"government_id",
+							"email_addresses",
+							"phone_numbers",
+							"api_keys",
+							"database_dumps",
+							"source_code",
+							"internal_documents",
+							"customer_data",
+							"employee_data",
+							"intellectual_property",
+							"other"
+						]
+					},
+					"minItems": 1,
+					"description": "REQUIRED: Types of data exposed"
+				},
+				"exposure_method": {
+					"type": "string",
+					"enum": [
+						"misconfigured_server",
+						"open_directory",
+						"database_exposure",
+						"git_repository",
+						"backup_file",
+						"log_file",
+						"cloud_storage",
+						"paste_site",
+						"forum_post",
+						"ransomware_leak",
+						"intentional_leak",
+						"other"
+					],
+					"description": "REQUIRED: How the data was exposed"
+				},
+				"record_count": {
+					"type": "integer",
+					"minimum": 0,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Number of records exposed (if known)"
+				},
+				"affected_organization": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Organization whose data was exposed"
+				},
+				"data_format": {
+					"type": "string",
+					"enum": [
+						"plaintext",
+						"csv",
+						"json",
+						"xml",
+						"sql",
+						"excel",
+						"pdf",
+						"mixed",
+						"other"
+					],
+					"description": "OPTIONAL: Format of the exposed data"
+				},
+				"sensitive_fields": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Specific sensitive data fields exposed",
+					"examples": [
+						[
+							"ssn",
+							"credit_card",
+							"password"
+						],
+						[
+							"email",
+							"phone",
+							"address"
+						]
+					]
+				},
+				"encryption_status": {
+					"type": "string",
+					"enum": [
+						"unencrypted",
+						"encrypted",
+						"partially_encrypted",
+						"hashed",
+						"unknown"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Whether the exposed data was encrypted"
+				},
+				"accessibility": {
+					"type": "string",
+					"enum": [
+						"public",
+						"requires_authentication",
+						"requires_payment",
+						"dark_web",
+						"removed"
+					],
+					"description": "OPTIONAL: Current accessibility of the exposed data"
+				},
+				"discovery_source": {
+					"type": "string",
+					"enum": [
+						"security_researcher",
+						"automated_scan",
+						"breach_monitoring",
+						"user_report",
+						"law_enforcement",
+						"threat_intelligence",
+						"other"
+					],
+					"description": "OPTIONAL: How the data exposure was discovered"
+				},
+				"sample_records": {
+					"type": "array",
+					"items": {
+						"type": "object",
+						"properties": {
+							"description": {
+								"type": "string",
+								"description": "OPTIONAL: Description of sample record"
+							},
+							"redacted_sample": {
+								"type": "string",
+								"description": "OPTIONAL: Redacted sample content"
+							}
+						}
+					},
+					"maxItems": 5,
+					"description": "OPTIONAL: Redacted samples of exposed records for verification"
+				}
+			},
+			"required": [
+				"data_types",
+				"exposure_method"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "f1e2d3c4-b5a6-7890-abcd-ef1234567890",
+			"timestamp": "2025-01-15T09:20:00Z",
+			"reporter": {
+				"org": "Data Breach Monitor",
+				"contact": "alerts@breachmonitor.example",
+				"domain": "breachmonitor.example"
+			},
+			"sender": {
+				"org": "Data Breach Monitor",
+				"contact": "alerts@breachmonitor.example",
+				"domain": "breachmonitor.example"
+			},
+			"source_identifier": "198.51.100.99",
+			"category": "content",
+			"type": "exposed_data",
+			"url": "http://exposed-database.example.com:8080/dump.sql",
+			"data_types": [
+				"credentials",
+				"personal_information",
+				"financial"
+			],
+			"exposure_method": "misconfigured_server",
+			"record_count": 150000,
+			"affected_organization": "Example Corp",
+			"data_format": "sql",
+			"sensitive_fields": [
+				"email",
+				"password_hash",
+				"credit_card"
+			],
+			"encryption_status": "hashed",
+			"accessibility": "public",
+			"evidence": [
+				{
+					"content_type": "text/plain",
+					"description": "Sample of exposed database structure",
+					"payload": "base64_encoded_sample"
+				}
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-fraud.json b/xarf/schemas/types/content-fraud.json
new file mode 100644
index 0000000..e0bdfee
--- /dev/null
+++ b/xarf/schemas/types/content-fraud.json
@@ -0,0 +1,149 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-fraud.json",
+	"title": "XARF v4 Content - Fraud Type Schema",
+	"description": "Schema for fraud and scam websites",
+	"allOf": [
+		{
+			"$ref": "./content-base.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"type": {
+					"const": "fraud"
+				},
+				"fraud_type": {
+					"type": "string",
+					"enum": [
+						"investment",
+						"romance",
+						"tech_support",
+						"lottery",
+						"advance_fee",
+						"cryptocurrency",
+						"shopping",
+						"charity",
+						"employment",
+						"government_impersonation",
+						"other"
+					],
+					"description": "REQUIRED: Specific type of fraud"
+				},
+				"payment_methods": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"credit_card",
+							"bank_transfer",
+							"cryptocurrency",
+							"gift_cards",
+							"wire_transfer",
+							"paypal",
+							"western_union",
+							"moneygram",
+							"cashapp",
+							"venmo",
+							"other"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Payment methods requested by fraudsters"
+				},
+				"cryptocurrency_addresses": {
+					"type": "array",
+					"items": {
+						"type": "object",
+						"properties": {
+							"currency": {
+								"type": "string",
+								"enum": [
+									"bitcoin",
+									"ethereum",
+									"usdt",
+									"bnb",
+									"monero",
+									"other"
+								],
+								"description": "OPTIONAL: Cryptocurrency type"
+							},
+							"address": {
+								"type": "string",
+								"description": "OPTIONAL: Cryptocurrency wallet address"
+							}
+						},
+						"required": [
+							"currency",
+							"address"
+						]
+					},
+					"description": "OPTIONAL: Cryptocurrency addresses used in the fraud"
+				},
+				"claimed_entity": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Organization or person the fraudster claims to represent"
+				},
+				"loss_amount": {
+					"type": "object",
+					"properties": {
+						"currency": {
+							"type": "string",
+							"pattern": "^[A-Z]{3}$",
+							"description": "OPTIONAL: ISO 4217 currency code"
+						},
+						"amount": {
+							"type": "number",
+							"minimum": 0,
+							"description": "OPTIONAL: Estimated or actual loss amount"
+						}
+					},
+					"description": "OPTIONAL: Financial loss information if known"
+				}
+			},
+			"required": [
+				"fraud_type"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "550e8400-e29b-41d4-a716-446655440000",
+			"timestamp": "2025-01-15T10:30:00Z",
+			"reporter": {
+				"org": "Anti-Fraud Coalition",
+				"contact": "reports@antifraud.example",
+				"domain": "antifraud.example"
+			},
+			"sender": {
+				"org": "Anti-Fraud Coalition",
+				"contact": "reports@antifraud.example",
+				"domain": "antifraud.example"
+			},
+			"source_identifier": "198.51.100.45",
+			"category": "content",
+			"type": "fraud",
+			"url": "https://get-rich-quick.example.com",
+			"fraud_type": "investment",
+			"payment_methods": [
+				"cryptocurrency",
+				"wire_transfer"
+			],
+			"cryptocurrency_addresses": [
+				{
+					"currency": "bitcoin",
+					"address": "1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa"
+				}
+			],
+			"evidence": [
+				{
+					"content_type": "image/png",
+					"description": "Screenshot of fraudulent investment site",
+					"payload": "base64_encoded_screenshot"
+				}
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-malware.json b/xarf/schemas/types/content-malware.json
new file mode 100644
index 0000000..fd77103
--- /dev/null
+++ b/xarf/schemas/types/content-malware.json
@@ -0,0 +1,263 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-malware.json",
+	"title": "XARF v4 Content - Malware Type Schema",
+	"description": "Schema for malware hosting and distribution",
+	"allOf": [
+		{
+			"$ref": "./content-base.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"type": {
+					"const": "malware"
+				},
+				"malware_family": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Known malware family name",
+					"examples": [
+						"Emotet",
+						"TrickBot",
+						"Qakbot",
+						"Cobalt Strike"
+					]
+				},
+				"malware_type": {
+					"type": "string",
+					"enum": [
+						"trojan",
+						"ransomware",
+						"dropper",
+						"loader",
+						"backdoor",
+						"rootkit",
+						"infostealer",
+						"banking_trojan",
+						"cryptominer",
+						"adware",
+						"spyware",
+						"worm",
+						"bot",
+						"rat",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Classification of the malware"
+				},
+				"file_hashes": {
+					"type": "object",
+					"properties": {
+						"md5": {
+							"type": "string",
+							"pattern": "^[a-fA-F0-9]{32}$",
+							"description": "OPTIONAL: MD5 hash of malware file"
+						},
+						"sha1": {
+							"type": "string",
+							"pattern": "^[a-fA-F0-9]{40}$",
+							"description": "OPTIONAL: SHA1 hash of malware file"
+						},
+						"sha256": {
+							"type": "string",
+							"pattern": "^[a-fA-F0-9]{64}$",
+							"description": "RECOMMENDED: SHA256 hash of malware file"
+						},
+						"ssdeep": {
+							"type": "string",
+							"description": "OPTIONAL: SSDeep fuzzy hash"
+						}
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Hash values of the malware file"
+				},
+				"file_metadata": {
+					"type": "object",
+					"properties": {
+						"filename": {
+							"type": "string",
+							"description": "OPTIONAL: Original filename"
+						},
+						"file_size": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: File size in bytes"
+						},
+						"file_type": {
+							"type": "string",
+							"description": "OPTIONAL: File type description",
+							"examples": [
+								"PE32 executable",
+								"PDF document",
+								"Microsoft Word document",
+								"ZIP archive"
+							]
+						},
+						"mime_type": {
+							"type": "string",
+							"description": "OPTIONAL: MIME type"
+						}
+					},
+					"description": "OPTIONAL: Metadata about the malware file"
+				},
+				"distribution_method": {
+					"type": "string",
+					"enum": [
+						"direct_download",
+						"drive_by_download",
+						"email_attachment",
+						"malvertising",
+						"exploit_kit",
+						"watering_hole",
+						"supply_chain",
+						"social_engineering",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: How the malware is being distributed"
+				},
+				"c2_servers": {
+					"type": "array",
+					"items": {
+						"type": "object",
+						"properties": {
+							"address": {
+								"type": "string",
+								"description": "OPTIONAL: IP or domain of C2 server"
+							},
+							"port": {
+								"type": "integer",
+								"minimum": 1,
+								"maximum": 65535,
+								"description": "OPTIONAL: Port number"
+							},
+							"protocol": {
+								"type": "string",
+								"enum": [
+									"http",
+									"https",
+									"tcp",
+									"udp",
+									"dns",
+									"other"
+								],
+								"description": "OPTIONAL: Protocol used"
+							}
+						}
+					},
+					"description": "OPTIONAL: Command and control servers if known"
+				},
+				"sandbox_analysis": {
+					"type": "object",
+					"properties": {
+						"sandbox_name": {
+							"type": "string",
+							"description": "OPTIONAL: Name of sandbox used",
+							"examples": [
+								"VirusTotal",
+								"Hybrid Analysis",
+								"Joe Sandbox"
+							]
+						},
+						"analysis_url": {
+							"type": "string",
+							"format": "uri",
+							"description": "OPTIONAL: URL to analysis report"
+						},
+						"verdict": {
+							"type": "string",
+							"enum": [
+								"malicious",
+								"suspicious",
+								"clean",
+								"unknown"
+							],
+							"description": "OPTIONAL: Analysis verdict"
+						},
+						"score": {
+							"type": "number",
+							"minimum": 0,
+							"maximum": 100,
+							"description": "OPTIONAL: Maliciousness score"
+						}
+					},
+					"description": "OPTIONAL: Results from automated malware analysis"
+				},
+				"exploit_cve": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"pattern": "^CVE-\\d{4}-\\d{4,}$"
+					},
+					"description": "OPTIONAL: CVEs exploited by this malware"
+				},
+				"persistence_mechanism": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"registry",
+							"scheduled_task",
+							"service",
+							"startup_folder",
+							"dll_hijacking",
+							"wmi",
+							"other"
+						]
+					},
+					"description": "OPTIONAL: How the malware maintains persistence"
+				},
+				"targeted_platforms": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"windows",
+							"linux",
+							"macos",
+							"android",
+							"ios",
+							"multi_platform"
+						]
+					},
+					"description": "OPTIONAL: Operating systems targeted by the malware"
+				}
+			}
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "e5f6a7b8-c9d0-1234-efab-cdef01234567",
+			"timestamp": "2025-01-15T12:45:00Z",
+			"reporter": {
+				"org": "Malware Analysis Lab",
+				"contact": "alerts@malwarelab.example",
+				"domain": "malwarelab.example"
+			},
+			"sender": {
+				"org": "Malware Analysis Lab",
+				"contact": "alerts@malwarelab.example",
+				"domain": "malwarelab.example"
+			},
+			"source_identifier": "198.51.100.123",
+			"category": "content",
+			"type": "malware",
+			"url": "https://malicious-downloads.example.com/invoice.exe",
+			"malware_family": "Emotet",
+			"malware_type": "trojan",
+			"file_hashes": {
+				"sha256": "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+			},
+			"distribution_method": "email_attachment",
+			"evidence": [
+				{
+					"content_type": "application/octet-stream",
+					"description": "Malware sample",
+					"payload": "base64_encoded_malware"
+				}
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-phishing.json b/xarf/schemas/types/content-phishing.json
new file mode 100644
index 0000000..e1334f5
--- /dev/null
+++ b/xarf/schemas/types/content-phishing.json
@@ -0,0 +1,141 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-phishing.json",
+	"title": "XARF v4 Content - Phishing Type Schema",
+	"description": "Schema for phishing websites and credential harvesting",
+	"allOf": [
+		{
+			"$ref": "./content-base.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"type": {
+					"const": "phishing"
+				},
+				"credential_fields": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Form fields present on the phishing page",
+					"examples": [
+						[
+							"username",
+							"password"
+						],
+						[
+							"email",
+							"password",
+							"pin"
+						],
+						[
+							"card_number",
+							"cvv",
+							"expiry"
+						]
+					]
+				},
+				"phishing_kit": {
+					"type": "string",
+					"description": "OPTIONAL: Known phishing kit identifier if detected",
+					"examples": [
+						"Kr3pto",
+						"16Shop",
+						"LogoKit"
+					]
+				},
+				"redirect_chain": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"format": "uri"
+					},
+					"description": "OPTIONAL: URL redirect sequence leading to phishing page"
+				},
+				"submission_url": {
+					"type": "string",
+					"format": "uri",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Where credentials are submitted"
+				},
+				"cloned_site": {
+					"type": "string",
+					"format": "uri",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Legitimate site being impersonated"
+				},
+				"detection_evasion": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"geo_blocking",
+							"user_agent_filtering",
+							"referrer_checking",
+							"captcha",
+							"time_based_display",
+							"ip_blacklisting",
+							"obfuscation",
+							"other"
+						]
+					},
+					"description": "OPTIONAL: Evasion techniques used by the phishing page"
+				},
+				"lure_type": {
+					"type": "string",
+					"enum": [
+						"account_suspension",
+						"security_alert",
+						"payment_issue",
+						"prize_notification",
+						"document_share",
+						"password_reset",
+						"shipping_notification",
+						"tax_refund",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Social engineering lure used"
+				}
+			}
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "b2c3d4e5-f6a7-8901-bcde-f2345678901a",
+			"timestamp": "2025-01-15T15:15:24Z",
+			"reporter": {
+				"org": "Phishing Detection Service",
+				"contact": "reports@antiphishing.example",
+				"domain": "antiphishing.example"
+			},
+			"sender": {
+				"org": "Phishing Detection Service",
+				"contact": "reports@antiphishing.example",
+				"domain": "antiphishing.example"
+			},
+			"source_identifier": "203.0.113.45",
+			"category": "content",
+			"type": "phishing",
+			"url": "https://secure-banking-login.example.com/auth",
+			"target_brand": "Major Bank Corp",
+			"cloned_site": "https://www.bank.example",
+			"credential_fields": [
+				"username",
+				"password",
+				"pin"
+			],
+			"lure_type": "security_alert",
+			"evidence": [
+				{
+					"content_type": "image/png",
+					"description": "Screenshot of phishing page",
+					"payload": "base64_encoded_screenshot"
+				}
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-remote_compromise.json b/xarf/schemas/types/content-remote_compromise.json
new file mode 100644
index 0000000..77225e5
--- /dev/null
+++ b/xarf/schemas/types/content-remote_compromise.json
@@ -0,0 +1,240 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-remote_compromise.json",
+	"title": "XARF v4 Content - Remote Compromise Type Schema",
+	"description": "Schema for compromised websites, webshells, and unauthorized access",
+	"allOf": [
+		{
+			"$ref": "./content-base.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"type": {
+					"const": "remote_compromise"
+				},
+				"compromise_type": {
+					"type": "string",
+					"enum": [
+						"webshell",
+						"backdoor",
+						"defacement",
+						"malicious_redirect",
+						"seo_spam",
+						"cryptominer",
+						"phishing_kit",
+						"malware_host",
+						"c2_server",
+						"proxy",
+						"scanner",
+						"other"
+					],
+					"description": "REQUIRED: Type of compromise detected"
+				},
+				"compromise_indicators": {
+					"type": "array",
+					"items": {
+						"type": "object",
+						"properties": {
+							"type": {
+								"type": "string",
+								"enum": [
+									"file_path",
+									"process",
+									"network_connection",
+									"user_account",
+									"scheduled_task",
+									"registry_key",
+									"service"
+								],
+								"description": "REQUIRED: Type of indicator"
+							},
+							"value": {
+								"type": "string",
+								"description": "REQUIRED: Indicator value (e.g., file path, process name)"
+							},
+							"description": {
+								"type": "string",
+								"description": "OPTIONAL: Additional description"
+							}
+						},
+						"required": [
+							"type",
+							"value"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Specific indicators of compromise"
+				},
+				"webshell_details": {
+					"type": "object",
+					"properties": {
+						"family": {
+							"type": "string",
+							"description": "OPTIONAL: Known webshell family",
+							"examples": [
+								"WSO",
+								"C99",
+								"B374K",
+								"R57"
+							]
+						},
+						"capabilities": {
+							"type": "array",
+							"items": {
+								"type": "string",
+								"enum": [
+									"file_manager",
+									"command_execution",
+									"database_access",
+									"network_scanning",
+									"privilege_escalation",
+									"persistence",
+									"other"
+								]
+							},
+							"description": "OPTIONAL: Webshell capabilities"
+						},
+						"password_protected": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether webshell is password protected"
+						}
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Details specific to webshell compromises"
+				},
+				"affected_cms": {
+					"type": "string",
+					"enum": [
+						"wordpress",
+						"joomla",
+						"drupal",
+						"magento",
+						"prestashop",
+						"opencart",
+						"custom",
+						"unknown",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Content management system if identified"
+				},
+				"vulnerability_exploited": {
+					"type": "object",
+					"properties": {
+						"cve": {
+							"type": "string",
+							"pattern": "^CVE-\\d{4}-\\d{4,}$",
+							"description": "OPTIONAL: CVE identifier"
+						},
+						"description": {
+							"type": "string",
+							"description": "OPTIONAL: Vulnerability description"
+						},
+						"component": {
+							"type": "string",
+							"description": "OPTIONAL: Vulnerable component (e.g., plugin name)"
+						}
+					},
+					"description": "OPTIONAL: Vulnerability used for initial compromise if known"
+				},
+				"persistence_mechanisms": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"cron_job",
+							"modified_core_files",
+							"hidden_admin_account",
+							"autoload_backdoor",
+							"htaccess_modification",
+							"database_backdoor",
+							"other"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Methods used to maintain access"
+				},
+				"malicious_activities": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"spam_sending",
+							"ddos_attacks",
+							"cryptocurrency_mining",
+							"data_exfiltration",
+							"lateral_movement",
+							"hosting_malware",
+							"hosting_phishing",
+							"scanning",
+							"other"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Observed malicious activities from the compromised site"
+				},
+				"cleanup_status": {
+					"type": "string",
+					"enum": [
+						"not_cleaned",
+						"partially_cleaned",
+						"cleaned",
+						"reinfected",
+						"unknown"
+					],
+					"description": "OPTIONAL: Current cleanup status of the compromise"
+				}
+			},
+			"required": [
+				"compromise_type"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "d4e5f6a7-b8c9-0123-def4-567890abcdef",
+			"timestamp": "2025-01-15T11:30:00Z",
+			"reporter": {
+				"org": "Web Security Scanner",
+				"contact": "abuse@websecscanner.example",
+				"domain": "websecscanner.example"
+			},
+			"sender": {
+				"org": "Web Security Scanner",
+				"contact": "abuse@websecscanner.example",
+				"domain": "websecscanner.example"
+			},
+			"source_identifier": "192.0.2.150",
+			"category": "content",
+			"type": "remote_compromise",
+			"url": "https://compromised-site.example.com/wp-content/uploads/shell.php",
+			"compromise_type": "webshell",
+			"affected_cms": "wordpress",
+			"webshell_details": {
+				"family": "WSO",
+				"capabilities": [
+					"file_manager",
+					"command_execution",
+					"database_access"
+				],
+				"password_protected": true
+			},
+			"compromise_indicators": [
+				{
+					"type": "file_path",
+					"value": "/wp-content/uploads/2025/01/shell.php",
+					"description": "Webshell located in uploads directory"
+				}
+			],
+			"evidence": [
+				{
+					"content_type": "text/plain",
+					"description": "Webshell source code snippet",
+					"payload": "base64_encoded_code"
+				}
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/content-suspicious_registration.json b/xarf/schemas/types/content-suspicious_registration.json
new file mode 100644
index 0000000..c6974d5
--- /dev/null
+++ b/xarf/schemas/types/content-suspicious_registration.json
@@ -0,0 +1,230 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/content-suspicious_registration.json",
+	"title": "XARF v4 Content - Suspicious Registration Type Schema",
+	"description": "Schema for newly registered suspicious domains and preemptive threat detection",
+	"allOf": [
+		{
+			"$ref": "./content-base.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"type": {
+					"const": "suspicious_registration"
+				},
+				"registration_date": {
+					"type": "string",
+					"format": "date-time",
+					"description": "REQUIRED: When the domain was registered"
+				},
+				"days_since_registration": {
+					"type": "integer",
+					"minimum": 0,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Number of days since domain registration"
+				},
+				"suspicious_indicators": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"typosquatting",
+							"homograph_attack",
+							"brand_keyword",
+							"suspicious_tld",
+							"bulk_registration",
+							"privacy_protection",
+							"suspicious_registrant",
+							"fast_flux",
+							"dga_pattern",
+							"known_bad_nameserver",
+							"suspicious_ssl_cert",
+							"immediate_activation",
+							"parked_page",
+							"other"
+						]
+					},
+					"minItems": 1,
+					"description": "REQUIRED: Indicators that make this registration suspicious"
+				},
+				"risk_score": {
+					"type": "number",
+					"minimum": 0.0,
+					"maximum": 1.0,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Calculated risk score for this domain"
+				},
+				"targeted_brands": {
+					"type": "array",
+					"items": {
+						"type": "string"
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Brands potentially targeted by this domain"
+				},
+				"registrant_details": {
+					"type": "object",
+					"properties": {
+						"email_domain": {
+							"type": "string",
+							"description": "OPTIONAL: Domain of registrant's email"
+						},
+						"country": {
+							"type": "string",
+							"pattern": "^[A-Z]{2}$",
+							"description": "OPTIONAL: ISO 3166-1 alpha-2 country code"
+						},
+						"privacy_protected": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether WHOIS privacy is enabled"
+						},
+						"bulk_registrations": {
+							"type": "integer",
+							"description": "OPTIONAL: Number of domains registered by same entity"
+						}
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Information about the domain registrant"
+				},
+				"related_domains": {
+					"type": "array",
+					"items": {
+						"type": "object",
+						"properties": {
+							"domain": {
+								"type": "string",
+								"description": "OPTIONAL: Related domain name"
+							},
+							"relationship": {
+								"type": "string",
+								"enum": [
+									"same_registrant",
+									"same_nameserver",
+									"same_ip",
+									"same_ssl_cert",
+									"similar_pattern",
+									"same_campaign"
+								],
+								"description": "OPTIONAL: Type of relationship"
+							}
+						}
+					},
+					"maxItems": 20,
+					"description": "OPTIONAL: Other domains related to this suspicious registration"
+				},
+				"predicted_usage": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"phishing",
+							"malware",
+							"spam",
+							"fraud",
+							"brand_abuse",
+							"botnet_c2",
+							"unknown"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Predicted malicious usage based on patterns"
+				},
+				"ssl_certificate_details": {
+					"type": "object",
+					"properties": {
+						"issued_immediately": {
+							"type": "boolean",
+							"description": "OPTIONAL: Certificate issued immediately after registration"
+						},
+						"free_certificate": {
+							"type": "boolean",
+							"description": "OPTIONAL: Using free SSL certificate provider"
+						},
+						"wildcard": {
+							"type": "boolean",
+							"description": "OPTIONAL: Wildcard certificate"
+						}
+					},
+					"description": "OPTIONAL: SSL certificate details"
+				},
+				"activation_behavior": {
+					"type": "object",
+					"properties": {
+						"time_to_activation": {
+							"type": "integer",
+							"description": "OPTIONAL: Hours between registration and first content"
+						},
+						"initial_content": {
+							"type": "string",
+							"enum": [
+								"parked",
+								"under_construction",
+								"immediate_malicious",
+								"cloned_site",
+								"blank",
+								"other"
+							],
+							"description": "OPTIONAL: Type of initial content"
+						}
+					},
+					"description": "OPTIONAL: Activation behavior details"
+				}
+			},
+			"required": [
+				"registration_date",
+				"suspicious_indicators"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "c3d4e5f6-a7b8-9012-bcde-f34567890123",
+			"timestamp": "2025-01-15T08:00:00Z",
+			"reporter": {
+				"org": "Domain Threat Intelligence",
+				"contact": "alerts@domainthreat.example",
+				"domain": "domainthreat.example"
+			},
+			"sender": {
+				"org": "Domain Threat Intelligence",
+				"contact": "alerts@domainthreat.example",
+				"domain": "domainthreat.example"
+			},
+			"source_identifier": "192.0.2.200",
+			"category": "content",
+			"type": "suspicious_registration",
+			"url": "https://amaz0n-secure.example",
+			"domain": "amaz0n-secure.example",
+			"registration_date": "2025-01-14T22:00:00Z",
+			"days_since_registration": 0,
+			"suspicious_indicators": [
+				"typosquatting",
+				"brand_keyword",
+				"privacy_protection",
+				"immediate_activation"
+			],
+			"risk_score": 0.92,
+			"targeted_brands": [
+				"Amazon"
+			],
+			"registrant_details": {
+				"privacy_protected": true,
+				"country": "CN",
+				"bulk_registrations": 47
+			},
+			"predicted_usage": [
+				"phishing",
+				"fraud"
+			],
+			"evidence": [
+				{
+					"content_type": "application/json",
+					"description": "WHOIS and DNS records",
+					"payload": "base64_encoded_data"
+				}
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/copyright-copyright.json b/xarf/schemas/types/copyright-copyright.json
new file mode 100644
index 0000000..7e98e71
--- /dev/null
+++ b/xarf/schemas/types/copyright-copyright.json
@@ -0,0 +1,76 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/copyright-copyright.json",
+	"title": "XARF v4 Copyright - Copyright Type Schema",
+	"description": "Schema for Copyright infringement and DMCA violations",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "copyright"
+				},
+				"type": {
+					"const": "copyright"
+				},
+				"infringing_url": {
+					"type": "string",
+					"format": "uri",
+					"description": "REQUIRED: URL of the infringing content - this is what's being reported",
+					"examples": [
+						"http://piracy-site.example.com/movies/copyrighted-movie.mp4",
+						"https://file-sharing.example.org/download/12345"
+					]
+				},
+				"work_title": {
+					"type": "string",
+					"maxLength": 500,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Title of the copyrighted work",
+					"examples": [
+						"Avengers: Endgame",
+						"Beatles - Abbey Road",
+						"Adobe Photoshop"
+					]
+				},
+				"rights_holder": {
+					"type": "string",
+					"maxLength": 200,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Organization or person holding the copyright",
+					"examples": [
+						"Disney Enterprises, Inc.",
+						"Sony Music",
+						"Adobe Systems"
+					]
+				},
+				"original_url": {
+					"type": "string",
+					"format": "uri",
+					"description": "OPTIONAL: URL of the legitimate/original content",
+					"examples": [
+						"https://www.studio.example/movies/blockbuster-film"
+					]
+				},
+				"infringement_type": {
+					"type": "string",
+					"enum": [
+						"direct_copy",
+						"modified_copy",
+						"streaming",
+						"download",
+						"distribution"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Type of copyright infringement"
+				}
+			},
+			"required": [
+				"infringing_url"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/copyright-cyberlocker.json b/xarf/schemas/types/copyright-cyberlocker.json
new file mode 100644
index 0000000..be27dd5
--- /dev/null
+++ b/xarf/schemas/types/copyright-cyberlocker.json
@@ -0,0 +1,223 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/copyright-cyberlocker.json",
+	"title": "XARF v4 Copyright - Cyberlocker Type Schema",
+	"description": "Schema for cyberlocker/file hosting service copyright infringement reports",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "copyright"
+				},
+				"type": {
+					"const": "cyberlocker"
+				},
+				"evidence_source": {
+					"type": "string",
+					"enum": [
+						"automated_crawl",
+						"manual_discovery",
+						"user_report",
+						"rights_holder",
+						"search_engine"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Source of cyberlocker infringement evidence"
+				},
+				"infringing_url": {
+					"type": "string",
+					"format": "uri",
+					"description": "REQUIRED: URL to the infringing file on the hosting service"
+				},
+				"hosting_service": {
+					"type": "string",
+					"maxLength": 200,
+					"description": "REQUIRED: Name of the file hosting service",
+					"examples": [
+						"Rapidshare",
+						"Megaupload",
+						"4shared",
+						"MediaFire",
+						"Zippyshare",
+						"Uploaded.net"
+					]
+				},
+				"file_info": {
+					"type": "object",
+					"properties": {
+						"filename": {
+							"type": "string",
+							"maxLength": 500,
+							"description": "OPTIONAL: Name of the infringing file"
+						},
+						"file_size": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: File size in bytes"
+						},
+						"file_hash": {
+							"type": "string",
+							"pattern": "^(md5|sha1|sha256):[a-fA-F0-9]+$",
+							"description": "OPTIONAL: File hash with algorithm prefix"
+						},
+						"upload_date": {
+							"type": "string",
+							"format": "date-time",
+							"description": "OPTIONAL: When file was uploaded to service"
+						},
+						"download_count": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Number of downloads if available"
+						}
+					},
+					"additionalProperties": false,
+					"x-recommended": true,
+					"description": "RECOMMENDED: File information details"
+				},
+				"uploader_info": {
+					"type": "object",
+					"properties": {
+						"username": {
+							"type": "string",
+							"maxLength": 200,
+							"description": "OPTIONAL: Username of the uploader"
+						},
+						"user_id": {
+							"type": "string",
+							"maxLength": 100,
+							"description": "OPTIONAL: User ID on the hosting service"
+						},
+						"account_type": {
+							"type": "string",
+							"enum": [
+								"free",
+								"premium",
+								"business",
+								"unknown"
+							],
+							"description": "OPTIONAL: Type of user account"
+						}
+					},
+					"additionalProperties": false,
+					"description": "OPTIONAL: Information about the uploader"
+				},
+				"work_title": {
+					"type": "string",
+					"maxLength": 500,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Title of the copyrighted work"
+				},
+				"rights_holder": {
+					"type": "string",
+					"maxLength": 200,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Organization or person holding the copyright"
+				},
+				"work_category": {
+					"type": "string",
+					"enum": [
+						"movie",
+						"tv_show",
+						"music",
+						"software",
+						"ebook",
+						"audiobook",
+						"game",
+						"document",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Category of copyrighted work"
+				},
+				"access_method": {
+					"type": "string",
+					"enum": [
+						"direct_link",
+						"password_protected",
+						"premium_only",
+						"time_limited",
+						"captcha_protected"
+					],
+					"description": "OPTIONAL: How the file can be accessed"
+				},
+				"takedown_info": {
+					"type": "object",
+					"properties": {
+						"previous_requests": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Number of previous takedown requests for this file"
+						},
+						"service_response_time": {
+							"type": "string",
+							"description": "OPTIONAL: Expected response time from hosting service"
+						},
+						"automated_removal": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether service supports automated removal"
+						}
+					},
+					"additionalProperties": false,
+					"description": "OPTIONAL: Takedown request information"
+				}
+			},
+			"required": [
+				"infringing_url",
+				"hosting_service"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "456b7890-c123-45d6-e789-012345678901",
+			"timestamp": "2024-01-15T11:15:20Z",
+			"reporter": {
+				"org": "Anti-Piracy Coalition",
+				"contact": "takedowns@anti-piracy.example",
+				"domain": "anti-piracy.example"
+			},
+			"sender": {
+				"org": "Anti-Piracy Coalition",
+				"contact": "takedowns@anti-piracy.example",
+				"domain": "anti-piracy.example"
+			},
+			"source_identifier": "cyberlocker-service.example",
+			"category": "copyright",
+			"type": "cyberlocker",
+			"infringing_url": "https://filehost.example/download/abc123def456",
+			"hosting_service": "FileHost Pro",
+			"work_title": "Popular Movie 2024",
+			"rights_holder": "Entertainment Studios LLC",
+			"work_category": "movie",
+			"evidence_source": "automated_crawl",
+			"file_info": {
+				"filename": "Popular.Movie.2024.1080p.WEBRip.x264.mp4",
+				"file_size": 2147483648,
+				"upload_date": "2024-01-14T20:30:00Z"
+			},
+			"uploader_info": {
+				"username": "movieshare123",
+				"account_type": "premium"
+			},
+			"access_method": "direct_link",
+			"evidence": [
+				{
+					"content_type": "text/html",
+					"description": "Screenshot of download page showing copyrighted content",
+					"payload": "PGh0bWw+PGhlYWQ+PHRpdGxlPkRvd25sb2FkIFBvcHVsYXIgTW92aWUgMjAyNC4uLjwvdGl0bGU+PC9oZWFkPjwvaHRtbD4="
+				}
+			],
+			"tags": [
+				"copyright:cyberlocker",
+				"service:filehost",
+				"media:movie"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/copyright-link-site.json b/xarf/schemas/types/copyright-link-site.json
new file mode 100644
index 0000000..5937506
--- /dev/null
+++ b/xarf/schemas/types/copyright-link-site.json
@@ -0,0 +1,269 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/copyright-link-site.json",
+	"title": "XARF v4 Copyright - Link Site Type Schema",
+	"description": "Schema for link aggregation site copyright infringement reports",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "copyright"
+				},
+				"type": {
+					"const": "link_site"
+				},
+				"evidence_source": {
+					"type": "string",
+					"enum": [
+						"automated_crawl",
+						"manual_monitoring",
+						"user_report",
+						"rights_holder",
+						"search_monitoring"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Source of link site infringement evidence"
+				},
+				"infringing_url": {
+					"type": "string",
+					"format": "uri",
+					"description": "REQUIRED: URL to the page containing infringing links"
+				},
+				"site_name": {
+					"type": "string",
+					"maxLength": 200,
+					"description": "REQUIRED: Name of the link aggregation site",
+					"examples": [
+						"The Pirate Bay",
+						"1337x",
+						"RARBG",
+						"Torrentz2",
+						"ExtraTorrent",
+						"KickassTorrents"
+					]
+				},
+				"site_category": {
+					"type": "string",
+					"enum": [
+						"torrent_index",
+						"direct_download_links",
+						"streaming_links",
+						"usenet_index",
+						"search_engine",
+						"forum_links",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Category of link aggregation site"
+				},
+				"link_info": {
+					"type": "object",
+					"properties": {
+						"page_title": {
+							"type": "string",
+							"maxLength": 500,
+							"description": "OPTIONAL: Title of the page containing links"
+						},
+						"posting_date": {
+							"type": "string",
+							"format": "date-time",
+							"description": "OPTIONAL: When links were posted"
+						},
+						"uploader": {
+							"type": "string",
+							"maxLength": 200,
+							"description": "OPTIONAL: Username who posted the links"
+						},
+						"download_count": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Number of downloads/views"
+						},
+						"link_count": {
+							"type": "integer",
+							"minimum": 1,
+							"description": "OPTIONAL: Number of infringing links on the page"
+						},
+						"comments_count": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Number of user comments"
+						}
+					},
+					"additionalProperties": false,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Information about the link page"
+				},
+				"linked_content": {
+					"type": "array",
+					"items": {
+						"type": "object",
+						"properties": {
+							"target_url": {
+								"type": "string",
+								"format": "uri",
+								"description": "REQUIRED: URL that the link points to"
+							},
+							"link_type": {
+								"type": "string",
+								"enum": [
+									"torrent_file",
+									"magnet_link",
+									"direct_download",
+									"streaming_link",
+									"usenet_nzb",
+									"other"
+								],
+								"description": "REQUIRED: Type of link"
+							},
+							"hosting_service": {
+								"type": "string",
+								"maxLength": 200,
+								"description": "OPTIONAL: Service hosting the linked content"
+							},
+							"file_size": {
+								"type": "integer",
+								"minimum": 0,
+								"description": "OPTIONAL: Size of linked file in bytes"
+							}
+						},
+						"required": [
+							"target_url",
+							"link_type"
+						],
+						"additionalProperties": false
+					},
+					"maxItems": 50,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Details about the linked infringing content"
+				},
+				"work_title": {
+					"type": "string",
+					"maxLength": 500,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Title of the copyrighted work"
+				},
+				"rights_holder": {
+					"type": "string",
+					"maxLength": 200,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Organization or person holding the copyright"
+				},
+				"work_category": {
+					"type": "string",
+					"enum": [
+						"movie",
+						"tv_show",
+						"music",
+						"software",
+						"ebook",
+						"audiobook",
+						"game",
+						"adult_content",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Category of copyrighted work"
+				},
+				"search_terms": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"maxLength": 200
+					},
+					"maxItems": 10,
+					"description": "OPTIONAL: Search terms used to find the infringing links"
+				},
+				"site_ranking": {
+					"type": "object",
+					"properties": {
+						"alexa_rank": {
+							"type": "integer",
+							"minimum": 1,
+							"description": "OPTIONAL: Alexa traffic ranking of the site"
+						},
+						"popularity_score": {
+							"type": "number",
+							"minimum": 0.0,
+							"maximum": 10.0,
+							"description": "OPTIONAL: Popularity score (0.0-10.0)"
+						}
+					},
+					"additionalProperties": false,
+					"description": "OPTIONAL: Site ranking information"
+				}
+			},
+			"required": [
+				"infringing_url",
+				"site_name"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "012c3456-d789-01e2-f345-678901234567",
+			"timestamp": "2024-01-15T20:10:30Z",
+			"reporter": {
+				"org": "Digital Rights Enforcement",
+				"contact": "enforcement@digital-rights.example",
+				"domain": "digital-rights.example"
+			},
+			"sender": {
+				"org": "Digital Rights Enforcement",
+				"contact": "enforcement@digital-rights.example",
+				"domain": "digital-rights.example"
+			},
+			"source_identifier": "torrent-indexer.example",
+			"category": "copyright",
+			"type": "link_site",
+			"infringing_url": "https://torrentindex.example/torrent/12345/blockbuster-movie-2024",
+			"site_name": "TorrentIndex",
+			"site_category": "torrent_index",
+			"work_title": "Blockbuster Movie 2024",
+			"rights_holder": "Hollywood Studios",
+			"work_category": "movie",
+			"evidence_source": "automated_crawl",
+			"link_info": {
+				"page_title": "Blockbuster Movie 2024 1080p BluRay x264",
+				"posting_date": "2024-01-14T22:30:00Z",
+				"uploader": "movieuploader123",
+				"download_count": 2500,
+				"link_count": 3
+			},
+			"linked_content": [
+				{
+					"target_url": "magnet:?xt=urn:btih:da39a3ee5e6b4b0d3255bfef95601890afd80709",
+					"link_type": "magnet_link",
+					"hosting_service": "BitTorrent DHT"
+				},
+				{
+					"target_url": "https://filehost.example/download/abc123",
+					"link_type": "direct_download",
+					"hosting_service": "FileHost Service",
+					"file_size": 4294967296
+				}
+			],
+			"search_terms": [
+				"Blockbuster Movie 2024",
+				"1080p BluRay"
+			],
+			"evidence": [
+				{
+					"content_type": "text/html",
+					"description": "Screenshot of torrent page showing copyrighted content links",
+					"payload": "PGh0bWw+PGhlYWQ+PHRpdGxlPkJsb2NrYnVzdGVyIE1vdmllIDIwMjQ8L3RpdGxlPjwvaGVhZD48L2h0bWw+"
+				}
+			],
+			"tags": [
+				"copyright:link_site",
+				"site:torrent_index",
+				"media:movie"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/copyright-p2p.json b/xarf/schemas/types/copyright-p2p.json
new file mode 100644
index 0000000..7dfadab
--- /dev/null
+++ b/xarf/schemas/types/copyright-p2p.json
@@ -0,0 +1,221 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/copyright-p2p.json",
+	"title": "XARF v4 Copyright - P2P Type Schema",
+	"description": "Schema for peer-to-peer copyright infringement reports",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "copyright"
+				},
+				"type": {
+					"const": "p2p"
+				},
+				"evidence_source": {
+					"type": "string",
+					"enum": [
+						"automated_crawl",
+						"manual_monitoring",
+						"user_report",
+						"rights_holder",
+						"watermark_detection"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Source of P2P infringement evidence"
+				},
+				"p2p_protocol": {
+					"type": "string",
+					"enum": [
+						"bittorrent",
+						"edonkey",
+						"gnutella",
+						"kademlia",
+						"other"
+					],
+					"description": "REQUIRED: P2P protocol used for infringement"
+				},
+				"swarm_info": {
+					"type": "object",
+					"properties": {
+						"info_hash": {
+							"type": "string",
+							"pattern": "^[a-fA-F0-9]{40}$",
+							"description": "RECOMMENDED: BitTorrent info hash (SHA-1)"
+						},
+						"magnet_uri": {
+							"type": "string",
+							"pattern": "^magnet:\\?xt=urn:",
+							"description": "RECOMMENDED: Magnet link for the infringing content"
+						},
+						"torrent_name": {
+							"type": "string",
+							"maxLength": 500,
+							"description": "OPTIONAL: Name of the torrent"
+						},
+						"file_count": {
+							"type": "integer",
+							"minimum": 1,
+							"description": "OPTIONAL: Number of files in the torrent"
+						},
+						"total_size": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Total size in bytes"
+						}
+					},
+					"additionalProperties": false,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Swarm information (info_hash or magnet_uri required)"
+				},
+				"peer_info": {
+					"type": "object",
+					"properties": {
+						"peer_id": {
+							"type": "string",
+							"maxLength": 100,
+							"description": "OPTIONAL: P2P client peer ID"
+						},
+						"client_version": {
+							"type": "string",
+							"maxLength": 100,
+							"description": "OPTIONAL: P2P client software and version"
+						},
+						"upload_amount": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Amount uploaded in bytes"
+						},
+						"download_amount": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Amount downloaded in bytes"
+						}
+					},
+					"additionalProperties": false,
+					"description": "OPTIONAL: Peer information"
+				},
+				"work_title": {
+					"type": "string",
+					"maxLength": 500,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Title of the copyrighted work"
+				},
+				"rights_holder": {
+					"type": "string",
+					"maxLength": 200,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Organization or person holding the copyright"
+				},
+				"work_category": {
+					"type": "string",
+					"enum": [
+						"movie",
+						"tv_show",
+						"music",
+						"software",
+						"ebook",
+						"audiobook",
+						"game",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Category of copyrighted work"
+				},
+				"release_date": {
+					"type": "string",
+					"format": "date",
+					"description": "OPTIONAL: Official release date of the work"
+				},
+				"detection_method": {
+					"type": "string",
+					"enum": [
+						"automated_crawl",
+						"fingerprinting",
+						"metadata_match",
+						"manual_verification"
+					],
+					"description": "OPTIONAL: Method used to detect the infringement"
+				}
+			},
+			"required": [
+				"p2p_protocol"
+			],
+			"anyOf": [
+				{
+					"required": [
+						"swarm_info"
+					],
+					"properties": {
+						"swarm_info": {
+							"anyOf": [
+								{
+									"required": [
+										"info_hash"
+									]
+								},
+								{
+									"required": [
+										"magnet_uri"
+									]
+								}
+							]
+						}
+					}
+				}
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "789a1234-b567-89c0-d123-456789abcdef",
+			"timestamp": "2024-01-15T18:30:45Z",
+			"reporter": {
+				"org": "Content Protection Agency",
+				"contact": "reports@content-protection.example",
+				"domain": "content-protection.example"
+			},
+			"sender": {
+				"org": "Content Protection Agency",
+				"contact": "reports@content-protection.example",
+				"domain": "content-protection.example"
+			},
+			"source_identifier": "203.0.113.150",
+			"source_port": 6881,
+			"category": "copyright",
+			"type": "p2p",
+			"p2p_protocol": "bittorrent",
+			"work_title": "Movie Title 2024",
+			"rights_holder": "Major Studio Inc",
+			"work_category": "movie",
+			"evidence_source": "automated_crawl",
+			"swarm_info": {
+				"info_hash": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
+				"torrent_name": "Movie.Title.2024.1080p.BluRay.x264",
+				"file_count": 1,
+				"total_size": 8589934592
+			},
+			"peer_info": {
+				"client_version": "uTorrent/3.5.5",
+				"upload_amount": 1073741824
+			},
+			"evidence": [
+				{
+					"content_type": "application/x-bittorrent",
+					"description": "Torrent file containing copyrighted content",
+					"payload": "ZDg6YW5ub3VuY2UyNzpodHRwOi8vdHJhY2tlci5leGFtcGxlLmNvbS9hbm5vdW5jZQ=="
+				}
+			],
+			"tags": [
+				"copyright:p2p",
+				"protocol:bittorrent",
+				"media:movie"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/copyright-ugc-platform.json b/xarf/schemas/types/copyright-ugc-platform.json
new file mode 100644
index 0000000..2ad8502
--- /dev/null
+++ b/xarf/schemas/types/copyright-ugc-platform.json
@@ -0,0 +1,287 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/copyright-ugc-platform.json",
+	"title": "XARF v4 Copyright - UGC Platform Type Schema",
+	"description": "Schema for user-generated content platform copyright infringement reports",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "copyright"
+				},
+				"type": {
+					"const": "ugc_platform"
+				},
+				"evidence_source": {
+					"type": "string",
+					"enum": [
+						"automated_detection",
+						"user_report",
+						"rights_holder",
+						"content_id_match",
+						"fingerprint_match",
+						"manual_review"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Source of UGC platform infringement evidence"
+				},
+				"infringing_url": {
+					"type": "string",
+					"format": "uri",
+					"description": "REQUIRED: URL to the infringing content on the platform"
+				},
+				"platform_name": {
+					"type": "string",
+					"maxLength": 200,
+					"description": "REQUIRED: Name of the UGC platform",
+					"examples": [
+						"YouTube",
+						"TikTok",
+						"Instagram",
+						"Twitter",
+						"Facebook",
+						"Vimeo",
+						"Twitch",
+						"SoundCloud"
+					]
+				},
+				"content_info": {
+					"type": "object",
+					"properties": {
+						"content_id": {
+							"type": "string",
+							"maxLength": 200,
+							"description": "OPTIONAL: Platform-specific content identifier"
+						},
+						"content_title": {
+							"type": "string",
+							"maxLength": 500,
+							"description": "OPTIONAL: Title of the infringing content"
+						},
+						"content_description": {
+							"type": "string",
+							"maxLength": 2000,
+							"description": "OPTIONAL: Description of the infringing content"
+						},
+						"upload_date": {
+							"type": "string",
+							"format": "date-time",
+							"description": "OPTIONAL: When content was uploaded to platform"
+						},
+						"content_duration": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Duration in seconds for video/audio content"
+						},
+						"view_count": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Number of views/plays"
+						},
+						"like_count": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Number of likes/upvotes"
+						}
+					},
+					"additionalProperties": false,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Information about the infringing content"
+				},
+				"uploader_info": {
+					"type": "object",
+					"properties": {
+						"username": {
+							"type": "string",
+							"maxLength": 200,
+							"description": "OPTIONAL: Username of the content uploader"
+						},
+						"user_id": {
+							"type": "string",
+							"maxLength": 100,
+							"description": "OPTIONAL: Platform-specific user identifier"
+						},
+						"account_verified": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether uploader account is verified"
+						},
+						"subscriber_count": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Number of subscribers/followers"
+						},
+						"account_creation_date": {
+							"type": "string",
+							"format": "date-time",
+							"description": "OPTIONAL: When uploader account was created"
+						}
+					},
+					"additionalProperties": false,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Information about the uploader"
+				},
+				"work_title": {
+					"type": "string",
+					"maxLength": 500,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Title of the copyrighted work"
+				},
+				"rights_holder": {
+					"type": "string",
+					"maxLength": 200,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Organization or person holding the copyright"
+				},
+				"work_category": {
+					"type": "string",
+					"enum": [
+						"movie",
+						"tv_show",
+						"music",
+						"music_video",
+						"audiobook",
+						"podcast",
+						"live_performance",
+						"sports_event",
+						"documentary",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Category of copyrighted work"
+				},
+				"infringement_type": {
+					"type": "string",
+					"enum": [
+						"full_work",
+						"substantial_portion",
+						"compilation",
+						"remix_unauthorized",
+						"background_music",
+						"clip_mashup"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Type of copyright infringement"
+				},
+				"match_details": {
+					"type": "object",
+					"properties": {
+						"match_confidence": {
+							"type": "number",
+							"minimum": 0.0,
+							"maximum": 1.0,
+							"description": "OPTIONAL: Confidence level of content match (0.0-1.0)"
+						},
+						"match_duration": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Duration of matching content in seconds"
+						},
+						"match_percentage": {
+							"type": "number",
+							"minimum": 0.0,
+							"maximum": 100.0,
+							"description": "OPTIONAL: Percentage of original work that matches"
+						},
+						"reference_id": {
+							"type": "string",
+							"maxLength": 200,
+							"description": "OPTIONAL: Reference ID from content identification system"
+						}
+					},
+					"additionalProperties": false,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Content match details"
+				},
+				"monetization_info": {
+					"type": "object",
+					"properties": {
+						"monetized": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether infringing content is monetized"
+						},
+						"ad_revenue": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether content generates ad revenue"
+						},
+						"premium_content": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether content is behind paywall"
+						}
+					},
+					"additionalProperties": false,
+					"description": "OPTIONAL: Monetization information"
+				}
+			},
+			"required": [
+				"infringing_url",
+				"platform_name"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "789c0123-d456-78e9-f012-345678901234",
+			"timestamp": "2024-01-15T13:45:15Z",
+			"reporter": {
+				"org": "Music Rights Management",
+				"contact": "copyright@music-rights.example",
+				"domain": "music-rights.example"
+			},
+			"sender": {
+				"org": "Music Rights Management",
+				"contact": "copyright@music-rights.example",
+				"domain": "music-rights.example"
+			},
+			"source_identifier": "video-platform.example",
+			"category": "copyright",
+			"type": "ugc_platform",
+			"infringing_url": "https://platform.example/watch?v=abc123def456",
+			"platform_name": "VideoShare",
+			"work_title": "Hit Song 2024",
+			"rights_holder": "Record Label Inc",
+			"work_category": "music",
+			"evidence_source": "content_id_match",
+			"infringement_type": "background_music",
+			"content_info": {
+				"content_id": "vid_abc123def456",
+				"content_title": "My Vacation Highlights",
+				"upload_date": "2024-01-14T16:20:00Z",
+				"content_duration": 180,
+				"view_count": 15000
+			},
+			"uploader_info": {
+				"username": "travelblogger2024",
+				"account_verified": false,
+				"subscriber_count": 1200
+			},
+			"match_details": {
+				"match_confidence": 0.95,
+				"match_duration": 45,
+				"match_percentage": 25.0,
+				"reference_id": "ref_hit_song_2024_001"
+			},
+			"monetization_info": {
+				"monetized": true,
+				"ad_revenue": true
+			},
+			"evidence": [
+				{
+					"content_type": "application/json",
+					"description": "Content ID match report with timestamps",
+					"payload": "eyJtYXRjaF9kZXRhaWxzIjogeyJzdGFydF90aW1lIjogNjAsICJlbmRfdGltZSI6IDEwNX19"
+				}
+			],
+			"tags": [
+				"copyright:ugc",
+				"platform:video",
+				"media:music",
+				"type:background"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/copyright-usenet.json b/xarf/schemas/types/copyright-usenet.json
new file mode 100644
index 0000000..b8a75b0
--- /dev/null
+++ b/xarf/schemas/types/copyright-usenet.json
@@ -0,0 +1,281 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/copyright-usenet.json",
+	"title": "XARF v4 Copyright - Usenet Type Schema",
+	"description": "Schema for Usenet newsgroup copyright infringement reports",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "copyright"
+				},
+				"type": {
+					"const": "usenet"
+				},
+				"evidence_source": {
+					"type": "string",
+					"enum": [
+						"automated_monitoring",
+						"newsgroup_crawl",
+						"user_report",
+						"rights_holder",
+						"nzb_index_monitoring"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Source of Usenet infringement evidence"
+				},
+				"newsgroup": {
+					"type": "string",
+					"maxLength": 200,
+					"description": "REQUIRED: Name of the newsgroup containing infringing content",
+					"examples": [
+						"alt.binaries.movies.divx",
+						"alt.binaries.tv",
+						"alt.binaries.sounds.mp3",
+						"alt.binaries.games",
+						"alt.binaries.multimedia"
+					]
+				},
+				"message_info": {
+					"type": "object",
+					"properties": {
+						"message_id": {
+							"type": "string",
+							"maxLength": 500,
+							"description": "RECOMMENDED: Usenet Message-ID header"
+						},
+						"subject": {
+							"type": "string",
+							"maxLength": 500,
+							"description": "OPTIONAL: Subject line of the post"
+						},
+						"from_header": {
+							"type": "string",
+							"maxLength": 200,
+							"description": "OPTIONAL: From header of the post"
+						},
+						"posting_date": {
+							"type": "string",
+							"format": "date-time",
+							"description": "OPTIONAL: When message was posted to newsgroup"
+						},
+						"part_number": {
+							"type": "integer",
+							"minimum": 1,
+							"description": "OPTIONAL: Part number if multi-part post"
+						},
+						"total_parts": {
+							"type": "integer",
+							"minimum": 1,
+							"description": "OPTIONAL: Total number of parts in posting"
+						},
+						"file_size": {
+							"type": "integer",
+							"minimum": 0,
+							"description": "OPTIONAL: Size of the posted file in bytes"
+						}
+					},
+					"additionalProperties": false,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Message information (message_id required)"
+				},
+				"nzb_info": {
+					"type": "object",
+					"properties": {
+						"nzb_name": {
+							"type": "string",
+							"maxLength": 500,
+							"description": "OPTIONAL: Name of the NZB file"
+						},
+						"nzb_url": {
+							"type": "string",
+							"format": "uri",
+							"description": "OPTIONAL: URL to NZB file on indexing site"
+						},
+						"indexer_site": {
+							"type": "string",
+							"maxLength": 200,
+							"description": "OPTIONAL: NZB indexing site name"
+						},
+						"completion_percentage": {
+							"type": "number",
+							"minimum": 0.0,
+							"maximum": 100.0,
+							"description": "OPTIONAL: Completion percentage of the post"
+						}
+					},
+					"additionalProperties": false,
+					"description": "OPTIONAL: NZB file information"
+				},
+				"server_info": {
+					"type": "object",
+					"properties": {
+						"nntp_server": {
+							"type": "string",
+							"maxLength": 200,
+							"description": "OPTIONAL: NNTP server hostname"
+						},
+						"server_group": {
+							"type": "string",
+							"maxLength": 200,
+							"description": "OPTIONAL: News server provider group"
+						},
+						"retention_days": {
+							"type": "integer",
+							"minimum": 1,
+							"description": "OPTIONAL: Server retention period in days"
+						}
+					},
+					"additionalProperties": false,
+					"description": "OPTIONAL: Server information"
+				},
+				"work_title": {
+					"type": "string",
+					"maxLength": 500,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Title of the copyrighted work"
+				},
+				"rights_holder": {
+					"type": "string",
+					"maxLength": 200,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Organization or person holding the copyright"
+				},
+				"work_category": {
+					"type": "string",
+					"enum": [
+						"movie",
+						"tv_show",
+						"music",
+						"software",
+						"ebook",
+						"audiobook",
+						"magazine",
+						"game",
+						"adult_content",
+						"other"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Category of copyrighted work"
+				},
+				"encoding_info": {
+					"type": "object",
+					"properties": {
+						"encoding_format": {
+							"type": "string",
+							"enum": [
+								"yenc",
+								"uuencode",
+								"base64",
+								"other"
+							],
+							"description": "OPTIONAL: Binary encoding format used"
+						},
+						"par2_recovery": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether PAR2 recovery files are included"
+						},
+						"rar_compression": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether content is RAR compressed"
+						}
+					},
+					"additionalProperties": false,
+					"description": "OPTIONAL: Encoding information"
+				},
+				"detection_method": {
+					"type": "string",
+					"enum": [
+						"subject_line_match",
+						"header_analysis",
+						"content_sampling",
+						"nzb_metadata"
+					],
+					"description": "OPTIONAL: Method used to detect infringement"
+				}
+			},
+			"required": [
+				"newsgroup"
+			],
+			"anyOf": [
+				{
+					"required": [
+						"message_info"
+					],
+					"properties": {
+						"message_info": {
+							"required": [
+								"message_id"
+							]
+						}
+					}
+				}
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "345d6789-e012-34f5-a678-901234567890",
+			"timestamp": "2024-01-15T07:20:45Z",
+			"reporter": {
+				"org": "Usenet Monitoring Service",
+				"contact": "reports@usenet-monitor.example",
+				"domain": "usenet-monitor.example"
+			},
+			"sender": {
+				"org": "Usenet Monitoring Service",
+				"contact": "reports@usenet-monitor.example",
+				"domain": "usenet-monitor.example"
+			},
+			"source_identifier": "news.example-provider.com",
+			"category": "copyright",
+			"type": "usenet",
+			"newsgroup": "alt.binaries.movies.divx",
+			"work_title": "Latest Movie 2024",
+			"rights_holder": "Film Distribution Corp",
+			"work_category": "movie",
+			"evidence_source": "automated_monitoring",
+			"message_info": {
+				"message_id": "<abc123def456@news.provider.example.com>",
+				"subject": "[1/50] Latest.Movie.2024.1080p.BluRay.x264 - File 01 of 50",
+				"from_header": "movieposter@anon.example.com (MoviePoster)",
+				"posting_date": "2024-01-14T05:30:00Z",
+				"part_number": 1,
+				"total_parts": 50,
+				"file_size": 4294967296
+			},
+			"nzb_info": {
+				"nzb_name": "Latest Movie 2024 1080p BluRay x264.nzb",
+				"indexer_site": "NZB Indexer Pro",
+				"completion_percentage": 100.0
+			},
+			"server_info": {
+				"nntp_server": "news.example-provider.com",
+				"retention_days": 3000
+			},
+			"encoding_info": {
+				"encoding_format": "yenc",
+				"par2_recovery": true,
+				"rar_compression": true
+			},
+			"detection_method": "subject_line_match",
+			"evidence": [
+				{
+					"content_type": "message/rfc822",
+					"description": "Usenet post headers showing copyrighted movie",
+					"payload": "TWVzc2FnZS1JRDogPGFiYzEyM2RlZjQ1NkBuZXdzLnByb3ZpZGVyLmNvbT4="
+				}
+			],
+			"tags": [
+				"copyright:usenet",
+				"newsgroup:movies",
+				"media:movie"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/infrastructure-botnet.json b/xarf/schemas/types/infrastructure-botnet.json
new file mode 100644
index 0000000..ffdbd5e
--- /dev/null
+++ b/xarf/schemas/types/infrastructure-botnet.json
@@ -0,0 +1,88 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/infrastructure-botnet.json",
+	"title": "XARF v4 Infrastructure - Botnet Type Schema",
+	"description": "Schema for Botnet infections and compromised systems",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "infrastructure"
+				},
+				"type": {
+					"const": "botnet"
+				},
+				"malware_family": {
+					"type": "string",
+					"maxLength": 200,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Malware family classification",
+					"examples": [
+						"conficker",
+						"mirai",
+						"emotet",
+						"zeus"
+					]
+				},
+				"c2_server": {
+					"type": "string",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Command and control server domain or IP",
+					"examples": [
+						"evil-c2.example.com",
+						"192.0.2.100"
+					]
+				},
+				"c2_protocol": {
+					"type": "string",
+					"enum": [
+						"http",
+						"https",
+						"tcp",
+						"udp",
+						"dns",
+						"irc",
+						"p2p",
+						"custom"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Protocol used for C2 communications"
+				},
+				"bot_capabilities": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"enum": [
+							"ddos",
+							"spam",
+							"proxy",
+							"keylogger",
+							"file_download",
+							"remote_shell",
+							"cryptocurrency_mining",
+							"data_theft"
+						]
+					},
+					"x-recommended": true,
+					"description": "RECOMMENDED: Capabilities observed in the bot"
+				},
+				"compromise_evidence": {
+					"type": "string",
+					"description": "REQUIRED: Evidence of how compromise was detected",
+					"examples": [
+						"C2 communication observed",
+						"Malicious process running",
+						"Suspicious network traffic patterns"
+					]
+				}
+			},
+			"required": [
+				"compromise_evidence"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/infrastructure-compromised-server.json b/xarf/schemas/types/infrastructure-compromised-server.json
new file mode 100644
index 0000000..82e8954
--- /dev/null
+++ b/xarf/schemas/types/infrastructure-compromised-server.json
@@ -0,0 +1,29 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/infrastructure-compromised-server.json",
+	"title": "XARF v4 Infrastructure - Compromised Server Type Schema",
+	"description": "Schema for Compromised servers and infrastructure",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "infrastructure"
+				},
+				"type": {
+					"const": "compromised_server"
+				},
+				"compromise_method": {
+					"type": "string",
+					"description": "REQUIRED: Method used to compromise the server"
+				}
+			},
+			"required": [
+				"compromise_method"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/messaging-bulk-messaging.json b/xarf/schemas/types/messaging-bulk-messaging.json
new file mode 100644
index 0000000..f37a31e
--- /dev/null
+++ b/xarf/schemas/types/messaging-bulk-messaging.json
@@ -0,0 +1,142 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/messaging-bulk-messaging.json",
+	"title": "XARF v4 Messaging - Bulk Messaging Type Schema",
+	"description": "Schema for bulk messaging reports - legitimate but unwanted bulk communications",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "messaging"
+				},
+				"type": {
+					"const": "bulk_messaging"
+				},
+				"evidence_source": {
+					"type": "string",
+					"enum": [
+						"user_complaint",
+						"automated_filter",
+						"reputation_feed",
+						"volume_analysis"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Source of bulk messaging evidence"
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"smtp",
+						"sms",
+						"whatsapp",
+						"telegram",
+						"social_media",
+						"push_notification",
+						"other"
+					],
+					"description": "REQUIRED: Communication protocol used for bulk messaging"
+				},
+				"smtp_from": {
+					"type": "string",
+					"format": "email",
+					"description": "REQUIRED: SMTP envelope sender address (required when protocol=smtp)"
+				},
+				"subject": {
+					"type": "string",
+					"maxLength": 500,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Message subject line"
+				},
+				"sender_name": {
+					"type": "string",
+					"maxLength": 200,
+					"description": "OPTIONAL: Display name of the sender"
+				},
+				"recipient_count": {
+					"type": "integer",
+					"minimum": 100,
+					"description": "REQUIRED: Number of recipients (bulk requires minimum 100 recipients)"
+				},
+				"unsubscribe_provided": {
+					"type": "boolean",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Whether message provides unsubscribe mechanism"
+				},
+				"opt_in_evidence": {
+					"type": "boolean",
+					"description": "OPTIONAL: Whether there is evidence of recipient opt-in"
+				},
+				"bulk_indicators": {
+					"type": "object",
+					"properties": {
+						"high_volume": {
+							"type": "boolean",
+							"description": "OPTIONAL: High volume sending pattern detected"
+						},
+						"template_based": {
+							"type": "boolean",
+							"description": "OPTIONAL: Message appears to be template-based"
+						},
+						"commercial_sender": {
+							"type": "boolean",
+							"description": "OPTIONAL: Sender appears to be commercial entity"
+						}
+					},
+					"description": "OPTIONAL: Indicators specific to bulk messaging detection",
+					"additionalProperties": false
+				}
+			},
+			"required": [
+				"protocol",
+				"recipient_count"
+			],
+			"if": {
+				"properties": {
+					"protocol": {
+						"const": "smtp"
+					}
+				}
+			},
+			"then": {
+				"required": [
+					"smtp_from",
+					"source_port"
+				]
+			}
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "456e7890-a12b-34c5-d678-901234567890",
+			"timestamp": "2024-01-15T16:45:10Z",
+			"reporter": {
+				"org": "Email Service Provider",
+				"contact": "abuse@email-provider.example",
+				"domain": "email-provider.example"
+			},
+			"sender": {
+				"org": "Email Service Provider",
+				"contact": "abuse@email-provider.example",
+				"domain": "email-provider.example"
+			},
+			"source_identifier": "192.0.2.200",
+			"category": "messaging",
+			"type": "bulk_messaging",
+			"protocol": "smtp",
+			"smtp_from": "newsletter@company.example",
+			"subject": "Weekly Newsletter - January Edition",
+			"evidence_source": "user_complaint",
+			"recipient_count": 50000,
+			"unsubscribe_provided": false,
+			"tags": [
+				"bulk:commercial",
+				"complaint:unsubscribe"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/messaging-spam.json b/xarf/schemas/types/messaging-spam.json
new file mode 100644
index 0000000..7e043f7
--- /dev/null
+++ b/xarf/schemas/types/messaging-spam.json
@@ -0,0 +1,202 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/messaging-spam.json",
+	"title": "XARF v4 Messaging - Spam Type Schema",
+	"description": "Schema for spam email reports - unsolicited commercial messages",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "messaging"
+				},
+				"type": {
+					"const": "spam"
+				},
+				"evidence_source": {
+					"type": "string",
+					"enum": [
+						"spamtrap",
+						"user_complaint",
+						"automated_filter",
+						"honeypot",
+						"content_analysis",
+						"reputation_feed"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Source of spam evidence"
+				},
+				"protocol": {
+					"type": "string",
+					"enum": [
+						"smtp",
+						"sms",
+						"whatsapp",
+						"telegram",
+						"signal",
+						"chat",
+						"social_media",
+						"push_notification",
+						"other"
+					],
+					"description": "REQUIRED: Communication protocol used for spam delivery"
+				},
+				"smtp_from": {
+					"type": "string",
+					"format": "email",
+					"description": "REQUIRED: SMTP envelope sender address (required when protocol=smtp)",
+					"examples": [
+						"spam@example.com",
+						"noreply@malicious-domain.example"
+					]
+				},
+				"smtp_to": {
+					"type": "string",
+					"format": "email",
+					"x-recommended": true,
+					"description": "RECOMMENDED: SMTP envelope recipient address",
+					"examples": [
+						"victim@example.org",
+						"spamtrap@spamtrap.example"
+					]
+				},
+				"subject": {
+					"type": "string",
+					"maxLength": 500,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Message subject line",
+					"examples": [
+						"Urgent: Account Verification Required",
+						"Your package is ready for delivery",
+						"Limited Time Offer - Act Now!"
+					]
+				},
+				"sender_name": {
+					"type": "string",
+					"maxLength": 200,
+					"description": "OPTIONAL: Display name of the sender",
+					"examples": [
+						"Customer Support",
+						"No Reply",
+						"Sales Team"
+					]
+				},
+				"message_id": {
+					"type": "string",
+					"maxLength": 200,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Message ID from headers - helps with deduplication",
+					"examples": [
+						"<abc123@example.com>",
+						"msg_1234567890"
+					]
+				},
+				"user_agent": {
+					"type": "string",
+					"maxLength": 200,
+					"description": "OPTIONAL: User agent string from message headers",
+					"examples": [
+						"Outlook 16.0",
+						"bulk_mailer_v2.1"
+					]
+				},
+				"recipient_count": {
+					"type": "integer",
+					"minimum": 1,
+					"description": "OPTIONAL: Number of recipients for bulk spam campaigns"
+				},
+				"language": {
+					"type": "string",
+					"pattern": "^[a-z]{2}(-[A-Z]{2})?$",
+					"description": "OPTIONAL: Primary language of message content (ISO 639-1)",
+					"examples": [
+						"en",
+						"es",
+						"de",
+						"ja",
+						"en-US"
+					]
+				},
+				"spam_indicators": {
+					"type": "object",
+					"properties": {
+						"suspicious_links": {
+							"type": "array",
+							"items": {
+								"type": "string",
+								"format": "uri"
+							},
+							"description": "OPTIONAL: Suspicious URLs found in the message"
+						},
+						"commercial_content": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether message contains commercial offers"
+						},
+						"bulk_characteristics": {
+							"type": "boolean",
+							"description": "OPTIONAL: Whether message shows bulk mailing characteristics"
+						}
+					},
+					"description": "OPTIONAL: Indicators specific to spam detection",
+					"additionalProperties": false
+				}
+			},
+			"required": [
+				"protocol"
+			],
+			"if": {
+				"properties": {
+					"protocol": {
+						"const": "smtp"
+					}
+				}
+			},
+			"then": {
+				"required": [
+					"smtp_from",
+					"source_port"
+				]
+			}
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "123e4567-e89b-12d3-a456-426614174000",
+			"timestamp": "2024-01-15T14:30:25Z",
+			"reporter": {
+				"org": "Spam Reporting Service",
+				"contact": "reports@spam-reports.example",
+				"domain": "spam-reports.example"
+			},
+			"sender": {
+				"org": "Spam Reporting Service",
+				"contact": "reports@spam-reports.example",
+				"domain": "spam-reports.example"
+			},
+			"source_identifier": "192.0.2.123",
+			"source_port": 25,
+			"category": "messaging",
+			"type": "spam",
+			"protocol": "smtp",
+			"smtp_from": "fake@example.com",
+			"subject": "Urgent: Verify Your Account",
+			"evidence_source": "spamtrap",
+			"evidence": [
+				{
+					"content_type": "message/rfc822",
+					"description": "Complete spam email with headers",
+					"payload": "UmVjZWl2ZWQ6IGZyb20gW3NwYW1tZXIuZXhhbXBsZS5jb21d..."
+				}
+			],
+			"tags": [
+				"spam:commercial",
+				"campaign:fake_bank_2024"
+			],
+			"confidence": 0.92
+		}
+	]
+}
diff --git a/xarf/schemas/types/reputation-blocklist.json b/xarf/schemas/types/reputation-blocklist.json
new file mode 100644
index 0000000..3f644a0
--- /dev/null
+++ b/xarf/schemas/types/reputation-blocklist.json
@@ -0,0 +1,29 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/reputation-blocklist.json",
+	"title": "XARF v4 Reputation - Blocklist Type Schema",
+	"description": "Schema for IP/domain blocklist inclusion reports",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "reputation"
+				},
+				"type": {
+					"const": "blocklist"
+				},
+				"threat_type": {
+					"type": "string",
+					"description": "REQUIRED: Type of threat for blocklist inclusion"
+				}
+			},
+			"required": [
+				"threat_type"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/reputation-threat-intelligence.json b/xarf/schemas/types/reputation-threat-intelligence.json
new file mode 100644
index 0000000..d2e1df9
--- /dev/null
+++ b/xarf/schemas/types/reputation-threat-intelligence.json
@@ -0,0 +1,29 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/reputation-threat-intelligence.json",
+	"title": "XARF v4 Reputation - Threat Intelligence Type Schema",
+	"description": "Schema for Threat intelligence and IOC reports",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "reputation"
+				},
+				"type": {
+					"const": "threat_intelligence"
+				},
+				"threat_type": {
+					"type": "string",
+					"description": "REQUIRED: Type of threat for intelligence report"
+				}
+			},
+			"required": [
+				"threat_type"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/vulnerability-cve.json b/xarf/schemas/types/vulnerability-cve.json
new file mode 100644
index 0000000..cd7ef78
--- /dev/null
+++ b/xarf/schemas/types/vulnerability-cve.json
@@ -0,0 +1,276 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/vulnerability-cve.json",
+	"title": "XARF v4 Vulnerability - CVE Type Schema",
+	"description": "Schema for Common Vulnerabilities and Exposures (CVE) reports",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "vulnerability"
+				},
+				"type": {
+					"const": "cve"
+				},
+				"evidence_source": {
+					"type": "string",
+					"enum": [
+						"vulnerability_scan",
+						"researcher_analysis",
+						"automated_discovery",
+						"penetration_testing"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Source of CVE vulnerability evidence"
+				},
+				"service": {
+					"type": "string",
+					"maxLength": 200,
+					"description": "REQUIRED: Vulnerable service or application name",
+					"examples": [
+						"Apache HTTP Server",
+						"OpenSSL",
+						"Windows SMB",
+						"SSH Server",
+						"MySQL Database"
+					]
+				},
+				"service_version": {
+					"type": "string",
+					"maxLength": 100,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Version of the vulnerable service",
+					"examples": [
+						"2.4.41",
+						"1.1.1a",
+						"OpenSSH_7.4",
+						"5.7.33"
+					]
+				},
+				"service_port": {
+					"type": "integer",
+					"minimum": 1,
+					"maximum": 65535,
+					"description": "REQUIRED: Port number where vulnerable service is running",
+					"examples": [
+						80,
+						443,
+						22,
+						3389,
+						21,
+						23,
+						25
+					]
+				},
+				"cve_id": {
+					"type": "string",
+					"pattern": "^CVE-[0-9]{4}-[0-9]+$",
+					"description": "REQUIRED: CVE identifier",
+					"examples": [
+						"CVE-2021-44228",
+						"CVE-2014-0160",
+						"CVE-2017-5638"
+					]
+				},
+				"cve_ids": {
+					"type": "array",
+					"items": {
+						"type": "string",
+						"pattern": "^CVE-[0-9]{4}-[0-9]+$"
+					},
+					"description": "OPTIONAL: Multiple CVE identifiers if vulnerability involves multiple CVEs",
+					"maxItems": 10,
+					"uniqueItems": true
+				},
+				"cvss_score": {
+					"type": "number",
+					"minimum": 0.0,
+					"maximum": 10.0,
+					"x-recommended": true,
+					"description": "RECOMMENDED: CVSS vulnerability score (0.0-10.0)"
+				},
+				"cvss_vector": {
+					"type": "string",
+					"pattern": "^CVSS:3\\.[01]/.*",
+					"description": "OPTIONAL: CVSS v3 vector string",
+					"examples": [
+						"CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H",
+						"CVSS:3.0/AV:L/AC:L/PR:L/UI:N/S:U/C:H/I:H/A:H"
+					]
+				},
+				"cvss_version": {
+					"type": "string",
+					"enum": [
+						"2.0",
+						"3.0",
+						"3.1"
+					],
+					"description": "OPTIONAL: CVSS version used for scoring"
+				},
+				"risk_level": {
+					"type": "string",
+					"enum": [
+						"info",
+						"low",
+						"medium",
+						"high",
+						"critical"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Risk assessment level"
+				},
+				"severity": {
+					"type": "string",
+					"enum": [
+						"informational",
+						"low",
+						"medium",
+						"high",
+						"critical"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Severity classification"
+				},
+				"exploitability": {
+					"type": "string",
+					"enum": [
+						"theoretical",
+						"poc_available",
+						"functional",
+						"weaponized"
+					],
+					"x-recommended": true,
+					"description": "RECOMMENDED: Level of exploit availability and maturity"
+				},
+				"patch_available": {
+					"type": "boolean",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Whether a patch or fix is available"
+				},
+				"patch_version": {
+					"type": "string",
+					"maxLength": 100,
+					"description": "OPTIONAL: Version that fixes the vulnerability",
+					"examples": [
+						"2.4.46",
+						"1.1.1k",
+						"OpenSSH_8.0",
+						"5.7.35"
+					]
+				},
+				"patch_url": {
+					"type": "string",
+					"format": "uri",
+					"description": "OPTIONAL: URL to patch, security advisory, or fix information"
+				},
+				"vendor_advisory": {
+					"type": "string",
+					"format": "uri",
+					"description": "OPTIONAL: URL to vendor security advisory"
+				},
+				"disclosure_date": {
+					"type": "string",
+					"format": "date-time",
+					"description": "OPTIONAL: When CVE was publicly disclosed"
+				},
+				"impact_assessment": {
+					"type": "object",
+					"properties": {
+						"confidentiality": {
+							"type": "string",
+							"enum": [
+								"none",
+								"low",
+								"high"
+							],
+							"description": "OPTIONAL: Impact on data confidentiality"
+						},
+						"integrity": {
+							"type": "string",
+							"enum": [
+								"none",
+								"low",
+								"high"
+							],
+							"description": "OPTIONAL: Impact on data integrity"
+						},
+						"availability": {
+							"type": "string",
+							"enum": [
+								"none",
+								"low",
+								"high"
+							],
+							"description": "OPTIONAL: Impact on system availability"
+						}
+					},
+					"description": "OPTIONAL: CIA triad impact assessment",
+					"additionalProperties": false
+				},
+				"remediation_priority": {
+					"type": "string",
+					"enum": [
+						"low",
+						"medium",
+						"high",
+						"critical",
+						"emergency"
+					],
+					"description": "OPTIONAL: Recommended priority for remediation"
+				}
+			},
+			"required": [
+				"service",
+				"service_port",
+				"cve_id"
+			]
+		}
+	],
+	"examples": [
+		{
+			"xarf_version": "4.2.0",
+			"report_id": "901b2345-c678-90d1-e234-567890123456",
+			"timestamp": "2024-01-15T09:20:30Z",
+			"reporter": {
+				"org": "Vulnerability Scanner Service",
+				"contact": "security@vuln-scanner.example",
+				"domain": "vuln-scanner.example"
+			},
+			"sender": {
+				"org": "Vulnerability Scanner Service",
+				"contact": "security@vuln-scanner.example",
+				"domain": "vuln-scanner.example"
+			},
+			"source_identifier": "203.0.113.75",
+			"category": "vulnerability",
+			"type": "cve",
+			"service": "Apache HTTP Server",
+			"service_version": "2.4.41",
+			"service_port": 80,
+			"cve_id": "CVE-2021-41773",
+			"cvss_score": 7.5,
+			"cvss_vector": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N",
+			"risk_level": "high",
+			"severity": "high",
+			"patch_available": true,
+			"patch_version": "2.4.51",
+			"evidence_source": "vulnerability_scan",
+			"evidence": [
+				{
+					"content_type": "text/plain",
+					"description": "Vulnerability scan results showing Apache version",
+					"payload": "QXBhY2hlIEhUVFAgU2VydmVyIDIuNC40MSBkZXRlY3RlZCB3aXRoIENWRS0yMDIxLTQxNzcz"
+				}
+			],
+			"tags": [
+				"cve:CVE-2021-41773",
+				"severity:high",
+				"service:apache"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/vulnerability-misconfiguration.json b/xarf/schemas/types/vulnerability-misconfiguration.json
new file mode 100644
index 0000000..1c7b9a8
--- /dev/null
+++ b/xarf/schemas/types/vulnerability-misconfiguration.json
@@ -0,0 +1,29 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/vulnerability-misconfiguration.json",
+	"title": "XARF v4 Vulnerability - Misconfiguration Type Schema",
+	"description": "Schema for Security misconfigurations and hardening issues",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "vulnerability"
+				},
+				"type": {
+					"const": "misconfiguration"
+				},
+				"service": {
+					"type": "string",
+					"description": "REQUIRED: Service or component that is misconfigured"
+				}
+			},
+			"required": [
+				"service"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/types/vulnerability-open-service.json b/xarf/schemas/types/vulnerability-open-service.json
new file mode 100644
index 0000000..81e8b54
--- /dev/null
+++ b/xarf/schemas/types/vulnerability-open-service.json
@@ -0,0 +1,29 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/types/vulnerability-open-service.json",
+	"title": "XARF v4 Vulnerability - Open Service Type Schema",
+	"description": "Schema for open services that should not be publicly accessible (DNS resolvers, NTP servers, memcached, SSDP) and can be exploited for DDoS amplification or other attacks",
+	"allOf": [
+		{
+			"$ref": "../xarf-core.json"
+		},
+		{
+			"type": "object",
+			"properties": {
+				"category": {
+					"const": "vulnerability"
+				},
+				"type": {
+					"const": "open_service"
+				},
+				"service": {
+					"type": "string",
+					"description": "REQUIRED: Name of the open service"
+				}
+			},
+			"required": [
+				"service"
+			]
+		}
+	]
+}
diff --git a/xarf/schemas/xarf-core.json b/xarf/schemas/xarf-core.json
new file mode 100644
index 0000000..6b2a1a9
--- /dev/null
+++ b/xarf/schemas/xarf-core.json
@@ -0,0 +1,310 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/xarf-core.json",
+	"title": "XARF v4 Core Schema",
+	"description": "Base schema defining common fields and structures for all XARF v4 abuse reports",
+	"type": "object",
+	"required": [
+		"xarf_version",
+		"report_id",
+		"timestamp",
+		"reporter",
+		"sender",
+		"source_identifier",
+		"category",
+		"type"
+	],
+	"properties": {
+		"xarf_version": {
+			"type": "string",
+			"pattern": "^4\\.[0-9]+\\.[0-9]+$",
+			"description": "REQUIRED: XARF schema version using semantic versioning (e.g., '4.0.0')",
+			"examples": [
+				"4.0.0",
+				"4.1.2",
+				"4.6.1"
+			]
+		},
+		"report_id": {
+			"type": "string",
+			"format": "uuid",
+			"description": "REQUIRED: Unique report identifier using UUID v4 format",
+			"examples": [
+				"550e8400-e29b-41d4-a716-446655440000"
+			]
+		},
+		"timestamp": {
+			"type": "string",
+			"format": "date-time",
+			"description": "REQUIRED: ISO 8601 timestamp when the abuse incident occurred",
+			"examples": [
+				"2024-01-15T14:30:25Z",
+				"2024-01-15T14:30:25.123Z"
+			]
+		},
+		"reporter": {
+			"$ref": "#/$defs/contact_info",
+			"description": "REQUIRED: The organization that owns/generated the abuse complaint (the victim or complainant)"
+		},
+		"sender": {
+			"$ref": "#/$defs/contact_info",
+			"description": "REQUIRED: The organization that transmitted/filed this report (may be same as reporter or a service provider)"
+		},
+		"source_identifier": {
+			"type": "string",
+			"description": "REQUIRED: IP address, domain, or other identifier of the abuse source",
+			"examples": [
+				"192.0.2.1",
+				"2001:db8::1",
+				"example.com",
+				"abuse-source.example.org"
+			]
+		},
+		"source_port": {
+			"type": "integer",
+			"minimum": 1,
+			"maximum": 65535,
+			"x-recommended": true,
+			"description": "RECOMMENDED: Source port number - critical for identifying sources in CGNAT environments",
+			"examples": [
+				25,
+				80,
+				443,
+				3389
+			]
+		},
+		"category": {
+			"type": "string",
+			"enum": [
+				"messaging",
+				"content",
+				"copyright",
+				"connection",
+				"vulnerability",
+				"infrastructure",
+				"reputation"
+			],
+			"description": "REQUIRED: Primary abuse classification category"
+		},
+		"type": {
+			"type": "string",
+			"description": "REQUIRED: Specific abuse type within the category - validation depends on category value",
+			"examples": [
+				"spam",
+				"phishing",
+				"ddos",
+				"port_scan",
+				"bot",
+				"blocklist"
+			]
+		},
+		"evidence_source": {
+			"type": "string",
+			"x-recommended": true,
+			"description": "RECOMMENDED: Quality and reliability indicator for the evidence provided",
+			"examples": [
+				"spamtrap",
+				"user_complaint",
+				"automated_filter",
+				"honeypot",
+				"crawler",
+				"user_report",
+				"automated_scan",
+				"spam_analysis",
+				"firewall_logs",
+				"ids_detection",
+				"flow_analysis",
+				"vulnerability_scan",
+				"researcher_analysis",
+				"automated_discovery",
+				"traffic_analysis",
+				"threat_intelligence"
+			]
+		},
+		"evidence": {
+			"type": "array",
+			"items": {
+				"$ref": "#/$defs/evidence_item"
+			},
+			"x-recommended": true,
+			"description": "RECOMMENDED: Array of evidence items supporting this abuse report",
+			"maxItems": 50
+		},
+		"tags": {
+			"type": "array",
+			"items": {
+				"type": "string",
+				"pattern": "^[a-z0-9][a-z0-9_+-]*:[a-z0-9][a-z0-9_+-]*$",
+				"description": "OPTIONAL: Namespaced tag in format 'namespace:predicate'"
+			},
+			"description": "OPTIONAL: Namespaced tags for categorization, correlation, and automation",
+			"examples": [
+				[
+					"malware:conficker",
+					"campaign:winter-2024"
+				],
+				[
+					"botnet:command-and-control",
+					"malware:cobalt-strike"
+				],
+				[
+					"language:c++",
+					"attack:syn-flood"
+				]
+			],
+			"maxItems": 20
+		},
+		"confidence": {
+			"type": "number",
+			"minimum": 0.0,
+			"maximum": 1.0,
+			"x-recommended": true,
+			"description": "RECOMMENDED: Confidence score for this report (0.0 = no confidence, 1.0 = complete confidence)",
+			"examples": [
+				0.85,
+				0.95,
+				1.0
+			]
+		},
+		"description": {
+			"type": "string",
+			"maxLength": 1000,
+			"description": "OPTIONAL: Human-readable description of the abuse incident",
+			"examples": [
+				"Spam email campaign targeting financial institutions",
+				"DDoS attack against web services using SYN flood technique"
+			]
+		},
+		"legacy_version": {
+			"type": "string",
+			"enum": [
+				"3"
+			],
+			"description": "OPTIONAL: Original XARF version if this report was converted from v3 format"
+		},
+		"_internal": {
+			"$ref": "#/$defs/internal_metadata",
+			"description": "OPTIONAL: Internal operational metadata - NEVER transmitted between systems"
+		}
+	},
+	"additionalProperties": true,
+	"$defs": {
+		"contact_info": {
+			"type": "object",
+			"required": [
+				"org",
+				"contact",
+				"domain"
+			],
+			"properties": {
+				"org": {
+					"type": "string",
+					"maxLength": 200,
+					"description": "REQUIRED: Organization name",
+					"examples": [
+						"Acme Corporation",
+						"Abusix",
+						"Security Research Lab"
+					]
+				},
+				"contact": {
+					"type": "string",
+					"format": "email",
+					"description": "REQUIRED: Contact email address",
+					"examples": [
+						"abuse@example.com",
+						"reports@abusix.com"
+					]
+				},
+				"domain": {
+					"type": "string",
+					"format": "hostname",
+					"description": "REQUIRED: Organization domain for verification",
+					"examples": [
+						"example.com",
+						"abusix.com",
+						"security-lab.org"
+					]
+				}
+			},
+			"additionalProperties": false
+		},
+		"evidence_item": {
+			"type": "object",
+			"required": [
+				"content_type",
+				"payload"
+			],
+			"properties": {
+				"content_type": {
+					"type": "string",
+					"description": "REQUIRED: MIME type of the evidence content",
+					"examples": [
+						"text/plain",
+						"text/csv",
+						"application/json",
+						"message/rfc822",
+						"text/email",
+						"image/png",
+						"image/jpeg",
+						"image/gif",
+						"application/pdf",
+						"text/html",
+						"application/octet-stream",
+						"application/zip"
+					]
+				},
+				"description": {
+					"type": "string",
+					"maxLength": 500,
+					"x-recommended": true,
+					"description": "RECOMMENDED: Human-readable description of this evidence item",
+					"examples": [
+						"Original spam email with headers",
+						"Screenshot of phishing page",
+						"Network flow analysis logs"
+					]
+				},
+				"payload": {
+					"type": "string",
+					"description": "REQUIRED: Base64-encoded evidence data",
+					"contentEncoding": "base64"
+				},
+				"hash": {
+					"type": "string",
+					"pattern": "^(md5|sha1|sha256|sha512):[a-fA-F0-9]+$",
+					"x-recommended": true,
+					"description": "RECOMMENDED: Hash of evidence for integrity verification in format 'algorithm:hexvalue'",
+					"examples": [
+						"sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
+						"md5:d41d8cd98f00b204e9800998ecf8427e"
+					]
+				},
+				"size": {
+					"type": "integer",
+					"minimum": 0,
+					"maximum": 5242880,
+					"description": "OPTIONAL: Size of evidence in bytes (max 5MB per item)"
+				}
+			},
+			"additionalProperties": false
+		},
+		"internal_metadata": {
+			"type": "object",
+			"description": "OPTIONAL: Internal operational metadata - completely flexible, organization-defined structure. NEVER transmitted between systems.",
+			"additionalProperties": true,
+			"examples": [
+				{
+					"ticket": "ABUSE-1234",
+					"analyst": "john.doe",
+					"priority": "high"
+				},
+				{
+					"threat_id": "THR-2024-001",
+					"ml_confidence": 0.94,
+					"campaign_cluster": "winter_2024_phishing"
+				}
+			]
+		}
+	}
+}
diff --git a/xarf/schemas/xarf-v4-master.json b/xarf/schemas/xarf-v4-master.json
new file mode 100644
index 0000000..f93fe09
--- /dev/null
+++ b/xarf/schemas/xarf-v4-master.json
@@ -0,0 +1,638 @@
+{
+	"$schema": "https://json-schema.org/draft/2020-12/schema",
+	"$id": "https://xarf.org/schemas/v4/xarf-v4-master.json",
+	"title": "XARF v4 Master Schema",
+	"description": "Complete XARF v4 schema with type-specific validation for all categories and event types. This provides granular validation for each specific abuse type.",
+	"type": "object",
+	"allOf": [
+		{
+			"$ref": "xarf-core.json"
+		},
+		{
+			"description": "Valid category and type combinations. Rejects unknown types that do not match any defined category+type pair.",
+			"anyOf": [
+				{
+					"properties": {
+						"category": {
+							"const": "messaging"
+						},
+						"type": {
+							"enum": [
+								"spam",
+								"bulk_messaging"
+							]
+						}
+					}
+				},
+				{
+					"properties": {
+						"category": {
+							"const": "connection"
+						},
+						"type": {
+							"enum": [
+								"login_attack",
+								"port_scan",
+								"ddos",
+								"infected_host",
+								"reconnaissance",
+								"scraping",
+								"sql_injection",
+								"vulnerability_scan"
+							]
+						}
+					}
+				},
+				{
+					"properties": {
+						"category": {
+							"const": "vulnerability"
+						},
+						"type": {
+							"enum": [
+								"cve",
+								"open_service",
+								"misconfiguration"
+							]
+						}
+					}
+				},
+				{
+					"properties": {
+						"category": {
+							"const": "reputation"
+						},
+						"type": {
+							"enum": [
+								"blocklist",
+								"threat_intelligence"
+							]
+						}
+					}
+				},
+				{
+					"properties": {
+						"category": {
+							"const": "infrastructure"
+						},
+						"type": {
+							"enum": [
+								"botnet",
+								"compromised_server"
+							]
+						}
+					}
+				},
+				{
+					"properties": {
+						"category": {
+							"const": "content"
+						},
+						"type": {
+							"enum": [
+								"phishing",
+								"malware",
+								"csam",
+								"csem",
+								"exposed_data",
+								"brand_infringement",
+								"fraud",
+								"remote_compromise",
+								"suspicious_registration"
+							]
+						}
+					}
+				},
+				{
+					"properties": {
+						"category": {
+							"const": "copyright"
+						},
+						"type": {
+							"enum": [
+								"copyright",
+								"p2p",
+								"cyberlocker",
+								"ugc_platform",
+								"link_site",
+								"usenet"
+							]
+						}
+					}
+				}
+			]
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "messaging"
+					},
+					"type": {
+						"const": "spam"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/messaging-spam.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "messaging"
+					},
+					"type": {
+						"const": "bulk_messaging"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/messaging-bulk-messaging.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "connection"
+					},
+					"type": {
+						"const": "login_attack"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/connection-login-attack.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "connection"
+					},
+					"type": {
+						"const": "port_scan"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/connection-port-scan.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "connection"
+					},
+					"type": {
+						"const": "ddos"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/connection-ddos.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "connection"
+					},
+					"type": {
+						"const": "infected_host"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/connection-infected-host.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "connection"
+					},
+					"type": {
+						"const": "reconnaissance"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/connection-reconnaissance.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "connection"
+					},
+					"type": {
+						"const": "scraping"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/connection-scraping.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "connection"
+					},
+					"type": {
+						"const": "sql_injection"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/connection-sql-injection.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "connection"
+					},
+					"type": {
+						"const": "vulnerability_scan"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/connection-vulnerability-scan.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "vulnerability"
+					},
+					"type": {
+						"const": "cve"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/vulnerability-cve.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "vulnerability"
+					},
+					"type": {
+						"const": "open_service"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/vulnerability-open-service.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "vulnerability"
+					},
+					"type": {
+						"const": "misconfiguration"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/vulnerability-misconfiguration.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "reputation"
+					},
+					"type": {
+						"const": "blocklist"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/reputation-blocklist.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "reputation"
+					},
+					"type": {
+						"const": "threat_intelligence"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/reputation-threat-intelligence.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "infrastructure"
+					},
+					"type": {
+						"const": "botnet"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/infrastructure-botnet.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "infrastructure"
+					},
+					"type": {
+						"const": "compromised_server"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/infrastructure-compromised-server.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "content"
+					},
+					"type": {
+						"const": "phishing"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/content-phishing.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "content"
+					},
+					"type": {
+						"const": "malware"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/content-malware.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "content"
+					},
+					"type": {
+						"const": "csam"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/content-csam.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "content"
+					},
+					"type": {
+						"const": "csem"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/content-csem.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "content"
+					},
+					"type": {
+						"const": "exposed_data"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/content-exposed-data.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "content"
+					},
+					"type": {
+						"const": "brand_infringement"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/content-brand_infringement.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "content"
+					},
+					"type": {
+						"const": "fraud"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/content-fraud.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "content"
+					},
+					"type": {
+						"const": "remote_compromise"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/content-remote_compromise.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "content"
+					},
+					"type": {
+						"const": "suspicious_registration"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/content-suspicious_registration.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "copyright"
+					},
+					"type": {
+						"const": "copyright"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/copyright-copyright.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "copyright"
+					},
+					"type": {
+						"const": "p2p"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/copyright-p2p.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "copyright"
+					},
+					"type": {
+						"const": "cyberlocker"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/copyright-cyberlocker.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "copyright"
+					},
+					"type": {
+						"const": "ugc_platform"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/copyright-ugc-platform.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "copyright"
+					},
+					"type": {
+						"const": "link_site"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/copyright-link-site.json"
+			}
+		},
+		{
+			"if": {
+				"properties": {
+					"category": {
+						"const": "copyright"
+					},
+					"type": {
+						"const": "usenet"
+					}
+				}
+			},
+			"then": {
+				"$ref": "types/copyright-usenet.json"
+			}
+		}
+	],
+	"additionalProperties": true,
+	"properties": {
+		"xarf_version": {
+			"description": "This schema validates XARF v4 reports using type-specific validation. Supported versions match pattern ^4\\\\.[0-9]+\\\\.[0-9]+$"
+		}
+	},
+	"examples": [
+		{
+			"title": "Messaging Spam Report",
+			"category": "messaging",
+			"type": "spam",
+			"description": "Spam email detected by spamtrap"
+		},
+		{
+			"title": "Connection DDoS Report",
+			"category": "connection",
+			"type": "ddos",
+			"description": "DDoS attack with SYN flood technique"
+		},
+		{
+			"title": "Vulnerability CVE Report",
+			"category": "vulnerability",
+			"type": "cve",
+			"description": "Apache HTTP Server CVE-2021-41773 vulnerability"
+		},
+		{
+			"title": "Content Phishing Report",
+			"category": "content",
+			"type": "phishing",
+			"description": "Phishing website targeting banking credentials"
+		}
+	]
+}