diff --git a/dandi/cli/cmd_validate.py b/dandi/cli/cmd_validate.py index 5ef7501ee..375dbc578 100644 --- a/dandi/cli/cmd_validate.py +++ b/dandi/cli/cmd_validate.py @@ -21,7 +21,7 @@ validation_companion_path, write_validation_jsonl, ) -from ..validate._types import Severity, ValidationResult +from ..validate._types import MissingFileContent, Severity, ValidationResult lgr = logging.getLogger(__name__) @@ -56,6 +56,7 @@ def _collect_results( schema: str | None, devel_debug: bool, allow_any_path: bool, + missing_file_content: MissingFileContent = MissingFileContent.error, ) -> list[ValidationResult]: """Run validation and collect all results into a list.""" # Avoid heavy import by importing within function: @@ -81,6 +82,7 @@ def _collect_results( schema_version=schema, devel_debug=devel_debug, allow_any_path=allow_any_path, + missing_file_content=missing_file_content, ) ) @@ -208,6 +210,17 @@ def validate_bids( help="Limit results per group (or total if ungrouped). " "Excess results are replaced by a count of omitted items.", ) +@click.option( + "--missing-file-content", + "missing_file_content", + help="How to handle files whose content is unavailable (e.g. broken symlinks " + "in a datalad dataset without fetched data). 'error' (default) emits a " + "concise error per file, 'skip' skips each such file with a warning, " + "'only-non-data' skips content-dependent validators but still validates " + "path layout.", + type=click.Choice(["error", "only-non-data", "skip"], case_sensitive=True), + default="error", +) @click.option( "--load", help="Load validation results from JSONL file(s) instead of running validation.", @@ -229,6 +242,7 @@ def validate( output_file: str | None = None, summary: bool = False, max_per_group: int | None = None, + missing_file_content: str = "error", load: tuple[str, ...] 
= (), schema: str | None = None, devel_debug: bool = False, @@ -270,7 +284,10 @@ def validate( if load: results = load_validation_jsonl(load) else: - results = _collect_results(paths, schema, devel_debug, allow_any_path) + mfc = MissingFileContent(missing_file_content) + results = _collect_results( + paths, schema, devel_debug, allow_any_path, missing_file_content=mfc + ) # Auto-save companion right after collection, before filtering — so # all results are preserved regardless of display filters. # Skip when writing to --output (user already gets structured output). diff --git a/dandi/cli/tests/test_cmd_validate.py b/dandi/cli/tests/test_cmd_validate.py index 06b587d93..6d0b503a0 100644 --- a/dandi/cli/tests/test_cmd_validate.py +++ b/dandi/cli/tests/test_cmd_validate.py @@ -896,3 +896,103 @@ def test_validate_auto_companion_structured_stdout( assert r.exit_code == 1 assert len(list(redirected_logdir.glob("*_validation.jsonl"))) == 1 + + +# ---- Tests for --missing-file-content option (issue #1606) ---- + + +def _make_dandiset_with_broken_symlinks(tmp_path: Path) -> Path: + """Create a dandiset with broken NWB symlinks *and* one real NWB file. + + Layout:: + + dandiset.yaml + sub-001/sub-001.nwb -> broken symlink (datalad annex) + sub-002/sub-002.nwb -> broken symlink (datalad annex) + sub-003/sub-003.nwb -> real NWB file (content present) + + This lets tests assert that real files are still fully validated + while broken symlinks are handled according to the policy. 
+ """ + from datetime import datetime, timezone + + from ...consts import dandiset_metadata_file + from ...pynwb_utils import make_nwb_file + + (tmp_path / dandiset_metadata_file).write_text( + "identifier: '000027'\nname: Test\ndescription: Test dandiset\n" + ) + # Two broken symlinks + for name in ("sub-001/sub-001.nwb", "sub-002/sub-002.nwb"): + p = tmp_path / name + p.parent.mkdir(parents=True, exist_ok=True) + p.symlink_to( + ".git/annex/objects/XX/YY/SHA256E-s123--abc.nwb/SHA256E-s123--abc.nwb" + ) + # One real NWB file + sub3 = tmp_path / "sub-003" + sub3.mkdir() + make_nwb_file( + sub3 / "sub-003.nwb", + session_description="test session", + identifier="test-nwb-003", + session_start_time=datetime(2017, 4, 3, 11, tzinfo=timezone.utc), + ) + return tmp_path + + +@pytest.mark.ai_generated +def test_validate_missing_file_content_error_default(tmp_path: Path) -> None: + """Default --missing-file-content=error emits concise errors for broken symlinks.""" + ds = _make_dandiset_with_broken_symlinks(tmp_path) + r = CliRunner().invoke(validate, [str(ds)]) + assert r.exit_code == 1 + assert "FILE_CONTENT_MISSING" in r.output + assert "broken symlink" in r.output.lower() + # No Python tracebacks in output + assert "Traceback" not in r.output + # The real NWB file (sub-003) must still be validated + assert "sub-003" in r.output + + +@pytest.mark.ai_generated +def test_validate_missing_file_content_skip(tmp_path: Path) -> None: + """--missing-file-content=skip skips broken symlinks with a WARNING.""" + ds = _make_dandiset_with_broken_symlinks(tmp_path) + r = CliRunner().invoke(validate, ["--missing-file-content", "skip", str(ds)]) + assert "FILE_CONTENT_MISSING_SKIPPED" in r.output + assert "skipped" in r.output.lower() + # The real NWB file (sub-003) must still be validated + assert "sub-003" in r.output + + +@pytest.mark.ai_generated +def test_validate_missing_file_content_only_non_data(tmp_path: Path) -> None: + """--missing-file-content=only-non-data validates path 
layout only for broken.""" + ds = _make_dandiset_with_broken_symlinks(tmp_path) + r = CliRunner().invoke( + validate, ["--missing-file-content", "only-non-data", str(ds)] + ) + assert "FILE_CONTENT_MISSING_PARTIAL" in r.output + # No pynwb tracebacks + assert "Traceback" not in r.output + # The real NWB file (sub-003) must still be validated + assert "sub-003" in r.output + + +@pytest.mark.ai_generated +def test_validate_missing_file_content_no_broken_symlinks(tmp_path: Path) -> None: + """--missing-file-content has no effect on normal files (no broken symlinks).""" + from ...consts import dandiset_metadata_file + + (tmp_path / dandiset_metadata_file).write_text( + "identifier: '000027'\nname: Test\ndescription: Test dandiset\n" + ) + r_default = CliRunner().invoke(validate, [str(tmp_path)]) + r_skip = CliRunner().invoke( + validate, ["--missing-file-content", "skip", str(tmp_path)] + ) + # Both should succeed identically (no broken symlinks) + assert r_default.exit_code == r_skip.exit_code == 0 + assert "FILE_CONTENT_MISSING" not in r_default.output + assert "FILE_CONTENT_MISSING" not in r_skip.output diff --git a/dandi/files/bases.py b/dandi/files/bases.py index de52027f7..381ee51cf 100644 --- a/dandi/files/bases.py +++ b/dandi/files/bases.py @@ -32,6 +32,7 @@ from dandi.validate._types import ( ORIGIN_INTERNAL_DANDI, ORIGIN_VALIDATION_DANDI, + MissingFileContent, Origin, OriginType, Scope, @@ -91,9 +92,18 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: """ Attempt to validate the file and return a list of errors encountered + + Parameters + ---------- + missing_file_content : MissingFileContent | None + When set (not None), the file's content is known to be unavailable + (e.g. broken symlink). Validators should adjust behaviour + accordingly — for ``only-non-data`` they should skip + content-dependent checks and emit a WARNING. 
""" ... @@ -116,6 +126,7 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: with open(self.filepath) as f: meta = yaml_load(f, typ="safe") @@ -185,7 +196,14 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: + # When file content is unavailable and policy is only-non-data, + # skip metadata extraction (which requires reading the file) and + # only do path-based validation via subclass hooks. + if missing_file_content == MissingFileContent.only_non_data: + return [] + current_version = get_schema_version() if schema_version is None: schema_version = current_version @@ -511,81 +529,88 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: """ Validate NWB asset If ``schema_version`` was provided, we only validate basic metadata, - and completely skip validation using nwbinspector.inspect_nwbfile - """ - # Avoid heavy import by importing within function: - from nwbinspector import Importance, inspect_nwbfile, load_config + and completely skip validation using nwbinspector.inspect_nwbfile. - # Avoid heavy import by importing within function: - from dandi.pynwb_utils import validate as pynwb_validate + If ``missing_file_content`` is ``only-non-data``, content-dependent + validators (pynwb, nwbinspector) are skipped and only path-layout + validation is performed. 
+ """ + errors: list[ValidationResult] = [] - errors: list[ValidationResult] = pynwb_validate( - self.filepath, devel_debug=devel_debug - ) - if schema_version is not None: - errors.extend( - super().get_validation_errors( - schema_version=schema_version, devel_debug=devel_debug - ) - ) + if missing_file_content == MissingFileContent.only_non_data: + # Skip content-dependent validators (pynwb, nwbinspector). + # The warning for the user is emitted centrally in _core.validate(). + pass else: - # make sure that we have some basic metadata fields we require - try: - origin_validation_nwbinspector = Origin( - type=OriginType.VALIDATION, - validator=Validator.nwbinspector, - validator_version=str(_get_nwb_inspector_version()), + # Avoid heavy import by importing within function: + from nwbinspector import Importance, inspect_nwbfile, load_config + + # Avoid heavy import by importing within function: + from dandi.pynwb_utils import validate as pynwb_validate + + errors.extend(pynwb_validate(self.filepath, devel_debug=devel_debug)) + if schema_version is not None: + errors.extend( + super().get_validation_errors( + schema_version=schema_version, devel_debug=devel_debug + ) ) + else: + # make sure that we have some basic metadata fields we require + try: + origin_validation_nwbinspector = Origin( + type=OriginType.VALIDATION, + validator=Validator.nwbinspector, + validator_version=str(_get_nwb_inspector_version()), + ) - for error in inspect_nwbfile( - nwbfile_path=self.filepath, - skip_validate=True, - config=load_config(filepath_or_keyword="dandi"), - importance_threshold=Importance.BEST_PRACTICE_VIOLATION, - # we might want to switch to a lower threshold once nwbinspector - # upstream reporting issues are clarified: - # https://github.com/dandi/dandi-cli/pull/1162#issuecomment-1322238896 - # importance_threshold=Importance.BEST_PRACTICE_SUGGESTION, - ): - severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[error.importance.name] - kw: Any = {} - if error.location: - 
kw["within_asset_paths"] = { - error.file_path: error.location, - } - errors.append( - ValidationResult( - origin=origin_validation_nwbinspector, - severity=severity, - id=f"NWBI.{error.check_function_name}", - scope=Scope.FILE, - origin_result=error, - path=Path(error.file_path), - message=error.message, - # Assuming multiple sessions per multiple subjects, - # otherwise nesting level might differ - dataset_path=Path(error.file_path).parent.parent, # TODO - dandiset_path=Path(error.file_path).parent, # TODO - **kw, + for error in inspect_nwbfile( + nwbfile_path=self.filepath, + skip_validate=True, + config=load_config(filepath_or_keyword="dandi"), + importance_threshold=Importance.BEST_PRACTICE_VIOLATION, + ): + severity = NWBI_IMPORTANCE_TO_DANDI_SEVERITY[ + error.importance.name + ] + kw: Any = {} + if error.location: + kw["within_asset_paths"] = { + error.file_path: error.location, + } + errors.append( + ValidationResult( + origin=origin_validation_nwbinspector, + severity=severity, + id=f"NWBI.{error.check_function_name}", + scope=Scope.FILE, + origin_result=error, + path=Path(error.file_path), + message=error.message, + dataset_path=Path(error.file_path).parent.parent, + dandiset_path=Path(error.file_path).parent, + **kw, + ) ) + except Exception as e: + if devel_debug: + raise + # TODO: might reraise instead of making it into an error + return _pydantic_errors_to_validation_results( + [e], self.filepath, scope=Scope.FILE ) - except Exception as e: - if devel_debug: - raise - # TODO: might reraise instead of making it into an error - return _pydantic_errors_to_validation_results( - [e], self.filepath, scope=Scope.FILE - ) # Avoid circular imports by importing within function: from .bids import NWBBIDSAsset from ..organize import validate_organized_path + # Path-layout validation always runs (doesn't need content) if not isinstance(self, NWBBIDSAsset) and self.dandiset_path is not None: errors.extend( validate_organized_path(self.path, self.filepath, 
self.dandiset_path) diff --git a/dandi/files/bids.py b/dandi/files/bids.py index 335f70f5d..008b085dd 100644 --- a/dandi/files/bids.py +++ b/dandi/files/bids.py @@ -18,6 +18,7 @@ from ..misctypes import Digest from ..validate._types import ( ORIGIN_VALIDATION_DANDI_LAYOUT, + MissingFileContent, Scope, Severity, ValidationResult, @@ -184,6 +185,7 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: """ Return all validation results for the containing dataset per the BIDS standard @@ -237,6 +239,7 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: return self.bids_dataset_description.get_asset_errors(self) @@ -266,9 +269,13 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: return NWBAsset.get_validation_errors( - self, schema_version, devel_debug + self, + schema_version, + devel_debug, + missing_file_content=missing_file_content, ) + BIDSAsset.get_validation_errors(self) def get_metadata( @@ -297,9 +304,13 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: return ZarrAsset.get_validation_errors( - self, schema_version, devel_debug + self, + schema_version, + devel_debug, + missing_file_content=missing_file_content, ) + BIDSAsset.get_validation_errors(self) def get_metadata( @@ -328,7 +339,11 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: return GenericAsset.get_validation_errors( - self, schema_version, 
devel_debug + self, + schema_version, + devel_debug, + missing_file_content=missing_file_content, ) + BIDSAsset.get_validation_errors(self) diff --git a/dandi/files/zarr.py b/dandi/files/zarr.py index b9668e2b0..9acde0f3b 100644 --- a/dandi/files/zarr.py +++ b/dandi/files/zarr.py @@ -49,6 +49,7 @@ from .bases import LocalDirectoryAsset from ..validate._types import ( ORIGIN_VALIDATION_DANDI_ZARR, + MissingFileContent, Origin, OriginType, Scope, @@ -444,6 +445,7 @@ def get_validation_errors( self, schema_version: str | None = None, devel_debug: bool = False, + missing_file_content: MissingFileContent | None = None, ) -> list[ValidationResult]: # Avoid heavy import by importing within function: import zarr @@ -542,7 +544,9 @@ def get_validation_errors( ) ) return errors + super().get_validation_errors( - schema_version=schema_version, devel_debug=devel_debug + schema_version=schema_version, + devel_debug=devel_debug, + missing_file_content=missing_file_content, ) def _is_too_deep(self) -> bool: diff --git a/dandi/validate/__init__.py b/dandi/validate/__init__.py index 580022b36..7866f689f 100644 --- a/dandi/validate/__init__.py +++ b/dandi/validate/__init__.py @@ -27,6 +27,7 @@ ORIGIN_VALIDATION_DANDI, ORIGIN_VALIDATION_DANDI_LAYOUT, ORIGIN_VALIDATION_DANDI_ZARR, + MissingFileContent, Origin, OriginType, Scope, @@ -39,6 +40,7 @@ __all__ = [ "CURRENT_RECORD_VERSION", + "MissingFileContent", "ORIGIN_INTERNAL_DANDI", "ORIGIN_VALIDATION_DANDI", "ORIGIN_VALIDATION_DANDI_LAYOUT", diff --git a/dandi/validate/_core.py b/dandi/validate/_core.py index b397e49c0..371ce5a30 100644 --- a/dandi/validate/_core.py +++ b/dandi/validate/_core.py @@ -12,9 +12,11 @@ from collections.abc import Iterator import os from pathlib import Path +from typing import Any from ._types import ( ORIGIN_VALIDATION_DANDI_LAYOUT, + MissingFileContent, Origin, OriginType, Scope, @@ -147,18 +149,39 @@ def validate_bids( return our_validation_result +def _is_broken_symlink(filepath: Path) -> bool: + 
"""Return True if *filepath* is a symlink whose target does not exist.""" + return filepath.is_symlink() and not filepath.exists() + + +# BIDS error codes that require reading file content (headers, pixel data). +# When ``only-non-data`` is active these are suppressed for broken-symlink files. +_BIDS_CONTENT_DEPENDENT_CODES = frozenset( + { + "BIDS.NIFTI_HEADER_UNREADABLE", + "BIDS.EMPTY_FILE", + } +) + + def validate( *paths: str | Path, schema_version: str | None = None, devel_debug: bool = False, allow_any_path: bool = False, + missing_file_content: MissingFileContent = MissingFileContent.error, ) -> Iterator[ValidationResult]: """Validate content Parameters ---------- - paths: list(str) + paths : list(str) Could be individual (.nwb) files or a single dandiset path. + missing_file_content : MissingFileContent + Policy for files whose content is unavailable (e.g. broken symlinks in a + datalad dataset without fetched data). ``error`` emits a concise error, + ``skip`` skips the file with a warning, ``only-non-data`` skips + content-dependent validators but still validates path layout. 
 Yields ------ @@ -190,11 +213,101 @@ def validate( for df in find_dandi_files( p, dandiset_path=dandiset_path, allow_all=allow_any_path ): + # Handle broken symlinks (missing file content) + if _is_broken_symlink(df.filepath): + r = _handle_missing_content(df, missing_file_content) + if r is not None: + r_id = id(r) + if r_id not in df_result_ids: + df_results.append(r) + df_result_ids.add(r_id) + yield r + if missing_file_content in ( + MissingFileContent.skip, + MissingFileContent.error, + ): + continue + # only-non-data: fall through but pass the flag to validators + + is_broken = _is_broken_symlink(df.filepath) for r in df.get_validation_errors( - schema_version=schema_version, devel_debug=devel_debug + schema_version=schema_version, + devel_debug=devel_debug, + missing_file_content=(missing_file_content if is_broken else None), ): + # For broken-symlink files under only-non-data, suppress + # BIDS errors that require reading file content (e.g. + # NIFTI_HEADER_UNREADABLE). The validator ran in full so + # real files still get those checks. + if ( + is_broken + and missing_file_content == MissingFileContent.only_non_data + and r.id in _BIDS_CONTENT_DEPENDENT_CODES + ): + continue r_id = id(r) if r_id not in df_result_ids: df_results.append(r) df_result_ids.add(r_id) yield r + + +def _handle_missing_content( + df: Any, + policy: MissingFileContent, +) -> ValidationResult | None: + """Produce a single :class:`ValidationResult` for a file with missing content. + + A result is returned for every policy: an ERROR for ``error``, a WARNING + for ``skip``, and a WARNING for ``only-non-data`` noting that + content-dependent checks were skipped; the ``None`` in the return type is + defensive, for future policies that may not emit a result. 
+ """ + from ..files import DandiFile + + assert isinstance(df, DandiFile) + filepath = df.filepath + + if policy == MissingFileContent.error: + return ValidationResult( + id="DANDI.FILE_CONTENT_MISSING", + origin=ORIGIN_VALIDATION_DANDI_LAYOUT, + severity=Severity.ERROR, + scope=Scope.FILE, + path=filepath, + dandiset_path=df.dandiset_path, + message=( + f"File content is not available (broken symlink: " + f"{filepath} -> {os.readlink(filepath)}). " + f"Use --missing-file-content=skip or " + f"--missing-file-content=only-non-data to handle gracefully." + ), + ) + elif policy == MissingFileContent.skip: + return ValidationResult( + id="DANDI.FILE_CONTENT_MISSING_SKIPPED", + origin=ORIGIN_VALIDATION_DANDI_LAYOUT, + severity=Severity.WARNING, + scope=Scope.FILE, + path=filepath, + dandiset_path=df.dandiset_path, + message=( + f"Validation skipped: file content is not available " + f"(broken symlink: {filepath} -> {os.readlink(filepath)})." + ), + ) + else: + # only-non-data: emit a warning per file so the user knows that + # content-dependent checks were skipped for this file. + return ValidationResult( + id="DANDI.FILE_CONTENT_MISSING_PARTIAL", + origin=ORIGIN_VALIDATION_DANDI_LAYOUT, + severity=Severity.WARNING, + scope=Scope.FILE, + path=filepath, + dandiset_path=df.dandiset_path, + message=( + f"File content is not available (broken symlink: " + f"{filepath} -> {os.readlink(filepath)}); " + f"content-dependent checks skipped, " + f"path layout still validated." + ), + ) diff --git a/dandi/validate/_types.py b/dandi/validate/_types.py index 3b2fad046..f41c8402e 100644 --- a/dandi/validate/_types.py +++ b/dandi/validate/_types.py @@ -11,6 +11,22 @@ import dandi from dandi.utils import StrEnum + +class MissingFileContent(StrEnum): + """Policy for handling files whose content is missing (e.g. 
broken symlinks + in a datalad dataset without fetched data).""" + + error = auto() + """Emit a concise error for each file with missing content (default).""" + + only_non_data = "only-non-data" + """Skip content-dependent validators (e.g. pynwb, nwbinspector) but still + validate non-data aspects such as path layout.""" + + skip = auto() + """Skip the file entirely; emit a WARNING noting that validation was skipped.""" + + lgr = logging.getLogger(__name__) diff --git a/dandi/validate/tests/test_core.py b/dandi/validate/tests/test_core.py index 6c6d5f1d3..7d331bb61 100644 --- a/dandi/validate/tests/test_core.py +++ b/dandi/validate/tests/test_core.py @@ -6,6 +6,7 @@ from .._core import validate from .._types import ( + MissingFileContent, Origin, OriginType, Scope, @@ -198,3 +199,114 @@ def test_validate_bids_errors( err_location = r.path.relative_to(r.dataset_path).as_posix() assert err_location == expected_err_location + + +def _make_dandiset_with_broken_symlink( + tmp_path: Path, *, include_real_nwb: bool = False +) -> Path: + """Create a minimal dandiset with a broken NWB symlink (simulating datalad). + + When *include_real_nwb* is True a second subject directory contains a real + (though minimal) NWB file so tests can verify that normal validation still + runs on files with content present alongside broken symlinks. 
+ """ + (tmp_path / dandiset_metadata_file).write_text( + "identifier: '000027'\nname: Test\ndescription: Test dandiset\n" + ) + sub_dir = tmp_path / "sub-001" + sub_dir.mkdir() + nwb_link = sub_dir / "sub-001.nwb" + # Symlink to a non-existent target (simulating datalad annex) + nwb_link.symlink_to( + ".git/annex/objects/XX/YY/SHA256E-s123--abc.nwb/SHA256E-s123--abc.nwb" + ) + + if include_real_nwb: + from datetime import datetime, timezone + + from ...pynwb_utils import make_nwb_file + + sub2 = tmp_path / "sub-002" + sub2.mkdir() + make_nwb_file( + sub2 / "sub-002.nwb", + session_description="test session", + identifier="test-nwb-001", + session_start_time=datetime(2017, 4, 3, 11, tzinfo=timezone.utc), + ) + return tmp_path + + +@pytest.mark.ai_generated +def test_validate_broken_symlink_error_default(tmp_path: Path) -> None: + """Default (error) policy emits a concise error for broken symlinks.""" + ds = _make_dandiset_with_broken_symlink(tmp_path) + results = list(validate(ds)) + errs = [r for r in results if r.id == "DANDI.FILE_CONTENT_MISSING"] + assert len(errs) == 1 + assert errs[0].severity == Severity.ERROR + assert errs[0].message is not None + assert "broken symlink" in errs[0].message.lower() + # No traceback should appear in the message + assert "Traceback" not in errs[0].message + + +@pytest.mark.ai_generated +def test_validate_broken_symlink_skip(tmp_path: Path) -> None: + """skip policy emits a WARNING and skips the file entirely.""" + ds = _make_dandiset_with_broken_symlink(tmp_path) + results = list(validate(ds, missing_file_content=MissingFileContent.skip)) + skipped = [r for r in results if r.id == "DANDI.FILE_CONTENT_MISSING_SKIPPED"] + assert len(skipped) == 1 + assert skipped[0].severity == Severity.WARNING + assert skipped[0].message is not None + assert "skipped" in skipped[0].message.lower() + # No pynwb/nwbinspector errors should appear + pynwb_errs = [r for r in results if r.origin.validator in (Validator.pynwb,)] + assert 
len(pynwb_errs) == 0 + + +@pytest.mark.ai_generated +def test_validate_broken_symlink_only_non_data(tmp_path: Path) -> None: + """only-non-data policy skips content validators, runs path validation.""" + ds = _make_dandiset_with_broken_symlink(tmp_path) + results = list(validate(ds, missing_file_content=MissingFileContent.only_non_data)) + # Should have a partial-skip warning + partial = [r for r in results if r.id == "DANDI.FILE_CONTENT_MISSING_PARTIAL"] + assert len(partial) == 1 + assert partial[0].severity == Severity.WARNING + # No pynwb/nwbinspector errors + pynwb_errs = [r for r in results if r.origin.validator in (Validator.pynwb,)] + assert len(pynwb_errs) == 0 + + +@pytest.mark.ai_generated +def test_validate_broken_symlink_real_file_still_validated(tmp_path: Path) -> None: + """When a real NWB file coexists with broken symlinks, normal validation + still runs on the real file under all policies.""" + ds = _make_dandiset_with_broken_symlink(tmp_path, include_real_nwb=True) + + for policy in (MissingFileContent.skip, MissingFileContent.only_non_data): + results = list(validate(ds, missing_file_content=policy)) + + # The real file (sub-002.nwb) must have been validated by pynwb or + # nwbinspector — look for any result referencing it. + real_file = tmp_path / "sub-002" / "sub-002.nwb" + real_results = [ + r for r in results if r.path is not None and r.path == real_file + ] + assert len(real_results) > 0, ( + f"policy={policy.value}: expected validation results for the real " + f"NWB file at {real_file}" + ) + + # The broken symlink file must NOT have pynwb/nwbinspector results. + broken_file = tmp_path / "sub-001" / "sub-001.nwb" + broken_pynwb = [ + r + for r in results + if r.path == broken_file and r.origin.validator == Validator.pynwb + ] + assert ( + len(broken_pynwb) == 0 + ), f"policy={policy.value}: pynwb should not run on the broken symlink"