Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions packtools/sps/models/sec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
from packtools.sps.models.article_and_subarticles import Fulltext


VALID_SEC_TYPES = [
"cases",
"conclusions",
"data-availability",
"discussion",
"intro",
"materials",
"methods",
"results",
"subjects",
"supplementary-material",
"transcript",
]

NON_COMBINABLE_SEC_TYPES = [
"data-availability",
"supplementary-material",
"transcript",
]


class Sec:
"""Represents a single <sec> element."""

def __init__(self, element, parent_attribs=None):
self.element = element
self._parent_attribs = parent_attribs or {}

@property
def sec_id(self):
return self.element.get("id")

@property
def sec_type(self):
return self.element.get("sec-type")

@property
def specific_use(self):
return self.element.get("specific-use")

@property
def title(self):
title_elem = self.element.find("title")
if title_elem is not None:
return title_elem.text or ""
return None

@property
def paragraphs(self):
return self.element.findall("p")

@property
def is_first_level(self):
parent = self.element.getparent()
if parent is not None:
return parent.tag in ("body", "back", "abstract", "trans-abstract",
"app", "bio", "boxed-text")
return True

@property
def data(self):
d = {
"sec_id": self.sec_id,
"sec_type": self.sec_type,
"specific_use": self.specific_use,
"title": self.title,
"has_title": self.title is not None,
"paragraph_count": len(self.paragraphs),
"is_first_level": self.is_first_level,
}
d.update(self._parent_attribs)
return d


class ArticleSecs:
"""Extracts all <sec> elements from an article and sub-articles."""

def __init__(self, xmltree):
self.xmltree = xmltree

@property
def main_article_type(self):
return self.xmltree.find(".").get("article-type")

def _get_secs_from_node(self, node, parent_attribs):
for sec_elem in node.xpath(".//sec"):
yield Sec(sec_elem, parent_attribs).data

@property
def all_secs(self):
for node in self.xmltree.xpath(
". | ./sub-article[@article-type='translation']"
):
fulltext = Fulltext(node)
parent_attribs = fulltext.attribs_parent_prefixed
yield from self._get_secs_from_node(fulltext.node, parent_attribs)

@property
def first_level_body_secs(self):
"""Get only first-level <sec> elements inside <body>."""
for node in self.xmltree.xpath(
". | ./sub-article[@article-type='translation']"
):
fulltext = Fulltext(node)
parent_attribs = fulltext.attribs_parent_prefixed
body = fulltext.body
if body is not None:
for sec_elem in body.findall("sec"):
yield Sec(sec_elem, parent_attribs).data

@property
def body_sec_types(self):
"""Get all sec-type values from first-level body secs."""
return [
sec["sec_type"]
for sec in self.first_level_body_secs
if sec.get("sec_type")
]
238 changes: 238 additions & 0 deletions packtools/sps/validation/sec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
from packtools.sps.models.sec import ArticleSecs
from packtools.sps.validation.utils import build_response


class SecValidation:
"""Validates a single <sec> element."""

def __init__(self, data, params):
self.data = data
self.params = params

def validate(self):
yield self.validate_title()
result = self.validate_sec_type_value()
if result:
yield result
result = self.validate_transcript_id()
if result:
yield result
result = self.validate_combined_format()
if result:
yield result
result = self.validate_non_combinable()
if result:
yield result
yield self.validate_content()

def validate_title(self):
"""Rule 1: <title> is mandatory in <sec> for accessibility."""
has_title = self.data.get("has_title")
return build_response(
title="sec title",
parent=self.data,
item="sec",
sub_item="title",
validation_type="exist",
is_valid=has_title,
expected="<title> element in <sec>",
obtained=self.data.get("title"),
advice="Add <title> element to <sec> for accessibility",
data=self.data,
error_level=self.params.get("title_error_level", "CRITICAL"),
)

def validate_sec_type_value(self):
"""Rule 2: When present, @sec-type must have a valid value."""
sec_type = self.data.get("sec_type")
if not sec_type:
return None

valid_sec_types = self.params.get("valid_sec_types", [])
# Handle combined types (e.g. "materials|methods")
parts = sec_type.split("|")
is_valid = all(part in valid_sec_types for part in parts)

return build_response(
title="sec type value",
parent=self.data,
item="sec",
sub_item="@sec-type",
validation_type="value in list",
is_valid=is_valid,
expected=str(valid_sec_types),
obtained=sec_type,
advice=f'Replace @sec-type="{sec_type}" with a valid value: {valid_sec_types}',
data=self.data,
error_level=self.params.get("sec_type_value_error_level", "ERROR"),
)

def validate_transcript_id(self):
"""Rule 3: <sec sec-type="transcript"> must have @id."""
sec_type = self.data.get("sec_type")
if sec_type != "transcript":
return None

sec_id = self.data.get("sec_id")
is_valid = bool(sec_id)

return build_response(
title="transcript id",
parent=self.data,
item="sec",
sub_item="@id",
validation_type="exist",
is_valid=is_valid,
expected='@id attribute in <sec sec-type="transcript">',
obtained=sec_id,
advice='Add @id attribute to <sec sec-type="transcript">',
data=self.data,
error_level=self.params.get("transcript_id_error_level", "ERROR"),
)

def validate_combined_format(self):
"""Rule 5: Combined sec-types must use pipe separator."""
sec_type = self.data.get("sec_type")
if not sec_type:
return None

# Only check if it looks like a combined type (contains separator chars)
# If it contains spaces or commas but not pipes, it's incorrectly formatted
has_space_separator = " " in sec_type and "|" not in sec_type
has_comma_separator = "," in sec_type and "|" not in sec_type

if not has_space_separator and not has_comma_separator:
return None

return build_response(
title="sec type combined format",
parent=self.data,
item="sec",
sub_item="@sec-type",
validation_type="format",
is_valid=False,
expected='Combined sec-types separated by pipe "|" (e.g., "materials|methods")',
obtained=sec_type,
advice=f'Use pipe "|" as separator in @sec-type="{sec_type}" (e.g., "materials|methods")',
data=self.data,
error_level=self.params.get("combined_format_error_level", "WARNING"),
)

def validate_non_combinable(self):
"""Rule 6: transcript, supplementary-material, and data-availability cannot be combined."""
sec_type = self.data.get("sec_type")
if not sec_type or "|" not in sec_type:
return None

non_combinable = self.params.get(
"non_combinable_sec_types",
["data-availability", "supplementary-material", "transcript"],
)
parts = sec_type.split("|")

found_non_combinable = [p for p in parts if p in non_combinable]
if not found_non_combinable:
return None

return build_response(
title="sec type non-combinable",
parent=self.data,
item="sec",
sub_item="@sec-type",
validation_type="format",
is_valid=False,
expected=f"Types {non_combinable} must not be combined with other types",
obtained=sec_type,
advice=f'Do not combine "{found_non_combinable[0]}" with other types in @sec-type="{sec_type}"',
data=self.data,
error_level=self.params.get("non_combinable_error_level", "WARNING"),
)

def validate_content(self):
"""Rule 7: <sec> should contain at least one <p> after <title>."""
paragraph_count = self.data.get("paragraph_count", 0)
is_valid = paragraph_count > 0

return build_response(
title="sec content",
parent=self.data,
item="sec",
sub_item="p",
validation_type="exist",
is_valid=is_valid,
expected="At least one <p> element in <sec>",
obtained=f"{paragraph_count} paragraphs",
advice="Add at least one <p> element to <sec>",
data=self.data,
error_level=self.params.get("content_error_level", "WARNING"),
)


class XMLSecValidation:
"""Validates all <sec> elements in the XML document."""

def __init__(self, xmltree, params):
self.xmltree = xmltree
self.params = params
self.article_secs = ArticleSecs(xmltree)

def validate(self):
yield from self.validate_secs()
yield from self.validate_data_availability_presence()

def validate_secs(self):
"""Validate each <sec> element individually."""
for sec_data in self.article_secs.all_secs:
validator = SecValidation(sec_data, self.params)
yield from validator.validate()

def validate_data_availability_presence(self):
"""Rule 4: Certain article types require a data-availability section."""
required_types = self.params.get(
"data_availability_required_article_types",
[
"data-article",
"brief-report",
"case-report",
"rapid-communication",
"research-article",
"review-article",
],
)

article_type = self.article_secs.main_article_type
if not article_type or article_type not in required_types:
return

body_sec_types = self.article_secs.body_sec_types
has_data_availability = "data-availability" in body_sec_types

parent = {
"parent": "article",
"parent_id": None,
"parent_article_type": article_type,
"parent_lang": self.xmltree.find(".").get(
"{http://www.w3.org/XML/1998/namespace}lang"
),
}

yield build_response(
title="data availability section",
parent=parent,
item="sec",
sub_item='@sec-type="data-availability"',
validation_type="exist",
is_valid=has_data_availability,
expected='<sec sec-type="data-availability"> in <body>',
obtained=(
'<sec sec-type="data-availability">'
if has_data_availability
else "missing"
),
advice=(
f'Add <sec sec-type="data-availability" specific-use="..."> to <body> '
f'(required for article-type="{article_type}")'
),
data=parent,
error_level=self.params.get("data_availability_error_level", "ERROR"),
)
19 changes: 19 additions & 0 deletions packtools/sps/validation/xml_validations.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from packtools.sps.validation.history import HistoryValidation
from packtools.sps.validation.ext_link import ExtLinkValidation
from packtools.sps.validation.graphic import XMLGraphicValidation
from packtools.sps.validation.sec import XMLSecValidation


def validate_affiliations(xmltree, params):
Expand Down Expand Up @@ -374,3 +375,21 @@ def validate_graphics(xmltree, params):
graphic_rules = params["graphic_rules"]
validator = XMLGraphicValidation(xmltree, graphic_rules)
yield from validator.validate()


def validate_secs(xmltree, params):
"""
Validates <sec> elements according to SPS 1.10 specification.

Validates:
- <title> presence (accessibility requirement)
- @sec-type valid values
- @id for transcript sections
- data-availability section presence for required article types
- Combined sec-type format
- Non-combinable sec-types
- Content presence
"""
sec_rules = params["sec_rules"]
validator = XMLSecValidation(xmltree, sec_rules)
yield from validator.validate()
4 changes: 4 additions & 0 deletions packtools/sps/validation/xml_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,7 @@ def validate_xml_content(xmltree, rules):
"group": "graphic",
"items": xml_validations.validate_graphics(xmltree, params),
}
yield {
"group": "sec",
"items": xml_validations.validate_secs(xmltree, params),
}
Loading