From 0e165f4b50df30a14a83cd91bf8e32e135207803 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 26 Mar 2026 16:02:07 -0300 Subject: [PATCH 1/4] =?UTF-8?q?refactor:=20antecipa=20valida=C3=A7=C3=A3o?= =?UTF-8?q?=20de=20journal=20e=20issue=20em=20load=5Farticle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alterada a lógica de carregamento para que a existência de 'journal' e 'issue' seja verificada antes da chamada de Article.create_or_update. Principais mudanças: - Evita a criação ou atualização parcial de objetos Article quando as FKs obrigatórias estão ausentes. - Remove chamadas redundantes a article.save() dentro dos blocos de erro, centralizando a persistência após a atribuição bem-sucedida. - Melhora a clareza das mensagens de exceção incluindo o 'sps_pkg_name'. --- article/sources/xmlsps.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/article/sources/xmlsps.py b/article/sources/xmlsps.py index 2bcd41fd..9c3eda91 100755 --- a/article/sources/xmlsps.py +++ b/article/sources/xmlsps.py @@ -169,6 +169,18 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None): sps_pkg_name = xml_with_pre.sps_pkg_name logging.info(f"Article {pid_v3} {sps_pkg_name}") + journal = get_journal(xmltree=xmltree, errors=errors) + if not journal: + raise ValueError(f"Not found journal for article: {pid_v3} {sps_pkg_name}") + issue = get_issue( + xmltree=xmltree, + journal=journal, + item=pid_v3, + errors=errors, + ) + if not issue: + raise ValueError(f"Not found issue for article: {pid_v3} {sps_pkg_name}") + # CRIAÇÃO/OBTENÇÃO DO OBJETO PRINCIPAL article = Article.create_or_update( user=user, @@ -201,19 +213,9 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None): ) # FOREIGN KEYS SIMPLES - article.journal = get_journal(xmltree=xmltree, errors=errors) - if not article.journal: - article.save() - raise ValueError(f"Not found journal for article: {pid_v3}") - article.issue = get_issue( - xmltree=xmltree, - journal=article.journal, - item=pid_v3, - errors=errors, - ) - if not article.issue: - article.save() - raise ValueError(f"Not found issue for article: {pid_v3}") + article.journal = journal + article.issue = issue + article.save() # Salvar uma vez após definir todos os campos simples logging.info( From e91a4c9d30b44416ad8cb0702f6f5fbeec7d599f Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 26 Mar 2026 17:50:28 -0300 Subject: [PATCH 2/4] chore(pid_provider): add UNMATCHED status for journal/issue validation --- pid_provider/choices.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pid_provider/choices.py b/pid_provider/choices.py index f03a3d9c..bcb68b21 100644 --- a/pid_provider/choices.py +++ b/pid_provider/choices.py @@ -8,6 +8,8 @@ PPXML_STATUS_DONE = "DONE" PPXML_STATUS_UNDEF = "UNDEF" PPXML_STATUS_INVALID = "NVALID" +PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE = "UNMATCHED" + PPXML_STATUS_DUPLICATED = "DUP" PPXML_STATUS_DEDUPLICATED = "DEDUP" PPXML_STATUS = ( @@ -19,4 +21,5 @@ (PPXML_STATUS_INVALID, _("invalid")), (PPXML_STATUS_DUPLICATED, _("duplicated")), (PPXML_STATUS_DEDUPLICATED, _("deduplicated")), + (PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE, _("unmatched journal or issue")), ) From 9353cc54e5fe0a72b654e15b0c1c161821c8e74f Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 26 Mar 2026 17:50:44 -0300 Subject: [PATCH 3/4] feat(pid_provider): implement XMLEvent model and add_event helper --- pid_provider/choices.py | 2 +- ...ter_pidproviderxml_proc_status_xmlevent.py | 102 ++++++++++++++++++ pid_provider/models.py | 45 +++++++- 3 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 pid_provider/migrations/0016_alter_pidproviderxml_proc_status_xmlevent.py diff --git a/pid_provider/choices.py b/pid_provider/choices.py index bcb68b21..9415dab9 100644 --- a/pid_provider/choices.py +++ b/pid_provider/choices.py @@ -8,7 +8,7 @@ PPXML_STATUS_DONE = "DONE" PPXML_STATUS_UNDEF = "UNDEF" PPXML_STATUS_INVALID = "NVALID" -PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE = "UNMATCHED" +PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE = "UNMATCH" PPXML_STATUS_DUPLICATED = "DUP" PPXML_STATUS_DEDUPLICATED = "DEDUP" diff --git a/pid_provider/migrations/0016_alter_pidproviderxml_proc_status_xmlevent.py b/pid_provider/migrations/0016_alter_pidproviderxml_proc_status_xmlevent.py new file mode 100644 index 00000000..6d0be8bb --- /dev/null +++ b/pid_provider/migrations/0016_alter_pidproviderxml_proc_status_xmlevent.py @@ -0,0 +1,102 @@ +# Generated by Django 5.2.7 on 2026-03-26 20:52 + +import django.db.models.deletion +import modelcluster.fields +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("pid_provider", "0015_alter_xmlversion_file_xmlurl"), + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.AlterField( + model_name="pidproviderxml", + name="proc_status", + field=models.CharField( + blank=True, + choices=[ + ("TODO", "To do"), + ("DONE", "Done"), + ("WAIT", "waiting"), + ("IGNORE", "ignore"), + ("UNDEF", "undefined"), + ("NVALID", "invalid"), + ("DUP", "duplicated"), + ("DEDUP", "deduplicated"), + ("UNMATCH", "unmatched journal or issue"), + ], + default="TODO", + max_length=7, + null=True, + verbose_name="processing status", + ), + ), + migrations.CreateModel( + name="XMLEvent", + fields=[ + ( + "id", + models.BigAutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "updated", + models.DateTimeField( + auto_now=True, verbose_name="Last update date" + ), + ), + ("name", models.CharField(max_length=200, verbose_name="name")), + ("detail", models.JSONField(blank=True, null=True)), + ( + "created", + models.DateTimeField( + auto_now_add=True, verbose_name="Creation date" + ), + ), + ("completed", models.BooleanField(default=False)), + ( + "creator", + models.ForeignKey( + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_creator", + to=settings.AUTH_USER_MODEL, + verbose_name="Creator", + ), + ), + ( + "ppxml", + modelcluster.fields.ParentalKey( + on_delete=django.db.models.deletion.CASCADE, + related_name="events", + to="pid_provider.pidproviderxml", + ), + ), + ( + "updated_by", + models.ForeignKey( + blank=True, + editable=False, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="%(class)s_last_mod_user", + to=settings.AUTH_USER_MODEL, + verbose_name="Updater", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/pid_provider/models.py b/pid_provider/models.py index 20d5dc93..cb67abda 100644 --- a/pid_provider/models.py +++ b/pid_provider/models.py @@ -38,7 +38,7 @@ zero_to_none, QueryBuilderPidProviderXML, ) -from tracker.models import BaseEvent, EventSaveError, UnexpectedEvent +from tracker.models import BaseEvent, UnexpectedEvent try: from django_prometheus.models import ExportModelOperationsMixin @@ -441,12 +441,16 @@ class PidProviderXML(BasePidProviderXML, CommonControlField, ClusterableModel): FieldPanel("z_links"), FieldPanel("z_partial_body"), ] + panels_event = [ + InlinePanel("events", label=_("Events")), + ] edit_handler = TabbedInterface( [ ObjectList(panel_a, heading=_("Identification")), ObjectList(panel_b, heading=_("Other PIDs")), ObjectList(panel_c, heading=_("Data")), + ObjectList(panels_event, heading=_("Events")), ] ) @@ -1447,6 +1451,11 @@ def fix_pkg_name(self, pkg_name): self.save() return True return False + + def add_event(self, name, proc_status, detail=None, errors=None, exceptions=None): + self.proc_status = proc_status + self.save() + return XMLEvent.register(self, name, detail=detail, errors=errors, exceptions=exceptions) class FixPidV2(CommonControlField): @@ -1748,3 +1757,37 @@ def save_file(self, xml_content, filename=None): except Exception as e: logging.error(f"Error saving zip file for XMLURL {self.url}: {e}") return False + + +class XMLEvent(BaseEvent, CommonControlField): + """ + Model to log events related to XML processing in the PID Provider system. + + This model captures various events that occur during the processing of XML data, + such as registration attempts, validation errors, and other significant actions, + along with relevant details for debugging and monitoring purposes. + + Attributes: + name (CharField): Name of the event. + detail (JSONField): Detailed information about the event. + created (DateTimeField): Timestamp when the event was created. + completed (BooleanField): Indicates if the event has been completed. + ppxml (ParentalKey): Reference to the related PidProviderXML instance. + + Methods: + data (property): Returns a dictionary with the event's name, detail, and creation timestamp. + create (classmethod): Creates and saves a new XMLEvent instance. + finish: Marks the event as completed and optionally updates details, errors, or exceptions. + """ + ppxml = ParentalKey( + PidProviderXML, on_delete=models.CASCADE, related_name="events" + ) + + @classmethod + def register(cls, ppxml, name, detail=None, errors=None, exceptions=None): + obj = cls() + obj.ppxml = ppxml + obj.name = name + completed = bool(not errors and not exceptions) + obj.finish(completed=completed, detail=detail, errors=errors, exceptions=exceptions) + return obj \ No newline at end of file From 7849df41fcf7c8ac9dbf32094432ba357ac77db4 Mon Sep 17 00:00:00 2001 From: Roberta Takenaka Date: Thu, 26 Mar 2026 17:55:21 -0300 Subject: [PATCH 4/4] refactor(article): simplify load_article to require pp_xml and use XMLEvent --- article/sources/xmlsps.py | 111 +++++++++++++------------------------- 1 file changed, 37 insertions(+), 74 deletions(-) diff --git a/article/sources/xmlsps.py b/article/sources/xmlsps.py index 9c3eda91..24624220 100755 --- a/article/sources/xmlsps.py +++ b/article/sources/xmlsps.py @@ -21,7 +21,6 @@ from packtools.sps.models.kwd_group import ArticleKeywords from packtools.sps.models.v2.article_toc_sections import ArticleTocSections from packtools.sps.models.v2.related_articles import RelatedArticles -from packtools.sps.pid_provider.xml_sps_lib import XMLWithPre from article import choices from article.models import ( @@ -42,7 +41,7 @@ from issue.articlemeta.loader import load_issue_sections from journal.models import Journal from location.models import Location -from pid_provider.choices import PPXML_STATUS_DONE, PPXML_STATUS_INVALID +from pid_provider.choices import PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE, PPXML_STATUS_INVALID from pid_provider.models import PidProviderXML # Researcher no longer used - replaced by ContribPerson # from researcher.models import Affiliation, Researcher @@ -70,7 +69,7 @@ def add_error(errors, function_name, error, **kwargs): errors.append(error_dict) -def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None): +def load_article(user, pp_xml): """ Carrega um artigo a partir de XML. @@ -80,10 +79,7 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None): Args: user: Usuário responsável pela operação (obrigatório) - xml: String contendo o XML do artigo (opcional) - file_path: Caminho para o arquivo XML (opcional) - v3: PID v3 do artigo (opcional) - pp_xml: Objeto PidProviderXML relacionado (opcional) + pp_xml: Objeto PidProviderXML relacionado (obrigatório) Returns: Article: Instância do artigo processado com todos os relacionamentos @@ -98,80 +94,57 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None): - O processamento continua mesmo com falhas parciais - O campo article.valid indica se o processamento foi completo """ - logging.info(f"load article {pp_xml} {v3} {file_path}") - errors = [] - article = None # Inicializar no início + logging.info(f"load article {pp_xml}") + detail = {"pp_xml": str(pp_xml)} # Validações iniciais if not user: raise ValueError("User is required") - if not any([pp_xml, v3, file_path, xml]): + if not pp_xml: raise ValueError( - "load_article() requires params: pp_xml or v3 or file_path or xml" + "load_article() requires params: pp_xml" ) - if not pp_xml and v3: - try: - pp_xml = PidProviderXML.get_by_pid_v3(pid_v3=v3) - except PidProviderXML.DoesNotExist: - pp_xml = None - try: - if pp_xml: - xml_with_pre = pp_xml.xml_with_pre - elif file_path: - for xml_with_pre in XMLWithPre.create(file_path): - xmltree = xml_with_pre.xmltree - break - elif xml: - xml_with_pre = XMLWithPre("", etree.fromstring(xml)) + xml_with_pre = pp_xml.xml_with_pre except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - UnexpectedEvent.create( - item=str(pp_xml or v3 or file_path or "xml"), - action="article.sources.xmlsps.load_article", - exception=e, - exc_traceback=exc_traceback, - detail=dict( - function="article.sources.xmlsps.load_article", - xml=f"{xml}", - v3=v3, - file_path=file_path, - pp_xml=str(pp_xml), - ), - ) - item = str(pp_xml or v3 or file_path or "xml") - if pp_xml: - pp_xml.proc_status = PPXML_STATUS_INVALID - pp_xml.save() - updated = ( - Article.objects.filter(pid_v3=pp_xml.v3) - .exclude( - pp_xml=pp_xml, - data_status=choices.DATA_STATUS_INVALID, - ) - .update( - pp_xml=pp_xml, - data_status=choices.DATA_STATUS_INVALID, - ) + updated = ( + Article.objects.filter(pp_xml=pp_xml) + .exclude( + data_status=choices.DATA_STATUS_INVALID, ) - raise ValueError(f"Unable to get XML to load article from {item}: {e}") + .update( + data_status=choices.DATA_STATUS_INVALID, + ) + ) + errors = [ + { + "function": "load_article", + "error_type": e.__class__.__name__, + "error_message": str(e), + "timestamp": datetime.now().isoformat(), + } + ] + pp_xml.add_event(name="load_article", proc_status=PPXML_STATUS_INVALID, detail=detail, errors=errors, exceptions=e) + raise ValueError(f"Unable to get XML to load article from {pp_xml}: {e}") - pid_v3 = v3 or xml_with_pre.v3 try: - # Sequência organizada para atribuição de campos do Article - # Do mais simples (campos diretos) para o mais complexo (FKs e M2M) + errors = [] + article = None event = None + xmltree = xml_with_pre.xmltree + pid_v3 = xml_with_pre.v3 sps_pkg_name = xml_with_pre.sps_pkg_name - logging.info(f"Article {pid_v3} {sps_pkg_name}") + logging.info(f"Pid Provider XML: {pid_v3} {sps_pkg_name}") + journal = get_journal(xmltree=xmltree, errors=errors) if not journal: - raise ValueError(f"Not found journal for article: {pid_v3} {sps_pkg_name}") + raise ValueError(f"Not found journal for pid provider xml: {pid_v3} {sps_pkg_name}") issue = get_issue( xmltree=xmltree, journal=journal, @@ -179,7 +152,7 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None): errors=errors, ) if not issue: - raise ValueError(f"Not found issue for article: {pid_v3} {sps_pkg_name}") + raise ValueError(f"Not found issue for pid provider xml: {pid_v3} {sps_pkg_name}") # CRIAÇÃO/OBTENÇÃO DO OBJETO PRINCIPAL article = Article.create_or_update( @@ -288,19 +261,9 @@ def load_article(user, xml=None, file_path=None, v3=None, pp_xml=None): if event: event.finish(errors=errors, exceptions=traceback.format_exc()) raise - UnexpectedEvent.create( - item=str(pp_xml or v3 or file_path or "xml"), - action="article.sources.xmlsps.load_article", - exception=e, - exc_traceback=exc_traceback, - detail=dict( - function="article.sources.xmlsps.load_article", - xml=f"{xml}", - v3=v3, - file_path=file_path, - pp_xml=str(pp_xml), - ), - ) + + pp_xml.add_event(name="load_article", proc_status=PPXML_STATUS_UNMATCHED_JOURNAL_OR_ISSUE, detail=detail, errors=errors, exceptions=e) + raise