diff --git a/bigbang/tasks_scheduler.py b/bigbang/tasks_scheduler.py index 6022380f..48aa115d 100644 --- a/bigbang/tasks_scheduler.py +++ b/bigbang/tasks_scheduler.py @@ -271,7 +271,7 @@ def schedule_load_journal_from_article_meta(username, enabled=False): """ Agenda a tarefa de carga de dados de journals obtidos do AM e Core. - Configura verify=True para verificação SSL nas requisições HTTP. + Configura verify=False para desabilitar a verificação SSL nas requisições HTTP. """ schedule_task( task="journal.tasks.load_journal_from_article_meta", @@ -279,7 +279,7 @@ def schedule_load_journal_from_article_meta(username, enabled=False): kwargs=dict( load_data=False, collection_acron="scl", - verify=True, + verify=False, ), description=_("Carga de dados de journals obtidos do AM e Core"), priority=1, @@ -295,7 +295,7 @@ def schedule_collect_journals_from_am(username, enabled=False): """ Agenda a tarefa de coleta de journals da fonte AM. - Configura verify=True para verificação SSL nas requisições HTTP. + Configura verify=False para desabilitar a verificação SSL nas requisições HTTP. 
""" schedule_task( task="journal.tasks.load_journal_from_article_meta", @@ -303,7 +303,7 @@ def schedule_collect_journals_from_am(username, enabled=False): kwargs=dict( load_data=True, collection_acron="scl", - verify=True, + verify=False, ), description=_("Coleta de journals da fonte AM"), priority=1, diff --git a/collection/models.py b/collection/models.py index 73d8e5f9..d761603d 100755 --- a/collection/models.py +++ b/collection/models.py @@ -223,12 +223,12 @@ def __str__(self): base_form_class = CoreAdminModelForm @classmethod - def load(cls, user, collections_data=None): + def load(cls, user, collections_data=None, verify=False): if not collections_data: collections_data = fetch_data( "https://articlemeta.scielo.org/api/v1/collection/identifiers/", json=True, - verify=False, + verify=verify, ) for collection_data in collections_data: diff --git a/core/utils/harvesters.py b/core/utils/harvesters.py index cf2026f6..928b7233 100644 --- a/core/utils/harvesters.py +++ b/core/utils/harvesters.py @@ -19,6 +19,7 @@ def __init__( until_date: Optional[str] = None, limit: Optional[int] = None, timeout: int = 30, + verify: bool = False, ): """ Inicializa o harvester do ArticleMeta. @@ -37,6 +38,7 @@ def __init__( self.until_date = until_date or datetime.utcnow().isoformat()[:10] self.limit = limit or 1000 self.timeout = timeout + self.verify = verify def harvest_documents(self) -> Generator[Dict[str, Any], None, None]: """ @@ -73,7 +75,7 @@ def harvest_documents(self) -> Generator[Dict[str, Any], None, None]: logging.info(f"Fetching AM documents from: {url}") # Faz requisição - response = fetch_data(url, json=True, timeout=self.timeout, verify=False) + response = fetch_data(url, json=True, timeout=self.timeout, verify=self.verify) # Processa objetos retornados objects = response.get("objects", []) @@ -147,6 +149,7 @@ def __init__( until_date: Optional[str] = None, limit: int = 100, timeout: int = 5, + verify: bool = False, ): """ Inicializa o harvester do OPAC. 
@@ -165,6 +168,7 @@ def __init__( self.until_date = until_date or datetime.utcnow().isoformat()[:10] self.limit = limit or 100 self.timeout = timeout or 5 + self.verify = verify def harvest_documents(self) -> Generator[Dict[str, Any], None, None]: """ @@ -199,7 +203,7 @@ def harvest_documents(self) -> Generator[Dict[str, Any], None, None]: # Faz requisição # verify=False é necessário para evitar erros de SSL em ambientes onde o certificado do OPAC não é reconhecido - response = fetch_data(url, json=True, timeout=self.timeout, verify=False) + response = fetch_data(url, json=True, timeout=self.timeout, verify=self.verify) # Define total de páginas na primeira iteração if total_pages is None: diff --git a/core/utils/utils.py b/core/utils/utils.py index d57cd63b..16d4f351 100644 --- a/core/utils/utils.py +++ b/core/utils/utils.py @@ -42,7 +42,7 @@ class NonRetryableError(Exception): wait=wait_exponential(multiplier=1, min=1, max=5), stop=stop_after_attempt(5), ) -def fetch_data(url, headers=None, json=False, timeout=FETCH_DATA_TIMEOUT, verify=True): +def fetch_data(url, headers=None, json=False, timeout=FETCH_DATA_TIMEOUT, verify=False): """ Get the resource with HTTP Retry: Wait 2^x * 1 second between each retry starting with 4 seconds, diff --git a/issue/articlemeta/loader.py b/issue/articlemeta/loader.py index a8242e57..3dcf360e 100644 --- a/issue/articlemeta/loader.py +++ b/issue/articlemeta/loader.py @@ -14,14 +14,16 @@ def harvest_issue_identifiers( - collection_acron, from_date, until_date, force_update, timeout=30 + collection_acron, from_date, until_date, force_update, timeout=30, verify=False ): + # chamado em core/issue/tasks.py try: harvester = AMHarvester( record_type="issue", collection_acron=collection_acron, from_date=from_date, until_date=until_date, + verify=verify ) yield from harvester.harvest_documents() @@ -40,7 +42,7 @@ def harvest_issue_identifiers( ) -def harvest_and_load_issue(user, url, code, collection_acron, processing_date, force_update, 
timeout=30): +def harvest_and_load_issue(user, url, code, collection_acron, processing_date, force_update, timeout=30, verify=False): if not url: raise ValueError("URL is required to harvest and load issue") @@ -50,7 +52,7 @@ def harvest_and_load_issue(user, url, code, collection_acron, processing_date, f if not collection_acron: raise ValueError("Collection acronym is required to harvest and load issue") - harvested_data = harvest_issue_data(url, timeout=timeout) + harvested_data = harvest_issue_data(url, timeout=timeout, verify=verify) am_issue = load_am_issue( user, Collection.objects.get(acron3=collection_acron), @@ -60,16 +62,17 @@ def harvest_and_load_issue(user, url, code, collection_acron, processing_date, f harvested_data, force_update=force_update, timeout=timeout, + verify=verify, ) if not am_issue: raise ValueError(f"Unable to create am_issue for {url}") return create_issue_from_am_issue(user, am_issue) -def harvest_issue_data(url, timeout=30): +def harvest_issue_data(url, timeout=30, verify=False): try: item = {} - item["data"] = utils.fetch_data(url, json=True, timeout=timeout, verify=False) + item["data"] = utils.fetch_data(url, json=True, timeout=timeout, verify=verify) item["status"] = "pending" return item except Exception as e: @@ -96,6 +99,7 @@ def load_am_issue( force_update, do_harvesting=False, timeout=30, + verify=False, ): try: if not url: @@ -103,7 +107,7 @@ def load_am_issue( # Corrigido: não redefine harvested_data se já existe if do_harvesting or not harvested_data: - harvested_data = harvest_issue_data(url, timeout=timeout) + harvested_data = harvest_issue_data(url, timeout=timeout, verify=verify) return AMIssue.create_or_update( pid=pid, @@ -132,7 +136,7 @@ def load_am_issue( return None -def complete_am_issue(user, am_issue): +def complete_am_issue(user, am_issue, verify=False): try: detail = {} @@ -144,7 +148,7 @@ def complete_am_issue(user, am_issue): if not am_issue.url: raise ValueError("am_issue.url is required") - 
harvested_data = harvest_issue_data(am_issue.url) + harvested_data = harvest_issue_data(am_issue.url, verify=verify) detail["harvested_data"] = str(harvested_data) am_issue.status = harvested_data.get("status") am_issue.data = harvested_data.get("data") @@ -160,7 +164,7 @@ def complete_am_issue(user, am_issue): ) -def get_issue_data_from_am_issue(am_issue, user=None): +def get_issue_data_from_am_issue(am_issue, user=None, verify=False): """ Extrai e ajusta dados do AMIssue para criação de Issue. @@ -183,7 +187,7 @@ def get_issue_data_from_am_issue(am_issue, user=None): am_data = am_issue.data if not am_data: if user: - complete_am_issue(user, am_issue) + complete_am_issue(user, am_issue, verify=verify) am_data = am_issue.data if not am_data: diff --git a/issue/tasks.py b/issue/tasks.py index bd1f2391..fd12abc1 100644 --- a/issue/tasks.py +++ b/issue/tasks.py @@ -28,6 +28,7 @@ def load_issue_from_articlemeta( until_date=None, force_update=None, timeout=30, + verify=False, ): """ Carrega issues do ArticleMeta para collections específicas. @@ -53,7 +54,7 @@ def load_issue_from_articlemeta( # Coletar identificadores de issues for issue_identifier in harvest_issue_identifiers( - acron3, from_date, until_date, force_update, timeout + acron3, from_date, until_date, force_update, timeout, verify ): try: logger.info(f"Scheduling load for issue {issue_identifier.get('code')} in collection {acron3}") @@ -65,6 +66,7 @@ def load_issue_from_articlemeta( issue_identifier=issue_identifier, force_update=force_update, timeout=timeout, + verify=verify, ) except Exception as e: exc_type, exc_value, exc_traceback = sys.exc_info() @@ -116,6 +118,8 @@ def task_harvest_and_load_issue( issue_identifier=None, force_update=None, timeout=30, + verify=False, + ): """ Carrega um issue específico do ArticleMeta. 
@@ -127,6 +131,7 @@ def task_harvest_and_load_issue( issue_identifier: Dados do identificador do issue force_update: Forçar atualização de registros existentes timeout: Timeout para requisições HTTP + verify: Verificação SSL para requisições HTTP """ try: user = _get_user(request=self.request, user_id=user_id, username=username) @@ -158,6 +163,7 @@ def task_harvest_and_load_issue( processing_date=processing_date, force_update=force_update, timeout=timeout, + verify=verify, ) if issue: diff --git a/journal/sources/article_meta.py b/journal/sources/article_meta.py index 443ff145..06ea792e 100644 --- a/journal/sources/article_meta.py +++ b/journal/sources/article_meta.py @@ -15,7 +15,7 @@ def __init__(self, message): super().__init__(f"Failed to save SciELO Journal from article meta: {message}") -def _get_collection_journals(offset=None, limit=None, collection=None, verify=True): +def _get_collection_journals(offset=None, limit=None, collection=None, verify=False): limit = limit or 10 offset = f"&offset={offset}" if offset else "" if not collection: @@ -30,7 +30,7 @@ def _get_collection_journals(offset=None, limit=None, collection=None, verify=Tr return data -def _fetch_and_store_journal(collection, issn, obj_collection, user, verify=True): +def _fetch_and_store_journal(collection, issn, obj_collection, user, verify=False): url_journal = f"https://articlemeta.scielo.org/api/v1/journal/?collection={collection}&issn={issn}" data_journal = fetch_data(url_journal, json=True, timeout=30, verify=verify) AMJournal.create_or_update( @@ -41,7 +41,7 @@ def _fetch_and_store_journal(collection, issn, obj_collection, user, verify=True ) -def process_journal_article_meta(collection, limit, user, journal_issn_list=None, verify=True): +def process_journal_article_meta(collection, limit, user, journal_issn_list=None, verify=False): obj_collection = Collection.objects.get(acron3=collection) if journal_issn_list: for issn in journal_issn_list: diff --git a/journal/tasks.py 
b/journal/tasks.py index 70d38f0f..ef5e9e7d 100644 --- a/journal/tasks.py +++ b/journal/tasks.py @@ -49,7 +49,7 @@ def load_journal_from_article_meta( collection_acron=None, load_data=None, journal_issn_list=None, - verify=True, + verify=False, ): try: if journal_issn_list and not collection_acron: @@ -95,7 +95,7 @@ def load_journal_from_article_meta_for_one_collection( limit=None, load_data=None, journal_issn_list=None, - verify=True, + verify=False, ): user = _get_user(self.request, username=username, user_id=user_id) try: