Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions bigbang/tasks_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,15 +271,15 @@ def schedule_load_journal_from_article_meta(username, enabled=False):
"""
Agenda a tarefa de carga de dados de journals obtidos do AM e Core.

Configura verify=True para verificação SSL nas requisições HTTP.
Configura verify=False para verificação SSL nas requisições HTTP.
"""
schedule_task(
task="journal.tasks.load_journal_from_article_meta",
name="Carga de dados de journals obtidos do AM e Core",
kwargs=dict(
load_data=False,
collection_acron="scl",
verify=True,
verify=False,
),
description=_("Carga de dados de journals obtidos do AM e Core"),
priority=1,
Expand All @@ -295,15 +295,15 @@ def schedule_collect_journals_from_am(username, enabled=False):
"""
Agenda a tarefa de coleta de journals da fonte AM.

Configura verify=True para verificação SSL nas requisições HTTP.
Configura verify=False para verificação SSL nas requisições HTTP.
"""
schedule_task(
task="journal.tasks.load_journal_from_article_meta",
name="Coleta de journals da fonte AM",
kwargs=dict(
load_data=True,
collection_acron="scl",
verify=True,
verify=False,
),
description=_("Coleta de journals da fonte AM"),
priority=1,
Expand Down
4 changes: 2 additions & 2 deletions collection/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,12 +223,12 @@ def __str__(self):
base_form_class = CoreAdminModelForm

@classmethod
def load(cls, user, collections_data=None):
def load(cls, user, collections_data=None, verify=False):
if not collections_data:
collections_data = fetch_data(
"https://articlemeta.scielo.org/api/v1/collection/identifiers/",
json=True,
verify=False,
verify=verify,
)

for collection_data in collections_data:
Expand Down
8 changes: 6 additions & 2 deletions core/utils/harvesters.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def __init__(
until_date: Optional[str] = None,
limit: Optional[int] = None,
timeout: int = 30,
verify: bool = False,
):
"""
Inicializa o harvester do ArticleMeta.
Expand All @@ -37,6 +38,7 @@ def __init__(
self.until_date = until_date or datetime.utcnow().isoformat()[:10]
self.limit = limit or 1000
self.timeout = timeout
self.verify = verify

def harvest_documents(self) -> Generator[Dict[str, Any], None, None]:
"""
Expand Down Expand Up @@ -73,7 +75,7 @@ def harvest_documents(self) -> Generator[Dict[str, Any], None, None]:
logging.info(f"Fetching AM documents from: {url}")

# Faz requisição
response = fetch_data(url, json=True, timeout=self.timeout, verify=False)
response = fetch_data(url, json=True, timeout=self.timeout, verify=self.verify)

# Processa objetos retornados
objects = response.get("objects", [])
Expand Down Expand Up @@ -147,6 +149,7 @@ def __init__(
until_date: Optional[str] = None,
limit: int = 100,
timeout: int = 5,
verify: bool = False,
):
"""
Inicializa o harvester do OPAC.
Expand All @@ -165,6 +168,7 @@ def __init__(
self.until_date = until_date or datetime.utcnow().isoformat()[:10]
self.limit = limit or 100
self.timeout = timeout or 5
self.verify = verify

def harvest_documents(self) -> Generator[Dict[str, Any], None, None]:
"""
Expand Down Expand Up @@ -199,7 +203,7 @@ def harvest_documents(self) -> Generator[Dict[str, Any], None, None]:

# Faz requisição
# verify=False é necessário para evitar erros de SSL em ambientes onde o certificado do OPAC não é reconhecido
response = fetch_data(url, json=True, timeout=self.timeout, verify=False)
response = fetch_data(url, json=True, timeout=self.timeout, verify=self.verify)

# Define total de páginas na primeira iteração
if total_pages is None:
Expand Down
2 changes: 1 addition & 1 deletion core/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class NonRetryableError(Exception):
wait=wait_exponential(multiplier=1, min=1, max=5),
stop=stop_after_attempt(5),
)
def fetch_data(url, headers=None, json=False, timeout=FETCH_DATA_TIMEOUT, verify=True):
def fetch_data(url, headers=None, json=False, timeout=FETCH_DATA_TIMEOUT, verify=False):
"""
Get the resource with HTTP
Retry: Wait 2^x * 1 second between each retry starting with 4 seconds,
Expand Down
24 changes: 14 additions & 10 deletions issue/articlemeta/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@


def harvest_issue_identifiers(
collection_acron, from_date, until_date, force_update, timeout=30
collection_acron, from_date, until_date, force_update, timeout=30, verify=False
):
# chamado em core/issue/tasks.py
try:
harvester = AMHarvester(
record_type="issue",
collection_acron=collection_acron,
from_date=from_date,
until_date=until_date,
verify=verify
)
yield from harvester.harvest_documents()

Expand All @@ -40,7 +42,7 @@ def harvest_issue_identifiers(
)


def harvest_and_load_issue(user, url, code, collection_acron, processing_date, force_update, timeout=30):
def harvest_and_load_issue(user, url, code, collection_acron, processing_date, force_update, timeout=30, verify=False):
if not url:
raise ValueError("URL is required to harvest and load issue")

Expand All @@ -50,7 +52,7 @@ def harvest_and_load_issue(user, url, code, collection_acron, processing_date, f
if not collection_acron:
raise ValueError("Collection acronym is required to harvest and load issue")

harvested_data = harvest_issue_data(url, timeout=timeout)
harvested_data = harvest_issue_data(url, timeout=timeout, verify=verify)
am_issue = load_am_issue(
user,
Collection.objects.get(acron3=collection_acron),
Expand All @@ -60,16 +62,17 @@ def harvest_and_load_issue(user, url, code, collection_acron, processing_date, f
harvested_data,
force_update=force_update,
timeout=timeout,
verify=verify,
)
if not am_issue:
raise ValueError(f"Unable to create am_issue for {url}")
return create_issue_from_am_issue(user, am_issue)


def harvest_issue_data(url, timeout=30):
def harvest_issue_data(url, timeout=30, verify=False):
try:
item = {}
item["data"] = utils.fetch_data(url, json=True, timeout=timeout, verify=False)
item["data"] = utils.fetch_data(url, json=True, timeout=timeout, verify=verify)
item["status"] = "pending"
return item
except Exception as e:
Expand All @@ -96,14 +99,15 @@ def load_am_issue(
force_update,
do_harvesting=False,
timeout=30,
verify=False,
):
try:
if not url:
raise ValueError("URL is required to load AMIssue")

# Corrigido: não redefine harvested_data se já existe
if do_harvesting or not harvested_data:
harvested_data = harvest_issue_data(url, timeout=timeout)
harvested_data = harvest_issue_data(url, timeout=timeout, verify=verify)

return AMIssue.create_or_update(
pid=pid,
Expand Down Expand Up @@ -132,7 +136,7 @@ def load_am_issue(
return None


def complete_am_issue(user, am_issue):
def complete_am_issue(user, am_issue, verify=False):
try:
detail = {}

Expand All @@ -144,7 +148,7 @@ def complete_am_issue(user, am_issue):
if not am_issue.url:
raise ValueError("am_issue.url is required")

harvested_data = harvest_issue_data(am_issue.url)
harvested_data = harvest_issue_data(am_issue.url, verify=verify)
detail["harvested_data"] = str(harvested_data)
am_issue.status = harvested_data.get("status")
am_issue.data = harvested_data.get("data")
Expand All @@ -160,7 +164,7 @@ def complete_am_issue(user, am_issue):
)


def get_issue_data_from_am_issue(am_issue, user=None):
def get_issue_data_from_am_issue(am_issue, user=None, verify=False):
"""
Extrai e ajusta dados do AMIssue para criação de Issue.

Expand All @@ -183,7 +187,7 @@ def get_issue_data_from_am_issue(am_issue, user=None):
am_data = am_issue.data
if not am_data:
if user:
complete_am_issue(user, am_issue)
complete_am_issue(user, am_issue, verify=verify)
am_data = am_issue.data

if not am_data:
Expand Down
8 changes: 7 additions & 1 deletion issue/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def load_issue_from_articlemeta(
until_date=None,
force_update=None,
timeout=30,
verify=False,
):
"""
Carrega issues do ArticleMeta para collections específicas.
Expand All @@ -53,7 +54,7 @@ def load_issue_from_articlemeta(

# Coletar identificadores de issues
for issue_identifier in harvest_issue_identifiers(
acron3, from_date, until_date, force_update, timeout
acron3, from_date, until_date, force_update, timeout, verify
):
try:
logger.info(f"Scheduling load for issue {issue_identifier.get('code')} in collection {acron3}")
Expand All @@ -65,6 +66,7 @@ def load_issue_from_articlemeta(
issue_identifier=issue_identifier,
force_update=force_update,
timeout=timeout,
verify=verify,
)
except Exception as e:
exc_type, exc_value, exc_traceback = sys.exc_info()
Expand Down Expand Up @@ -116,6 +118,8 @@ def task_harvest_and_load_issue(
issue_identifier=None,
force_update=None,
timeout=30,
verify=False,

):
"""
Carrega um issue específico do ArticleMeta.
Expand All @@ -127,6 +131,7 @@ def task_harvest_and_load_issue(
issue_identifier: Dados do identificador do issue
force_update: Forçar atualização de registros existentes
timeout: Timeout para requisições HTTP
verify: Verificação SSL para requisições HTTP
"""
try:
user = _get_user(request=self.request, user_id=user_id, username=username)
Expand Down Expand Up @@ -158,6 +163,7 @@ def task_harvest_and_load_issue(
processing_date=processing_date,
force_update=force_update,
timeout=timeout,
verify=verify,
)

if issue:
Expand Down
6 changes: 3 additions & 3 deletions journal/sources/article_meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def __init__(self, message):
super().__init__(f"Failed to save SciELO Journal from article meta: {message}")


def _get_collection_journals(offset=None, limit=None, collection=None, verify=True):
def _get_collection_journals(offset=None, limit=None, collection=None, verify=False):
limit = limit or 10
offset = f"&offset={offset}" if offset else ""
if not collection:
Expand All @@ -30,7 +30,7 @@ def _get_collection_journals(offset=None, limit=None, collection=None, verify=Tr
return data


def _fetch_and_store_journal(collection, issn, obj_collection, user, verify=True):
def _fetch_and_store_journal(collection, issn, obj_collection, user, verify=False):
url_journal = f"https://articlemeta.scielo.org/api/v1/journal/?collection={collection}&issn={issn}"
data_journal = fetch_data(url_journal, json=True, timeout=30, verify=verify)
AMJournal.create_or_update(
Expand All @@ -41,7 +41,7 @@ def _fetch_and_store_journal(collection, issn, obj_collection, user, verify=True
)


def process_journal_article_meta(collection, limit, user, journal_issn_list=None, verify=True):
def process_journal_article_meta(collection, limit, user, journal_issn_list=None, verify=False):
obj_collection = Collection.objects.get(acron3=collection)
if journal_issn_list:
for issn in journal_issn_list:
Expand Down
4 changes: 2 additions & 2 deletions journal/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def load_journal_from_article_meta(
collection_acron=None,
load_data=None,
journal_issn_list=None,
verify=True,
verify=False,
):
try:
if journal_issn_list and not collection_acron:
Expand Down Expand Up @@ -95,7 +95,7 @@ def load_journal_from_article_meta_for_one_collection(
limit=None,
load_data=None,
journal_issn_list=None,
verify=True,
verify=False,
):
user = _get_user(self.request, username=username, user_id=user_id)
try:
Expand Down
Loading