From 5ff2e3c42345fd1e2b6f81f9a031e31326fad173 Mon Sep 17 00:00:00 2001 From: gabestein Date: Thu, 11 Jul 2024 16:09:49 -0400 Subject: [PATCH 1/2] batches author lookups in imports --- workers/tasks/import/metadata.ts | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/workers/tasks/import/metadata.ts b/workers/tasks/import/metadata.ts index 7f7270a566..f353667406 100644 --- a/workers/tasks/import/metadata.ts +++ b/workers/tasks/import/metadata.ts @@ -3,6 +3,7 @@ import { metaValueToString, metaValueToJsonSerializable } from '@pubpub/prosemir import { getSearchUsers } from 'server/search/queries'; import { isValidDate } from 'utils/dates'; +import { asyncMap } from 'utils/async'; import { Falsy } from 'types'; const getAuthorsArray = (author) => { @@ -23,17 +24,20 @@ const getDateStringFromMetaValue = (metaDateString) => { return null; }; +const getAuthorEntries = async (authorEntry) => { + if (typeof authorEntry === 'string') { + const users = await getSearchUsers(authorEntry); + return { name: authorEntry, users: users.map((user) => user.toJSON()) }; + } + return authorEntry; +}; const getAttributions = async (author) => { if (author) { const authorsArray = getAuthorsArray(author); const authorEntries = authorsArray.map(metaValueToJsonSerializable) as any[]; const attributions = await Promise.all( - authorEntries.map(async (authorEntry: string) => { - if (typeof authorEntry === 'string') { - const users = await getSearchUsers(authorEntry); - return { name: authorEntry, users: users.map((user) => user.toJSON()) }; - } - return authorEntry; + await asyncMap(authorEntries, (authorEntry) => getAuthorEntries(authorEntry), { + concurrency: 5, }), ); return attributions; From 80face94b407341754569f018bb0ffaa477c10bc Mon Sep 17 00:00:00 2001 From: gabestein Date: Fri, 12 Jul 2024 15:21:09 -0400 Subject: [PATCH 2/2] batch at 2 and up queue time to 5 mins --- workers/queue.ts | 2 +- workers/tasks/import/metadata.ts | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/workers/queue.ts b/workers/queue.ts index dfae0d3644..4247cea3fb 100644 --- a/workers/queue.ts +++ b/workers/queue.ts @@ -11,7 +11,7 @@ import { WorkerTask } from 'server/models'; import { expect } from 'utils/assert'; import { createCachePurgeDebouncer } from 'utils/caching/createCachePurgeDebouncer'; -const maxWorkerTimeSeconds = 120; +const maxWorkerTimeSeconds = 300; const maxWorkerThreads = 5; let currentWorkerThreads = 0; diff --git a/workers/tasks/import/metadata.ts b/workers/tasks/import/metadata.ts index f353667406..ad6f87b558 100644 --- a/workers/tasks/import/metadata.ts +++ b/workers/tasks/import/metadata.ts @@ -35,10 +35,12 @@ const getAttributions = async (author) => { if (author) { const authorsArray = getAuthorsArray(author); const authorEntries = authorsArray.map(metaValueToJsonSerializable) as any[]; - const attributions = await Promise.all( - await asyncMap(authorEntries, (authorEntry) => getAuthorEntries(authorEntry), { - concurrency: 5, - }), + const attributions = await asyncMap( + authorEntries, + (authorEntry) => getAuthorEntries(authorEntry), + { + concurrency: 2, + }, ); return attributions; }