From f541b8f8cadcba43783a04721b74afd79f12d90d Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 22 Feb 2024 20:22:37 +0800 Subject: [PATCH 1/2] export at most 1000 most recent items --- .../src/jobs/integration/export_all_items.ts | 44 +++++++++++-------- .../api/src/jobs/integration/export_item.ts | 2 - packages/api/src/services/library_item.ts | 23 +++++++++- 3 files changed, 47 insertions(+), 22 deletions(-) diff --git a/packages/api/src/jobs/integration/export_all_items.ts b/packages/api/src/jobs/integration/export_all_items.ts index fb29b20fb..52bf636b4 100644 --- a/packages/api/src/jobs/integration/export_all_items.ts +++ b/packages/api/src/jobs/integration/export_all_items.ts @@ -1,6 +1,6 @@ import { IntegrationType } from '../../entity/integration' import { findIntegration } from '../../services/integrations' -import { searchLibraryItems } from '../../services/library_item' +import { findRecentLibraryItems } from '../../services/library_item' import { findActiveUser } from '../../services/user' import { enqueueExportItem } from '../../utils/createTask' import { logger } from '../../utils/logger' @@ -39,31 +39,37 @@ export const exportAllItems = async (jobData: ExportAllItemsJobData) => { return } - // get paginated items from the database - const first = 50 - let after = 0 - for (;;) { - console.log('searching for items...', { - userId, - first, - after, - }) - const searchResult = await searchLibraryItems( - { from: after, size: first }, - userId - ) - const libraryItems = searchResult.libraryItems - const size = libraryItems.length - if (size === 0) { - break + const maxItems = 1000 + const limit = 50 + let offset = 0 + // get max 1000 most recent items from the database + while (offset < maxItems) { + const libraryItems = await findRecentLibraryItems(userId, limit, offset) + if (libraryItems.length === 0) { + logger.info('no library items found', { + userId, + }) + return } + logger.info('enqueuing export item...', { + userId, + offset, + integrationId, + }) + await enqueueExportItem({ userId, libraryItemIds: libraryItems.map((item) => item.id), integrationId, }) - after += size + offset += libraryItems.length + + logger.info('exported items', { + userId, + offset, + integrationId, + }) } } diff --git a/packages/api/src/jobs/integration/export_item.ts b/packages/api/src/jobs/integration/export_item.ts index 896ca45b4..cd99bba5e 100644 --- a/packages/api/src/jobs/integration/export_item.ts +++ b/packages/api/src/jobs/integration/export_item.ts @@ -21,7 +21,6 @@ export const exportItem = async (jobData: ExportItemJobData) => { if (libraryItems.length === 0) { logger.error('library items not found', { userId, - libraryItemIds, }) return } @@ -40,7 +39,6 @@ export const exportItem = async (jobData: ExportItemJobData) => { integrations.map(async (integration) => { const logObject = { userId, - libraryItemIds, integrationId: integration.id, } logger.info('exporting item...', logObject) diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index 4cb581784..e2f73cc0e 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -652,6 +652,28 @@ export const searchLibraryItems = async ( ) } +export const findRecentLibraryItems = async ( + userId: string, + limit = 1000, + offset?: number +) => { + return authTrx( + async (tx) => + tx + .createQueryBuilder(LibraryItem, 'library_item') + .where('library_item.user_id = :userId', { userId }) + .andWhere('library_item.state = :state', { + state: LibraryItemState.Succeeded, + }) + .orderBy('library_item.saved_at', 'DESC', 'NULLS LAST') + .take(limit) + .skip(offset) + .getMany(), + undefined, + userId + ) +} + export const findLibraryItemsByIds = async (ids: string[], userId: string) => { return authTrx( async (tx) => @@ -659,7 +681,6 @@ export const findLibraryItemsByIds = async (ids: string[], userId: string) => { .createQueryBuilder(LibraryItem, 'library_item') .leftJoinAndSelect('library_item.labels', 'labels') .leftJoinAndSelect('library_item.highlights', 'highlights') - .leftJoinAndSelect('highlights.user', 'user') .where('library_item.id IN (:...ids)', { ids }) .getMany(), undefined, From b9471002785b33dbacd92107e0aef885f4d2fedc Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 22 Feb 2024 20:23:37 +0800 Subject: [PATCH 2/2] increase batch size to 100 --- packages/api/src/jobs/integration/export_all_items.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/jobs/integration/export_all_items.ts b/packages/api/src/jobs/integration/export_all_items.ts index 52bf636b4..930a4c8df 100644 --- a/packages/api/src/jobs/integration/export_all_items.ts +++ b/packages/api/src/jobs/integration/export_all_items.ts @@ -40,7 +40,7 @@ export const exportAllItems = async (jobData: ExportAllItemsJobData) => { } const maxItems = 1000 - const limit = 50 + const limit = 100 let offset = 0 // get max 1000 most recent items from the database while (offset < maxItems) {