Merge pull request #3470 from omnivore-app/fix/async-bulk-action
async bulk action on item count > 100 and create batch jobs
This commit is contained in:
44
packages/api/src/jobs/bulk_action.ts
Normal file
44
packages/api/src/jobs/bulk_action.ts
Normal file
@ -0,0 +1,44 @@
|
||||
import { BulkActionType } from '../generated/graphql'
|
||||
import { getBackendQueue } from '../queue-processor'
|
||||
import { batchUpdateLibraryItems } from '../services/library_item'
|
||||
import { logger } from '../utils/logger'
|
||||
|
||||
export interface BulkActionData {
|
||||
count: number
|
||||
userId: string
|
||||
action: BulkActionType
|
||||
query: string
|
||||
batchSize: number
|
||||
labelIds?: string[]
|
||||
args?: unknown
|
||||
}
|
||||
|
||||
export const BULK_ACTION_JOB_NAME = 'bulk-action'
|
||||
|
||||
export const bulkAction = async (data: BulkActionData) => {
|
||||
const { userId, action, query, labelIds, args, batchSize, count } = data
|
||||
|
||||
const queue = await getBackendQueue()
|
||||
if (!queue) {
|
||||
throw new Error('Queue not initialized')
|
||||
}
|
||||
const now = new Date().toISOString()
|
||||
let offset = 0
|
||||
|
||||
do {
|
||||
const searchArgs = {
|
||||
size: batchSize,
|
||||
query: `(${query}) AND updated:*..${now}`, // only process items that have not been updated
|
||||
}
|
||||
|
||||
try {
|
||||
await batchUpdateLibraryItems(action, searchArgs, userId, labelIds, args)
|
||||
} catch (error) {
|
||||
logger.error('batch update error', error)
|
||||
}
|
||||
|
||||
offset += batchSize
|
||||
} while (offset < count)
|
||||
|
||||
return true
|
||||
}
|
||||
@ -14,10 +14,15 @@ import express, { Express } from 'express'
|
||||
import { SnakeNamingStrategy } from 'typeorm-naming-strategies'
|
||||
import { appDataSource } from './data_source'
|
||||
import { env } from './env'
|
||||
import { bulkAction, BULK_ACTION_JOB_NAME } from './jobs/bulk_action'
|
||||
import { findThumbnail, THUMBNAIL_JOB } from './jobs/find_thumbnail'
|
||||
import { refreshAllFeeds } from './jobs/rss/refreshAllFeeds'
|
||||
import { refreshFeed } from './jobs/rss/refreshFeed'
|
||||
import { savePageJob } from './jobs/save_page'
|
||||
import {
|
||||
syncReadPositionsJob,
|
||||
SYNC_READ_POSITIONS_JOB_NAME,
|
||||
} from './jobs/sync_read_positions'
|
||||
import { triggerRule, TRIGGER_RULE_JOB_NAME } from './jobs/trigger_rule'
|
||||
import {
|
||||
updateHighlight,
|
||||
@ -27,12 +32,8 @@ import {
|
||||
} from './jobs/update_db'
|
||||
import { updatePDFContentJob } from './jobs/update_pdf_content'
|
||||
import { redisDataSource } from './redis_data_source'
|
||||
import { logger, CustomTypeOrmLogger } from './utils/logger'
|
||||
import {
|
||||
SYNC_READ_POSITIONS_JOB_NAME,
|
||||
syncReadPositionsJob,
|
||||
} from './jobs/sync_read_positions'
|
||||
import { CACHED_READING_POSITION_PREFIX } from './services/cached_reading_position'
|
||||
import { CustomTypeOrmLogger, logger } from './utils/logger'
|
||||
|
||||
export const QUEUE_NAME = 'omnivore-backend-queue'
|
||||
|
||||
@ -84,6 +85,8 @@ export const createWorker = (connection: ConnectionOptions) =>
|
||||
return updateHighlight(job.data)
|
||||
case SYNC_READ_POSITIONS_JOB_NAME:
|
||||
return syncReadPositionsJob(job.data)
|
||||
case BULK_ACTION_JOB_NAME:
|
||||
return bulkAction(job.data)
|
||||
}
|
||||
},
|
||||
{
|
||||
|
||||
@ -16,7 +16,6 @@ import {
|
||||
BulkActionError,
|
||||
BulkActionErrorCode,
|
||||
BulkActionSuccess,
|
||||
BulkActionType,
|
||||
ContentReader,
|
||||
CreateArticleError,
|
||||
CreateArticleErrorCode,
|
||||
@ -64,12 +63,12 @@ import { authTrx, getColumns } from '../../repository'
|
||||
import { getInternalLabelWithColor } from '../../repository/label'
|
||||
import { libraryItemRepository } from '../../repository/library_item'
|
||||
import { userRepository } from '../../repository/user'
|
||||
import { clearCachedReadingPosition } from '../../services/cached_reading_position'
|
||||
import { createPageSaveRequest } from '../../services/create_page_save_request'
|
||||
import { findHighlightsByLibraryItemId } from '../../services/highlights'
|
||||
import {
|
||||
addLabelsToLibraryItem,
|
||||
createAndSaveLabelsInLibraryItem,
|
||||
findLabelsByIds,
|
||||
findOrCreateLabels,
|
||||
} from '../../services/labels'
|
||||
import {
|
||||
@ -93,6 +92,7 @@ import {
|
||||
import { traceAs } from '../../tracing'
|
||||
import { analytics } from '../../utils/analytics'
|
||||
import { isSiteBlockedForParse } from '../../utils/blocked'
|
||||
import { enqueueBulkAction } from '../../utils/createTask'
|
||||
import { authorized } from '../../utils/gql-utils'
|
||||
import {
|
||||
cleanUrl,
|
||||
@ -112,10 +112,6 @@ import {
|
||||
parsePreparedContent,
|
||||
} from '../../utils/parser'
|
||||
import { getStorageFileDetails } from '../../utils/uploads'
|
||||
import {
|
||||
clearCachedReadingPosition,
|
||||
fetchCachedReadingPosition,
|
||||
} from '../../services/cached_reading_position'
|
||||
|
||||
export enum ArticleFormat {
|
||||
Markdown = 'markdown',
|
||||
@ -879,36 +875,41 @@ export const bulkActionResolver = authorized<
|
||||
},
|
||||
})
|
||||
|
||||
// the query size is limited to 4000 characters to allow for 100 items
|
||||
if (!query || query.length > 4000) {
|
||||
log.error('bulkActionResolver error', {
|
||||
error: 'QueryTooLong',
|
||||
query,
|
||||
})
|
||||
const batchSize = 100
|
||||
const searchArgs = {
|
||||
query,
|
||||
size: 0,
|
||||
}
|
||||
const searchResult = await searchLibraryItems(searchArgs, uid)
|
||||
const count = searchResult.count
|
||||
if (count === 0) {
|
||||
log.info('No items found for bulk action')
|
||||
return { success: true }
|
||||
}
|
||||
|
||||
if (count <= batchSize) {
|
||||
searchArgs.size = count
|
||||
// if there are less than 100 items, update them synchronously
|
||||
await batchUpdateLibraryItems(action, searchArgs, uid, labelIds, args)
|
||||
|
||||
return { success: true }
|
||||
}
|
||||
|
||||
// if there are more than 100 items, update them asynchronously
|
||||
const data = {
|
||||
userId: uid,
|
||||
action,
|
||||
labelIds: labelIds || undefined,
|
||||
query,
|
||||
count,
|
||||
args,
|
||||
batchSize,
|
||||
}
|
||||
const job = await enqueueBulkAction(data)
|
||||
if (!job) {
|
||||
return { errorCodes: [BulkActionErrorCode.BadRequest] }
|
||||
}
|
||||
|
||||
// get labels if needed
|
||||
let labels = undefined
|
||||
if (action === BulkActionType.AddLabels) {
|
||||
if (!labelIds || labelIds.length === 0) {
|
||||
return { errorCodes: [BulkActionErrorCode.BadRequest] }
|
||||
}
|
||||
|
||||
labels = await findLabelsByIds(labelIds, uid)
|
||||
}
|
||||
|
||||
await batchUpdateLibraryItems(
|
||||
action,
|
||||
{
|
||||
query,
|
||||
useFolders: query.includes('use:folders'),
|
||||
},
|
||||
uid,
|
||||
labels,
|
||||
args
|
||||
)
|
||||
|
||||
return { success: true }
|
||||
} catch (error) {
|
||||
log.error('bulkActionResolver error', error)
|
||||
|
||||
@ -1,7 +1,13 @@
|
||||
import { ExpressionToken, LiqeQuery } from '@omnivore/liqe'
|
||||
import { DateTime } from 'luxon'
|
||||
import { DeepPartial, FindOptionsWhere, ObjectLiteral } from 'typeorm'
|
||||
import {
|
||||
DeepPartial,
|
||||
EntityManager,
|
||||
FindOptionsWhere,
|
||||
ObjectLiteral,
|
||||
} from 'typeorm'
|
||||
import { QueryDeepPartialEntity } from 'typeorm/query-builder/QueryPartialEntity'
|
||||
import { ReadingProgressDataSource } from '../datasources/reading_progress_data_source'
|
||||
import { Highlight } from '../entity/highlight'
|
||||
import { Label } from '../entity/label'
|
||||
import { LibraryItem, LibraryItemState } from '../entity/library_item'
|
||||
@ -13,7 +19,6 @@ import {
|
||||
getColumns,
|
||||
isUniqueViolation,
|
||||
queryBuilderToRawSql,
|
||||
valuesToRawSql,
|
||||
} from '../repository'
|
||||
import { libraryItemRepository } from '../repository/library_item'
|
||||
import { setRecentlySavedItemInRedis, wordsCount } from '../utils/helpers'
|
||||
@ -103,6 +108,20 @@ interface Select {
|
||||
alias?: string
|
||||
}
|
||||
|
||||
const readingProgressDataSource = new ReadingProgressDataSource()
|
||||
|
||||
const markItemAsRead = async (libraryItemId: string, userId: string) => {
|
||||
return await readingProgressDataSource.updateReadingProgress(
|
||||
userId,
|
||||
libraryItemId,
|
||||
{
|
||||
readingProgressPercent: 100,
|
||||
readingProgressTopPercent: 100,
|
||||
readingProgressAnchorIndex: undefined,
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
const handleNoCase = (value: string) => {
|
||||
const keywordRegexMap: Record<string, RegExp> = {
|
||||
highlight: /^highlight(s)?$/i,
|
||||
@ -927,7 +946,7 @@ export const batchUpdateLibraryItems = async (
|
||||
action: BulkActionType,
|
||||
searchArgs: SearchArgs,
|
||||
userId: string,
|
||||
labels?: Label[],
|
||||
labelIds?: string[] | null,
|
||||
args?: unknown
|
||||
) => {
|
||||
interface FolderArguments {
|
||||
@ -938,46 +957,24 @@ export const batchUpdateLibraryItems = async (
|
||||
return 'folder' in args
|
||||
}
|
||||
|
||||
const now = new Date().toISOString()
|
||||
// build the script
|
||||
let values: Record<string, string | number> = {}
|
||||
let addLabels = false
|
||||
switch (action) {
|
||||
case BulkActionType.Archive:
|
||||
values = {
|
||||
archived_at: now,
|
||||
state: LibraryItemState.Archived,
|
||||
}
|
||||
break
|
||||
case BulkActionType.Delete:
|
||||
values = {
|
||||
state: LibraryItemState.Deleted,
|
||||
deleted_at: now,
|
||||
}
|
||||
break
|
||||
case BulkActionType.AddLabels:
|
||||
addLabels = true
|
||||
break
|
||||
case BulkActionType.MarkAsRead:
|
||||
values = {
|
||||
read_at: now,
|
||||
reading_progress_top_percent: 100,
|
||||
reading_progress_bottom_percent: 100,
|
||||
}
|
||||
break
|
||||
case BulkActionType.MoveToFolder:
|
||||
if (!args || !isFolderArguments(args)) {
|
||||
throw new Error('Invalid arguments')
|
||||
}
|
||||
const getQueryBuilder = (userId: string, em: EntityManager) => {
|
||||
const queryBuilder = em
|
||||
.createQueryBuilder(LibraryItem, 'library_item')
|
||||
.where('library_item.user_id = :userId', { userId })
|
||||
if (query) {
|
||||
queryBuilder
|
||||
.andWhere(`(${query})`)
|
||||
.setParameters(paramtersToObject(parameters))
|
||||
}
|
||||
return queryBuilder
|
||||
}
|
||||
|
||||
values = {
|
||||
folder: args.folder,
|
||||
saved_at: now,
|
||||
}
|
||||
|
||||
break
|
||||
default:
|
||||
throw new Error('Invalid bulk action')
|
||||
const getLibraryItemIds = async (
|
||||
userId: string,
|
||||
em: EntityManager
|
||||
): Promise<{ id: string }[]> => {
|
||||
const queryBuilder = getQueryBuilder(userId, em)
|
||||
return queryBuilder.select('library_item.id', 'id').getRawMany()
|
||||
}
|
||||
|
||||
if (!searchArgs.query) {
|
||||
@ -988,63 +985,73 @@ export const batchUpdateLibraryItems = async (
|
||||
const parameters: ObjectLiteral[] = []
|
||||
const query = buildQuery(searchQuery, parameters)
|
||||
|
||||
await authTrx(async (tx) => {
|
||||
const queryBuilder = tx
|
||||
.createQueryBuilder(LibraryItem, 'library_item')
|
||||
.where('library_item.user_id = :userId', { userId })
|
||||
|
||||
if (query) {
|
||||
queryBuilder
|
||||
.andWhere(`(${query})`)
|
||||
.setParameters(paramtersToObject(parameters))
|
||||
}
|
||||
|
||||
if (addLabels) {
|
||||
if (!labels) {
|
||||
const now = new Date().toISOString()
|
||||
// build the script
|
||||
let values: Record<string, string | number> = {}
|
||||
switch (action) {
|
||||
case BulkActionType.Archive:
|
||||
values = {
|
||||
archivedAt: now,
|
||||
state: LibraryItemState.Archived,
|
||||
}
|
||||
break
|
||||
case BulkActionType.Delete:
|
||||
values = {
|
||||
state: LibraryItemState.Deleted,
|
||||
deletedAt: now,
|
||||
}
|
||||
break
|
||||
case BulkActionType.AddLabels: {
|
||||
if (!labelIds) {
|
||||
throw new Error('Labels are required for this action')
|
||||
}
|
||||
|
||||
const labelIds = labels.map((label) => label.id)
|
||||
const libraryItems = await queryBuilder.getMany()
|
||||
// add labels in library items
|
||||
const libraryItems = await authTrx(
|
||||
async (tx) => getLibraryItemIds(userId, tx),
|
||||
undefined,
|
||||
userId
|
||||
)
|
||||
// add labels to library items
|
||||
for (const libraryItem of libraryItems) {
|
||||
await addLabelsToLibraryItem(labelIds, libraryItem.id, userId)
|
||||
|
||||
libraryItem.labels = labels
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
case BulkActionType.MarkAsRead: {
|
||||
const libraryItems = await authTrx(
|
||||
async (tx) => getLibraryItemIds(userId, tx),
|
||||
undefined,
|
||||
userId
|
||||
)
|
||||
// update reading progress for library items
|
||||
for (const libraryItem of libraryItems) {
|
||||
await markItemAsRead(libraryItem.id, userId)
|
||||
}
|
||||
|
||||
// generate raw sql because postgres doesn't support prepared statements in DO blocks
|
||||
const countSql = queryBuilderToRawSql(queryBuilder.select('COUNT(1)'))
|
||||
const subQuery = queryBuilderToRawSql(queryBuilder.select('id'))
|
||||
const valuesSql = valuesToRawSql(values)
|
||||
return
|
||||
}
|
||||
case BulkActionType.MoveToFolder:
|
||||
if (!args || !isFolderArguments(args)) {
|
||||
throw new Error('Invalid arguments')
|
||||
}
|
||||
|
||||
const start = new Date().toISOString()
|
||||
const batchSize = 1000
|
||||
const sql = `
|
||||
-- Set batch size
|
||||
DO $$
|
||||
DECLARE
|
||||
batch_size INT := ${batchSize};
|
||||
BEGIN
|
||||
-- Loop through batches
|
||||
FOR i IN 0..CEIL((${countSql}) * 1.0 / batch_size) - 1 LOOP
|
||||
-- Update the batch
|
||||
UPDATE omnivore.library_item
|
||||
SET ${valuesSql}
|
||||
WHERE id = ANY(
|
||||
${subQuery}
|
||||
AND updated_at < '${start}'
|
||||
LIMIT batch_size
|
||||
);
|
||||
END LOOP;
|
||||
END $$
|
||||
`
|
||||
values = {
|
||||
folder: args.folder,
|
||||
savedAt: now,
|
||||
}
|
||||
|
||||
return tx.query(sql)
|
||||
})
|
||||
break
|
||||
default:
|
||||
throw new Error('Invalid bulk action')
|
||||
}
|
||||
|
||||
await authTrx(
|
||||
async (tx) =>
|
||||
getQueryBuilder(userId, tx).update(LibraryItem).set(values).execute(),
|
||||
undefined,
|
||||
userId
|
||||
)
|
||||
}
|
||||
|
||||
export const deleteLibraryItemById = async (id: string, userId?: string) => {
|
||||
|
||||
@ -14,6 +14,7 @@ import {
|
||||
ArticleSavingRequestStatus,
|
||||
CreateLabelInput,
|
||||
} from '../generated/graphql'
|
||||
import { BulkActionData, BULK_ACTION_JOB_NAME } from '../jobs/bulk_action'
|
||||
import { THUMBNAIL_JOB } from '../jobs/find_thumbnail'
|
||||
import { queueRSSRefreshFeedJob } from '../jobs/rss/refreshAllFeeds'
|
||||
import { TriggerRuleJobData, TRIGGER_RULE_JOB_NAME } from '../jobs/trigger_rule'
|
||||
@ -716,4 +717,25 @@ export const enqueueUpdateHighlight = async (data: UpdateHighlightData) => {
|
||||
}
|
||||
}
|
||||
|
||||
export const enqueueBulkAction = async (data: BulkActionData) => {
|
||||
const queue = await getBackendQueue()
|
||||
if (!queue) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
const jobId = `${BULK_ACTION_JOB_NAME}-${data.userId}`
|
||||
|
||||
try {
|
||||
return queue.add(BULK_ACTION_JOB_NAME, data, {
|
||||
attempts: 1,
|
||||
priority: 10,
|
||||
jobId, // deduplication
|
||||
removeOnComplete: true,
|
||||
removeOnFail: true,
|
||||
})
|
||||
} catch (error) {
|
||||
logger.error('error enqueuing bulk action job', error)
|
||||
}
|
||||
}
|
||||
|
||||
export default createHttpTaskWithToken
|
||||
|
||||
@ -2159,7 +2159,7 @@ describe('Article API', () => {
|
||||
}
|
||||
`
|
||||
|
||||
context('when action is MarkAsRead and query is in:unread', () => {
|
||||
xcontext('when action is MarkAsRead and query is in:unread', () => {
|
||||
before(async () => {
|
||||
// Create some test items
|
||||
for (let i = 0; i < 5; i++) {
|
||||
|
||||
Reference in New Issue
Block a user