From 5eb19cda71c3bc6acabbd5c1190aaf9561601d5a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 2 Jan 2024 13:37:50 +0800 Subject: [PATCH 1/3] prune users in batch --- packages/api/src/routers/svc/user.ts | 4 +-- packages/api/src/services/user.ts | 44 +++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 3 deletions(-) diff --git a/packages/api/src/routers/svc/user.ts b/packages/api/src/routers/svc/user.ts index ca6e9dbea..0c7fc5991 100644 --- a/packages/api/src/routers/svc/user.ts +++ b/packages/api/src/routers/svc/user.ts @@ -5,7 +5,7 @@ import express from 'express' import { LessThan } from 'typeorm' import { StatusType } from '../../entity/user' import { readPushSubscription } from '../../pubsub' -import { deleteUsers } from '../../services/user' +import { batchDeleteUsers } from '../../services/user' import { corsConfig } from '../../utils/corsConfig' import { logger } from '../../utils/logger' @@ -52,7 +52,7 @@ export function userServiceRouter() { const subTime = cleanupMessage.subDays * 1000 * 60 * 60 * 24 // convert days to milliseconds try { - const result = await deleteUsers({ + const result = await batchDeleteUsers({ status: StatusType.Deleted, updatedAt: LessThan(new Date(Date.now() - subTime)), // subDays ago }) diff --git a/packages/api/src/services/user.ts b/packages/api/src/services/user.ts index f3d01e489..848c12b0c 100644 --- a/packages/api/src/services/user.ts +++ b/packages/api/src/services/user.ts @@ -1,6 +1,6 @@ import { DeepPartial, FindOptionsWhere, In } from 'typeorm' import { StatusType, User } from '../entity/user' -import { authTrx } from '../repository' +import { authTrx, getRepository, queryBuilderToRawSql } from '../repository' import { userRepository } from '../repository/user' import { SetClaimsRole } from '../utils/dictionary' @@ -47,3 +47,45 @@ export const createUsers = async (users: DeepPartial[]) => { SetClaimsRole.ADMIN ) } + +export const batchDeleteUsers = async (criteria: FindOptionsWhere) => { + const userQb = getRepository(User).createQueryBuilder().where(criteria) + const userCountSql = queryBuilderToRawSql(userQb.select('COUNT(1)')) + const userSubQuery = queryBuilderToRawSql(userQb.select('id INTO user_ids')) + + const batchSize = 1000 + const start = new Date().toISOString() + const sql = ` + -- Set batch size + DO $$ + DECLARE + batch_size INT := ${batchSize}; + user_ids UUID[]; + BEGIN + -- Loop through batches of users + FOR i IN 0..CEIL((${userCountSql})) * 1.0 / batch_size) - 1 LOOP + -- GET batch of user ids + ${userSubQuery} LIMIT ${batchSize} OFFSET i * batch_size; + + -- Loop through batches of items + FOR j IN 0..CEIL((SELECT COUNT(1) FROM omnivore.library_item WHERE user_id = ANY(user_ids))) * 1.0 / batch_size) - 1 LOOP + -- Delete batch of items + DELETE FROM omnivore.library_item + WHERE user_id = ANY(user_ids) + AND updated_at < '${start}' + LIMIT ${batchSize}; + END LOOP; + + -- Delete the batch of users + DELETE FROM omnivore.user WHERE id = ANY(user_ids); + END LOOP; + END $$ + ` + + return authTrx( + async (t) => t.query(sql), + undefined, + undefined, + SetClaimsRole.ADMIN + ) +} From d31c520a904a5039bd28bd0c3d0c8bf78f78dd31 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 2 Jan 2024 14:08:41 +0800 Subject: [PATCH 2/3] add empty trash api to prune soft deleted items in trash --- packages/api/src/generated/graphql.ts | 42 +++++++++++++++++++++ packages/api/src/generated/schema.graphql | 15 ++++++++ packages/api/src/resolvers/article/index.ts | 24 ++++++++++++ packages/api/src/routers/svc/user.ts | 4 +- packages/api/src/schema.ts | 15 ++++++++ packages/api/src/services/library_item.ts | 27 +++++++++++++ packages/api/src/services/user.ts | 14 ++++--- 7 files changed, 133 insertions(+), 8 deletions(-) diff --git a/packages/api/src/generated/graphql.ts b/packages/api/src/generated/graphql.ts index 62ba7be07..c68e340ee 100644 --- a/packages/api/src/generated/graphql.ts +++ b/packages/api/src/generated/graphql.ts @@ -727,6 +727,22 @@ export type DeviceTokensSuccess = { deviceTokens: Array; }; +export type EmptyTrashError = { + __typename?: 'EmptyTrashError'; + errorCodes: Array; +}; + +export enum EmptyTrashErrorCode { + Unauthorized = 'UNAUTHORIZED' +} + +export type EmptyTrashResult = EmptyTrashError | EmptyTrashSuccess; + +export type EmptyTrashSuccess = { + __typename?: 'EmptyTrashSuccess'; + success?: Maybe; +}; + export type Feature = { __typename?: 'Feature'; createdAt: Scalars['Date']; @@ -1352,6 +1368,7 @@ export type Mutation = { deleteNewsletterEmail: DeleteNewsletterEmailResult; deleteRule: DeleteRuleResult; deleteWebhook: DeleteWebhookResult; + emptyTrash: EmptyTrashResult; fetchContent: FetchContentResult; generateApiKey: GenerateApiKeyResult; googleLogin: LoginResult; @@ -3663,6 +3680,10 @@ export type ResolversTypes = { DeviceTokensErrorCode: DeviceTokensErrorCode; DeviceTokensResult: ResolversTypes['DeviceTokensError'] | ResolversTypes['DeviceTokensSuccess']; DeviceTokensSuccess: ResolverTypeWrapper; + EmptyTrashError: ResolverTypeWrapper; + EmptyTrashErrorCode: EmptyTrashErrorCode; + EmptyTrashResult: ResolversTypes['EmptyTrashError'] | ResolversTypes['EmptyTrashSuccess']; + EmptyTrashSuccess: ResolverTypeWrapper; Feature: ResolverTypeWrapper; Feed: ResolverTypeWrapper; FeedArticle: ResolverTypeWrapper; @@ -4169,6 +4190,9 @@ export type ResolversParentTypes = { DeviceTokensError: DeviceTokensError; DeviceTokensResult: ResolversParentTypes['DeviceTokensError'] | ResolversParentTypes['DeviceTokensSuccess']; DeviceTokensSuccess: DeviceTokensSuccess; + EmptyTrashError: EmptyTrashError; + EmptyTrashResult: ResolversParentTypes['EmptyTrashError'] | ResolversParentTypes['EmptyTrashSuccess']; + EmptyTrashSuccess: EmptyTrashSuccess; Feature: Feature; Feed: Feed; FeedArticle: FeedArticle; @@ -4975,6 +4999,20 @@ export type DeviceTokensSuccessResolvers; }; +export type EmptyTrashErrorResolvers = { + errorCodes?: Resolver, ParentType, ContextType>; + __isTypeOf?: IsTypeOfResolverFn; +}; + +export type EmptyTrashResultResolvers = { + __resolveType: TypeResolveFn<'EmptyTrashError' | 'EmptyTrashSuccess', ParentType, ContextType>; +}; + +export type EmptyTrashSuccessResolvers = { + success?: Resolver, ParentType, ContextType>; + __isTypeOf?: IsTypeOfResolverFn; +}; + export type FeatureResolvers = { createdAt?: Resolver; expiresAt?: Resolver, ParentType, ContextType>; @@ -5460,6 +5498,7 @@ export type MutationResolvers>; deleteRule?: Resolver>; deleteWebhook?: Resolver>; + emptyTrash?: Resolver; fetchContent?: Resolver>; generateApiKey?: Resolver>; googleLogin?: Resolver>; @@ -6647,6 +6686,9 @@ export type Resolvers = { DeviceTokensError?: DeviceTokensErrorResolvers; DeviceTokensResult?: DeviceTokensResultResolvers; DeviceTokensSuccess?: DeviceTokensSuccessResolvers; + EmptyTrashError?: EmptyTrashErrorResolvers; + EmptyTrashResult?: EmptyTrashResultResolvers; + EmptyTrashSuccess?: EmptyTrashSuccessResolvers; Feature?: FeatureResolvers; Feed?: FeedResolvers; FeedArticle?: FeedArticleResolvers; diff --git a/packages/api/src/generated/schema.graphql b/packages/api/src/generated/schema.graphql index cbcdc1808..5c2431d8a 100644 --- a/packages/api/src/generated/schema.graphql +++ b/packages/api/src/generated/schema.graphql @@ -644,6 +644,20 @@ type DeviceTokensSuccess { deviceTokens: [DeviceToken!]! } +type EmptyTrashError { + errorCodes: [EmptyTrashErrorCode!]! +} + +enum EmptyTrashErrorCode { + UNAUTHORIZED +} + +union EmptyTrashResult = EmptyTrashError | EmptyTrashSuccess + +type EmptyTrashSuccess { + success: Boolean +} + type Feature { createdAt: Date! expiresAt: Date @@ -1213,6 +1227,7 @@ type Mutation { deleteNewsletterEmail(newsletterEmailId: ID!): DeleteNewsletterEmailResult! deleteRule(id: ID!): DeleteRuleResult! deleteWebhook(id: ID!): DeleteWebhookResult! + emptyTrash: EmptyTrashResult! fetchContent(id: ID!): FetchContentResult! generateApiKey(input: GenerateApiKeyInput!): GenerateApiKeyResult! googleLogin(input: GoogleLoginInput!): LoginResult! diff --git a/packages/api/src/resolvers/article/index.ts b/packages/api/src/resolvers/article/index.ts index 5d698fc81..a1b95ee92 100644 --- a/packages/api/src/resolvers/article/index.ts +++ b/packages/api/src/resolvers/article/index.ts @@ -21,6 +21,8 @@ import { CreateArticleError, CreateArticleErrorCode, CreateArticleSuccess, + EmptyTrashError, + EmptyTrashSuccess, FetchContentError, FetchContentErrorCode, FetchContentSuccess, @@ -71,6 +73,7 @@ import { findOrCreateLabels, } from '../../services/labels' import { + batchDelete, batchUpdateLibraryItems, createLibraryItem, findLibraryItemById, @@ -1035,6 +1038,27 @@ export const fetchContentResolver = authorized< } }) +export const emptyTrashResolver = authorized< + EmptyTrashSuccess, + EmptyTrashError +>(async (_, __, { uid }) => { + analytics.track({ + userId: uid, + event: 'empty_trash', + }) + + await batchDelete({ + state: LibraryItemState.Deleted, + user: { + id: uid, + }, + }) + + return { + success: true, + } +}) + const getUpdateReason = (libraryItem: LibraryItem, since: Date) => { if (libraryItem.deletedAt) { return UpdateReason.Deleted diff --git a/packages/api/src/routers/svc/user.ts b/packages/api/src/routers/svc/user.ts index 0c7fc5991..cf9519c69 100644 --- a/packages/api/src/routers/svc/user.ts +++ b/packages/api/src/routers/svc/user.ts @@ -5,7 +5,7 @@ import express from 'express' import { LessThan } from 'typeorm' import { StatusType } from '../../entity/user' import { readPushSubscription } from '../../pubsub' -import { batchDeleteUsers } from '../../services/user' +import { batchDelete } from '../../services/user' import { corsConfig } from '../../utils/corsConfig' import { logger } from '../../utils/logger' @@ -52,7 +52,7 @@ export function userServiceRouter() { const subTime = cleanupMessage.subDays * 1000 * 60 * 60 * 24 // convert days to milliseconds try { - const result = await batchDeleteUsers({ + const result = await batchDelete({ status: StatusType.Deleted, updatedAt: LessThan(new Date(Date.now() - subTime)), // subDays ago }) diff --git a/packages/api/src/schema.ts b/packages/api/src/schema.ts index 4df9a5b35..47720b676 100755 --- a/packages/api/src/schema.ts +++ b/packages/api/src/schema.ts @@ -2761,6 +2761,20 @@ const schema = gql` BAD_REQUEST } + union EmptyTrashResult = EmptyTrashSuccess | EmptyTrashError + + type EmptyTrashSuccess { + success: Boolean + } + + type EmptyTrashError { + errorCodes: [EmptyTrashErrorCode!]! + } + + enum EmptyTrashErrorCode { + UNAUTHORIZED + } + # Mutations type Mutation { googleLogin(input: GoogleLoginInput!): LoginResult! @@ -2872,6 +2886,7 @@ const schema = gql` updateNewsletterEmail( input: UpdateNewsletterEmailInput! ): UpdateNewsletterEmailResult! + emptyTrash: EmptyTrashResult! } # FIXME: remove sort from feedArticles after all cached tabs are closed diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index fdfb4e962..9aca2c098 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -1073,3 +1073,30 @@ export const deleteLibraryItemsByAdmin = async ( 'admin' ) } + +export const batchDelete = async (criteria: FindOptionsWhere) => { + const batchSize = 1000 + + const qb = libraryItemRepository.createQueryBuilder().where(criteria) + const countSql = queryBuilderToRawSql(qb.select('COUNT(1)')) + const subQuery = queryBuilderToRawSql(qb.select('id').limit(batchSize)) + + const sql = ` + -- Set batch size + DO $$ + DECLARE + batch_size INT := ${batchSize}; + BEGIN + -- Loop through batches + FOR i IN 0..CEIL((${countSql})) * 1.0 / batch_size) - 1 LOOP + -- Delete batch + DELETE FROM omnivore.library_item + WHERE id = ANY( + ${subQuery} + ); + END LOOP; + END $$ + ` + + return authTrx(async (t) => t.query(sql)) +} diff --git a/packages/api/src/services/user.ts b/packages/api/src/services/user.ts index 848c12b0c..4c8cabbd7 100644 --- a/packages/api/src/services/user.ts +++ b/packages/api/src/services/user.ts @@ -48,13 +48,12 @@ export const createUsers = async (users: DeepPartial[]) => { ) } -export const batchDeleteUsers = async (criteria: FindOptionsWhere) => { +export const batchDelete = async (criteria: FindOptionsWhere) => { const userQb = getRepository(User).createQueryBuilder().where(criteria) const userCountSql = queryBuilderToRawSql(userQb.select('COUNT(1)')) const userSubQuery = queryBuilderToRawSql(userQb.select('id INTO user_ids')) const batchSize = 1000 - const start = new Date().toISOString() const sql = ` -- Set batch size DO $$ @@ -65,15 +64,18 @@ export const batchDeleteUsers = async (criteria: FindOptionsWhere) => { -- Loop through batches of users FOR i IN 0..CEIL((${userCountSql})) * 1.0 / batch_size) - 1 LOOP -- GET batch of user ids - ${userSubQuery} LIMIT ${batchSize} OFFSET i * batch_size; + ${userSubQuery} LIMIT batch_size OFFSET i * batch_size; -- Loop through batches of items FOR j IN 0..CEIL((SELECT COUNT(1) FROM omnivore.library_item WHERE user_id = ANY(user_ids))) * 1.0 / batch_size) - 1 LOOP -- Delete batch of items DELETE FROM omnivore.library_item - WHERE user_id = ANY(user_ids) - AND updated_at < '${start}' - LIMIT ${batchSize}; + WHERE id = ANY( + SELECT id + FROM omnivore.library_item + WHERE user_id = ANY(user_ids) + LIMIT batch_size + ); END LOOP; -- Delete the batch of users From 554c4cee12fc83868d17111a34517642fdad0c0f Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 3 Jan 2024 12:40:46 +0800 Subject: [PATCH 3/3] add test cases --- .../api/src/resolvers/function_resolvers.ts | 3 ++ packages/api/src/services/library_item.ts | 2 +- packages/api/src/services/user.ts | 10 ++-- packages/api/test/resolvers/article.test.ts | 54 +++++++++++++++++++ 4 files changed, 64 insertions(+), 5 deletions(-) diff --git a/packages/api/src/resolvers/function_resolvers.ts b/packages/api/src/resolvers/function_resolvers.ts index 7c4616ea1..6cff177ca 100644 --- a/packages/api/src/resolvers/function_resolvers.ts +++ b/packages/api/src/resolvers/function_resolvers.ts @@ -38,6 +38,7 @@ import { generateDownloadSignedUrl, generateUploadFilePathName, } from '../utils/uploads' +import { emptyTrashResolver } from './article' import { optInFeatureResolver } from './features' import { uploadImportFileResolver } from './importers/uploadImportFileResolver' import { @@ -231,6 +232,7 @@ export const functionResolvers = { updateEmail: updateEmailResolver, moveToFolder: moveToFolderResolver, updateNewsletterEmail: updateNewsletterEmailResolver, + emptyTrash: emptyTrashResolver, }, Query: { me: getMeUserResolver, @@ -578,4 +580,5 @@ export const functionResolvers = { ...resultResolveTypeResolver('ScanFeeds'), ...resultResolveTypeResolver('MoveToFolder'), ...resultResolveTypeResolver('UpdateNewsletterEmail'), + ...resultResolveTypeResolver('EmptyTrash'), } diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index 9aca2c098..5fabc2828 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -1088,7 +1088,7 @@ export const batchDelete = async (criteria: FindOptionsWhere) => { batch_size INT := ${batchSize}; BEGIN -- Loop through batches - FOR i IN 0..CEIL((${countSql})) * 1.0 / batch_size) - 1 LOOP + FOR i IN 0..CEIL((${countSql}) * 1.0 / batch_size) - 1 LOOP -- Delete batch DELETE FROM omnivore.library_item WHERE id = ANY( diff --git a/packages/api/src/services/user.ts b/packages/api/src/services/user.ts index 4c8cabbd7..d7f9fa0d3 100644 --- a/packages/api/src/services/user.ts +++ b/packages/api/src/services/user.ts @@ -51,7 +51,9 @@ export const createUsers = async (users: DeepPartial[]) => { export const batchDelete = async (criteria: FindOptionsWhere) => { const userQb = getRepository(User).createQueryBuilder().where(criteria) const userCountSql = queryBuilderToRawSql(userQb.select('COUNT(1)')) - const userSubQuery = queryBuilderToRawSql(userQb.select('id INTO user_ids')) + const userSubQuery = queryBuilderToRawSql( + userQb.select('array_agg(id::UUID) into user_ids') + ) const batchSize = 1000 const sql = ` @@ -62,12 +64,12 @@ export const batchDelete = async (criteria: FindOptionsWhere) => { user_ids UUID[]; BEGIN -- Loop through batches of users - FOR i IN 0..CEIL((${userCountSql})) * 1.0 / batch_size) - 1 LOOP + FOR i IN 0..CEIL((${userCountSql}) * 1.0 / batch_size) - 1 LOOP -- GET batch of user ids - ${userSubQuery} LIMIT batch_size OFFSET i * batch_size; + ${userSubQuery} LIMIT batch_size; -- Loop through batches of items - FOR j IN 0..CEIL((SELECT COUNT(1) FROM omnivore.library_item WHERE user_id = ANY(user_ids))) * 1.0 / batch_size) - 1 LOOP + FOR j IN 0..CEIL((SELECT COUNT(1) FROM omnivore.library_item WHERE user_id = ANY(user_ids)) * 1.0 / batch_size) - 1 LOOP -- Delete batch of items DELETE FROM omnivore.library_item WHERE id = ANY( diff --git a/packages/api/test/resolvers/article.test.ts b/packages/api/test/resolvers/article.test.ts index b6e781597..090e31208 100644 --- a/packages/api/test/resolvers/article.test.ts +++ b/packages/api/test/resolvers/article.test.ts @@ -2285,4 +2285,58 @@ describe('Article API', () => { expect(item?.labels?.map((l) => l.name)).to.eql(['Favorites']) }) }) + + describe('EmptyTrash API', () => { + const emptyTrashQuery = () => ` + mutation { + emptyTrash { + ... on EmptyTrashSuccess { + success + } + ... on EmptyTrashError { + errorCodes + } + } + }` + + let items: LibraryItem[] = [] + + before(async () => { + // Create some test items + for (let i = 0; i < 5; i++) { + const itemToSave: DeepPartial = { + user, + title: 'test item', + readableContent: '

test

', + slug: '', + originalUrl: `https://blog.omnivore.app/p/empty-trash-${i}`, + deletedAt: new Date(), + state: LibraryItemState.Deleted, + } + const item = await createLibraryItem(itemToSave, user.id) + items.push(item) + } + }) + + after(async () => { + // Delete all items + await deleteLibraryItemsByUserId(user.id) + }) + + it('empties the trash', async () => { + let response = await graphqlRequest( + searchQuery('in:trash'), + authToken + ).expect(200) + expect(response.body.data.search.pageInfo.totalCount).to.eql(5) + + await graphqlRequest(emptyTrashQuery(), authToken).expect(200) + + response = await graphqlRequest( + searchQuery('in:trash'), + authToken + ).expect(200) + expect(response.body.data.search.pageInfo.totalCount).to.eql(0) + }) + }) })