From afd4d2a397e6ca2c0b30b7996c6dd78ff738399b Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 19 Feb 2024 11:35:43 +0800 Subject: [PATCH] fix: duplicate key value violates unique constraint "library_item_pkey" by getting the item by user_id and md5(url) for update before upserting the item --- packages/api/src/jobs/save_page.ts | 3 +- packages/api/src/repository/library_item.ts | 77 ++++++++++++------- .../api/src/routers/svc/email_attachment.ts | 14 ++-- packages/api/src/services/library_item.ts | 35 +++++++-- packages/api/src/services/recommendation.ts | 8 +- packages/api/src/services/save_page.ts | 3 +- packages/api/test/resolvers/article.test.ts | 15 ++-- 7 files changed, 100 insertions(+), 55 deletions(-) diff --git a/packages/api/src/jobs/save_page.ts b/packages/api/src/jobs/save_page.ts index e1ce04874..7e67024e9 100644 --- a/packages/api/src/jobs/save_page.ts +++ b/packages/api/src/jobs/save_page.ts @@ -246,8 +246,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { // for non-pdf content, we need to save the page const result = await savePage( { - url, - finalUrl, + url: finalUrl, clientRequestId: articleSavingRequestId, title, originalContent: content, diff --git a/packages/api/src/repository/library_item.ts b/packages/api/src/repository/library_item.ts index 39d69d57e..678d9cd12 100644 --- a/packages/api/src/repository/library_item.ts +++ b/packages/api/src/repository/library_item.ts @@ -2,7 +2,7 @@ import { DeepPartial } from 'typeorm' import { getColumns, getColumnsDbName } from '.' import { appDataSource } from '../data_source' import { LibraryItem } from '../entity/library_item' -import { keysToCamelCase, wordsCount } from '../utils/helpers' +import { wordsCount } from '../utils/helpers' const convertToLibraryItem = (item: DeepPartial) => { return { @@ -18,52 +18,75 @@ export const libraryItemRepository = appDataSource return this.findOneBy({ id }) }, - findByUserIdAndUrl(userId: string, url: string) { + findByUserIdAndUrl(userId: string, url: string, forUpdate = false) { // md5 is used to hash the url to avoid the length limit of the index - return this.createQueryBuilder() + const qb = this.createQueryBuilder() .where('user_id = :userId', { userId }) .andWhere('md5(original_url) = md5(:url)', { url }) - .getOne() + + if (forUpdate) { + qb.setLock('pessimistic_write') + } + + return qb.getOne() }, countByCreatedAt(createdAt: Date) { return this.countBy({ createdAt }) }, - async upsertLibraryItem(item: DeepPartial, finalUrl?: string) { + async upsertLibraryItemById(item: DeepPartial) { const columns = getColumnsDbName(this) - // overwrites columns except id and slug - const overwrites = columns.filter( - (column) => !['id', 'slug'].includes(column) - ) + // overwrites columns except slug + const overwrites = columns.filter((column) => column !== 'slug') - const hashedUrl = 'md5(original_url)' - let conflictColumns = ['user_id', hashedUrl] - - if (item.id && finalUrl && finalUrl !== item.originalUrl) { - // update the original url if it's different from the current one in the database - conflictColumns = ['id'] - item.originalUrl = finalUrl - } - - const [query, params] = this.createQueryBuilder() + const result = await this.createQueryBuilder() .insert() .into(LibraryItem) .values(convertToLibraryItem(item)) - .orUpdate(overwrites, conflictColumns, { + .orUpdate(overwrites, ['id'], { skipUpdateIfNoValuesChanged: true, }) .returning(getColumns(this)) - .getQueryAndParameters() + .execute() - // this is a workaround for the typeorm bug which quotes the md5 function - const newQuery = query.replace(`"${hashedUrl}"`, hashedUrl) - const results = (await this.query(newQuery, params)) as never[] + if (result.generatedMaps.length === 0) { + throw new Error('Failed to upsert library item') + } - // convert to camel case - const newItem = keysToCamelCase(results[0]) as LibraryItem + return result.generatedMaps[0] as LibraryItem + }, - return newItem + async updateLibraryItem(item: DeepPartial) { + return this.save(item) + + // const columns = getColumnsDbName(this) + // // overwrites columns except id and slug + // const overwrites = columns.filter( + // (column) => !['id', 'slug'].includes(column) + // ) + + // const hashedUrl = 'md5(original_url)' + // const conflictColumns = ['user_id', hashedUrl] + + // const [query, params] = this.createQueryBuilder() + // .insert() + // .into(LibraryItem) + // .values(convertToLibraryItem(item)) + // .orUpdate(overwrites, conflictColumns, { + // skipUpdateIfNoValuesChanged: true, + // }) + // .returning(getColumns(this)) + // .getQueryAndParameters() + + // // this is a workaround for the typeorm bug which quotes the md5 function + // const newQuery = query.replace(`"${hashedUrl}"`, hashedUrl) + // const results = (await this.query(newQuery, params)) as never[] + + // // convert to camel case + // const newItem = keysToCamelCase(results[0]) as LibraryItem + + // return newItem }, createByPopularRead(name: string, userId: string) { diff --git a/packages/api/src/routers/svc/email_attachment.ts b/packages/api/src/routers/svc/email_attachment.ts index 0de5481a0..9f6e2ee0a 100644 --- a/packages/api/src/routers/svc/email_attachment.ts +++ b/packages/api/src/routers/svc/email_attachment.ts @@ -1,15 +1,13 @@ import express from 'express' -import { DeepPartial } from 'typeorm' -import { - ContentReaderType, - LibraryItem, - LibraryItemState, -} from '../../entity/library_item' +import { ContentReaderType, LibraryItemState } from '../../entity/library_item' import { UploadFile } from '../../entity/upload_file' import { env } from '../../env' import { PageType, UploadFileStatus } from '../../generated/graphql' import { authTrx } from '../../repository' -import { createOrUpdateLibraryItem } from '../../services/library_item' +import { + createOrUpdateLibraryItem, + CreateOrUpdateLibraryItemArgs, +} from '../../services/library_item' import { findNewsletterEmailByAddress } from '../../services/newsletters' import { updateReceivedEmail } from '../../services/received_emails' import { @@ -154,7 +152,7 @@ export function emailAttachmentRouter() { ? PageType.File : PageType.Book const title = subject || uploadFileData.fileName - const itemToCreate: DeepPartial = { + const itemToCreate: CreateOrUpdateLibraryItemArgs = { originalUrl: uploadFileUrlOverride, itemType, textContentHash: uploadFileHash, diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index f838834f0..a09b4c773 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -16,6 +16,7 @@ import { createPubSubClient, EntityType } from '../pubsub' import { redisDataSource } from '../redis_data_source' import { authTrx, getColumns, queryBuilderToRawSql } from '../repository' import { libraryItemRepository } from '../repository/library_item' +import { Merge } from '../util' import { setRecentlySavedItemInRedis } from '../utils/helpers' import { parseSearchQuery } from '../utils/search' import { addLabelsToLibraryItem } from './labels' @@ -825,18 +826,38 @@ export const createLibraryItems = async ( ) } +export type CreateOrUpdateLibraryItemArgs = Merge< + DeepPartial, + { originalUrl: string } +> export const createOrUpdateLibraryItem = async ( - libraryItem: DeepPartial, + libraryItem: CreateOrUpdateLibraryItemArgs, userId: string, pubsub = createPubSubClient(), - skipPubSub = false, - finalUrl?: string + skipPubSub = false ): Promise => { const newLibraryItem = await authTrx( - async (tx) => - tx - .withRepository(libraryItemRepository) - .upsertLibraryItem(libraryItem, finalUrl), + async (tx) => { + const repo = tx.withRepository(libraryItemRepository) + // find existing library item by user_id and url + const existingLibraryItem = await repo.findByUserIdAndUrl( + userId, + libraryItem.originalUrl, + true + ) + + if (existingLibraryItem) { + // update existing library item + return repo.save({ + ...libraryItem, + id: existingLibraryItem.id, + slug: existingLibraryItem.slug, // keep the original slug + }) + } + + // create or update library item + return repo.upsertLibraryItemById(libraryItem) + }, undefined, userId ) diff --git a/packages/api/src/services/recommendation.ts b/packages/api/src/services/recommendation.ts index cd090cdb6..483b10366 100644 --- a/packages/api/src/services/recommendation.ts +++ b/packages/api/src/services/recommendation.ts @@ -5,7 +5,11 @@ import { Recommendation } from '../entity/recommendation' import { authTrx } from '../repository' import { logger } from '../utils/logger' import { createHighlights } from './highlights' -import { createOrUpdateLibraryItem, findLibraryItemByUrl } from './library_item' +import { + createOrUpdateLibraryItem, + CreateOrUpdateLibraryItemArgs, + findLibraryItemByUrl, +} from './library_item' export const addRecommendation = async ( item: LibraryItem, @@ -18,7 +22,7 @@ export const addRecommendation = async ( let recommendedItem = await findLibraryItemByUrl(item.originalUrl, userId) if (!recommendedItem) { // create a new item - const newItem: DeepPartial = { + const newItem: CreateOrUpdateLibraryItemArgs = { user: { id: userId }, slug: item.slug, title: item.title, diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index fafc20dc7..e4f065e94 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -128,8 +128,7 @@ export const savePage = async ( itemToSave, user.id, undefined, - isImported, - input.finalUrl + isImported ) clientRequestId = newItem.id diff --git a/packages/api/test/resolvers/article.test.ts b/packages/api/test/resolvers/article.test.ts index a3fd7e034..c93a388b3 100644 --- a/packages/api/test/resolvers/article.test.ts +++ b/packages/api/test/resolvers/article.test.ts @@ -32,6 +32,7 @@ import { findLibraryItemById, findLibraryItemByUrl, updateLibraryItem, + CreateOrUpdateLibraryItemArgs, } from '../../src/services/library_item' import { deleteUser } from '../../src/services/user' import * as createTask from '../../src/utils/createTask' @@ -443,7 +444,7 @@ describe('Article API', () => { let itemId: string before(async () => { - const itemToCreate: DeepPartial = { + const itemToCreate: CreateOrUpdateLibraryItemArgs = { title: 'test title', originalContent: '

test

', slug: realSlug, @@ -696,7 +697,7 @@ describe('Article API', () => { let itemId: string before(async () => { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test title', readableContent: '

test

', @@ -924,7 +925,7 @@ describe('Article API', () => { const readingProgressArray = [0, 2, 97, 98, 100] // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test title', readableContent: `

test ${searchedKeyword}

`, @@ -1971,7 +1972,7 @@ describe('Article API', () => { before(async () => { // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'typeahead search item', readableContent: '

test

', @@ -2045,7 +2046,7 @@ describe('Article API', () => { before(async () => { // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { title: 'test item', slug: '', readableContent: '

test

', @@ -2316,7 +2317,7 @@ describe('Article API', () => { let articleId = '' before(async () => { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test setFavoriteArticle', slug: '', @@ -2361,7 +2362,7 @@ describe('Article API', () => { before(async () => { // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test item', readableContent: '

test

',