Merge pull request #3518 from omnivore-app/fix/library-item

fix: duplicate key value violates unique constraint "library_item_pkey"
Hongbo Wu authored 2024-02-08 13:18:22 +08:00, committed by GitHub
9 changed files with 89 additions and 59 deletions

View File

@@ -9,7 +9,6 @@ import {
   OneToMany,
   OneToOne,
   PrimaryGeneratedColumn,
-  Unique,
   UpdateDateColumn,
 } from 'typeorm'
 import { Highlight } from './highlight'

View File

@@ -24,6 +24,7 @@ const REQUEST_TIMEOUT = 30000 // 30 seconds
 interface Data {
   userId: string
   url: string
+  finalUrl: string
   articleSavingRequestId: string
   state?: string
   labels?: CreateLabelInput[]
@@ -175,16 +176,21 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
     publishedAt,
     taskId,
     url,
+    finalUrl,
   } = data
   let isImported,
     isSaved,
     state = data.state
   try {
-    logger.info(`savePageJob: ${userId} ${url}`)
+    logger.info('savePageJob', {
+      userId,
+      url,
+      finalUrl,
+    })
     // get the fetch result from cache
-    const fetchedResult = await getCachedFetchResult(url)
+    const fetchedResult = await getCachedFetchResult(finalUrl)
     const { title, contentType } = fetchedResult
     let content = fetchedResult.content
@@ -200,11 +206,15 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
     // for pdf content, we need to upload the pdf
     if (contentType === 'application/pdf') {
-      const uploadResult = await uploadPdf(url, userId, articleSavingRequestId)
+      const uploadResult = await uploadPdf(
+        finalUrl,
+        userId,
+        articleSavingRequestId
+      )
       const result = await saveFile(
         {
-          url,
+          url: finalUrl,
           uploadFileId: uploadResult.uploadFileId,
           state: state ? (state as ArticleSavingRequestStatus) : undefined,
           labels,
@@ -227,7 +237,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
     }
     if (!content) {
-      logger.info('content is not fetched', url)
+      logger.info(`content is not fetched: ${finalUrl}`)
       // set the state to failed if we don't have content
       content = 'Failed to fetch content'
       state = ArticleSavingRequestStatus.Failed
@@ -237,6 +247,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
     const result = await savePage(
       {
         url,
+        finalUrl,
         clientRequestId: articleSavingRequestId,
         title,
         originalContent: content,
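
These hunks consistently switch the fetch-result cache lookup and the PDF upload from the originally requested `url` to the post-redirect `finalUrl`. A minimal sketch of the idea, with illustrative names and a Map-backed store standing in for the real cache behind `getCachedFetchResult`:

```ts
// The fetcher follows redirects and stores its result under the URL it
// finally landed on, so readers must use that same key or they miss the
// cache entirely. Names and the Map-backed store are illustrative.
interface FetchResult {
  finalUrl: string
  title?: string
  contentType?: string
  content?: string
}

const cache = new Map<string, FetchResult>()

const cacheFetchResult = (result: FetchResult) =>
  // key by the post-redirect URL, e.g. https://t.co/x -> https://example.com/post
  cache.set(result.finalUrl, result)

const getCachedFetchResult = (finalUrl: string): FetchResult => {
  const cached = cache.get(finalUrl)
  if (!cached) {
    throw new Error(`fetch result not found in cache: ${finalUrl}`)
  }
  return cached
}
```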

View File

@@ -1,9 +1,8 @@
 import { DeepPartial } from 'typeorm'
-import { getColumnsDbName } from '.'
+import { getColumns, getColumnsDbName } from '.'
 import { appDataSource } from '../data_source'
 import { LibraryItem } from '../entity/library_item'
 import { keysToCamelCase, wordsCount } from '../utils/helpers'
-import { logger } from '../utils/logger'
 
 const convertToLibraryItem = (item: DeepPartial<LibraryItem>) => {
   return {
@@ -31,21 +30,30 @@ export const libraryItemRepository = appDataSource
     return this.countBy({ createdAt })
   },
 
-  async upsertLibraryItem(item: DeepPartial<LibraryItem>) {
+  async upsertLibraryItem(item: DeepPartial<LibraryItem>, finalUrl?: string) {
+    const columns = getColumnsDbName(this)
     // overwrites columns except id and slug
-    const overwrites = getColumnsDbName(this).filter(
+    const overwrites = columns.filter(
       (column) => !['id', 'slug'].includes(column)
     )
     const hashedUrl = 'md5(original_url)'
+    let conflictColumns = ['user_id', hashedUrl]
+    if (item.id && finalUrl && finalUrl !== item.originalUrl) {
+      // update the original url if it's different from the current one in the database
+      conflictColumns = ['id']
+      item.originalUrl = finalUrl
+    }
+
     const [query, params] = this.createQueryBuilder()
       .insert()
       .into(LibraryItem)
       .values(convertToLibraryItem(item))
-      .orUpdate(overwrites, ['user_id', hashedUrl], {
+      .orUpdate(overwrites, conflictColumns, {
         skipUpdateIfNoValuesChanged: true,
       })
-      .returning('*')
+      .returning(getColumns(this))
       .getQueryAndParameters()
 
     // this is a workaround for the typeorm bug which quotes the md5 function
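
This hunk is the heart of the fix named in the commit message. The upsert used to resolve conflicts on `(user_id, md5(original_url))` unconditionally, so when a redirect gave an existing item a different final URL, the generated INSERT matched no existing row on that URL yet reused the existing `id`, violating `library_item_pkey`. A minimal sketch of the new conflict-target choice, with the TypeORM plumbing stripped away:

```ts
// When updating an existing item whose URL changed (redirect), conflict on
// the primary key so the INSERT becomes an UPDATE of the same row; otherwise
// keep deduplicating on the per-user hashed-URL unique index.
const chooseConflictTarget = (
  item: { id?: string; originalUrl?: string },
  finalUrl?: string
): string[] => {
  if (item.id && finalUrl && finalUrl !== item.originalUrl) {
    item.originalUrl = finalUrl // persist the redirected URL
    return ['id'] // ON CONFLICT (id) DO UPDATE ...
  }
  return ['user_id', 'md5(original_url)'] // ON CONFLICT (user_id, md5(original_url)) DO UPDATE ...
}
```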

View File

@@ -104,7 +104,7 @@ export const createPageSaveRequest = async ({
   // create processing item
   const libraryItem = await createOrUpdateLibraryItem(
     {
-      id: articleSavingRequestId,
+      id: articleSavingRequestId || undefined,
       user: { id: userId },
       readableContent: SAVING_CONTENT,
      itemType: PageType.Unknown,
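
`articleSavingRequestId || undefined` keeps an empty-string id from reaching the insert; with `undefined`, the generated-uuid column default applies instead. A one-line illustration (the helper name is hypothetical):

```ts
// '' is falsy, so an empty request id collapses to undefined and the
// database generates the primary key rather than inserting ''
const normalizeId = (id?: string | null): string | undefined => id || undefined

console.log(normalizeId(''))        // undefined -> uuid generated by the DB
console.log(normalizeId('abc-123')) // 'abc-123' -> used as the item id
```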

View File

@@ -829,11 +829,14 @@ export const createOrUpdateLibraryItem = async (
   libraryItem: DeepPartial<LibraryItem>,
   userId: string,
   pubsub = createPubSubClient(),
-  skipPubSub = false
+  skipPubSub = false,
+  finalUrl?: string
 ): Promise<LibraryItem> => {
   const newLibraryItem = await authTrx(
     async (tx) =>
-      tx.withRepository(libraryItemRepository).upsertLibraryItem(libraryItem),
+      tx
+        .withRepository(libraryItemRepository)
+        .upsertLibraryItem(libraryItem, finalUrl),
     undefined,
     userId
   )
@@ -1031,7 +1034,8 @@ export const deleteLibraryItems = async (
   userId?: string
 ) => {
   return authTrx(
-    async (tx) => tx.withRepository(libraryItemRepository).remove(items),
+    async (tx) =>
+      tx.withRepository(libraryItemRepository).delete(items.map((i) => i.id)),
     undefined,
     userId
   )
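
The `deleteLibraryItems` hunk swaps TypeORM's `remove` for `delete`. `remove` operates on loaded entity instances and, as I understand it, issues one DELETE per entity while running entity listeners and subscribers; `delete` compiles the criteria into a single `DELETE ... WHERE "id" IN (...)`. A minimal sketch, assuming the entity and a configured data source from this repo:

```ts
import { DataSource } from 'typeorm'
import { LibraryItem } from '../entity/library_item'

const deleteByIds = async (dataSource: DataSource, items: LibraryItem[]) => {
  const repo = dataSource.getRepository(LibraryItem)

  // before: required full entities and ran per-entity listeners
  // await repo.remove(items)

  // after: one statement keyed on the primary column, no entities needed
  await repo.delete(items.map((i) => i.id))
}
```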

View File

@@ -62,9 +62,39 @@ const shouldParseInBackend = (input: SavePageInput): boolean => {
 }
 
 export const savePage = async (
-  input: SavePageInput,
+  input: SavePageInput & {
+    finalUrl?: string
+  },
   user: User
 ): Promise<SaveResult> => {
+  const [slug, croppedPathname] = createSlug(input.url, input.title)
+  let clientRequestId = input.clientRequestId
+
+  // always parse in backend if the url is in the force puppeteer list
+  if (shouldParseInBackend(input)) {
+    try {
+      await createPageSaveRequest({
+        user,
+        url: input.url,
+        articleSavingRequestId: clientRequestId || undefined,
+        state: input.state || undefined,
+        labels: input.labels || undefined,
+        folder: input.folder || undefined,
+      })
+    } catch (e) {
+      return {
+        __typename: 'SaveError',
+        errorCodes: [SaveErrorCode.Unknown],
+        message: 'Failed to create page save request',
+      }
+    }
+
+    return {
+      clientRequestId,
+      url: `${homePageURL()}/${user.profile.username}/${slug}`,
+    }
+  }
+
   const parseResult = await parsePreparedContent(input.url, {
     document: input.originalContent,
     pageInfo: {
@@ -72,9 +102,6 @@ export const savePage = async (
       canonicalUrl: input.url,
     },
   })
-  const [newSlug, croppedPathname] = createSlug(input.url, input.title)
-  let slug = newSlug
-  let clientRequestId = input.clientRequestId
 
   const itemToSave = parsedContentToLibraryItem({
     itemId: clientRequestId,
@@ -96,42 +123,22 @@ export const savePage = async (
   const isImported =
     input.source === 'csv-importer' || input.source === 'pocket'
 
-  // always parse in backend if the url is in the force puppeteer list
-  if (shouldParseInBackend(input)) {
-    try {
-      await createPageSaveRequest({
-        user,
-        url: itemToSave.originalUrl,
-        articleSavingRequestId: clientRequestId || undefined,
-        state: input.state || undefined,
-        labels: input.labels || undefined,
-        folder: input.folder || undefined,
-      })
-    } catch (e) {
-      return {
-        __typename: 'SaveError',
-        errorCodes: [SaveErrorCode.Unknown],
-        message: 'Failed to create page save request',
-      }
-    }
-  } else {
-    // do not publish a pubsub event if the item is imported
-    const newItem = await createOrUpdateLibraryItem(
-      itemToSave,
-      user.id,
-      undefined,
-      isImported
-    )
-    clientRequestId = newItem.id
-    slug = newItem.slug
+  // do not publish a pubsub event if the item is imported
+  const newItem = await createOrUpdateLibraryItem(
+    itemToSave,
+    user.id,
+    undefined,
+    isImported,
+    input.finalUrl
+  )
+  clientRequestId = newItem.id
 
-    await createAndSaveLabelsInLibraryItem(
-      clientRequestId,
-      user.id,
-      input.labels,
-      input.rssFeedUrl
-    )
-  }
+  await createAndSaveLabelsInLibraryItem(
+    clientRequestId,
+    user.id,
+    input.labels,
+    input.rssFeedUrl
+  )
 
   // we don't want to create thumbnail for imported pages and pages that already have thumbnail
   if (!isImported && !parseResult.parsedContent?.previewImage) {
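
Taken together, the three `savePage` hunks reorder the resolver: the slug and `clientRequestId` are computed once up front, the force-puppeteer path queues a backend fetch and returns early before any local parsing, and the remaining straight-line path (no more `else`) upserts with `input.finalUrl` and then saves labels. A condensed, hypothetical sketch of that control flow; the stubs stand in for `createPageSaveRequest`, `parsePreparedContent`, `createOrUpdateLibraryItem`, and `createAndSaveLabelsInLibraryItem`:

```ts
type SaveInput = { url: string; finalUrl?: string }

// stand-ins so the sketch type-checks; the real helpers live in the diff above
const shouldParseInBackend = (input: SaveInput) => input.url.endsWith('.pdf')
const queueBackendFetch = async (_input: SaveInput) => {}
const parseLocally = async (url: string) => ({ title: url })
const upsertItem = async (_parsed: { title: string }, finalUrl?: string) => ({
  id: '1',
  finalUrl,
})
const saveLabels = async (_itemId: string) => {}

export const savePageFlow = async (input: SaveInput) => {
  // 1. backend path: queue the fetch and return early; nothing below runs
  if (shouldParseInBackend(input)) {
    await queueBackendFetch(input)
    return { queued: true }
  }

  // 2. local path: parse, then upsert with finalUrl so a redirect updates
  //    the stored original_url instead of colliding on library_item_pkey
  const parsed = await parseLocally(input.url)
  const item = await upsertItem(parsed, input.finalUrl)
  await saveLabels(item.id)
  return { queued: false, id: item.id }
}
```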

View File

@@ -634,7 +634,7 @@ describe('Article API', () => {
       const savedItem = await findLibraryItemByUrl(url, user.id)
       expect(savedItem?.archivedAt).to.not.be.null
-      expect(savedItem?.labels?.map((l) => l.name)).to.eql(labels)
+      expect(savedItem?.labels?.map((l) => l.name)).to.include.members(labels)
     })
   })
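
The assertion swap deliberately loosens this test: chai's `.eql` is a deep-equality check where order and extra elements both fail it, while `.include.members` only requires that each expected label be present. For example:

```ts
import { expect } from 'chai'

expect(['a', 'b']).to.eql(['a', 'b'])                  // passes: exact match
expect(['b', 'a', 'c']).to.include.members(['a', 'b']) // passes: any order, supersets ok
// expect(['b', 'a', 'c']).to.eql(['a', 'b'])          // would fail under .eql
```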

View File

@@ -116,7 +116,8 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => {
     userId: user.id,
     data: {
       userId: user.id,
-      url: finalUrl,
+      url,
+      finalUrl,
       articleSavingRequestId,
       state,
       labels,

View File

@@ -5,7 +5,7 @@ import { isOldItem, RssFeedItem } from '../src'
 describe('isOldItem', () => {
   it('returns true if item is older than 1 day', () => {
     const item = {
-      pubDate: '2020-01-01',
+      isoDate: '2020-01-01',
     } as RssFeedItem
     const mostRecentItemTimestamp = Date.now()
@@ -15,7 +15,7 @@ describe('isOldItem', () => {
   it('returns true if item was published at the last fetched time', () => {
     const mostRecentItemTimestamp = Date.now()
     const item = {
-      pubDate: new Date(mostRecentItemTimestamp).toISOString(),
+      isoDate: new Date(mostRecentItemTimestamp).toISOString(),
     } as RssFeedItem
     expect(isOldItem(item, mostRecentItemTimestamp)).to.be.true
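
Both tests now populate `isoDate` rather than `pubDate`, matching the field the parser actually reads. A hedged reconstruction of the behavior the tests pin down; the real `isOldItem` in the rss-handler package may differ in detail:

```ts
type RssFeedItem = { isoDate?: string }

const ONE_DAY_MS = 24 * 60 * 60 * 1000

// an item is "old" if it was published more than a day ago, or no later
// than the most recently fetched item (so re-fetches skip it)
const isOldItem = (item: RssFeedItem, mostRecentItemTimestamp: number) => {
  const published = item.isoDate ? Date.parse(item.isoDate) : Date.now()
  return (
    published < Date.now() - ONE_DAY_MS ||
    published <= mostRecentItemTimestamp
  )
}

// mirrors the two tests above
console.log(isOldItem({ isoDate: '2020-01-01' }, Date.now()))                       // true
console.log(isOldItem({ isoDate: new Date(Date.now()).toISOString() }, Date.now())) // true
```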