diff --git a/packages/api/src/entity/library_item.ts b/packages/api/src/entity/library_item.ts index b7054097d..f5c15d42e 100644 --- a/packages/api/src/entity/library_item.ts +++ b/packages/api/src/entity/library_item.ts @@ -9,7 +9,6 @@ import { OneToMany, OneToOne, PrimaryGeneratedColumn, - Unique, UpdateDateColumn, } from 'typeorm' import { Highlight } from './highlight' diff --git a/packages/api/src/jobs/save_page.ts b/packages/api/src/jobs/save_page.ts index 1879dbe0b..e1ce04874 100644 --- a/packages/api/src/jobs/save_page.ts +++ b/packages/api/src/jobs/save_page.ts @@ -24,6 +24,7 @@ const REQUEST_TIMEOUT = 30000 // 30 seconds interface Data { userId: string url: string + finalUrl: string articleSavingRequestId: string state?: string labels?: CreateLabelInput[] @@ -175,16 +176,21 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { publishedAt, taskId, url, + finalUrl, } = data let isImported, isSaved, state = data.state try { - logger.info(`savePageJob: ${userId} ${url}`) + logger.info('savePageJob', { + userId, + url, + finalUrl, + }) // get the fetch result from cache - const fetchedResult = await getCachedFetchResult(url) + const fetchedResult = await getCachedFetchResult(finalUrl) const { title, contentType } = fetchedResult let content = fetchedResult.content @@ -200,11 +206,15 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { // for pdf content, we need to upload the pdf if (contentType === 'application/pdf') { - const uploadResult = await uploadPdf(url, userId, articleSavingRequestId) + const uploadResult = await uploadPdf( + finalUrl, + userId, + articleSavingRequestId + ) const result = await saveFile( { - url, + url: finalUrl, uploadFileId: uploadResult.uploadFileId, state: state ? (state as ArticleSavingRequestStatus) : undefined, labels, @@ -227,7 +237,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { } if (!content) { - logger.info('content is not fetched', url) + logger.info(`content is not fetched: ${finalUrl}`) // set the state to failed if we don't have content content = 'Failed to fetch content' state = ArticleSavingRequestStatus.Failed @@ -237,6 +247,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { const result = await savePage( { url, + finalUrl, clientRequestId: articleSavingRequestId, title, originalContent: content, diff --git a/packages/api/src/repository/library_item.ts b/packages/api/src/repository/library_item.ts index 22e1e64f8..39d69d57e 100644 --- a/packages/api/src/repository/library_item.ts +++ b/packages/api/src/repository/library_item.ts @@ -1,9 +1,8 @@ import { DeepPartial } from 'typeorm' -import { getColumnsDbName } from '.' +import { getColumns, getColumnsDbName } from '.' import { appDataSource } from '../data_source' import { LibraryItem } from '../entity/library_item' import { keysToCamelCase, wordsCount } from '../utils/helpers' -import { logger } from '../utils/logger' const convertToLibraryItem = (item: DeepPartial) => { return { @@ -31,21 +30,30 @@ export const libraryItemRepository = appDataSource return this.countBy({ createdAt }) }, - async upsertLibraryItem(item: DeepPartial) { + async upsertLibraryItem(item: DeepPartial, finalUrl?: string) { + const columns = getColumnsDbName(this) // overwrites columns except id and slug - const overwrites = getColumnsDbName(this).filter( + const overwrites = columns.filter( (column) => !['id', 'slug'].includes(column) ) + const hashedUrl = 'md5(original_url)' + let conflictColumns = ['user_id', hashedUrl] + + if (item.id && finalUrl && finalUrl !== item.originalUrl) { + // update the original url if it's different from the current one in the database + conflictColumns = ['id'] + item.originalUrl = finalUrl + } const [query, params] = this.createQueryBuilder() .insert() .into(LibraryItem) .values(convertToLibraryItem(item)) - .orUpdate(overwrites, ['user_id', hashedUrl], { + .orUpdate(overwrites, conflictColumns, { skipUpdateIfNoValuesChanged: true, }) - .returning('*') + .returning(getColumns(this)) .getQueryAndParameters() // this is a workaround for the typeorm bug which quotes the md5 function diff --git a/packages/api/src/services/create_page_save_request.ts b/packages/api/src/services/create_page_save_request.ts index 403ccf3f0..084f7da3f 100644 --- a/packages/api/src/services/create_page_save_request.ts +++ b/packages/api/src/services/create_page_save_request.ts @@ -104,7 +104,7 @@ export const createPageSaveRequest = async ({ // create processing item const libraryItem = await createOrUpdateLibraryItem( { - id: articleSavingRequestId, + id: articleSavingRequestId || undefined, user: { id: userId }, readableContent: SAVING_CONTENT, itemType: PageType.Unknown, diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index c567995ba..f838834f0 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -829,11 +829,14 @@ export const createOrUpdateLibraryItem = async ( libraryItem: DeepPartial, userId: string, pubsub = createPubSubClient(), - skipPubSub = false + skipPubSub = false, + finalUrl?: string ): Promise => { const newLibraryItem = await authTrx( async (tx) => - tx.withRepository(libraryItemRepository).upsertLibraryItem(libraryItem), + tx + .withRepository(libraryItemRepository) + .upsertLibraryItem(libraryItem, finalUrl), undefined, userId ) @@ -1031,7 +1034,8 @@ export const deleteLibraryItems = async ( userId?: string ) => { return authTrx( - async (tx) => tx.withRepository(libraryItemRepository).remove(items), + async (tx) => + tx.withRepository(libraryItemRepository).delete(items.map((i) => i.id)), undefined, userId ) diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index d8e28a43c..fafc20dc7 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -62,9 +62,39 @@ const shouldParseInBackend = (input: SavePageInput): boolean => { } export const savePage = async ( - input: SavePageInput, + input: SavePageInput & { + finalUrl?: string + }, user: User ): Promise => { + const [slug, croppedPathname] = createSlug(input.url, input.title) + let clientRequestId = input.clientRequestId + + // always parse in backend if the url is in the force puppeteer list + if (shouldParseInBackend(input)) { + try { + await createPageSaveRequest({ + user, + url: input.url, + articleSavingRequestId: clientRequestId || undefined, + state: input.state || undefined, + labels: input.labels || undefined, + folder: input.folder || undefined, + }) + } catch (e) { + return { + __typename: 'SaveError', + errorCodes: [SaveErrorCode.Unknown], + message: 'Failed to create page save request', + } + } + + return { + clientRequestId, + url: `${homePageURL()}/${user.profile.username}/${slug}`, + } + } + const parseResult = await parsePreparedContent(input.url, { document: input.originalContent, pageInfo: { @@ -72,9 +102,6 @@ export const savePage = async ( canonicalUrl: input.url, }, }) - const [newSlug, croppedPathname] = createSlug(input.url, input.title) - let slug = newSlug - let clientRequestId = input.clientRequestId const itemToSave = parsedContentToLibraryItem({ itemId: clientRequestId, @@ -96,42 +123,22 @@ export const savePage = async ( const isImported = input.source === 'csv-importer' || input.source === 'pocket' - // always parse in backend if the url is in the force puppeteer list - if (shouldParseInBackend(input)) { - try { - await createPageSaveRequest({ - user, - url: itemToSave.originalUrl, - articleSavingRequestId: clientRequestId || undefined, - state: input.state || undefined, - labels: input.labels || undefined, - folder: input.folder || undefined, - }) - } catch (e) { - return { - __typename: 'SaveError', - errorCodes: [SaveErrorCode.Unknown], - message: 'Failed to create page save request', - } - } - } else { - // do not publish a pubsub event if the item is imported - const newItem = await createOrUpdateLibraryItem( - itemToSave, - user.id, - undefined, - isImported - ) - clientRequestId = newItem.id - slug = newItem.slug + // do not publish a pubsub event if the item is imported + const newItem = await createOrUpdateLibraryItem( + itemToSave, + user.id, + undefined, + isImported, + input.finalUrl + ) + clientRequestId = newItem.id - await createAndSaveLabelsInLibraryItem( - clientRequestId, - user.id, - input.labels, - input.rssFeedUrl - ) - } + await createAndSaveLabelsInLibraryItem( + clientRequestId, + user.id, + input.labels, + input.rssFeedUrl + ) // we don't want to create thumbnail for imported pages and pages that already have thumbnail if (!isImported && !parseResult.parsedContent?.previewImage) { diff --git a/packages/api/test/resolvers/article.test.ts b/packages/api/test/resolvers/article.test.ts index d35ef6f4f..a3fd7e034 100644 --- a/packages/api/test/resolvers/article.test.ts +++ b/packages/api/test/resolvers/article.test.ts @@ -634,7 +634,7 @@ describe('Article API', () => { const savedItem = await findLibraryItemByUrl(url, user.id) expect(savedItem?.archivedAt).to.not.be.null - expect(savedItem?.labels?.map((l) => l.name)).to.eql(labels) + expect(savedItem?.labels?.map((l) => l.name)).to.include.members(labels) }) }) diff --git a/packages/content-fetch/src/request_handler.ts b/packages/content-fetch/src/request_handler.ts index 5fb1d0066..d0c3067f1 100644 --- a/packages/content-fetch/src/request_handler.ts +++ b/packages/content-fetch/src/request_handler.ts @@ -116,7 +116,8 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => { userId: user.id, data: { userId: user.id, - url: finalUrl, + url, + finalUrl, articleSavingRequestId, state, labels, diff --git a/packages/rss-handler/test/index.test.ts b/packages/rss-handler/test/index.test.ts index f5c7b7b38..6e21e1bfb 100644 --- a/packages/rss-handler/test/index.test.ts +++ b/packages/rss-handler/test/index.test.ts @@ -5,7 +5,7 @@ import { isOldItem, RssFeedItem } from '../src' describe('isOldItem', () => { it('returns true if item is older than 1 day', () => { const item = { - pubDate: '2020-01-01', + isoDate: '2020-01-01', } as RssFeedItem const mostRecentItemTimestamp = Date.now() @@ -15,7 +15,7 @@ describe('isOldItem', () => { it('returns true if item was published at the last fetched time', () => { const mostRecentItemTimestamp = Date.now() const item = { - pubDate: new Date(mostRecentItemTimestamp).toISOString(), + isoDate: new Date(mostRecentItemTimestamp).toISOString(), } as RssFeedItem expect(isOldItem(item, mostRecentItemTimestamp)).to.be.true