diff --git a/packages/api/src/jobs/save_page.ts b/packages/api/src/jobs/save_page.ts index e1ce04874..7e67024e9 100644 --- a/packages/api/src/jobs/save_page.ts +++ b/packages/api/src/jobs/save_page.ts @@ -246,8 +246,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { // for non-pdf content, we need to save the page const result = await savePage( { - url, - finalUrl, + url: finalUrl, clientRequestId: articleSavingRequestId, title, originalContent: content, diff --git a/packages/api/src/repository/library_item.ts b/packages/api/src/repository/library_item.ts index 39d69d57e..f4003b7bc 100644 --- a/packages/api/src/repository/library_item.ts +++ b/packages/api/src/repository/library_item.ts @@ -2,7 +2,7 @@ import { DeepPartial } from 'typeorm' import { getColumns, getColumnsDbName } from '.' import { appDataSource } from '../data_source' import { LibraryItem } from '../entity/library_item' -import { keysToCamelCase, wordsCount } from '../utils/helpers' +import { wordsCount } from '../utils/helpers' const convertToLibraryItem = (item: DeepPartial) => { return { @@ -18,52 +18,43 @@ export const libraryItemRepository = appDataSource return this.findOneBy({ id }) }, - findByUserIdAndUrl(userId: string, url: string) { + findByUserIdAndUrl(userId: string, url: string, forUpdate = false) { // md5 is used to hash the url to avoid the length limit of the index - return this.createQueryBuilder() + const qb = this.createQueryBuilder() .where('user_id = :userId', { userId }) .andWhere('md5(original_url) = md5(:url)', { url }) - .getOne() + + if (forUpdate) { + qb.setLock('pessimistic_write') + } + + return qb.getOne() }, countByCreatedAt(createdAt: Date) { return this.countBy({ createdAt }) }, - async upsertLibraryItem(item: DeepPartial, finalUrl?: string) { + async upsertLibraryItemById(item: DeepPartial) { const columns = getColumnsDbName(this) - // overwrites columns except id and slug - const overwrites = columns.filter( - (column) => !['id', 'slug'].includes(column) - ) + // overwrites columns except slug + const overwrites = columns.filter((column) => column !== 'slug') - const hashedUrl = 'md5(original_url)' - let conflictColumns = ['user_id', hashedUrl] - - if (item.id && finalUrl && finalUrl !== item.originalUrl) { - // update the original url if it's different from the current one in the database - conflictColumns = ['id'] - item.originalUrl = finalUrl - } - - const [query, params] = this.createQueryBuilder() + const result = await this.createQueryBuilder() .insert() .into(LibraryItem) .values(convertToLibraryItem(item)) - .orUpdate(overwrites, conflictColumns, { + .orUpdate(overwrites, ['id'], { skipUpdateIfNoValuesChanged: true, }) .returning(getColumns(this)) - .getQueryAndParameters() + .execute() - // this is a workaround for the typeorm bug which quotes the md5 function - const newQuery = query.replace(`"${hashedUrl}"`, hashedUrl) - const results = (await this.query(newQuery, params)) as never[] + if (result.generatedMaps.length === 0) { + throw new Error('Failed to upsert library item') + } - // convert to camel case - const newItem = keysToCamelCase(results[0]) as LibraryItem - - return newItem + return result.generatedMaps[0] as LibraryItem }, createByPopularRead(name: string, userId: string) { diff --git a/packages/api/src/routers/svc/email_attachment.ts b/packages/api/src/routers/svc/email_attachment.ts index 0de5481a0..9f6e2ee0a 100644 --- a/packages/api/src/routers/svc/email_attachment.ts +++ b/packages/api/src/routers/svc/email_attachment.ts @@ -1,15 +1,13 @@ import express from 'express' -import { DeepPartial } from 'typeorm' -import { - ContentReaderType, - LibraryItem, - LibraryItemState, -} from '../../entity/library_item' +import { ContentReaderType, LibraryItemState } from '../../entity/library_item' import { UploadFile } from '../../entity/upload_file' import { env } from '../../env' import { PageType, UploadFileStatus } from '../../generated/graphql' import { authTrx } from '../../repository' -import { createOrUpdateLibraryItem } from '../../services/library_item' +import { + createOrUpdateLibraryItem, + CreateOrUpdateLibraryItemArgs, +} from '../../services/library_item' import { findNewsletterEmailByAddress } from '../../services/newsletters' import { updateReceivedEmail } from '../../services/received_emails' import { @@ -154,7 +152,7 @@ export function emailAttachmentRouter() { ? PageType.File : PageType.Book const title = subject || uploadFileData.fileName - const itemToCreate: DeepPartial = { + const itemToCreate: CreateOrUpdateLibraryItemArgs = { originalUrl: uploadFileUrlOverride, itemType, textContentHash: uploadFileHash, diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index ca08e3e0b..caa8ae5bc 100755 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -212,17 +212,35 @@ const main = async (): Promise => { listener.headersTimeout = 640 * 1000 // 10s more than above listener.timeout = 640 * 1000 // match headersTimeout - process.on('SIGINT', async () => { + const gracefulShutdown = async (signal: string) => { + console.log(`[api]: Received ${signal}, closing server...`) + + await apollo.stop() + console.log('[api]: Apollo server stopped') + + await new Promise((resolve) => { + listener.close((err) => { + console.log('[api]: Express listener closed') + if (err) { + console.log('[api]: error stopping listener', { err }) + } + resolve() + }) + }) + // Shutdown redis before DB because the quit sequence can // cause appDataSource to get reloaded in the callback await redisDataSource.shutdown() - console.log('Redis connection closed.') + console.log('[api]: Redis connection closed.') await appDataSource.destroy() - console.log('DB connection closed.') + console.log('[api]: DB connection closed.') process.exit(0) - }) + } + + process.on('SIGINT', () => gracefulShutdown('SIGINT')) + process.on('SIGTERM', () => gracefulShutdown('SIGTERM')) } // only call main if the file was called from the CLI and wasn't required from another module diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index f838834f0..20b47afb7 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -16,6 +16,7 @@ import { createPubSubClient, EntityType } from '../pubsub' import { redisDataSource } from '../redis_data_source' import { authTrx, getColumns, queryBuilderToRawSql } from '../repository' import { libraryItemRepository } from '../repository/library_item' +import { Merge } from '../util' import { setRecentlySavedItemInRedis } from '../utils/helpers' import { parseSearchQuery } from '../utils/search' import { addLabelsToLibraryItem } from './labels' @@ -825,18 +826,45 @@ export const createLibraryItems = async ( ) } +export type CreateOrUpdateLibraryItemArgs = Merge< + DeepPartial, + { originalUrl: string } +> export const createOrUpdateLibraryItem = async ( - libraryItem: DeepPartial, + libraryItem: CreateOrUpdateLibraryItemArgs, userId: string, pubsub = createPubSubClient(), - skipPubSub = false, - finalUrl?: string + skipPubSub = false ): Promise => { const newLibraryItem = await authTrx( - async (tx) => - tx - .withRepository(libraryItemRepository) - .upsertLibraryItem(libraryItem, finalUrl), + async (tx) => { + const repo = tx.withRepository(libraryItemRepository) + // find existing library item by user_id and url for update + const existingLibraryItem = await repo.findByUserIdAndUrl( + userId, + libraryItem.originalUrl, + true + ) + + if (existingLibraryItem) { + // update existing library item + const newItem = await repo.save({ + ...libraryItem, + id: existingLibraryItem.id, + slug: existingLibraryItem.slug, // keep the original slug + }) + + // delete the new item if it's different from the existing one + if (libraryItem.id && libraryItem.id !== existingLibraryItem.id) { + await repo.delete(libraryItem.id) + } + + return newItem + } + + // create or update library item + return repo.upsertLibraryItemById(libraryItem) + }, undefined, userId ) diff --git a/packages/api/src/services/recommendation.ts b/packages/api/src/services/recommendation.ts index cd090cdb6..483b10366 100644 --- a/packages/api/src/services/recommendation.ts +++ b/packages/api/src/services/recommendation.ts @@ -5,7 +5,11 @@ import { Recommendation } from '../entity/recommendation' import { authTrx } from '../repository' import { logger } from '../utils/logger' import { createHighlights } from './highlights' -import { createOrUpdateLibraryItem, findLibraryItemByUrl } from './library_item' +import { + createOrUpdateLibraryItem, + CreateOrUpdateLibraryItemArgs, + findLibraryItemByUrl, +} from './library_item' export const addRecommendation = async ( item: LibraryItem, @@ -18,7 +22,7 @@ export const addRecommendation = async ( let recommendedItem = await findLibraryItemByUrl(item.originalUrl, userId) if (!recommendedItem) { // create a new item - const newItem: DeepPartial = { + const newItem: CreateOrUpdateLibraryItemArgs = { user: { id: userId }, slug: item.slug, title: item.title, diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index fafc20dc7..e4f065e94 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -128,8 +128,7 @@ export const savePage = async ( itemToSave, user.id, undefined, - isImported, - input.finalUrl + isImported ) clientRequestId = newItem.id diff --git a/packages/api/test/resolvers/article.test.ts b/packages/api/test/resolvers/article.test.ts index a3fd7e034..c93a388b3 100644 --- a/packages/api/test/resolvers/article.test.ts +++ b/packages/api/test/resolvers/article.test.ts @@ -32,6 +32,7 @@ import { findLibraryItemById, findLibraryItemByUrl, updateLibraryItem, + CreateOrUpdateLibraryItemArgs, } from '../../src/services/library_item' import { deleteUser } from '../../src/services/user' import * as createTask from '../../src/utils/createTask' @@ -443,7 +444,7 @@ describe('Article API', () => { let itemId: string before(async () => { - const itemToCreate: DeepPartial = { + const itemToCreate: CreateOrUpdateLibraryItemArgs = { title: 'test title', originalContent: '

test

', slug: realSlug, @@ -696,7 +697,7 @@ describe('Article API', () => { let itemId: string before(async () => { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test title', readableContent: '

test

', @@ -924,7 +925,7 @@ describe('Article API', () => { const readingProgressArray = [0, 2, 97, 98, 100] // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test title', readableContent: `

test ${searchedKeyword}

`, @@ -1971,7 +1972,7 @@ describe('Article API', () => { before(async () => { // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'typeahead search item', readableContent: '

test

', @@ -2045,7 +2046,7 @@ describe('Article API', () => { before(async () => { // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { title: 'test item', slug: '', readableContent: '

test

', @@ -2316,7 +2317,7 @@ describe('Article API', () => { let articleId = '' before(async () => { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test setFavoriteArticle', slug: '', @@ -2361,7 +2362,7 @@ describe('Article API', () => { before(async () => { // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test item', readableContent: '

test

', diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 999d7de71..a64b9360f 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -203,7 +203,7 @@ Readability.prototype = { // Readability-readerable.js. Please keep both copies in sync. articleNegativeLookBehindCandidates: /breadcrumbs|breadcrumb|utils|trilist|_header/i, articleNegativeLookAheadCandidates: /outstream(.?)_|sub(.?)_|m_|omeda-promo-|in-article-advert|block-ad-.*|tl_/i, - unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-head|post-tag|li-date|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h|subscribe-dialog|icon|bumped|dvz-social-media-buttons|post-toc|mobile-menu|mobile-navbar|tl_article_header|mvp(-post)*-(add-story|soc(-mob)*-wrap)|w-condition-invisible|rich-text-block main w-richtext|rich-text-block_ataglance at-a-glance test w-richtext|PostsPage-commentsSection|hide-text|text-blurple/i, + unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-head|post-tag|li-date|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h|subscribe-dialog|icon|bumped|dvz-social-media-buttons|post-toc|mobile-menu|mobile-navbar|tl_article_header|mvp(-post)*-(add-story|soc(-mob)*-wrap)|w-condition-invisible|rich-text-block main w-richtext|rich-text-block_ataglance at-a-glance test w-richtext|PostsPage-commentsSection|hide-text|text-blurple|bottom-wrapper/i, // okMaybeItsACandidate: /and|article(?!-breadcrumb)|body|column|content|main|shadow|post-header/i, get okMaybeItsACandidate() { return new RegExp(`and|(?