From 9c493d29db597f54f197528b2bfdf406909c0eaf Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Mon, 19 Feb 2024 11:20:21 +0800 Subject: [PATCH 1/4] More graceful shutdown for the api server --- packages/api/src/server.ts | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index ca08e3e0b..caa8ae5bc 100755 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -212,17 +212,35 @@ const main = async (): Promise => { listener.headersTimeout = 640 * 1000 // 10s more than above listener.timeout = 640 * 1000 // match headersTimeout - process.on('SIGINT', async () => { + const gracefulShutdown = async (signal: string) => { + console.log(`[api]: Received ${signal}, closing server...`) + + await apollo.stop() + console.log('[api]: Apollo server stopped') + + await new Promise((resolve) => { + listener.close((err) => { + console.log('[api]: Express listener closed') + if (err) { + console.log('[api]: error stopping listener', { err }) + } + resolve() + }) + }) + // Shutdown redis before DB because the quit sequence can // cause appDataSource to get reloaded in the callback await redisDataSource.shutdown() - console.log('Redis connection closed.') + console.log('[api]: Redis connection closed.') await appDataSource.destroy() - console.log('DB connection closed.') + console.log('[api]: DB connection closed.') process.exit(0) - }) + } + + process.on('SIGINT', () => gracefulShutdown('SIGINT')) + process.on('SIGTERM', () => gracefulShutdown('SIGTERM')) } // only call main if the file was called from the CLI and wasn't required from another module From afd4d2a397e6ca2c0b30b7996c6dd78ff738399b Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 19 Feb 2024 11:35:43 +0800 Subject: [PATCH 2/4] fix: duplicate key value violates unique constraint "library_item_pkey" by getting the item by user_id and md5(url) for update before upserting the item --- packages/api/src/jobs/save_page.ts | 3 +- packages/api/src/repository/library_item.ts | 77 ++++++++++++------- .../api/src/routers/svc/email_attachment.ts | 14 ++-- packages/api/src/services/library_item.ts | 35 +++++++-- packages/api/src/services/recommendation.ts | 8 +- packages/api/src/services/save_page.ts | 3 +- packages/api/test/resolvers/article.test.ts | 15 ++-- 7 files changed, 100 insertions(+), 55 deletions(-) diff --git a/packages/api/src/jobs/save_page.ts b/packages/api/src/jobs/save_page.ts index e1ce04874..7e67024e9 100644 --- a/packages/api/src/jobs/save_page.ts +++ b/packages/api/src/jobs/save_page.ts @@ -246,8 +246,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { // for non-pdf content, we need to save the page const result = await savePage( { - url, - finalUrl, + url: finalUrl, clientRequestId: articleSavingRequestId, title, originalContent: content, diff --git a/packages/api/src/repository/library_item.ts b/packages/api/src/repository/library_item.ts index 39d69d57e..678d9cd12 100644 --- a/packages/api/src/repository/library_item.ts +++ b/packages/api/src/repository/library_item.ts @@ -2,7 +2,7 @@ import { DeepPartial } from 'typeorm' import { getColumns, getColumnsDbName } from '.' import { appDataSource } from '../data_source' import { LibraryItem } from '../entity/library_item' -import { keysToCamelCase, wordsCount } from '../utils/helpers' +import { wordsCount } from '../utils/helpers' const convertToLibraryItem = (item: DeepPartial) => { return { @@ -18,52 +18,75 @@ export const libraryItemRepository = appDataSource return this.findOneBy({ id }) }, - findByUserIdAndUrl(userId: string, url: string) { + findByUserIdAndUrl(userId: string, url: string, forUpdate = false) { // md5 is used to hash the url to avoid the length limit of the index - return this.createQueryBuilder() + const qb = this.createQueryBuilder() .where('user_id = :userId', { userId }) .andWhere('md5(original_url) = md5(:url)', { url }) - .getOne() + + if (forUpdate) { + qb.setLock('pessimistic_write') + } + + return qb.getOne() }, countByCreatedAt(createdAt: Date) { return this.countBy({ createdAt }) }, - async upsertLibraryItem(item: DeepPartial, finalUrl?: string) { + async upsertLibraryItemById(item: DeepPartial) { const columns = getColumnsDbName(this) - // overwrites columns except id and slug - const overwrites = columns.filter( - (column) => !['id', 'slug'].includes(column) - ) + // overwrites columns except slug + const overwrites = columns.filter((column) => column !== 'slug') - const hashedUrl = 'md5(original_url)' - let conflictColumns = ['user_id', hashedUrl] - - if (item.id && finalUrl && finalUrl !== item.originalUrl) { - // update the original url if it's different from the current one in the database - conflictColumns = ['id'] - item.originalUrl = finalUrl - } - - const [query, params] = this.createQueryBuilder() + const result = await this.createQueryBuilder() .insert() .into(LibraryItem) .values(convertToLibraryItem(item)) - .orUpdate(overwrites, conflictColumns, { + .orUpdate(overwrites, ['id'], { skipUpdateIfNoValuesChanged: true, }) .returning(getColumns(this)) - .getQueryAndParameters() + .execute() - // this is a workaround for the typeorm bug which quotes the md5 function - const newQuery = query.replace(`"${hashedUrl}"`, hashedUrl) - const results = (await this.query(newQuery, params)) as never[] + if (result.generatedMaps.length === 0) { + throw new Error('Failed to upsert library item') + } - // convert to camel case - const newItem = keysToCamelCase(results[0]) as LibraryItem + return result.generatedMaps[0] as LibraryItem + }, - return newItem + async updateLibraryItem(item: DeepPartial) { + return this.save(item) + + // const columns = getColumnsDbName(this) + // // overwrites columns except id and slug + // const overwrites = columns.filter( + // (column) => !['id', 'slug'].includes(column) + // ) + + // const hashedUrl = 'md5(original_url)' + // const conflictColumns = ['user_id', hashedUrl] + + // const [query, params] = this.createQueryBuilder() + // .insert() + // .into(LibraryItem) + // .values(convertToLibraryItem(item)) + // .orUpdate(overwrites, conflictColumns, { + // skipUpdateIfNoValuesChanged: true, + // }) + // .returning(getColumns(this)) + // .getQueryAndParameters() + + // // this is a workaround for the typeorm bug which quotes the md5 function + // const newQuery = query.replace(`"${hashedUrl}"`, hashedUrl) + // const results = (await this.query(newQuery, params)) as never[] + + // // convert to camel case + // const newItem = keysToCamelCase(results[0]) as LibraryItem + + // return newItem }, createByPopularRead(name: string, userId: string) { diff --git a/packages/api/src/routers/svc/email_attachment.ts b/packages/api/src/routers/svc/email_attachment.ts index 0de5481a0..9f6e2ee0a 100644 --- a/packages/api/src/routers/svc/email_attachment.ts +++ b/packages/api/src/routers/svc/email_attachment.ts @@ -1,15 +1,13 @@ import express from 'express' -import { DeepPartial } from 'typeorm' -import { - ContentReaderType, - LibraryItem, - LibraryItemState, -} from '../../entity/library_item' +import { ContentReaderType, LibraryItemState } from '../../entity/library_item' import { UploadFile } from '../../entity/upload_file' import { env } from '../../env' import { PageType, UploadFileStatus } from '../../generated/graphql' import { authTrx } from '../../repository' -import { createOrUpdateLibraryItem } from '../../services/library_item' +import { + createOrUpdateLibraryItem, + CreateOrUpdateLibraryItemArgs, +} from '../../services/library_item' import { findNewsletterEmailByAddress } from '../../services/newsletters' import { updateReceivedEmail } from '../../services/received_emails' import { @@ -154,7 +152,7 @@ export function emailAttachmentRouter() { ? PageType.File : PageType.Book const title = subject || uploadFileData.fileName - const itemToCreate: DeepPartial = { + const itemToCreate: CreateOrUpdateLibraryItemArgs = { originalUrl: uploadFileUrlOverride, itemType, textContentHash: uploadFileHash, diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index f838834f0..a09b4c773 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -16,6 +16,7 @@ import { createPubSubClient, EntityType } from '../pubsub' import { redisDataSource } from '../redis_data_source' import { authTrx, getColumns, queryBuilderToRawSql } from '../repository' import { libraryItemRepository } from '../repository/library_item' +import { Merge } from '../util' import { setRecentlySavedItemInRedis } from '../utils/helpers' import { parseSearchQuery } from '../utils/search' import { addLabelsToLibraryItem } from './labels' @@ -825,18 +826,38 @@ export const createLibraryItems = async ( ) } +export type CreateOrUpdateLibraryItemArgs = Merge< + DeepPartial, + { originalUrl: string } +> export const createOrUpdateLibraryItem = async ( - libraryItem: DeepPartial, + libraryItem: CreateOrUpdateLibraryItemArgs, userId: string, pubsub = createPubSubClient(), - skipPubSub = false, - finalUrl?: string + skipPubSub = false ): Promise => { const newLibraryItem = await authTrx( - async (tx) => - tx - .withRepository(libraryItemRepository) - .upsertLibraryItem(libraryItem, finalUrl), + async (tx) => { + const repo = tx.withRepository(libraryItemRepository) + // find existing library item by user_id and url + const existingLibraryItem = await repo.findByUserIdAndUrl( + userId, + libraryItem.originalUrl, + true + ) + + if (existingLibraryItem) { + // update existing library item + return repo.save({ + ...libraryItem, + id: existingLibraryItem.id, + slug: existingLibraryItem.slug, // keep the original slug + }) + } + + // create or update library item + return repo.upsertLibraryItemById(libraryItem) + }, undefined, userId ) diff --git a/packages/api/src/services/recommendation.ts b/packages/api/src/services/recommendation.ts index cd090cdb6..483b10366 100644 --- a/packages/api/src/services/recommendation.ts +++ b/packages/api/src/services/recommendation.ts @@ -5,7 +5,11 @@ import { Recommendation } from '../entity/recommendation' import { authTrx } from '../repository' import { logger } from '../utils/logger' import { createHighlights } from './highlights' -import { createOrUpdateLibraryItem, findLibraryItemByUrl } from './library_item' +import { + createOrUpdateLibraryItem, + CreateOrUpdateLibraryItemArgs, + findLibraryItemByUrl, +} from './library_item' export const addRecommendation = async ( item: LibraryItem, @@ -18,7 +22,7 @@ export const addRecommendation = async ( let recommendedItem = await findLibraryItemByUrl(item.originalUrl, userId) if (!recommendedItem) { // create a new item - const newItem: DeepPartial = { + const newItem: CreateOrUpdateLibraryItemArgs = { user: { id: userId }, slug: item.slug, title: item.title, diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index fafc20dc7..e4f065e94 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -128,8 +128,7 @@ export const savePage = async ( itemToSave, user.id, undefined, - isImported, - input.finalUrl + isImported ) clientRequestId = newItem.id diff --git a/packages/api/test/resolvers/article.test.ts b/packages/api/test/resolvers/article.test.ts index a3fd7e034..c93a388b3 100644 --- a/packages/api/test/resolvers/article.test.ts +++ b/packages/api/test/resolvers/article.test.ts @@ -32,6 +32,7 @@ import { findLibraryItemById, findLibraryItemByUrl, updateLibraryItem, + CreateOrUpdateLibraryItemArgs, } from '../../src/services/library_item' import { deleteUser } from '../../src/services/user' import * as createTask from '../../src/utils/createTask' @@ -443,7 +444,7 @@ describe('Article API', () => { let itemId: string before(async () => { - const itemToCreate: DeepPartial = { + const itemToCreate: CreateOrUpdateLibraryItemArgs = { title: 'test title', originalContent: '

test

', slug: realSlug, @@ -696,7 +697,7 @@ describe('Article API', () => { let itemId: string before(async () => { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test title', readableContent: '

test

', @@ -924,7 +925,7 @@ describe('Article API', () => { const readingProgressArray = [0, 2, 97, 98, 100] // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test title', readableContent: `

test ${searchedKeyword}

`, @@ -1971,7 +1972,7 @@ describe('Article API', () => { before(async () => { // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'typeahead search item', readableContent: '

test

', @@ -2045,7 +2046,7 @@ describe('Article API', () => { before(async () => { // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { title: 'test item', slug: '', readableContent: '

test

', @@ -2316,7 +2317,7 @@ describe('Article API', () => { let articleId = '' before(async () => { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test setFavoriteArticle', slug: '', @@ -2361,7 +2362,7 @@ describe('Article API', () => { before(async () => { // Create some test items for (let i = 0; i < 5; i++) { - const itemToSave: DeepPartial = { + const itemToSave: CreateOrUpdateLibraryItemArgs = { user, title: 'test item', readableContent: '

test

', From 26d05a2e6e044789da36b3e417c08cfa2c47a854 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 19 Feb 2024 11:47:09 +0800 Subject: [PATCH 3/4] delete the new item if it is different from the existing one --- packages/api/src/repository/library_item.ts | 32 --------------------- packages/api/src/services/library_item.ts | 11 +++++-- 2 files changed, 9 insertions(+), 34 deletions(-) diff --git a/packages/api/src/repository/library_item.ts b/packages/api/src/repository/library_item.ts index 678d9cd12..f4003b7bc 100644 --- a/packages/api/src/repository/library_item.ts +++ b/packages/api/src/repository/library_item.ts @@ -57,38 +57,6 @@ export const libraryItemRepository = appDataSource return result.generatedMaps[0] as LibraryItem }, - async updateLibraryItem(item: DeepPartial) { - return this.save(item) - - // const columns = getColumnsDbName(this) - // // overwrites columns except id and slug - // const overwrites = columns.filter( - // (column) => !['id', 'slug'].includes(column) - // ) - - // const hashedUrl = 'md5(original_url)' - // const conflictColumns = ['user_id', hashedUrl] - - // const [query, params] = this.createQueryBuilder() - // .insert() - // .into(LibraryItem) - // .values(convertToLibraryItem(item)) - // .orUpdate(overwrites, conflictColumns, { - // skipUpdateIfNoValuesChanged: true, - // }) - // .returning(getColumns(this)) - // .getQueryAndParameters() - - // // this is a workaround for the typeorm bug which quotes the md5 function - // const newQuery = query.replace(`"${hashedUrl}"`, hashedUrl) - // const results = (await this.query(newQuery, params)) as never[] - - // // convert to camel case - // const newItem = keysToCamelCase(results[0]) as LibraryItem - - // return newItem - }, - createByPopularRead(name: string, userId: string) { return this.query( ` diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index a09b4c773..20b47afb7 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -839,7 +839,7 @@ export const createOrUpdateLibraryItem = async ( const newLibraryItem = await authTrx( async (tx) => { const repo = tx.withRepository(libraryItemRepository) - // find existing library item by user_id and url + // find existing library item by user_id and url for update const existingLibraryItem = await repo.findByUserIdAndUrl( userId, libraryItem.originalUrl, @@ -848,11 +848,18 @@ export const createOrUpdateLibraryItem = async ( if (existingLibraryItem) { // update existing library item - return repo.save({ + const newItem = await repo.save({ ...libraryItem, id: existingLibraryItem.id, slug: existingLibraryItem.slug, // keep the original slug }) + + // delete the new item if it's different from the existing one + if (libraryItem.id && libraryItem.id !== existingLibraryItem.id) { + await repo.delete(libraryItem.id) + } + + return newItem } // create or update library item From 4cfe4f95ae8a17eb5db17f75a3e9ec10cf81e958 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Tue, 20 Feb 2024 11:26:21 +0800 Subject: [PATCH 4/4] Remove bottom-wrapper element which is added on NYT and some other sites RSS feeds --- packages/readabilityjs/Readability.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 999d7de71..a64b9360f 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -203,7 +203,7 @@ Readability.prototype = { // Readability-readerable.js. Please keep both copies in sync. articleNegativeLookBehindCandidates: /breadcrumbs|breadcrumb|utils|trilist|_header/i, articleNegativeLookAheadCandidates: /outstream(.?)_|sub(.?)_|m_|omeda-promo-|in-article-advert|block-ad-.*|tl_/i, - unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-head|post-tag|li-date|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h|subscribe-dialog|icon|bumped|dvz-social-media-buttons|post-toc|mobile-menu|mobile-navbar|tl_article_header|mvp(-post)*-(add-story|soc(-mob)*-wrap)|w-condition-invisible|rich-text-block main w-richtext|rich-text-block_ataglance at-a-glance test w-richtext|PostsPage-commentsSection|hide-text|text-blurple/i, + unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-head|post-tag|li-date|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h|subscribe-dialog|icon|bumped|dvz-social-media-buttons|post-toc|mobile-menu|mobile-navbar|tl_article_header|mvp(-post)*-(add-story|soc(-mob)*-wrap)|w-condition-invisible|rich-text-block main w-richtext|rich-text-block_ataglance at-a-glance test w-richtext|PostsPage-commentsSection|hide-text|text-blurple|bottom-wrapper/i, // okMaybeItsACandidate: /and|article(?!-breadcrumb)|body|column|content|main|shadow|post-header/i, get okMaybeItsACandidate() { return new RegExp(`and|(?