Merge remote-tracking branch 'origin/main' into feature/android-mark-ad-read

This commit is contained in:
Stefano Sansone
2024-02-21 00:01:28 +00:00
9 changed files with 99 additions and 61 deletions

View File

@ -246,8 +246,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
// for non-pdf content, we need to save the page
const result = await savePage(
{
url,
finalUrl,
url: finalUrl,
clientRequestId: articleSavingRequestId,
title,
originalContent: content,

View File

@ -2,7 +2,7 @@ import { DeepPartial } from 'typeorm'
import { getColumns, getColumnsDbName } from '.'
import { appDataSource } from '../data_source'
import { LibraryItem } from '../entity/library_item'
import { keysToCamelCase, wordsCount } from '../utils/helpers'
import { wordsCount } from '../utils/helpers'
const convertToLibraryItem = (item: DeepPartial<LibraryItem>) => {
return {
@ -18,52 +18,43 @@ export const libraryItemRepository = appDataSource
return this.findOneBy({ id })
},
findByUserIdAndUrl(userId: string, url: string) {
findByUserIdAndUrl(userId: string, url: string, forUpdate = false) {
// md5 is used to hash the url to avoid the length limit of the index
return this.createQueryBuilder()
const qb = this.createQueryBuilder()
.where('user_id = :userId', { userId })
.andWhere('md5(original_url) = md5(:url)', { url })
.getOne()
if (forUpdate) {
qb.setLock('pessimistic_write')
}
return qb.getOne()
},
countByCreatedAt(createdAt: Date) {
return this.countBy({ createdAt })
},
async upsertLibraryItem(item: DeepPartial<LibraryItem>, finalUrl?: string) {
async upsertLibraryItemById(item: DeepPartial<LibraryItem>) {
const columns = getColumnsDbName(this)
// overwrites columns except id and slug
const overwrites = columns.filter(
(column) => !['id', 'slug'].includes(column)
)
// overwrites columns except slug
const overwrites = columns.filter((column) => column !== 'slug')
const hashedUrl = 'md5(original_url)'
let conflictColumns = ['user_id', hashedUrl]
if (item.id && finalUrl && finalUrl !== item.originalUrl) {
// update the original url if it's different from the current one in the database
conflictColumns = ['id']
item.originalUrl = finalUrl
}
const [query, params] = this.createQueryBuilder()
const result = await this.createQueryBuilder()
.insert()
.into(LibraryItem)
.values(convertToLibraryItem(item))
.orUpdate(overwrites, conflictColumns, {
.orUpdate(overwrites, ['id'], {
skipUpdateIfNoValuesChanged: true,
})
.returning(getColumns(this))
.getQueryAndParameters()
.execute()
// this is a workaround for the typeorm bug which quotes the md5 function
const newQuery = query.replace(`"${hashedUrl}"`, hashedUrl)
const results = (await this.query(newQuery, params)) as never[]
if (result.generatedMaps.length === 0) {
throw new Error('Failed to upsert library item')
}
// convert to camel case
const newItem = keysToCamelCase(results[0]) as LibraryItem
return newItem
return result.generatedMaps[0] as LibraryItem
},
createByPopularRead(name: string, userId: string) {

View File

@ -1,15 +1,13 @@
import express from 'express'
import { DeepPartial } from 'typeorm'
import {
ContentReaderType,
LibraryItem,
LibraryItemState,
} from '../../entity/library_item'
import { ContentReaderType, LibraryItemState } from '../../entity/library_item'
import { UploadFile } from '../../entity/upload_file'
import { env } from '../../env'
import { PageType, UploadFileStatus } from '../../generated/graphql'
import { authTrx } from '../../repository'
import { createOrUpdateLibraryItem } from '../../services/library_item'
import {
createOrUpdateLibraryItem,
CreateOrUpdateLibraryItemArgs,
} from '../../services/library_item'
import { findNewsletterEmailByAddress } from '../../services/newsletters'
import { updateReceivedEmail } from '../../services/received_emails'
import {
@ -154,7 +152,7 @@ export function emailAttachmentRouter() {
? PageType.File
: PageType.Book
const title = subject || uploadFileData.fileName
const itemToCreate: DeepPartial<LibraryItem> = {
const itemToCreate: CreateOrUpdateLibraryItemArgs = {
originalUrl: uploadFileUrlOverride,
itemType,
textContentHash: uploadFileHash,

View File

@ -212,17 +212,35 @@ const main = async (): Promise<void> => {
listener.headersTimeout = 640 * 1000 // 10s more than above
listener.timeout = 640 * 1000 // match headersTimeout
process.on('SIGINT', async () => {
const gracefulShutdown = async (signal: string) => {
console.log(`[api]: Received ${signal}, closing server...`)
await apollo.stop()
console.log('[api]: Apollo server stopped')
await new Promise<void>((resolve) => {
listener.close((err) => {
console.log('[api]: Express listener closed')
if (err) {
console.log('[api]: error stopping listener', { err })
}
resolve()
})
})
// Shutdown redis before DB because the quit sequence can
// cause appDataSource to get reloaded in the callback
await redisDataSource.shutdown()
console.log('Redis connection closed.')
console.log('[api]: Redis connection closed.')
await appDataSource.destroy()
console.log('DB connection closed.')
console.log('[api]: DB connection closed.')
process.exit(0)
})
}
process.on('SIGINT', () => gracefulShutdown('SIGINT'))
process.on('SIGTERM', () => gracefulShutdown('SIGTERM'))
}
// only call main if the file was called from the CLI and wasn't required from another module

View File

@ -16,6 +16,7 @@ import { createPubSubClient, EntityType } from '../pubsub'
import { redisDataSource } from '../redis_data_source'
import { authTrx, getColumns, queryBuilderToRawSql } from '../repository'
import { libraryItemRepository } from '../repository/library_item'
import { Merge } from '../util'
import { setRecentlySavedItemInRedis } from '../utils/helpers'
import { parseSearchQuery } from '../utils/search'
import { addLabelsToLibraryItem } from './labels'
@ -825,18 +826,45 @@ export const createLibraryItems = async (
)
}
export type CreateOrUpdateLibraryItemArgs = Merge<
DeepPartial<LibraryItem>,
{ originalUrl: string }
>
export const createOrUpdateLibraryItem = async (
libraryItem: DeepPartial<LibraryItem>,
libraryItem: CreateOrUpdateLibraryItemArgs,
userId: string,
pubsub = createPubSubClient(),
skipPubSub = false,
finalUrl?: string
skipPubSub = false
): Promise<LibraryItem> => {
const newLibraryItem = await authTrx(
async (tx) =>
tx
.withRepository(libraryItemRepository)
.upsertLibraryItem(libraryItem, finalUrl),
async (tx) => {
const repo = tx.withRepository(libraryItemRepository)
// find existing library item by user_id and url for update
const existingLibraryItem = await repo.findByUserIdAndUrl(
userId,
libraryItem.originalUrl,
true
)
if (existingLibraryItem) {
// update existing library item
const newItem = await repo.save({
...libraryItem,
id: existingLibraryItem.id,
slug: existingLibraryItem.slug, // keep the original slug
})
// delete the new item if it's different from the existing one
if (libraryItem.id && libraryItem.id !== existingLibraryItem.id) {
await repo.delete(libraryItem.id)
}
return newItem
}
// create or update library item
return repo.upsertLibraryItemById(libraryItem)
},
undefined,
userId
)

View File

@ -5,7 +5,11 @@ import { Recommendation } from '../entity/recommendation'
import { authTrx } from '../repository'
import { logger } from '../utils/logger'
import { createHighlights } from './highlights'
import { createOrUpdateLibraryItem, findLibraryItemByUrl } from './library_item'
import {
createOrUpdateLibraryItem,
CreateOrUpdateLibraryItemArgs,
findLibraryItemByUrl,
} from './library_item'
export const addRecommendation = async (
item: LibraryItem,
@ -18,7 +22,7 @@ export const addRecommendation = async (
let recommendedItem = await findLibraryItemByUrl(item.originalUrl, userId)
if (!recommendedItem) {
// create a new item
const newItem: DeepPartial<LibraryItem> = {
const newItem: CreateOrUpdateLibraryItemArgs = {
user: { id: userId },
slug: item.slug,
title: item.title,

View File

@ -128,8 +128,7 @@ export const savePage = async (
itemToSave,
user.id,
undefined,
isImported,
input.finalUrl
isImported
)
clientRequestId = newItem.id

View File

@ -32,6 +32,7 @@ import {
findLibraryItemById,
findLibraryItemByUrl,
updateLibraryItem,
CreateOrUpdateLibraryItemArgs,
} from '../../src/services/library_item'
import { deleteUser } from '../../src/services/user'
import * as createTask from '../../src/utils/createTask'
@ -443,7 +444,7 @@ describe('Article API', () => {
let itemId: string
before(async () => {
const itemToCreate: DeepPartial<LibraryItem> = {
const itemToCreate: CreateOrUpdateLibraryItemArgs = {
title: 'test title',
originalContent: '<p>test</p>',
slug: realSlug,
@ -696,7 +697,7 @@ describe('Article API', () => {
let itemId: string
before(async () => {
const itemToSave: DeepPartial<LibraryItem> = {
const itemToSave: CreateOrUpdateLibraryItemArgs = {
user,
title: 'test title',
readableContent: '<p>test</p>',
@ -924,7 +925,7 @@ describe('Article API', () => {
const readingProgressArray = [0, 2, 97, 98, 100]
// Create some test items
for (let i = 0; i < 5; i++) {
const itemToSave: DeepPartial<LibraryItem> = {
const itemToSave: CreateOrUpdateLibraryItemArgs = {
user,
title: 'test title',
readableContent: `<p>test ${searchedKeyword}</p>`,
@ -1971,7 +1972,7 @@ describe('Article API', () => {
before(async () => {
// Create some test items
for (let i = 0; i < 5; i++) {
const itemToSave: DeepPartial<LibraryItem> = {
const itemToSave: CreateOrUpdateLibraryItemArgs = {
user,
title: 'typeahead search item',
readableContent: '<p>test</p>',
@ -2045,7 +2046,7 @@ describe('Article API', () => {
before(async () => {
// Create some test items
for (let i = 0; i < 5; i++) {
const itemToSave: DeepPartial<LibraryItem> = {
const itemToSave: CreateOrUpdateLibraryItemArgs = {
title: 'test item',
slug: '',
readableContent: '<p>test</p>',
@ -2316,7 +2317,7 @@ describe('Article API', () => {
let articleId = ''
before(async () => {
const itemToSave: DeepPartial<LibraryItem> = {
const itemToSave: CreateOrUpdateLibraryItemArgs = {
user,
title: 'test setFavoriteArticle',
slug: '',
@ -2361,7 +2362,7 @@ describe('Article API', () => {
before(async () => {
// Create some test items
for (let i = 0; i < 5; i++) {
const itemToSave: DeepPartial<LibraryItem> = {
const itemToSave: CreateOrUpdateLibraryItemArgs = {
user,
title: 'test item',
readableContent: '<p>test</p>',

View File

@ -203,7 +203,7 @@ Readability.prototype = {
// Readability-readerable.js. Please keep both copies in sync.
articleNegativeLookBehindCandidates: /breadcrumbs|breadcrumb|utils|trilist|_header/i,
articleNegativeLookAheadCandidates: /outstream(.?)_|sub(.?)_|m_|omeda-promo-|in-article-advert|block-ad-.*|tl_/i,
unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-head|post-tag|li-date|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h|subscribe-dialog|icon|bumped|dvz-social-media-buttons|post-toc|mobile-menu|mobile-navbar|tl_article_header|mvp(-post)*-(add-story|soc(-mob)*-wrap)|w-condition-invisible|rich-text-block main w-richtext|rich-text-block_ataglance at-a-glance test w-richtext|PostsPage-commentsSection|hide-text|text-blurple/i,
unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-head|post-tag|li-date|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h|subscribe-dialog|icon|bumped|dvz-social-media-buttons|post-toc|mobile-menu|mobile-navbar|tl_article_header|mvp(-post)*-(add-story|soc(-mob)*-wrap)|w-condition-invisible|rich-text-block main w-richtext|rich-text-block_ataglance at-a-glance test w-richtext|PostsPage-commentsSection|hide-text|text-blurple|bottom-wrapper/i,
// okMaybeItsACandidate: /and|article(?!-breadcrumb)|body|column|content|main|shadow|post-header/i,
get okMaybeItsACandidate() {
return new RegExp(`and|(?<!${this.articleNegativeLookAheadCandidates.source})article(?!-(${this.articleNegativeLookBehindCandidates.source}))|body|column|content|^(?!main-navigation|main-header)main|shadow|post-header|hfeed site|blog-posts hfeed|container-banners|menu-opacity|header-with-anchor-widget|commentOnSelection|highlight--with-header`, 'i')