Merge pull request #2994 from omnivore-app/fix/save-page-source

fix: when source is rss-feeder and domain is youtube.com, the item is treated as already parsed and will not be re-parsed in the backend
This commit is contained in:
Hongbo Wu
2023-10-23 16:00:50 +08:00
committed by GitHub
3 changed files with 31 additions and 10 deletions

View File

@ -53,11 +53,9 @@ import { getInternalLabelWithColor } from '../../repository/label'
import { libraryItemRepository } from '../../repository/library_item'
import { userRepository } from '../../repository/user'
import { createPageSaveRequest } from '../../services/create_page_save_request'
import { findHighlightsByLibraryItemId } from '../../services/highlights'
import {
addLabelsToLibraryItem,
findLabelsByIds,
findLabelsByLibraryItemId,
findOrCreateLabels,
saveLabelsInLibraryItem,
} from '../../services/labels'
@ -70,7 +68,6 @@ import {
updateLibraryItemReadingProgress,
updateLibraryItems,
} from '../../services/library_item'
import { findRecommendationsByLibraryItemId } from '../../services/recommendation'
import { parsedContentToLibraryItem } from '../../services/save_page'
import {
findUploadFileById,
@ -84,14 +81,12 @@ import {
cleanUrl,
errorHandler,
generateSlug,
isBase64Image,
isParsingTimeout,
libraryItemToArticle,
libraryItemToSearchItem,
titleForFilePath,
userDataToUser,
} from '../../utils/helpers'
import { createImageProxyUrl } from '../../utils/imageproxy'
import {
contentConverter,
getDistillerResult,

View File

@ -36,6 +36,7 @@ const FORCE_PUPPETEER_URLS = [
TWEET_URL_REGEX,
/^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/,
]
const ALREADY_PARSED_SOURCES = ['puppeteer-parse', 'csv-importer', 'rss-feeder']
const createSlug = (url: string, title?: Maybe<string> | undefined) => {
const { pathname } = new URL(url)
@ -52,7 +53,7 @@ const createSlug = (url: string, title?: Maybe<string> | undefined) => {
const shouldParseInBackend = (input: SavePageInput): boolean => {
return (
input.source !== 'puppeteer-parse' &&
ALREADY_PARSED_SOURCES.indexOf(input.source) === -1 &&
FORCE_PUPPETEER_URLS.some((regex) => regex.test(input.url))
)
}
@ -99,7 +100,7 @@ export const savePage = async (
await createPageSaveRequest({
userId: user.id,
url: itemToSave.originalUrl,
articleSavingRequestId: clientRequestId,
articleSavingRequestId: clientRequestId || undefined,
state: input.state || undefined,
labels: input.labels || undefined,
})

View File

@ -185,15 +185,17 @@ const savePageQuery = (
title: string,
originalContent: string,
state: ArticleSavingRequestStatus | null = null,
labels: string[] | null = null
labels: string[] | null = null,
clientRequestId = generateFakeUuid(),
source = 'puppeteer-parse'
) => {
return `
mutation {
savePage(
input: {
url: "${url}",
source: "test",
clientRequestId: "${generateFakeUuid()}",
source: "${source}",
clientRequestId: "${clientRequestId}",
title: "${title}",
originalContent: "${originalContent}"
state: ${state}
@ -605,6 +607,29 @@ describe('Article API', () => {
expect(savedItem?.labels?.map((l) => l.name)).to.eql(labels)
})
})
context('when the source is rss-feeder and url is from youtube.com', () => {
const source = 'rss-feeder'
const stub = sinon.stub(createTask, 'enqueueParseRequest')
before(() => {
url = 'https://www.youtube.com/watch?v=123'
})
after(async () => {
await deleteLibraryItemByUrl(url, user.id)
sinon.restore()
})
it('does not parse in the backend', async () => {
await graphqlRequest(
savePageQuery(url, title, originalContent, null, null, '', source),
authToken
).expect(200)
expect(stub).not.to.have.been.called
})
})
})
describe('SaveUrl', () => {