Merge pull request #3446 from omnivore-app/fix/refresh-feed

fix: add rss label and feed url to the pdf in rss feed item
This commit is contained in:
Hongbo Wu
2024-01-29 14:21:08 +08:00
committed by GitHub
9 changed files with 228 additions and 330 deletions

View File

@ -2283,8 +2283,11 @@ export type SaveFileInput = {
clientRequestId: Scalars['ID'];
folder?: InputMaybe<Scalars['String']>;
labels?: InputMaybe<Array<CreateLabelInput>>;
publishedAt?: InputMaybe<Scalars['Date']>;
savedAt?: InputMaybe<Scalars['Date']>;
source: Scalars['String'];
state?: InputMaybe<ArticleSavingRequestStatus>;
subscription?: InputMaybe<Scalars['String']>;
uploadFileId: Scalars['ID'];
url: Scalars['String'];
};

View File

@ -1718,8 +1718,11 @@ input SaveFileInput {
clientRequestId: ID!
folder: String
labels: [CreateLabelInput!]
publishedAt: Date
savedAt: Date
source: String!
state: ArticleSavingRequestStatus
subscription: String
uploadFileId: ID!
url: String!
}

View File

@ -8,14 +8,15 @@ import {
} from '../generated/graphql'
import { redisDataSource } from '../redis_data_source'
import { userRepository } from '../repository/user'
import { saveFile } from '../services/save_file'
import { savePage } from '../services/save_page'
import { uploadFile } from '../services/upload_file'
import { logger } from '../utils/logger'
const signToken = promisify(jwt.sign)
const IMPORTER_METRICS_COLLECTOR_URL = env.queue.importerMetricsUrl
const JWT_SECRET = env.server.jwtSecret
const REST_BACKEND_ENDPOINT = `${env.server.internalApiUrl}/api`
const MAX_ATTEMPTS = 2
const REQUEST_TIMEOUT = 30000 // 30 seconds
@ -34,29 +35,6 @@ interface Data {
taskId?: string
}
// Response envelope for the `uploadFileRequest` GraphQL mutation; used as the
// type argument of the axios.post call that requests an upload slot.
interface UploadFileResponse {
data: {
uploadFileRequest: {
// Identifier of the upload request; the caller hands it on as the upload file id.
id: string
// Signed URL that the file content is subsequently uploaded to.
uploadSignedUrl: string
// NOTE(review): the mutation issued in this file does not select the next two
// fields — confirm whether they are ever populated for this caller.
uploadFileId: string
createdPageId: string
// Present when the mutation resolves to its error variant.
errorCodes?: string[]
}
}
}
// Response envelope for the `createArticle` GraphQL mutation; used as the
// type argument of the axios.post call that creates the article.
interface CreateArticleResponse {
data: {
createArticle: {
createdArticle: {
// Identifier of the created article.
id: string
}
// Error codes reported by the mutation; a non-empty list is treated as failure.
errorCodes: string[]
}
}
}
interface FetchResult {
finalUrl: string
title?: string
@ -73,6 +51,12 @@ const uploadToSignedUrl = async (
contentType: string,
contentObjUrl: string
) => {
logger.info('uploading to signed url', {
uploadSignedUrl,
contentType,
contentObjUrl,
})
try {
const stream = await axios.get(contentObjUrl, {
responseType: 'stream',
@ -92,137 +76,33 @@ const uploadToSignedUrl = async (
}
}
/**
 * Requests an upload slot for a PDF from the backend GraphQL API.
 *
 * Signs a short-lived auth token for the user, posts the `uploadFileRequest`
 * mutation and returns the `uploadFileRequest` payload (id and signed URL).
 *
 * @param userId id of the user the upload belongs to; used as the token `uid`
 * @param url source URL of the PDF; URI-encoded before being sent
 * @param articleSavingRequestId forwarded to the mutation as `clientRequestId`
 * @returns the `uploadFileRequest` payload, or `null` when the mutation
 *   reports error codes or the HTTP request fails (both cases are logged)
 */
const getUploadIdAndSignedUrl = async (
  userId: string,
  url: string,
  articleSavingRequestId: string
) => {
  const auth = await signToken({ uid: userId }, JWT_SECRET)

  // The query text is sent verbatim; keep it unchanged.
  const query = `mutation UploadFileRequest($input: UploadFileRequestInput!) {
uploadFileRequest(input:$input) {
... on UploadFileRequestError {
errorCodes
}
... on UploadFileRequestSuccess {
id
uploadSignedUrl
}
}
}`
  const input = {
    url: encodeURI(url),
    contentType: 'application/pdf',
    clientRequestId: articleSavingRequestId,
  }
  const payload = JSON.stringify({ query, variables: { input } })

  try {
    const { data: body } = await axios.post<UploadFileResponse>(
      `${REST_BACKEND_ENDPOINT}/graphql`,
      payload,
      {
        headers: {
          Cookie: `auth=${auth as string};`,
          'Content-Type': 'application/json',
        },
        timeout: REQUEST_TIMEOUT,
      }
    )

    const { uploadFileRequest } = body.data
    const { errorCodes } = uploadFileRequest
    if (errorCodes?.length) {
      // Only the first error code is reported.
      console.error(
        'Error while getting upload id and signed url',
        errorCodes[0]
      )
      return null
    }

    return uploadFileRequest
  } catch (e) {
    console.error('error getting upload id and signed url', e)
    return null
  }
}
const uploadPdf = async (
url: string,
userId: string,
articleSavingRequestId: string
) => {
const uploadResult = await getUploadIdAndSignedUrl(
userId,
url,
articleSavingRequestId
const result = await uploadFile(
{
url,
contentType: 'application/pdf',
clientRequestId: articleSavingRequestId,
createPageEntry: true,
},
userId
)
if (!uploadResult) {
if (!result.uploadSignedUrl) {
throw new Error('error while getting upload id and signed url')
}
const uploaded = await uploadToSignedUrl(
uploadResult.uploadSignedUrl,
result.uploadSignedUrl,
'application/pdf',
url
)
if (!uploaded) {
throw new Error('error while uploading pdf')
}
return uploadResult.id
}
const sendCreateArticleMutation = async (userId: string, input: unknown) => {
const data = JSON.stringify({
query: `mutation CreateArticle ($input: CreateArticleInput!){
createArticle(input:$input){
... on CreateArticleSuccess{
createdArticle{
id
}
}
... on CreateArticleError{
errorCodes
}
}
}`,
variables: {
input,
},
})
const auth = await signToken({ uid: userId }, JWT_SECRET)
try {
const response = await axios.post<CreateArticleResponse>(
`${REST_BACKEND_ENDPOINT}/graphql`,
data,
{
headers: {
Cookie: `auth=${auth as string};`,
'Content-Type': 'application/json',
},
timeout: REQUEST_TIMEOUT,
}
)
if (
response.data.data.createArticle.errorCodes &&
response.data.data.createArticle.errorCodes.length > 0
) {
console.error(
'error while creating article',
response.data.data.createArticle.errorCodes[0]
)
return null
}
return response.data.data.createArticle
} catch (error) {
console.error('error creating article', error)
return null
}
return result.id
}
const sendImportStatusUpdate = async (
@ -231,6 +111,7 @@ const sendImportStatusUpdate = async (
isImported?: boolean
) => {
try {
logger.info('sending import status update')
const auth = await signToken({ uid: userId }, JWT_SECRET)
await axios.post(
@ -298,25 +179,37 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
const { title, contentType } = fetchedResult
let content = fetchedResult.content
const user = await userRepository.findById(userId)
if (!user) {
logger.error('Unable to save job, user can not be found.', {
userId,
url,
})
// if the user is not found, we do not retry
return false
}
// for pdf content, we need to upload the pdf
if (contentType === 'application/pdf') {
const encodedUrl = encodeURI(url)
const uploadFileId = await uploadPdf(url, userId, articleSavingRequestId)
const uploadedPdf = await sendCreateArticleMutation(userId, {
url: encodedUrl,
articleSavingRequestId,
uploadFileId,
state,
labels,
source,
folder,
rssFeedUrl,
savedAt,
publishedAt,
})
if (!uploadedPdf) {
throw new Error('error while saving uploaded pdf')
const result = await saveFile(
{
url,
uploadFileId,
state: state ? (state as ArticleSavingRequestStatus) : undefined,
labels,
source,
folder,
subscription: rssFeedUrl,
savedAt,
publishedAt,
clientRequestId: articleSavingRequestId,
},
user
)
if (result.__typename == 'SaveError') {
throw new Error(result.message || result.errorCodes[0])
}
isSaved = true
@ -331,16 +224,6 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
state = ArticleSavingRequestStatus.Failed
}
const user = await userRepository.findById(userId)
if (!user) {
logger.error('Unable to save job, user can not be found.', {
userId,
url,
})
// if the user is not found, we do not retry
return false
}
// for non-pdf content, we need to save the page
const result = await savePage(
{

View File

@ -87,6 +87,7 @@ import {
import { parsedContentToLibraryItem } from '../../services/save_page'
import {
findUploadFileById,
itemTypeForContentType,
setFileUploadComplete,
} from '../../services/upload_file'
import { traceAs } from '../../tracing'
@ -111,7 +112,6 @@ import {
parsePreparedContent,
} from '../../utils/parser'
import { getStorageFileDetails } from '../../utils/uploads'
import { itemTypeForContentType } from '../upload_files'
export enum ArticleFormat {
Markdown = 'markdown',

View File

@ -1,55 +1,17 @@
/* eslint-disable @typescript-eslint/no-unused-vars */
import normalizeUrl from 'normalize-url'
import path from 'path'
import { LibraryItemState } from '../../entity/library_item'
import { UploadFile } from '../../entity/upload_file'
import { env } from '../../env'
import {
MutationUploadFileRequestArgs,
PageType,
UploadFileRequestError,
UploadFileRequestErrorCode,
UploadFileRequestSuccess,
UploadFileStatus,
} from '../../generated/graphql'
import { validateUrl } from '../../services/create_page_save_request'
import {
createLibraryItem,
findLibraryItemByUrl,
updateLibraryItem,
} from '../../services/library_item'
import { uploadFile } from '../../services/upload_file'
import { analytics } from '../../utils/analytics'
import { generateSlug } from '../../utils/helpers'
import { authorized } from '../../utils/gql-utils'
import {
contentReaderForLibraryItem,
generateUploadFilePathName,
generateUploadSignedUrl,
} from '../../utils/uploads'
/**
 * Tells whether `url` uses the `file:` scheme.
 * Throws if `url` cannot be parsed by the URL constructor.
 */
const isFileUrl = (url: string): boolean => new URL(url).protocol === 'file:'
/**
 * Maps an uploaded file's MIME type to a page type: EPUB archives become
 * `PageType.Book`, every other content type becomes `PageType.File`.
 */
export const itemTypeForContentType = (contentType: string) =>
  contentType === 'application/epub+zip' ? PageType.Book : PageType.File
export const uploadFileRequestResolver = authorized<
UploadFileRequestSuccess,
UploadFileRequestError,
MutationUploadFileRequestArgs
>(async (_, { input }, ctx) => {
const { authTrx, uid, log } = ctx
let uploadFileData: { id: string | null } = {
id: null,
}
>(async (_, { input }, { uid }) => {
analytics.track({
userId: uid,
event: 'file_upload_request',
@ -59,112 +21,5 @@ export const uploadFileRequestResolver = authorized<
},
})
let title: string
let fileName: string
try {
const url = normalizeUrl(new URL(input.url).href, {
stripHash: true,
stripWWW: false,
})
title = decodeURI(path.basename(new URL(url).pathname, '.pdf'))
fileName = decodeURI(path.basename(new URL(url).pathname)).replace(
/[^a-zA-Z0-9-_.]/g,
''
)
if (!fileName) {
fileName = 'content.pdf'
}
if (!isFileUrl(url)) {
try {
validateUrl(url)
} catch (error) {
log.info('illegal file input url', error)
return {
errorCodes: [UploadFileRequestErrorCode.BadInput],
}
}
}
} catch {
return { errorCodes: [UploadFileRequestErrorCode.BadInput] }
}
uploadFileData = await authTrx((t) =>
t.getRepository(UploadFile).save({
url: input.url,
user: { id: uid },
fileName,
status: UploadFileStatus.Initialized,
contentType: input.contentType,
})
)
if (uploadFileData.id) {
const uploadFileId = uploadFileData.id
const uploadFilePathName = generateUploadFilePathName(
uploadFileId,
fileName
)
const uploadSignedUrl = await generateUploadSignedUrl(
uploadFilePathName,
input.contentType
)
// If this is a file URL, we swap in a special URL
const attachmentUrl = `https://omnivore.app/attachments/${uploadFilePathName}`
if (isFileUrl(input.url)) {
await authTrx(async (tx) => {
await tx.getRepository(UploadFile).update(uploadFileId, {
url: attachmentUrl,
status: UploadFileStatus.Initialized,
})
})
}
let createdItemId: string | undefined = undefined
if (input.createPageEntry) {
// If we have a file:// URL, don't try to match it
// and create a copy of the item, just create a
// new item.
const item = await findLibraryItemByUrl(input.url, uid)
if (item) {
await updateLibraryItem(
item.id,
{
state: LibraryItemState.Processing,
},
uid
)
createdItemId = item.id
} else {
const itemType = itemTypeForContentType(input.contentType)
const uploadFileId = uploadFileData.id
const item = await createLibraryItem(
{
id: input.clientRequestId || undefined,
originalUrl: isFileUrl(input.url) ? attachmentUrl : input.url,
user: { id: uid },
title,
readableContent: '',
itemType,
uploadFile: { id: uploadFileData.id },
slug: generateSlug(uploadFilePathName),
state: LibraryItemState.Processing,
contentReader: contentReaderForLibraryItem(itemType, uploadFileId),
},
uid
)
createdItemId = item.id
}
}
return {
id: uploadFileData.id,
uploadSignedUrl,
createdPageId: createdItemId,
}
} else {
return { errorCodes: [UploadFileRequestErrorCode.FailedCreate] }
}
return uploadFile(input, uid)
})

View File

@ -545,6 +545,9 @@ const schema = gql`
state: ArticleSavingRequestStatus
labels: [CreateLabelInput!]
folder: String
savedAt: Date
publishedAt: Date
subscription: String
}
input ParseResult {

View File

@ -14,6 +14,7 @@ export const saveFile = async (
const uploadFile = await findUploadFileById(input.uploadFileId)
if (!uploadFile) {
return {
__typename: 'SaveError',
errorCodes: [SaveErrorCode.Unauthorized],
}
}
@ -24,26 +25,30 @@ export const saveFile = async (
if (!uploadFileData) {
return {
__typename: 'SaveError',
errorCodes: [SaveErrorCode.Unknown],
}
}
if (input.state || input.folder) {
await updateLibraryItem(
input.clientRequestId,
{
state: (input.state as unknown as LibraryItemState) || undefined,
folder: input.folder || undefined,
},
user.id
)
}
await updateLibraryItem(
input.clientRequestId,
{
state:
(input.state as unknown as LibraryItemState) ||
LibraryItemState.Succeeded,
folder: input.folder || undefined,
savedAt: input.savedAt ? new Date(input.savedAt) : undefined,
publishedAt: input.publishedAt ? new Date(input.publishedAt) : undefined,
},
user.id
)
// add labels to item
await createAndSaveLabelsInLibraryItem(
input.clientRequestId,
user.id,
input.labels
input.labels,
input.subscription
)
return {

View File

@ -1,5 +1,35 @@
import normalizeUrl from 'normalize-url'
import path from 'path'
import { LibraryItemState } from '../entity/library_item'
import { UploadFile } from '../entity/upload_file'
import {
PageType,
UploadFileRequestErrorCode,
UploadFileRequestInput,
UploadFileStatus,
} from '../generated/graphql'
import { authTrx, getRepository } from '../repository'
import { generateSlug } from '../utils/helpers'
import { logger } from '../utils/logger'
import {
contentReaderForLibraryItem,
generateUploadFilePathName,
generateUploadSignedUrl,
} from '../utils/uploads'
import { validateUrl } from './create_page_save_request'
import { createLibraryItem } from './library_item'
/**
 * Returns true when the given URL's protocol is `file:`.
 * The URL constructor throws for strings it cannot parse, and so does this.
 */
const isFileUrl = (url: string): boolean => {
  const { protocol } = new URL(url)
  return protocol === 'file:'
}
/**
 * Chooses the page type for an uploaded file from its content type.
 * Only `application/epub+zip` maps to `PageType.Book`; anything else is a
 * generic `PageType.File`.
 */
export const itemTypeForContentType = (contentType: string) => {
  const isEpub = contentType === 'application/epub+zip'
  return isEpub ? PageType.Book : PageType.File
}
export const findUploadFileById = async (id: string) => {
return getRepository(UploadFile).findOne({
@ -22,3 +52,100 @@ export const setFileUploadComplete = async (id: string, userId?: string) => {
userId
)
}
/**
 * Registers a file upload for a user and produces a signed URL that the file
 * content can be uploaded to.
 *
 * Steps:
 *  1. Normalize `input.url` and derive a display title and a sanitized file
 *     name from its path. URLs that are not `file:` URLs are additionally
 *     checked with `validateUrl`.
 *  2. Persist an `UploadFile` row in the `Initialized` status.
 *  3. Generate the storage path name and the signed upload URL.
 *  4. For `file:` URLs, replace the stored URL with an attachment URL.
 *  5. When `input.createPageEntry` is set, create a library item in the
 *     `Processing` state that points at the upload.
 *
 * @param input upload request (url, contentType, optional clientRequestId
 *   and createPageEntry)
 * @param uid id of the user who owns the upload
 * @returns `{ errorCodes: [BadInput] }` when the URL cannot be parsed or is
 *   rejected by validation; otherwise `{ id, uploadSignedUrl }`, plus
 *   `createdPageId` when a library item was created
 */
export const uploadFile = async (
  input: UploadFileRequestInput,
  uid: string
) => {
  let title: string
  let fileName: string

  try {
    const normalizedUrl = normalizeUrl(new URL(input.url).href, {
      stripHash: true,
      stripWWW: false,
    })
    const { pathname } = new URL(normalizedUrl)

    // The title is the last path segment with a trailing ".pdf" removed.
    title = decodeURI(path.basename(pathname, '.pdf'))

    // Keep only letters, digits, '-', '_' and '.' in the stored file name and
    // fall back to a generic name when nothing is left.
    const sanitizedName = decodeURI(path.basename(pathname)).replace(
      /[^a-zA-Z0-9-_.]/g,
      ''
    )
    fileName = sanitizedName || 'content.pdf'

    // file: URLs skip validation; every other URL must pass validateUrl.
    if (!isFileUrl(normalizedUrl)) {
      try {
        validateUrl(normalizedUrl)
      } catch (error) {
        logger.info('illegal file input url', error)
        return {
          errorCodes: [UploadFileRequestErrorCode.BadInput],
        }
      }
    }
  } catch {
    // Any parsing or decoding failure above is reported as bad input.
    return {
      errorCodes: [UploadFileRequestErrorCode.BadInput],
    }
  }

  const { id: uploadFileId } = await authTrx((t) =>
    t.getRepository(UploadFile).save({
      url: input.url,
      user: { id: uid },
      fileName,
      status: UploadFileStatus.Initialized,
      contentType: input.contentType,
    })
  )

  const uploadFilePathName = generateUploadFilePathName(uploadFileId, fileName)
  const uploadSignedUrl = await generateUploadSignedUrl(
    uploadFilePathName,
    input.contentType
  )

  // file: URLs are replaced by an attachment URL built from the storage path,
  // both on the upload record and on the library item created below.
  const isLocalFile = isFileUrl(input.url)
  const attachmentUrl = `https://omnivore.app/attachments/${uploadFilePathName}`
  if (isLocalFile) {
    await authTrx(async (tx) => {
      await tx.getRepository(UploadFile).update(uploadFileId, {
        url: attachmentUrl,
        status: UploadFileStatus.Initialized,
      })
    })
  }

  if (!input.createPageEntry) {
    return {
      id: uploadFileId,
      uploadSignedUrl,
    }
  }

  // A new library item is always created here; the client request id, when
  // provided, becomes the item's id.
  const itemType = itemTypeForContentType(input.contentType)
  const item = await createLibraryItem(
    {
      id: input.clientRequestId || undefined,
      originalUrl: isLocalFile ? attachmentUrl : input.url,
      user: { id: uid },
      title,
      readableContent: '',
      itemType,
      uploadFile: { id: uploadFileId },
      slug: generateSlug(uploadFilePathName),
      state: LibraryItemState.Processing,
      contentReader: contentReaderForLibraryItem(itemType, uploadFileId),
    },
    uid
  )

  return {
    id: uploadFileId,
    uploadSignedUrl,
    createdPageId: item.id,
  }
}

View File

@ -218,14 +218,18 @@ const savePageQuery = (
`
}
const saveFileQuery = (url: string, uploadFileId: string) => {
const saveFileQuery = (
clientRequestId: string,
url: string,
uploadFileId: string
) => {
return `
mutation {
saveFile (
input: {
url: "${url}",
source: "test",
clientRequestId: "${generateFakeUuid()}",
clientRequestId: "${clientRequestId}",
uploadFileId: "${uploadFileId}",
}
) {
@ -832,8 +836,23 @@ describe('Article API', () => {
let query = ''
let url = ''
let uploadFileId = ''
let itemId = ''
before(async () => {
const item = await createLibraryItem(
{
user: { id: user.id },
originalUrl: 'https://blog.omnivore.app/setBookmarkArticle',
slug: 'test-with-omnivore',
readableContent: '<p>test</p>',
title: 'test title',
readingProgressBottomPercent: 100,
readingProgressTopPercent: 80,
},
user.id
)
itemId = item.id
before(() => {
sinon.replace(
uploads,
'getStorageFileDetails',
@ -842,7 +861,7 @@ describe('Article API', () => {
})
beforeEach(() => {
query = saveFileQuery(url, uploadFileId)
query = saveFileQuery(itemId, url, uploadFileId)
})
after(() => {