feat: generate preview content if the excerpt is shorter than 180 chars

This commit is contained in:
Hongbo Wu
2024-06-06 19:08:41 +08:00
parent 8f5bbdaeb0
commit 76383d8a33
8 changed files with 132 additions and 2 deletions

View File

@ -219,4 +219,7 @@ export class LibraryItem {
@Column('float')
score?: number
/** Optional preview text for the item, mapped to a `text` column. */
@Column('text')
previewContent?: string
}

View File

@ -0,0 +1,59 @@
import { parseHTML } from 'linkedom'
import {
findLibraryItemById,
updateLibraryItem,
} from '../services/library_item'
import { findActiveUser } from '../services/user'
import { logger } from '../utils/logger'
/** Name under which the preview-content generation job is queued and dispatched. */
export const GENERATE_PREVIEW_CONTENT_JOB = 'generate-preview'
/** Payload carried by a preview-content generation job. */
interface GeneratePreviewContentData {
// ID of the library item whose preview should be generated.
libraryItemId: string
// ID of the user the item is looked up and updated for.
userId: string
}
// Upper bound, in UTF-16 code units, on the stored preview text.
const PREVIEW_CONTENT_MAX_LENGTH = 600

/**
 * Turns raw document text into a compact preview string.
 *
 * Whitespace runs (newlines, tabs, repeated spaces) are collapsed to a single
 * space and the result is trimmed, so text that consists only of whitespace
 * becomes an empty string. The result is then cut to at most `maxLength`
 * UTF-16 code units, never ending on an unpaired high surrogate.
 *
 * @param text raw text extracted from the document
 * @param maxLength maximum length of the returned string in code units
 * @returns the normalized, truncated preview (possibly empty)
 */
const toPreviewText = (text: string, maxLength: number): string => {
  const normalized = text.replace(/\s+/g, ' ').trim()
  if (normalized.length <= maxLength) {
    return normalized
  }

  // If the cut lands between the two halves of a surrogate pair, back off by
  // one code unit so the preview does not end with a broken character.
  const lastUnit = normalized.charCodeAt(maxLength - 1)
  const cutsSurrogatePair = lastUnit >= 0xd800 && lastUnit <= 0xdbff
  const end = cutsSurrogatePair ? maxLength - 1 : maxLength

  // The cut may land right after a collapsed space; drop it.
  return normalized.slice(0, end).trimEnd()
}

/**
 * Job handler that derives a short text preview from a library item's
 * readable content and stores it in the item's `previewContent` field.
 *
 * The handler logs an error and returns without updating anything when the
 * user or the library item cannot be found, when the item has no readable
 * content, or when no non-whitespace text can be extracted from it.
 *
 * @param job identifies the library item and the user it belongs to
 */
export const generatePreviewContent = async (
  job: GeneratePreviewContentData
) => {
  const { libraryItemId, userId } = job

  const user = await findActiveUser(userId)
  if (!user) {
    logger.error(`User not found: ${userId}`)
    return
  }

  const libraryItem = await findLibraryItemById(libraryItemId, userId)
  if (!libraryItem) {
    logger.error(`Library item not found: ${libraryItemId}`)
    return
  }

  const content = libraryItem.readableContent
  if (!content) {
    logger.error(`Library item has no content: ${libraryItemId}`)
    return
  }

  // Generate preview content
  logger.info(`Generating preview for library item: ${libraryItemId}`)

  // The readable content is parsed as HTML and only its text is kept; the
  // preview is limited to PREVIEW_CONTENT_MAX_LENGTH characters.
  const document = parseHTML(content).document
  const previewContent = toPreviewText(
    document.documentElement.textContent ?? '',
    PREVIEW_CONTENT_MAX_LENGTH
  )
  if (!previewContent) {
    // Also reached for whitespace-only text, so a blank preview is never
    // stored over a usable description.
    logger.error(
      `Failed to generate preview for library item: ${libraryItemId}`
    )
    return
  }

  // NOTE(review): the trailing `undefined, true` arguments are passed through
  // unchanged from the original call; confirm their meaning against
  // updateLibraryItem's signature.
  await updateLibraryItem(
    libraryItemId,
    {
      previewContent,
    },
    userId,
    undefined,
    true
  )
}

View File

@ -31,6 +31,10 @@ import {
} from './jobs/email/inbound_emails'
import { sendEmailJob, SEND_EMAIL_JOB } from './jobs/email/send_email'
import { findThumbnail, THUMBNAIL_JOB } from './jobs/find_thumbnail'
import {
generatePreviewContent,
GENERATE_PREVIEW_CONTENT_JOB,
} from './jobs/generate_preview_content'
import {
exportAllItems,
EXPORT_ALL_ITEMS_JOB_NAME,
@ -194,6 +198,8 @@ export const createWorker = (connection: ConnectionOptions) =>
return updateHome(job.data)
case SCORE_LIBRARY_ITEM_JOB:
return scoreLibraryItem(job.data)
case GENERATE_PREVIEW_CONTENT_JOB:
return generatePreviewContent(job.data)
default:
logger.warning(`[queue-processor] unhandled job: ${job.name}`)
}

View File

@ -686,7 +686,8 @@ export const functionResolvers = {
canComment: false,
canShare: true,
dir: libraryItem.directionality,
previewContent: libraryItem.description,
previewContent:
libraryItem.previewContent || libraryItem.description,
subscription: libraryItem.subscription,
siteName: libraryItem.siteName,
siteIcon: libraryItem.siteIcon,

View File

@ -16,7 +16,10 @@ import {
SaveResult,
} from '../generated/graphql'
import { Merge } from '../util'
import { enqueueThumbnailJob } from '../utils/createTask'
import {
enqueueGeneratePreviewContentJob,
enqueueThumbnailJob,
} from '../utils/createTask'
import {
cleanUrl,
generateSlug,
@ -174,6 +177,16 @@ export const savePage = async (
}
}
const excerpt = parseResult.parsedContent?.excerpt
// generate preview content if excerpt is less than 180 characters
if (!excerpt || excerpt.length < 180) {
try {
await enqueueGeneratePreviewContentJob(clientRequestId, user.id)
} catch (e) {
logger.error('Failed to enqueue generate preview job', e)
}
}
if (parseResult.highlightData) {
const highlight: DeepPartial<Highlight> = {
...parseResult.highlightData,
@ -255,6 +268,7 @@ export const parsedContentToLibraryItem = ({
originalContent: originalHtml,
readableContent: parsedContent?.content || '',
description: parsedContent?.excerpt,
previewContent: parsedContent?.excerpt,
title:
title ||
parsedContent?.title ||

View File

@ -29,6 +29,7 @@ import { BulkActionData, BULK_ACTION_JOB_NAME } from '../jobs/bulk_action'
import { CallWebhookJobData, CALL_WEBHOOK_JOB_NAME } from '../jobs/call_webhook'
import { SendEmailJobData, SEND_EMAIL_JOB } from '../jobs/email/send_email'
import { THUMBNAIL_JOB } from '../jobs/find_thumbnail'
import { GENERATE_PREVIEW_CONTENT_JOB } from '../jobs/generate_preview_content'
import { EXPORT_ALL_ITEMS_JOB_NAME } from '../jobs/integration/export_all_items'
import {
ExportItemJobData,
@ -110,6 +111,7 @@ export const getJobPriority = (jobName: string): number => {
case EXPORT_ALL_ITEMS_JOB_NAME:
case REFRESH_ALL_FEEDS_JOB_NAME:
case THUMBNAIL_JOB:
case GENERATE_PREVIEW_CONTENT_JOB:
return 100
default:
@ -1024,4 +1026,29 @@ export const enqueueScoreJob = async (data: ScoreLibraryItemJobData) => {
})
}
/**
 * Adds a preview-content generation job for one library item to the backend
 * queue.
 *
 * The job ID is built from the job name, the library item ID and JOB_VERSION,
 * so it is the same every time this is called for the same item.
 * (Presumably this lets the queue ignore duplicate requests; confirm against
 * the queue's semantics.)
 *
 * @param libraryItemId ID of the library item to generate a preview for
 * @param userId ID of the user passed along in the job payload
 * @returns the result of `queue.add`, or `undefined` when no backend queue is
 *   available
 */
export const enqueueGeneratePreviewContentJob = async (
  libraryItemId: string,
  userId: string
) => {
  const backendQueue = await getBackendQueue()

  if (backendQueue) {
    const payload = { libraryItemId, userId }
    const options = {
      jobId: `${GENERATE_PREVIEW_CONTENT_JOB}_${libraryItemId}_${JOB_VERSION}`,
      // Finished jobs are removed from the queue whether they succeed or fail.
      removeOnComplete: true,
      removeOnFail: true,
      priority: getJobPriority(GENERATE_PREVIEW_CONTENT_JOB),
      attempts: 3,
    }

    return backendQueue.add(GENERATE_PREVIEW_CONTENT_JOB, payload, options)
  }

  return undefined
}
export default createHttpTaskWithToken

View File

@ -0,0 +1,10 @@
-- Type: DO
-- Name: add_preview_content_to_library_item
-- Description: Add preview_content column to library_item table
BEGIN;
-- No NOT NULL constraint or DEFAULT is declared, so the new column is nullable.
ALTER TABLE omnivore.library_item
ADD COLUMN preview_content TEXT;
COMMIT;

View File

@ -0,0 +1,10 @@
-- Type: UNDO
-- Name: add_preview_content_to_library_item
-- Description: Remove preview_content column from library_item table
BEGIN;
-- Dropping the column discards any preview text stored in it.
ALTER TABLE omnivore.library_item
DROP COLUMN preview_content;
COMMIT;