From 9259913344ecc46f25e55da796a9d8eef4a43eed Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 8 May 2024 14:32:16 +0800 Subject: [PATCH] allow saving digest in the library as a channel --- packages/api/src/jobs/ai/create_digest.ts | 83 +++++++++++++++++++---- packages/api/src/jobs/find_thumbnail.ts | 2 +- packages/api/src/resolvers/types.ts | 1 + packages/api/src/services/digest.ts | 2 +- packages/api/src/services/save_page.ts | 5 +- 5 files changed, 78 insertions(+), 15 deletions(-) diff --git a/packages/api/src/jobs/ai/create_digest.ts b/packages/api/src/jobs/ai/create_digest.ts index 74b6e8d5e..7c10d2216 100644 --- a/packages/api/src/jobs/ai/create_digest.ts +++ b/packages/api/src/jobs/ai/create_digest.ts @@ -17,12 +17,13 @@ import { User } from '../../entity/user' import { env } from '../../env' import { TaskState } from '../../generated/graphql' import { redisDataSource } from '../../redis_data_source' -import { Digest, writeDigest } from '../../services/digest' +import { Chapter, Digest, writeDigest } from '../../services/digest' import { findLibraryItemsByIds, getItemUrl, searchLibraryItems, } from '../../services/library_item' +import { savePage } from '../../services/save_page' import { findUserAndPersonalization, sendPushNotifications, @@ -32,6 +33,7 @@ import { wordsCount } from '../../utils/helpers' import { logger } from '../../utils/logger' import { htmlToMarkdown } from '../../utils/parser' import { uploadToBucket } from '../../utils/uploads' +import { getImageSize, _findThumbnail } from '../find_thumbnail' export type CreateDigestJobSchedule = 'daily' | 'weekly' @@ -84,7 +86,7 @@ interface RankedTitle { title: string } -type Channel = 'push' | 'email' +type Channel = 'push' | 'email' | 'library' export const CREATE_DIGEST_JOB = 'create-digest' export const CRON_PATTERNS = { @@ -94,6 +96,8 @@ export const CRON_PATTERNS = { weekly: '30 10 * * 7', } +const AUTHOR = 'Omnivore Digest' + let digestDefinition: DigestDefinition export const getCronPattern = (schedule: CreateDigestJobSchedule) => @@ -200,7 +204,9 @@ const getCandidatesList = async ( const dedupedCandidates = candidates .flat() .filter( - (item, index, self) => index === self.findIndex((t) => t.id === item.id) + (item, index, self) => + index === self.findIndex((t) => t.id === item.id) && + !item.title.startsWith(AUTHOR) // exclude the digest items ) .map((item) => ({ ...item, @@ -489,7 +495,9 @@ const filterSummaries = (summaries: RankedItem[]): RankedItem[] => { // we can use something more sophisticated to generate titles const generateTitle = (summaries: RankedItem[]): string => 'Omnivore digest: ' + - summaries.map((item) => item.libraryItem.title).join(', ') + summaries + .map((item) => item.libraryItem.title.replace(/\|.*/, '').trim()) // remove the author + .join(', ') // generate description based on the summaries const generateDescription = ( @@ -557,7 +565,7 @@ const uploadSummary = async ( const sendPushNotification = async (userId: string, digest: Digest) => { const notification = { - title: 'Omnivore Digest', + title: AUTHOR, body: truncate(digest.title, { length: 100 }), } const data = { @@ -572,12 +580,9 @@ const sendEmail = async ( digest: Digest, summaries: RankedItem[] ) => { - const createdAt = digest.createdAt ?? new Date() - - const prefix = 'Omnivore Digest' - const title = `${prefix} ${createdAt.toLocaleDateString()}` + const title = `${AUTHOR} ${new Date().toLocaleDateString()}` const subTitle = truncate(digest.title, { length: 200 }).slice( - prefix.length + 1 + AUTHOR.length + 1 ) const chapters = digest.chapters ?? [] @@ -608,7 +613,59 @@ const sendEmail = async ( }) } -const sendNotifications = async ( +const findThumbnail = async (chapters: Chapter[]) => { + const images = await Promise.all( + chapters + .filter((chapter) => chapter.thumbnail) + .map((chapter) => getImageSize(chapter.thumbnail as string)) + ) + + return _findThumbnail(images) +} + +const saveInLibrary = async ( + user: User, + digest: Digest, + summaries: RankedItem[] +) => { + const subTitle = digest.title?.slice(AUTHOR.length + 1) ?? '' + const title = `${AUTHOR}: ${subTitle}` + + const chapters = digest.chapters ?? [] + + const html = ` +
+ ${chapters + .map( + (chapter, index) => ` +
+

${chapter.title} (${chapter.wordCount} words)

+
+ ${summaries[index].summary} +
+
` + ) + .join('')} +
` + + const previewImage = await findThumbnail(chapters) + + await savePage( + { + url: `${env.client.url}/digest/${digest.id}`, + title, + originalContent: html, + clientRequestId: digest.id, + source: 'digest', + author: AUTHOR, + publishedAt: new Date(), + previewImage, + }, + user + ) +} + +const sendToChannels = async ( user: User, digest: Digest, summaries: RankedItem[], @@ -623,6 +680,8 @@ const sendNotifications = async ( return sendPushNotification(user.id, digest) case 'email': return sendEmail(user, digest, summaries) + case 'library': + return saveInLibrary(user, digest, summaries) default: logger.error('Unknown channel', { channel }) return @@ -732,7 +791,7 @@ export const createDigest = async (jobData: CreateDigestData) => { logger.info(`digest created: ${digest.id}`) // send notifications when digest is created - await sendNotifications(user, digest, filteredSummaries, config?.channels) + await sendToChannels(user, digest, filteredSummaries, config?.channels) console.timeEnd('createDigestJob') } catch (error) { diff --git a/packages/api/src/jobs/find_thumbnail.ts b/packages/api/src/jobs/find_thumbnail.ts index 63bae9b42..b7d65dd59 100644 --- a/packages/api/src/jobs/find_thumbnail.ts +++ b/packages/api/src/jobs/find_thumbnail.ts @@ -36,7 +36,7 @@ const fetchImage = async (url: string): Promise => { } } -const getImageSize = async (src: string): Promise => { +export const getImageSize = async (src: string): Promise => { try { const response = await fetchImage(src) if (!response) { diff --git a/packages/api/src/resolvers/types.ts b/packages/api/src/resolvers/types.ts index f880a0145..2fc556495 100644 --- a/packages/api/src/resolvers/types.ts +++ b/packages/api/src/resolvers/types.ts @@ -45,6 +45,7 @@ export interface RequestContext { tracingSpan: Span dataSources: { readingProgress: ReadingProgressDataSource + originalContent: ReadingProgressDataSource } dataLoaders: { labels: DataLoader diff --git a/packages/api/src/services/digest.ts b/packages/api/src/services/digest.ts index b7a44c740..b54292991 100644 --- a/packages/api/src/services/digest.ts +++ b/packages/api/src/services/digest.ts @@ -3,7 +3,7 @@ import { SpeechFile } from '@omnivore/text-to-speech-handler' import { logger } from '../utils/logger' import { TaskState } from '../generated/graphql' -interface Chapter { +export interface Chapter { title: string id: string url: string diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index 702924b91..c5ba13989 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -261,7 +261,10 @@ export const parsedContentToLibraryItem = ({ itemType, textContentHash: uploadFileHash || stringToHash(parsedContent?.content || url), - thumbnail: parsedContent?.previewImage ?? undefined, + thumbnail: + (preparedDocument?.pageInfo.previewImage || + parsedContent?.previewImage) ?? + undefined, publishedAt: validatedDate( publishedAt || parsedContent?.publishedDate || undefined ),