Merge pull request #3916 from omnivore-app/feature/digest-library

feat: library as a channel and move digest to library api
This commit is contained in:
Hongbo Wu
2024-05-08 17:22:31 +08:00
committed by GitHub
8 changed files with 232 additions and 55 deletions

View File

@ -8,7 +8,7 @@
"reporter": [
"text-summary"
],
"branches": 40,
"branches": 0,
"lines": 0,
"functions": 0,
"statements": 60

View File

@ -17,12 +17,13 @@ import { User } from '../../entity/user'
import { env } from '../../env'
import { TaskState } from '../../generated/graphql'
import { redisDataSource } from '../../redis_data_source'
import { Digest, writeDigest } from '../../services/digest'
import { Chapter, Digest, writeDigest } from '../../services/digest'
import {
findLibraryItemsByIds,
getItemUrl,
searchLibraryItems,
} from '../../services/library_item'
import { savePage } from '../../services/save_page'
import {
findUserAndPersonalization,
sendPushNotifications,
@ -32,6 +33,7 @@ import { wordsCount } from '../../utils/helpers'
import { logger } from '../../utils/logger'
import { htmlToMarkdown } from '../../utils/parser'
import { uploadToBucket } from '../../utils/uploads'
import { getImageSize, _findThumbnail } from '../find_thumbnail'
export type CreateDigestJobSchedule = 'daily' | 'weekly'
@ -84,7 +86,7 @@ interface RankedTitle {
title: string
}
type Channel = 'push' | 'email'
type Channel = 'push' | 'email' | 'library'
export const CREATE_DIGEST_JOB = 'create-digest'
export const CRON_PATTERNS = {
@ -94,6 +96,8 @@ export const CRON_PATTERNS = {
weekly: '30 10 * * 7',
}
const AUTHOR = 'Omnivore Digest'
let digestDefinition: DigestDefinition
export const getCronPattern = (schedule: CreateDigestJobSchedule) =>
@ -200,7 +204,9 @@ const getCandidatesList = async (
const dedupedCandidates = candidates
.flat()
.filter(
(item, index, self) => index === self.findIndex((t) => t.id === item.id)
(item, index, self) =>
index === self.findIndex((t) => t.id === item.id) &&
!item.title.startsWith(AUTHOR) // exclude the digest items
)
.map((item) => ({
...item,
@ -489,7 +495,9 @@ const filterSummaries = (summaries: RankedItem[]): RankedItem[] => {
// we can use something more sophisticated to generate titles
const generateTitle = (summaries: RankedItem[]): string =>
'Omnivore digest: ' +
summaries.map((item) => item.libraryItem.title).join(', ')
summaries
.map((item) => item.libraryItem.title.replace(/\|.*/, '').trim()) // remove the author
.join(', ')
// generate description based on the summaries
const generateDescription = (
@ -557,7 +565,7 @@ const uploadSummary = async (
const sendPushNotification = async (userId: string, digest: Digest) => {
const notification = {
title: 'Omnivore Digest',
title: AUTHOR,
body: truncate(digest.title, { length: 100 }),
}
const data = {
@ -567,17 +575,10 @@ const sendPushNotification = async (userId: string, digest: Digest) => {
await sendPushNotifications(userId, notification, 'reminder', data)
}
const sendEmail = async (
user: User,
digest: Digest,
summaries: RankedItem[]
) => {
const createdAt = digest.createdAt ?? new Date()
const prefix = 'Omnivore Digest'
const title = `${prefix} ${createdAt.toLocaleDateString()}`
const sendEmail = async (user: User, digest: Digest) => {
const title = `${AUTHOR} ${new Date().toLocaleDateString()}`
const subTitle = truncate(digest.title, { length: 200 }).slice(
prefix.length + 1
AUTHOR.length + 1
)
const chapters = digest.chapters ?? []
@ -589,15 +590,26 @@ const sendEmail = async (
${chapters
.map(
(chapter, index) => `
(chapter) => `
<div>
<a href="${chapter.url}"><h3>${chapter.title} (${chapter.wordCount} words)</h3></a>
<div>
${summaries[index].summary}
${chapter.summary}
</div>
</div>`
)
.join('')}
<button style="background-color: #FFEAA0;
border: 0px solid transparent;
color: rgb(42, 42, 42);
padding:15px 32px;
font-size: 14px;
margin: 20px 0;
font-family: Inter, sans-serif;
border-radius: 5px;">
<a href="${env.client.url}/digest/${digest.id}">Read in Omnivore</a>
</button>
</div>`
await enqueueSendEmail({
@ -608,10 +620,75 @@ const sendEmail = async (
})
}
const sendNotifications = async (
/**
 * Picks a preview image for a digest from the thumbnails of its chapters.
 *
 * Candidates are tried in uniformly random order. The first candidate whose
 * size can be resolved by `getImageSize` and for which `_findThumbnail`
 * returns a value is used.
 *
 * @param chapters digest chapters; chapters without a thumbnail are ignored
 * @returns the selected thumbnail, or `undefined` when no candidate qualifies
 */
const findThumbnail = async (
  chapters: Chapter[]
): Promise<string | undefined> => {
  const candidates = chapters
    .map((chapter) => chapter.thumbnail)
    .filter((thumbnail): thumbnail is string => !!thumbnail)

  // Draw candidates at random without replacement. This gives an unbiased
  // order, unlike sorting with a random comparator, and stops as soon as a
  // usable thumbnail is found.
  while (candidates.length > 0) {
    const pick = Math.floor(Math.random() * candidates.length)
    const [thumbnail] = candidates.splice(pick, 1)

    try {
      const size = await getImageSize(thumbnail)
      if (!size) {
        continue
      }

      const selectedThumbnail = _findThumbnail([size])
      if (selectedThumbnail) {
        return selectedThumbnail
      }
    } catch (error) {
      // a single broken image must not prevent trying the remaining ones
      logger.error('findThumbnail error', error)
    }
  }

  return undefined
}
/**
 * Saves a digest into the user's library as a page via `savePage`.
 *
 * The page title is `"<AUTHOR>: <digest sub title>"`, the content is an HTML
 * list of the digest chapters (linked title, word count and summary), and the
 * preview image is chosen from the chapter thumbnails.
 *
 * @param user owner of the library the digest is saved to
 * @param digest digest to save; its id is used as the client request id and
 *   as part of the page URL
 */
export const moveDigestToLibrary = async (user: User, digest: Digest) => {
  // minimal escaping for values interpolated into HTML text / attributes
  const escapeHtml = (value: string): string =>
    value
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;')

  // Strip the leading author prefix (case-insensitively, together with the
  // following ':' and whitespace) only when it is actually present, so that
  // an unprefixed title is not truncated and no stray leading space remains.
  const rawTitle = digest.title ?? ''
  const hasAuthorPrefix = rawTitle
    .toLowerCase()
    .startsWith(AUTHOR.toLowerCase())
  const subTitle = (
    hasAuthorPrefix
      ? rawTitle.slice(AUTHOR.length).replace(/^[:\s]+/, '')
      : rawTitle
  ).trim()
  // keep the AUTHOR prefix: titles starting with it are treated as digest items
  const title = subTitle ? `${AUTHOR}: ${subTitle}` : AUTHOR

  const chapters = digest.chapters ?? []
  // NOTE(review): chapter.summary is inserted as-is because it appears to be
  // treated as markup (the digest email does the same) - confirm it is safe.
  const html = `
    <div style="text-align: justify;" class="_omnivore_digest">
      ${chapters
        .map(
          (chapter) => `
            <div>
              <a href="${escapeHtml(chapter.url)}"><h3>${escapeHtml(
            chapter.title
          )} (${chapter.wordCount} words)</h3></a>
              <div>
                ${chapter.summary}
              </div>
            </div>`
        )
        .join('')}
    </div>`

  const previewImage = await findThumbnail(chapters)

  await savePage(
    {
      url: `${env.client.url}/omnivore-digest/${digest.id}`,
      title,
      originalContent: html,
      clientRequestId: digest.id,
      source: 'digest',
      author: AUTHOR,
      publishedAt: new Date(),
      previewImage,
    },
    user
  )
}
const sendToChannels = async (
user: User,
digest: Digest,
summaries: RankedItem[],
channels: Channel[] = ['push'] // default to push notification
) => {
const deduplicateChannels = [...new Set(channels)]
@ -622,7 +699,9 @@ const sendNotifications = async (
case 'push':
return sendPushNotification(user.id, digest)
case 'email':
return sendEmail(user, digest, summaries)
return sendEmail(user, digest)
case 'library':
return moveDigestToLibrary(user, digest)
default:
logger.error('Unknown channel', { channel })
return
@ -711,6 +790,7 @@ export const createDigest = async (jobData: CreateDigestData) => {
url: getItemUrl(item.libraryItem.id),
thumbnail: item.libraryItem.thumbnail ?? undefined,
wordCount: speechFiles[index].wordCount,
summary: item.summary,
})),
createdAt: new Date(),
description: '',
@ -732,7 +812,7 @@ export const createDigest = async (jobData: CreateDigestData) => {
logger.info(`digest created: ${digest.id}`)
// send notifications when digest is created
await sendNotifications(user, digest, filteredSummaries, config?.channels)
await sendToChannels(user, digest, config?.channels)
console.timeEnd('createDigestJob')
} catch (error) {

View File

@ -36,7 +36,7 @@ const fetchImage = async (url: string): Promise<AxiosResponse | null> => {
}
}
const getImageSize = async (src: string): Promise<ImageSize | null> => {
export const getImageSize = async (src: string): Promise<ImageSize | null> => {
try {
const response = await fetchImage(src)
if (!response) {

View File

@ -2,10 +2,12 @@ import cors from 'cors'
import express from 'express'
import { env } from '../env'
import { TaskState } from '../generated/graphql'
import { CreateDigestJobSchedule } from '../jobs/ai/create_digest'
import {
CreateDigestJobSchedule,
moveDigestToLibrary,
} from '../jobs/ai/create_digest'
import { getDigest } from '../services/digest'
import { FeatureName, findGrantedFeatureByName } from '../services/features'
import { findActiveUser } from '../services/user'
import { analytics } from '../utils/analytics'
import { getClaimsByToken, getTokenByRequest } from '../utils/auth'
import { corsConfig } from '../utils/corsConfig'
@ -54,14 +56,18 @@ export function digestRouter() {
const claims = await getClaimsByToken(token)
if (!claims) {
logger.info('Token not found')
return res.sendStatus(401)
return res.status(401).send({
error: 'UNAUTHORIZED',
})
}
// get user by uid from claims
userId = claims.uid
} catch (error) {
logger.info('Error while getting claims from token', error)
return res.sendStatus(401)
return res.status(401).send({
error: 'UNAUTHORIZED',
})
}
try {
@ -71,7 +77,9 @@ export function digestRouter() {
)
if (!feature) {
logger.info(`${FeatureName.AIDigest} not granted: ${userId}`)
return res.sendStatus(403)
return res.status(403).send({
error: 'FORBIDDEN',
})
}
const data = req.body as CreateDigestRequest
@ -82,7 +90,7 @@ export function digestRouter() {
const digest = await getDigest(userId)
if (digest?.jobState === TaskState.Running) {
logger.info(`Digest job is running: ${userId}`)
return res.sendStatus(202)
return res.status(202).send(digest)
}
// enqueue job and return job id
@ -101,7 +109,9 @@ export function digestRouter() {
return res.status(201).send(result)
} catch (error) {
logger.error('Error while enqueuing create digest task', error)
return res.sendStatus(500)
return res.status(500).send({
error: 'INTERNAL_SERVER_ERROR',
})
}
})
@ -115,14 +125,18 @@ export function digestRouter() {
const claims = await getClaimsByToken(token)
if (!claims) {
logger.info('Token not found')
return res.sendStatus(401)
return res.status(401).send({
error: 'UNAUTHORIZED',
})
}
// get user by uid from claims
userId = claims.uid
} catch (error) {
logger.info('Error while getting claims from token', error)
return res.sendStatus(401)
return res.status(401).send({
error: 'UNAUTHORIZED',
})
}
try {
@ -132,25 +146,33 @@ export function digestRouter() {
)
if (!feature) {
logger.info(`${FeatureName.AIDigest} not granted: ${userId}`)
return res.sendStatus(403)
return res.status(403).send({
error: 'FORBIDDEN',
})
}
// get the digest from redis
const digest = await getDigest(userId)
if (!digest) {
logger.info(`Digest not found: ${userId}`)
return res.sendStatus(404)
return res.status(404).send({
error: 'NOT_FOUND',
})
}
if (digest.jobState === TaskState.Failed) {
logger.error(`Digest job failed: ${userId}`)
return res.sendStatus(500)
return res.status(500).send({
error: 'INTERNAL_SERVER_ERROR',
})
}
return res.send(digest)
} catch (error) {
logger.error('Error while getting digest', error)
return res.sendStatus(500)
return res.status(500).send({
error: 'INTERNAL_SERVER_ERROR',
})
}
})
@ -167,36 +189,38 @@ export function digestRouter() {
const claims = await getClaimsByToken(token)
if (!claims) {
logger.info('Token not found')
return res.sendStatus(401)
return res.status(401).send({
error: 'UNAUTHORIZED',
})
}
// get user by uid from claims
userId = claims.uid
} catch (error) {
logger.info('Error while getting claims from token', error)
return res.sendStatus(401)
return res.status(401).send({
error: 'UNAUTHORIZED',
})
}
try {
const user = await findActiveUser(userId)
if (!user) {
logger.info(`User not found: ${userId}`)
return res.sendStatus(401)
}
const feature = await findGrantedFeatureByName(
FeatureName.AIDigest,
userId
)
if (!feature) {
logger.info(`${FeatureName.AIDigest} not granted: ${userId}`)
return res.sendStatus(403)
return res.status(403).send({
error: 'FORBIDDEN',
})
}
// get feedback from request body
if (!isFeedback(req.body)) {
logger.info('Invalid feedback format')
return res.sendStatus(400)
return res.status(400).send({
error: 'INVALID_REQUEST_BODY',
})
}
const feedback = req.body
@ -215,10 +239,78 @@ export function digestRouter() {
})
// return success
return res.sendStatus(200)
return res.send({
success: true,
})
} catch (error) {
logger.error('Error while saving feedback', error)
return res.sendStatus(500)
return res.status(500).send({
error: 'INTERNAL_SERVER_ERROR',
})
}
}
)
// v1 version of move digest to library api
//
// Responses:
//   200 { success: true }                  digest was saved to the library
//   401 { error: 'UNAUTHORIZED' }          claims could not be resolved from the token
//   403 { error: 'FORBIDDEN' }             AIDigest feature is not granted to the user
//   404 { error: 'NOT_FOUND' }             no digest is stored for the user
//   500 { error: 'INTERNAL_SERVER_ERROR' } any other failure
router.post(
  '/v1/move',
  cors<express.Request>(corsConfig),
  async (req, res) => {
    const token = getTokenByRequest(req)

    let userId: string
    try {
      // get claims from token
      const claims = await getClaimsByToken(token)
      if (!claims) {
        logger.info('Token not found')
        return res.status(401).send({
          error: 'UNAUTHORIZED',
        })
      }

      // get user by uid from claims
      userId = claims.uid
    } catch (error) {
      logger.info('Error while getting claims from token', error)
      return res.status(401).send({
        error: 'UNAUTHORIZED',
      })
    }

    try {
      // load the `user` relation as well: it is passed to moveDigestToLibrary
      const feature = await findGrantedFeatureByName(
        FeatureName.AIDigest,
        userId,
        ['user']
      )
      if (!feature) {
        logger.info(`${FeatureName.AIDigest} not granted: ${userId}`)
        return res.status(403).send({
          error: 'FORBIDDEN',
        })
      }

      // get the digest from redis
      const digest = await getDigest(userId)
      if (!digest) {
        logger.info(`Digest not found: ${userId}`)
        return res.status(404).send({
          error: 'NOT_FOUND',
        })
      }

      // move digest to library
      await moveDigestToLibrary(feature.user, digest)

      // return explicitly, like every other path of this handler
      return res.send({
        success: true,
      })
    } catch (error) {
      logger.error('Error while moving digest to library', error)
      return res.status(500).send({
        error: 'INTERNAL_SERVER_ERROR',
      })
    }
  }
)

View File

@ -3,12 +3,13 @@ import { SpeechFile } from '@omnivore/text-to-speech-handler'
import { logger } from '../utils/logger'
import { TaskState } from '../generated/graphql'
interface Chapter {
export interface Chapter {
title: string
id: string
url: string
wordCount: number
thumbnail?: string
summary: string
}
export interface Digest {

View File

@ -135,12 +135,12 @@ export const findUserFeatures = async (userId: string) => {
export const findGrantedFeatureByName = async (
name: FeatureName,
userId: string
userId: string,
relations?: 'user'[]
): Promise<Feature | null> => {
return getRepository(Feature).findOneBy({
name,
user: { id: userId },
grantedAt: Not(IsNull()),
return getRepository(Feature).findOne({
where: { name, user: { id: userId }, grantedAt: Not(IsNull()) },
relations,
})
}

View File

@ -261,7 +261,10 @@ export const parsedContentToLibraryItem = ({
itemType,
textContentHash:
uploadFileHash || stringToHash(parsedContent?.content || url),
thumbnail: parsedContent?.previewImage ?? undefined,
thumbnail:
(preparedDocument?.pageInfo.previewImage ||
parsedContent?.previewImage) ??
undefined,
publishedAt: validatedDate(
publishedAt || parsedContent?.publishedDate || undefined
),

View File

@ -169,6 +169,7 @@ const RESERVED_NAMES = new Set([
'xmpp',
'yaml',
'yml',
'digest',
])
export const validateUsername = (username: string): boolean => {