From 0a2f9dba25b670eabfefc8bc5471423a6251c673 Mon Sep 17 00:00:00 2001
From: Hongbo Wu
Date: Mon, 29 Apr 2024 15:34:56 +0800
Subject: [PATCH] filter out summary if longer than 1000 words

---
 packages/api/src/jobs/ai/create_digest.ts | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/packages/api/src/jobs/ai/create_digest.ts b/packages/api/src/jobs/ai/create_digest.ts
index edcf2b389..d21e5248c 100644
--- a/packages/api/src/jobs/ai/create_digest.ts
+++ b/packages/api/src/jobs/ai/create_digest.ts
@@ -18,6 +18,7 @@ import {
   searchLibraryItems,
 } from '../../services/library_item'
 import { findDeviceTokensByUserId } from '../../services/user_device_tokens'
+import { wordsCount } from '../../utils/helpers'
 import { logger } from '../../utils/logger'
 import { htmlToMarkdown } from '../../utils/parser'
 import { sendMulticastPushNotifications } from '../../utils/sendNotification'
@@ -171,9 +172,10 @@ const getCandidatesList = async (
         await searchLibraryItems(
           {
             includeContent: true,
-            query: existingCandidateIds
-              ? `(${selector.query}) -includes:${existingCandidateIds}` // exclude the existing candidates
-              : selector.query,
+            // query: existingCandidateIds
+            //   ? `(${selector.query}) -includes:${existingCandidateIds}` // exclude the existing candidates
+            //   : selector.query,
+            query: selector.query,
             size: selector.count,
           },
           userId
@@ -192,6 +194,8 @@ const getCandidatesList = async (
       readableContent: htmlToMarkdown(item.readableContent),
     })) // convert the html content to markdown
 
+  logger.info('dedupedCandidates: ', dedupedCandidates)
+
   console.timeEnd('getCandidatesList')
 
   if (dedupedCandidates.length === 0) {
@@ -209,6 +213,8 @@ const getCandidatesList = async (
 
   const selectedCandidates = randomSelectCandidates(dedupedCandidates)
 
+  logger.info('selectedCandidates: ', selectedCandidates)
+
   // store the ids in cache
   const candidateIds = selectedCandidates.map((item) => item.id).join(',')
   await redisDataSource.redisClient?.set(key, candidateIds)
@@ -420,7 +426,8 @@ const generateSpeechFiles = (
 const filterSummaries = (summaries: RankedItem[]): RankedItem[] => {
   return summaries.filter(
     (item) =>
-      item.summary.length > 200 ||
+      wordsCount(item.summary) > 100 &&
+      wordsCount(item.summary) < 1000 &&
       item.summary.length < item.libraryItem.readableContent.length
   )
 }
@@ -480,6 +487,7 @@ export const createDigest = async (jobData: CreateDigestData) => {
       summary: '',
     }))
     const summaries = await summarizeItems(selections)
+    logger.info('summaries: ', summaries)
 
     const filteredSummaries = filterSummaries(summaries)