From 3954fa309b1d77b74df916474dc5d83565c170c7 Mon Sep 17 00:00:00 2001
From: Hongbo Wu
Date: Mon, 29 Apr 2024 14:24:37 +0800
Subject: [PATCH] randomly select at most 25 candidates and time each step

---
 packages/api/src/jobs/ai/create_digest.ts | 24 +++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/packages/api/src/jobs/ai/create_digest.ts b/packages/api/src/jobs/ai/create_digest.ts
index 2a6e155d4..8d7a4fd71 100644
--- a/packages/api/src/jobs/ai/create_digest.ts
+++ b/packages/api/src/jobs/ai/create_digest.ts
@@ -136,11 +136,17 @@ const getPreferencesList = async (userId: string): Promise => {
   return dedupedPreferences
 }
 
+const randomSelectCandidates = (candidates: LibraryItem[]): LibraryItem[] => {
+  // randomly choose at most 25 candidates
+  return candidates.sort(() => 0.5 - Math.random()).slice(0, 25)
+}
+
 // Makes multiple DB queries and combines the results
 const getCandidatesList = async (
   userId: string,
   selectedLibraryItemIds?: string[]
 ): Promise<LibraryItem[]> => {
+  console.time('getCandidatesList')
   // use the queries from the digest definitions to lookup preferences
   // There should be a list of multiple queries we use. For now we can
   // hardcode these queries:
@@ -199,11 +205,15 @@ const getCandidatesList = async (
     return []
   }
 
+  const selectedCandidates = randomSelectCandidates(dedupedCandidates)
+
   // store the ids in cache
-  const candidateIds = dedupedCandidates.map((item) => item.id).join(',')
+  const candidateIds = selectedCandidates.map((item) => item.id).join(',')
   await redisDataSource.redisClient?.set(key, candidateIds)
 
-  return dedupedCandidates
+  console.timeEnd('getCandidatesList')
+
+  return selectedCandidates
 }
 
 // Takes a list of library items, and uses a prompt to generate
@@ -345,6 +355,7 @@ const chooseRankedSelections = (rankedCandidates: RankedItem[]) => {
 const summarizeItems = async (
   rankedCandidates: RankedItem[]
 ): Promise<RankedItem[]> => {
+  console.time('summarizeItems')
   const llm = new OpenAI({
     modelName: 'gpt-4-0125-preview',
     configuration: {
@@ -370,6 +381,8 @@ const summarizeItems = async (
     (summary, index) => (rankedCandidates[index].summary = summary)
   )
 
+  console.timeEnd('summarizeItems')
+
   return rankedCandidates
 }
 
@@ -378,6 +391,7 @@ const generateSpeechFiles = (
   rankedItems: RankedItem[],
   options: SSMLOptions
 ): SpeechFile[] => {
+  console.time('generateSpeechFiles')
   // convert the summaries from markdown to HTML
   const converter = new showdown.Converter({
     backslashEscapesHTMLTags: true,
@@ -396,6 +410,8 @@ const generateSpeechFiles = (
     })
   })
 
+  console.timeEnd('generateSpeechFiles')
+
   return speechFiles
 }
 
@@ -434,6 +450,8 @@ const generateByline = (summaries: RankedItem[]): string =>
 
 export const createDigest = async (jobData: CreateDigestData) => {
   try {
+    console.time('createDigestJob')
+
     digestDefinition = await fetchDigestDefinition()
 
     const candidates = await getCandidatesList(
@@ -511,5 +529,7 @@ export const createDigest = async (jobData: CreateDigestData) => {
 
       await sendMulticastPushNotifications(jobData.userId, message, 'reminder')
     }
+
+    console.timeEnd('createDigestJob')
   }
 }