From 49bdf0a6f45a09d6743df516f86b0acd11550cc5 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 2 May 2024 18:30:04 +0800 Subject: [PATCH 1/4] select model from prompt and defaults to openai --- packages/api/src/jobs/ai/create_digest.ts | 76 ++++++++++++++++------- packages/api/src/services/digest.ts | 1 + 2 files changed, 54 insertions(+), 23 deletions(-) diff --git a/packages/api/src/jobs/ai/create_digest.ts b/packages/api/src/jobs/ai/create_digest.ts index bd2c29497..e3eef6b71 100644 --- a/packages/api/src/jobs/ai/create_digest.ts +++ b/packages/api/src/jobs/ai/create_digest.ts @@ -61,6 +61,7 @@ interface DigestDefinition { assemblePrompt: string zeroShot: ZeroShotDefinition + model?: string } interface RankedItem { @@ -367,34 +368,47 @@ const chooseRankedSelections = (rankedCandidates: RankedItem[]) => { } const summarizeItems = async ( + model: string, rankedCandidates: RankedItem[] ): Promise => { - console.time('summarizeItems') - // const llm = new OpenAI({ - // modelName: 'gpt-4-0125-preview', - // configuration: { - // apiKey: process.env.OPENAI_API_KEY, - // }, - // }) + const contextualTemplate = PromptTemplate.fromTemplate( + digestDefinition.summaryPrompt + ) + if (model === 'openai') { + const llm = new OpenAI({ + modelName: 'gpt-4-0125-preview', + configuration: { + apiKey: process.env.OPENAI_API_KEY, + }, + }) + + const chain = contextualTemplate.pipe(llm) + + // send all the ranked candidates to openAI at once in a batch + const summaries = await chain.batch( + rankedCandidates.map((item) => ({ + title: item.libraryItem.title, + author: item.libraryItem.author ?? '', + content: item.libraryItem.readableContent, // markdown content + })) + ) + + logger.info('summaries: ', summaries) + + summaries.forEach( + (summary, index) => (rankedCandidates[index].summary = summary) + ) + + return rankedCandidates + } + + // use anthropic otherwise const llm = new ChatAnthropic({ apiKey: process.env.CLAUDE_API_KEY, model: 'claude-3-sonnet-20240229', }) - const contextualTemplate = ChatPromptTemplate.fromTemplate( - digestDefinition.summaryPrompt - ) - - // // send all the ranked candidates to openAI at once in a batch - // const summaries = await chain.batch( - // rankedCandidates.map((item) => ({ - // title: item.libraryItem.title, - // author: item.libraryItem.author ?? '', - // content: item.libraryItem.readableContent, // markdown content - // })) - // ) - const prompts = await Promise.all( rankedCandidates.map(async (item) => { try { @@ -419,8 +433,6 @@ const summarizeItems = async ( (rankedCandidates[index].summary = summary.content.toString()) ) - console.timeEnd('summarizeItems') - return rankedCandidates } @@ -489,6 +501,20 @@ const generateByline = (summaries: RankedItem[]): string => .map((item) => item.libraryItem.author) .join(', ') +const selectModel = (model?: string): string => { + switch (model) { + case 'random': + // randomly choose between openai and anthropic + return ['anthropic', 'openai'][Math.floor(Math.random() * 2)] + case 'anthropic': + return 'anthropic' + case 'openai': + default: + // default to openai + return 'openai' + } +} + export const createDigest = async (jobData: CreateDigestData) => { console.time('createDigestJob') @@ -496,6 +522,7 @@ export const createDigest = async (jobData: CreateDigestData) => { const digestId = jobData.id ?? uuid() try { digestDefinition = await fetchDigestDefinition() + const model = selectModel(digestDefinition.model) const candidates = await getCandidatesList( jobData.userId, @@ -520,7 +547,9 @@ export const createDigest = async (jobData: CreateDigestData) => { libraryItem: item, summary: '', })) - const summaries = await summarizeItems(selections) + console.time('summarizeItems') + const summaries = await summarizeItems(model, selections) + console.timeEnd('summarizeItems') const filteredSummaries = filterSummaries(summaries) @@ -548,6 +577,7 @@ export const createDigest = async (jobData: CreateDigestData) => { // description: generateDescription(summaries, rankedTopics), byline: generateByline(summaries), urlsToAudio: [], + model, } await writeDigest(jobData.userId, digest) diff --git a/packages/api/src/services/digest.ts b/packages/api/src/services/digest.ts index 811202efd..b7a44c740 100644 --- a/packages/api/src/services/digest.ts +++ b/packages/api/src/services/digest.ts @@ -25,6 +25,7 @@ export interface Digest { urlsToAudio?: string[] speechFiles?: SpeechFile[] + model?: string } const digestKey = (userId: string) => `digest:${userId}` From 298a50fba260b62c181bb2f6d71f57234c0666a3 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 2 May 2024 18:33:41 +0800 Subject: [PATCH 2/4] allow random model --- packages/api/src/jobs/ai/create_digest.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/api/src/jobs/ai/create_digest.ts b/packages/api/src/jobs/ai/create_digest.ts index e3eef6b71..0b6aaa9bc 100644 --- a/packages/api/src/jobs/ai/create_digest.ts +++ b/packages/api/src/jobs/ai/create_digest.ts @@ -523,6 +523,7 @@ export const createDigest = async (jobData: CreateDigestData) => { try { digestDefinition = await fetchDigestDefinition() const model = selectModel(digestDefinition.model) + logger.info('model: ', model) const candidates = await getCandidatesList( jobData.userId, From 4610c810927137e220ad51d6837a5857489fbf32 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 2 May 2024 19:00:21 +0800 Subject: [PATCH 3/4] upload summaries to gcs --- packages/api/src/jobs/ai/create_digest.ts | 43 +++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) diff --git a/packages/api/src/jobs/ai/create_digest.ts b/packages/api/src/jobs/ai/create_digest.ts index 0b6aaa9bc..d7d29f526 100644 --- a/packages/api/src/jobs/ai/create_digest.ts +++ b/packages/api/src/jobs/ai/create_digest.ts @@ -24,6 +24,7 @@ import { wordsCount } from '../../utils/helpers' import { logger } from '../../utils/logger' import { htmlToMarkdown } from '../../utils/parser' import { sendMulticastPushNotifications } from '../../utils/sendNotification' +import { generateUploadFilePathName, uploadToBucket } from '../../utils/uploads' export type CreateDigestJobSchedule = 'daily' | 'weekly' @@ -515,6 +516,35 @@ const selectModel = (model?: string): string => { } } +const uploadSummary = async ( + userId: string, + digest: Digest, + summaries: RankedItem[] +) => { + console.time('uploadSummary') + logger.info('uploading summaries to gcs') + + const filename = generateUploadFilePathName( + userId, + `${digest.id}/summaries.json` + ) + await uploadToBucket( + filename, + Buffer.from( + JSON.stringify({ + model: digest.model, + summaries: summaries.map((item) => ({ + title: item.libraryItem.title, + summary: item.summary, + })), + }) + ) + ) + + logger.info('uploaded summaries to gcs') + console.timeEnd('uploadSummary') +} + export const createDigest = async (jobData: CreateDigestData) => { console.time('createDigestJob') @@ -523,7 +553,7 @@ export const createDigest = async (jobData: CreateDigestData) => { try { digestDefinition = await fetchDigestDefinition() const model = selectModel(digestDefinition.model) - logger.info('model: ', model) + logger.info(`model: ${model}`) const candidates = await getCandidatesList( jobData.userId, @@ -581,7 +611,16 @@ export const createDigest = async (jobData: CreateDigestData) => { model, } - await writeDigest(jobData.userId, digest) + await Promise.all([ + // write the digest to redis + writeDigest(jobData.userId, digest), + // upload the summaries to GCS + uploadSummary(jobData.userId, digest, summaries).catch((error) => + logger.error('uploadSummary error', error) + ), + ]) + + logger.info(`digest created: ${digest.id}`) } catch (error) { logger.error('createDigestJob error', error) From 6e554e2bca82681cc2226df7116792b9bf5b4527 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 2 May 2024 19:10:02 +0800 Subject: [PATCH 4/4] update upload file name --- packages/api/src/jobs/ai/create_digest.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/packages/api/src/jobs/ai/create_digest.ts b/packages/api/src/jobs/ai/create_digest.ts index d7d29f526..db5650ccb 100644 --- a/packages/api/src/jobs/ai/create_digest.ts +++ b/packages/api/src/jobs/ai/create_digest.ts @@ -524,10 +524,7 @@ const uploadSummary = async ( console.time('uploadSummary') logger.info('uploading summaries to gcs') - const filename = generateUploadFilePathName( - userId, - `${digest.id}/summaries.json` - ) + const filename = `digest/${userId}/${digest.id}/summaries.json` await uploadToBucket( filename, Buffer.from( @@ -538,7 +535,11 @@ const uploadSummary = async ( summary: item.summary, })), }) - ) + ), + { + contentType: 'application/json', + public: false, + } ) logger.info('uploaded summaries to gcs')