Merge pull request #3891 from omnivore-app/fix/digest

Select the model from the prompt definition, defaulting to OpenAI
This commit is contained in:
Hongbo Wu
2024-05-03 09:25:32 +08:00
committed by GitHub
2 changed files with 96 additions and 24 deletions

View File

@ -24,6 +24,7 @@ import { wordsCount } from '../../utils/helpers'
import { logger } from '../../utils/logger'
import { htmlToMarkdown } from '../../utils/parser'
import { sendMulticastPushNotifications } from '../../utils/sendNotification'
import { generateUploadFilePathName, uploadToBucket } from '../../utils/uploads'
export type CreateDigestJobSchedule = 'daily' | 'weekly'
@ -61,6 +62,7 @@ interface DigestDefinition {
assemblePrompt: string
zeroShot: ZeroShotDefinition
model?: string
}
interface RankedItem {
@ -367,34 +369,47 @@ const chooseRankedSelections = (rankedCandidates: RankedItem[]) => {
}
const summarizeItems = async (
model: string,
rankedCandidates: RankedItem[]
): Promise<RankedItem[]> => {
console.time('summarizeItems')
// const llm = new OpenAI({
// modelName: 'gpt-4-0125-preview',
// configuration: {
// apiKey: process.env.OPENAI_API_KEY,
// },
// })
const contextualTemplate = PromptTemplate.fromTemplate(
digestDefinition.summaryPrompt
)
if (model === 'openai') {
const llm = new OpenAI({
modelName: 'gpt-4-0125-preview',
configuration: {
apiKey: process.env.OPENAI_API_KEY,
},
})
const chain = contextualTemplate.pipe(llm)
// send all the ranked candidates to openAI at once in a batch
const summaries = await chain.batch(
rankedCandidates.map((item) => ({
title: item.libraryItem.title,
author: item.libraryItem.author ?? '',
content: item.libraryItem.readableContent, // markdown content
}))
)
logger.info('summaries: ', summaries)
summaries.forEach(
(summary, index) => (rankedCandidates[index].summary = summary)
)
return rankedCandidates
}
// use anthropic otherwise
const llm = new ChatAnthropic({
apiKey: process.env.CLAUDE_API_KEY,
model: 'claude-3-sonnet-20240229',
})
const contextualTemplate = ChatPromptTemplate.fromTemplate(
digestDefinition.summaryPrompt
)
// // send all the ranked candidates to openAI at once in a batch
// const summaries = await chain.batch(
// rankedCandidates.map((item) => ({
// title: item.libraryItem.title,
// author: item.libraryItem.author ?? '',
// content: item.libraryItem.readableContent, // markdown content
// }))
// )
const prompts = await Promise.all(
rankedCandidates.map(async (item) => {
try {
@ -419,8 +434,6 @@ const summarizeItems = async (
(rankedCandidates[index].summary = summary.content.toString())
)
console.timeEnd('summarizeItems')
return rankedCandidates
}
@ -489,6 +502,50 @@ const generateByline = (summaries: RankedItem[]): string =>
.map((item) => item.libraryItem.author)
.join(', ')
/**
 * Resolves which LLM provider to use for digest generation.
 *
 * @param model - provider hint from the digest definition; `'random'` picks
 *   uniformly between anthropic and openai, and any unknown or missing value
 *   falls back to openai.
 * @returns `'anthropic'` or `'openai'`
 */
const selectModel = (model?: string): string => {
  switch (model) {
    case 'random':
      // randomly choose between openai and anthropic. A ternary is used
      // instead of indexing a literal array: under noUncheckedIndexedAccess
      // the indexed access types as `string | undefined`, and the ternary
      // makes the 50/50 split explicit.
      return Math.random() < 0.5 ? 'anthropic' : 'openai'
    case 'anthropic':
      return 'anthropic'
    case 'openai':
    default:
      // default to openai
      return 'openai'
  }
}
/**
 * Serializes the digest's per-item summaries to JSON and stores them in the
 * GCS bucket under a per-user, per-digest path. The object is kept private
 * and written with an `application/json` content type.
 *
 * @param userId - owner of the digest; part of the storage path
 * @param digest - the digest being created; its id and model are recorded
 * @param summaries - ranked items whose titles and summaries are persisted
 */
const uploadSummary = async (
  userId: string,
  digest: Digest,
  summaries: RankedItem[]
) => {
  console.time('uploadSummary')
  logger.info('uploading summaries to gcs')

  const filename = `digest/${userId}/${digest.id}/summaries.json`

  // keep only the fields we want to persist for each item
  const payload = {
    model: digest.model,
    summaries: summaries.map(({ libraryItem, summary }) => ({
      title: libraryItem.title,
      summary,
    })),
  }

  await uploadToBucket(filename, Buffer.from(JSON.stringify(payload)), {
    contentType: 'application/json',
    public: false,
  })

  logger.info('uploaded summaries to gcs')
  console.timeEnd('uploadSummary')
}
export const createDigest = async (jobData: CreateDigestData) => {
console.time('createDigestJob')
@ -496,6 +553,8 @@ export const createDigest = async (jobData: CreateDigestData) => {
const digestId = jobData.id ?? uuid()
try {
digestDefinition = await fetchDigestDefinition()
const model = selectModel(digestDefinition.model)
logger.info(`model: ${model}`)
const candidates = await getCandidatesList(
jobData.userId,
@ -520,7 +579,9 @@ export const createDigest = async (jobData: CreateDigestData) => {
libraryItem: item,
summary: '',
}))
const summaries = await summarizeItems(selections)
console.time('summarizeItems')
const summaries = await summarizeItems(model, selections)
console.timeEnd('summarizeItems')
const filteredSummaries = filterSummaries(summaries)
@ -548,9 +609,19 @@ export const createDigest = async (jobData: CreateDigestData) => {
// description: generateDescription(summaries, rankedTopics),
byline: generateByline(summaries),
urlsToAudio: [],
model,
}
await writeDigest(jobData.userId, digest)
await Promise.all([
// write the digest to redis
writeDigest(jobData.userId, digest),
// upload the summaries to GCS
uploadSummary(jobData.userId, digest, summaries).catch((error) =>
logger.error('uploadSummary error', error)
),
])
logger.info(`digest created: ${digest.id}`)
} catch (error) {
logger.error('createDigestJob error', error)

View File

@ -25,6 +25,7 @@ export interface Digest {
urlsToAudio?: string[]
speechFiles?: SpeechFile[]
model?: string
}
const digestKey = (userId: string) => `digest:${userId}`