Merge pull request #3891 from omnivore-app/fix/digest
select model from prompt and defaults to openai
This commit is contained in:
@ -24,6 +24,7 @@ import { wordsCount } from '../../utils/helpers'
|
||||
import { logger } from '../../utils/logger'
|
||||
import { htmlToMarkdown } from '../../utils/parser'
|
||||
import { sendMulticastPushNotifications } from '../../utils/sendNotification'
|
||||
import { generateUploadFilePathName, uploadToBucket } from '../../utils/uploads'
|
||||
|
||||
export type CreateDigestJobSchedule = 'daily' | 'weekly'
|
||||
|
||||
@ -61,6 +62,7 @@ interface DigestDefinition {
|
||||
assemblePrompt: string
|
||||
|
||||
zeroShot: ZeroShotDefinition
|
||||
model?: string
|
||||
}
|
||||
|
||||
interface RankedItem {
|
||||
@ -367,34 +369,47 @@ const chooseRankedSelections = (rankedCandidates: RankedItem[]) => {
|
||||
}
|
||||
|
||||
const summarizeItems = async (
|
||||
model: string,
|
||||
rankedCandidates: RankedItem[]
|
||||
): Promise<RankedItem[]> => {
|
||||
console.time('summarizeItems')
|
||||
// const llm = new OpenAI({
|
||||
// modelName: 'gpt-4-0125-preview',
|
||||
// configuration: {
|
||||
// apiKey: process.env.OPENAI_API_KEY,
|
||||
// },
|
||||
// })
|
||||
const contextualTemplate = PromptTemplate.fromTemplate(
|
||||
digestDefinition.summaryPrompt
|
||||
)
|
||||
|
||||
if (model === 'openai') {
|
||||
const llm = new OpenAI({
|
||||
modelName: 'gpt-4-0125-preview',
|
||||
configuration: {
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
},
|
||||
})
|
||||
|
||||
const chain = contextualTemplate.pipe(llm)
|
||||
|
||||
// send all the ranked candidates to openAI at once in a batch
|
||||
const summaries = await chain.batch(
|
||||
rankedCandidates.map((item) => ({
|
||||
title: item.libraryItem.title,
|
||||
author: item.libraryItem.author ?? '',
|
||||
content: item.libraryItem.readableContent, // markdown content
|
||||
}))
|
||||
)
|
||||
|
||||
logger.info('summaries: ', summaries)
|
||||
|
||||
summaries.forEach(
|
||||
(summary, index) => (rankedCandidates[index].summary = summary)
|
||||
)
|
||||
|
||||
return rankedCandidates
|
||||
}
|
||||
|
||||
// use anthropic otherwise
|
||||
const llm = new ChatAnthropic({
|
||||
apiKey: process.env.CLAUDE_API_KEY,
|
||||
model: 'claude-3-sonnet-20240229',
|
||||
})
|
||||
|
||||
const contextualTemplate = ChatPromptTemplate.fromTemplate(
|
||||
digestDefinition.summaryPrompt
|
||||
)
|
||||
|
||||
// // send all the ranked candidates to openAI at once in a batch
|
||||
// const summaries = await chain.batch(
|
||||
// rankedCandidates.map((item) => ({
|
||||
// title: item.libraryItem.title,
|
||||
// author: item.libraryItem.author ?? '',
|
||||
// content: item.libraryItem.readableContent, // markdown content
|
||||
// }))
|
||||
// )
|
||||
|
||||
const prompts = await Promise.all(
|
||||
rankedCandidates.map(async (item) => {
|
||||
try {
|
||||
@ -419,8 +434,6 @@ const summarizeItems = async (
|
||||
(rankedCandidates[index].summary = summary.content.toString())
|
||||
)
|
||||
|
||||
console.timeEnd('summarizeItems')
|
||||
|
||||
return rankedCandidates
|
||||
}
|
||||
|
||||
@ -489,6 +502,50 @@ const generateByline = (summaries: RankedItem[]): string =>
|
||||
.map((item) => item.libraryItem.author)
|
||||
.join(', ')
|
||||
|
||||
const selectModel = (model?: string): string => {
|
||||
switch (model) {
|
||||
case 'random':
|
||||
// randomly choose between openai and anthropic
|
||||
return ['anthropic', 'openai'][Math.floor(Math.random() * 2)]
|
||||
case 'anthropic':
|
||||
return 'anthropic'
|
||||
case 'openai':
|
||||
default:
|
||||
// default to openai
|
||||
return 'openai'
|
||||
}
|
||||
}
|
||||
|
||||
const uploadSummary = async (
|
||||
userId: string,
|
||||
digest: Digest,
|
||||
summaries: RankedItem[]
|
||||
) => {
|
||||
console.time('uploadSummary')
|
||||
logger.info('uploading summaries to gcs')
|
||||
|
||||
const filename = `digest/${userId}/${digest.id}/summaries.json`
|
||||
await uploadToBucket(
|
||||
filename,
|
||||
Buffer.from(
|
||||
JSON.stringify({
|
||||
model: digest.model,
|
||||
summaries: summaries.map((item) => ({
|
||||
title: item.libraryItem.title,
|
||||
summary: item.summary,
|
||||
})),
|
||||
})
|
||||
),
|
||||
{
|
||||
contentType: 'application/json',
|
||||
public: false,
|
||||
}
|
||||
)
|
||||
|
||||
logger.info('uploaded summaries to gcs')
|
||||
console.timeEnd('uploadSummary')
|
||||
}
|
||||
|
||||
export const createDigest = async (jobData: CreateDigestData) => {
|
||||
console.time('createDigestJob')
|
||||
|
||||
@ -496,6 +553,8 @@ export const createDigest = async (jobData: CreateDigestData) => {
|
||||
const digestId = jobData.id ?? uuid()
|
||||
try {
|
||||
digestDefinition = await fetchDigestDefinition()
|
||||
const model = selectModel(digestDefinition.model)
|
||||
logger.info(`model: ${model}`)
|
||||
|
||||
const candidates = await getCandidatesList(
|
||||
jobData.userId,
|
||||
@ -520,7 +579,9 @@ export const createDigest = async (jobData: CreateDigestData) => {
|
||||
libraryItem: item,
|
||||
summary: '',
|
||||
}))
|
||||
const summaries = await summarizeItems(selections)
|
||||
console.time('summarizeItems')
|
||||
const summaries = await summarizeItems(model, selections)
|
||||
console.timeEnd('summarizeItems')
|
||||
|
||||
const filteredSummaries = filterSummaries(summaries)
|
||||
|
||||
@ -548,9 +609,19 @@ export const createDigest = async (jobData: CreateDigestData) => {
|
||||
// description: generateDescription(summaries, rankedTopics),
|
||||
byline: generateByline(summaries),
|
||||
urlsToAudio: [],
|
||||
model,
|
||||
}
|
||||
|
||||
await writeDigest(jobData.userId, digest)
|
||||
await Promise.all([
|
||||
// write the digest to redis
|
||||
writeDigest(jobData.userId, digest),
|
||||
// upload the summaries to GCS
|
||||
uploadSummary(jobData.userId, digest, summaries).catch((error) =>
|
||||
logger.error('uploadSummary error', error)
|
||||
),
|
||||
])
|
||||
|
||||
logger.info(`digest created: ${digest.id}`)
|
||||
} catch (error) {
|
||||
logger.error('createDigestJob error', error)
|
||||
|
||||
|
||||
@ -25,6 +25,7 @@ export interface Digest {
|
||||
|
||||
urlsToAudio?: string[]
|
||||
speechFiles?: SpeechFile[]
|
||||
model?: string
|
||||
}
|
||||
|
||||
const digestKey = (userId: string) => `digest:${userId}`
|
||||
|
||||
Reference in New Issue
Block a user