Digest job
This commit is contained in:
committed by
Hongbo Wu
parent
47150c2991
commit
fbaa9ce663
167
packages/api/src/jobs/ai/create_digest.ts
Normal file
167
packages/api/src/jobs/ai/create_digest.ts
Normal file
@ -0,0 +1,167 @@
|
||||
import { logger } from '../../utils/logger'
|
||||
import { OpenAI } from '@langchain/openai'
|
||||
import { PromptTemplate } from '@langchain/core/prompts'
|
||||
import { LibraryItem } from '../../entity/library_item'
|
||||
import { CreateDigestJobData } from '../../services/digest'
|
||||
|
||||
const USER_PROFILE_PROMPT =
|
||||
'Create a user profile based on the supplied titles\n\ntitles:\n{titles}'
|
||||
|
||||
const SUMMARIZE_PROMPT =
|
||||
'Summarize the supplied article.\n\ntitle: {title}\nauthor: {author}\ncontent: {content}'
|
||||
|
||||
// TODO: Makes multiple DB queries and combines the results
|
||||
const getPreferencesList = (userId: string): Promise<LibraryItem[]> => {
|
||||
// use the queries from the digest definitions to lookup preferences
|
||||
// There should be a list of multiple queries we use. For now we can
|
||||
// hardcode these queries:
|
||||
// - query: "in:all is:read OR has:highlights sort:updated-desc wordsCount:>=20"
|
||||
// count: 21
|
||||
// reason: "recently read or highlighted items that are not part of the digest"
|
||||
// - query: "in:all is:read OR has:highlights sort:saved-asc wordsCount:>=20"
|
||||
// count: 4
|
||||
// reason: "some older items that were interacted with"
|
||||
return Promise.resolve([])
|
||||
}
|
||||
|
||||
// TODO: Makes multiple DB queries and combines the results
|
||||
const getCandidatesList = (userId: string): Promise<LibraryItem[]> => {
|
||||
// use the queries from the digest definitions to lookup preferences
|
||||
// There should be a list of multiple queries we use. For now we can
|
||||
// hardcode these queries:
|
||||
// - query: "in:all is:unread saved:last24hrs sort:saved-desc wordsCount:>=500"
|
||||
// count: 100
|
||||
// reason: "most recent 100 items saved over 500 words
|
||||
return Promise.resolve([])
|
||||
}
|
||||
|
||||
// TODO: Takes a list of library items, and uses a prompt to generate
|
||||
// a text representation of a user profile
|
||||
const createUserProfile = async (
|
||||
preferences: LibraryItem[]
|
||||
): Promise<string> => {
|
||||
const llm = new OpenAI({
|
||||
modelName: 'gpt-4-0125-preview',
|
||||
configuration: {
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
},
|
||||
})
|
||||
|
||||
const contextualTemplate = PromptTemplate.fromTemplate(USER_PROFILE_PROMPT)
|
||||
|
||||
const chain = contextualTemplate.pipe(llm)
|
||||
const result = await chain.invoke({
|
||||
titles: preferences.map((item) => `* ${item}`).join('\n'),
|
||||
})
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// TODO: Checks redis for a user profile, if not found creates one and writes
|
||||
// it to redis
|
||||
const findOrCreateUserProfile = async (userId: string): Promise<string> => {
|
||||
// check redis for user profile, return if found
|
||||
// if not found
|
||||
const preferences = await getPreferencesList(userId)
|
||||
const profile = await createUserProfile(preferences)
|
||||
// TODO: write to redis here
|
||||
return profile
|
||||
}
|
||||
|
||||
type RankedItem = {
|
||||
topic: string
|
||||
summary?: string
|
||||
libraryItem: LibraryItem
|
||||
}
|
||||
|
||||
// TODO: Uses OpenAI to rank all the titles based on the user profiles
|
||||
const rankCandidates = async (
|
||||
candidates: LibraryItem[],
|
||||
userProfile: string
|
||||
): Promise<RankedItem[]> => {
|
||||
return Promise.resolve([])
|
||||
}
|
||||
|
||||
// Does some grouping by topic while trying to maintain ranking
|
||||
// adds some basic topic diversity
|
||||
const chooseRankedSelections = (rankedCandidates: RankedItem[]) => {
|
||||
const selected = []
|
||||
const rankedTopics = []
|
||||
const topicCount = {} as Record<string, number>
|
||||
|
||||
for (const item of rankedCandidates) {
|
||||
if (selected.length >= 5) {
|
||||
break
|
||||
}
|
||||
|
||||
topicCount[item.topic] = (topicCount[item.topic] || 0) + 1
|
||||
|
||||
if (topicCount[item.topic] <= 2) {
|
||||
selected.push(item)
|
||||
if (rankedTopics.indexOf(item.topic) === -1) {
|
||||
rankedTopics.push(item.topic)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log('rankedTopics: ', rankedTopics)
|
||||
console.log('finalSelections: ', selected)
|
||||
|
||||
const finalSelections = []
|
||||
|
||||
for (const topic of rankedTopics) {
|
||||
const matches = selected.filter((item) => item.topic == topic)
|
||||
finalSelections.push(...matches)
|
||||
}
|
||||
|
||||
console.log('finalSelections: ', finalSelections)
|
||||
|
||||
return finalSelections
|
||||
}
|
||||
|
||||
// TODO: we could paralleize this step sending all the ranked candidates to openAI at once
|
||||
const summarizeItems = async (
|
||||
rankedCandidates: RankedItem[]
|
||||
): Promise<RankedItem[]> => {
|
||||
const llm = new OpenAI({
|
||||
modelName: 'gpt-4-0125-preview',
|
||||
configuration: {
|
||||
apiKey: process.env.OPENAI_API_KEY,
|
||||
},
|
||||
})
|
||||
|
||||
for (const item of rankedCandidates) {
|
||||
const contextualTemplate = PromptTemplate.fromTemplate(SUMMARIZE_PROMPT)
|
||||
|
||||
const chain = contextualTemplate.pipe(llm)
|
||||
const summary = await chain.invoke({
|
||||
title: item.libraryItem.title,
|
||||
author: item.libraryItem.author ?? '',
|
||||
content: item.libraryItem.readableContent, // markdown content
|
||||
})
|
||||
item.summary = summary
|
||||
}
|
||||
|
||||
return rankedCandidates
|
||||
}
|
||||
|
||||
// TODO: write the digest to redis here
|
||||
const writeDigest = (userId: string, selections: RankedItem[]) => {}
|
||||
|
||||
export const CreateDigestJob = async (jobData: CreateDigestJobData) => {
|
||||
try {
|
||||
const candidates = await getCandidatesList(jobData.userId)
|
||||
const userProfile = await findOrCreateUserProfile(jobData.userId)
|
||||
const rankedCandidates = await rankCandidates(candidates, userProfile)
|
||||
const selections = chooseRankedSelections(rankedCandidates)
|
||||
|
||||
const summaries = await summarizeItems(selections)
|
||||
|
||||
// TODO: we should have a QA step here that does some
|
||||
// basic checks to make sure the summaries are good.
|
||||
|
||||
writeDigest(jobData.userId, summaries)
|
||||
} catch (err) {
|
||||
console.log('error creating summary: ', err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user