// packages/api/src/jobs/ai/create_digest.ts
//
// Job that assembles a digest for a user: it looks up candidate library
// items, builds (or fetches) a textual user profile, ranks the candidates
// against that profile, picks a small topic-diverse selection, summarizes
// each selected item with an LLM and finally stores the digest.
//
// Several steps are still stubs; they are marked with TODO below.
import { logger } from '../../utils/logger'
import { OpenAI } from '@langchain/openai'
import { PromptTemplate } from '@langchain/core/prompts'
import { LibraryItem } from '../../entity/library_item'
import { CreateDigestJobData } from '../../services/digest'

const USER_PROFILE_PROMPT =
  'Create a user profile based on the supplied titles\n\ntitles:\n{titles}'

const SUMMARIZE_PROMPT =
  'Summarize the supplied article.\n\ntitle: {title}\nauthor: {author}\ncontent: {content}'

// Model used for both the user-profile and the summarization prompts.
const LLM_MODEL_NAME = 'gpt-4-0125-preview'

// Upper bound on the number of items that end up in a digest.
const MAX_SELECTIONS = 5

// Upper bound on how many selected items may share the same topic.
const MAX_ITEMS_PER_TOPIC = 2

/**
 * Builds the OpenAI LLM client shared by the prompts in this file.
 * The API key is read from the OPENAI_API_KEY environment variable.
 */
const createLlm = (): OpenAI =>
  new OpenAI({
    modelName: LLM_MODEL_NAME,
    configuration: {
      apiKey: process.env.OPENAI_API_KEY,
    },
  })

/**
 * Returns the library items used to infer the user's preferences.
 *
 * TODO: Makes multiple DB queries and combines the results. Currently a stub
 * that always resolves to an empty list.
 *
 * @param userId id of the user the digest is created for (unused by the stub)
 */
const getPreferencesList = (userId: string): Promise<LibraryItem[]> => {
  // use the queries from the digest definitions to lookup preferences
  // There should be a list of multiple queries we use. For now we can
  // hardcode these queries:
  // - query: "in:all is:read OR has:highlights sort:updated-desc wordsCount:>=20"
  //   count: 21
  //   reason: "recently read or highlighted items that are not part of the digest"
  // - query: "in:all is:read OR has:highlights sort:saved-asc wordsCount:>=20"
  //   count: 4
  //   reason: "some older items that were interacted with"
  return Promise.resolve([])
}

/**
 * Returns the library items that are candidates for inclusion in the digest.
 *
 * TODO: Makes multiple DB queries and combines the results. Currently a stub
 * that always resolves to an empty list.
 *
 * @param userId id of the user the digest is created for (unused by the stub)
 */
const getCandidatesList = (userId: string): Promise<LibraryItem[]> => {
  // use the queries from the digest definitions to lookup candidates
  // There should be a list of multiple queries we use. For now we can
  // hardcode these queries:
  // - query: "in:all is:unread saved:last24hrs sort:saved-desc wordsCount:>=500"
  //   count: 100
  //   reason: "most recent 100 items saved over 500 words"
  return Promise.resolve([])
}

/**
 * Takes a list of library items and uses a prompt to generate a text
 * representation of a user profile.
 *
 * @param preferences items whose titles are fed into the profile prompt
 * @returns the raw LLM completion for USER_PROFILE_PROMPT
 */
const createUserProfile = async (
  preferences: LibraryItem[]
): Promise<string> => {
  const chain = PromptTemplate.fromTemplate(USER_PROFILE_PROMPT).pipe(
    createLlm()
  )

  // The prompt asks for titles, so interpolate the title explicitly;
  // interpolating the entity itself would fall back to its toString(),
  // which for a plain object is '[object Object]'.
  const profile = await chain.invoke({
    titles: preferences.map((item) => `* ${item.title}`).join('\n'),
  })

  return profile
}

/**
 * Returns the user profile for the given user.
 *
 * TODO: Checks redis for a user profile, if not found creates one and writes
 * it to redis. Currently the profile is always created from scratch.
 *
 * @param userId id of the user whose profile is needed
 */
const findOrCreateUserProfile = async (userId: string): Promise<string> => {
  // check redis for user profile, return if found
  // if not found
  const preferences = await getPreferencesList(userId)
  const profile = await createUserProfile(preferences)
  // TODO: write to redis here
  return profile
}

// A candidate library item together with its topic and, once
// summarizeItems has run, its generated summary.
type RankedItem = {
  topic: string
  summary?: string
  libraryItem: LibraryItem
}

/**
 * Ranks the candidate items against the user profile.
 *
 * TODO: Uses OpenAI to rank all the titles based on the user profiles.
 * Currently a stub that always resolves to an empty list.
 *
 * @param candidates items to rank (unused by the stub)
 * @param userProfile textual user profile (unused by the stub)
 */
const rankCandidates = async (
  candidates: LibraryItem[],
  userProfile: string
): Promise<RankedItem[]> => {
  return Promise.resolve([])
}

/**
 * Does some grouping by topic while trying to maintain ranking, and adds
 * some basic topic diversity.
 *
 * Walks the candidates in rank order, keeps at most MAX_ITEMS_PER_TOPIC items
 * per topic and stops once MAX_SELECTIONS items are kept. The kept items are
 * then regrouped so that items sharing a topic are adjacent, with topics
 * ordered by the first time each one was selected.
 *
 * @param rankedCandidates candidates ordered from best to worst rank
 * @returns the selected items grouped by topic; empty for empty input
 */
const chooseRankedSelections = (rankedCandidates: RankedItem[]) => {
  const selected: RankedItem[] = []
  const rankedTopics: string[] = []
  // A Map rather than a plain object so that topics such as 'constructor'
  // or '__proto__' cannot collide with inherited Object properties.
  const topicCount = new Map<string, number>()

  for (const item of rankedCandidates) {
    if (selected.length >= MAX_SELECTIONS) {
      break
    }

    // Every candidate seen counts towards its topic, selected or not.
    const seen = (topicCount.get(item.topic) ?? 0) + 1
    topicCount.set(item.topic, seen)

    if (seen <= MAX_ITEMS_PER_TOPIC) {
      selected.push(item)
      if (!rankedTopics.includes(item.topic)) {
        rankedTopics.push(item.topic)
      }
    }
  }

  // Group the selected items by topic, preserving rank order inside a topic.
  const finalSelections = rankedTopics.flatMap((topic) =>
    selected.filter((item) => item.topic === topic)
  )

  // Log topic and title only; the full items carry the article content.
  const describe = (item: RankedItem) => ({
    topic: item.topic,
    title: item.libraryItem.title,
  })
  logger.info('rankedTopics: ', { rankedTopics })
  logger.info('selected: ', { selected: selected.map(describe) })
  logger.info('finalSelections: ', {
    finalSelections: finalSelections.map(describe),
  })

  return finalSelections
}

/**
 * Generates a summary for every item with SUMMARIZE_PROMPT and stores it on
 * the item's `summary` field. The items are updated in place and the same
 * array is returned.
 *
 * TODO: we could parallelize this step by sending all the ranked candidates
 * to OpenAI at once; for now the items are summarized one after another.
 *
 * @param rankedCandidates items to summarize
 * @returns the same array, with `summary` set on each item
 */
const summarizeItems = async (
  rankedCandidates: RankedItem[]
): Promise<RankedItem[]> => {
  // The template and chain do not depend on the item, so build them once.
  const chain = PromptTemplate.fromTemplate(SUMMARIZE_PROMPT).pipe(createLlm())

  for (const item of rankedCandidates) {
    item.summary = await chain.invoke({
      title: item.libraryItem.title,
      author: item.libraryItem.author ?? '',
      content: item.libraryItem.readableContent, // markdown content
    })
  }

  return rankedCandidates
}

/**
 * Stores the digest for the user.
 *
 * TODO: write the digest to redis here. Currently a no-op.
 *
 * @param userId id of the user the digest belongs to
 * @param selections summarized items that make up the digest
 */
const writeDigest = (userId: string, selections: RankedItem[]): void => {
  // intentionally empty until the redis write is implemented
}

/**
 * Job entry point: creates a digest for the user named in the job data.
 *
 * Failures are logged and swallowed, so the returned promise does not reject.
 *
 * @param jobData job payload; only `userId` is read here
 */
export const CreateDigestJob = async (
  jobData: CreateDigestJobData
): Promise<void> => {
  try {
    const candidates = await getCandidatesList(jobData.userId)
    const userProfile = await findOrCreateUserProfile(jobData.userId)
    const rankedCandidates = await rankCandidates(candidates, userProfile)
    const selections = chooseRankedSelections(rankedCandidates)

    const summaries = await summarizeItems(selections)

    // TODO: we should have a QA step here that does some
    // basic checks to make sure the summaries are good.

    writeDigest(jobData.userId, summaries)
  } catch (err) {
    logger.error('error creating summary: ', err)
  }
}