Merge all changes from main, update theming of Discover
This commit is contained in:
@ -16,11 +16,11 @@ const enrichedArticles$ = (): Observable<OmnivoreArticle> => {
|
||||
;(() => {
|
||||
enrichedArticles$()
|
||||
.pipe(
|
||||
removeDuplicateArticles$,
|
||||
// removeDuplicateArticles$,
|
||||
addEmbeddingToArticle$,
|
||||
addTopicsToArticle$,
|
||||
putImageInProxy$,
|
||||
insertArticleToStore$,
|
||||
insertArticleToStore$
|
||||
)
|
||||
.subscribe((_it) => {})
|
||||
})()
|
||||
|
||||
@ -26,11 +26,11 @@ const prepareTitle = (article: OmnivoreArticle): string =>
|
||||
.replace(/[`~!@#$%^&*()_|+\-=?;:'",.<>{}[]\\\/]/gi, '')
|
||||
|
||||
const getEmbeddingForArticle = async (
|
||||
it: OmnivoreArticle,
|
||||
it: OmnivoreArticle
|
||||
): Promise<EmbeddedOmnivoreArticle> => {
|
||||
console.log(`${prepareTitle(it)}: ${it.description}`)
|
||||
// console.log(`${prepareTitle(it)}: ${it.description}`)
|
||||
const embedding = await client.getEmbeddings(
|
||||
`${prepareTitle(it)}: ${it.summary}`,
|
||||
`${prepareTitle(it)}: ${it.summary}`
|
||||
)
|
||||
|
||||
return {
|
||||
@ -41,7 +41,7 @@ const getEmbeddingForArticle = async (
|
||||
}
|
||||
|
||||
const addTopicsToArticle = async (
|
||||
it: EmbeddedOmnivoreArticle,
|
||||
it: EmbeddedOmnivoreArticle
|
||||
): Promise<EmbeddedOmnivoreArticle> => {
|
||||
const articleEmbedding = it.embedding
|
||||
|
||||
@ -49,7 +49,7 @@ const addTopicsToArticle = async (
|
||||
`SELECT name, similarity
|
||||
FROM (SELECT discover_topic_name as name, MAX(ABS(embed.embedding <#> $1)) AS "similarity" FROM omnivore.omnivore.discover_topic_embedding_link embed group by discover_topic_name) topics
|
||||
ORDER BY similarity desc`,
|
||||
[toSql(articleEmbedding)],
|
||||
[toSql(articleEmbedding)]
|
||||
)
|
||||
|
||||
// OpenAI seems to cluster things around 0.7-0.9. Through trial and error I have found 0.77 to be a fairly accurate score.
|
||||
@ -61,6 +61,25 @@ const addTopicsToArticle = async (
|
||||
topicNames.push(topics.rows[0]?.name)
|
||||
}
|
||||
|
||||
// Check whether any other topics score almost as high as the top-ranked one.
|
||||
// If the gap is minuscule, then we should include it, e.g. 0.7688 and 0.765
|
||||
const topTopic = topics.rows[0]
|
||||
const extraTopics = topics.rows
|
||||
.filter(
|
||||
({ similarity, name }) =>
|
||||
similarity < 0.77 &&
|
||||
topTopic.name != name &&
|
||||
topTopic.similarity - similarity < 0.01
|
||||
)
|
||||
.map(({ name }) => name as string)
|
||||
|
||||
if (extraTopics.length > 0) {
|
||||
console.log(`${it.article.title}: ${it.article.description}`)
|
||||
console.log(topics.rows)
|
||||
console.log(extraTopics)
|
||||
}
|
||||
topicNames.push(...extraTopics)
|
||||
|
||||
if (it.article.type == 'community') {
|
||||
topicNames.push('Community Picks')
|
||||
}
|
||||
@ -72,13 +91,13 @@ const addTopicsToArticle = async (
|
||||
}
|
||||
|
||||
const getEmbeddingForLabel = async (
|
||||
label: Label,
|
||||
label: Label
|
||||
): Promise<EmbeddedOmnivoreLabel> => {
|
||||
const embedding = await client.getEmbeddings(
|
||||
`${label.name}${label.description ? ' : ' + label.description : ''}`,
|
||||
`${label.name}${label.description ? ' : ' + label.description : ''}`
|
||||
)
|
||||
console.log(
|
||||
`${label.name}${label.description ? ' : ' + label.description : ''}`,
|
||||
`${label.name}${label.description ? ' : ' + label.description : ''}`
|
||||
)
|
||||
|
||||
return {
|
||||
@ -92,12 +111,12 @@ export const rateLimitEmbedding = <T>() =>
|
||||
|
||||
export const rateLimiting = rateLimitEmbedding<any>()
|
||||
|
||||
export const addEmbeddingToLabel: OperatorFunction<
|
||||
export const addEmbeddingToLabel$: OperatorFunction<
|
||||
Label,
|
||||
EmbeddedOmnivoreLabel
|
||||
> = pipe(
|
||||
rateLimiting,
|
||||
mergeMap((it: Label) => fromPromise(getEmbeddingForLabel(it))),
|
||||
mergeMap((it: Label) => fromPromise(getEmbeddingForLabel(it)))
|
||||
)
|
||||
|
||||
export const addEmbeddingToArticle$: OperatorFunction<
|
||||
@ -106,8 +125,8 @@ export const addEmbeddingToArticle$: OperatorFunction<
|
||||
> = pipe(
|
||||
rateLimiting,
|
||||
onErrorContinue(
|
||||
mergeMap((it: OmnivoreArticle) => fromPromise(getEmbeddingForArticle(it))),
|
||||
),
|
||||
mergeMap((it: OmnivoreArticle) => fromPromise(getEmbeddingForArticle(it)))
|
||||
)
|
||||
)
|
||||
|
||||
export const addTopicsToArticle$: OperatorFunction<
|
||||
@ -116,7 +135,7 @@ export const addTopicsToArticle$: OperatorFunction<
|
||||
> = pipe(
|
||||
onErrorContinue(
|
||||
mergeMap((it: EmbeddedOmnivoreArticle) =>
|
||||
fromPromise(addTopicsToArticle(it)),
|
||||
),
|
||||
),
|
||||
fromPromise(addTopicsToArticle(it))
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
@ -19,7 +19,7 @@ import { exponentialBackOff, onErrorContinue } from '../../../utils/reactive'
|
||||
|
||||
const REFRESH_DELAY_MS = 3_600_000
|
||||
const getRssFeed = async (
|
||||
feed: OmnivoreFeed,
|
||||
feed: OmnivoreFeed
|
||||
): Promise<OmnivoreContentFeed | null> => {
|
||||
try {
|
||||
const rss = await axios.get<string>(feed.link)
|
||||
@ -37,8 +37,8 @@ const rssToArticles = (site: OmnivoreFeed) =>
|
||||
fromPromise(getRssFeed(site)).pipe(
|
||||
filter((it): it is OmnivoreContentFeed => !!it),
|
||||
mergeMap<OmnivoreContentFeed, Observable<OmnivoreArticle>>((item) =>
|
||||
converters.generic(item),
|
||||
),
|
||||
converters.generic(item)
|
||||
)
|
||||
)
|
||||
|
||||
export const rss$ = (() => {
|
||||
@ -46,24 +46,36 @@ export const rss$ = (() => {
|
||||
|
||||
const filteredRss$ = getRssFeeds$.pipe(
|
||||
onErrorContinue(
|
||||
mergeMap((it) => rssToArticles(it).pipe(exponentialBackOff(5))),
|
||||
mergeMap((it) => rssToArticles(it).pipe(exponentialBackOff(5)))
|
||||
),
|
||||
filter((it: OmnivoreArticle) => it.publishedAt > lastUpdatedTime),
|
||||
finalize(() => {
|
||||
lastUpdatedTime = new Date()
|
||||
console.log(lastUpdatedTime)
|
||||
}),
|
||||
})
|
||||
)
|
||||
|
||||
return merge(
|
||||
newFeeds$.pipe(
|
||||
onErrorContinue(
|
||||
mergeMap((it) => rssToArticles(it).pipe(exponentialBackOff(5))),
|
||||
),
|
||||
mergeMap((it) => rssToArticles(it).pipe(exponentialBackOff(5)))
|
||||
)
|
||||
),
|
||||
timer(0, REFRESH_DELAY_MS).pipe(
|
||||
tap((e) => console.log('Refreshing Stream')),
|
||||
concatMap(() => filteredRss$),
|
||||
),
|
||||
concatMap(() => filteredRss$)
|
||||
)
|
||||
)
|
||||
|
||||
// return fromArrayLike([
|
||||
// {
|
||||
// id: 'ABC',
|
||||
// description:
|
||||
// 'Though AI companies said they put some guardrails in place, researchers were able to easily create images related to claims of election fraud.',
|
||||
// image: 'string',
|
||||
// link: 'https://www.wired.com/story/genai-images-election-fraud/',
|
||||
// title: 'AI Tools Are Still Generating Misleading Election Images',
|
||||
// type: 'RSS',
|
||||
// },
|
||||
// ])
|
||||
})()
|
||||
|
||||
Reference in New Issue
Block a user