update candidates selection

This commit is contained in:
Hongbo Wu
2024-05-23 15:48:30 +08:00
parent 43ebdc30a1
commit 06b89a88b3
8 changed files with 188 additions and 23 deletions

View File

@ -204,4 +204,7 @@ export class LibraryItem {
@Column('text')
highlightAnnotations?: string[]
@Column('timestamptz')
seenAt?: Date
}

View File

@ -0,0 +1,57 @@
import {
Column,
CreateDateColumn,
Entity,
OneToOne,
PrimaryGeneratedColumn,
UpdateDateColumn,
} from 'typeorm'
import { PublicItemInteraction } from './public_item_interaction'
/**
 * ORM entity for one item of the public inventory.
 *
 * Columns declared with `nullable: true` are typed `T | null`; every other
 * column is required.
 */
@Entity()
export class PublicItem {
  @PrimaryGeneratedColumn('uuid')
  id!: string

  // Inverse (non-owning) side of the relation: the join column is declared on
  // PublicItemInteraction.publicItem, so the inverse property has to be named
  // here, otherwise TypeORM cannot resolve joins/filters through `interaction`.
  // NOTE(review): an interaction also references a user, so one item may have
  // several interaction rows — confirm one-to-one is the intended cardinality.
  @OneToOne(
    () => PublicItemInteraction,
    (interaction: PublicItemInteraction) => interaction.publicItem
  )
  interaction?: PublicItemInteraction

  // NOTE(review): presumably a user id or a public item source id — confirm
  // against the migration that creates the table.
  @Column('uuid')
  sourceId!: string

  @Column('text')
  type!: string

  @Column('text')
  title!: string

  @Column('text')
  url!: string

  // Queries for feed candidates filter on this flag.
  @Column('boolean')
  approved!: boolean

  @Column('text', { nullable: true })
  thumbnail?: string | null

  @Column('text', { nullable: true })
  previewContent?: string | null

  @Column('text', { nullable: true })
  languageCode?: string | null

  @Column('text', { nullable: true })
  author?: string | null

  @Column('text', { nullable: true })
  dir?: string | null

  @Column('timestamptz', { nullable: true })
  publishedAt?: Date | null

  @CreateDateColumn()
  createdAt!: Date

  @UpdateDateColumn()
  updatedAt!: Date
}

View File

@ -0,0 +1,42 @@
import {
Column,
Entity,
JoinColumn,
ManyToOne,
OneToOne,
PrimaryGeneratedColumn,
} from 'typeorm'
import { PublicItem } from './public_item'
import { User } from './user'
/**
 * ORM entity for the `public_item_interactions` table: the interaction state
 * between one user and one public item.
 *
 * Each optional `*At` timestamp is left unset until a value is stored for it.
 */
@Entity({ name: 'public_item_interactions' })
export class PublicItemInteraction {
  @PrimaryGeneratedColumn('uuid')
  id!: string

  // Owning side of the relation to PublicItem (holds `public_item_id`).
  @OneToOne(() => PublicItem, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'public_item_id' })
  publicItem!: PublicItem

  @ManyToOne(() => User, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'user_id' })
  user!: User

  // The properties below are optional, so the columns are declared nullable
  // to keep the ORM metadata in line with the TypeScript types.
  @Column('timestamptz', { nullable: true })
  seenAt?: Date

  @Column('timestamptz', { nullable: true })
  savedAt?: Date

  @Column('timestamptz', { nullable: true })
  likedAt?: Date

  @Column('timestamptz', { nullable: true })
  broadcastedAt?: Date

  @Column('timestamptz')
  createdAt!: Date

  // The property is called `updated`, which no naming strategy would map to
  // the `updated_at` column, so the column name is given explicitly.
  @Column('timestamptz', { name: 'updated_at' })
  updated!: Date
}

View File

@ -0,0 +1,40 @@
import { searchLibraryItems } from '../services/library_item'
import { findUnseenPublicItems } from '../services/public_item'
import { logger } from '../utils/logger'
/** Input of the just-read feed update: identifies whose feed to refresh. */
interface JustReadFeedUpdateData {
// id of the user the candidates are selected for
userId: string
}
/**
 * Gathers the raw feed candidates for a user from two sources: the user's own
 * library (query `-is:seen`, at most 100 items, without content) and the
 * public inventory (at most 100 unseen items).
 *
 * @param userId id of the user the candidates are selected for
 * @returns the private candidates followed by the public candidates
 */
const selectCandidates = async (userId: string) => {
  // The two lookups do not depend on each other, so they run concurrently
  // instead of one after the other.
  const [privateCandidates, publicCandidates] = await Promise.all([
    // get last 100 library items saved and not seen by user
    searchLibraryItems(
      {
        size: 100,
        includeContent: false,
        query: `-is:seen`,
      },
      userId
    ),
    // get candidates from public inventory
    findUnseenPublicItems(userId, {
      limit: 100,
    }),
  ])

  // TODO: mix candidates
  // Spread keeps the same order as `concat` but yields a union-typed array,
  // whereas `concat` expects both arrays to share one element type.
  return [...privateCandidates, ...publicCandidates]
}
/**
 * Refreshes the just-read feed of one user. For now it only selects the
 * candidates and logs how many were found; ranking and storing them are still
 * open TODOs.
 *
 * @param data payload carrying the id of the user whose feed is updated
 */
const updateJustReadFeed = async (data: JustReadFeedUpdateData) => {
  const userId = data.userId
  logger.info(`Updating just read feed for user ${userId}`)

  const candidates = await selectCandidates(userId)
  logger.info(`Found ${candidates.length} candidates`)

  // Remaining pipeline steps, not implemented yet:
  // TODO: integrity check on candidates?
  // TODO: rank candidates
  // TODO: prepend candidates to feed in redis
}

View File

@ -60,6 +60,7 @@ enum ReadFilter {
READ = 'read',
READING = 'reading',
UNREAD = 'unread',
SEEN = 'seen',
}
enum InFilter {
@ -332,6 +333,8 @@ export const buildQueryString = (
return 'library_item.reading_progress_bottom_percent BETWEEN 2 AND 98'
case ReadFilter.UNREAD:
return 'library_item.reading_progress_bottom_percent < 2'
case ReadFilter.SEEN:
return 'library_item.seen_at IS NOT NULL'
default:
throw new Error(`Unexpected keyword: ${value}`)
}

View File

@ -0,0 +1,28 @@
import { IsNull } from 'typeorm'
import { PublicItem } from '../entity/public_item'
import { getRepository } from '../repository'
/**
 * Looks up approved public items whose interaction belongs to the given user
 * and has no `seenAt` value, newest first.
 *
 * @param userId id of the user whose unseen items are requested
 * @param options optional paging: `limit` is passed as `take`, `offset` as
 *   `skip`; both may be omitted
 * @returns whatever the repository's `find` resolves to for this query
 */
export const findUnseenPublicItems = async (
  userId: string,
  options: {
    limit?: number
    offset?: number
  } = {}
) =>
  getRepository(PublicItem).find({
    where: {
      // `interaction` must appear only once: a duplicate key is rejected by
      // the compiler, and at runtime only the last entry would take effect.
      // NOTE(review): filtering on the interaction's user presumably leaves
      // out items that have no interaction row for this user yet — confirm
      // whether those should count as unseen too.
      interaction: {
        user: {
          id: userId,
        },
        seenAt: IsNull(),
      },
      approved: true,
    },
    order: {
      createdAt: 'DESC',
    },
    take: options.limit,
    skip: options.offset,
  })

View File

@ -16,6 +16,9 @@ CREATE TABLE omnivore.public_item_source (
updated_at timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE TRIGGER update_public_item_source_modtime BEFORE UPDATE ON omnivore.public_item_source FOR EACH ROW EXECUTE PROCEDURE update_updated_at_column();
CREATE TABLE omnivore.public_item (
id uuid PRIMARY KEY DEFAULT uuid_generate_v1mc(),
source_id uuid NOT NULL, -- user_id or public_item_source_id
@ -33,18 +36,8 @@ CREATE TABLE omnivore.public_item (
updated_at timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE omnivore.public_item_features (
id uuid PRIMARY KEY DEFAULT uuid_generate_v1mc(),
public_item_id uuid NOT NULL REFERENCES omnivore.public_item(id) ON DELETE CASCADE,
classified_topic TEXT,
sentiment_score FLOAT,
writing_style TEXT,
popularity_score FLOAT,
embedding VECTOR(768),
created_at timestamptz NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE TRIGGER update_public_item_modtime BEFORE UPDATE ON omnivore.public_item FOR EACH ROW EXECUTE PROCEDURE update_updated_at_column();
CREATE INDEX public_item_feature_public_item_id_idx ON omnivore.public_item_features(public_item_id);
CREATE TABLE omnivore.public_item_stats (
id uuid PRIMARY KEY DEFAULT uuid_generate_v1mc(),
@ -57,28 +50,26 @@ CREATE TABLE omnivore.public_item_stats (
);
CREATE INDEX public_item_stats_public_item_id_idx ON omnivore.public_item_stats(public_item_id);
CREATE TRIGGER update_public_item_stats_modtime BEFORE UPDATE ON omnivore.public_item_stats FOR EACH ROW EXECUTE PROCEDURE update_updated_at_column();
-- Interaction rows linking a user to a public item; both foreign keys cascade
-- on delete.
CREATE TABLE omnivore.public_item_interactions (
    id uuid PRIMARY KEY DEFAULT uuid_generate_v1mc(),
    user_id uuid NOT NULL REFERENCES omnivore.user(id) ON DELETE CASCADE,
    public_item_id uuid NOT NULL REFERENCES omnivore.public_item(id) ON DELETE CASCADE,
    action TEXT NOT NULL, -- save, like, broadcast, comment, see
    action_data TEXT, -- for comment, the comment text
    saved_at TIMESTAMPTZ,
    liked_at TIMESTAMPTZ,
    broadcasted_at TIMESTAMPTZ,
    seen_at TIMESTAMPTZ,
    -- column definitions must be comma-separated; the comma after created_at was missing
    created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX public_item_interaction_user_id_idx ON omnivore.public_item_interactions(user_id);
CREATE INDEX public_item_interaction_public_item_id_idx ON omnivore.public_item_interactions(public_item_id);
CREATE TRIGGER update_public_item_interactions_modtime BEFORE UPDATE ON omnivore.public_item_interactions FOR EACH ROW EXECUTE PROCEDURE update_updated_at_column();
CREATE TABLE omnivore.library_item_interactions (
id uuid PRIMARY KEY DEFAULT uuid_generate_v1mc(),
user_id uuid NOT NULL REFERENCES omnivore.user(id) ON DELETE CASCADE,
library_item_id uuid NOT NULL REFERENCES omnivore.library_item(id) ON DELETE CASCADE,
action TEXT NOT NULL, -- seen
created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX library_item_interaction_user_id_idx ON omnivore.library_item_interactions(user_id);
CREATE INDEX library_item_interaction_library_item_id_idx ON omnivore.library_item_interactions(library_item_id);
ALTER TABLE omnivore.library_item ADD COLUMN seen_at timestamptz;
COMMIT;

View File

@ -6,8 +6,9 @@ BEGIN;
DROP TABLE omnivore.public_item_interactions;
DROP TABLE omnivore.public_item_stats;
DROP TABLE omnivore.public_item_features;
DROP TABLE omnivore.public_item;
DROP TABLE omnivore.public_item_source;
ALTER TABLE omnivore.library_item DROP COLUMN seen_at;
COMMIT;