From e1af53ab486a2d231d511b55b7ba6605ae87f318 Mon Sep 17 00:00:00 2001
From: Hongbo Wu
Date: Mon, 21 Aug 2023 23:27:11 +0800
Subject: [PATCH] add libraryItem entity class

---
Review notes (text between the `---` marker and the first diff is ignored
when the patch is applied):
- Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy; verify context-line whitespace against the target tree before applying.
- The `index` blob ids are carried over from the original patch and no longer
  match the edited contents.
- The tsv trigger reads `new.original_url` because the entity added here maps
  `originalUrl` and declares no `url` property; confirm against the column list
  of omnivore.library_item, which lies outside the hunks of this patch.

 packages/api/src/entity/library_item.ts       | 182 ++++++++++++++++++
 packages/api/tsconfig.json                    |   4 +-
 .../db/migrations/0118.do.library_item.sql    |  38 +++-
 .../db/migrations/0118.undo.library_item.sql  |  14 +-
 .../0121.undo.library_item_preview.sql        |   2 +-
 5 files changed, 231 insertions(+), 9 deletions(-)
 create mode 100644 packages/api/src/entity/library_item.ts

diff --git a/packages/api/src/entity/library_item.ts b/packages/api/src/entity/library_item.ts
new file mode 100644
index 000000000..730f6c4a6
--- /dev/null
+++ b/packages/api/src/entity/library_item.ts
@@ -0,0 +1,182 @@
+import {
+  Column,
+  CreateDateColumn,
+  Entity,
+  JoinColumn,
+  ManyToOne,
+  OneToOne,
+  PrimaryGeneratedColumn,
+  UpdateDateColumn,
+} from 'typeorm'
+import { UploadFile } from './upload_file'
+import { User } from './user'
+
+/** Lifecycle states a library item can be in. */
+export enum LibraryItemState {
+  Failed = 'FAILED',
+  Processing = 'PROCESSING',
+  Succeeded = 'SUCCEEDED',
+  Deleted = 'DELETED',
+  Archived = 'ARCHIVED',
+}
+
+/** Kinds of content a library item can hold. */
+export enum LibraryItemType {
+  Article = 'ARTICLE',
+  Book = 'BOOK',
+  File = 'FILE',
+  Profile = 'PROFILE',
+  Website = 'WEBSITE',
+  Tweet = 'TWEET',
+  Video = 'VIDEO',
+  Image = 'IMAGE',
+  Unknown = 'UNKNOWN',
+}
+
+/** Content reader variants; the column defaults to WEB. */
+export enum ContentReaderType {
+  WEB = 'WEB',
+  PDF = 'PDF',
+  EPUB = 'EPUB',
+}
+
+/**
+ * TypeORM entity mapped to the `library_item` table.
+ *
+ * The `*Tsv` columns are overwritten by the `update_library_item_tsv`
+ * database trigger on every insert and update.
+ */
+@Entity({ name: 'library_item' })
+export class LibraryItem {
+  @PrimaryGeneratedColumn('uuid')
+  id?: string
+
+  // Many items belong to one user, so this is ManyToOne, not OneToOne.
+  @ManyToOne(() => User, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'user_id' })
+  user!: User
+
+  @Column('enum', {
+    enum: LibraryItemState,
+    default: LibraryItemState.Succeeded,
+  })
+  state?: LibraryItemState
+
+  @Column('text')
+  originalUrl!: string
+
+  @Column('text', { nullable: true })
+  downloadUrl?: string
+
+  @Column('text')
+  slug!: string
+
+  @Column('text')
+  title!: string
+
+  @Column('text', { nullable: true })
+  author?: string
+
+  @Column('text', { nullable: true })
+  description?: string
+
+  @Column('timestamptz')
+  savedAt?: Date
+
+  @CreateDateColumn()
+  createdAt?: Date
+
+  @Column('timestamptz', { nullable: true })
+  publishedAt?: Date
+
+  // NOTE(review): archivedAt/deletedAt presumably need nullable too - confirm.
+  @Column('timestamptz')
+  archivedAt?: Date
+
+  @Column('timestamptz')
+  deletedAt?: Date
+
+  // read_at carries no NOT NULL constraint in the 0118 migration.
+  @Column('timestamptz', { nullable: true })
+  readAt?: Date
+
+  @UpdateDateColumn()
+  updatedAt?: Date
+
+  @Column('text', { nullable: true })
+  itemLanguage?: string
+
+  @Column('integer', { nullable: true })
+  wordCount?: number
+
+  @Column('text', { nullable: true })
+  siteName?: string
+
+  @Column('text', { nullable: true })
+  siteIcon?: string
+
+  @Column('json', { nullable: true })
+  metadata?: Record<string, unknown>
+
+  @Column('integer', { nullable: true })
+  readingProgressLastReadAnchor?: number
+
+  @Column('integer', { nullable: true })
+  readingProgressHighestReadAnchor?: number
+
+  @Column('real', { nullable: true })
+  readingProgressTopPercent?: number
+
+  @Column('real', { nullable: true })
+  readingProgressBottomPercent?: number
+
+  @Column('text', { nullable: true })
+  thumbnail?: string
+
+  @Column('enum', { enum: LibraryItemType, default: LibraryItemType.Unknown })
+  itemType?: LibraryItemType
+
+  @OneToOne(() => UploadFile, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'upload_file_id' })
+  uploadFile?: UploadFile
+
+  @Column('enum', { enum: ContentReaderType, default: ContentReaderType.WEB })
+  contentReader?: ContentReaderType
+
+  @Column('text', { nullable: true })
+  originalContent?: string
+
+  @Column('text', { nullable: true })
+  readableContent?: string
+
+  @Column('tsvector', { nullable: true })
+  contentTsv?: string
+
+  @Column('tsvector', { nullable: true })
+  siteTsv?: string
+
+  @Column('tsvector', { nullable: true })
+  titleTsv?: string
+
+  @Column('tsvector', { nullable: true })
+  authorTsv?: string
+
+  @Column('tsvector', { nullable: true })
+  descriptionTsv?: string
+
+  @Column('tsvector', { nullable: true })
+  searchTsv?: string
+
+  @Column('text', { nullable: true })
+  modelName?: string
+
+  // NOT SUPPORTED IN TYPEORM
+  // @Column('vector', { nullable: true })
+  // embedding?: number[]
+
+  @Column('text', { nullable: true })
+  textContentHash?: string
+
+  @Column('text', { nullable: true })
+  gcsArchiveId?: string
+}
diff --git a/packages/api/tsconfig.json b/packages/api/tsconfig.json
index 7c8caecfd..9d64d8069 100644
--- a/packages/api/tsconfig.json
+++ b/packages/api/tsconfig.json
@@ -4,7 +4,9 @@
     "files": true
   },
   "compilerOptions": {
-    "outDir": "dist"
+    "outDir": "dist",
+    "emitDecoratorMetadata": true,
+    "experimentalDecorators": true
   },
   "include": ["src", "test"],
   "exclude": ["./src/generated", "./test"]
diff --git a/packages/db/migrations/0118.do.library_item.sql b/packages/db/migrations/0118.do.library_item.sql
index 1dbb1a8d0..5d3ec8765 100755
--- a/packages/db/migrations/0118.do.library_item.sql
+++ b/packages/db/migrations/0118.do.library_item.sql
@@ -30,7 +30,7 @@ CREATE TABLE omnivore.library_item (
     read_at timestamptz,
     updated_at timestamptz NOT NULL DEFAULT current_timestamp,
     item_language text,
-    words_count integer,
+    word_count integer,
     site_name text,
     site_icon text,
     metadata JSON,
@@ -39,9 +39,9 @@ CREATE TABLE omnivore.library_item (
     reading_progress_top_percent real,
     reading_progress_bottom_percent real,
     thumbnail text,
-    item_type library_item_type,
+    item_type library_item_type NOT NULL DEFAULT 'UNKNOWN',
     upload_file_id uuid REFERENCES omnivore.upload_files ON DELETE CASCADE,
-    content_reader content_reader_type,
+    content_reader content_reader_type NOT NULL DEFAULT 'WEB',
     original_content text,
     readable_content text,
     content_tsv tsvector,
@@ -58,4 +58,36 @@ CREATE TABLE omnivore.library_item (
 CREATE TRIGGER update_library_item_modtime BEFORE UPDATE ON omnivore.library_item
     FOR EACH ROW EXECUTE PROCEDURE update_updated_at_column();
 
+CREATE INDEX library_item_content_tsv_idx ON omnivore.library_item USING GIN (content_tsv);
+CREATE INDEX library_item_site_tsv_idx ON omnivore.library_item USING GIN (site_tsv);
+CREATE INDEX library_item_title_tsv_idx ON omnivore.library_item USING GIN (title_tsv);
+CREATE INDEX library_item_author_tsv_idx ON omnivore.library_item USING GIN (author_tsv);
+CREATE INDEX library_item_description_tsv_idx ON omnivore.library_item USING GIN (description_tsv);
+CREATE INDEX library_item_search_tsv_idx ON omnivore.library_item USING GIN (search_tsv);
+
+-- Recomputes every *_tsv column from its source column; search_tsv combines them by weight.
+CREATE OR REPLACE FUNCTION update_library_item_tsv() RETURNS trigger AS $$
+begin
+    new.content_tsv := to_tsvector('pg_catalog.english', coalesce(new.readable_content, ''));
+    new.site_tsv := to_tsvector('pg_catalog.english', coalesce(new.site_name, ''));
+    new.title_tsv := to_tsvector('pg_catalog.english', coalesce(new.title, ''));
+    new.author_tsv := to_tsvector('pg_catalog.english', coalesce(new.author, ''));
+    new.description_tsv := to_tsvector('pg_catalog.english', coalesce(new.description, ''));
+    new.search_tsv :=
+        setweight(new.title_tsv, 'A') ||
+        setweight(new.author_tsv, 'A') ||
+        setweight(new.site_tsv, 'A') ||
+        setweight(new.description_tsv, 'A') ||
+        -- full hostname (eg www.omnivore.app)
+        setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A') ||
+        -- secondary hostname (eg omnivore)
+        setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A') ||
+        setweight(new.content_tsv, 'B');
+    return new;
+end
+$$ LANGUAGE plpgsql;
+
+CREATE TRIGGER library_item_tsv_update BEFORE INSERT OR UPDATE
+    ON omnivore.library_item FOR EACH ROW EXECUTE PROCEDURE update_library_item_tsv();
+
 COMMIT;
diff --git a/packages/db/migrations/0118.undo.library_item.sql b/packages/db/migrations/0118.undo.library_item.sql
index 991cfb401..6197c6c44 100755
--- a/packages/db/migrations/0118.undo.library_item.sql
+++ b/packages/db/migrations/0118.undo.library_item.sql
@@ -4,14 +4,20 @@
 
 BEGIN;
 
+DROP TRIGGER library_item_tsv_update ON omnivore.library_item;
+DROP FUNCTION update_library_item_tsv();
+
+DROP INDEX library_item_search_tsv_idx;
+DROP INDEX library_item_description_tsv_idx;
+DROP INDEX library_item_author_tsv_idx;
+DROP INDEX library_item_title_tsv_idx;
+DROP INDEX library_item_site_tsv_idx;
+DROP INDEX library_item_content_tsv_idx;
+
 DROP TRIGGER update_library_item_modtime ON omnivore.library_item;
-
 DROP TABLE omnivore.library_item;
-
 DROP TYPE library_item_type;
-
 DROP TYPE content_reader_type;
-
 DROP TYPE library_item_state;
 
 DROP EXTENSION vector;
diff --git a/packages/db/migrations/0121.undo.library_item_preview.sql b/packages/db/migrations/0121.undo.library_item_preview.sql
index c55d96584..0644e036d 100755
--- a/packages/db/migrations/0121.undo.library_item_preview.sql
+++ b/packages/db/migrations/0121.undo.library_item_preview.sql
@@ -4,7 +4,7 @@
 
 BEGIN;
 
-DROP TRIGGER update_library_item_preview_modtime;
+DROP TRIGGER update_library_item_preview_modtime ON omnivore.library_item_preview;
 
 DROP TABLE omnivore.library_item_preview;
 