From 648a16615b8570d63fe7e53aa3f0cceb9529ae54 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 28 Aug 2023 18:20:44 +0800 Subject: [PATCH] update search query --- packages/api/src/elastic/pages.ts | 11 ++- packages/api/src/entity/library_item.ts | 9 ++ packages/api/src/resolvers/article/index.ts | 46 +++------- .../api/src/resolvers/upload_files/index.ts | 12 +-- packages/api/src/routers/svc/integrations.ts | 4 +- packages/api/src/services/library_item.ts | 89 ++++++++++++++----- packages/api/src/utils/search.ts | 26 +++--- packages/api/test/elastic/pages.test.ts | 24 ++--- packages/api/test/routers/auth.test.ts | 18 ++-- .../db/migrations/0118.do.library_item.sql | 13 ++- .../db/migrations/0118.undo.library_item.sql | 1 + 11 files changed, 155 insertions(+), 98 deletions(-) diff --git a/packages/api/src/elastic/pages.ts b/packages/api/src/elastic/pages.ts index 30e069652..7cb293075 100644 --- a/packages/api/src/elastic/pages.ts +++ b/packages/api/src/elastic/pages.ts @@ -226,8 +226,15 @@ const appendHasFilters = ( }, }) break - case HasFilter.SHARED_AT: - builder = builder.query('exists', { field: 'sharedAt' }) + case HasFilter.LABELS: + builder = builder.query('nested', { + path: 'labels', + query: { + exists: { + field: 'labels', + }, + }, + }) break } }) diff --git a/packages/api/src/entity/library_item.ts b/packages/api/src/entity/library_item.ts index e5001a0ec..7b1740481 100644 --- a/packages/api/src/entity/library_item.ts +++ b/packages/api/src/entity/library_item.ts @@ -12,6 +12,7 @@ import { Unique, UpdateDateColumn, } from 'typeorm' +import { Highlight } from './highlight' import { Label } from './label' import { Recommendation } from './recommendation' import { Subscription } from './subscription' @@ -188,4 +189,12 @@ export class LibraryItem { @Column('enum', { enum: DirectionalityType, default: DirectionalityType.LTR }) directionality!: DirectionalityType + + @OneToMany(() => Highlight, (highlight) => highlight.libraryItem) + @JoinTable({ + name: 
'highlight', + joinColumn: { name: 'library_item_id' }, + inverseJoinColumn: { name: 'id' }, + }) + highlights?: Highlight[] } diff --git a/packages/api/src/resolvers/article/index.ts b/packages/api/src/resolvers/article/index.ts index 054fe1b56..df3392641 100644 --- a/packages/api/src/resolvers/article/index.ts +++ b/packages/api/src/resolvers/article/index.ts @@ -5,22 +5,7 @@ /* eslint-disable @typescript-eslint/no-floating-promises */ import { Readability } from '@omnivore/readability' import graphqlFields from 'graphql-fields' -import { searchHighlights } from '../../elastic/highlights' -import { - createPage, - getPageByParam, - searchAsYouType, - searchPages, - updatePage, - updatePages, -} from '../../elastic/pages' -import { - ArticleSavingRequestStatus, - Page, - PageType, - SearchItem as SearchItemData, -} from '../../elastic/types' -import { getRepository } from '../../repository' +import { LibraryItemType } from '../../entity/library_item' import { UploadFile } from '../../entity/upload_file' import { User } from '../../entity/user' import { env } from '../../env' @@ -28,6 +13,7 @@ import { Article, ArticleError, ArticleErrorCode, + ArticleSavingRequestStatus, ArticlesError, ArticleSuccess, BulkActionError, @@ -57,7 +43,6 @@ import { SaveArticleReadingProgressSuccess, SearchError, SearchErrorCode, - SearchItem, SearchSuccess, SetBookmarkArticleError, SetBookmarkArticleErrorCode, @@ -76,12 +61,14 @@ import { UpdatesSinceErrorCode, UpdatesSinceSuccess, } from '../../generated/graphql' +import { getRepository } from '../../repository' import { createPageSaveRequest } from '../../services/create_page_save_request' import { addLabelToPage, createLabels, getLabelsByIds, } from '../../services/labels' +import { searchLibraryItems } from '../../services/library_item' import { setFileUploadComplete } from '../../services/save_file' import { parsedContentToPage } from '../../services/save_page' import { traceAs } from '../../tracing' @@ -115,7 +102,7 @@ import { 
makeStorageFilePublic, } from '../../utils/uploads' import { WithDataSourcesContext } from '../types' -import { pageTypeForContentType } from '../upload_files' +import { itemTypeForContentType } from '../upload_files' export enum ArticleFormat { Markdown = 'markdown', @@ -220,7 +207,7 @@ export const createArticleResolver = authorized< let userArticleUrl: string | null = null let uploadFileHash = null let domContent = null - let pageType = PageType.Unknown + let pageType = LibraryItemType.Unknown const DUMMY_RESPONSE = { user, @@ -233,7 +220,7 @@ export const createArticleResolver = authorized< content: '', description: '', title: '', - pageType: PageType.Unknown, + pageType: LibraryItemType.Unknown, contentReader: ContentReader.Web, author: '', url, @@ -280,7 +267,7 @@ export const createArticleResolver = authorized< uploadFileHash = uploadFileDetails.md5Hash userArticleUrl = uploadFileDetails.fileUrl canonicalUrl = uploadFile.url - pageType = pageTypeForContentType(uploadFile.contentType) + pageType = itemTypeForContentType(uploadFile.contentType) title = titleForFilePath(uploadFile.url) } else if ( source !== 'puppeteer-parse' && @@ -298,7 +285,7 @@ export const createArticleResolver = authorized< parsedContent = parseResults.parsedContent canonicalUrl = parseResults.canonicalUrl domContent = parseResults.domContent - pageType = parseResults.pageType + pageType = parseResults.pageType as unknown as LibraryItemType } else if (!preparedDocument?.document) { // We have a URL but no document, so we try to send this to puppeteer // and return a dummy response. 
@@ -520,7 +507,7 @@ export const getArticlesResolver = authorized< }, }) - const [pages, totalCount] = (await searchPages( + const [pages, totalCount] = (await searchLibraryItems( { from: Number(startCursor), size: first + 1, // fetch one more item to get next cursor @@ -551,13 +538,9 @@ export const getArticlesResolver = authorized< pages.pop() } - const edges = pages.map((a) => { + const edges = pages.map((node) => { return { - node: { - ...a, - image: a.image && createImageProxyUrl(a.image, 320, 320), - isArchived: !!a.archivedAt, - }, + node, cursor: endCursor, } }) @@ -589,7 +572,6 @@ export type SetShareArticleSuccessPartial = Merge< } > -// TODO: not implemented yet // export const setShareArticleResolver = authorized< // SetShareArticleSuccessPartial, // SetShareArticleError, @@ -916,7 +898,7 @@ export const searchResolver = authorized< )) || [[], 0] } else { // otherwise, search pages - ;[results, totalCount] = (await searchPages( + ;[results, totalCount] = (await searchLibraryItems( { from: Number(startCursor), size: first + 1, // fetch one more item to get next cursor @@ -1031,7 +1013,7 @@ export const updatesSinceResolver = authorized< const startCursor = after || '' const size = first || 10 const startDate = new Date(since) - const [pages, totalCount] = (await searchPages( + const [pages, totalCount] = (await searchLibraryItems( { from: Number(startCursor), size: size + 1, // fetch one more item to get next cursor diff --git a/packages/api/src/resolvers/upload_files/index.ts b/packages/api/src/resolvers/upload_files/index.ts index e3b921201..8035480bc 100644 --- a/packages/api/src/resolvers/upload_files/index.ts +++ b/packages/api/src/resolvers/upload_files/index.ts @@ -2,7 +2,7 @@ import normalizeUrl from 'normalize-url' import path from 'path' import { createPage, getPageByParam, updatePage } from '../../elastic/pages' -import { PageType } from '../../elastic/types' +import { LibraryItemType } from '../../entity/library_item' import { UploadFile } 
from '../../entity/upload_file' import { env } from '../../env' import { @@ -28,11 +28,13 @@ const isFileUrl = (url: string): boolean => { return parsedUrl.protocol == 'file:' } -export const pageTypeForContentType = (contentType: string): PageType => { +export const itemTypeForContentType = ( + contentType: string +): LibraryItemType => { if (contentType == 'application/epub+zip') { - return PageType.Book + return LibraryItemType.Book } - return PageType.File + return LibraryItemType.File } export const uploadFileRequestResolver = authorized< @@ -151,7 +153,7 @@ export const uploadFileRequestResolver = authorized< title: title, hash: uploadFilePathName, content: '', - pageType: pageTypeForContentType(input.contentType), + pageType: itemTypeForContentType(input.contentType), uploadFileId: uploadFileData.id, slug: generateSlug(uploadFilePathName), createdAt: new Date(), diff --git a/packages/api/src/routers/svc/integrations.ts b/packages/api/src/routers/svc/integrations.ts index 349792c9c..0f5084e7a 100644 --- a/packages/api/src/routers/svc/integrations.ts +++ b/packages/api/src/routers/svc/integrations.ts @@ -5,7 +5,7 @@ import { stringify } from 'csv-stringify' import express from 'express' import { DateTime } from 'luxon' import { v4 as uuidv4 } from 'uuid' -import { getPageById, searchPages } from '../../elastic/pages' +import { getPageById, searchLibraryItems } from '../../elastic/pages' import { Page } from '../../elastic/types' import { Integration, IntegrationType } from '../../entity/integration' import { EntityType, readPushSubscription } from '../../pubsub' @@ -133,7 +133,7 @@ export function integrationsServiceRouter() { const dateFilters: DateFilter[] = [] syncedAt && dateFilters.push({ field: 'updatedAt', startDate: syncedAt }) - ;[pages, count] = (await searchPages( + ;[pages, count] = (await searchLibraryItems( { from: after, size, dateFilters }, userId )) as [Page[], number] diff --git a/packages/api/src/services/library_item.ts 
b/packages/api/src/services/library_item.ts index f098332f7..aaddab419 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -108,17 +108,18 @@ export const createLibraryItem = async ( const buildWhereClause = ( queryBuilder: SelectQueryBuilder, - userId: string, args: PageSearchArgs ) => { if (args.query) { - queryBuilder.andWhere(`tsv @@ websearch_to_tsquery(:query)`, { - query: args.query, - }) + queryBuilder + .addSelect('ts_rank_cd(library_item.search_tsv, query)', 'rank') + .addFrom("websearch_to_tsquery('english', :query)", 'query') + .andWhere('query @@ library_item.search_tsv') + .setParameter('query', args.query) } if (args.typeFilter) { - queryBuilder.andWhere(`library_item.item_type = :typeFilter`, { + queryBuilder.andWhere('library_item.item_type = :typeFilter', { typeFilter: args.typeFilter, }) } @@ -126,18 +127,15 @@ const buildWhereClause = ( if (args.inFilter !== InFilter.ALL) { switch (args.inFilter) { case InFilter.INBOX: - queryBuilder.andWhere(`library_item.archived_at IS NULL`) + queryBuilder.andWhere('library_item.archived_at IS NULL') break case InFilter.ARCHIVE: - queryBuilder.andWhere(`library_item.archived_at IS NOT NULL`) + queryBuilder.andWhere('library_item.archived_at IS NOT NULL') break case InFilter.TRASH: // return only deleted pages within 14 days queryBuilder.andWhere( - `library_item.state = :state AND deleted_at >= now() - interval '14 days'`, - { - state: LibraryItemState.Deleted, - } + "library_item.state = 'DELETED' AND library_item.deleted_at >= now() - interval '14 days'" ) break } @@ -158,7 +156,12 @@ const buildWhereClause = ( args.hasFilters.forEach((filter) => { switch (filter) { case HasFilter.HIGHLIGHTS: - queryBuilder.andWhere(`library_item.highlights IS NOT NULL`) + queryBuilder.andWhere( + 'library_item.highlight_annotations IS NOT NULL' + ) + break + case HasFilter.LABELS: + queryBuilder.andWhere('library_item.label_names IS NOT NULL') + break } }) @@ -173,17 
+176,19 @@ const buildWhereClause = ( ) if (includeLabels && includeLabels.length > 0) { - queryBuilder.andWhere( - `library_item.id IN (SELECT library_item_id FROM library_item_label WHERE name IN (:...includeLabels))`, - { - includeLabels, - } - ) + includeLabels.forEach((includeLabel, index) => { + queryBuilder.andWhere( + `(library_item.label_names @> ARRAY[:...includeLabels${index}]::text[] OR library_item.highlight_labels @> ARRAY[:...includeLabels${index}]::text[])`, + { + [`includeLabels${index}`]: includeLabel.labels, + } + ) + }) } if (excludeLabels && excludeLabels.length > 0) { queryBuilder.andWhere( - `library_item.id NOT IN (SELECT library_item_id FROM library_item_label WHERE name IN (:...excludeLabels))`, + 'NOT library_item.label_names && ARRAY[:...excludeLabels]::text[] AND NOT library_item.highlight_labels && ARRAY[:...excludeLabels]::text[]', { excludeLabels, } ) @@ -210,6 +215,39 @@ const buildWhereClause = ( }) }) } + + if (args.matchFilters && args.matchFilters.length > 0) { + args.matchFilters.forEach((filter, index) => { + queryBuilder.andWhere( + `websearch_to_tsquery('english', :matchQuery${index}) @@ library_item.${filter.field}_tsv`, + { + [`matchQuery${index}`]: filter.value, + } + ) + }) + } + + if (args.ids && args.ids.length > 0) { + queryBuilder.andWhere('library_item.id IN (:...ids)', { ids: args.ids }) + } + + if (!args.includePending) { + queryBuilder.andWhere('library_item.state != :pendingState', { + pendingState: LibraryItemState.Processing, + }) + } + + if (!args.includeDeleted && args.inFilter !== InFilter.TRASH) { + queryBuilder.andWhere('library_item.state != :deletedState', { + deletedState: LibraryItemState.Deleted, + }) + } + + if (args.noFilters) { + args.noFilters.forEach((filter) => { + queryBuilder.andWhere(`library_item.${filter.field} IS NULL`) + }) + } } export const searchLibraryItems = async ( @@ -225,15 +263,16 @@ const queryBuilder = entityManager .createQueryBuilder(LibraryItem, 'library_item') - .select('library_item.*') + .leftJoinAndSelect('library_item.labels', 
'labels') + .leftJoinAndSelect('library_item.highlights', 'highlights') .where('library_item.user_id = :userId', { userId }) // build the where clause - buildWhereClause(queryBuilder, userId, args) + buildWhereClause(queryBuilder, args) // add pagination and sorting const libraryItems = await queryBuilder - .orderBy(`omnivore.library_item.${sortField}`, sortOrder) + .orderBy(`library_item.${sortField}`, sortOrder) .offset(from) .limit(size) .getMany() @@ -242,3 +281,9 @@ export const searchLibraryItems = async ( return [libraryItems, count] } + +export const libraryItemToSearchItem = ( + libraryItem: LibraryItem, + userId: string +): SearchItem => { + \ No newline at end of file diff --git a/packages/api/src/utils/search.ts b/packages/api/src/utils/search.ts index 6087e1289..741ecf4bb 100644 --- a/packages/api/src/utils/search.ts +++ b/packages/api/src/utils/search.ts @@ -9,7 +9,7 @@ import { SearchParserKeyWordOffset, SearchParserTextOffset, } from 'search-query-parser' -import { PageType } from '../generated/graphql' +import { LibraryItemType } from '../entity/library_item' export enum ReadFilter { ALL, @@ -30,7 +30,7 @@ export interface SearchFilter { query: string | undefined inFilter: InFilter readFilter: ReadFilter - typeFilter?: PageType + typeFilter?: LibraryItemType labelFilters: LabelFilter[] sortParams?: SortParams hasFilters: HasFilter[] @@ -55,7 +55,7 @@ export type LabelFilter = { export enum HasFilter { HIGHLIGHTS, - SHARED_AT, + LABELS, } export interface DateFilter { @@ -134,27 +134,27 @@ const parseInFilter = ( return query ? 
InFilter.ALL : InFilter.INBOX } -const parseTypeFilter = (str: string | undefined): PageType | undefined => { +const parseTypeFilter = ( + str: string | undefined +): LibraryItemType | undefined => { if (str === undefined) { return undefined } switch (str.toLowerCase()) { case 'article': - return PageType.Article + return LibraryItemType.Article case 'book': - return PageType.Book + return LibraryItemType.Book case 'pdf': case 'file': - return PageType.File + return LibraryItemType.File case 'profile': - return PageType.Profile + return LibraryItemType.Profile case 'website': - return PageType.Website + return LibraryItemType.Website case 'unknown': - return PageType.Unknown - case 'highlights': - return PageType.Highlights + return LibraryItemType.Unknown } return undefined } @@ -230,6 +230,8 @@ const parseHasFilter = (str?: string): HasFilter | undefined => { switch (str.toUpperCase()) { case 'HIGHLIGHTS': return HasFilter.HIGHLIGHTS + case 'LABELS': + return HasFilter.LABELS } } diff --git a/packages/api/test/elastic/pages.test.ts b/packages/api/test/elastic/pages.test.ts index be6db6591..baa01d616 100644 --- a/packages/api/test/elastic/pages.test.ts +++ b/packages/api/test/elastic/pages.test.ts @@ -1,13 +1,5 @@ -import 'mocha' import { expect } from 'chai' -import { InFilter, ReadFilter } from '../../src/utils/search' -import { - ArticleSavingRequestStatus, - Page, - PageContext, - PageType, -} from '../../src/elastic/types' -import { createPubSubClient } from '../../src/pubsub' +import 'mocha' import { countByCreatedAt, createPage, @@ -16,9 +8,17 @@ import { getPageById, getPageByParam, searchAsYouType, - searchPages, + searchLibraryItems, updatePage, } from '../../src/elastic/pages' +import { + ArticleSavingRequestStatus, + Page, + PageContext, + PageType, +} from '../../src/elastic/types' +import { createPubSubClient } from '../../src/pubsub' +import { InFilter, ReadFilter } from '../../src/utils/search' import { createTestElasticPage } from '../util' 
describe('pages in elastic', () => { @@ -126,7 +126,7 @@ describe('pages in elastic', () => { }) }) - describe('searchPages', () => { + describe('searchLibraryItems', () => { let page: Page before(async () => { @@ -140,7 +140,7 @@ describe('pages in elastic', () => { }) it('searches pages', async () => { - const searchResults = await searchPages( + const searchResults = await searchLibraryItems( { dateFilters: [], hasFilters: [], diff --git a/packages/api/test/routers/auth.test.ts b/packages/api/test/routers/auth.test.ts index 4a17d58ba..5559f1b5e 100644 --- a/packages/api/test/routers/auth.test.ts +++ b/packages/api/test/routers/auth.test.ts @@ -3,7 +3,7 @@ import chai, { expect } from 'chai' import sinon from 'sinon' import sinonChai from 'sinon-chai' import supertest from 'supertest' -import { searchPages } from '../../src/elastic/pages' +import { searchLibraryItems } from '../../src/elastic/pages' import { StatusType, User } from '../../src/entity/user' import { getRepository } from '../../src/repository' import { AuthProvider } from '../../src/routers/auth/auth_types' @@ -605,10 +605,10 @@ describe('auth router', () => { 'web' ).expect(200) const user = await getRepository(User).findOneBy({ name }) - const [popularReads, count] = (await searchPages({}, user?.id!)) || [ - [], - 0, - ] + const [popularReads, count] = (await searchLibraryItems( + {}, + user?.id! + )) || [[], 0] expect(count).to.eql(3) }) @@ -629,10 +629,10 @@ describe('auth router', () => { 'ios' ).expect(200) const user = await getRepository(User).findOneBy({ name }) - const [popularReads, count] = (await searchPages({}, user?.id!)) || [ - [], - 0, - ] + const [popularReads, count] = (await searchLibraryItems( + {}, + user?.id! 
+ )) || [[], 0] expect(count).to.eql(4) }) diff --git a/packages/db/migrations/0118.do.library_item.sql b/packages/db/migrations/0118.do.library_item.sql index 3b2f6e6df..f65d2112a 100755 --- a/packages/db/migrations/0118.do.library_item.sql +++ b/packages/db/migrations/0118.do.library_item.sql @@ -55,6 +55,11 @@ CREATE TABLE omnivore.library_item ( gcs_archive_id text, directionality directionality_type NOT NULL DEFAULT 'LTR', subscription_id uuid REFERENCES omnivore.subscriptions ON DELETE CASCADE, + label_names text[], -- array of label names of the item + highlight_labels text[], -- array of label names of the item's highlights + highlight_annotations text[], -- array of highlight annotations of the item + note text, + note_tsv tsvector, UNIQUE (user_id, original_url) ); @@ -66,6 +71,7 @@ CREATE INDEX library_item_title_tsv_idx ON omnivore.library_item USING GIN (titl CREATE INDEX library_item_author_tsv_idx ON omnivore.library_item USING GIN (author_tsv); CREATE INDEX library_item_description_tsv_idx ON omnivore.library_item USING GIN (description_tsv); CREATE INDEX library_item_search_tsv_idx ON omnivore.library_item USING GIN (search_tsv); +CREATE INDEX library_item_note_tsv_idx ON omnivore.library_item USING GIN (note_tsv); CREATE OR REPLACE FUNCTION update_library_item_tsv() RETURNS trigger AS $$ begin @@ -74,15 +80,18 @@ begin new.title_tsv := to_tsvector('pg_catalog.english', coalesce(new.title, '')); new.author_tsv := to_tsvector('pg_catalog.english', coalesce(new.author, '')); new.description_tsv := to_tsvector('pg_catalog.english', coalesce(new.description, '')); + -- note_tsv is generated by both note and highlight_annotations + new.note_tsv := to_tsvector('pg_catalog.english', coalesce(new.note, '')) || to_tsvector('pg_catalog.english', coalesce(array_to_string(new.highlight_annotations, ' '), '')); new.search_tsv := setweight(new.title_tsv, 'A') || setweight(new.author_tsv, 'A') || setweight(new.site_tsv, 'A') || setweight(new.description_tsv, 'A') || -- full 
hostname (eg www.omnivore.app) - setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A') || + setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A') || -- secondary hostname (eg omnivore) - setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.url, '^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A') || + setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A') || + setweight(new.note_tsv, 'A') || setweight(new.content_tsv, 'B'); return new; end diff --git a/packages/db/migrations/0118.undo.library_item.sql b/packages/db/migrations/0118.undo.library_item.sql index 202b831dd..57e2a2e43 100755 --- a/packages/db/migrations/0118.undo.library_item.sql +++ b/packages/db/migrations/0118.undo.library_item.sql @@ -7,6 +7,7 @@ BEGIN; DROP TRIGGER library_item_tsv_update ON omnivore.library_item; DROP FUNCTION update_library_item_tsv(); +DROP INDEX omnivore.library_item_note_tsv_idx; DROP INDEX omnivore.library_item_search_tsv_idx; DROP INDEX omnivore.library_item_description_tsv_idx; DROP INDEX omnivore.library_item_author_tsv_idx;