update search query

This commit is contained in:
Hongbo Wu
2023-08-28 18:20:44 +08:00
parent 751df68175
commit 648a16615b
11 changed files with 155 additions and 98 deletions

View File

@ -226,8 +226,15 @@ const appendHasFilters = (
},
})
break
case HasFilter.SHARED_AT:
builder = builder.query('exists', { field: 'sharedAt' })
case HasFilter.LABELS:
builder = builder.query('nested', {
path: 'labels',
query: {
exists: {
field: 'labels',
},
},
})
break
}
})

View File

@ -12,6 +12,7 @@ import {
Unique,
UpdateDateColumn,
} from 'typeorm'
import { Highlight } from './highlight'
import { Label } from './label'
import { Recommendation } from './recommendation'
import { Subscription } from './subscription'
@ -188,4 +189,12 @@ export class LibraryItem {
@Column('enum', { enum: DirectionalityType, default: DirectionalityType.LTR })
directionality!: DirectionalityType
@OneToMany(() => Highlight, (highlight) => highlight.libraryItem)
@JoinTable({
name: 'highlight',
joinColumn: { name: 'library_item_id' },
inverseJoinColumn: { name: 'id' },
})
highlights?: Highlight[]
}

View File

@ -5,22 +5,7 @@
/* eslint-disable @typescript-eslint/no-floating-promises */
import { Readability } from '@omnivore/readability'
import graphqlFields from 'graphql-fields'
import { searchHighlights } from '../../elastic/highlights'
import {
createPage,
getPageByParam,
searchAsYouType,
searchPages,
updatePage,
updatePages,
} from '../../elastic/pages'
import {
ArticleSavingRequestStatus,
Page,
PageType,
SearchItem as SearchItemData,
} from '../../elastic/types'
import { getRepository } from '../../repository'
import { LibraryItemType } from '../../entity/library_item'
import { UploadFile } from '../../entity/upload_file'
import { User } from '../../entity/user'
import { env } from '../../env'
@ -28,6 +13,7 @@ import {
Article,
ArticleError,
ArticleErrorCode,
ArticleSavingRequestStatus,
ArticlesError,
ArticleSuccess,
BulkActionError,
@ -57,7 +43,6 @@ import {
SaveArticleReadingProgressSuccess,
SearchError,
SearchErrorCode,
SearchItem,
SearchSuccess,
SetBookmarkArticleError,
SetBookmarkArticleErrorCode,
@ -76,12 +61,14 @@ import {
UpdatesSinceErrorCode,
UpdatesSinceSuccess,
} from '../../generated/graphql'
import { getRepository } from '../../repository'
import { createPageSaveRequest } from '../../services/create_page_save_request'
import {
addLabelToPage,
createLabels,
getLabelsByIds,
} from '../../services/labels'
import { searchLibraryItems } from '../../services/library_item'
import { setFileUploadComplete } from '../../services/save_file'
import { parsedContentToPage } from '../../services/save_page'
import { traceAs } from '../../tracing'
@ -115,7 +102,7 @@ import {
makeStorageFilePublic,
} from '../../utils/uploads'
import { WithDataSourcesContext } from '../types'
import { pageTypeForContentType } from '../upload_files'
import { itemTypeForContentType } from '../upload_files'
export enum ArticleFormat {
Markdown = 'markdown',
@ -220,7 +207,7 @@ export const createArticleResolver = authorized<
let userArticleUrl: string | null = null
let uploadFileHash = null
let domContent = null
let pageType = PageType.Unknown
let pageType = LibraryItemType.Unknown
const DUMMY_RESPONSE = {
user,
@ -233,7 +220,7 @@ export const createArticleResolver = authorized<
content: '',
description: '',
title: '',
pageType: PageType.Unknown,
pageType: LibraryItemType.Unknown,
contentReader: ContentReader.Web,
author: '',
url,
@ -280,7 +267,7 @@ export const createArticleResolver = authorized<
uploadFileHash = uploadFileDetails.md5Hash
userArticleUrl = uploadFileDetails.fileUrl
canonicalUrl = uploadFile.url
pageType = pageTypeForContentType(uploadFile.contentType)
pageType = itemTypeForContentType(uploadFile.contentType)
title = titleForFilePath(uploadFile.url)
} else if (
source !== 'puppeteer-parse' &&
@ -298,7 +285,7 @@ export const createArticleResolver = authorized<
parsedContent = parseResults.parsedContent
canonicalUrl = parseResults.canonicalUrl
domContent = parseResults.domContent
pageType = parseResults.pageType
pageType = parseResults.pageType as unknown as LibraryItemType
} else if (!preparedDocument?.document) {
// We have a URL but no document, so we try to send this to puppeteer
// and return a dummy response.
@ -520,7 +507,7 @@ export const getArticlesResolver = authorized<
},
})
const [pages, totalCount] = (await searchPages(
const [pages, totalCount] = (await searchLibraryItems(
{
from: Number(startCursor),
size: first + 1, // fetch one more item to get next cursor
@ -551,13 +538,9 @@ export const getArticlesResolver = authorized<
pages.pop()
}
const edges = pages.map((a) => {
const edges = pages.map((node) => {
return {
node: {
...a,
image: a.image && createImageProxyUrl(a.image, 320, 320),
isArchived: !!a.archivedAt,
},
node,
cursor: endCursor,
}
})
@ -589,7 +572,6 @@ export type SetShareArticleSuccessPartial = Merge<
}
>
// TODO: not implemented yet
// export const setShareArticleResolver = authorized<
// SetShareArticleSuccessPartial,
// SetShareArticleError,
@ -916,7 +898,7 @@ export const searchResolver = authorized<
)) || [[], 0]
} else {
// otherwise, search pages
;[results, totalCount] = (await searchPages(
;[results, totalCount] = (await searchLibraryItems(
{
from: Number(startCursor),
size: first + 1, // fetch one more item to get next cursor
@ -1031,7 +1013,7 @@ export const updatesSinceResolver = authorized<
const startCursor = after || ''
const size = first || 10
const startDate = new Date(since)
const [pages, totalCount] = (await searchPages(
const [pages, totalCount] = (await searchLibraryItems(
{
from: Number(startCursor),
size: size + 1, // fetch one more item to get next cursor

View File

@ -2,7 +2,7 @@
import normalizeUrl from 'normalize-url'
import path from 'path'
import { createPage, getPageByParam, updatePage } from '../../elastic/pages'
import { PageType } from '../../elastic/types'
import { LibraryItemType } from '../../entity/library_item'
import { UploadFile } from '../../entity/upload_file'
import { env } from '../../env'
import {
@ -28,11 +28,13 @@ const isFileUrl = (url: string): boolean => {
return parsedUrl.protocol == 'file:'
}
export const pageTypeForContentType = (contentType: string): PageType => {
/**
 * Maps an uploaded file's MIME content type to a LibraryItemType.
 *
 * @param contentType - MIME type reported for the upload
 *   (e.g. 'application/epub+zip')
 * @returns LibraryItemType.Book for EPUB uploads, LibraryItemType.File
 *   for every other content type
 */
export const itemTypeForContentType = (
  contentType: string
): LibraryItemType => {
  // Strict equality: contentType is always a string, so `==` gains nothing
  // and invites lint failures under eqeqeq.
  if (contentType === 'application/epub+zip') {
    return LibraryItemType.Book
  }
  return LibraryItemType.File
}
export const uploadFileRequestResolver = authorized<
@ -151,7 +153,7 @@ export const uploadFileRequestResolver = authorized<
title: title,
hash: uploadFilePathName,
content: '',
pageType: pageTypeForContentType(input.contentType),
pageType: itemTypeForContentType(input.contentType),
uploadFileId: uploadFileData.id,
slug: generateSlug(uploadFilePathName),
createdAt: new Date(),

View File

@ -5,7 +5,7 @@ import { stringify } from 'csv-stringify'
import express from 'express'
import { DateTime } from 'luxon'
import { v4 as uuidv4 } from 'uuid'
import { getPageById, searchPages } from '../../elastic/pages'
import { getPageById, searchLibraryItems } from '../../elastic/pages'
import { Page } from '../../elastic/types'
import { Integration, IntegrationType } from '../../entity/integration'
import { EntityType, readPushSubscription } from '../../pubsub'
@ -133,7 +133,7 @@ export function integrationsServiceRouter() {
const dateFilters: DateFilter[] = []
syncedAt &&
dateFilters.push({ field: 'updatedAt', startDate: syncedAt })
;[pages, count] = (await searchPages(
;[pages, count] = (await searchLibraryItems(
{ from: after, size, dateFilters },
userId
)) as [Page[], number]

View File

@ -108,17 +108,18 @@ export const createLibraryItem = async (
// NOTE(review): this span is a rendered git-diff hunk — removed and added
// lines appear interleaved without +/- markers, and `@ -…` hunk headers split
// the body. Comments below annotate the post-change code; all other text is
// untouched.
const buildWhereClause = (
queryBuilder: SelectQueryBuilder<LibraryItem>,
userId: string,
args: PageSearchArgs
) => {
if (args.query) {
queryBuilder.andWhere(`tsv @@ websearch_to_tsquery(:query)`, {
query: args.query,
})
// NOTE(review): in the added branch below, ':query' is inside the SQL string
// literal, so setParameter('query', …) will NOT bind it — Postgres receives
// the literal text ':query'. TODO confirm and move the placeholder outside
// the quotes (websearch_to_tsquery('english', :query)).
queryBuilder
.addSelect('ts_rank_cd(library_item.search_tsv, query)', 'rank')
.addFrom("websearch_to_tsquery('english', ':query')", 'query')
.andWhere('query @@ library_item.search_tsv')
.setParameter('query', args.query)
}
if (args.typeFilter) {
queryBuilder.andWhere(`library_item.item_type = :typeFilter`, {
queryBuilder.andWhere('library_item.item_type = :typeFilter', {
typeFilter: args.typeFilter,
})
}
@ -126,18 +127,15 @@ const buildWhereClause = (
if (args.inFilter !== InFilter.ALL) {
switch (args.inFilter) {
case InFilter.INBOX:
queryBuilder.andWhere(`library_item.archived_at IS NULL`)
queryBuilder.andWhere('library_item.archived_at IS NULL')
break
case InFilter.ARCHIVE:
queryBuilder.andWhere(`library_item.archived_at IS NOT NULL`)
queryBuilder.andWhere('library_item.archived_at IS NOT NULL')
break
case InFilter.TRASH:
// return only deleted pages within 14 days
queryBuilder.andWhere(
`library_item.state = :state AND deleted_at >= now() - interval '14 days'`,
{
state: LibraryItemState.Deleted,
}
"library_item.state = 'DELETED' AND library_item.deleted_at >= now() - interval '14 days'"
)
break
}
@ -158,7 +156,12 @@ const buildWhereClause = (
args.hasFilters.forEach((filter) => {
switch (filter) {
case HasFilter.HIGHLIGHTS:
queryBuilder.andWhere(`library_item.highlights IS NOT NULL`)
queryBuilder.andWhere(
'library_item.highlight_annotations IS NOT NULL'
)
break
case HasFilter.LABELS:
queryBuilder.andWhere('library_item.label_names IS NOT NULL')
break
}
})
@ -173,17 +176,19 @@ const buildWhereClause = (
)
if (includeLabels && includeLabels.length > 0) {
queryBuilder.andWhere(
`library_item.id IN (SELECT library_item_id FROM library_item_label WHERE name IN (:...includeLabels))`,
{
includeLabels,
}
)
// Each includeLabel group must match (AND across groups); within a group the
// array-containment checks cover both item labels and highlight labels.
includeLabels.forEach((includeLabel) => {
queryBuilder.andWhere(
'library_item.label_names @> ARRAY[:...includeLabels]::text[] OR library_item.highlight_labels @> ARRAY[:...includeLabels]::text[]',
{
includeLabels: includeLabel.labels,
}
)
})
}
if (excludeLabels && excludeLabels.length > 0) {
queryBuilder.andWhere(
`library_item.id NOT IN (SELECT library_item_id FROM library_item_label WHERE name IN (:...excludeLabels))`,
'NOT library_item.label_names && ARRAY[:...excludeLabels]::text[] AND NOT library_item.highlight_labels && ARRAY[:...excludeLabels]::text[]',
{
excludeLabels,
}
@ -210,6 +215,39 @@ const buildWhereClause = (
})
})
}
// NOTE(review): two issues in the block below — the quoted ':query' is again
// not a bound parameter (same bug as above), and `${filter.field}` is
// interpolated directly into SQL; safe only if field names come from a
// trusted enum, never from user input. TODO confirm both.
if (args.matchFilters && args.matchFilters.length > 0) {
args.matchFilters.forEach((filter) => {
queryBuilder.andWhere(
`websearch_to_tsquery('english', ':query') @@ library_item.${filter.field}_tsv`,
{
query: filter.value,
}
)
})
}
if (args.ids && args.ids.length > 0) {
queryBuilder.andWhere('library_item.id IN (:...ids)', { ids: args.ids })
}
// NOTE(review): this clause and the next both bind the parameter name
// :state; TypeORM keeps one parameter map per query, so when both apply the
// second value (Deleted) overwrites the first (Processing) in BOTH clauses.
// Use distinct names (e.g. :pendingState / :deletedState) — TODO confirm.
if (!args.includePending) {
queryBuilder.andWhere('library_item.state != :state', {
state: LibraryItemState.Processing,
})
}
if (!args.includeDeleted && args.inFilter !== InFilter.TRASH) {
queryBuilder.andWhere('library_item.state != :state', {
state: LibraryItemState.Deleted,
})
}
if (args.noFilters) {
// `${filter.field}` is interpolated into SQL here too — same trust caveat
// as matchFilters above.
args.noFilters.forEach((filter) => {
queryBuilder.andWhere(`library_item.${filter.field} IS NULL`)
})
}
}
export const searchLibraryItems = async (
@ -225,15 +263,16 @@ export const searchLibraryItems = async (
const queryBuilder = entityManager
.createQueryBuilder(LibraryItem, 'library_item')
.select('library_item.*')
.leftJoinAndSelect('library_item.labels', 'labels')
.leftJoinAndSelect('library_item.highlights', 'highlights')
.where('library_item.user_id = :userId', { userId })
// build the where clause
buildWhereClause(queryBuilder, userId, args)
buildWhereClause(queryBuilder, args)
// add pagination and sorting
const libraryItems = await queryBuilder
.orderBy(`omnivore.library_item.${sortField}`, sortOrder)
.orderBy(`library_item.${sortField}`, sortOrder)
.offset(from)
.limit(size)
.getMany()
@ -242,3 +281,9 @@ export const searchLibraryItems = async (
return [libraryItems, count]
}
export const libraryItemToSearchItem = (
libraryItem: LibraryItem,
userId: string
): SearchItem => {

View File

@ -9,7 +9,7 @@ import {
SearchParserKeyWordOffset,
SearchParserTextOffset,
} from 'search-query-parser'
import { PageType } from '../generated/graphql'
import { LibraryItemType } from '../entity/library_item'
export enum ReadFilter {
ALL,
@ -30,7 +30,7 @@ export interface SearchFilter {
query: string | undefined
inFilter: InFilter
readFilter: ReadFilter
typeFilter?: PageType
typeFilter?: LibraryItemType
labelFilters: LabelFilter[]
sortParams?: SortParams
hasFilters: HasFilter[]
@ -55,7 +55,7 @@ export type LabelFilter = {
export enum HasFilter {
HIGHLIGHTS,
SHARED_AT,
LABELS,
}
export interface DateFilter {
@ -134,27 +134,27 @@ const parseInFilter = (
return query ? InFilter.ALL : InFilter.INBOX
}
const parseTypeFilter = (str: string | undefined): PageType | undefined => {
const parseTypeFilter = (
str: string | undefined
): LibraryItemType | undefined => {
if (str === undefined) {
return undefined
}
switch (str.toLowerCase()) {
case 'article':
return PageType.Article
return LibraryItemType.Article
case 'book':
return PageType.Book
return LibraryItemType.Book
case 'pdf':
case 'file':
return PageType.File
return LibraryItemType.File
case 'profile':
return PageType.Profile
return LibraryItemType.Profile
case 'website':
return PageType.Website
return LibraryItemType.Website
case 'unknown':
return PageType.Unknown
case 'highlights':
return PageType.Highlights
return LibraryItemType.Unknown
}
return undefined
}
@ -230,6 +230,8 @@ const parseHasFilter = (str?: string): HasFilter | undefined => {
switch (str.toUpperCase()) {
case 'HIGHLIGHTS':
return HasFilter.HIGHLIGHTS
case 'LABELS':
return HasFilter.LABELS
}
}

View File

@ -1,13 +1,5 @@
import 'mocha'
import { expect } from 'chai'
import { InFilter, ReadFilter } from '../../src/utils/search'
import {
ArticleSavingRequestStatus,
Page,
PageContext,
PageType,
} from '../../src/elastic/types'
import { createPubSubClient } from '../../src/pubsub'
import 'mocha'
import {
countByCreatedAt,
createPage,
@ -16,9 +8,17 @@ import {
getPageById,
getPageByParam,
searchAsYouType,
searchPages,
searchLibraryItems,
updatePage,
} from '../../src/elastic/pages'
import {
ArticleSavingRequestStatus,
Page,
PageContext,
PageType,
} from '../../src/elastic/types'
import { createPubSubClient } from '../../src/pubsub'
import { InFilter, ReadFilter } from '../../src/utils/search'
import { createTestElasticPage } from '../util'
describe('pages in elastic', () => {
@ -126,7 +126,7 @@ describe('pages in elastic', () => {
})
})
describe('searchPages', () => {
describe('searchLibraryItems', () => {
let page: Page
before(async () => {
@ -140,7 +140,7 @@ describe('pages in elastic', () => {
})
it('searches pages', async () => {
const searchResults = await searchPages(
const searchResults = await searchLibraryItems(
{
dateFilters: [],
hasFilters: [],

View File

@ -3,7 +3,7 @@ import chai, { expect } from 'chai'
import sinon from 'sinon'
import sinonChai from 'sinon-chai'
import supertest from 'supertest'
import { searchPages } from '../../src/elastic/pages'
import { searchLibraryItems } from '../../src/elastic/pages'
import { StatusType, User } from '../../src/entity/user'
import { getRepository } from '../../src/repository'
import { AuthProvider } from '../../src/routers/auth/auth_types'
@ -605,10 +605,10 @@ describe('auth router', () => {
'web'
).expect(200)
const user = await getRepository(User).findOneBy({ name })
const [popularReads, count] = (await searchPages({}, user?.id!)) || [
[],
0,
]
const [popularReads, count] = (await searchLibraryItems(
{},
user?.id!
)) || [[], 0]
expect(count).to.eql(3)
})
@ -629,10 +629,10 @@ describe('auth router', () => {
'ios'
).expect(200)
const user = await getRepository(User).findOneBy({ name })
const [popularReads, count] = (await searchPages({}, user?.id!)) || [
[],
0,
]
const [popularReads, count] = (await searchLibraryItems(
{},
user?.id!
)) || [[], 0]
expect(count).to.eql(4)
})

View File

@ -55,6 +55,11 @@ CREATE TABLE omnivore.library_item (
gcs_archive_id text,
directionality directionality_type NOT NULL DEFAULT 'LTR',
subscription_id uuid REFERENCES omnivore.subscriptions ON DELETE CASCADE,
label_names text[], -- array of label names of the item
highlight_labels text[], -- array of label names of the item's highlights
highlight_annotations text[], -- array of highlight annotations of the item
note text,
note_tsv tsvector,
UNIQUE (user_id, original_url)
);
@ -66,6 +71,7 @@ CREATE INDEX library_item_title_tsv_idx ON omnivore.library_item USING GIN (titl
CREATE INDEX library_item_author_tsv_idx ON omnivore.library_item USING GIN (author_tsv);
CREATE INDEX library_item_description_tsv_idx ON omnivore.library_item USING GIN (description_tsv);
CREATE INDEX library_item_search_tsv_idx ON omnivore.library_item USING GIN (search_tsv);
CREATE INDEX library_item_note_tsv_idx ON omnivore.library_item USING GIN (note_tsv);
CREATE OR REPLACE FUNCTION update_library_item_tsv() RETURNS trigger AS $$
begin
@ -74,15 +80,18 @@ begin
new.title_tsv := to_tsvector('pg_catalog.english', coalesce(new.title, ''));
new.author_tsv := to_tsvector('pg_catalog.english', coalesce(new.author, ''));
new.description_tsv := to_tsvector('pg_catalog.english', coalesce(new.description, ''));
-- note_tsv is generated by both note and highlight_annotations
new.note_tsv := to_tsvector('pg_catalog.english', coalesce(new.note, '')) || to_tsvector('pg_catalog.english', string_agg(new.highlight_annotations, ' '));
new.search_tsv :=
setweight(new.title_tsv, 'A') ||
setweight(new.author_tsv, 'A') ||
setweight(new.site_tsv, 'A') ||
setweight(new.description_tsv, 'A') ||
-- full hostname (eg www.omnivore.app)
setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A') ||
setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A') ||
-- secondary hostname (eg omnivore)
setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.url, '^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A') ||
setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A') ||
setweight(to_tsvector('pg_catalog.english', new.note_tsv), 'A') ||
setweight(new.content_tsv, 'B');
return new;
end

View File

@ -7,6 +7,7 @@ BEGIN;
DROP TRIGGER library_item_tsv_update ON omnivore.library_item;
DROP FUNCTION update_library_item_tsv();
DROP INDEX omnivore.library_item_note_tsv_idx;
DROP INDEX omnivore.library_item_search_tsv_idx;
DROP INDEX omnivore.library_item_description_tsv_idx;
DROP INDEX omnivore.library_item_author_tsv_idx;