feat: allow logical operator in search query

This commit is contained in:
Hongbo Wu
2023-11-30 16:21:47 +08:00
parent b8146439d9
commit a3d3f4daf4
5 changed files with 606 additions and 182 deletions

View File

@ -71,6 +71,7 @@
"jsonwebtoken": "^8.5.1",
"jwks-rsa": "^2.0.3",
"linkedom": "^0.14.9",
"liqe": "^3.8.0",
"lodash": "^4.17.21",
"luxon": "^3.2.1",
"nanoid": "^3.1.25",

View File

@ -655,16 +655,15 @@ export const searchResolver = authorized<
return { errorCodes: [SearchErrorCode.QueryTooLong] }
}
const searchQuery = parseSearchQuery(params.query || undefined)
const searchQuery = params.query ? parseSearchQuery(params.query) : undefined
const { libraryItems, count } = await searchLibraryItems(
{
from: Number(startCursor),
size: first + 1, // fetch one more item to get next cursor
sort: searchQuery.sort,
includePending: true,
includeContent: !!params.includeContent,
...searchQuery,
searchQuery,
},
uid
)
@ -826,11 +825,16 @@ export const bulkActionResolver = authorized<
},
})
// parse query
const searchQuery = parseSearchQuery(query)
if (searchQuery.ids.length > 100) {
return { errorCodes: [BulkActionErrorCode.BadRequest] }
}
if (!query) {
return { errorCodes: [BulkActionErrorCode.BadRequest] }
}
// parse query
const searchQuery = parseSearchQuery(query)
const ids = searchQuery.getValue?.('includes') as string[]
if (!ids || ids.length === 0 || ids.length > 100) {
return { errorCodes: [BulkActionErrorCode.BadRequest] }
}
// get labels if needed
let labels = undefined

View File

@ -1,4 +1,11 @@
import { Brackets, DeepPartial, SelectQueryBuilder } from 'typeorm'
import { LiqeQuery } from 'liqe'
import { DateTime } from 'luxon'
import {
Brackets,
DeepPartial,
ObjectLiteral,
SelectQueryBuilder,
} from 'typeorm'
import { QueryDeepPartialEntity } from 'typeorm/query-builder/QueryPartialEntity'
import { EntityLabel } from '../entity/entity_label'
import { Highlight } from '../entity/highlight'
@ -31,7 +38,7 @@ export interface SearchArgs {
size?: number
sort?: Sort
query?: string
inFilter: InFilter
inFilter?: InFilter
readFilter?: ReadFilter
typeFilter?: string
labelFilters?: LabelFilter[]
@ -47,6 +54,7 @@ export interface SearchArgs {
noFilters?: NoFilter[]
rangeFilters?: RangeFilter[]
useFolders?: boolean
searchQuery?: LiqeQuery
}
export interface SearchResultItem {
@ -82,6 +90,405 @@ export interface SearchResultItem {
content?: string
}
// Search field names whose backing library_item column is named differently.
const SEARCH_FIELD_COLUMN_ALIASES: ReadonlyMap<string, string> = new Map([
  ['language', 'item_language'],
  ['subscription', 'subscription'],
  ['rss', 'subscription'],
  ['site', 'site_name'],
  ['wordsCount', 'word_count'],
  ['readPosition', 'reading_progress_bottom_percent'],
])

/**
 * Maps a search field name to the column name used when building SQL.
 *
 * @param field - field name as written in the search query
 * @returns the aliased column name, or `field` itself when no alias exists
 */
const getColumnName = (field: string) => {
  const alias = SEARCH_FIELD_COLUMN_ALIASES.get(field)
  return alias === undefined ? field : alias
}
/**
 * Serializes a parsed liqe search query into a SQL boolean expression over
 * `library_item`, collecting the named parameter bindings it references.
 *
 * Every user-supplied value is bound through a named parameter (`:name`);
 * only fixed fragments and whitelisted column names are interpolated into
 * the SQL text.
 *
 * @param searchQuery - AST produced by liqe's `parse`
 * @param parameters - output array; one object of bindings is appended for
 *   each parameterized fragment. Its current length is used to keep
 *   parameter names unique, so pass the same array for the whole query.
 * @returns the SQL condition, or `''` when the query contributes no
 *   condition (e.g. only `mode:` tags or tags with empty values)
 * @throws Error for unsupported fields/keywords, non-literal tag values,
 *   unknown logical operators, unknown AST node types and unparseable dates
 */
export const buildQuery = (
  searchQuery: LiqeQuery,
  parameters: ObjectLiteral[]
) => {
  // Records the bindings for a fragment and returns the fragment unchanged.
  const escapeQueryWithParameters = (
    query: string,
    parameter: ObjectLiteral
  ) => {
    parameters.push(parameter)
    return query
  }

  // Returns the tag's literal value as a string (undefined for null/missing);
  // throws when the expression is not a plain literal (range, regex, ...).
  const literalValue = (expression: {
    type: string
    value?: unknown
  }): string | undefined => {
    if (expression.type !== 'LiteralExpression') {
      throw new Error('Expected a literal expression.')
    }
    const { value } = expression
    return value === null || value === undefined ? undefined : String(value)
  }

  // Parses one side of a `start..end` date range; '*' or empty means "open".
  const parseDateBound = (text: string | undefined): Date | undefined => {
    if (!text || text === '*') {
      return undefined
    }
    const parsed = new Date(text)
    if (Number.isNaN(parsed.getTime())) {
      // fail here instead of binding an Invalid Date to the query
      throw new Error(`Invalid date: ${text}`)
    }
    return parsed
  }

  // liqe comparison operators (e.g. `wordsCount:>1000`) and their SQL form
  const comparisonOperators = new Map<string, string>([
    [':>', '>'],
    [':>=', '>='],
    [':<', '<'],
    [':<=', '<='],
  ])

  const serializeTagExpression = (ast: LiqeQuery): string => {
    if (ast.type !== 'Tag') {
      throw new Error('Expected a tag expression.')
    }

    const { field, expression } = ast

    if (field.type === 'ImplicitField') {
      // bare search term: full-text search over the whole item
      const value = literalValue(expression)
      if (!value) {
        return ''
      }

      const param = `implicit_${parameters.length}`
      return escapeQueryWithParameters(
        `websearch_to_tsquery('english', :${param}) @@ library_item.search_tsv`,
        { [param]: value }
      )
    }

    switch (field.name) {
      case 'in': {
        const folder = literalValue(expression)
        if (!folder) {
          return ''
        }

        switch (folder) {
          case InFilter.INBOX:
            return 'library_item.archived_at IS NULL'
          case InFilter.ARCHIVE:
            return 'library_item.archived_at IS NOT NULL'
          case InFilter.TRASH:
            // return only deleted pages within 14 days
            return "library_item.deleted_at >= now() - interval '14 days'"
          default: {
            // anything else is treated as a folder name
            const param = `folder_${parameters.length}`
            return escapeQueryWithParameters(
              `library_item.folder = :${param}`,
              { [param]: folder }
            )
          }
        }
      }
      case 'is': {
        const value = literalValue(expression)
        if (!value) {
          return ''
        }

        switch (value) {
          case ReadFilter.READ:
            return 'library_item.reading_progress_bottom_percent > 98'
          case ReadFilter.READING:
            return 'library_item.reading_progress_bottom_percent BETWEEN 2 AND 98'
          case ReadFilter.UNREAD:
            return 'library_item.reading_progress_bottom_percent < 2'
          default:
            throw new Error(`Unexpected keyword: ${value}`)
        }
      }
      case 'type': {
        const value = literalValue(expression)
        if (!value) {
          return ''
        }

        const param = `type_${parameters.length}`
        return escapeQueryWithParameters(
          `LOWER(library_item.item_type) = :${param}`,
          {
            [param]: value.toLowerCase(),
          }
        )
      }
      case 'label': {
        const label = literalValue(expression)?.toLowerCase()
        if (!label) {
          return ''
        }

        const param = `label_${parameters.length}`
        if (label.includes('*')) {
          // '*' in the label is a wildcard; translate it to SQL '%'
          return escapeQueryWithParameters(
            `exists (select 1 from unnest(array_cat(library_item.label_names, library_item.highlight_labels)::text[]) as label where label ILIKE :${param})`,
            {
              [param]: label.replace(/\*/g, '%'),
            }
          )
        }

        return escapeQueryWithParameters(
          `:${param} = ANY(lower(array_cat(library_item.label_names, library_item.highlight_labels)::text)::text[])`,
          {
            [param]: label,
          }
        )
      }
      // NOTE: 'sort' and 'recommendedBy' are not handled here yet; they fall
      // through to the default branch and throw.
      case 'has': {
        const value = literalValue(expression)
        if (!value) {
          return ''
        }

        switch (value) {
          case HasFilter.HIGHLIGHTS:
            return "library_item.highlight_annotations <> '{}'"
          case HasFilter.LABELS:
            return "library_item.label_names <> '{}'"
          case HasFilter.SUBSCRIPTIONS:
            return 'library_item.subscription is NOT NULL'
          default:
            throw new Error(`Unexpected keyword: ${value}`)
        }
      }
      case 'saved':
      case 'read':
      case 'updated':
      case 'published': {
        const date = literalValue(expression)
        if (!date) {
          return ''
        }

        let startDate: Date | undefined
        let endDate: Date | undefined
        // check for special date filters
        switch (date.toLowerCase()) {
          case 'today':
            startDate = DateTime.local().startOf('day').toJSDate()
            break
          case 'yesterday': {
            const yesterday = DateTime.local().minus({ days: 1 })
            startDate = yesterday.startOf('day').toJSDate()
            endDate = yesterday.endOf('day').toJSDate()
            break
          }
          case 'this week':
            startDate = DateTime.local().startOf('week').toJSDate()
            break
          case 'this month':
            startDate = DateTime.local().startOf('month').toJSDate()
            break
          default: {
            // check for date ranges
            const [start, end] = date.split('..')
            startDate = parseDateBound(start)
            endDate = parseDateBound(end)
          }
        }

        const startParam = `${field.name}_start_${parameters.length}`
        const endParam = `${field.name}_end_${parameters.length}`

        // open bounds default to the epoch and to "now"
        return escapeQueryWithParameters(
          `library_item.${field.name}_at BETWEEN :${startParam} AND :${endParam}`,
          {
            [startParam]: startDate ?? new Date(0),
            [endParam]: endDate ?? new Date(),
          }
        )
      }
      // term filters
      case 'subscription':
      case 'rss':
      case 'language': {
        const value = literalValue(expression)
        if (!value) {
          return ''
        }

        const columnName = getColumnName(field.name)
        const param = `term_${field.name}_${parameters.length}`

        return escapeQueryWithParameters(
          `library_item.${columnName} = :${param}`,
          {
            [param]: value,
          }
        )
      }
      // match filters
      case 'author':
      case 'title':
      case 'description':
      case 'note':
      case 'site': {
        // normalize the term to lower case
        const value = literalValue(expression)?.toLowerCase()
        if (!value) {
          return ''
        }

        const columnName = getColumnName(field.name)
        const param = `match_${field.name}_${parameters.length}`
        const wildcardParam = `match_${field.name}_wildcard_${parameters.length}`

        return escapeQueryWithParameters(
          `(websearch_to_tsquery('english', :${param}) @@ library_item.${columnName}_tsv OR library_item.${columnName} ILIKE :${wildcardParam})`,
          {
            [param]: value,
            [wildcardParam]: `%${value}%`,
          }
        )
      }
      case 'includes': {
        // comma-separated ids; blanks are dropped so that an empty value
        // yields no condition instead of binding ['']
        const ids = literalValue(expression)
          ?.split(',')
          .map((id) => id.trim())
          .filter((id) => id.length > 0)
        if (!ids || ids.length === 0) {
          return ''
        }

        const param = `includes_${parameters.length}`
        return escapeQueryWithParameters(`library_item.id = ANY(:${param})`, {
          [param]: ids,
        })
      }
      case 'no': {
        const value = literalValue(expression)
        if (!value) {
          return ''
        }

        let column = ''
        switch (value) {
          case 'highlight':
            column = 'highlight_annotations'
            break
          case 'label':
            column = 'label_names'
            break
          case 'subscription':
            column = 'subscription'
            break
          default:
            throw new Error(`Unexpected keyword: ${value}`)
        }

        return `(library_item.${column} = '{}' OR library_item.${column} IS NULL)`
      }
      case 'mode':
        // mode is ignored and used only by the frontend
        return ''
      case 'readPosition':
      case 'wordsCount': {
        let value = literalValue(expression)
        if (!value) {
          return ''
        }

        const column = getColumnName(field.name)

        // The operator may be embedded in a quoted value
        // (`wordsCount:">1000"`) or carried by the tag's own comparison
        // operator (`wordsCount:>1000`).
        const operatorRegex = /([<>]=?)/
        let operator = value.match(operatorRegex)?.[0]
        if (operator) {
          value = value.replace(operatorRegex, '')
        } else {
          const tagOperator = ast.operator?.operator
          operator = tagOperator
            ? comparisonOperators.get(tagOperator)
            : undefined
        }
        if (!operator || !value) {
          return ''
        }

        const bound = parseInt(value, 10)
        if (Number.isNaN(bound)) {
          // not a number: contribute no condition rather than binding NaN
          return ''
        }

        const param = `range_${field.name}_${parameters.length}`
        return escapeQueryWithParameters(
          `library_item.${column} ${operator} :${param}`,
          {
            [param]: bound,
          }
        )
      }
      default:
        throw new Error(`Unexpected keyword: ${field.name}`)
    }
  }

  const serialize = (ast: LiqeQuery): string => {
    if (ast.type === 'Tag') {
      return serializeTagExpression(ast)
    }

    if (ast.type === 'LogicalExpression') {
      let operator = ''
      if (ast.operator.operator === 'AND') {
        operator = 'AND'
      } else if (ast.operator.operator === 'OR') {
        operator = 'OR'
      } else {
        throw new Error('Unexpected operator.')
      }

      const left = serialize(ast.left)
      const right = serialize(ast.right)
      // an operand that contributes no condition must not leave a dangling
      // operator behind (e.g. `mode:reader foo`)
      if (!left) {
        return right
      }
      if (!right) {
        return left
      }

      // parenthesize so the SQL grouping always follows the AST, and so a
      // top-level OR cannot escape a condition the caller ANDs this onto
      return `(${left} ${operator} ${right})`
    }

    if (ast.type === 'UnaryOperator') {
      const operand = serialize(ast.operand)
      return operand ? `NOT (${operand})` : ''
    }

    if (ast.type === 'ParenthesizedExpression') {
      const inner = serialize(ast.expression)
      return inner ? `(${inner})` : ''
    }

    throw new Error('Missing AST type.')
  }

  return serialize(searchQuery)
}
const buildWhereClause = (
queryBuilder: SelectQueryBuilder<LibraryItem>,
args: SearchArgs
@ -357,8 +764,15 @@ export const searchLibraryItems = async (
.select(selectColumns)
.where('library_item.user_id = :userId', { userId })
// build the where clause
buildWhereClause(queryBuilder, args)
if (args.searchQuery) {
const parameters: ObjectLiteral[] = []
const whereClause = buildQuery(args.searchQuery, parameters)
whereClause &&
queryBuilder.andWhere(
whereClause,
parameters.reduce((a, b) => ({ ...a, ...b }), {})
)
}
const libraryItems = await queryBuilder
.addOrderBy(`library_item.${sortField}`, sortOrder, 'NULLS LAST')
@ -671,7 +1085,7 @@ export const countByCreatedAt = async (
export const updateLibraryItems = async (
action: BulkActionType,
searchArgs: SearchArgs,
searchQuery: LiqeQuery,
userId: string,
labels?: Label[],
args?: unknown
@ -731,7 +1145,7 @@ export const updateLibraryItems = async (
.where('library_item.user_id = :userId', { userId })
// build the where clause
buildWhereClause(queryBuilder, searchArgs)
// buildWhereClause(queryBuilder, searchQuery)
if (addLabels) {
if (!labels) {

View File

@ -3,20 +3,16 @@
/* eslint-disable @typescript-eslint/no-unsafe-call */
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
import { LiqeQuery, parse } from 'liqe'
import { DateTime } from 'luxon'
import {
ISearchParserDictionary,
parse,
SearchParserKeyWordOffset,
SearchParserTextOffset,
} from 'search-query-parser'
import { ISearchParserDictionary } from 'search-query-parser'
import { InputMaybe, PageType, SortParams } from '../generated/graphql'
export enum ReadFilter {
ALL,
READ,
READING,
UNREAD,
ALL = 'all',
READ = 'read',
READING = 'reading',
UNREAD = 'unread',
}
export enum InFilter {
@ -56,9 +52,9 @@ export type LabelFilter = {
}
export enum HasFilter {
HIGHLIGHTS,
LABELS,
SUBSCRIPTIONS,
HIGHLIGHTS = 'highlights',
LABELS = 'labels',
SUBSCRIPTIONS = 'subscriptions',
}
export interface DateFilter {
@ -394,168 +390,164 @@ const parseNoFilter = (str?: string): NoFilter | undefined => {
return undefined
}
export const parseSearchQuery = (query: string | undefined): SearchFilter => {
export const parseSearchQuery = (query: string): LiqeQuery => {
const searchQuery = query
? query
.replace(/\W\s":/g, '')
.replace('in:subscription', 'has:subscriptions') // compatibility with old search
.replace('in:library', 'no:subscription') // compatibility with old search
: undefined
const result: SearchFilter = {
query: searchQuery,
readFilter: ReadFilter.ALL,
inFilter: searchQuery ? InFilter.ALL : InFilter.INBOX,
labelFilters: [],
hasFilters: [],
dateFilters: [],
termFilters: [],
matchFilters: [],
ids: [],
noFilters: [],
rangeFilters: [],
}
.replace(/\W\s":/g, '')
.replace('in:subscription', 'has:subscriptions') // compatibility with old search
.replace('in:library', 'no:subscription') // compatibility with old search
// const result: SearchFilter = {
// query: searchQuery,
// readFilter: ReadFilter.ALL,
// inFilter: searchQuery ? InFilter.ALL : InFilter.INBOX,
// labelFilters: [],
// hasFilters: [],
// dateFilters: [],
// termFilters: [],
// matchFilters: [],
// ids: [],
// noFilters: [],
// rangeFilters: [],
// }
if (!searchQuery) {
return result
}
// if (!searchQuery) {
// return result
// }
const parsed = parse(searchQuery, {
keywords: [
'in',
'is',
'type',
'label',
'sort',
'has',
'saved',
'author',
'published',
'subscription',
'language',
'title',
'description',
'content',
'updated',
'includes',
'recommendedBy',
'no',
'mode',
'site',
'note',
'rss',
'wordsCount',
'readPosition',
'use',
],
tokenize: true,
})
if (parsed.offsets) {
const texts = parsed.offsets
.filter((offset) => 'text' in offset)
.map((offset) => offset as SearchParserTextOffset)
return parse(searchQuery)
if (texts.length > 0) {
result.query = texts
.map((offset: SearchParserTextOffset) => {
// TODO: the parser library doesn't let us accurately
// pull out quoted text, so we are just assuming
// anything with spaces is quoted.
if (offset.text.indexOf(' ') > -1) {
return `"${offset.text}"`
}
return offset.text
})
.join(' ')
} else {
result.query = undefined
}
// const parsed = parse(searchQuery, {
// keywords: [
// 'in',
// 'is',
// 'type',
// 'label',
// 'sort',
// 'has',
// 'saved',
// 'author',
// 'published',
// 'subscription',
// 'language',
// 'title',
// 'description',
// 'content',
// 'updated',
// 'includes',
// 'recommendedBy',
// 'no',
// 'mode',
// 'site',
// 'note',
// 'rss',
// 'wordsCount',
// 'readPosition',
// ],
// tokenize: true,
// })
// if (parsed.offsets) {
// const texts = parsed.offsets
// .filter((offset) => 'text' in offset)
// .map((offset) => offset as SearchParserTextOffset)
const keywords = parsed.offsets
.filter((offset) => 'keyword' in offset)
.map((offset) => offset as SearchParserKeyWordOffset)
// if (texts.length > 0) {
// result.query = texts
// .map((offset: SearchParserTextOffset) => {
// // TODO: the parser library doesn't let us accurately
// // pull out quoted text, so we are just assuming
// // anything with spaces is quoted.
// if (offset.text.indexOf(' ') > -1) {
// return `"${offset.text}"`
// }
// return offset.text
// })
// .join(' ')
// } else {
// result.query = undefined
// }
for (const keyword of keywords) {
switch (keyword.keyword) {
case 'in':
result.inFilter = parseInFilter(keyword.value, result.query)
break
case 'is':
result.readFilter = parseIsFilter(keyword.value)
break
case 'type':
result.typeFilter = parseTypeFilter(keyword.value)
break
case 'label': {
const labelFilter = parseLabelFilter(keyword.value, parsed.exclude)
labelFilter && result.labelFilters.push(labelFilter)
break
}
case 'sort':
result.sort = parseSort(keyword.value)
break
case 'has': {
const hasFilter = parseHasFilter(keyword.value)
hasFilter !== undefined && result.hasFilters.push(hasFilter)
break
}
case 'saved':
case 'read':
case 'updated':
case 'published': {
const dateFilter = parseDateFilter(keyword.keyword, keyword.value)
dateFilter && result.dateFilters.push(dateFilter)
break
}
// term filters
case 'subscription':
case 'rss':
case 'language': {
const fieldFilter = parseFieldFilter(keyword.keyword, keyword.value)
fieldFilter && result.termFilters.push(fieldFilter)
break
}
// match filters
case 'author':
case 'title':
case 'description':
case 'note':
case 'site':
case 'content': {
const fieldFilter = parseFieldFilter(keyword.keyword, keyword.value)
fieldFilter && result.matchFilters.push(fieldFilter)
break
}
case 'includes': {
const ids = parseIds(keyword.value)
ids && result.ids.push(...ids)
break
}
case 'recommendedBy': {
result.recommendedBy = parseStringValue(keyword.value)
break
}
case 'no': {
const noFilter = parseNoFilter(keyword.value)
noFilter && result.noFilters.push(noFilter)
break
}
case 'mode':
// mode is ignored and used only by the frontend
break
case 'readPosition':
case 'wordsCount': {
const rangeFilter = parseRangeFilter(keyword.keyword, keyword.value)
rangeFilter && result.rangeFilters.push(rangeFilter)
break
}
case 'use':
result.useFolders = keyword.value === 'folders'
break
}
}
}
// const keywords = parsed.offsets
// .filter((offset) => 'keyword' in offset)
// .map((offset) => offset as SearchParserKeyWordOffset)
return result
// for (const keyword of keywords) {
// switch (keyword.keyword) {
// case 'in':
// result.inFilter = parseInFilter(keyword.value, result.query)
// break
// case 'is':
// result.readFilter = parseIsFilter(keyword.value)
// break
// case 'type':
// result.typeFilter = parseTypeFilter(keyword.value)
// break
// case 'label': {
// const labelFilter = parseLabelFilter(keyword.value, parsed.exclude)
// labelFilter && result.labelFilters.push(labelFilter)
// break
// }
// case 'sort':
// result.sort = parseSort(keyword.value)
// break
// case 'has': {
// const hasFilter = parseHasFilter(keyword.value)
// hasFilter !== undefined && result.hasFilters.push(hasFilter)
// break
// }
// case 'saved':
// case 'read':
// case 'updated':
// case 'published': {
// const dateFilter = parseDateFilter(keyword.keyword, keyword.value)
// dateFilter && result.dateFilters.push(dateFilter)
// break
// }
// // term filters
// case 'subscription':
// case 'rss':
// case 'language': {
// const fieldFilter = parseFieldFilter(keyword.keyword, keyword.value)
// fieldFilter && result.termFilters.push(fieldFilter)
// break
// }
// // match filters
// case 'author':
// case 'title':
// case 'description':
// case 'note':
// case 'site':
// case 'content': {
// const fieldFilter = parseFieldFilter(keyword.keyword, keyword.value)
// fieldFilter && result.matchFilters.push(fieldFilter)
// break
// }
// case 'includes': {
// const ids = parseIds(keyword.value)
// ids && result.ids.push(...ids)
// break
// }
// case 'recommendedBy': {
// result.recommendedBy = parseStringValue(keyword.value)
// break
// }
// case 'no': {
// const noFilter = parseNoFilter(keyword.value)
// noFilter && result.noFilters.push(noFilter)
// break
// }
// case 'mode':
// // mode is ignored and used only by the frontend
// break
// case 'readPosition':
// case 'wordsCount': {
// const rangeFilter = parseRangeFilter(keyword.keyword, keyword.value)
// rangeFilter && result.rangeFilters.push(rangeFilter)
// break
// }
// }
// }
// }
// return result
}
export const sortParamsToSort = (

View File

@ -18804,6 +18804,14 @@ linkify-it@^4.0.1:
dependencies:
uc.micro "^1.0.1"
liqe@^3.8.0:
version "3.8.0"
resolved "https://registry.yarnpkg.com/liqe/-/liqe-3.8.0.tgz#e96c0733ecd3efb613d4404023433f15ae4f5849"
integrity sha512-cZ1rDx4XzxONBTskSPBp7/KwJ9qbUdF8EPnY4VjKXwHF1Krz9lgnlMTh1G7kd+KtPYvUte1mhuZeQSnk7KiSBg==
dependencies:
nearley "^2.20.1"
ts-error "^1.0.6"
listr-silent-renderer@^1.1.1:
version "1.1.1"
resolved "https://registry.yarnpkg.com/listr-silent-renderer/-/listr-silent-renderer-1.1.1.tgz#924b5a3757153770bf1a8e3fbf74b8bbf3f9242e"
@ -26919,6 +26927,11 @@ ts-dedent@^2.0.0, ts-dedent@^2.2.0:
resolved "https://registry.yarnpkg.com/ts-dedent/-/ts-dedent-2.2.0.tgz#39e4bd297cd036292ae2394eb3412be63f563bb5"
integrity sha512-q5W7tVM71e2xjHZTlgfTDoPF/SmqKG5hddq9SzR49CH2hayqRKJtQ4mtRlSxKaJlR/+9rEM+mnBHf7I2/BQcpQ==
ts-error@^1.0.6:
version "1.0.6"
resolved "https://registry.yarnpkg.com/ts-error/-/ts-error-1.0.6.tgz#277496f2a28de6c184cfce8dfd5cdd03a4e6b0fc"
integrity sha512-tLJxacIQUM82IR7JO1UUkKlYuUTmoY9HBJAmNWFzheSlDS5SPMcNIepejHJa4BpPQLAcbRhRf3GDJzyj6rbKvA==
ts-loader@^9.3.0:
version "9.3.0"
resolved "https://registry.yarnpkg.com/ts-loader/-/ts-loader-9.3.0.tgz#980f4dbfb60e517179e15e10ed98e454b132159f"