Feature/search highlights backend (#395)

* add highlight mappings

* return highlight in resolvers

* temporarily skip highlight tests

* add test for getting highlights

* update merge highlight

* separate elastic methods

* roll back merge highlight test

* add highlight to elastic script

* update delete highlight in elastic

* migrate highlight data from postgres to elastic

* rescue not found exception when page is not found in the migration script

* exclude highlights in searching pages results

* search pages with highlights only with has:highlight query

* add search endpoint to search pages or highlights

* reduce code smell in search api

* fix rebase error

* fix tests

* add test for search highlight

* add test for new search endpoint

* add labels to search results

* update schema

* update search query

* fix update/share highlights

* fix rebase error

* fix tests

* add highlight model in elastic

* add savedAt and publishedAt date range in search query

* add sort by updated and recently read

* fix tests

* close db connection when tests are done

* test github action

* revert github action test

* fix rebase error

* add docker-compose for api-test

* remove unused env

* remove highlights with no page attached

* allow the get_articles resolver to handle search queries so we can merge it without web changes
This commit is contained in:
Hongbo Wu
2022-04-12 12:31:08 +08:00
committed by GitHub
parent 2ebdaba780
commit ae0d1dd2ee
41 changed files with 2024 additions and 692 deletions

73
docker-compose-test.yml Normal file
View File

@ -0,0 +1,73 @@
version: '3'
services:
postgres-test:
image: "postgres:12.8"
container_name: "omnivore-postgres-test"
environment:
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=postgres
- POSTGRES_DB=omnivore_test
- PG_POOL_MAX=20
healthcheck:
test: "exit 0"
interval: 2s
timeout: 12s
retries: 3
expose:
- 5432
elastic-test:
image: docker.elastic.co/elasticsearch/elasticsearch:7.17.1
container_name: "omnivore-elastic-test"
healthcheck:
test: curl 0.0.0.0:9201/_cat/health >/dev/null || exit 1
interval: 2s
timeout: 2s
retries: 5
environment:
- discovery.type=single-node
- http.cors.allow-origin=*
- http.cors.enabled=true
- http.cors.allow-headers=X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization
- http.cors.allow-credentials=true
- http.port=9201
volumes:
- ./.docker/elastic-data:/usr/share/elasticsearch/data
ports:
- "9201:9201"
api-test:
build:
context: .
dockerfile: ./packages/api/Dockerfile-test
container_name: "omnivore-api-test"
environment:
- API_ENV=local
- PG_HOST=postgres-test
- PG_USER=postgres
- PG_PASSWORD=postgres
- PG_DB=omnivore_test
- PG_PORT=5432
- PG_POOL_MAX=20
- ELASTIC_URL=http://elastic-test:9201
- IMAGE_PROXY_URL=http://localhost:9999
- IMAGE_PROXY_SECRET=some-secret
- JWT_SECRET=some_secret
- SSO_JWT_SECRET=some_sso_secret
- CLIENT_URL=http://localhost:3000
- GATEWAY_URL=http://localhost:8080/api
- PUPPETEER_TASK_HANDLER_URL=http://localhost:9090/
- REMINDER_TASK_HANDLER_URL=/svc/reminders/trigger
- BOOKMARKLET_JWT_SECRET=some_bookmarklet_secret
- BOOKMARKLET_VERSION=1.0.0
- PREVIEW_IMAGE_WRAPPER_ID='selected_highlight_wrapper'
- GCP_PROJECT_ID=omnivore-local
- GAUTH_CLIENT_ID='notset'
- GAUTH_SECRET='notset'
- SEGMENT_WRITE_KEY='test'
- PUBSUB_VERIFICATION_TOKEN='123456'
depends_on:
postgres-test:
condition: service_healthy
elastic-test:
condition: service_healthy

View File

@ -23,4 +23,5 @@ GCS_UPLOAD_BUCKET=
GCS_UPLOAD_SA_KEY_FILE_PATH=
TWITTER_BEARER_TOKEN=
PREVIEW_IMAGE_WRAPPER_ID='selected_highlight_wrapper'
REMINDER_TASK_HANDLER_URL=
REMINDER_TASK_HANDLER_URL=
ELASTIC_URL=http://localhost:9200

View File

@ -28,4 +28,4 @@ SEGMENT_WRITE_KEY='test'
REMINDER_TASK_HANDLER_URL=http://localhost:4000/svc/reminders/trigger
PUBSUB_VERIFICATION_TOKEN='123456'
PUPPETEER_TASK_HANDLER_URL=http://localhost:9090/
ELASTIC_URL=http://localhost:9200

View File

@ -18,6 +18,7 @@ ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
RUN yarn install
ADD /packages/db ./packages/db
ADD /packages/readabilityjs ./packages/readabilityjs
ADD /packages/api ./packages/api

View File

@ -0,0 +1,188 @@
#!/usr/bin/python
import os
import json
import psycopg2
from psycopg2.extras import RealDictCursor
from elasticsearch import Elasticsearch, NotFoundError
PG_HOST = os.getenv('PG_HOST', 'localhost')
PG_PORT = os.getenv('PG_PORT', 5432)
PG_USER = os.getenv('PG_USER', 'app_user')
PG_PASSWORD = os.getenv('PG_PASSWORD', 'app_pass')
PG_DB = os.getenv('PG_DB', 'omnivore')
ES_URL = os.getenv('ES_URL', 'http://localhost:9200')
ES_USERNAME = os.getenv('ES_USERNAME', 'elastic')
ES_PASSWORD = os.getenv('ES_PASSWORD', 'password')
UPDATE_TIME = os.getenv('UPDATE_TIME', '2019-01-01 00:00:00')
INDEX_SETTINGS = os.getenv('INDEX_SETTINGS', 'index_settings.json')
DATETIME_FORMAT = 'YYYY-MM-DD"T"HH24:MI:SS.MS"Z"'
def update_mappings(client: Elasticsearch):
    """Apply the mappings from the INDEX_SETTINGS JSON file to 'pages_alias'.

    Exits the process with status 1 when the settings file cannot be read
    or Elasticsearch rejects the mapping update, since the rest of the
    migration depends on the new (nested highlights) mapping being present.
    """
    print('updating mappings')
    try:
        # INDEX_SETTINGS names a JSON file with a top-level "mappings" object
        with open(INDEX_SETTINGS, 'r') as f:
            settings = json.load(f)
        client.indices.put_mapping(
            index='pages_alias',
            body=settings['mappings'])
        print('mappings updated')
    except Exception as err:
        print('update mappings ERROR:', err)
        exit(1)
def assertData(conn, client: Elasticsearch, pages):
    """Cross-check per-page highlight counts between Postgres and Elasticsearch.

    For every page id in `pages`, the number of non-deleted highlight rows in
    Postgres must equal the length of the page document's `highlights` array
    in Elasticsearch. Pages missing from Elasticsearch are skipped (counted
    separately). Exits with status 1 if the check itself fails to run.
    """
    try:
        success = 0
        failure = 0
        skip = 0
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        for page in pages:
            pageId = page['pageId']
            # bind pageId as a query parameter instead of interpolating it
            # into the SQL text (avoids quoting/injection issues)
            cursor.execute(
                '''SELECT COUNT(*) FROM omnivore.highlight
                WHERE elastic_page_id = %s AND deleted = false''',
                (pageId,))
            countInPostgres = cursor.fetchone()['count']
            try:
                source = client.get(
                    index='pages_alias',
                    id=pageId,
                    _source=['highlights'])['_source']
                # a document without a highlights field counts as zero
                # highlights rather than aborting the whole assertion run
                countInElastic = len(source.get('highlights') or [])
            except NotFoundError as err:
                print('Elasticsearch get ERROR:', err)
                # if page is not found in elasticsearch, skip testing
                skip += 1
                continue
            if countInPostgres == countInElastic:
                success += 1
                print(f'Page {pageId} OK')
            else:
                failure += 1
                print(
                    f'Page {pageId} ERROR: postgres: {countInPostgres}, elastic: {countInElastic}')
        cursor.close()
        print(
            f'Asserted data, success: {success}, failure: {failure}, skip: {skip}')
    except Exception as err:
        print('Assert data ERROR:', err)
        exit(1)
def ingest_highlights(conn, pages):
    """Copy highlight rows for the given pages from Postgres into Elasticsearch.

    `pages` is an iterable of rows with a "pageId" key (elastic page ids).
    Relies on the module-level `client` Elasticsearch connection, as the
    original did — TODO consider passing it in explicitly.
    """
    try:
        import_count = 0
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        # the query only varies in the bound page id, so build it once
        # outside the loop; DATETIME_FORMAT/UPDATE_TIME are operator-supplied
        # configuration, not user input
        query = '''
            SELECT
              id,
              quote,
              prefix,
              to_char(created_at, '{DATETIME_FORMAT}') as "createdAt",
              to_char(COALESCE(updated_at, current_timestamp), '{DATETIME_FORMAT}') as "updatedAt",
              suffix,
              patch,
              annotation,
              short_id as "shortId",
              user_id as "userId",
              to_char(shared_at, '{DATETIME_FORMAT}') as "sharedAt"
            FROM omnivore.highlight
            WHERE
              elastic_page_id = %s
              AND deleted = false
              AND created_at > '{UPDATE_TIME}'
        '''.format(DATETIME_FORMAT=DATETIME_FORMAT, UPDATE_TIME=UPDATE_TIME)
        for page in pages:
            pageId = page['pageId']
            # pageId is bound as a parameter rather than spliced into the SQL
            cursor.execute(query, (pageId,))
            result = cursor.fetchall()
            import_count += import_highlights_to_es(client, result, pageId)
        # report the grand total once, after all pages were processed
        print(f'Imported total {import_count} highlights to es')
        cursor.close()
    except Exception as err:
        print('Export data to json ERROR:', err)
def import_highlights_to_es(client, highlights, pageId) -> int:
    """Attach `highlights` to the page document `pageId` in Elasticsearch.

    Returns the number of highlights written; 0 when the input is empty,
    the document was not changed, or the update request failed.
    """
    total = len(highlights)
    print(f'Writing {total} highlights to page {pageId}')
    if not highlights:
        print('No highlights to import')
        return 0
    try:
        response = client.update(
            index='pages_alias',
            id=pageId,
            body={'doc': {'highlights': highlights}})
        # only count the highlights when ES reports an actual update
        added = total if response['result'] == 'updated' else 0
        print(f'Added {added} highlights to page {pageId}')
        return added
    except Exception as err:
        print('Elasticsearch update ERROR:', err)
        return 0
def get_pages_with_highlights(conn):
    """Return rows of distinct elastic page ids that have non-deleted
    highlights created after UPDATE_TIME.

    Exits with status 1 on a query error: previously the except branch fell
    through and implicitly returned None, which made the caller crash while
    iterating the result.
    """
    # UPDATE_TIME is operator-supplied configuration, not user input
    query = f'''
        SELECT DISTINCT
          elastic_page_id as "pageId"
        FROM omnivore.highlight
        WHERE
          elastic_page_id IS NOT NULL
          AND deleted = false
          AND created_at > '{UPDATE_TIME}'
    '''
    try:
        cursor = conn.cursor(cursor_factory=RealDictCursor)
        cursor.execute(query)
        result = cursor.fetchall()
        cursor.close()
        print('Found pages with highlights:', len(result))
        return result
    except Exception as err:
        print('Get pages with highlights ERROR:', err)
        exit(1)
# --- main script: migrate highlight rows from Postgres into Elasticsearch ---
print('Starting migration')
# test elastic client
# client.info() raises when the cluster is unreachable, so the try/except
# doubles as a connectivity check before any data is moved
client = Elasticsearch(ES_URL, http_auth=(
    ES_USERNAME, ES_PASSWORD), retry_on_timeout=True)
try:
    print('Elasticsearch client connected', client.info())
except Exception as err:
    print('Elasticsearch client ERROR:', err)
    exit(1)
# test postgres client
conn = psycopg2.connect(
    f'host={PG_HOST} port={PG_PORT} dbname={PG_DB} user={PG_USER} \
password={PG_PASSWORD}')
print('Postgres connection:', conn.info)
# 1) make sure the index mapping includes the nested highlights field
update_mappings(client)
# 2) find pages whose highlights are newer than UPDATE_TIME
pages = get_pages_with_highlights(conn)
# 3) copy those highlights into the matching Elasticsearch documents
ingest_highlights(conn, pages)
# 4) sanity check: per-page counts must agree between the two stores
assertData(conn, client, pages)
client.close()
conn.close()
print('Migration complete')

View File

@ -89,7 +89,7 @@ def assertData(conn, client):
f'SELECT COUNT(*) FROM omnivore.links WHERE user_id = \'{userId}\'''')
countInPostgres = cursor.fetchone()['count']
countInElastic = client.count(
index='pages', body={'query': {'term': {'userId': userId}}})['count']
index='pages_alias', body={'query': {'term': {'userId': userId}}})['count']
if countInPostgres == countInElastic:
success += 1
@ -197,7 +197,7 @@ def import_data_to_es(client, docs) -> int:
doc['publishedAt'] = validated_date(doc['publishedAt'])
# convert the string to a dict object
dict_doc = {
'_index': 'pages',
'_index': 'pages_alias',
'_id': doc['id'],
'_source': doc
}

View File

@ -56,6 +56,30 @@
}
}
},
"highlights": {
"type": "nested",
"properties": {
"id": {
"type": "keyword"
},
"userId": {
"type": "keyword"
},
"quote": {
"type": "text",
"analyzer": "strip_html_analyzer"
},
"annotation": {
"type": "text"
},
"createdAt": {
"type": "date"
},
"updatedAt": {
"type": "date"
}
}
},
"readingProgressPercent": {
"type": "float"
},

View File

@ -8,7 +8,7 @@
"start": "node dist/server.js",
"lint": "eslint src --ext ts,js,tsx,jsx",
"lint:fix": "eslint src --fix --ext ts,js,tsx,jsx",
"test": "nyc mocha -r ts-node/register --config mocha-config.json --exit --timeout 10000"
"test": "nyc mocha -r ts-node/register --config mocha-config.json --timeout 10000"
},
"dependencies": {
"@elastic/elasticsearch": "~7.12.0",

View File

@ -3,7 +3,7 @@
import Knex from 'knex'
import { LinkShareInfo } from '../../generated/graphql'
import { DataModels } from '../../resolvers/types'
import { getPageByParam } from '../../elastic'
import { getPageByParam } from '../../elastic/pages'
// once we have links setup properly in the API we will remove this method
// and have a getShareInfoForLink method

View File

@ -0,0 +1,281 @@
import {
Highlight,
Page,
PageContext,
SearchItem,
SearchResponse,
} from './types'
import { ResponseError } from '@elastic/elasticsearch/lib/errors'
import { client, INDEX_ALIAS } from './index'
import { SortBy, SortOrder, SortParams } from '../utils/search'
/**
 * Append a highlight to the `highlights` array of the page document `id`.
 *
 * Uses a painless script so the array is created on the first highlight and
 * appended to afterwards. Returns true only when Elasticsearch reports the
 * document was updated; false on a missing page or any other failure.
 */
export const addHighlightToPage = async (
  id: string,
  highlight: Highlight,
  ctx: PageContext
): Promise<boolean> => {
  try {
    const { body } = await client.update({
      index: INDEX_ALIAS,
      id,
      body: {
        script: {
          // create the array on first use, otherwise append
          source: `if (ctx._source.highlights == null) {
            ctx._source.highlights = [params.highlight]
          } else {
            ctx._source.highlights.add(params.highlight)
          }`,
          lang: 'painless',
          params: {
            highlight: highlight,
          },
        },
      },
      refresh: ctx.refresh,
      // tolerate concurrent writers updating the same page document
      retry_on_conflict: 3,
    })
    return body.result === 'updated'
  } catch (e) {
    if (
      e instanceof ResponseError &&
      e.message === 'document_missing_exception'
    ) {
      // NOTE(review): this compares ResponseError.message against the ES
      // error *type* string — confirm the 7.x client surfaces the type
      // there, otherwise this branch never matches
      console.log('page has been deleted', id)
      return false
    }
    console.error('failed to add highlight to a page in elastic', e)
    return false
  }
}
/**
 * Look up a single highlight by its id, searching across all page documents.
 * Returns undefined when no page contains the highlight or the search fails.
 */
export const getHighlightById = async (
  id: string
): Promise<Highlight | undefined> => {
  // nested query with inner_hits so only the matching highlight object is
  // returned instead of the whole page document (_source is disabled)
  const searchBody = {
    query: {
      nested: {
        path: 'highlights',
        query: {
          match: {
            'highlights.id': id,
          },
        },
        inner_hits: {},
      },
    },
    _source: false,
  }
  try {
    const { body } = await client.search({
      index: INDEX_ALIAS,
      body: searchBody,
    })
    // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
    if (body.hits.total.value === 0) {
      return undefined
    }
    // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access,@typescript-eslint/no-unsafe-return
    return body.hits.hits[0].inner_hits.highlights.hits.hits[0]._source
  } catch (e) {
    console.error('failed to get highlight from a page in elastic', e)
    return undefined
  }
}
/**
 * Remove a highlight (by id) from whichever of the user's pages contains it.
 *
 * update_by_query avoids a separate page lookup: the painless script filters
 * the highlight out of the array on every matching document. Returns true
 * only when at least one document was updated.
 */
export const deleteHighlight = async (
  highlightId: string,
  ctx: PageContext
): Promise<boolean> => {
  const removalScript = {
    source:
      'ctx._source.highlights.removeIf(h -> h.id == params.highlightId)',
    lang: 'painless',
    params: {
      highlightId: highlightId,
    },
  }
  // only touch documents owned by the caller that actually contain the
  // highlight (nested term on highlights.id)
  const targetQuery = {
    bool: {
      filter: [
        {
          term: {
            userId: ctx.uid,
          },
        },
        {
          nested: {
            path: 'highlights',
            query: {
              term: {
                'highlights.id': highlightId,
              },
            },
          },
        },
      ],
    },
  }
  try {
    const response = await client.updateByQuery({
      index: INDEX_ALIAS,
      body: {
        script: removalScript,
        query: targetQuery,
      },
      refresh: ctx.refresh,
    })
    return !!response.body.updated
  } catch (e) {
    console.error('failed to delete a highlight in elastic', e)
    return false
  }
}
/**
 * Search the caller's highlights across all of their pages.
 *
 * Returns [items, total] where each item merges the page's metadata
 * (title/slug/url/createdAt) with the matched highlight's fields plus the
 * owning pageId; undefined on error.
 */
export const searchHighlights = async (
  args: {
    from?: number
    size?: number
    sort?: SortParams
    query?: string
  },
  userId: string
): Promise<[SearchItem[], number] | undefined> => {
  try {
    const { from = 0, size = 10, sort, query } = args
    const sortOrder = sort?.order || SortOrder.DESCENDING
    // default sort by updatedAt
    // NOTE(review): for score sorting this passes SortBy.SCORE itself as the
    // field name — confirm that enum value is '_score', otherwise the sort
    // targets a non-existent field
    const sortField =
      sort?.by === SortBy.SCORE ? SortBy.SCORE : 'highlights.updatedAt'
    const searchBody = {
      query: {
        nested: {
          path: 'highlights',
          query: {
            bool: {
              // hard filter: only the caller's own highlights
              filter: [
                {
                  term: {
                    'highlights.userId': userId,
                  },
                },
              ],
              should: [
                {
                  multi_match: {
                    query: query || '',
                    fields: ['highlights.quote', 'highlights.annotation'],
                    operator: 'and',
                    type: 'cross_fields',
                  },
                },
              ],
              // the free-text clause is only required when a query was given
              minimum_should_match: query ? 1 : 0,
            },
          },
          // inner_hits yields the matching highlight objects themselves
          inner_hits: {},
        },
      },
      sort: [
        {
          [sortField]: {
            order: sortOrder,
            nested: {
              path: 'highlights',
            },
          },
        },
      ],
      from,
      size,
      // page-level fields merged into each result item below
      _source: ['title', 'slug', 'url', 'createdAt'],
    }
    console.log('searching highlights in elastic', JSON.stringify(searchBody))
    const response = await client.search<SearchResponse<Page>>({
      index: INDEX_ALIAS,
      body: searchBody,
    })
    if (response.body.hits.total.value === 0) {
      return [[], 0]
    }
    const results: SearchItem[] = []
    response.body.hits.hits.forEach((hit) => {
      // one page hit may contain several matching highlights; emit one
      // result item per highlight
      // eslint-disable-next-line @typescript-eslint/no-unsafe-call,@typescript-eslint/no-unsafe-member-access
      hit.inner_hits.highlights.hits.hits.forEach(
        (innerHit: { _source: Highlight }) => {
          results.push({
            ...hit._source,
            ...innerHit._source,
            pageId: hit._id,
          })
        }
      )
    })
    // NOTE(review): the returned total counts matching *pages*, not
    // highlights — confirm callers expect that
    return [results, response.body.hits.total.value]
  } catch (e) {
    console.error('failed to search highlights in elastic', e)
    return undefined
  }
}
/**
 * Replace a stored highlight with `highlight` (matched by id) on whichever
 * of the user's pages contains it, via update_by_query.
 * Returns true only when a document was actually updated.
 */
export const updateHighlight = async (
  highlight: Highlight,
  ctx: PageContext
): Promise<boolean> => {
  try {
    const { body } = await client.updateByQuery({
      index: INDEX_ALIAS,
      body: {
        script: {
          // painless: drop the old copy, then append the replacement
          source: `ctx._source.highlights.removeIf(h -> h.id == params.highlight.id);
          ctx._source.highlights.add(params.highlight)`,
          lang: 'painless',
          params: {
            highlight: highlight,
          },
        },
        query: {
          bool: {
            filter: [
              {
                term: {
                  userId: ctx.uid,
                },
              },
              {
                // restrict to pages that actually contain the highlight
                nested: {
                  path: 'highlights',
                  query: {
                    term: {
                      'highlights.id': highlight.id,
                    },
                  },
                },
              },
            ],
          },
        },
      },
      refresh: ctx.refresh,
    })
    return !!body.updated
  } catch (e) {
    if (
      e instanceof ResponseError &&
      e.message === 'document_missing_exception'
    ) {
      // NOTE(review): compares ResponseError.message against the ES error
      // *type* string — confirm the client exposes it that way, otherwise
      // this branch is dead code
      console.log('page has been deleted')
      return false
    }
    console.error('failed to update highlight in elastic', e)
    return false
  }
}

View File

@ -1,35 +1,11 @@
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
/* eslint-disable @typescript-eslint/no-unsafe-call */
import { env } from '../env'
import { Client } from '@elastic/elasticsearch'
import {
Label,
PageType,
SortBy,
SortOrder,
SortParams,
} from '../generated/graphql'
import {
InFilter,
LabelFilter,
LabelFilterType,
ReadFilter,
} from '../utils/search'
import {
Page,
PageContext,
ParamSet,
SearchBody,
SearchResponse,
} from './types'
import { readFileSync } from 'fs'
import { join } from 'path'
import { ResponseError } from '@elastic/elasticsearch/lib/errors'
const INDEX_NAME = 'pages'
const INDEX_ALIAS = 'pages_alias'
const client = new Client({
export const INDEX_NAME = 'pages'
export const INDEX_ALIAS = 'pages_alias'
export const client = new Client({
node: env.elastic.url,
maxRetries: 3,
requestTimeout: 50000,
@ -52,484 +28,6 @@ const ingest = async (): Promise<void> => {
})
}
const appendQuery = (body: SearchBody, query: string): void => {
body.query.bool.should.push({
multi_match: {
query,
fields: ['title', 'content', 'author', 'description', 'siteName'],
operator: 'and',
type: 'cross_fields',
},
})
body.query.bool.minimum_should_match = 1
}
const appendTypeFilter = (body: SearchBody, filter: PageType): void => {
body.query.bool.filter.push({
term: {
pageType: filter,
},
})
}
/**
 * Filter by read state. A page counts as read once readingProgress reaches
 * 98%; anything below is unread. Fixes the previously inverted ranges
 * (UNREAD matched >= 98 and READ matched < 98). ReadFilter.ALL is handled
 * by the caller and adds no clause.
 */
const appendReadFilter = (body: SearchBody, filter: ReadFilter): void => {
  switch (filter) {
    case ReadFilter.UNREAD:
      body.query.bool.filter.push({
        range: {
          readingProgress: {
            lt: 98,
          },
        },
      })
      break
    case ReadFilter.READ:
      body.query.bool.filter.push({
        range: {
          readingProgress: {
            gte: 98,
          },
        },
      })
  }
}
const appendInFilter = (body: SearchBody, filter: InFilter): void => {
switch (filter) {
case InFilter.ARCHIVE:
body.query.bool.filter.push({
exists: {
field: 'archivedAt',
},
})
break
case InFilter.INBOX:
body.query.bool.must_not.push({
exists: {
field: 'archivedAt',
},
})
}
}
const appendNotNullField = (body: SearchBody, field: string): void => {
body.query.bool.filter.push({
exists: {
field,
},
})
}
const appendExcludeLabelFilter = (
body: SearchBody,
filters: LabelFilter[]
): void => {
body.query.bool.must_not.push({
nested: {
path: 'labels',
query: filters.map((filter) => {
return {
terms: {
'labels.name': filter.labels,
},
}
}),
},
})
}
const appendIncludeLabelFilter = (
body: SearchBody,
filters: LabelFilter[]
): void => {
body.query.bool.filter.push({
nested: {
path: 'labels',
query: {
bool: {
filter: filters.map((filter) => {
return {
terms: {
'labels.name': filter.labels,
},
}
}),
},
},
},
})
}
export const createPage = async (
page: Page,
ctx: PageContext
): Promise<string | undefined> => {
try {
const { body } = await client.index({
id: page.id || undefined,
index: INDEX_ALIAS,
body: {
...page,
updatedAt: new Date(),
savedAt: new Date(),
},
refresh: ctx.refresh,
})
await ctx.pubsub.pageCreated(page)
return body._id as string
} catch (e) {
console.error('failed to create a page in elastic', e)
return undefined
}
}
export const updatePage = async (
id: string,
page: Partial<Page>,
ctx: PageContext
): Promise<boolean> => {
try {
const { body } = await client.update({
index: INDEX_ALIAS,
id,
body: {
doc: {
...page,
updatedAt: new Date(),
},
},
refresh: ctx.refresh,
retry_on_conflict: 3,
})
if (body.result !== 'updated') return false
await ctx.pubsub.pageUpdated({ ...page, id }, ctx.uid)
return true
} catch (e) {
if (
e instanceof ResponseError &&
e.message === 'document_missing_exception'
) {
console.info('page has been deleted', id)
return false
}
console.error('failed to update a page in elastic', e)
return false
}
}
export const addLabelInPage = async (
id: string,
label: Label,
ctx: PageContext
): Promise<boolean> => {
try {
const { body } = await client.update({
index: INDEX_ALIAS,
id,
body: {
script: {
source: `if (ctx._source.labels == null) {
ctx._source.labels = [params.label]
} else if (!ctx._source.labels.any(label -> label.name == params.label.name)) {
ctx._source.labels.add(params.label)
} else { ctx.op = 'none' }`,
lang: 'painless',
params: {
label: label,
},
},
},
refresh: ctx.refresh,
retry_on_conflict: 3,
})
return body.result === 'updated'
} catch (e) {
console.error('failed to update a page in elastic', e)
return false
}
}
export const deletePage = async (
id: string,
ctx: PageContext
): Promise<boolean> => {
try {
const { body } = await client.delete({
index: INDEX_ALIAS,
id,
refresh: ctx.refresh,
})
if (body.deleted === 0) return false
await ctx.pubsub.pageDeleted(id, ctx.uid)
return true
} catch (e) {
console.error('failed to delete a page in elastic', e)
return false
}
}
export const deleteLabelInPages = async (
userId: string,
label: string,
ctx: PageContext
): Promise<void> => {
try {
await client.updateByQuery({
index: INDEX_ALIAS,
body: {
script: {
source:
'ctx._source.labels.removeIf(label -> label.name == params.label)',
lang: 'painless',
params: {
label: label,
},
},
query: {
bool: {
filter: [
{
term: {
userId,
},
},
{
nested: {
path: 'labels',
query: {
term: {
'labels.name': label,
},
},
},
},
],
},
},
},
refresh: ctx.refresh,
})
} catch (e) {
console.error('failed to delete a page in elastic', e)
}
}
export const getPageByParam = async <K extends keyof ParamSet>(
param: Record<K, Page[K]>
): Promise<Page | undefined> => {
try {
const params = {
query: {
bool: {
filter: Object.keys(param).map((key) => {
return {
term: {
[key]: param[key as K],
},
}
}),
},
},
size: 1,
_source: {
excludes: ['originalHtml'],
},
}
const { body } = await client.search({
index: INDEX_ALIAS,
body: params,
})
if (body.hits.total.value === 0) {
return undefined
}
return {
...body.hits.hits[0]._source,
id: body.hits.hits[0]._id,
} as Page
} catch (e) {
console.error('failed to search pages in elastic', e)
return undefined
}
}
export const getPageById = async (id: string): Promise<Page | undefined> => {
try {
const { body } = await client.get({
index: INDEX_ALIAS,
id,
})
return {
...body._source,
id: body._id,
} as Page
} catch (e) {
console.error('failed to search pages in elastic', e)
return undefined
}
}
export const searchPages = async (
args: {
from?: number
size?: number
sort?: SortParams
query?: string
inFilter: InFilter
readFilter: ReadFilter
typeFilter?: PageType
labelFilters: LabelFilter[]
},
userId: string,
notNullField: string | null = null
): Promise<[Page[], number] | undefined> => {
try {
const {
from = 0,
size = 10,
sort,
query,
readFilter,
typeFilter,
labelFilters,
inFilter,
} = args
const sortOrder = sort?.order === SortOrder.Ascending ? 'asc' : 'desc'
// default sort by saved_at
const sortField = sort?.by === SortBy.Score ? '_score' : 'savedAt'
const includeLabels = labelFilters.filter(
(filter) => filter.type === LabelFilterType.INCLUDE
)
const excludeLabels = labelFilters.filter(
(filter) => filter.type === LabelFilterType.EXCLUDE
)
const body: SearchBody = {
query: {
bool: {
filter: [
{
term: {
userId,
},
},
],
should: [],
must_not: [],
},
},
sort: [
{
[sortField]: {
order: sortOrder,
},
},
],
from,
size,
_source: {
excludes: ['originalHtml', 'content'],
},
}
// append filters
if (query) {
appendQuery(body, query)
}
if (typeFilter) {
appendTypeFilter(body, typeFilter)
}
if (inFilter !== InFilter.ALL) {
appendInFilter(body, inFilter)
}
if (readFilter !== ReadFilter.ALL) {
appendReadFilter(body, readFilter)
}
if (notNullField) {
appendNotNullField(body, notNullField)
}
if (includeLabels.length > 0) {
appendIncludeLabelFilter(body, includeLabels)
}
if (excludeLabels.length > 0) {
appendExcludeLabelFilter(body, excludeLabels)
}
console.log('searching pages in elastic', JSON.stringify(body))
const response = await client.search<SearchResponse<Page>, SearchBody>({
index: INDEX_ALIAS,
body,
})
if (response.body.hits.total.value === 0) {
return [[], 0]
}
return [
response.body.hits.hits.map((hit: { _source: Page; _id: string }) => ({
...hit._source,
content: '',
id: hit._id,
})),
response.body.hits.total.value,
]
} catch (e) {
console.error('failed to search pages in elastic', e)
return undefined
}
}
export const countByCreatedAt = async (
userId: string,
from?: number,
to?: number
): Promise<number> => {
try {
const { body } = await client.count({
index: INDEX_ALIAS,
body: {
query: {
bool: {
filter: [
{
term: {
userId,
},
},
{
range: {
createdAt: {
gte: from,
lte: to,
},
},
},
],
},
},
},
})
return body.count as number
} catch (e) {
console.error('failed to count pages in elastic', e)
return 0
}
}
export const initElasticsearch = async (): Promise<void> => {
try {
const response = await client.info()

View File

@ -0,0 +1,81 @@
import { Label, PageContext } from './types'
import { client, INDEX_ALIAS } from './index'
/**
 * Add a label to the page document `id`, creating the labels array when
 * needed and skipping the write entirely when a label with the same name is
 * already present. Returns true only when the document was updated.
 */
export const addLabelInPage = async (
  id: string,
  label: Label,
  ctx: PageContext
): Promise<boolean> => {
  // painless: initialise / append-if-new / no-op so unchanged documents are
  // not rewritten
  const script = {
    source: `if (ctx._source.labels == null) {
      ctx._source.labels = [params.label]
    } else if (!ctx._source.labels.any(label -> label.name == params.label.name)) {
      ctx._source.labels.add(params.label)
    } else { ctx.op = 'none' }`,
    lang: 'painless',
    params: {
      label: label,
    },
  }
  try {
    const response = await client.update({
      index: INDEX_ALIAS,
      id,
      body: { script },
      refresh: ctx.refresh,
      retry_on_conflict: 3,
    })
    return response.body.result === 'updated'
  } catch (e) {
    console.error('failed to update a page in elastic', e)
    return false
  }
}
/**
 * Remove the named label from every one of the user's pages that carries it,
 * via update_by_query (no per-page lookup needed).
 * Failures are logged and swallowed; there is no return value.
 */
export const deleteLabelInPages = async (
  userId: string,
  label: string,
  ctx: PageContext
): Promise<void> => {
  try {
    await client.updateByQuery({
      index: INDEX_ALIAS,
      body: {
        script: {
          // painless: filter the label out of the labels array by name
          source:
            'ctx._source.labels.removeIf(label -> label.name == params.label)',
          lang: 'painless',
          params: {
            label: label,
          },
        },
        query: {
          bool: {
            filter: [
              {
                term: {
                  userId,
                },
              },
              {
                // restrict to pages that actually carry the label
                nested: {
                  path: 'labels',
                  query: {
                    term: {
                      'labels.name': label,
                    },
                  },
                },
              },
            ],
          },
        },
      },
      refresh: ctx.refresh,
    })
  } catch (e) {
    // fixed log message: this operation deletes a label, not a page
    console.error('failed to delete a label in elastic', e)
  }
}

View File

@ -0,0 +1,470 @@
import {
Page,
PageContext,
PageType,
ParamSet,
SearchBody,
SearchResponse,
} from './types'
import {
DateRangeFilter,
HasFilter,
InFilter,
LabelFilter,
LabelFilterType,
ReadFilter,
SortBy,
SortOrder,
SortParams,
} from '../utils/search'
import { client, INDEX_ALIAS } from './index'
/**
 * Add the free-text clause over the page's text fields. Every term must
 * match ('and' + cross_fields), and at least one should-clause must then
 * match for a document to qualify.
 */
const appendQuery = (body: SearchBody, query: string): void => {
  // require the should-clause once a query is present
  body.query.bool.minimum_should_match = 1
  body.query.bool.should.push({
    multi_match: {
      query,
      fields: ['title', 'content', 'author', 'description', 'siteName'],
      operator: 'and',
      type: 'cross_fields',
    },
  })
}
/** Restrict results to a single page type (article, file, ...). */
const appendTypeFilter = (body: SearchBody, filter: PageType): void => {
  const typeClause = {
    term: {
      pageType: filter,
    },
  }
  body.query.bool.filter.push(typeClause)
}
/**
 * Filter by read state. A page counts as read once readingProgress reaches
 * 98%; anything below is unread. Fixes the previously inverted ranges
 * (UNREAD matched >= 98 and READ matched < 98). ReadFilter.ALL is handled
 * by the caller and adds no clause.
 * NOTE(review): the index mapping defines readingProgressPercent — confirm
 * the indexed field name matches `readingProgress` used here.
 */
const appendReadFilter = (body: SearchBody, filter: ReadFilter): void => {
  switch (filter) {
    case ReadFilter.UNREAD:
      body.query.bool.filter.push({
        range: {
          readingProgress: {
            lt: 98,
          },
        },
      })
      break
    case ReadFilter.READ:
      body.query.bool.filter.push({
        range: {
          readingProgress: {
            gte: 98,
          },
        },
      })
  }
}
/**
 * Map the "in:" qualifier onto the archive state: ARCHIVE requires
 * archivedAt to exist, INBOX requires it to be absent. InFilter.ALL is
 * handled by the caller and adds no clause.
 */
const appendInFilter = (body: SearchBody, filter: InFilter): void => {
  if (filter === InFilter.ARCHIVE) {
    body.query.bool.filter.push({
      exists: {
        field: 'archivedAt',
      },
    })
  } else if (filter === InFilter.INBOX) {
    body.query.bool.must_not.push({
      exists: {
        field: 'archivedAt',
      },
    })
  }
}
/** Append a filter clause for each "has:" qualifier in the search query. */
const appendHasFilters = (body: SearchBody, filters: HasFilter[]): void => {
  filters.forEach((filter) => {
    switch (filter) {
      case HasFilter.HIGHLIGHTS:
        // nested exists: matches pages with at least one highlight object
        body.query.bool.filter.push({
          nested: {
            path: 'highlights',
            query: {
              exists: {
                field: 'highlights',
              },
            },
          },
        })
        break
      case HasFilter.SHARED_AT:
        // pages that have been shared (sharedAt is set)
        body.query.bool.filter.push({
          exists: {
            field: 'sharedAt',
          },
        })
        break
    }
  })
}
/**
 * Exclude pages carrying the given labels. Mirrors appendIncludeLabelFilter
 * but pushes onto must_not.
 *
 * Fix: the nested `query` was previously given a raw array of terms clauses,
 * which is not a valid Elasticsearch query body; the clauses are now wrapped
 * in a bool.filter, matching the include variant.
 */
const appendExcludeLabelFilter = (
  body: SearchBody,
  filters: LabelFilter[]
): void => {
  body.query.bool.must_not.push({
    nested: {
      path: 'labels',
      query: {
        bool: {
          filter: filters.map((filter) => {
            return {
              terms: {
                'labels.name': filter.labels,
              },
            }
          }),
        },
      },
    },
  })
}
/**
 * Require pages to carry the included labels: for each filter, the page
 * must have at least one label from that filter's label list (all filters
 * must be satisfied).
 */
const appendIncludeLabelFilter = (
  body: SearchBody,
  filters: LabelFilter[]
): void => {
  const termsClauses = filters.map((filter) => {
    return {
      terms: {
        'labels.name': filter.labels,
      },
    }
  })
  body.query.bool.filter.push({
    nested: {
      path: 'labels',
      query: {
        bool: {
          filter: termsClauses,
        },
      },
    },
  })
}
/** Constrain results to pages saved strictly inside the given date range. */
const appendSavedDateFilter = (
  body: SearchBody,
  filter: DateRangeFilter
): void => {
  const { startDate, endDate } = filter
  body.query.bool.filter.push({
    range: {
      savedAt: {
        gt: startDate,
        lt: endDate,
      },
    },
  })
}
/** Constrain results to pages published strictly inside the given date range. */
const appendPublishedDateFilter = (
  body: SearchBody,
  filter: DateRangeFilter
): void => {
  const { startDate, endDate } = filter
  body.query.bool.filter.push({
    range: {
      publishedAt: {
        gt: startDate,
        lt: endDate,
      },
    },
  })
}
/**
 * Index a new page document, stamping savedAt/updatedAt, then publish a
 * pageCreated event. Returns the document id, or undefined on failure.
 */
export const createPage = async (
  page: Page,
  ctx: PageContext
): Promise<string | undefined> => {
  try {
    const response = await client.index({
      id: page.id || undefined,
      index: INDEX_ALIAS,
      body: {
        ...page,
        updatedAt: new Date(),
        savedAt: new Date(),
      },
      refresh: ctx.refresh,
    })
    // notify subscribers only after the index call succeeded
    await ctx.pubsub.pageCreated(page)
    return response.body._id as string
  } catch (e) {
    console.error('failed to create a page in elastic', e)
    return undefined
  }
}
/**
 * Partially update a page document, stamping updatedAt, then publish a
 * pageUpdated event. Returns true only when Elasticsearch reports the
 * document changed ('noop' results return false and skip the event).
 *
 * NOTE(review): unlike addHighlightToPage, a missing document here falls
 * through to the generic error path — consider handling
 * document_missing_exception explicitly for parity.
 */
export const updatePage = async (
  id: string,
  page: Partial<Page>,
  ctx: PageContext
): Promise<boolean> => {
  try {
    const { body } = await client.update({
      index: INDEX_ALIAS,
      id,
      body: {
        doc: {
          ...page,
          updatedAt: new Date(),
        },
      },
      refresh: ctx.refresh,
      // tolerate concurrent writers updating the same document
      retry_on_conflict: 3,
    })
    if (body.result !== 'updated') return false
    await ctx.pubsub.pageUpdated({ ...page, id }, ctx.uid)
    return true
  } catch (e) {
    console.error('failed to update a page in elastic', e)
    return false
  }
}
/**
 * Delete a page document and publish a pageDeleted event.
 * Returns false when nothing was deleted or the request failed.
 */
export const deletePage = async (
  id: string,
  ctx: PageContext
): Promise<boolean> => {
  try {
    const response = await client.delete({
      index: INDEX_ALIAS,
      id,
      refresh: ctx.refresh,
    })
    if (response.body.deleted === 0) {
      return false
    }
    // only notify subscribers after a confirmed deletion
    await ctx.pubsub.pageDeleted(id, ctx.uid)
    return true
  } catch (e) {
    console.error('failed to delete a page in elastic', e)
    return false
  }
}
/**
 * Fetch a single page matching every key/value pair in `param` exactly
 * (one term filter per key). originalHtml is excluded from the returned
 * source. Returns undefined when nothing matches or the search fails.
 */
export const getPageByParam = async <K extends keyof ParamSet>(
  param: Record<K, Page[K]>
): Promise<Page | undefined> => {
  try {
    const params = {
      query: {
        bool: {
          // all provided parameters must match
          filter: Object.keys(param).map((key) => {
            return {
              term: {
                [key]: param[key as K],
              },
            }
          }),
        },
      },
      size: 1,
      _source: {
        excludes: ['originalHtml'],
      },
    }
    const { body } = await client.search<SearchResponse<Page>>({
      index: INDEX_ALIAS,
      body: params,
    })
    if (body.hits.total.value === 0) {
      return undefined
    }
    // the ES _id is authoritative; merge it over the stored source
    return {
      ...body.hits.hits[0]._source,
      id: body.hits.hits[0]._id,
    } as Page
  } catch (e) {
    console.error('failed to search pages in elastic', e)
    return undefined
  }
}
/**
 * Fetch a page document directly by its Elasticsearch id.
 * Returns undefined when the document does not exist or the request fails.
 */
export const getPageById = async (id: string): Promise<Page | undefined> => {
  try {
    const response = await client.get({
      index: INDEX_ALIAS,
      id,
    })
    const page = {
      ...response.body._source,
      id: response.body._id as string,
    }
    return page as Page
  } catch (e) {
    console.error('failed to search pages in elastic', e)
    return undefined
  }
}
/**
 * Search the user's pages with an optional free-text query plus read-state,
 * in/archive, type, label, has:, and date-range filters.
 *
 * Returns [pages, total] (content fields stripped from results) or
 * undefined on error. Defaults: first 10 results, sorted by savedAt
 * descending, all read states, inbox+archive, no label/has filters.
 */
export const searchPages = async (
  args: {
    from?: number
    size?: number
    sort?: SortParams
    query?: string
    inFilter?: InFilter
    readFilter?: ReadFilter
    typeFilter?: PageType
    labelFilters?: LabelFilter[]
    hasFilters?: HasFilter[]
    savedDateFilter?: DateRangeFilter
    publishedDateFilter?: DateRangeFilter
  },
  userId: string
): Promise<[Page[], number] | undefined> => {
  try {
    const {
      from = 0,
      size = 10,
      sort,
      query,
      readFilter = ReadFilter.ALL,
      typeFilter,
      labelFilters = [],
      inFilter = InFilter.ALL,
      hasFilters = [],
      savedDateFilter,
      publishedDateFilter,
    } = args
    // default order is descending
    const sortOrder = sort?.order || SortOrder.DESCENDING
    // default sort by saved_at
    const sortField = sort?.by || SortBy.SAVED
    const includeLabels = labelFilters.filter(
      (filter) => filter.type === LabelFilterType.INCLUDE
    )
    const excludeLabels = labelFilters.filter(
      (filter) => filter.type === LabelFilterType.EXCLUDE
    )
    // base query: always scoped to the caller's documents; the append*
    // helpers below add clauses to filter/should/must_not as needed
    const body: SearchBody = {
      query: {
        bool: {
          filter: [
            {
              term: {
                userId,
              },
            },
          ],
          should: [],
          must_not: [],
        },
      },
      sort: [
        {
          [sortField]: {
            order: sortOrder,
          },
        },
      ],
      from,
      size,
      // heavy fields are excluded from page search results; highlights are
      // served by the dedicated highlight search
      _source: {
        excludes: ['originalHtml', 'content', 'highlights'],
      },
    }
    // append filters
    if (query) {
      appendQuery(body, query)
    }
    if (typeFilter) {
      appendTypeFilter(body, typeFilter)
    }
    if (inFilter !== InFilter.ALL) {
      appendInFilter(body, inFilter)
    }
    if (readFilter !== ReadFilter.ALL) {
      appendReadFilter(body, readFilter)
    }
    if (hasFilters.length > 0) {
      appendHasFilters(body, hasFilters)
    }
    if (includeLabels.length > 0) {
      appendIncludeLabelFilter(body, includeLabels)
    }
    if (excludeLabels.length > 0) {
      appendExcludeLabelFilter(body, excludeLabels)
    }
    if (savedDateFilter) {
      appendSavedDateFilter(body, savedDateFilter)
    }
    if (publishedDateFilter) {
      appendPublishedDateFilter(body, publishedDateFilter)
    }
    console.log('searching pages in elastic', JSON.stringify(body))
    const response = await client.search<SearchResponse<Page>, SearchBody>({
      index: INDEX_ALIAS,
      body,
    })
    if (response.body.hits.total.value === 0) {
      return [[], 0]
    }
    return [
      response.body.hits.hits.map((hit: { _source: Page; _id: string }) => ({
        ...hit._source,
        content: '',
        id: hit._id,
      })),
      response.body.hits.total.value,
    ]
  } catch (e) {
    console.error('failed to search pages in elastic', e)
    return undefined
  }
}
/**
 * Count a user's pages in elastic whose createdAt falls inside [from, to].
 *
 * Either bound may be omitted: `undefined` gte/lte values are dropped when
 * the request body is serialized to JSON, leaving an open-ended range.
 *
 * @param userId - owner whose pages are counted
 * @param from - optional lower bound (epoch millis) for createdAt
 * @param to - optional upper bound (epoch millis) for createdAt
 * @returns the matching document count, or 0 if the request fails
 */
export const countByCreatedAt = async (
  userId: string,
  from?: number,
  to?: number
): Promise<number> => {
  // Restrict to the owning user and the createdAt window.
  const filter = [
    { term: { userId } },
    { range: { createdAt: { gte: from, lte: to } } },
  ]
  try {
    const response = await client.count({
      index: INDEX_ALIAS,
      body: { query: { bool: { filter } } },
    })
    return response.body.count as number
  } catch (e) {
    // Swallow the error deliberately: callers treat a failed count as zero.
    console.error('failed to count pages in elastic', e)
    return 0
  }
}

View File

@ -1,5 +1,4 @@
// Define the type of the body for the Search request
import { Label, PageType } from '../generated/graphql'
import { PickTuple } from '../util'
import { PubsubClient } from '../datalayer/pubsub'
@ -19,6 +18,16 @@ export interface SearchBody {
readingProgress: { gte: number } | { lt: number }
}
}
| {
range: {
savedAt: { gt: Date | undefined } | { lt: Date | undefined }
}
}
| {
range: {
publishedAt: { gt: Date | undefined } | { lt: Date | undefined }
}
}
| {
nested: {
path: 'labels'
@ -33,6 +42,16 @@ export interface SearchBody {
}
}
}
| {
nested: {
path: 'highlights'
query: {
exists: {
field: 'highlights'
}
}
}
}
)[]
should: {
multi_match: {
@ -109,7 +128,7 @@ export interface SearchResponse<T> {
_explanation?: Explanation
fields?: never
highlight?: never
inner_hits?: never
inner_hits?: any
matched_queries?: string[]
sort?: string[]
}>
@ -117,6 +136,38 @@ export interface SearchResponse<T> {
aggregations?: never
}
// Coarse classification of a saved item, mirroring the GraphQL PageType enum.
export enum PageType {
Article = 'ARTICLE',
Book = 'BOOK',
File = 'FILE',
Profile = 'PROFILE',
Unknown = 'UNKNOWN',
Website = 'WEBSITE',
// Pseudo-type used by the search API: `type:highlights` in a search query
// routes the search to highlight results instead of pages.
Highlights = 'HIGHLIGHTS',
}
// Label attached to a page, as stored on the elastic page document.
export interface Label {
id: string
name: string
// display color — presumably a hex string; TODO confirm against the UI
color: string
description?: string
createdAt?: Date
}
// Highlight stored inline on a Page document in elastic (nested field).
export interface Highlight {
id: string
// short identifier used in shareable highlight URLs
shortId: string
// patch data locating the highlight in the page — format not shown here
patch: string
// the highlighted text itself
quote: string
userId: string
createdAt: Date
// text adjacent to the quote; presumably used to anchor it in the content
prefix?: string | null
suffix?: string | null
// user-written note attached to the highlight
annotation?: string | null
sharedAt?: Date | null
updatedAt?: Date | null
}
export interface Page {
id: string
userId: string
@ -143,6 +194,29 @@ export interface Page {
siteName?: string
_id?: string
siteIcon?: string
highlights?: Highlight[]
}
// Flattened search hit returned by the search endpoint. Covers both page
// results and highlight results; highlight-only fields (pageId, quote,
// shortId, annotation) are optional and absent for page hits.
export interface SearchItem {
annotation?: string | null
author?: string | null
createdAt: Date
description?: string | null
id: string
image?: string | null
// for highlight results: id of the page the highlight belongs to
pageId?: string
pageType: PageType
publishedAt?: Date
quote?: string | null
shortId?: string | null
slug: string
title: string
uploadFileId?: string | null
url: string
archivedAt?: Date | null
readingProgressPercent?: number
readingProgressAnchorIndex?: number
userId: string
}
// Field names picked out of Page documents; used with PickTuple (imported
// above) — the consuming code is not visible in this chunk.
const keys = ['_id', 'url', 'slug', 'userId', 'uploadFileId'] as const

View File

@ -615,7 +615,6 @@ export type GoogleSignupSuccess = {
export type Highlight = {
__typename?: 'Highlight';
annotation?: Maybe<Scalars['String']>;
article: Article;
createdAt: Scalars['Date'];
createdByMe: Scalars['Boolean'];
id: Scalars['ID'];
@ -1087,6 +1086,7 @@ export enum PageType {
Article = 'ARTICLE',
Book = 'BOOK',
File = 'FILE',
Highlights = 'HIGHLIGHTS',
Profile = 'PROFILE',
Unknown = 'UNKNOWN',
Website = 'WEBSITE'
@ -1120,6 +1120,7 @@ export type Query = {
me?: Maybe<User>;
newsletterEmails: NewsletterEmailsResult;
reminder: ReminderResult;
search: SearchResult;
sharedArticle: SharedArticleResult;
user: UserResult;
users: UsersResult;
@ -1170,6 +1171,13 @@ export type QueryReminderArgs = {
};
// Arguments for the Query.search field: cursor-based pagination plus the
// raw search query string. Generated from the GraphQL schema.
export type QuerySearchArgs = {
after?: InputMaybe<Scalars['String']>;
first?: InputMaybe<Scalars['Int']>;
query?: InputMaybe<Scalars['String']>;
};
export type QuerySharedArticleArgs = {
selectedHighlightId?: InputMaybe<Scalars['String']>;
slug: Scalars['String'];
@ -1323,6 +1331,55 @@ export type SaveUrlInput = {
url: Scalars['String'];
};
// --- Search result types (GraphQL codegen output; avoid hand-editing) ---

// Error branch of the SearchResult union.
export type SearchError = {
__typename?: 'SearchError';
errorCodes: Array<SearchErrorCode>;
};

export enum SearchErrorCode {
Unauthorized = 'UNAUTHORIZED'
}

// A single search hit; serves both page and highlight results, so the
// highlight-specific fields (pageId, quote, shortId, annotation) are nullable.
export type SearchItem = {
__typename?: 'SearchItem';
annotation?: Maybe<Scalars['String']>;
author?: Maybe<Scalars['String']>;
contentReader: ContentReader;
createdAt: Scalars['Date'];
description?: Maybe<Scalars['String']>;
id: Scalars['ID'];
image?: Maybe<Scalars['String']>;
isArchived: Scalars['Boolean'];
labels?: Maybe<Array<Label>>;
originalArticleUrl?: Maybe<Scalars['String']>;
ownedByViewer?: Maybe<Scalars['Boolean']>;
pageId?: Maybe<Scalars['ID']>;
pageType: PageType;
publishedAt?: Maybe<Scalars['Date']>;
quote?: Maybe<Scalars['String']>;
readingProgressAnchorIndex?: Maybe<Scalars['Int']>;
readingProgressPercent?: Maybe<Scalars['Float']>;
shortId?: Maybe<Scalars['String']>;
slug: Scalars['String'];
title: Scalars['String'];
uploadFileId?: Maybe<Scalars['ID']>;
url: Scalars['String'];
};

// Relay-style edge wrapping one SearchItem with its pagination cursor.
export type SearchItemEdge = {
__typename?: 'SearchItemEdge';
cursor: Scalars['String'];
node: SearchItem;
};

export type SearchResult = SearchError | SearchSuccess;

// Success branch: a page of edges plus pagination info.
export type SearchSuccess = {
__typename?: 'SearchSuccess';
edges: Array<SearchItemEdge>;
pageInfo: PageInfo;
};
export type SetBookmarkArticleError = {
__typename?: 'SetBookmarkArticleError';
errorCodes: Array<SetBookmarkArticleErrorCode>;
@ -2094,6 +2151,12 @@ export type ResolversTypes = {
SaveResult: ResolversTypes['SaveError'] | ResolversTypes['SaveSuccess'];
SaveSuccess: ResolverTypeWrapper<SaveSuccess>;
SaveUrlInput: SaveUrlInput;
SearchError: ResolverTypeWrapper<SearchError>;
SearchErrorCode: SearchErrorCode;
SearchItem: ResolverTypeWrapper<SearchItem>;
SearchItemEdge: ResolverTypeWrapper<SearchItemEdge>;
SearchResult: ResolversTypes['SearchError'] | ResolversTypes['SearchSuccess'];
SearchSuccess: ResolverTypeWrapper<SearchSuccess>;
SetBookmarkArticleError: ResolverTypeWrapper<SetBookmarkArticleError>;
SetBookmarkArticleErrorCode: SetBookmarkArticleErrorCode;
SetBookmarkArticleInput: SetBookmarkArticleInput;
@ -2346,6 +2409,11 @@ export type ResolversParentTypes = {
SaveResult: ResolversParentTypes['SaveError'] | ResolversParentTypes['SaveSuccess'];
SaveSuccess: SaveSuccess;
SaveUrlInput: SaveUrlInput;
SearchError: SearchError;
SearchItem: SearchItem;
SearchItemEdge: SearchItemEdge;
SearchResult: ResolversParentTypes['SearchError'] | ResolversParentTypes['SearchSuccess'];
SearchSuccess: SearchSuccess;
SetBookmarkArticleError: SetBookmarkArticleError;
SetBookmarkArticleInput: SetBookmarkArticleInput;
SetBookmarkArticleResult: ResolversParentTypes['SetBookmarkArticleError'] | ResolversParentTypes['SetBookmarkArticleSuccess'];
@ -2864,7 +2932,6 @@ export type GoogleSignupSuccessResolvers<ContextType = ResolverContext, ParentTy
export type HighlightResolvers<ContextType = ResolverContext, ParentType extends ResolversParentTypes['Highlight'] = ResolversParentTypes['Highlight']> = {
annotation?: Resolver<Maybe<ResolversTypes['String']>, ParentType, ContextType>;
article?: Resolver<ResolversTypes['Article'], ParentType, ContextType>;
createdAt?: Resolver<ResolversTypes['Date'], ParentType, ContextType>;
createdByMe?: Resolver<ResolversTypes['Boolean'], ParentType, ContextType>;
id?: Resolver<ResolversTypes['ID'], ParentType, ContextType>;
@ -3100,6 +3167,7 @@ export type QueryResolvers<ContextType = ResolverContext, ParentType extends Res
me?: Resolver<Maybe<ResolversTypes['User']>, ParentType, ContextType>;
newsletterEmails?: Resolver<ResolversTypes['NewsletterEmailsResult'], ParentType, ContextType>;
reminder?: Resolver<ResolversTypes['ReminderResult'], ParentType, ContextType, RequireFields<QueryReminderArgs, 'linkId'>>;
search?: Resolver<ResolversTypes['SearchResult'], ParentType, ContextType, Partial<QuerySearchArgs>>;
sharedArticle?: Resolver<ResolversTypes['SharedArticleResult'], ParentType, ContextType, RequireFields<QuerySharedArticleArgs, 'slug' | 'username'>>;
user?: Resolver<ResolversTypes['UserResult'], ParentType, ContextType, Partial<QueryUserArgs>>;
users?: Resolver<ResolversTypes['UsersResult'], ParentType, ContextType>;
@ -3180,6 +3248,53 @@ export type SaveSuccessResolvers<ContextType = ResolverContext, ParentType exten
__isTypeOf?: IsTypeOfResolverFn<ParentType, ContextType>;
};
// --- Resolver type maps for the search types (GraphQL codegen output) ---

export type SearchErrorResolvers<ContextType = ResolverContext, ParentType extends ResolversParentTypes['SearchError'] = ResolversParentTypes['SearchError']> = {
errorCodes?: Resolver<Array<ResolversTypes['SearchErrorCode']>, ParentType, ContextType>;
__isTypeOf?: IsTypeOfResolverFn<ParentType, ContextType>;
};

// Field resolvers for SearchItem; every field is optional because codegen
// falls back to default (property-access) resolution when none is supplied.
export type SearchItemResolvers<ContextType = ResolverContext, ParentType extends ResolversParentTypes['SearchItem'] = ResolversParentTypes['SearchItem']> = {
annotation?: Resolver<Maybe<ResolversTypes['String']>, ParentType, ContextType>;
author?: Resolver<Maybe<ResolversTypes['String']>, ParentType, ContextType>;
contentReader?: Resolver<ResolversTypes['ContentReader'], ParentType, ContextType>;
createdAt?: Resolver<ResolversTypes['Date'], ParentType, ContextType>;
description?: Resolver<Maybe<ResolversTypes['String']>, ParentType, ContextType>;
id?: Resolver<ResolversTypes['ID'], ParentType, ContextType>;
image?: Resolver<Maybe<ResolversTypes['String']>, ParentType, ContextType>;
isArchived?: Resolver<ResolversTypes['Boolean'], ParentType, ContextType>;
labels?: Resolver<Maybe<Array<ResolversTypes['Label']>>, ParentType, ContextType>;
originalArticleUrl?: Resolver<Maybe<ResolversTypes['String']>, ParentType, ContextType>;
ownedByViewer?: Resolver<Maybe<ResolversTypes['Boolean']>, ParentType, ContextType>;
pageId?: Resolver<Maybe<ResolversTypes['ID']>, ParentType, ContextType>;
pageType?: Resolver<ResolversTypes['PageType'], ParentType, ContextType>;
publishedAt?: Resolver<Maybe<ResolversTypes['Date']>, ParentType, ContextType>;
quote?: Resolver<Maybe<ResolversTypes['String']>, ParentType, ContextType>;
readingProgressAnchorIndex?: Resolver<Maybe<ResolversTypes['Int']>, ParentType, ContextType>;
readingProgressPercent?: Resolver<Maybe<ResolversTypes['Float']>, ParentType, ContextType>;
shortId?: Resolver<Maybe<ResolversTypes['String']>, ParentType, ContextType>;
slug?: Resolver<ResolversTypes['String'], ParentType, ContextType>;
title?: Resolver<ResolversTypes['String'], ParentType, ContextType>;
uploadFileId?: Resolver<Maybe<ResolversTypes['ID']>, ParentType, ContextType>;
url?: Resolver<ResolversTypes['String'], ParentType, ContextType>;
__isTypeOf?: IsTypeOfResolverFn<ParentType, ContextType>;
};

export type SearchItemEdgeResolvers<ContextType = ResolverContext, ParentType extends ResolversParentTypes['SearchItemEdge'] = ResolversParentTypes['SearchItemEdge']> = {
cursor?: Resolver<ResolversTypes['String'], ParentType, ContextType>;
node?: Resolver<ResolversTypes['SearchItem'], ParentType, ContextType>;
__isTypeOf?: IsTypeOfResolverFn<ParentType, ContextType>;
};

// Union type resolver: picks the concrete __typename for a SearchResult.
export type SearchResultResolvers<ContextType = ResolverContext, ParentType extends ResolversParentTypes['SearchResult'] = ResolversParentTypes['SearchResult']> = {
__resolveType: TypeResolveFn<'SearchError' | 'SearchSuccess', ParentType, ContextType>;
};

export type SearchSuccessResolvers<ContextType = ResolverContext, ParentType extends ResolversParentTypes['SearchSuccess'] = ResolversParentTypes['SearchSuccess']> = {
edges?: Resolver<Array<ResolversTypes['SearchItemEdge']>, ParentType, ContextType>;
pageInfo?: Resolver<ResolversTypes['PageInfo'], ParentType, ContextType>;
__isTypeOf?: IsTypeOfResolverFn<ParentType, ContextType>;
};
export type SetBookmarkArticleErrorResolvers<ContextType = ResolverContext, ParentType extends ResolversParentTypes['SetBookmarkArticleError'] = ResolversParentTypes['SetBookmarkArticleError']> = {
errorCodes?: Resolver<Array<ResolversTypes['SetBookmarkArticleErrorCode']>, ParentType, ContextType>;
__isTypeOf?: IsTypeOfResolverFn<ParentType, ContextType>;
@ -3620,6 +3735,11 @@ export type Resolvers<ContextType = ResolverContext> = {
SaveError?: SaveErrorResolvers<ContextType>;
SaveResult?: SaveResultResolvers<ContextType>;
SaveSuccess?: SaveSuccessResolvers<ContextType>;
SearchError?: SearchErrorResolvers<ContextType>;
SearchItem?: SearchItemResolvers<ContextType>;
SearchItemEdge?: SearchItemEdgeResolvers<ContextType>;
SearchResult?: SearchResultResolvers<ContextType>;
SearchSuccess?: SearchSuccessResolvers<ContextType>;
SetBookmarkArticleError?: SetBookmarkArticleErrorResolvers<ContextType>;
SetBookmarkArticleResult?: SetBookmarkArticleResultResolvers<ContextType>;
SetBookmarkArticleSuccess?: SetBookmarkArticleSuccessResolvers<ContextType>;

View File

@ -540,7 +540,6 @@ type GoogleSignupSuccess {
type Highlight {
annotation: String
article: Article!
createdAt: Date!
createdByMe: Boolean!
id: ID!
@ -788,6 +787,7 @@ enum PageType {
ARTICLE
BOOK
FILE
HIGHLIGHTS
PROFILE
UNKNOWN
WEBSITE
@ -819,6 +819,7 @@ type Query {
me: User
newsletterEmails: NewsletterEmailsResult!
reminder(linkId: ID!): ReminderResult!
search(after: String, first: Int, query: String): SearchResult!
sharedArticle(selectedHighlightId: String, slug: String!, username: String!): SharedArticleResult!
user(userId: ID, username: String): UserResult!
users: UsersResult!
@ -951,6 +952,51 @@ input SaveUrlInput {
url: String!
}
type SearchError {
errorCodes: [SearchErrorCode!]!
}
enum SearchErrorCode {
UNAUTHORIZED
}
type SearchItem {
annotation: String
author: String
contentReader: ContentReader!
createdAt: Date!
description: String
id: ID!
image: String
isArchived: Boolean!
labels: [Label!]
originalArticleUrl: String
ownedByViewer: Boolean
pageId: ID
pageType: PageType!
publishedAt: Date
quote: String
readingProgressAnchorIndex: Int
readingProgressPercent: Float
shortId: String
slug: String!
title: String!
uploadFileId: ID
url: String!
}
type SearchItemEdge {
cursor: String!
node: SearchItem!
}
union SearchResult = SearchError | SearchSuccess
type SearchSuccess {
edges: [SearchItemEdge!]!
pageInfo: PageInfo!
}
type SetBookmarkArticleError {
errorCodes: [SetBookmarkArticleErrorCode!]!
}

View File

@ -22,10 +22,14 @@ import {
PageType,
QueryArticleArgs,
QueryArticlesArgs,
QuerySearchArgs,
ResolverFn,
SaveArticleReadingProgressError,
SaveArticleReadingProgressErrorCode,
SaveArticleReadingProgressSuccess,
SearchError,
SearchItem,
SearchSuccess,
SetBookmarkArticleError,
SetBookmarkArticleErrorCode,
SetBookmarkArticleSuccess,
@ -67,6 +71,8 @@ import { createPageSaveRequest } from '../../services/create_page_save_request'
import { createIntercomEvent } from '../../utils/intercom'
import { analytics } from '../../utils/analytics'
import { env } from '../../env'
import { Page, SearchItem as SearchItemData } from '../../elastic/types'
import {
createPage,
deletePage,
@ -74,8 +80,8 @@ import {
getPageByParam,
searchPages,
updatePage,
} from '../../elastic'
import { Page } from '../../elastic/types'
} from '../../elastic/pages'
import { searchHighlights } from '../../elastic/highlights'
export type PartialArticle = Omit<
Article,
@ -438,44 +444,45 @@ export const getArticlesResolver = authorized<
ArticlesError,
QueryArticlesArgs
>(async (_obj, params, { claims }) => {
const notNullField = params.sharedOnly ? 'sharedAt' : null
const startCursor = params.after || ''
const first = params.first || 10
// Perform basic sanitization. Right now we just allow alphanumeric, space and quote
// so queries can contain phrases like "human race";
// We can also split out terms like "label:unread".
const searchQuery = parseSearchQuery(params.query || undefined)
analytics.track({
userId: claims.uid,
event: 'search',
event: 'get_articles',
properties: {
env: env.server.apiEnv,
query: searchQuery.query,
inFilter: searchQuery.inFilter,
readFilter: searchQuery.readFilter,
typeFilter: searchQuery.typeFilter,
labelFilters: searchQuery.labelFilters,
sortParams: searchQuery.sortParams,
env: env.server.apiEnv,
hasFilters: searchQuery.hasFilters,
savedDateFilter: searchQuery.savedDateFilter,
publishedDateFilter: searchQuery.publishedDateFilter,
},
})
await createIntercomEvent('search', claims.uid)
await createIntercomEvent('get_articles', claims.uid)
const [pages, totalCount] = (await searchPages(
{
from: Number(startCursor),
size: first + 1, // fetch one more item to get next cursor
sort: searchQuery.sortParams || params.sort || undefined,
sort: searchQuery.sortParams,
query: searchQuery.query,
inFilter: searchQuery.inFilter,
readFilter: searchQuery.readFilter,
typeFilter: searchQuery.typeFilter,
labelFilters: searchQuery.labelFilters,
hasFilters: searchQuery.hasFilters,
savedDateFilter: searchQuery.savedDateFilter,
publishedDateFilter: searchQuery.publishedDateFilter,
},
claims.uid,
notNullField
claims.uid
)) || [[], 0]
const start =
@ -792,3 +799,106 @@ export const getReadingProgressAnchorIndexForArticleResolver: ResolverFn<
return articleReadingProgressAnchorIndex || 0
}
/**
 * Resolver for Query.search. Parses the raw query string into filters,
 * records analytics, then dispatches to either highlight search
 * (`type:highlights`) or page search in elastic, and shapes the hits
 * into a Relay-style connection of SearchItem edges.
 *
 * Pagination: `after` is a numeric offset encoded as a string; one extra
 * item is fetched to detect whether a next page exists.
 */
export const searchResolver = authorized<
SearchSuccess,
SearchError,
QuerySearchArgs
>(async (_obj, params, { claims }) => {
const startCursor = params.after || ''
const first = params.first || 10

// Parse filters (in:, type:, label:, has:, dates, sort) out of the query.
const searchQuery = parseSearchQuery(params.query || undefined)

analytics.track({
userId: claims.uid,
event: 'search',
properties: {
query: searchQuery.query,
inFilter: searchQuery.inFilter,
readFilter: searchQuery.readFilter,
typeFilter: searchQuery.typeFilter,
labelFilters: searchQuery.labelFilters,
sortParams: searchQuery.sortParams,
hasFilters: searchQuery.hasFilters,
savedDateFilter: searchQuery.savedDateFilter,
publishedDateFilter: searchQuery.publishedDateFilter,
env: env.server.apiEnv,
},
})
await createIntercomEvent('search', claims.uid)

let results: (SearchItemData | Page)[]
let totalCount: number

const searchType = searchQuery.typeFilter
// search highlights if type:highlights
if (searchType === PageType.Highlights) {
;[results, totalCount] = (await searchHighlights(
{
from: Number(startCursor),
size: first + 1, // fetch one more item to get next cursor
sort: searchQuery.sortParams,
query: searchQuery.query,
},
claims.uid
)) || [[], 0]
} else {
// otherwise, search pages
;[results, totalCount] = (await searchPages(
{
from: Number(startCursor),
size: first + 1, // fetch one more item to get next cursor
sort: searchQuery.sortParams,
query: searchQuery.query,
inFilter: searchQuery.inFilter,
readFilter: searchQuery.readFilter,
typeFilter: searchQuery.typeFilter,
labelFilters: searchQuery.labelFilters,
hasFilters: searchQuery.hasFilters,
savedDateFilter: searchQuery.savedDateFilter,
publishedDateFilter: searchQuery.publishedDateFilter,
},
claims.uid
)) || [[], 0]
}

// A non-numeric cursor falls back to offset 0.
const start =
startCursor && !isNaN(Number(startCursor)) ? Number(startCursor) : 0
const hasNextPage = results.length > first
const endCursor = String(start + results.length - (hasNextPage ? 1 : 0))

if (hasNextPage) {
// remove an extra if exists
results.pop()
}

// Map raw elastic hits (Page or highlight SearchItemData) onto the
// GraphQL SearchItem shape. NOTE(review): every edge shares the same
// endCursor rather than a per-item cursor — confirm this is intended.
const edges = results.map((r) => {
return {
node: {
...r,
image: r.image && createImageProxyUrl(r.image, 88, 88),
isArchived: !!r.archivedAt,
contentReader:
r.pageType === PageType.File ? ContentReader.Pdf : ContentReader.Web,
originalArticleUrl: r.url,
publishedAt: validatedDate(r.publishedAt),
ownedByViewer: r.userId === claims.uid,
// highlight hits carry no pageType, so default to Highlights
pageType: r.pageType || PageType.Highlights,
} as SearchItem,
cursor: endCursor,
}
})

return {
edges,
pageInfo: {
hasPreviousPage: false,
startCursor,
hasNextPage: hasNextPage,
endCursor,
totalCount,
},
}
})

View File

@ -16,6 +16,7 @@ import {
LinkShareInfo,
PageType,
Reaction,
SearchItem,
User,
} from './../generated/graphql'
@ -54,6 +55,7 @@ import {
saveFileResolver,
savePageResolver,
saveUrlResolver,
searchResolver,
setBookmarkArticleResolver,
setDeviceTokenResolver,
setFollowResolver,
@ -78,7 +80,7 @@ import {
generateDownloadSignedUrl,
generateUploadFilePathName,
} from '../utils/uploads'
import { getPageById, getPageByParam } from '../elastic'
import { getPageByParam } from '../elastic/pages'
import { generateApiKeyResolver } from './api_key'
/* eslint-disable @typescript-eslint/naming-convention */
@ -157,6 +159,7 @@ export const functionResolvers = {
newsletterEmails: newsletterEmailsResolver,
reminder: reminderResolver,
labels: labelsResolver,
search: searchResolver,
},
User: {
async sharedArticles(
@ -394,32 +397,33 @@ export const functionResolvers = {
: ContentReader.Web
},
async highlights(
article: { id: string; userId?: string },
article: { id: string; userId?: string; highlights?: Highlight[] },
_: { input: ArticleHighlightsInput },
ctx: WithDataSourcesContext
) {
const includeFriends = false
// TODO: this is a temporary solution until we figure out how collaborative approach would look like
// article has userId only if it's returned by getSharedArticle resolver
if (article.userId) {
const result = await ctx.models.highlight.getForUserArticle(
article.userId,
article.id
)
return result
}
const friendsIds =
ctx.claims?.uid && includeFriends
? await ctx.models.userFriends.getFriends(ctx.claims?.uid)
: []
// FIXME: Move this filtering logic to the datalayer
return (await ctx.models.highlight.batchGet(article.id)).filter((h) =>
[...(includeFriends ? friendsIds : []), ctx.claims?.uid || ''].some(
(u) => u === h.userId
)
)
// const includeFriends = false
// // TODO: this is a temporary solution until we figure out how collaborative approach would look like
// // article has userId only if it's returned by getSharedArticle resolver
// if (article.userId) {
// const result = await ctx.models.highlight.getForUserArticle(
// article.userId,
// article.id
// )
// return result
// }
//
// const friendsIds =
// ctx.claims?.uid && includeFriends
// ? await ctx.models.userFriends.getFriends(ctx.claims?.uid)
// : []
//
// // FIXME: Move this filtering logic to the datalayer
// return (await ctx.models.highlight.batchGet(article.id)).filter((h) =>
// [...(includeFriends ? friendsIds : []), ctx.claims?.uid || ''].some(
// (u) => u === h.userId
// )
// )
return article.highlights || []
},
async shareInfo(
article: { id: string; sharedBy?: User; shareInfo?: LinkShareInfo },
@ -447,9 +451,6 @@ export const functionResolvers = {
},
},
Highlight: {
async article(highlight: { articleId: string }, __: unknown) {
return getPageById(highlight.articleId)
},
async user(
highlight: { userId: string },
__: unknown,
@ -484,6 +485,19 @@ export const functionResolvers = {
return userDataToUser(await ctx.models.user.get(reaction.userId))
},
},
SearchItem: {
// Resolve SearchItem.url: for uploaded files (PDFs) saved by an
// authenticated user, return a signed download URL for the stored file;
// otherwise fall through to the item's stored url.
async url(item: SearchItem, _: unknown, ctx: WithDataSourcesContext) {
if (item.pageType == PageType.File && ctx.claims && item.uploadFileId) {
const upload = await ctx.models.uploadFile.get(item.uploadFileId)
// Missing upload record or filename: no URL can be generated.
if (!upload || !upload.fileName) {
return undefined
}
const filePath = generateUploadFilePathName(upload.id, upload.fileName)
return generateDownloadSignedUrl(filePath)
}
return item.url
},
},
...resultResolveTypeResolver('Login'),
...resultResolveTypeResolver('LogOut'),
...resultResolveTypeResolver('GoogleSignup'),
@ -532,4 +546,5 @@ export const functionResolvers = {
...resultResolveTypeResolver('Signup'),
...resultResolveTypeResolver('SetLabels'),
...resultResolveTypeResolver('GenerateApiKey'),
...resultResolveTypeResolver('Search'),
}

View File

@ -3,7 +3,6 @@
/* eslint-disable @typescript-eslint/no-floating-promises */
import { authorized } from '../../utils/helpers'
import {
Article,
CreateHighlightError,
CreateHighlightErrorCode,
CreateHighlightSuccess,
@ -27,15 +26,20 @@ import {
UpdateHighlightSuccess,
User,
} from '../../generated/graphql'
import { HighlightData } from '../../datalayer/highlight/model'
import { env } from '../../env'
import { analytics } from '../../utils/analytics'
import { getPageById } from '../../elastic'
import { Highlight as HighlightData } from '../../elastic/types'
import { getPageById, updatePage } from '../../elastic/pages'
import {
addHighlightToPage,
deleteHighlight,
getHighlightById,
updateHighlight,
} from '../../elastic/highlights'
const highlightDataToHighlight = (highlight: HighlightData): Highlight => ({
...highlight,
user: highlight.userId as unknown as User,
article: highlight.articleId as unknown as Article,
updatedAt: highlight.updatedAt || highlight.createdAt,
replies: [],
reactions: [],
@ -46,11 +50,10 @@ export const createHighlightResolver = authorized<
CreateHighlightSuccess,
CreateHighlightError,
MutationCreateHighlightArgs
>(async (_, { input }, { models, claims, log }) => {
const { articleId } = input
const article = await getPageById(articleId)
if (!article) {
>(async (_, { input }, { claims, log, pubsub }) => {
const { articleId: pageId } = input
const page = await getPageById(pageId)
if (!page) {
return {
errorCodes: [CreateHighlightErrorCode.NotFound],
}
@ -60,7 +63,7 @@ export const createHighlightResolver = authorized<
userId: claims.uid,
event: 'highlight_created',
properties: {
articleId: article.id,
pageId,
env: env.server.apiEnv,
},
})
@ -72,12 +75,23 @@ export const createHighlightResolver = authorized<
}
try {
const highlight = await models.highlight.create({
...input,
articleId: undefined,
const highlight: HighlightData = {
updatedAt: new Date(),
createdAt: new Date(),
userId: claims.uid,
elasticPageId: article.id,
})
...input,
}
if (
!(await addHighlightToPage(pageId, highlight, {
pubsub,
uid: claims.uid,
}))
) {
return {
errorCodes: [CreateHighlightErrorCode.NotFound],
}
}
log.info('Creating a new highlight', {
highlight,
@ -101,22 +115,27 @@ export const mergeHighlightResolver = authorized<
MergeHighlightSuccess,
MergeHighlightError,
MutationMergeHighlightArgs
>(async (_, { input }, { authTrx, models, claims, log }) => {
const { articleId } = input
>(async (_, { input }, { claims, log, pubsub }) => {
const { articleId: pageId } = input
const { overlapHighlightIdList, ...newHighlightInput } = input
const articleHighlights = await models.highlight.batchGet(articleId)
if (!articleHighlights.length) {
const page = await getPageById(pageId)
if (!page || !page.highlights) {
return {
errorCodes: [MergeHighlightErrorCode.NotFound],
}
}
const articleHighlights = page.highlights
/* Compute merged annotation form the order of highlights appearing on page */
const overlapAnnotations: { [id: string]: string } = {}
articleHighlights.forEach((highlight) => {
if (overlapHighlightIdList.includes(highlight.id) && highlight.annotation) {
overlapAnnotations[highlight.id] = highlight.annotation
articleHighlights.forEach((highlight, index) => {
if (overlapHighlightIdList.includes(highlight.id)) {
articleHighlights.splice(index, 1)
if (highlight.annotation) {
overlapAnnotations[highlight.id] = highlight.annotation
}
}
})
const mergedAnnotation: string[] = []
@ -127,17 +146,20 @@ export const mergeHighlightResolver = authorized<
})
try {
const highlight = await authTrx(async (tx) => {
await models.highlight.deleteMany(overlapHighlightIdList, tx)
return await models.highlight.create({
...newHighlightInput,
articleId: undefined,
annotation: mergedAnnotation ? mergedAnnotation.join('\n') : null,
userId: claims.uid,
elasticPageId: newHighlightInput.articleId,
})
})
if (!highlight) {
const highlight: HighlightData = {
...newHighlightInput,
updatedAt: new Date(),
createdAt: new Date(),
userId: claims.uid,
annotation: mergedAnnotation ? mergedAnnotation.join('\n') : null,
}
const merged = await updatePage(
pageId,
{ highlights: articleHighlights.concat(highlight) },
{ pubsub, uid: claims.uid }
)
if (!merged) {
throw new Error('Failed to create merged highlight')
}
@ -147,7 +169,7 @@ export const mergeHighlightResolver = authorized<
source: 'resolver',
resolver: 'mergeHighlightResolver',
uid: claims.uid,
articleId: articleId,
pageId,
},
})
@ -175,9 +197,9 @@ export const updateHighlightResolver = authorized<
UpdateHighlightSuccess,
UpdateHighlightError,
MutationUpdateHighlightArgs
>(async (_, { input }, { authTrx, models, claims, log }) => {
>(async (_, { input }, { pubsub, claims, log }) => {
const { highlightId } = input
const highlight = await models.highlight.get(highlightId)
const highlight = await getHighlightById(highlightId)
if (!highlight?.id) {
return {
@ -197,16 +219,11 @@ export const updateHighlightResolver = authorized<
}
}
const updatedHighlight = await authTrx((tx) =>
models.highlight.update(
highlightId,
{
annotation: input.annotation,
sharedAt: input.sharedAt,
},
tx
)
)
const updatedHighlight: HighlightData = {
...highlight,
annotation: input.annotation,
updatedAt: new Date(),
}
log.info('Updating a highlight', {
updatedHighlight,
@ -217,6 +234,17 @@ export const updateHighlightResolver = authorized<
},
})
const updated = await updateHighlight(updatedHighlight, {
pubsub,
uid: claims.uid,
})
if (!updated) {
return {
errorCodes: [UpdateHighlightErrorCode.NotFound],
}
}
return { highlight: highlightDataToHighlight(updatedHighlight) }
})
@ -224,8 +252,8 @@ export const deleteHighlightResolver = authorized<
DeleteHighlightSuccess,
DeleteHighlightError,
MutationDeleteHighlightArgs
>(async (_, { highlightId }, { authTrx, models, claims, log }) => {
const highlight = await models.highlight.get(highlightId)
>(async (_, { highlightId }, { claims, log, pubsub }) => {
const highlight = await getHighlightById(highlightId)
if (!highlight?.id) {
return {
@ -239,18 +267,19 @@ export const deleteHighlightResolver = authorized<
}
}
const deletedHighlight = await authTrx((tx) =>
models.highlight.delete(highlightId, tx)
)
const deleted = await deleteHighlight(highlightId, {
pubsub,
uid: claims.uid,
})
if ('error' in deletedHighlight) {
if (!deleted) {
return {
errorCodes: [DeleteHighlightErrorCode.NotFound],
}
}
log.info('Deleting a highlight', {
deletedHighlight,
highlight,
labels: {
source: 'resolver',
resolver: 'deleteHighlightResolver',
@ -258,15 +287,15 @@ export const deleteHighlightResolver = authorized<
},
})
return { highlight: highlightDataToHighlight(deletedHighlight) }
return { highlight: highlightDataToHighlight(highlight) }
})
export const setShareHighlightResolver = authorized<
SetShareHighlightSuccess,
SetShareHighlightError,
MutationSetShareHighlightArgs
>(async (_, { input: { id, share } }, { authTrx, models, claims, log }) => {
const highlight = await models.highlight.get(id)
>(async (_, { input: { id, share } }, { pubsub, claims, log }) => {
const highlight = await getHighlightById(id)
if (!highlight?.id) {
return {
@ -287,16 +316,22 @@ export const setShareHighlightResolver = authorized<
labels: {
source: 'resolver',
resolver: 'setShareHighlightResolver',
articleId: highlight.articleId,
userId: highlight.userId,
},
})
const updatedHighlight = await authTrx((tx) =>
models.highlight.update(id, { sharedAt }, tx)
)
const updatedHighlight: HighlightData = {
...highlight,
sharedAt,
updatedAt: new Date(),
}
if (!updatedHighlight || 'error' in updatedHighlight) {
const updated = await updateHighlight(updatedHighlight, {
pubsub,
uid: claims.uid,
})
if (!updated) {
return {
errorCodes: [SetShareHighlightErrorCode.NotFound],
}

View File

@ -26,9 +26,10 @@ import { User } from '../../entity/user'
import { Label } from '../../entity/label'
import { ILike, In } from 'typeorm'
import { getRepository, setClaims } from '../../entity/utils'
import { deleteLabelInPages, getPageById, updatePage } from '../../elastic'
import { createPubSubClient } from '../../datalayer/pubsub'
import { AppDataSource } from '../../server'
import { getPageById, updatePage } from '../../elastic/pages'
import { deleteLabelInPages } from '../../elastic/labels'
export const labelsResolver = authorized<LabelsSuccess, LabelsError>(
async (_obj, _params, { claims: { uid }, log }) => {

View File

@ -14,7 +14,7 @@ import {
import { authorized } from '../../utils/helpers'
import { analytics } from '../../utils/analytics'
import { env } from '../../env'
import { updatePage } from '../../elastic'
import { updatePage } from '../../elastic/pages'
export const updateLinkShareInfoResolver = authorized<
UpdateLinkShareInfoSuccess,

View File

@ -6,8 +6,8 @@ import {
createPubSubClient,
readPushSubscription,
} from '../../datalayer/pubsub'
import { getPageByParam, updatePage } from '../../elastic'
import { Page } from '../../elastic/types'
import { getPageByParam, updatePage } from '../../elastic/pages'
interface UpdateContentMessage {
fileId: string

View File

@ -14,9 +14,9 @@ import { analytics } from '../../utils/analytics'
import { getNewsletterEmail } from '../../services/newsletters'
import { setClaims } from '../../datalayer/helpers'
import { generateSlug } from '../../utils/helpers'
import { createPage } from '../../elastic'
import { createPubSubClient } from '../../datalayer/pubsub'
import { Page } from '../../elastic/types'
import { createPage } from '../../elastic/pages'
export function pdfAttachmentsRouter() {
const router = express.Router()

View File

@ -286,6 +286,7 @@ const schema = gql`
FILE
PROFILE
WEBSITE
HIGHLIGHTS
UNKNOWN
}
@ -569,7 +570,6 @@ const schema = gql`
# used for simplified url format
shortId: String!
user: User!
article: Article!
quote: String!
# piece of content before the quote
prefix: String
@ -1393,6 +1393,56 @@ const schema = gql`
BAD_REQUEST
}
# Query: search
union SearchResult = SearchSuccess | SearchError
type SearchItem {
# used for pages
id: ID!
title: String!
slug: String!
# for uploaded file articles (PDFs), the URL here is the saved omnivore link in GCS
url: String!
pageType: PageType!
contentReader: ContentReader!
createdAt: Date!
isArchived: Boolean!
readingProgressPercent: Float
readingProgressAnchorIndex: Int
author: String
image: String
description: String
publishedAt: Date
ownedByViewer: Boolean
# for uploaded file articles (PDFs), we track the original article URL separately!
originalArticleUrl: String
uploadFileId: ID
# used for highlights
pageId: ID
shortId: String
quote: String
annotation: String
labels: [Label!]
}
type SearchItemEdge {
cursor: String!
node: SearchItem!
}
type SearchSuccess {
edges: [SearchItemEdge!]!
pageInfo: PageInfo!
}
enum SearchErrorCode {
UNAUTHORIZED
}
type SearchError {
errorCodes: [SearchErrorCode!]!
}
# Mutations
type Mutation {
googleLogin(input: GoogleLoginInput!): LoginResult!
@ -1490,6 +1540,7 @@ const schema = gql`
newsletterEmails: NewsletterEmailsResult!
reminder(linkId: ID!): ReminderResult!
labels: LabelsResult!
search(after: String, first: Int, query: String): SearchResult!
}
`

View File

@ -9,7 +9,7 @@ import {
} from '../generated/graphql'
import { articleSavingRequestDataToArticleSavingRequest } from '../utils/helpers'
import * as privateIpLib from 'private-ip'
import { countByCreatedAt } from '../elastic'
import { countByCreatedAt } from '../elastic/pages'
const isPrivateIP = privateIpLib.default

View File

@ -2,7 +2,7 @@ import { Label } from '../entity/label'
import { ILike, In } from 'typeorm'
import { PageContext } from '../elastic/types'
import { User } from '../entity/user'
import { addLabelInPage } from '../elastic'
import { addLabelInPage } from '../elastic/labels'
import { getRepository } from '../entity/utils'
import { Link } from '../entity/link'
import DataLoader from 'dataloader'

View File

@ -1,7 +1,7 @@
import { ReportItemInput, ReportType } from '../generated/graphql'
import { ContentDisplayReport } from '../entity/reports/content_display_report'
import { AbuseReport } from '../entity/reports/abuse_report'
import { getPageById } from '../elastic'
import { getPageById } from '../elastic/pages'
import { getRepository } from '../entity/utils'
export const saveContentDisplayReport = async (

View File

@ -7,7 +7,7 @@ import {
import normalizeUrl from 'normalize-url'
import { PubsubClient } from '../datalayer/pubsub'
import { Page } from '../elastic/types'
import { createPage, getPageByParam, updatePage } from '../elastic'
import { createPage, getPageByParam, updatePage } from '../elastic/pages'
export type SaveContext = {
pubsub: PubsubClient

View File

@ -13,7 +13,7 @@ import { DataModels } from '../resolvers/types'
import { generateSlug } from '../utils/helpers'
import { getStorageFileDetails, makeStorageFilePublic } from '../utils/uploads'
import { createSavingRequest } from './save_page'
import { createPage, getPageByParam, updatePage } from '../elastic'
import { createPage, getPageByParam, updatePage } from '../elastic/pages'
type SaveContext = {
pubsub: PubsubClient

View File

@ -14,8 +14,8 @@ import normalizeUrl from 'normalize-url'
import { createPageSaveRequest } from './create_page_save_request'
import { kx } from '../datalayer/knex_config'
import { setClaims } from '../datalayer/helpers'
import { createPage, getPageByParam, updatePage } from '../elastic'
import { Page } from '../elastic/types'
import { createPage, getPageByParam, updatePage } from '../elastic/pages'
type SaveContext = {
pubsub: PubsubClient

View File

@ -9,7 +9,7 @@ import {
SearchParserKeyWordOffset,
SearchParserTextOffset,
} from 'search-query-parser'
import { PageType, SortBy, SortOrder, SortParams } from '../generated/graphql'
import { PageType } from '../generated/graphql'
export enum ReadFilter {
ALL,
@ -27,9 +27,12 @@ export type SearchFilter = {
query: string | undefined
inFilter: InFilter
readFilter: ReadFilter
typeFilter?: PageType | undefined
typeFilter?: PageType
labelFilters: LabelFilter[]
sortParams?: SortParams
hasFilters: HasFilter[]
savedDateFilter?: DateRangeFilter
publishedDateFilter?: DateRangeFilter
}
export enum LabelFilterType {
@ -42,6 +45,32 @@ export type LabelFilter = {
labels: string[]
}
export enum HasFilter {
HIGHLIGHTS,
SHARED_AT,
}
export type DateRangeFilter = {
startDate?: Date
endDate?: Date
}
export enum SortBy {
SAVED = 'savedAt',
UPDATED = 'updatedAt',
SCORE = '_score',
}
export enum SortOrder {
ASCENDING = 'asc',
DESCENDING = 'desc',
}
export type SortParams = {
by: SortBy
order?: SortOrder
}
const parseIsFilter = (str: string | undefined): ReadFilter => {
switch (str?.toUpperCase()) {
case 'READ':
@ -72,7 +101,7 @@ const parseTypeFilter = (str: string | undefined): PageType | undefined => {
return undefined
}
switch (str) {
switch (str.toLowerCase()) {
case 'article':
return PageType.Article
case 'book':
@ -86,6 +115,8 @@ const parseTypeFilter = (str: string | undefined): PageType | undefined => {
return PageType.Website
case 'unknown':
return PageType.Unknown
case 'highlights':
return PageType.Highlights
}
return undefined
}
@ -115,25 +146,55 @@ const parseSortParams = (str?: string): SortParams | undefined => {
return undefined
}
const [sort, order] = str.split(':')
const [sort, order] = str.split('-')
const sortOrder =
order?.toUpperCase() === 'ASC' ? SortOrder.Ascending : SortOrder.Descending
order?.toUpperCase() === 'ASC' ? SortOrder.ASCENDING : SortOrder.DESCENDING
switch (sort.toUpperCase()) {
case 'UPDATED_TIME':
case 'SAVED_AT':
case 'UPDATED':
return {
by: SortBy.SavedAt,
by: SortBy.UPDATED,
order: sortOrder,
}
case 'SAVED':
return {
by: SortBy.SAVED,
order: sortOrder,
}
case 'SCORE':
// sort by score does not need an order
return {
by: SortBy.Score,
by: SortBy.SCORE,
}
}
}
const parseHasFilter = (str?: string): HasFilter | undefined => {
if (str === undefined) {
return undefined
}
switch (str.toUpperCase()) {
case 'HIGHLIGHTS':
return HasFilter.HIGHLIGHTS
}
}
const parseDateRangeFilter = (str?: string): DateRangeFilter | undefined => {
if (str === undefined) {
return undefined
}
const [start, end] = str.split('..')
const startDate = start && start !== '*' ? new Date(start) : undefined
const endDate = end && end !== '*' ? new Date(end) : undefined
return {
startDate,
endDate,
}
}
export const parseSearchQuery = (query: string | undefined): SearchFilter => {
const searchQuery = query ? query.replace(/\W\s":/g, '') : undefined
const result: SearchFilter = {
@ -141,6 +202,7 @@ export const parseSearchQuery = (query: string | undefined): SearchFilter => {
readFilter: ReadFilter.ALL,
inFilter: searchQuery ? InFilter.ALL : InFilter.INBOX,
labelFilters: [],
hasFilters: [],
}
if (!searchQuery) {
@ -149,11 +211,21 @@ export const parseSearchQuery = (query: string | undefined): SearchFilter => {
inFilter: InFilter.INBOX,
readFilter: ReadFilter.ALL,
labelFilters: [],
hasFilters: [],
}
}
const parsed = parse(searchQuery, {
keywords: ['in', 'is', 'type', 'label', 'sort'],
keywords: [
'in',
'is',
'type',
'label',
'sort',
'has',
'saved',
'published',
],
tokenize: true,
})
if (parsed.offsets) {
@ -197,10 +269,20 @@ export const parseSearchQuery = (query: string | undefined): SearchFilter => {
labelFilter && result.labelFilters.push(labelFilter)
break
}
case 'sort': {
case 'sort':
result.sortParams = parseSortParams(keyword.value)
break
case 'has': {
const hasFilter = parseHasFilter(keyword.value)
hasFilter !== undefined && result.hasFilters.push(hasFilter)
break
}
case 'saved':
result.savedDateFilter = parseDateRangeFilter(keyword.value)
break
case 'published':
result.publishedDateFilter = parseDateRangeFilter(keyword.value)
break
}
}
}

View File

@ -1,6 +1,9 @@
import 'mocha'
import { expect } from 'chai'
import { InFilter, ReadFilter } from '../../src/utils/search'
import { Highlight, Page, PageContext, PageType } from '../../src/elastic/types'
import { createPubSubClient } from '../../src/datalayer/pubsub'
import {
addLabelInPage,
countByCreatedAt,
createPage,
deletePage,
@ -8,12 +11,12 @@ import {
getPageByParam,
searchPages,
updatePage,
} from '../../src/elastic'
import { PageType } from '../../src/generated/graphql'
import { expect } from 'chai'
import { InFilter, ReadFilter } from '../../src/utils/search'
import { Page, PageContext } from '../../src/elastic/types'
import { createPubSubClient } from '../../src/datalayer/pubsub'
} from '../../src/elastic/pages'
import { addLabelInPage } from '../../src/elastic/labels'
import {
addHighlightToPage,
searchHighlights,
} from '../../src/elastic/highlights'
describe('elastic api', () => {
const userId = 'userId'
@ -204,4 +207,33 @@ describe('elastic api', () => {
expect(count).to.eq(1)
})
})
describe('searchHighlights', () => {
const highlightId = 'highlightId'
before(async () => {
const highlightData: Highlight = {
patch: 'test patch',
quote: 'test content',
shortId: 'test shortId',
id: highlightId,
userId: page.userId,
createdAt: new Date(),
}
await addHighlightToPage(page.id, highlightData, ctx)
})
it('searches highlights', async () => {
const [searchResults, count] = (await searchHighlights(
{
query: 'test',
},
page.userId
)) || [[], 0]
expect(count).to.eq(1)
expect(searchResults[0].id).to.eq(highlightId)
})
})
})

View File

@ -1,5 +1,6 @@
import { createTestConnection } from './db'
import { initElasticsearch } from '../src/elastic'
import { startApolloServer } from './util'
export const mochaGlobalSetup = async () => {
await createTestConnection()
@ -7,4 +8,7 @@ export const mochaGlobalSetup = async () => {
await initElasticsearch()
console.log('elasticsearch initialized')
await startApolloServer()
console.log('apollo server started')
}

View File

@ -1,6 +1,12 @@
import { AppDataSource } from '../src/server'
import { stopApolloServer } from './util'
import { kx } from '../src/datalayer/knex_config'
export const mochaGlobalTeardown = async () => {
await AppDataSource.destroy()
await kx.destroy()
console.log('db connection closed')
await stopApolloServer()
console.log('apollo server stopped')
}

View File

@ -11,17 +11,18 @@ import 'mocha'
import { User } from '../../src/entity/user'
import chaiString from 'chai-string'
import { Label } from '../../src/entity/label'
import { UploadFileStatus } from '../../src/generated/graphql'
import { Highlight, Page, PageContext, PageType } from '../../src/elastic/types'
import { UploadFile } from '../../src/entity/upload_file'
import { createPubSubClient } from '../../src/datalayer/pubsub'
import { getRepository } from '../../src/entity/utils'
import {
createPage,
deletePage,
getPageById,
updatePage,
} from '../../src/elastic'
import { PageType, UploadFileStatus } from '../../src/generated/graphql'
import { Page, PageContext } from '../../src/elastic/types'
import { UploadFile } from '../../src/entity/upload_file'
import { createPubSubClient } from '../../src/datalayer/pubsub'
import { getRepository } from '../../src/entity/utils'
} from '../../src/elastic/pages'
import { addHighlightToPage } from '../../src/elastic/highlights'
chai.use(chaiString)
@ -85,15 +86,11 @@ const createArticleQuery = (
`
}
const articlesQuery = (after = '', order = 'ASCENDING') => {
const articlesQuery = (after = '') => {
return `
query {
articles(
sharedOnly: ${false}
sort: {
order: ${order}
by: UPDATED_TIME
}
after: "${after}"
first: 5
query: "") {
@ -136,6 +133,17 @@ const getArticleQuery = (slug: string) => {
article {
id
slug
highlights {
id
shortId
quote
prefix
suffix
patch
annotation
sharedAt
createdAt
}
}
}
... on ArticleError {
@ -146,6 +154,37 @@ const getArticleQuery = (slug: string) => {
`
}
const searchQuery = (keyword = '') => {
return `
query {
search(
after: ""
first: 5
query: "${keyword}") {
... on SearchSuccess {
edges {
cursor
node {
id
url
}
}
pageInfo {
hasNextPage
hasPreviousPage
startCursor
endCursor
totalCount
}
}
... on SearchError {
errorCodes
}
}
}
`
}
const savePageQuery = (url: string, title: string, originalContent: string) => {
return `
mutation {
@ -302,6 +341,7 @@ describe('Article API', () => {
describe('GetArticle', () => {
const realSlug = 'testing-is-really-fun-with-omnivore'
let query = ''
let slug = ''
let pageId: string | undefined
@ -321,6 +361,15 @@ describe('Article API', () => {
readingProgressAnchorIndex: 0,
url: 'https://blog.omnivore.app/test-with-omnivore',
savedAt: new Date(),
highlights: [
{
id: 'test id',
shortId: 'test short id',
createdAt: new Date(),
patch: 'test patch',
quote: 'test quote',
},
],
} as Page
pageId = await createPage(page, ctx)
})
@ -335,19 +384,25 @@ describe('Article API', () => {
query = getArticleQuery(slug)
})
context('when article exists', () => {
context('when page exists', () => {
before(() => {
slug = realSlug
})
it('should return the article', async () => {
it('should return the page', async () => {
const res = await graphqlRequest(query, authToken).expect(200)
expect(res.body.data.article.article.slug).to.eql(slug)
})
it('should return highlights', async () => {
const res = await graphqlRequest(query, authToken).expect(200)
expect(res.body.data.article.article.highlights).to.length(1)
})
})
context('when article does not exist', () => {
context('when page does not exist', () => {
before(() => {
slug = 'not-a-real-slug'
})
@ -418,11 +473,6 @@ describe('Article API', () => {
})
context('when there are pages with labels', () => {
before(() => {
// get the last page
after = '14'
})
it('should return labels', async () => {
const res = await graphqlRequest(query, authToken).expect(200)
@ -437,15 +487,15 @@ describe('Article API', () => {
after = ''
})
it('should return the first five items', async () => {
it('should return the first five items in desc order', async () => {
const res = await graphqlRequest(query, authToken).expect(200)
expect(res.body.data.articles.edges.length).to.eql(5)
expect(res.body.data.articles.edges[0].node.id).to.eql(pages[0].id)
expect(res.body.data.articles.edges[1].node.id).to.eql(pages[1].id)
expect(res.body.data.articles.edges[2].node.id).to.eql(pages[2].id)
expect(res.body.data.articles.edges[3].node.id).to.eql(pages[3].id)
expect(res.body.data.articles.edges[4].node.id).to.eql(pages[4].id)
expect(res.body.data.articles.edges[0].node.id).to.eql(pages[14].id)
expect(res.body.data.articles.edges[1].node.id).to.eql(pages[13].id)
expect(res.body.data.articles.edges[2].node.id).to.eql(pages[12].id)
expect(res.body.data.articles.edges[3].node.id).to.eql(pages[11].id)
expect(res.body.data.articles.edges[4].node.id).to.eql(pages[10].id)
})
it('should set the pageInfo', async () => {
@ -471,11 +521,11 @@ describe('Article API', () => {
const res = await graphqlRequest(query, authToken).expect(200)
expect(res.body.data.articles.edges.length).to.eql(5)
expect(res.body.data.articles.edges[0].node.id).to.eql(pages[5].id)
expect(res.body.data.articles.edges[1].node.id).to.eql(pages[6].id)
expect(res.body.data.articles.edges[0].node.id).to.eql(pages[9].id)
expect(res.body.data.articles.edges[1].node.id).to.eql(pages[8].id)
expect(res.body.data.articles.edges[2].node.id).to.eql(pages[7].id)
expect(res.body.data.articles.edges[3].node.id).to.eql(pages[8].id)
expect(res.body.data.articles.edges[4].node.id).to.eql(pages[9].id)
expect(res.body.data.articles.edges[3].node.id).to.eql(pages[6].id)
expect(res.body.data.articles.edges[4].node.id).to.eql(pages[5].id)
})
it('should set the pageInfo', async () => {
@ -532,7 +582,7 @@ describe('Article API', () => {
// set a slight delay to make sure the page is updated
setTimeout(async () => {
let allLinks = await graphqlRequest(
articlesQuery('', 'DESCENDING'),
articlesQuery(''),
authToken
).expect(200)
const justSavedId = allLinks.body.data.articles.edges[0].node.id
@ -541,10 +591,9 @@ describe('Article API', () => {
// test the negative case, ensuring the archive link wasn't returned
setTimeout(async () => {
allLinks = await graphqlRequest(
articlesQuery('', 'DESCENDING'),
authToken
).expect(200)
allLinks = await graphqlRequest(articlesQuery(''), authToken).expect(
200
)
expect(allLinks.body.data.articles.edges[0].node.url).to.not.eq(url)
}, 100)
@ -555,10 +604,9 @@ describe('Article API', () => {
).expect(200)
setTimeout(async () => {
allLinks = await graphqlRequest(
articlesQuery('', 'DESCENDING'),
authToken
).expect(200)
allLinks = await graphqlRequest(articlesQuery(''), authToken).expect(
200
)
expect(allLinks.body.data.articles.edges[0].node.url).to.eq(url)
}, 100)
})
@ -729,4 +777,91 @@ describe('Article API', () => {
})
})
})
describe('Search API', () => {
const url = 'https://blog.omnivore.app/p/getting-started-with-omnivore'
const pages: Page[] = []
const highlights: Highlight[] = []
let query = ''
let keyword = ''
before(async () => {
// Create some test pages
for (let i = 0; i < 5; i++) {
const page: Page = {
id: '',
hash: 'test hash',
userId: user.id,
pageType: PageType.Article,
title: 'test title',
content: '<p>search page</p>',
slug: 'test slug',
createdAt: new Date(),
updatedAt: new Date(),
readingProgressPercent: 0,
readingProgressAnchorIndex: 0,
url: url,
savedAt: new Date(),
}
const pageId = await createPage(page, ctx)
if (!pageId) {
expect.fail('Failed to create page')
}
page.id = pageId
pages.push(page)
// Create some test highlights
const highlight: Highlight = {
id: `highlight-${i}`,
patch: 'test patch',
shortId: 'test shortId',
userId: user.id,
quote: '<p>search highlight</p>',
createdAt: new Date(),
updatedAt: new Date(),
}
await addHighlightToPage(pageId, highlight, ctx)
highlights.push(highlight)
}
})
beforeEach(async () => {
query = searchQuery(keyword)
})
context('when type:highlights is not in the query', () => {
before(() => {
keyword = 'search'
})
it('should return pages in descending order', async () => {
const res = await graphqlRequest(query, authToken).expect(200)
expect(res.body.data.search.edges.length).to.eql(5)
expect(res.body.data.search.edges[0].node.id).to.eq(pages[4].id)
expect(res.body.data.search.edges[1].node.id).to.eq(pages[3].id)
expect(res.body.data.search.edges[2].node.id).to.eq(pages[2].id)
expect(res.body.data.search.edges[3].node.id).to.eq(pages[1].id)
expect(res.body.data.search.edges[4].node.id).to.eq(pages[0].id)
})
})
context('when type:highlights is in the query', () => {
before(() => {
keyword = 'search type:highlights'
})
it('should return highlights in descending order', async () => {
const res = await graphqlRequest(query, authToken).expect(200)
expect(res.body.data.search.edges.length).to.eq(5)
expect(res.body.data.search.edges[0].node.id).to.eq(highlights[4].id)
expect(res.body.data.search.edges[1].node.id).to.eq(highlights[3].id)
expect(res.body.data.search.edges[2].node.id).to.eq(highlights[2].id)
expect(res.body.data.search.edges[3].node.id).to.eq(highlights[1].id)
expect(res.body.data.search.edges[4].node.id).to.eq(highlights[0].id)
})
})
})
})

View File

@ -10,9 +10,9 @@ import { expect } from 'chai'
import 'mocha'
import { User } from '../../src/entity/user'
import chaiString from 'chai-string'
import { deletePage } from '../../src/elastic'
import { createPubSubClient } from '../../src/datalayer/pubsub'
import { PageContext } from '../../src/elastic/types'
import { deletePage } from '../../src/elastic/pages'
chai.use(chaiString)

View File

@ -10,8 +10,8 @@ import { expect } from 'chai'
import 'mocha'
import { User } from '../../src/entity/user'
import { Page } from '../../src/elastic/types'
import { getPageById } from '../../src/elastic'
import { getRepository } from '../../src/entity/utils'
import { getPageById } from '../../src/elastic/pages'
describe('Labels API', () => {
const username = 'fakeUser'

View File

@ -8,7 +8,7 @@ import { User } from '../../src/entity/user'
import 'mocha'
import * as jwt from 'jsonwebtoken'
import { expect } from 'chai'
import { getPageById } from '../../src/elastic'
import { getPageById } from '../../src/elastic/pages'
describe('PDF attachments Router', () => {
const username = 'fakeUser'

View File

@ -4,7 +4,7 @@ import 'chai/register-should'
import { createTestUser, deleteTestUser } from '../db'
import { SaveContext, saveEmail } from '../../src/services/save_email'
import { createPubSubClient } from '../../src/datalayer/pubsub'
import { getPageByParam } from '../../src/elastic'
import { getPageByParam } from '../../src/elastic/pages'
describe('saveEmail', () => {
const username = 'fakeUser'

View File

@ -4,11 +4,11 @@ import 'chai/register-should'
import { createTestUser, deleteTestUser } from '../db'
import { createNewsletterEmail } from '../../src/services/newsletters'
import { saveNewsletterEmail } from '../../src/services/save_newsletter_email'
import { getPageByParam } from '../../src/elastic'
import { User } from '../../src/entity/user'
import { NewsletterEmail } from '../../src/entity/newsletter_email'
import { SaveContext } from '../../src/services/save_email'
import { createPubSubClient } from '../../src/datalayer/pubsub'
import { getPageByParam } from '../../src/elastic/pages'
describe('saveNewsletterEmail', () => {
const username = 'fakeUser'

View File

@ -4,18 +4,22 @@ import { v4 } from 'uuid'
import { corsConfig } from '../src/utils/corsConfig'
import { Page } from '../src/elastic/types'
import { PageType } from '../src/generated/graphql'
import { createPage, getPageById } from '../src/elastic'
import { User } from '../src/entity/user'
import { Label } from '../src/entity/label'
import { createPubSubClient } from '../src/datalayer/pubsub'
import { createPage, getPageById } from '../src/elastic/pages'
const { app, apollo } = createApp()
export const request = supertest(app)
before(async () => {
export const startApolloServer = async () => {
await apollo.start()
apollo.applyMiddleware({ app, path: '/api/graphql', cors: corsConfig })
})
}
export const stopApolloServer = async () => {
await apollo.stop()
}
export const graphqlRequest = (
query: string,