From d480d531514cbf3d0d86109048ec3f601edf23a0 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 26 Oct 2023 22:10:35 +0800 Subject: [PATCH 1/2] fix: site scoped search not working for domain and hostname * alter site_name_tsv to include hostname and domain * add test cases --- packages/api/src/utils/search.ts | 4 +- packages/api/test/resolvers/article.test.ts | 41 +++++++++++++++++++ .../0143.do.add_domain_to_site_name_tsv.sql | 31 ++++++++++++++ .../0143.undo.add_domain_to_site_name_tsv.sql | 31 ++++++++++++++ 4 files changed, 105 insertions(+), 2 deletions(-) create mode 100755 packages/db/migrations/0143.do.add_domain_to_site_name_tsv.sql create mode 100755 packages/db/migrations/0143.undo.add_domain_to_site_name_tsv.sql diff --git a/packages/api/src/utils/search.ts b/packages/api/src/utils/search.ts index 96abd859b..39d9975d0 100644 --- a/packages/api/src/utils/search.ts +++ b/packages/api/src/utils/search.ts @@ -338,7 +338,7 @@ const parseFieldFilter = ( } } -const parseIds = (field: string, str?: string): string[] | undefined => { +const parseIds = (str?: string): string[] | undefined => { if (str === undefined) { return undefined } @@ -500,7 +500,7 @@ export const parseSearchQuery = (query: string | undefined): SearchFilter => { break } case 'includes': { - const ids = parseIds(keyword.keyword, keyword.value) + const ids = parseIds(keyword.value) ids && result.ids.push(...ids) break } diff --git a/packages/api/test/resolvers/article.test.ts b/packages/api/test/resolvers/article.test.ts index 3b6e80057..f3fdafd2e 100644 --- a/packages/api/test/resolvers/article.test.ts +++ b/packages/api/test/resolvers/article.test.ts @@ -1568,6 +1568,47 @@ describe('Article API', () => { ).to.eq(group.name) }) }) + + context('when site:youtube.com is in the query', () => { + let items: LibraryItem[] = [] + + before(async () => { + keyword = 'site:youtube.com' + // Create some test items + items = await createLibraryItems( + [ + { + user, + title: 'test title 1', + 
readableContent: '

test 1

', + slug: 'test slug 1', + originalUrl: + 'https://www.youtube.com/watch?v=Omnivore', + itemType: PageType.Video, + }, + { + user, + title: 'test title 2', + readableContent: '

test 2

', + slug: 'test slug 2', + originalUrl: `${url}/test2`, + }, + ], + user.id + ) + }) + + after(async () => { + await deleteLibraryItems(items, user.id) + }) + + it('returns youtube videos', async () => { + const res = await graphqlRequest(query, authToken).expect(200) + + expect(res.body.data.search.pageInfo.totalCount).to.eq(1) + expect(res.body.data.search.edges[0].node.id).to.eq(items[0].id) + }) + }) }) describe('TypeaheadSearch API', () => { diff --git a/packages/db/migrations/0143.do.add_domain_to_site_name_tsv.sql b/packages/db/migrations/0143.do.add_domain_to_site_name_tsv.sql new file mode 100755 index 000000000..e55a17615 --- /dev/null +++ b/packages/db/migrations/0143.do.add_domain_to_site_name_tsv.sql @@ -0,0 +1,31 @@ +-- Type: DO +-- Name: add_domain_to_site_name_tsv +-- Description: Convert domain to tsvector and add it to site_name_tsv column + +BEGIN; + +CREATE OR REPLACE FUNCTION update_library_item_tsv() RETURNS trigger AS $$ +begin + new.content_tsv := to_tsvector('pg_catalog.english', coalesce(new.readable_content, '')); + new.site_name_tsv := to_tsvector('pg_catalog.english', coalesce(new.site_name, '')) || + -- domain (eg omnivore.app): capture group 4; the optional subdomain group excludes colon, slash and whitespace so dots in the path or query string are never taken for the domain + to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]*\.)?(([^:\/\s]+)\.[^:\/\s]+)(.*)$', '\4'), '')) || + -- secondary hostname (eg omnivore): same pattern, capture group 5 (the domain without its last label) + to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]*\.)?(([^:\/\s]+)\.[^:\/\s]+)(.*)$', '\5'), '')); + new.title_tsv := to_tsvector('pg_catalog.english', coalesce(new.title, '')); + new.author_tsv := to_tsvector('pg_catalog.english', coalesce(new.author, '')); + new.description_tsv := to_tsvector('pg_catalog.english', coalesce(new.description, '')); + -- note_tsv is generated by both note and highlight_annotations + new.note_tsv := to_tsvector('pg_catalog.english', coalesce(new.note, '') || ' ' || array_to_string(new.highlight_annotations, ' ')); + new.search_tsv := + 
setweight(new.title_tsv, 'A') || + setweight(new.author_tsv, 'A') || + setweight(new.site_name_tsv, 'A') || + setweight(new.description_tsv, 'A') || + setweight(new.note_tsv, 'A') || + setweight(new.content_tsv, 'B'); + return new; +end +$$ LANGUAGE plpgsql; + +COMMIT; diff --git a/packages/db/migrations/0143.undo.add_domain_to_site_name_tsv.sql b/packages/db/migrations/0143.undo.add_domain_to_site_name_tsv.sql new file mode 100755 index 000000000..8af1159bb --- /dev/null +++ b/packages/db/migrations/0143.undo.add_domain_to_site_name_tsv.sql @@ -0,0 +1,31 @@ +-- Type: UNDO +-- Name: add_domain_to_site_name_tsv +-- Description: Revert site_name_tsv to the site name only and index the hostname terms directly in search_tsv + +BEGIN; + +CREATE OR REPLACE FUNCTION update_library_item_tsv() RETURNS trigger AS $$ +begin + new.content_tsv := to_tsvector('pg_catalog.english', coalesce(new.readable_content, '')); + new.site_name_tsv := to_tsvector('pg_catalog.english', coalesce(new.site_name, '')); + new.title_tsv := to_tsvector('pg_catalog.english', coalesce(new.title, '')); + new.author_tsv := to_tsvector('pg_catalog.english', coalesce(new.author, '')); + new.description_tsv := to_tsvector('pg_catalog.english', coalesce(new.description, '')); + -- note_tsv is generated by both note and highlight_annotations + new.note_tsv := to_tsvector('pg_catalog.english', coalesce(new.note, '') || ' ' || array_to_string(new.highlight_annotations, ' ')); + new.search_tsv := + setweight(new.title_tsv, 'A') || + setweight(new.author_tsv, 'A') || + setweight(new.site_name_tsv, 'A') || + setweight(new.description_tsv, 'A') || + -- full hostname (eg www.omnivore.app) + setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A') || + -- secondary hostname (eg omnivore) + setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, 
'^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A') || + setweight(new.note_tsv, 'A') || + setweight(new.content_tsv, 'B'); + return new; +end +$$ LANGUAGE plpgsql; + +COMMIT; From 67bfca726b1e117191d95ea7b794a8ae1b069b14 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 26 Oct 2023 22:15:33 +0800 Subject: [PATCH 2/2] add test cases --- packages/api/test/resolvers/article.test.ts | 42 ++++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/packages/api/test/resolvers/article.test.ts b/packages/api/test/resolvers/article.test.ts index f3fdafd2e..0763a1a49 100644 --- a/packages/api/test/resolvers/article.test.ts +++ b/packages/api/test/resolvers/article.test.ts @@ -1582,8 +1582,7 @@ describe('Article API', () => { title: 'test title 1', readableContent: '

test 1

', slug: 'test slug 1', - originalUrl: - 'https://www.youtube.com/watch?v=Omnivore', + originalUrl: 'https://www.youtube.com/watch?v=Omnivore', itemType: PageType.Video, }, { @@ -1609,6 +1608,45 @@ describe('Article API', () => { expect(res.body.data.search.edges[0].node.id).to.eq(items[0].id) }) }) + + context('when site:wikipedia is in the query', () => { + let items: LibraryItem[] = [] + + before(async () => { + keyword = 'site:wikipedia' + // Create some test items + items = await createLibraryItems( + [ + { + user, + title: 'test title 1', + readableContent: '

test 1

', + slug: 'test slug 1', + originalUrl: 'https://en.wikipedia.org/wiki/Omnivore', + }, + { + user, + title: 'test title 2', + readableContent: '

test 2

', + slug: 'test slug 2', + originalUrl: `${url}/test2`, + }, + ], + user.id + ) + }) + + after(async () => { + await deleteLibraryItems(items, user.id) + }) + + it('returns wikipedia pages', async () => { + const res = await graphqlRequest(query, authToken).expect(200) + + expect(res.body.data.search.pageInfo.totalCount).to.eq(1) + expect(res.body.data.search.edges[0].node.id).to.eq(items[0].id) + }) + }) }) describe('TypeaheadSearch API', () => {