Merge pull request #3027 from omnivore-app/fix/site-search

fix: site scoped search not working for domain and hostname
2023-10-30 11:08:26 +08:00
parent 4f0b7df33d 67bfca726b
commit 6bdc7f01e1
4 changed files with 143 additions and 2 deletions
--- a/packages/api/src/utils/search.ts
+++ b/packages/api/src/utils/search.ts
@ -338,7 +338,7 @@ const parseFieldFilter = (
  }
 }

-const parseIds = (field: string, str?: string): string[] | undefined => {
+const parseIds = (str?: string): string[] | undefined => {
  if (str === undefined) {
    return undefined
  }
@ -500,7 +500,7 @@ export const parseSearchQuery = (query: string | undefined): SearchFilter => {
          break
        }
        case 'includes': {
-          const ids = parseIds(keyword.keyword, keyword.value)
+          const ids = parseIds(keyword.value)
          ids && result.ids.push(...ids)
          break
        }
--- a/packages/api/test/resolvers/article.test.ts
+++ b/packages/api/test/resolvers/article.test.ts
@ -1568,6 +1568,85 @@ describe('Article API', () => {
        ).to.eq(group.name)
      })
    })
+
+    context('when site:youtube.com is in the query', () => {
+      let items: LibraryItem[] = [] // items seeded by `before`, removed again in `after`
+
+      before(async () => {
+        keyword = 'site:youtube.com' // full domain form of the site filter; presumably read by the shared `query` built outside this block — confirm
+        // Seed two items: a YouTube video (the expected match) and a second item under `url` that the assertions below expect to be left out
+        items = await createLibraryItems(
+          [
+            {
+              user,
+              title: 'test title 1',
+              readableContent: '<p>test 1</p>',
+              slug: 'test slug 1',
+              originalUrl: 'https://www.youtube.com/watch?v=Omnivore',
+              itemType: PageType.Video,
+            },
+            {
+              user,
+              title: 'test title 2',
+              readableContent: '<p>test 2</p>',
+              slug: 'test slug 2',
+              originalUrl: `${url}/test2`,
+            },
+          ],
+          user.id
+        )
+      })
+
+      after(async () => {
+        await deleteLibraryItems(items, user.id) // clean up only what this context created
+      })
+
+      it('returns youtube videos', async () => {
+        const res = await graphqlRequest(query, authToken).expect(200)
+
+        expect(res.body.data.search.pageInfo.totalCount).to.eq(1) // exactly one hit: the second seeded item must not match
+        expect(res.body.data.search.edges[0].node.id).to.eq(items[0].id) // and that hit is the youtube.com item
+      })
+    })
+
+    context('when site:wikipedia is in the query', () => {
+      let items: LibraryItem[] = [] // items seeded by `before`, removed again in `after`
+
+      before(async () => {
+        keyword = 'site:wikipedia' // bare site name without a TLD; presumably read by the shared `query` built outside this block — confirm
+        // Seed two items: a page on en.wikipedia.org (the expected match) and a second item under `url` that the assertions below expect to be left out
+        items = await createLibraryItems(
+          [
+            {
+              user,
+              title: 'test title 1',
+              readableContent: '<p>test 1</p>',
+              slug: 'test slug 1',
+              originalUrl: 'https://en.wikipedia.org/wiki/Omnivore',
+            },
+            {
+              user,
+              title: 'test title 2',
+              readableContent: '<p>test 2</p>',
+              slug: 'test slug 2',
+              originalUrl: `${url}/test2`,
+            },
+          ],
+          user.id
+        )
+      })
+
+      after(async () => {
+        await deleteLibraryItems(items, user.id) // clean up only what this context created
+      })
+
+      it('returns wikipedia pages', async () => {
+        const res = await graphqlRequest(query, authToken).expect(200)
+
+        expect(res.body.data.search.pageInfo.totalCount).to.eq(1) // exactly one hit: the second seeded item must not match
+        expect(res.body.data.search.edges[0].node.id).to.eq(items[0].id) // and that hit is the en.wikipedia.org item
+      })
+    })
  })

  describe('TypeaheadSearch API', () => {
--- a/packages/db/migrations/0143.do.add_domain_to_site_name_tsv.sql
+++ b/packages/db/migrations/0143.do.add_domain_to_site_name_tsv.sql
@ -0,0 +1,31 @@
+-- Type: DO
+-- Name: add_domain_to_site_name_tsv
+-- Description: Convert domain to tsvector and add it to site_name_tsv column
+
+BEGIN;
+
+CREATE OR REPLACE FUNCTION update_library_item_tsv() RETURNS trigger AS $$
+begin -- recomputes every per-column tsvector on the row and the weighted search_tsv built from them, then returns the modified row
+    new.content_tsv := to_tsvector('pg_catalog.english', coalesce(new.readable_content, ''));
+    new.site_name_tsv := to_tsvector('pg_catalog.english', coalesce(new.site_name, '')) ||
+        -- domain (e.g. omnivore.app); the optional subdomain prefix only accepts host characters, so dots in the path or query (e.g. /v1.2.3/) can never be captured as the domain
+        to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]*\.)?(([^:\/\s]+)\.[^:\/\s]+)(.*)$', '\4'), '')) ||
+        -- secondary hostname (e.g. omnivore): capture group 5 of the same pattern, i.e. the domain without its last label
+        to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]*\.)?(([^:\/\s]+)\.[^:\/\s]+)(.*)$', '\5'), ''));
+    new.title_tsv := to_tsvector('pg_catalog.english', coalesce(new.title, ''));
+    new.author_tsv := to_tsvector('pg_catalog.english', coalesce(new.author, ''));
+    new.description_tsv := to_tsvector('pg_catalog.english', coalesce(new.description, ''));
+    -- note_tsv is generated by both note and highlight_annotations; the inner coalesce keeps a NULL array from turning note_tsv (and with it the whole search_tsv concatenation) into NULL
+    new.note_tsv := to_tsvector('pg_catalog.english', coalesce(new.note, '') || ' ' || coalesce(array_to_string(new.highlight_annotations, ' '), ''));
+    new.search_tsv := 
+        setweight(new.title_tsv, 'A') || 
+        setweight(new.author_tsv, 'A') || 
+        setweight(new.site_name_tsv, 'A') || 
+        setweight(new.description_tsv, 'A') || 
+        setweight(new.note_tsv, 'A') ||
+        setweight(new.content_tsv, 'B');
+    return new;
+end
+$$ LANGUAGE plpgsql;
+
+COMMIT;
--- a/packages/db/migrations/0143.undo.add_domain_to_site_name_tsv.sql
+++ b/packages/db/migrations/0143.undo.add_domain_to_site_name_tsv.sql
@ -0,0 +1,31 @@
+-- Type: UNDO
+-- Name: add_domain_to_site_name_tsv
+-- Description: Revert update_library_item_tsv so the domain is no longer added to site_name_tsv (hostnames are indexed directly in search_tsv again)
+
+BEGIN;
+
+CREATE OR REPLACE FUNCTION update_library_item_tsv() RETURNS trigger AS $$
+begin -- recomputes every per-column tsvector on the row and the weighted search_tsv built from them, then returns the modified row
+    new.content_tsv := to_tsvector('pg_catalog.english', coalesce(new.readable_content, ''));
+    new.site_name_tsv := to_tsvector('pg_catalog.english', coalesce(new.site_name, '')); -- site_name only; hostname terms are added to search_tsv below instead
+    new.title_tsv := to_tsvector('pg_catalog.english', coalesce(new.title, ''));
+    new.author_tsv := to_tsvector('pg_catalog.english', coalesce(new.author, ''));
+    new.description_tsv := to_tsvector('pg_catalog.english', coalesce(new.description, ''));
+    -- note_tsv is generated by both note and highlight_annotations
+    new.note_tsv := to_tsvector('pg_catalog.english', coalesce(new.note, '') || ' ' || array_to_string(new.highlight_annotations, ' '));
+    new.search_tsv := 
+        setweight(new.title_tsv, 'A') || 
+        setweight(new.author_tsv, 'A') || 
+        setweight(new.site_name_tsv, 'A') || 
+        setweight(new.description_tsv, 'A') || 
+        -- full hostname (e.g. www.omnivore.app): capture group 3 of the URL pattern, weighted 'A' like the other metadata fields
+        setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A') || 
+        -- secondary hostname (e.g. omnivore): capture group 4 of a second URL pattern, also weighted 'A'
+        setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A') ||
+        setweight(new.note_tsv, 'A') ||
+        setweight(new.content_tsv, 'B'); -- body text ranks below the metadata fields
+    return new;
+end
+$$ LANGUAGE plpgsql;
+
+COMMIT;