Merge pull request #3027 from omnivore-app/fix/site-search

fix: site scoped search not working for domain and hostname
This commit is contained in:
Hongbo Wu
2023-10-30 11:08:26 +08:00
committed by GitHub
4 changed files with 143 additions and 2 deletions

View File

@ -338,7 +338,7 @@ const parseFieldFilter = (
}
}
const parseIds = (field: string, str?: string): string[] | undefined => {
const parseIds = (str?: string): string[] | undefined => {
if (str === undefined) {
return undefined
}
@ -500,7 +500,7 @@ export const parseSearchQuery = (query: string | undefined): SearchFilter => {
break
}
case 'includes': {
const ids = parseIds(keyword.keyword, keyword.value)
const ids = parseIds(keyword.value)
ids && result.ids.push(...ids)
break
}

View File

@ -1568,6 +1568,85 @@ describe('Article API', () => {
).to.eq(group.name)
})
})
context('when site:youtube.com is in the query', () => {
  // Library items seeded by `before` and removed again by `after`.
  let items: LibraryItem[] = []

  before(async () => {
    keyword = 'site:youtube.com'

    // Saved from youtube.com: the only item the assertions expect back.
    const youtubeVideo = {
      user,
      title: 'test title 1',
      readableContent: '<p>test 1</p>',
      slug: 'test slug 1',
      originalUrl: 'https://www.youtube.com/watch?v=Omnivore',
      itemType: PageType.Video,
    }
    // Saved from the shared test url: not expected in the results.
    const otherItem = {
      user,
      title: 'test title 2',
      readableContent: '<p>test 2</p>',
      slug: 'test slug 2',
      originalUrl: `${url}/test2`,
    }

    items = await createLibraryItems([youtubeVideo, otherItem], user.id)
  })

  after(() => deleteLibraryItems(items, user.id))

  it('returns youtube videos', async () => {
    const { body } = await graphqlRequest(query, authToken).expect(200)
    const { pageInfo, edges } = body.data.search

    // Exactly one hit, and it is the first seeded item.
    expect(pageInfo.totalCount).to.eq(1)
    expect(edges[0].node.id).to.eq(items[0].id)
  })
})
context('when site:wikipedia is in the query', () => {
  // Library items seeded by `before` and removed again by `after`.
  let items: LibraryItem[] = []

  before(async () => {
    keyword = 'site:wikipedia'

    // Saved from en.wikipedia.org: the only item the assertions expect back
    // when searching by the bare hostname "wikipedia".
    const wikipediaPage = {
      user,
      title: 'test title 1',
      readableContent: '<p>test 1</p>',
      slug: 'test slug 1',
      originalUrl: 'https://en.wikipedia.org/wiki/Omnivore',
    }
    // Saved from the shared test url: not expected in the results.
    const otherItem = {
      user,
      title: 'test title 2',
      readableContent: '<p>test 2</p>',
      slug: 'test slug 2',
      originalUrl: `${url}/test2`,
    }

    items = await createLibraryItems([wikipediaPage, otherItem], user.id)
  })

  after(() => deleteLibraryItems(items, user.id))

  it('returns wikipedia pages', async () => {
    const { body } = await graphqlRequest(query, authToken).expect(200)
    const { pageInfo, edges } = body.data.search

    // Exactly one hit, and it is the first seeded item.
    expect(pageInfo.totalCount).to.eq(1)
    expect(edges[0].node.id).to.eq(items[0].id)
  })
})
})
describe('TypeaheadSearch API', () => {

View File

@ -0,0 +1,31 @@
-- Type: DO
-- Name: add_domain_to_site_name_tsv
-- Description: Convert domain to tsvector and add it to site_name_tsv column

BEGIN;

-- Trigger function that rebuilds the row's tsvector columns from its text
-- columns. Besides site_name, site_name_tsv also receives two tokens derived
-- from original_url so that a search by domain ("omnivore.app") or by
-- secondary hostname ("omnivore") can match.
CREATE OR REPLACE FUNCTION update_library_item_tsv() RETURNS trigger AS $$
declare
    -- Splits a URL into capture groups:
    --   \1  optional "http:/" or "https:/" (one more "/" is consumed after it)
    --   \3  optional leading host labels, e.g. "www."
    --   \4  the last two host labels, e.g. "omnivore.app"
    --   \5  the first of those two labels, e.g. "omnivore"
    --   \6  everything after the host
    -- An optional "user:pass@" part is skipped by a non-capturing group so the
    -- group numbers above stay stable.
    --
    -- \3 and \4 are restricted to host characters on purpose. With an
    -- unrestricted ".*" prefix the longest-match rule let the prefix run into
    -- the path or query string, so
    -- "https://docs.python.org/3.11/library/os.path.html" produced
    -- "path.html" as the domain instead of "python.org".
    --
    -- Known limits: multi-label public suffixes are not recognised
    -- ("bbc.co.uk" yields "co.uk"), and when the pattern does not match at all
    -- (e.g. "http://localhost:3000/") regexp_replace returns original_url
    -- unchanged, so the whole URL is what gets converted to a tsvector.
    url_pattern constant text :=
        '^((http[s]?):\/)?\/?(?:[^\/\s@?#]*@)?([^:\/\s?#]*\.)?(([^:\/\s?#]+)\.[^:\/\s?#]+)(.*)$';
begin
    new.content_tsv := to_tsvector('pg_catalog.english', coalesce(new.readable_content, ''));

    new.site_name_tsv :=
        to_tsvector('pg_catalog.english', coalesce(new.site_name, '')) ||
        -- domain (eg omnivore.app)
        to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, url_pattern, '\4'), '')) ||
        -- secondary hostname (eg omnivore)
        to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, url_pattern, '\5'), ''));

    new.title_tsv := to_tsvector('pg_catalog.english', coalesce(new.title, ''));
    new.author_tsv := to_tsvector('pg_catalog.english', coalesce(new.author, ''));
    new.description_tsv := to_tsvector('pg_catalog.english', coalesce(new.description, ''));

    -- note_tsv is generated by both note and highlight_annotations.
    -- array_to_string returns NULL for a NULL array, which would turn the whole
    -- concatenation (and therefore note_tsv and search_tsv) into NULL, so it is
    -- coalesced to an empty string.
    new.note_tsv := to_tsvector(
        'pg_catalog.english',
        coalesce(new.note, '') || ' ' || coalesce(array_to_string(new.highlight_annotations, ' '), '')
    );

    -- Combined search vector: every field is weighted 'A' except the article
    -- body, which is weighted 'B'.
    new.search_tsv :=
        setweight(new.title_tsv, 'A') ||
        setweight(new.author_tsv, 'A') ||
        setweight(new.site_name_tsv, 'A') ||
        setweight(new.description_tsv, 'A') ||
        setweight(new.note_tsv, 'A') ||
        setweight(new.content_tsv, 'B');

    return new;
end
$$ LANGUAGE plpgsql;

COMMIT;

View File

@ -0,0 +1,31 @@
-- Type: UNDO
-- Name: add_domain_to_site_name_tsv
-- Description: Convert domain to tsvector and add it to site_name_tsv column
-- Rollback: redefines update_library_item_tsv() so that site_name_tsv is built
-- from site_name only, and URL-derived host tokens are appended directly to
-- search_tsv instead.
-- NOTE(review): presumably this is the function body from the migration that
-- preceded this one; confirm it matches that definition exactly.
BEGIN;
-- Trigger function: recomputes the row's tsvector columns from its text columns
-- and returns the modified row.
CREATE OR REPLACE FUNCTION update_library_item_tsv() RETURNS trigger AS $$
begin
new.content_tsv := to_tsvector('pg_catalog.english', coalesce(new.readable_content, ''));
-- site name only; no domain tokens in this version
new.site_name_tsv := to_tsvector('pg_catalog.english', coalesce(new.site_name, ''));
new.title_tsv := to_tsvector('pg_catalog.english', coalesce(new.title, ''));
new.author_tsv := to_tsvector('pg_catalog.english', coalesce(new.author, ''));
new.description_tsv := to_tsvector('pg_catalog.english', coalesce(new.description, ''));
-- note_tsv is generated by both note and highlight_annotations
new.note_tsv := to_tsvector('pg_catalog.english', coalesce(new.note, '') || ' ' || array_to_string(new.highlight_annotations, ' '));
-- Combined search vector: every component is weighted 'A' except the article
-- body (content_tsv), which is weighted 'B'.
new.search_tsv :=
setweight(new.title_tsv, 'A') ||
setweight(new.author_tsv, 'A') ||
setweight(new.site_name_tsv, 'A') ||
setweight(new.description_tsv, 'A') ||
-- full hostname (eg www.omnivore.app)
-- \3 is the run of characters other than ':', '/' and whitespace that follows
-- the optional "http(s):/" prefix and optional extra "/".
-- NOTE(review): regexp_replace returns its input unchanged when the pattern
-- does not match, in which case the whole URL is indexed; confirm that is
-- acceptable.
setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A') ||
-- secondary hostname (eg omnivore)
-- \4 is the label captured after the optional leading "(.*\.)?" prefix.
setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.original_url, '^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A') ||
setweight(new.note_tsv, 'A') ||
setweight(new.content_tsv, 'B');
return new;
end
$$ LANGUAGE plpgsql;
COMMIT;