Add the link domain to the search index

This adds the domain of the saved link to the search index in
two formats: the full domain, like `www.omnivore.app`, and just
the secondary domain, like `omnivore`.

This also adds some ranking to search, making title, description,
and domain more highly ranked than the content, but search results
won't change as we still order by saved_at.
This commit is contained in:
Jackson Harper
2022-02-24 19:18:37 -08:00
parent 01d96c98db
commit df3a0e1640
2 changed files with 51 additions and 0 deletions

View File

@ -0,0 +1,35 @@
-- Type: DO
-- Name: add_domain_to_search_index
-- Description: Add the site's domain to the search index
BEGIN;
-- Trigger function that rebuilds the row's search vector (new.tsv).
--
-- Weight 'A': title, author, description, and two values derived from
--             new.url via regexp_replace (see below).
-- Weight 'B': content.
--
-- Every input is wrapped in coalesce(..., '') so a NULL column contributes
-- an empty vector instead of turning the whole concatenation NULL.
--
-- NOTE: regexp_replace returns its input unchanged when the pattern does
-- not match, so a URL that does not fit a pattern is indexed as-is.
CREATE OR REPLACE FUNCTION update_page_tsv() RETURNS trigger AS $$
declare
  title_vec       tsvector;
  author_vec      tsvector;
  description_vec tsvector;
  host_vec        tsvector;
  site_vec        tsvector;
  content_vec     tsvector;
begin
  title_vec       := setweight(to_tsvector('pg_catalog.english', coalesce(new.title, '')), 'A');
  author_vec      := setweight(to_tsvector('pg_catalog.english', coalesce(new.author, '')), 'A');
  description_vec := setweight(to_tsvector('pg_catalog.english', coalesce(new.description,'')), 'A');

  -- Capture group 3: the run of characters after the optional scheme that
  -- contains no ':', '/' or whitespace (the full domain, e.g. www.omnivore.app).
  host_vec := setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A');

  -- Capture group 4: intended to be the secondary domain on its own
  -- (e.g. omnivore), i.e. the label between an optional dotted prefix and
  -- the remaining dotted suffix.
  site_vec := setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.url, '^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A');

  content_vec := setweight(to_tsvector('pg_catalog.english', coalesce(new.content,'')), 'B');

  -- Concatenation order is kept stable: it determines lexeme positions.
  new.tsv := title_vec || author_vec || description_vec || host_vec || site_vec || content_vec;

  return new;
end
$$ LANGUAGE plpgsql;
-- (Re)define the row-level trigger under its current name so that every
-- INSERT or UPDATE on omnivore.pages runs update_page_tsv() first.
CREATE OR REPLACE TRIGGER article_tsv_update
  BEFORE INSERT OR UPDATE
  ON omnivore.pages
  FOR EACH ROW
  EXECUTE FUNCTION update_page_tsv();

-- The "Article" naming is no longer used, so the trigger is renamed to
-- follow the page* convention.
ALTER TRIGGER article_tsv_update
  ON omnivore.pages
  RENAME TO page_tsv_update;
COMMIT;
BEGIN;
-- Force the text vector of every existing row to be recreated by the
-- BEFORE INSERT OR UPDATE row trigger on omnivore.pages.
--
-- A row-level UPDATE trigger fires for each row the statement touches,
-- even when the assigned value equals the stored one, so assigning the
-- column to itself is enough. Unlike SET updated_at = NOW(), this does
-- not overwrite the real modification time of every page.
-- NOTE(review): if another trigger on this table maintains updated_at,
-- that trigger still decides the final value -- confirm.
--
-- The UPDATE deliberately has no WHERE clause: all rows must be reindexed.
--
-- This runs in its own transaction block, after the one that installs
-- the trigger; the original migration notes that otherwise the trigger
-- is not executed on update.
UPDATE omnivore.pages
   SET updated_at = updated_at;
COMMIT;

View File

@ -0,0 +1,16 @@
-- Type: UNDO
-- Name: add_domain_to_search_index
-- Description: Add the site's domain to the search index
BEGIN;

-- Remove the custom trigger first, then the function it depends on.
DROP TRIGGER IF EXISTS page_tsv_update
  ON omnivore.pages;

DROP FUNCTION IF EXISTS update_page_tsv();

-- Put back a trigger named article_tsv_update that fills tsv using
-- PostgreSQL's built-in tsvector_update_trigger with the English
-- configuration over the content, title and description columns.
CREATE OR REPLACE TRIGGER article_tsv_update
  BEFORE INSERT OR UPDATE
  ON omnivore.pages
  FOR EACH ROW
  EXECUTE FUNCTION tsvector_update_trigger(
    tsv, 'pg_catalog.english', content, title, description
  );

COMMIT;