From df3a0e1640fd522915debd986f230c71d849a784 Mon Sep 17 00:00:00 2001
From: Jackson Harper
Date: Thu, 24 Feb 2022 19:18:37 -0800
Subject: [PATCH] Add the link domain to the search index

This adds the domain of the saved link to the search index in
two formats, the full domain like www.omnivore.app and just the
secondary domain like `omnivore`.

This also adds some ranking to search, making title, description,
and domain more highly ranked than the content but search results
won't change as we still order by saved_at.
---
 .../0071.do.add_domain_to_search_index.sql    | 45 +++++++++++++++++++
 .../0071.undo.add_domain_to_search_index.sql  | 19 ++++++++
 2 files changed, 64 insertions(+)
 create mode 100755 packages/db/migrations/0071.do.add_domain_to_search_index.sql
 create mode 100755 packages/db/migrations/0071.undo.add_domain_to_search_index.sql

diff --git a/packages/db/migrations/0071.do.add_domain_to_search_index.sql b/packages/db/migrations/0071.do.add_domain_to_search_index.sql
new file mode 100755
index 000000000..84c2d9be2
--- /dev/null
+++ b/packages/db/migrations/0071.do.add_domain_to_search_index.sql
@@ -0,0 +1,45 @@
+-- Type: DO
+-- Name: add_domain_to_search_index
+-- Description: Add the site's domain to the search index
+
+BEGIN;
+
+-- Trigger function that rebuilds pages.tsv on every insert/update.
+-- Weight A: title, author, description, the URL's full host name
+-- (e.g. www.omnivore.app) and its secondary domain (e.g. omnivore).
+-- Weight B: the page content.
+-- NOTE: regexp_replace returns its input unchanged when the pattern
+-- does not match, so a URL neither pattern matches is indexed whole.
+CREATE OR REPLACE FUNCTION update_page_tsv() RETURNS trigger AS $$
+begin
+  new.tsv :=
+    setweight(to_tsvector('pg_catalog.english', coalesce(new.title, '')), 'A') ||
+    setweight(to_tsvector('pg_catalog.english', coalesce(new.author, '')), 'A') ||
+    setweight(to_tsvector('pg_catalog.english', coalesce(new.description,'')), 'A') ||
+    setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.url, '^((http[s]?):\/)?\/?([^:\/\s]+)((\/\w+)*\/)([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\3'), '')), 'A') ||
+    setweight(to_tsvector('pg_catalog.english', coalesce(regexp_replace(new.url, '^((http[s]?):\/)?\/?(.*\.)?([^:\/\s]+)(\..*)((\/+)*\/)?([\w\-\.]+[^#?\s]+)(.*)?(#[\w\-]+)?$', '\4'), '')), 'A') ||
+    setweight(to_tsvector('pg_catalog.english', coalesce(new.content,'')), 'B');
+  return new;
+end
+$$ LANGUAGE plpgsql;
+
+-- CREATE OR REPLACE TRIGGER needs PostgreSQL 14+, so drop and recreate
+-- instead. The trigger is created directly as page_tsv_update since we
+-- aren't using Article naming anymore.
+DROP TRIGGER IF EXISTS article_tsv_update ON omnivore.pages;
+DROP TRIGGER IF EXISTS page_tsv_update ON omnivore.pages;
+CREATE TRIGGER page_tsv_update BEFORE INSERT OR UPDATE
+    ON omnivore.pages FOR EACH ROW EXECUTE PROCEDURE update_page_tsv();
+
+COMMIT;
+
+BEGIN;
+-- This will force all the text vectors to be
+-- recreated.
+-- We need to do it in a separate transaction
+-- block though, otherwise the trigger won't be
+-- executed on update.
+-- Assigning updated_at to itself still fires the row-level
+-- trigger for every page without overwriting the timestamps.
+UPDATE omnivore.pages SET updated_at = updated_at;
+COMMIT;
\ No newline at end of file
diff --git a/packages/db/migrations/0071.undo.add_domain_to_search_index.sql b/packages/db/migrations/0071.undo.add_domain_to_search_index.sql
new file mode 100755
index 000000000..e79d9e19f
--- /dev/null
+++ b/packages/db/migrations/0071.undo.add_domain_to_search_index.sql
@@ -0,0 +1,19 @@
+-- Type: UNDO
+-- Name: add_domain_to_search_index
+-- Description: Remove the site's domain from the search index
+
+BEGIN;
+
+DROP TRIGGER IF EXISTS page_tsv_update ON omnivore.pages;
+DROP FUNCTION IF EXISTS update_page_tsv();
+
+-- Recreate article_tsv_update on the built-in tsvector_update_trigger.
+-- CREATE OR REPLACE TRIGGER needs PostgreSQL 14+, so drop and recreate.
+DROP TRIGGER IF EXISTS article_tsv_update ON omnivore.pages;
+CREATE TRIGGER article_tsv_update BEFORE INSERT OR UPDATE
+    ON omnivore.pages FOR EACH ROW EXECUTE PROCEDURE
+    tsvector_update_trigger(
+        tsv, 'pg_catalog.english', content, title, description
+);
+
+COMMIT;