From c4a599d2ba7803a77ee5468958a849e475ed3105 Mon Sep 17 00:00:00 2001
From: Hongbo Wu
Date: Thu, 7 Jul 2022 17:35:57 +0800
Subject: [PATCH] support newsletters hosted on convertkit.com

---
 packages/api/src/utils/parser.ts | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/packages/api/src/utils/parser.ts b/packages/api/src/utils/parser.ts
index cd22e7269..7704ac233 100644
--- a/packages/api/src/utils/parser.ts
+++ b/packages/api/src/utils/parser.ts
@@ -451,6 +451,14 @@ export const isProbablyNewsletter = async (html: string): Promise<boolean> => {
     }
   }
 
+  // Check if this is a convertkit.com newsletter
+  if (dom.querySelectorAll('img[src*="convertkit-mail.com"]').length > 0) {
+    const convertkitUrl = convertkitNewsletterHref(dom)
+    if (convertkitUrl) {
+      return true
+    }
+  }
+
   return false
 }
 
@@ -465,6 +473,17 @@ const beehiivNewsletterHref = (dom: Document): string | undefined => {
   return res
 }
 
+const convertkitNewsletterHref = (dom: Document): string | undefined => {
+  const readOnline = dom.querySelectorAll('table tr td div a')
+  let res: string | undefined = undefined
+  readOnline.forEach((e) => {
+    if (e.textContent === 'View this email in your browser') {
+      res = e.getAttribute('href') || undefined
+    }
+  })
+  return res
+}
+
 const revueNewsletterHref = (dom: Document): string | undefined => {
   const viewOnline = dom.querySelectorAll('table tr td div a[target="_blank"]')
   let res: string | undefined = undefined
@@ -495,6 +514,12 @@ const findNewsletterHeaderHref = (dom: Document): string | undefined => {
     return revue
   }
 
+  // Check if this is a convertkit.com newsletter
+  const convertkitUrl = convertkitNewsletterHref(dom)
+  if (convertkitUrl) {
+    return convertkitUrl
+  }
+
   return undefined
 }
 