From 91952e587b0f1d197407ca665d5d978b5d816738 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 28 Sep 2022 16:06:35 +0800 Subject: [PATCH] Replace newsletter-handler with content-handler --- .../.eslintignore | 0 .../.eslintrc | 0 .../.gitignore | 0 .../.npmignore | 0 .../mocha-config.json | 0 .../package.json | 4 +- packages/content-handler/src/index.ts | 103 ++++++++++++++++++ .../test/babel-register.js | 0 .../test/stub.test.ts | 0 .../tsconfig.json | 2 +- packages/newsletter-handler/index.js | 0 packages/newsletter-handler/src/index.ts | 0 12 files changed, 106 insertions(+), 3 deletions(-) rename packages/{newsletter-handler => content-handler}/.eslintignore (100%) rename packages/{newsletter-handler => content-handler}/.eslintrc (100%) rename packages/{newsletter-handler => content-handler}/.gitignore (100%) rename packages/{newsletter-handler => content-handler}/.npmignore (100%) rename packages/{newsletter-handler => content-handler}/mocha-config.json (100%) rename packages/{newsletter-handler => content-handler}/package.json (73%) create mode 100644 packages/content-handler/src/index.ts rename packages/{newsletter-handler => content-handler}/test/babel-register.js (100%) rename packages/{newsletter-handler => content-handler}/test/stub.test.ts (100%) rename packages/{newsletter-handler => content-handler}/tsconfig.json (85%) delete mode 100644 packages/newsletter-handler/index.js delete mode 100644 packages/newsletter-handler/src/index.ts diff --git a/packages/newsletter-handler/.eslintignore b/packages/content-handler/.eslintignore similarity index 100% rename from packages/newsletter-handler/.eslintignore rename to packages/content-handler/.eslintignore diff --git a/packages/newsletter-handler/.eslintrc b/packages/content-handler/.eslintrc similarity index 100% rename from packages/newsletter-handler/.eslintrc rename to packages/content-handler/.eslintrc diff --git a/packages/newsletter-handler/.gitignore b/packages/content-handler/.gitignore 
/** Unsubscribe targets extracted from a `List-Unsubscribe` header. */
interface Unsubscribe {
  mailTo?: string
  httpUrl?: string
}

/** Normalized newsletter payload returned by `ContentHandler.handleNewsletter`. */
interface NewsletterMessage {
  email: string
  content: string
  url: string
  title: string
  author: string
  unsubMailTo?: string
  unsubHttpUrl?: string
}

/**
 * Base handler for recognising and normalising newsletter content.
 *
 * Relies on three module-level imports: `addressparser` (default export),
 * `rfc2047` (default export) and `v4 as uuidv4` from `uuid`.
 *
 * NOTE(review): the regex/url fields below hold placeholder values and are
 * `protected`, so they are presumably meant to be overridden by concrete
 * handlers — confirm against the subclasses.
 */
export class ContentHandler {
  protected senderRegex = /NEWSLETTER_SENDER_REGEX/
  protected urlRegex = /NEWSLETTER_URL_REGEX/
  protected defaultUrl = 'NEWSLETTER_DEFAULT_URL'
  protected name = ''

  /**
   * Tells whether `prehandle` should be run for the given page.
   * The base implementation never opts in.
   */
  shouldPrehandle(url: URL, dom: Document): boolean {
    return false
  }

  /**
   * Hook for transforming a document before further processing.
   * The base implementation resolves with the document unchanged.
   */
  prehandle(url: URL, document: Document): Promise<Document> {
    return Promise.resolve(document)
  }

  /**
   * Decides whether an email is a newsletter this handler understands.
   *
   * @param postHeader - value of the post header, empty when absent
   * @param from - raw `From` header value
   * @param unSubHeader - raw `List-Unsubscribe` header value, empty when absent
   * @returns true when the sender matches `senderRegex` and at least one of
   *   the two headers is non-empty
   */
  isNewsletter(postHeader: string, from: string, unSubHeader: string): boolean {
    // Test against a fresh copy so a stateful (global/sticky) pattern never
    // carries `lastIndex` from one call to the next.
    const re = new RegExp(this.senderRegex)
    return re.test(from) && (!!postHeader || !!unSubHeader)
  }

  /**
   * Extracts the newsletter's canonical url from the email html.
   *
   * @returns the first capture group of `urlRegex`, or `undefined` when the
   *   pattern does not match (or has no capture group)
   */
  parseNewsletterUrl(_postHeader: string, html: string): string | undefined {
    return html.match(this.urlRegex)?.[1]
  }

  /**
   * Extracts the author's display name from a `From` header,
   * e.g. 'Jane Doe <jane@example.com>' yields 'Jane Doe'.
   *
   * @returns the display name, or the raw `from` value when the header cannot
   *   be parsed or carries no display name
   */
  parseAuthor(from: string): string {
    const parsed = addressparser(from)
    // `||` is deliberate: a bare address parses to an empty name, which is
    // useless as an author, so fall back to the raw header in that case too.
    return parsed[0]?.name || from
  }

  /**
   * Parses a `List-Unsubscribe` header,
   * e.g. `<https://example.com/unsubscribe>, <mailto:unsubscribe@example.com>`.
   *
   * @returns `mailTo` holding the address from the `mailto:` entry (scheme
   *   removed) and `httpUrl` holding the full http(s) url; each is `undefined`
   *   when the header has no such entry
   */
  parseUnsubscribe(unSubHeader: string): Unsubscribe {
    // the header may contain RFC 2047 encoded words
    const decoded = rfc2047.decode(unSubHeader)
    return {
      mailTo: decoded.match(/<mailto:([^>]*)>/i)?.[1],
      httpUrl: decoded.match(/<(https?:\/\/[^>]*)>/i)?.[1],
    }
  }

  /**
   * Builds the normalized newsletter message for an incoming email.
   *
   * @throws Error when `email`, `html`, `title` or `from` is empty
   */
  async handleNewsletter(
    email: string,
    html: string,
    postHeader: string,
    title: string,
    from: string,
    unSubHeader: string
  ): Promise<NewsletterMessage> {
    console.log('handleNewsletter', email, postHeader, title, from)

    if (!email || !html || !title || !from) {
      console.log('invalid newsletter email')
      throw new Error('invalid newsletter email')
    }

    // fallback to default url if newsletter url does not exist
    // assign a random uuid to the default url to avoid duplicate url
    const url =
      this.parseNewsletterUrl(postHeader, html) ||
      `${this.defaultUrl}?source=newsletters&id=${uuidv4()}`
    const author = this.parseAuthor(from)
    const unsubscribe = this.parseUnsubscribe(unSubHeader)

    return {
      email,
      content: html,
      url,
      title,
      author,
      unsubMailTo: unsubscribe.mailTo || '',
      unsubHttpUrl: unsubscribe.httpUrl || '',
    }
  }
}
a/packages/newsletter-handler/index.js b/packages/newsletter-handler/index.js deleted file mode 100644 index e69de29bb..000000000 diff --git a/packages/newsletter-handler/src/index.ts b/packages/newsletter-handler/src/index.ts deleted file mode 100644 index e69de29bb..000000000