From 88f6e20bfc4bb8833d6a03f604e13c5e6781036d Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 30 Jan 2023 18:52:38 +0800 Subject: [PATCH] Pass email headers to the content-handler --- .../content-handler/src/content-handler.ts | 39 ++++---- packages/content-handler/src/index.ts | 5 +- .../src/newsletters/beehiiv-handler.ts | 5 +- .../src/newsletters/convertkit-handler.ts | 5 +- .../src/newsletters/cooper-press-handler.ts | 5 +- .../src/newsletters/ghost-handler.ts | 5 +- .../src/newsletters/hey-world-handler.ts | 2 +- .../src/newsletters/revue-handler.ts | 5 +- .../src/newsletters/substack-handler.ts | 10 +- .../content-handler/test/newsletter.test.ts | 92 ++++++++----------- packages/inbound-email-handler/src/index.ts | 8 +- 11 files changed, 80 insertions(+), 101 deletions(-) diff --git a/packages/content-handler/src/content-handler.ts b/packages/content-handler/src/content-handler.ts index 83a5b7e35..ebdf81d3e 100644 --- a/packages/content-handler/src/content-handler.ts +++ b/packages/content-handler/src/content-handler.ts @@ -11,12 +11,11 @@ interface Unsubscribe { } export interface NewsletterInput { - postHeader: string from: string - unSubHeader: string - email: string + to: string + subject: string html: string - title: string + headers: Record } export interface NewsletterResult { @@ -76,15 +75,16 @@ export abstract class ContentHandler { } async isNewsletter(input: { - postHeader: string from: string - unSubHeader: string html: string + headers: Record dom: Document }): Promise { const re = new RegExp(this.senderRegex) + const postHeader = input.headers['list-post'] + const unSubHeader = input.headers['list-unsubscribe'] return Promise.resolve( - re.test(input.from) && (!!input.postHeader || !!input.unSubHeader) + re.test(input.from) && (!!postHeader || !!unSubHeader) ) } @@ -118,7 +118,7 @@ export abstract class ContentHandler { } async parseNewsletterUrl( - _postHeader: string, + headers: Record, html: string ): Promise { // get newsletter url from html @@ -151,16 +151,15 @@ export abstract class ContentHandler { } async handleNewsletter({ - email, - html, - postHeader, - title, from, - unSubHeader, + to, + subject, + html, + headers, }: NewsletterInput): Promise { - console.log('handleNewsletter', email, postHeader, title, from) + console.log('handleNewsletter', from, to, subject, headers, from) - if (!email || !html || !title || !from) { + if (!from || !html || !subject || !to) { console.log('invalid newsletter email') throw new Error('invalid newsletter email') } @@ -168,15 +167,17 @@ export abstract class ContentHandler { // fallback to default url if newsletter url does not exist // assign a random uuid to the default url to avoid duplicate url const url = - (await this.parseNewsletterUrl(postHeader, html)) || generateUniqueUrl() + (await this.parseNewsletterUrl(headers, html)) || generateUniqueUrl() const author = this.parseAuthor(from) - const unsubscribe = this.parseUnsubscribe(unSubHeader) + const unsubscribe = this.parseUnsubscribe( + headers['list-unsubscribe']?.toString() + ) return { - email, + email: to, content: html, url, - title, + title: subject, author, unsubMailTo: unsubscribe.mailTo || '', unsubHttpUrl: unsubscribe.httpUrl || '', diff --git a/packages/content-handler/src/index.ts b/packages/content-handler/src/index.ts index b04d997b9..fb6491f59 100644 --- a/packages/content-handler/src/index.ts +++ b/packages/content-handler/src/index.ts @@ -131,10 +131,9 @@ export const preParseContent = async ( } export const getNewsletterHandler = async (input: { - postHeader: string from: string - unSubHeader: string html: string + headers: Record }): Promise => { const dom = parseHTML(input.html).document for (const handler of newsletterHandlers) { @@ -151,7 +150,7 @@ export const handleNewsletter = async ( ): Promise => { const handler = await getNewsletterHandler(input) if (handler) { - console.log('handleNewsletter', handler.name, input.title) + console.log('handleNewsletter', handler.name, input.subject) return handler.handleNewsletter(input) } diff --git a/packages/content-handler/src/newsletters/beehiiv-handler.ts b/packages/content-handler/src/newsletters/beehiiv-handler.ts index 332e7a050..14702d437 100644 --- a/packages/content-handler/src/newsletters/beehiiv-handler.ts +++ b/packages/content-handler/src/newsletters/beehiiv-handler.ts @@ -18,9 +18,8 @@ export class BeehiivHandler extends ContentHandler { } async isNewsletter(input: { - postHeader: string from: string - unSubHeader: string + headers: Record dom: Document }): Promise { const dom = input.dom @@ -34,7 +33,7 @@ export class BeehiivHandler extends ContentHandler { } async parseNewsletterUrl( - postHeader: string, + headers: Record, html: string ): Promise { return this.findNewsletterUrl(html) diff --git a/packages/content-handler/src/newsletters/convertkit-handler.ts b/packages/content-handler/src/newsletters/convertkit-handler.ts index cf0b818b5..bfcad2c69 100644 --- a/packages/content-handler/src/newsletters/convertkit-handler.ts +++ b/packages/content-handler/src/newsletters/convertkit-handler.ts @@ -21,10 +21,9 @@ export class ConvertkitHandler extends ContentHandler { } async isNewsletter(input: { - postHeader: string from: string - unSubHeader: string dom: Document + headers: Record }): Promise { const dom = input.dom const icons = dom.querySelectorAll( @@ -45,7 +44,7 @@ export class ConvertkitHandler extends ContentHandler { } async parseNewsletterUrl( - postHeader: string, + headers: Record, html: string ): Promise { return this.findNewsletterUrl(html) diff --git a/packages/content-handler/src/newsletters/cooper-press-handler.ts b/packages/content-handler/src/newsletters/cooper-press-handler.ts index d0f39f4c2..547bdc54b 100644 --- a/packages/content-handler/src/newsletters/cooper-press-handler.ts +++ b/packages/content-handler/src/newsletters/cooper-press-handler.ts @@ -18,10 +18,9 @@ export class CooperPressHandler extends ContentHandler { } async isNewsletter(input: { - postHeader: string from: string - unSubHeader: string dom: Document + headers: Record }): Promise { const dom = input.dom return Promise.resolve( @@ -30,7 +29,7 @@ export class CooperPressHandler extends ContentHandler { } async parseNewsletterUrl( - postHeader: string, + headers: Record, html: string ): Promise { return this.findNewsletterUrl(html) diff --git a/packages/content-handler/src/newsletters/ghost-handler.ts b/packages/content-handler/src/newsletters/ghost-handler.ts index ca3e35a85..96f9639e2 100644 --- a/packages/content-handler/src/newsletters/ghost-handler.ts +++ b/packages/content-handler/src/newsletters/ghost-handler.ts @@ -12,10 +12,9 @@ export class GhostHandler extends ContentHandler { } async isNewsletter(input: { - postHeader: string from: string - unSubHeader: string dom: Document + headers: Record }): Promise { const dom = input.dom return Promise.resolve( @@ -24,7 +23,7 @@ export class GhostHandler extends ContentHandler { } async parseNewsletterUrl( - postHeader: string, + headers: Record, html: string ): Promise { return this.findNewsletterUrl(html) diff --git a/packages/content-handler/src/newsletters/hey-world-handler.ts b/packages/content-handler/src/newsletters/hey-world-handler.ts index 5542bc2ef..d52fa612d 100644 --- a/packages/content-handler/src/newsletters/hey-world-handler.ts +++ b/packages/content-handler/src/newsletters/hey-world-handler.ts @@ -19,7 +19,7 @@ export class HeyWorldHandler extends ContentHandler { } async parseNewsletterUrl( - postHeader: string, + headers: Record, html: string ): Promise { return this.findNewsletterUrl(html) diff --git a/packages/content-handler/src/newsletters/revue-handler.ts b/packages/content-handler/src/newsletters/revue-handler.ts index cd22d314c..57992c2af 100644 --- a/packages/content-handler/src/newsletters/revue-handler.ts +++ b/packages/content-handler/src/newsletters/revue-handler.ts @@ -18,10 +18,9 @@ export class RevueHandler extends ContentHandler { } async isNewsletter(input: { - postHeader: string from: string - unSubHeader: string dom: Document + headers: Record }): Promise { const dom = input.dom if ( @@ -37,7 +36,7 @@ export class RevueHandler extends ContentHandler { } async parseNewsletterUrl( - postHeader: string, + headers: Record, html: string ): Promise { return this.findNewsletterUrl(html) diff --git a/packages/content-handler/src/newsletters/substack-handler.ts b/packages/content-handler/src/newsletters/substack-handler.ts index ce45acd67..e0c87a159 100644 --- a/packages/content-handler/src/newsletters/substack-handler.ts +++ b/packages/content-handler/src/newsletters/substack-handler.ts @@ -51,15 +51,14 @@ export class SubstackHandler extends ContentHandler { } async isNewsletter({ - postHeader, + headers, dom, }: { - postHeader: string from: string - unSubHeader: string + headers: Record dom: Document }): Promise { - if (postHeader) { + if (headers['list-post']) { return Promise.resolve(true) } // substack newsletter emails have tables with a *post-meta class @@ -85,11 +84,12 @@ export class SubstackHandler extends ContentHandler { } async parseNewsletterUrl( - postHeader: string, + headers: Record, html: string ): Promise { // raw SubStack newsletter url is like // we need to get the real url from the raw url + const postHeader = headers['list-post']?.toString() if (postHeader && addressparser(postHeader).length > 0) { return Promise.resolve(addressparser(postHeader)[0].name) } diff --git a/packages/content-handler/test/newsletter.test.ts b/packages/content-handler/test/newsletter.test.ts index 5e64655ca..2e569029c 100644 --- a/packages/content-handler/test/newsletter.test.ts +++ b/packages/content-handler/test/newsletter.test.ts @@ -1,23 +1,23 @@ -import 'mocha' -import * as chai from 'chai' -import { expect } from 'chai' -import chaiAsPromised from 'chai-as-promised' -import chaiString from 'chai-string' -import { SubstackHandler } from '../src/newsletters/substack-handler' -import { AxiosHandler } from '../src/newsletters/axios-handler' -import { BloombergNewsletterHandler } from '../src/newsletters/bloomberg-newsletter-handler' -import { GolangHandler } from '../src/newsletters/golang-handler' -import { MorningBrewHandler } from '../src/newsletters/morning-brew-handler' -import nock from 'nock' -import { generateUniqueUrl } from '../src/content-handler' -import fs from 'fs' -import { BeehiivHandler } from '../src/newsletters/beehiiv-handler' -import { ConvertkitHandler } from '../src/newsletters/convertkit-handler' -import { GhostHandler } from '../src/newsletters/ghost-handler' -import { CooperPressHandler } from '../src/newsletters/cooper-press-handler' -import { getNewsletterHandler } from '../src' -import { parseHTML } from 'linkedom' -import { HeyWorldHandler } from '../src/newsletters/hey-world-handler' +import "mocha"; +import * as chai from "chai"; +import { expect } from "chai"; +import chaiAsPromised from "chai-as-promised"; +import chaiString from "chai-string"; +import { SubstackHandler } from "../src/newsletters/substack-handler"; +import { AxiosHandler } from "../src/newsletters/axios-handler"; +import { BloombergNewsletterHandler } from "../src/newsletters/bloomberg-newsletter-handler"; +import { GolangHandler } from "../src/newsletters/golang-handler"; +import { MorningBrewHandler } from "../src/newsletters/morning-brew-handler"; +import nock from "nock"; +import { generateUniqueUrl } from "../src/content-handler"; +import fs from "fs"; +import { BeehiivHandler } from "../src/newsletters/beehiiv-handler"; +import { ConvertkitHandler } from "../src/newsletters/convertkit-handler"; +import { GhostHandler } from "../src/newsletters/ghost-handler"; +import { CooperPressHandler } from "../src/newsletters/cooper-press-handler"; +import { getNewsletterHandler } from "../src"; +import { parseHTML } from "linkedom"; +import { HeyWorldHandler } from "../src/newsletters/hey-world-handler"; chai.use(chaiAsPromised) chai.use(chaiString) @@ -29,10 +29,10 @@ const load = (path: string): string => { describe('Newsletter email test', () => { describe('#getNewsletterUrl()', () => { it('returns url when email is from SubStack', async () => { - const rawUrl = '' + const headers = { 'list-post': '' } await expect( - new SubstackHandler().parseNewsletterUrl(rawUrl, '') + new SubstackHandler().parseNewsletterUrl(headers, '') ).to.eventually.equal('https://hongbo130.substack.com/p/tldr') }) @@ -41,7 +41,7 @@ describe('Newsletter email test', () => { const html = `View in browser at ${url}` await expect( - new AxiosHandler().parseNewsletterUrl('', html) + new AxiosHandler().parseNewsletterUrl({}, html) ).to.eventually.equal(url) }) @@ -54,7 +54,7 @@ describe('Newsletter email test', () => { ` await expect( - new BloombergNewsletterHandler().parseNewsletterUrl('', html) + new BloombergNewsletterHandler().parseNewsletterUrl({}, html) ).to.eventually.equal(url) }) @@ -65,7 +65,7 @@ describe('Newsletter email test', () => { ` await expect( - new GolangHandler().parseNewsletterUrl('', html) + new GolangHandler().parseNewsletterUrl({}, html) ).to.eventually.equal(url) }) @@ -76,7 +76,7 @@ describe('Newsletter email test', () => { ` await expect( - new MorningBrewHandler().parseNewsletterUrl('', html) + new MorningBrewHandler().parseNewsletterUrl({}, html) ).to.eventually.equal(url) }) }) @@ -105,9 +105,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/substack-forwarded-newsletter.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(SubstackHandler) }) @@ -118,9 +117,8 @@ describe('Newsletter email test', () => { ) const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(SubstackHandler) }) @@ -129,9 +127,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/substack-forwarded-welcome-email.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.undefined }) @@ -142,9 +139,8 @@ describe('Newsletter email test', () => { ) const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(SubstackHandler) }) @@ -157,9 +153,8 @@ describe('Newsletter email test', () => { ) const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(SubstackHandler) @@ -178,9 +173,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/beehiiv-newsletter.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(BeehiivHandler) }) @@ -189,9 +183,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/milkroad-newsletter.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(BeehiivHandler) }) @@ -200,9 +193,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/ghost-newsletter.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(GhostHandler) }) @@ -211,9 +203,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/convertkit-newsletter.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(ConvertkitHandler) }) @@ -222,9 +213,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/node-weekly-newsletter.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(CooperPressHandler) }) @@ -233,10 +223,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/hey-world-newsletter.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: 'Hongbo Wu ', - unSubHeader: - '', + headers: {'list-unsubscribe': ''}, }) expect(handler).to.be.instanceOf(HeyWorldHandler) }) @@ -245,9 +233,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/tomasz-tunguz-newsletter.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.instanceOf(ConvertkitHandler) }) @@ -256,9 +243,8 @@ describe('Newsletter email test', () => { const html = load('./test/data/convertkit-confirmation.html') const handler = await getNewsletterHandler({ html, - postHeader: '', from: '', - unSubHeader: '', + headers: {}, }) expect(handler).to.be.undefined }) diff --git a/packages/inbound-email-handler/src/index.ts b/packages/inbound-email-handler/src/index.ts index 5b214b1dd..12e587e3c 100644 --- a/packages/inbound-email-handler/src/index.ts +++ b/packages/inbound-email-handler/src/index.ts @@ -109,7 +109,6 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction( // if an email is forwarded to the inbox, the to is the forwarding email recipient const to = forwardedTo || parsed['to'] - const postHeader = headers['list-post']?.toString() const unSubHeader = headers['list-unsubscribe']?.toString() const { id: receivedEmailId } = await saveReceivedEmail(to, { @@ -124,11 +123,10 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction( // check if it is a confirmation email or forwarding newsletter const newsletterMessage = await handleNewsletter({ from, + to, + subject, html, - postHeader, - unSubHeader, - email: to, - title: subject, + headers, }) if (newsletterMessage) { await publishMessage(NEWSLETTER_EMAIL_RECEIVED_TOPIC, {