Pass email headers to the content-handler

This commit is contained in:
Hongbo Wu
2023-01-30 18:52:38 +08:00
parent 341f88c4d6
commit 88f6e20bfc
11 changed files with 80 additions and 101 deletions

View File

@ -11,12 +11,11 @@ interface Unsubscribe {
} }
export interface NewsletterInput { export interface NewsletterInput {
postHeader: string
from: string from: string
unSubHeader: string to: string
email: string subject: string
html: string html: string
title: string headers: Record<string, string | string[]>
} }
export interface NewsletterResult { export interface NewsletterResult {
@ -76,15 +75,16 @@ export abstract class ContentHandler {
} }
async isNewsletter(input: { async isNewsletter(input: {
postHeader: string
from: string from: string
unSubHeader: string
html: string html: string
headers: Record<string, string | string[]>
dom: Document dom: Document
}): Promise<boolean> { }): Promise<boolean> {
const re = new RegExp(this.senderRegex) const re = new RegExp(this.senderRegex)
const postHeader = input.headers['list-post']
const unSubHeader = input.headers['list-unsubscribe']
return Promise.resolve( return Promise.resolve(
re.test(input.from) && (!!input.postHeader || !!input.unSubHeader) re.test(input.from) && (!!postHeader || !!unSubHeader)
) )
} }
@ -118,7 +118,7 @@ export abstract class ContentHandler {
} }
async parseNewsletterUrl( async parseNewsletterUrl(
_postHeader: string, headers: Record<string, string | string[]>,
html: string html: string
): Promise<string | undefined> { ): Promise<string | undefined> {
// get newsletter url from html // get newsletter url from html
@ -151,16 +151,15 @@ export abstract class ContentHandler {
} }
async handleNewsletter({ async handleNewsletter({
email,
html,
postHeader,
title,
from, from,
unSubHeader, to,
subject,
html,
headers,
}: NewsletterInput): Promise<NewsletterResult> { }: NewsletterInput): Promise<NewsletterResult> {
console.log('handleNewsletter', email, postHeader, title, from) console.log('handleNewsletter', from, to, subject, headers, from)
if (!email || !html || !title || !from) { if (!from || !html || !subject || !to) {
console.log('invalid newsletter email') console.log('invalid newsletter email')
throw new Error('invalid newsletter email') throw new Error('invalid newsletter email')
} }
@ -168,15 +167,17 @@ export abstract class ContentHandler {
// fallback to default url if newsletter url does not exist // fallback to default url if newsletter url does not exist
// assign a random uuid to the default url to avoid duplicate url // assign a random uuid to the default url to avoid duplicate url
const url = const url =
(await this.parseNewsletterUrl(postHeader, html)) || generateUniqueUrl() (await this.parseNewsletterUrl(headers, html)) || generateUniqueUrl()
const author = this.parseAuthor(from) const author = this.parseAuthor(from)
const unsubscribe = this.parseUnsubscribe(unSubHeader) const unsubscribe = this.parseUnsubscribe(
headers['list-unsubscribe']?.toString()
)
return { return {
email, email: to,
content: html, content: html,
url, url,
title, title: subject,
author, author,
unsubMailTo: unsubscribe.mailTo || '', unsubMailTo: unsubscribe.mailTo || '',
unsubHttpUrl: unsubscribe.httpUrl || '', unsubHttpUrl: unsubscribe.httpUrl || '',

View File

@ -131,10 +131,9 @@ export const preParseContent = async (
} }
export const getNewsletterHandler = async (input: { export const getNewsletterHandler = async (input: {
postHeader: string
from: string from: string
unSubHeader: string
html: string html: string
headers: Record<string, string | string[]>
}): Promise<ContentHandler | undefined> => { }): Promise<ContentHandler | undefined> => {
const dom = parseHTML(input.html).document const dom = parseHTML(input.html).document
for (const handler of newsletterHandlers) { for (const handler of newsletterHandlers) {
@ -151,7 +150,7 @@ export const handleNewsletter = async (
): Promise<NewsletterResult | undefined> => { ): Promise<NewsletterResult | undefined> => {
const handler = await getNewsletterHandler(input) const handler = await getNewsletterHandler(input)
if (handler) { if (handler) {
console.log('handleNewsletter', handler.name, input.title) console.log('handleNewsletter', handler.name, input.subject)
return handler.handleNewsletter(input) return handler.handleNewsletter(input)
} }

View File

@ -18,9 +18,8 @@ export class BeehiivHandler extends ContentHandler {
} }
async isNewsletter(input: { async isNewsletter(input: {
postHeader: string
from: string from: string
unSubHeader: string headers: Record<string, string | string[]>
dom: Document dom: Document
}): Promise<boolean> { }): Promise<boolean> {
const dom = input.dom const dom = input.dom
@ -34,7 +33,7 @@ export class BeehiivHandler extends ContentHandler {
} }
async parseNewsletterUrl( async parseNewsletterUrl(
postHeader: string, headers: Record<string, string | string[]>,
html: string html: string
): Promise<string | undefined> { ): Promise<string | undefined> {
return this.findNewsletterUrl(html) return this.findNewsletterUrl(html)

View File

@ -21,10 +21,9 @@ export class ConvertkitHandler extends ContentHandler {
} }
async isNewsletter(input: { async isNewsletter(input: {
postHeader: string
from: string from: string
unSubHeader: string
dom: Document dom: Document
headers: Record<string, string | string[]>
}): Promise<boolean> { }): Promise<boolean> {
const dom = input.dom const dom = input.dom
const icons = dom.querySelectorAll( const icons = dom.querySelectorAll(
@ -45,7 +44,7 @@ export class ConvertkitHandler extends ContentHandler {
} }
async parseNewsletterUrl( async parseNewsletterUrl(
postHeader: string, headers: Record<string, string | string[]>,
html: string html: string
): Promise<string | undefined> { ): Promise<string | undefined> {
return this.findNewsletterUrl(html) return this.findNewsletterUrl(html)

View File

@ -18,10 +18,9 @@ export class CooperPressHandler extends ContentHandler {
} }
async isNewsletter(input: { async isNewsletter(input: {
postHeader: string
from: string from: string
unSubHeader: string
dom: Document dom: Document
headers: Record<string, string | string[]>
}): Promise<boolean> { }): Promise<boolean> {
const dom = input.dom const dom = input.dom
return Promise.resolve( return Promise.resolve(
@ -30,7 +29,7 @@ export class CooperPressHandler extends ContentHandler {
} }
async parseNewsletterUrl( async parseNewsletterUrl(
postHeader: string, headers: Record<string, string | string[]>,
html: string html: string
): Promise<string | undefined> { ): Promise<string | undefined> {
return this.findNewsletterUrl(html) return this.findNewsletterUrl(html)

View File

@ -12,10 +12,9 @@ export class GhostHandler extends ContentHandler {
} }
async isNewsletter(input: { async isNewsletter(input: {
postHeader: string
from: string from: string
unSubHeader: string
dom: Document dom: Document
headers: Record<string, string | string[]>
}): Promise<boolean> { }): Promise<boolean> {
const dom = input.dom const dom = input.dom
return Promise.resolve( return Promise.resolve(
@ -24,7 +23,7 @@ export class GhostHandler extends ContentHandler {
} }
async parseNewsletterUrl( async parseNewsletterUrl(
postHeader: string, headers: Record<string, string | string[]>,
html: string html: string
): Promise<string | undefined> { ): Promise<string | undefined> {
return this.findNewsletterUrl(html) return this.findNewsletterUrl(html)

View File

@ -19,7 +19,7 @@ export class HeyWorldHandler extends ContentHandler {
} }
async parseNewsletterUrl( async parseNewsletterUrl(
postHeader: string, headers: Record<string, string | string[]>,
html: string html: string
): Promise<string | undefined> { ): Promise<string | undefined> {
return this.findNewsletterUrl(html) return this.findNewsletterUrl(html)

View File

@ -18,10 +18,9 @@ export class RevueHandler extends ContentHandler {
} }
async isNewsletter(input: { async isNewsletter(input: {
postHeader: string
from: string from: string
unSubHeader: string
dom: Document dom: Document
headers: Record<string, string | string[]>
}): Promise<boolean> { }): Promise<boolean> {
const dom = input.dom const dom = input.dom
if ( if (
@ -37,7 +36,7 @@ export class RevueHandler extends ContentHandler {
} }
async parseNewsletterUrl( async parseNewsletterUrl(
postHeader: string, headers: Record<string, string | string[]>,
html: string html: string
): Promise<string | undefined> { ): Promise<string | undefined> {
return this.findNewsletterUrl(html) return this.findNewsletterUrl(html)

View File

@ -51,15 +51,14 @@ export class SubstackHandler extends ContentHandler {
} }
async isNewsletter({ async isNewsletter({
postHeader, headers,
dom, dom,
}: { }: {
postHeader: string
from: string from: string
unSubHeader: string headers: Record<string, string | string[]>
dom: Document dom: Document
}): Promise<boolean> { }): Promise<boolean> {
if (postHeader) { if (headers['list-post']) {
return Promise.resolve(true) return Promise.resolve(true)
} }
// substack newsletter emails have tables with a *post-meta class // substack newsletter emails have tables with a *post-meta class
@ -85,11 +84,12 @@ export class SubstackHandler extends ContentHandler {
} }
async parseNewsletterUrl( async parseNewsletterUrl(
postHeader: string, headers: Record<string, string | string[]>,
html: string html: string
): Promise<string | undefined> { ): Promise<string | undefined> {
// raw SubStack newsletter url is like <https://hongbo130.substack.com/p/tldr> // raw SubStack newsletter url is like <https://hongbo130.substack.com/p/tldr>
// we need to get the real url from the raw url // we need to get the real url from the raw url
const postHeader = headers['list-post']?.toString()
if (postHeader && addressparser(postHeader).length > 0) { if (postHeader && addressparser(postHeader).length > 0) {
return Promise.resolve(addressparser(postHeader)[0].name) return Promise.resolve(addressparser(postHeader)[0].name)
} }

View File

@ -1,23 +1,23 @@
import 'mocha' import "mocha";
import * as chai from 'chai' import * as chai from "chai";
import { expect } from 'chai' import { expect } from "chai";
import chaiAsPromised from 'chai-as-promised' import chaiAsPromised from "chai-as-promised";
import chaiString from 'chai-string' import chaiString from "chai-string";
import { SubstackHandler } from '../src/newsletters/substack-handler' import { SubstackHandler } from "../src/newsletters/substack-handler";
import { AxiosHandler } from '../src/newsletters/axios-handler' import { AxiosHandler } from "../src/newsletters/axios-handler";
import { BloombergNewsletterHandler } from '../src/newsletters/bloomberg-newsletter-handler' import { BloombergNewsletterHandler } from "../src/newsletters/bloomberg-newsletter-handler";
import { GolangHandler } from '../src/newsletters/golang-handler' import { GolangHandler } from "../src/newsletters/golang-handler";
import { MorningBrewHandler } from '../src/newsletters/morning-brew-handler' import { MorningBrewHandler } from "../src/newsletters/morning-brew-handler";
import nock from 'nock' import nock from "nock";
import { generateUniqueUrl } from '../src/content-handler' import { generateUniqueUrl } from "../src/content-handler";
import fs from 'fs' import fs from "fs";
import { BeehiivHandler } from '../src/newsletters/beehiiv-handler' import { BeehiivHandler } from "../src/newsletters/beehiiv-handler";
import { ConvertkitHandler } from '../src/newsletters/convertkit-handler' import { ConvertkitHandler } from "../src/newsletters/convertkit-handler";
import { GhostHandler } from '../src/newsletters/ghost-handler' import { GhostHandler } from "../src/newsletters/ghost-handler";
import { CooperPressHandler } from '../src/newsletters/cooper-press-handler' import { CooperPressHandler } from "../src/newsletters/cooper-press-handler";
import { getNewsletterHandler } from '../src' import { getNewsletterHandler } from "../src";
import { parseHTML } from 'linkedom' import { parseHTML } from "linkedom";
import { HeyWorldHandler } from '../src/newsletters/hey-world-handler' import { HeyWorldHandler } from "../src/newsletters/hey-world-handler";
chai.use(chaiAsPromised) chai.use(chaiAsPromised)
chai.use(chaiString) chai.use(chaiString)
@ -29,10 +29,10 @@ const load = (path: string): string => {
describe('Newsletter email test', () => { describe('Newsletter email test', () => {
describe('#getNewsletterUrl()', () => { describe('#getNewsletterUrl()', () => {
it('returns url when email is from SubStack', async () => { it('returns url when email is from SubStack', async () => {
const rawUrl = '<https://hongbo130.substack.com/p/tldr>' const headers = { 'list-post': '<https://hongbo130.substack.com/p/tldr>' }
await expect( await expect(
new SubstackHandler().parseNewsletterUrl(rawUrl, '') new SubstackHandler().parseNewsletterUrl(headers, '')
).to.eventually.equal('https://hongbo130.substack.com/p/tldr') ).to.eventually.equal('https://hongbo130.substack.com/p/tldr')
}) })
@ -41,7 +41,7 @@ describe('Newsletter email test', () => {
const html = `View in browser at <a>${url}</a>` const html = `View in browser at <a>${url}</a>`
await expect( await expect(
new AxiosHandler().parseNewsletterUrl('', html) new AxiosHandler().parseNewsletterUrl({}, html)
).to.eventually.equal(url) ).to.eventually.equal(url)
}) })
@ -54,7 +54,7 @@ describe('Newsletter email test', () => {
` `
await expect( await expect(
new BloombergNewsletterHandler().parseNewsletterUrl('', html) new BloombergNewsletterHandler().parseNewsletterUrl({}, html)
).to.eventually.equal(url) ).to.eventually.equal(url)
}) })
@ -65,7 +65,7 @@ describe('Newsletter email test', () => {
` `
await expect( await expect(
new GolangHandler().parseNewsletterUrl('', html) new GolangHandler().parseNewsletterUrl({}, html)
).to.eventually.equal(url) ).to.eventually.equal(url)
}) })
@ -76,7 +76,7 @@ describe('Newsletter email test', () => {
` `
await expect( await expect(
new MorningBrewHandler().parseNewsletterUrl('', html) new MorningBrewHandler().parseNewsletterUrl({}, html)
).to.eventually.equal(url) ).to.eventually.equal(url)
}) })
}) })
@ -105,9 +105,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/substack-forwarded-newsletter.html') const html = load('./test/data/substack-forwarded-newsletter.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(SubstackHandler) expect(handler).to.be.instanceOf(SubstackHandler)
}) })
@ -118,9 +117,8 @@ describe('Newsletter email test', () => {
) )
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(SubstackHandler) expect(handler).to.be.instanceOf(SubstackHandler)
}) })
@ -129,9 +127,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/substack-forwarded-welcome-email.html') const html = load('./test/data/substack-forwarded-welcome-email.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.undefined expect(handler).to.be.undefined
}) })
@ -142,9 +139,8 @@ describe('Newsletter email test', () => {
) )
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(SubstackHandler) expect(handler).to.be.instanceOf(SubstackHandler)
}) })
@ -157,9 +153,8 @@ describe('Newsletter email test', () => {
) )
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(SubstackHandler) expect(handler).to.be.instanceOf(SubstackHandler)
@ -178,9 +173,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/beehiiv-newsletter.html') const html = load('./test/data/beehiiv-newsletter.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(BeehiivHandler) expect(handler).to.be.instanceOf(BeehiivHandler)
}) })
@ -189,9 +183,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/milkroad-newsletter.html') const html = load('./test/data/milkroad-newsletter.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(BeehiivHandler) expect(handler).to.be.instanceOf(BeehiivHandler)
}) })
@ -200,9 +193,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/ghost-newsletter.html') const html = load('./test/data/ghost-newsletter.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(GhostHandler) expect(handler).to.be.instanceOf(GhostHandler)
}) })
@ -211,9 +203,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/convertkit-newsletter.html') const html = load('./test/data/convertkit-newsletter.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(ConvertkitHandler) expect(handler).to.be.instanceOf(ConvertkitHandler)
}) })
@ -222,9 +213,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/node-weekly-newsletter.html') const html = load('./test/data/node-weekly-newsletter.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(CooperPressHandler) expect(handler).to.be.instanceOf(CooperPressHandler)
}) })
@ -233,10 +223,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/hey-world-newsletter.html') const html = load('./test/data/hey-world-newsletter.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: 'Hongbo Wu <hw@world.hey.com>', from: 'Hongbo Wu <hw@world.hey.com>',
unSubHeader: headers: {'list-unsubscribe': '<https://world.hey.com/dhh/subscribers/MtuoW9TvSJK9o5c7ohB72V2s/unsubscribe>'},
'<https://world.hey.com/dhh/subscribers/MtuoW9TvSJK9o5c7ohB72V2s/unsubscribe>',
}) })
expect(handler).to.be.instanceOf(HeyWorldHandler) expect(handler).to.be.instanceOf(HeyWorldHandler)
}) })
@ -245,9 +233,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/tomasz-tunguz-newsletter.html') const html = load('./test/data/tomasz-tunguz-newsletter.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.instanceOf(ConvertkitHandler) expect(handler).to.be.instanceOf(ConvertkitHandler)
}) })
@ -256,9 +243,8 @@ describe('Newsletter email test', () => {
const html = load('./test/data/convertkit-confirmation.html') const html = load('./test/data/convertkit-confirmation.html')
const handler = await getNewsletterHandler({ const handler = await getNewsletterHandler({
html, html,
postHeader: '',
from: '', from: '',
unSubHeader: '', headers: {},
}) })
expect(handler).to.be.undefined expect(handler).to.be.undefined
}) })

View File

@ -109,7 +109,6 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction(
// if an email is forwarded to the inbox, the to is the forwarding email recipient // if an email is forwarded to the inbox, the to is the forwarding email recipient
const to = forwardedTo || parsed['to'] const to = forwardedTo || parsed['to']
const postHeader = headers['list-post']?.toString()
const unSubHeader = headers['list-unsubscribe']?.toString() const unSubHeader = headers['list-unsubscribe']?.toString()
const { id: receivedEmailId } = await saveReceivedEmail(to, { const { id: receivedEmailId } = await saveReceivedEmail(to, {
@ -124,11 +123,10 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction(
// check if it is a confirmation email or forwarding newsletter // check if it is a confirmation email or forwarding newsletter
const newsletterMessage = await handleNewsletter({ const newsletterMessage = await handleNewsletter({
from, from,
to,
subject,
html, html,
postHeader, headers,
unSubHeader,
email: to,
title: subject,
}) })
if (newsletterMessage) { if (newsletterMessage) {
await publishMessage(NEWSLETTER_EMAIL_RECEIVED_TOPIC, { await publishMessage(NEWSLETTER_EMAIL_RECEIVED_TOPIC, {