From de0fb8542a756819cda83d557c5f5c852947d599 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 20 Apr 2023 20:52:43 +0800 Subject: [PATCH 1/2] Decode email metadata from rfc2047 encoded --- packages/inbound-email-handler/src/index.ts | 5 ++-- .../inbound-email-handler/src/newsletter.ts | 6 ++-- .../test/newsletter.test.ts | 30 ++++++++++++------- 3 files changed, 25 insertions(+), 16 deletions(-) diff --git a/packages/inbound-email-handler/src/index.ts b/packages/inbound-email-handler/src/index.ts index 6f88f5ff7..906943238 100644 --- a/packages/inbound-email-handler/src/index.ts +++ b/packages/inbound-email-handler/src/index.ts @@ -11,6 +11,7 @@ import axios from 'axios' import * as jwt from 'jsonwebtoken' import parseHeaders from 'parse-headers' import * as multipart from 'parse-multipart-data' +import rfc2047 from 'rfc2047' import { promisify } from 'util' import { Attachment, handleAttachments, isAttachment } from './attachment' import { @@ -96,14 +97,14 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction( for (const part of parts) { const { name, data, type, filename } = part if (name && data) { - parsed[name] = data.toString() + // decode data from rfc2047 encoded + parsed[name] = rfc2047.decode(data.toString()) } else if (isAttachment(type, data)) { attachments.push({ data, contentType: type, filename }) } else { console.log('no data or name for ', part) } } - const headers = parseHeaders(parsed.headers) console.log('parsed: ', parsed) console.log('headers: ', headers) diff --git a/packages/inbound-email-handler/src/newsletter.ts b/packages/inbound-email-handler/src/newsletter.ts index 715a8a6b6..54be5d3dd 100644 --- a/packages/inbound-email-handler/src/newsletter.ts +++ b/packages/inbound-email-handler/src/newsletter.ts @@ -1,5 +1,4 @@ import addressparser from 'addressparser' -import rfc2047 from 'rfc2047' import { publishMessage } from './index' interface Unsubscribe { @@ -19,10 +18,9 @@ const CONFIRMATION_EMAIL_SUBJECT_PATTERN = export const parseUnsubscribe = (unSubHeader: string): Unsubscribe => { // parse list-unsubscribe header // e.g. List-Unsubscribe: , - const decoded = rfc2047.decode(unSubHeader) return { - mailTo: decoded.match(UNSUBSCRIBE_MAIL_TO_PATTERN)?.[1], - httpUrl: decoded.match(UNSUBSCRIBE_HTTP_URL_PATTERN)?.[1], + mailTo: unSubHeader.match(UNSUBSCRIBE_MAIL_TO_PATTERN)?.[1], + httpUrl: unSubHeader.match(UNSUBSCRIBE_HTTP_URL_PATTERN)?.[1], } } diff --git a/packages/inbound-email-handler/test/newsletter.test.ts b/packages/inbound-email-handler/test/newsletter.test.ts index 60f96f712..b32a7f504 100644 --- a/packages/inbound-email-handler/test/newsletter.test.ts +++ b/packages/inbound-email-handler/test/newsletter.test.ts @@ -1,5 +1,7 @@ import { expect } from 'chai' import 'mocha' +import parseHeaders from 'parse-headers' +import rfc2047 from 'rfc2047' import { parsedTo } from '../src' import { getConfirmationCode, @@ -81,16 +83,6 @@ describe('Newsletter email test', () => { expect(parseUnsubscribe(header).httpUrl).to.equal(httpUrl) }) - - context('when unsubscribe header rfc2047 encoded', () => { - it('returns mail to address if exists', () => { - const header = `=?us-ascii?Q?=3Cmailto=3A654e9594-184c-4884-8e02-e6e58a3a6871+87e39b3d-c3ca-4be?= =?us-ascii?Q?b-ba4d-977cc2ba61e7+067a353f-f775-4f2c-?= =?us-ascii?Q?a5cc-978df38deeca=40unsub=2Ebeehiiv=2Ecom=3E=2C?= =?us-ascii?Q?_=3Chttps=3A=2F=2Fwww=2Emilkroad=2Ecom=2Fsubscribe=2F87e39b3d-c3ca-4beb-ba4d-97?= =?us-ascii?Q?7cc2ba61e7=2Fmanage=3Fpost=5Fid=3D067a353f-f775?= =?us-ascii?Q?-4f2c-a5cc-978df38deeca=3E?=',` - - expect(parseUnsubscribe(header).mailTo).to.equal( - '654e9594-184c-4884-8e02-e6e58a3a6871+87e39b3d-c3ca-4beb-ba4d-977cc2ba61e7+067a353f-f775-4f2c-a5cc-978df38deeca@unsub.beehiiv.com' - ) - }) - }) }) }) @@ -128,3 +120,21 @@ describe('isSubscriptionConfirmationEmail', () => { expect(isSubscriptionConfirmationEmail(subject)).to.be.true }) }) + +describe('decode and parse headers', () => { + it('decodes headers from rfc2047 and parses it', () => { + const headerStr = + 'Subject: =?UTF-8?B?8J+MjQ==?= Dead on arrival\n' + + 'x-newsletter: =?us-ascii?Q?https=3A=2F=2Farchives=2Einternationalintrigue=2Eio=2Fp=2Fsudan-ceasefires-c?= =?us-ascii?Q?ollapse-fighting-intensifies?=\n' + const decoded = rfc2047.decode(headerStr) + expect(decoded).to.eql( + 'Subject: 🌍 Dead on arrival\n' + + 'x-newsletter: https://archives.internationalintrigue.io/p/sudan-ceasefires-collapse-fighting-intensifies\n' + ) + expect(parseHeaders(decoded)).to.eql({ + subject: '🌍 Dead on arrival', + 'x-newsletter': + 'https://archives.internationalintrigue.io/p/sudan-ceasefires-collapse-fighting-intensifies', + }) + }) +}) From 89ac9c46cc1fd3bb5aaff7bf021bc1a8b1e0e738 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 20 Apr 2023 20:53:06 +0800 Subject: [PATCH 2/2] Remove other places that use rfc2047 decoder --- packages/content-handler/package.json | 1 - packages/content-handler/src/content-handler.ts | 10 ++++------ packages/content-handler/test/newsletter.test.ts | 10 ---------- 3 files changed, 4 insertions(+), 17 deletions(-) diff --git a/packages/content-handler/package.json b/packages/content-handler/package.json index 6f5c18197..29be9e1f3 100644 --- a/packages/content-handler/package.json +++ b/packages/content-handler/package.json @@ -33,7 +33,6 @@ "linkedom": "^0.14.16", "luxon": "^3.0.4", "puppeteer-core": "^19.1.1", - "rfc2047": "^4.0.1", "underscore": "^1.13.6", "uuid": "^9.0.0" } diff --git a/packages/content-handler/src/content-handler.ts b/packages/content-handler/src/content-handler.ts index 7ef7dffd2..c7f7f1d61 100644 --- a/packages/content-handler/src/content-handler.ts +++ b/packages/content-handler/src/content-handler.ts @@ -1,9 +1,8 @@ import addressparser from 'addressparser' -import rfc2047 from 'rfc2047' -import { v4 as uuid } from 'uuid' -import { parseHTML } from 'linkedom' import axios from 'axios' +import { parseHTML } from 'linkedom' import { Browser } from 'puppeteer-core' +import { v4 as uuid } from 'uuid' interface Unsubscribe { mailTo?: string @@ -148,10 +147,9 @@ export abstract class ContentHandler { parseUnsubscribe(unSubHeader: string): Unsubscribe { // parse list-unsubscribe header // e.g. List-Unsubscribe: , - const decoded = rfc2047.decode(unSubHeader) return { - httpUrl: decoded.match(/<(https?:\/\/[^>]*)>/)?.[1], - mailTo: decoded.match(/]*)>/)?.[1], + httpUrl: unSubHeader.match(/<(https?:\/\/[^>]*)>/)?.[1], + mailTo: unSubHeader.match(/]*)>/)?.[1], } } diff --git a/packages/content-handler/test/newsletter.test.ts b/packages/content-handler/test/newsletter.test.ts index 7188edbe8..e9c270908 100644 --- a/packages/content-handler/test/newsletter.test.ts +++ b/packages/content-handler/test/newsletter.test.ts @@ -560,16 +560,6 @@ describe('Newsletter email test', () => { httpUrl ) }) - - context('when unsubscribe header rfc2047 encoded', () => { - it('returns mail to address if exists', () => { - const header = `=?us-ascii?Q?=3Cmailto=3A654e9594-184c-4884-8e02-e6e58a3a6871+87e39b3d-c3ca-4be?= =?us-ascii?Q?b-ba4d-977cc2ba61e7+067a353f-f775-4f2c-?= =?us-ascii?Q?a5cc-978df38deeca=40unsub=2Ebeehiiv=2Ecom=3E=2C?= =?us-ascii?Q?_=3Chttps=3A=2F=2Fwww=2Emilkroad=2Ecom=2Fsubscribe=2F87e39b3d-c3ca-4beb-ba4d-97?= =?us-ascii?Q?7cc2ba61e7=2Fmanage=3Fpost=5Fid=3D067a353f-f775?= =?us-ascii?Q?-4f2c-a5cc-978df38deeca=3E?=',` - - expect(new GenericHandler().parseUnsubscribe(header).mailTo).to.equal( - '654e9594-184c-4884-8e02-e6e58a3a6871+87e39b3d-c3ca-4beb-ba4d-977cc2ba61e7+067a353f-f775-4f2c-a5cc-978df38deeca@unsub.beehiiv.com' - ) - }) - }) }) describe('preParse', () => {