Decode RFC 2047-encoded email metadata

This commit is contained in:
Hongbo Wu
2023-04-20 20:52:43 +08:00
parent ebc760e522
commit de0fb8542a
3 changed files with 25 additions and 16 deletions

View File

@ -11,6 +11,7 @@ import axios from 'axios'
import * as jwt from 'jsonwebtoken'
import parseHeaders from 'parse-headers'
import * as multipart from 'parse-multipart-data'
import rfc2047 from 'rfc2047'
import { promisify } from 'util'
import { Attachment, handleAttachments, isAttachment } from './attachment'
import {
@ -96,14 +97,14 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction(
for (const part of parts) {
const { name, data, type, filename } = part
if (name && data) {
parsed[name] = data.toString()
// decode RFC 2047 encoded-words (e.g. =?UTF-8?B?...?=) in the part's data
parsed[name] = rfc2047.decode(data.toString())
} else if (isAttachment(type, data)) {
attachments.push({ data, contentType: type, filename })
} else {
console.log('no data or name for ', part)
}
}
const headers = parseHeaders(parsed.headers)
console.log('parsed: ', parsed)
console.log('headers: ', headers)

View File

@ -1,5 +1,4 @@
import addressparser from 'addressparser'
import rfc2047 from 'rfc2047'
import { publishMessage } from './index'
interface Unsubscribe {
@ -19,10 +18,9 @@ const CONFIRMATION_EMAIL_SUBJECT_PATTERN =
/**
 * Extracts the unsubscribe targets from a List-Unsubscribe header value.
 *
 * e.g. List-Unsubscribe: <https://omnivore.com/unsub>, <mailto:unsub@omnivore.com>
 *
 * The header is matched exactly as given; no RFC 2047 decoding is done here.
 * NOTE(review): presumably the caller passes an already-decoded value — confirm.
 *
 * @param unSubHeader - value of the List-Unsubscribe header
 * @returns an object whose `mailTo` / `httpUrl` hold the first capture group
 *   of UNSUBSCRIBE_MAIL_TO_PATTERN / UNSUBSCRIBE_HTTP_URL_PATTERN, or
 *   `undefined` for whichever pattern does not match
 */
export const parseUnsubscribe = (unSubHeader: string): Unsubscribe => {
  // Each key is declared exactly once; `?.[1]` yields undefined when
  // `match` returns null instead of throwing.
  return {
    mailTo: unSubHeader.match(UNSUBSCRIBE_MAIL_TO_PATTERN)?.[1],
    httpUrl: unSubHeader.match(UNSUBSCRIBE_HTTP_URL_PATTERN)?.[1],
  }
}

View File

@ -1,5 +1,7 @@
import { expect } from 'chai'
import 'mocha'
import parseHeaders from 'parse-headers'
import rfc2047 from 'rfc2047'
import { parsedTo } from '../src'
import {
getConfirmationCode,
@ -81,16 +83,6 @@ describe('Newsletter email test', () => {
expect(parseUnsubscribe(header).httpUrl).to.equal(httpUrl)
})
context('when unsubscribe header rfc2047 encoded', () => {
it('returns mail to address if exists', () => {
const header = `=?us-ascii?Q?=3Cmailto=3A654e9594-184c-4884-8e02-e6e58a3a6871+87e39b3d-c3ca-4be?= =?us-ascii?Q?b-ba4d-977cc2ba61e7+067a353f-f775-4f2c-?= =?us-ascii?Q?a5cc-978df38deeca=40unsub=2Ebeehiiv=2Ecom=3E=2C?= =?us-ascii?Q?_=3Chttps=3A=2F=2Fwww=2Emilkroad=2Ecom=2Fsubscribe=2F87e39b3d-c3ca-4beb-ba4d-97?= =?us-ascii?Q?7cc2ba61e7=2Fmanage=3Fpost=5Fid=3D067a353f-f775?= =?us-ascii?Q?-4f2c-a5cc-978df38deeca=3E?=',`
expect(parseUnsubscribe(header).mailTo).to.equal(
'654e9594-184c-4884-8e02-e6e58a3a6871+87e39b3d-c3ca-4beb-ba4d-977cc2ba61e7+067a353f-f775-4f2c-a5cc-978df38deeca@unsub.beehiiv.com'
)
})
})
})
})
@ -128,3 +120,21 @@ describe('isSubscriptionConfirmationEmail', () => {
expect(isSubscriptionConfirmationEmail(subject)).to.be.true
})
})
// Checks that RFC 2047 encoded-words in a raw header block are decoded to
// plain text, and that the decoded text is then parsed into header fields.
describe('decode and parse headers', () => {
  it('decodes headers from rfc2047 and parses it', () => {
    // Input: one B-encoded subject and one Q-encoded value that is split
    // across two adjacent encoded-words.
    const encodedHeaders =
      'Subject: =?UTF-8?B?8J+MjQ==?= Dead on arrival\n' +
      'x-newsletter: =?us-ascii?Q?https=3A=2F=2Farchives=2Einternationalintrigue=2Eio=2Fp=2Fsudan-ceasefires-c?= =?us-ascii?Q?ollapse-fighting-intensifies?=\n'

    // Expected plain-text form of the same header block.
    const expectedText =
      'Subject: 🌍 Dead on arrival\n' +
      'x-newsletter: https://archives.internationalintrigue.io/p/sudan-ceasefires-collapse-fighting-intensifies\n'

    // Expected result of parsing the decoded block.
    const expectedFields = {
      subject: '🌍 Dead on arrival',
      'x-newsletter':
        'https://archives.internationalintrigue.io/p/sudan-ceasefires-collapse-fighting-intensifies',
    }

    const plainText = rfc2047.decode(encodedHeaders)
    expect(plainText).to.deep.equal(expectedText)

    const fields = parseHeaders(plainText)
    expect(fields).to.deep.equal(expectedFields)
  })
})