From d184ca8d0470d2f2538f581a909ea5107b83f0b7 Mon Sep 17 00:00:00 2001
From: Hongbo Wu
Date: Tue, 26 Jul 2022 18:37:04 +0800
Subject: [PATCH] Add function isProbablyArticle to test if a forwarded email contains an article to save

---
 packages/api/src/utils/parser.ts            | 14 ++++
 packages/api/test/utils/parser.test.ts      | 25 +++++++
 packages/inbound-email-handler/src/index.ts | 73 +++++++++++----------
 3 files changed, 78 insertions(+), 34 deletions(-)

diff --git a/packages/api/src/utils/parser.ts b/packages/api/src/utils/parser.ts
index 7489b7373..7a31782fa 100644
--- a/packages/api/src/utils/parser.ts
+++ b/packages/api/src/utils/parser.ts
@@ -15,6 +15,9 @@ import { GolangHandler } from './golang-handler'
 import * as hljs from 'highlightjs'
 import { decode } from 'html-entities'
 import { parseHTML } from 'linkedom'
+import { getRepository } from '../entity/utils'
+import { User } from '../entity/user'
+import { ILike } from 'typeorm'
 
 const logger = buildLogger('utils.parse')
 
@@ -37,6 +40,7 @@ const DOM_PURIFY_CONFIG = {
     'data-feature',
   ],
 }
+const ARTICLE_PREFIX = 'omnivore:'
 
 interface ContentHandler {
   shouldPrehandle: (url: URL, dom: Document) => boolean
@@ -545,3 +549,13 @@ export const findNewsletterUrl = async (
 
   return undefined
 }
+
+export const isProbablyArticle = async (
+  email: string,
+  subject: string
+): Promise<boolean> => {
+  const user = await getRepository(User).findOneBy({
+    email: ILike(email),
+  })
+  return !!user || subject.includes(ARTICLE_PREFIX)
+}
diff --git a/packages/api/test/utils/parser.test.ts b/packages/api/test/utils/parser.test.ts
index 431933a1a..9daa0cd2a 100644
--- a/packages/api/test/utils/parser.test.ts
+++ b/packages/api/test/utils/parser.test.ts
@@ -5,12 +5,15 @@ import 'chai/register-should'
 import fs from 'fs'
 import {
   findNewsletterUrl,
+  isProbablyArticle,
   isProbablyNewsletter,
   parsePageMetadata,
   parsePreparedContent,
 } from '../../src/utils/parser'
 import nock from 'nock'
 import chaiAsPromised from 'chai-as-promised'
+import { User } from '../../src/entity/user'
+import { createTestUser, deleteTestUser } from '../db'
 
 chai.use(chaiAsPromised)
 
@@ -135,3 +138,25 @@ describe('parsePreparedContent', async () => {
     )
   })
 })
+
+describe('isProbablyArticle', () => {
+  let user: User
+
+  before(async () => {
+    user = await createTestUser('fakeUser')
+  })
+
+  after(async () => {
+    await deleteTestUser(user.name)
+  })
+
+  it('returns true when email is signed up with us', async () => {
+    const email = user.email
+    expect(await isProbablyArticle(email, 'test subject')).to.be.true
+  })
+
+  it('returns true when subject has omnivore: prefix', async () => {
+    const subject = 'omnivore: test subject'
+    expect(await isProbablyArticle('test-email', subject)).to.be.true
+  })
+})
diff --git a/packages/inbound-email-handler/src/index.ts b/packages/inbound-email-handler/src/index.ts
index 2e1d9022f..df87c1af8 100644
--- a/packages/inbound-email-handler/src/index.ts
+++ b/packages/inbound-email-handler/src/index.ts
@@ -70,10 +70,10 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction(
       const html = parsed.html
       const text = parsed.text
 
-      const forwardedAddress = headers['x-forwarded-to']
-      const recipientAddress = forwardedAddress?.toString() || parsed.to
+      const forwardedAddress = headers['x-forwarded-to']?.toString()
+      const recipientAddress = forwardedAddress || parsed.to
       const postHeader = headers['list-post']?.toString()
-      const unSubHeader = headers['list-unsubscribe'].toString()
+      const unSubHeader = headers['list-unsubscribe']?.toString()
 
       try {
         // check if it is a forwarding confirmation email or newsletter
@@ -93,36 +93,43 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction(
             from,
             unSubHeader
           )
-        } else {
-          console.log('non-newsletter email from:', from, recipientAddress)
-
-          if (isConfirmationEmail(from)) {
-            console.log('handleConfirmation', from)
-            await handleConfirmation(recipientAddress, subject)
-          } else if (pdfAttachment) {
-            console.log('handle PDF attachment', from, recipientAddress)
-            await handlePdfAttachment(
-              recipientAddress,
-              pdfAttachmentName,
-              pdfAttachment,
-              subject
-            )
-          }
-
-          const unsubscribe = parseUnsubscribe(unSubHeader)
-          // queue non-newsletter emails
-          await pubsub.topic(NON_NEWSLETTER_EMAIL_TOPIC).publishMessage({
-            json: {
-              from: from,
-              to: recipientAddress,
-              subject: subject,
-              html: html,
-              text: text,
-              unsubMailTo: unsubscribe.mailTo,
-              unsubHttpUrl: unsubscribe.httpUrl,
-            },
-          })
+          return res.send('ok')
         }
+
+        console.log('non-newsletter email from:', from, recipientAddress)
+
+        if (isConfirmationEmail(from)) {
+          console.log('handleConfirmation', from)
+          await handleConfirmation(recipientAddress, subject)
+          return res.send('ok')
+        }
+
+        if (pdfAttachment) {
+          console.log('handle PDF attachment', from, recipientAddress)
+          await handlePdfAttachment(
+            recipientAddress,
+            pdfAttachmentName,
+            pdfAttachment,
+            subject
+          )
+          return res.send('ok')
+        }
+
+        const unsubscribe = parseUnsubscribe(unSubHeader)
+        // queue non-newsletter emails
+        await pubsub.topic(NON_NEWSLETTER_EMAIL_TOPIC).publishMessage({
+          json: {
+            from: from,
+            to: recipientAddress,
+            subject: subject,
+            html: html,
+            text: text,
+            unsubMailTo: unsubscribe.mailTo,
+            unsubHttpUrl: unsubscribe.httpUrl,
+          },
+        })
+
+        res.send('ok')
       } catch (error) {
         console.log(
           'error handling emails, will forward.',
@@ -141,8 +148,6 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction(
           },
         })
       }
-
-      res.send('ok')
     } catch (e) {
       console.log(e)
       res.send(e)