Add function isProbablyArticle to test if a forwarded email contains an article to save

This commit is contained in:
Hongbo Wu
2022-07-26 18:37:04 +08:00
parent dec2027fc8
commit d184ca8d04
3 changed files with 78 additions and 34 deletions

View File

@ -15,6 +15,9 @@ import { GolangHandler } from './golang-handler'
import * as hljs from 'highlightjs'
import { decode } from 'html-entities'
import { parseHTML } from 'linkedom'
import { getRepository } from '../entity/utils'
import { User } from '../entity/user'
import { ILike } from 'typeorm'
const logger = buildLogger('utils.parse')
@ -37,6 +40,7 @@ const DOM_PURIFY_CONFIG = {
'data-feature',
],
}
// Marker searched for in an email subject; isProbablyArticle treats a subject
// containing it as a request to save the email as an article.
const ARTICLE_PREFIX = 'omnivore:'
interface ContentHandler {
shouldPrehandle: (url: URL, dom: Document) => boolean
@ -545,3 +549,13 @@ export const findNewsletterUrl = async (
return undefined
}
/**
 * Heuristically decides whether an inbound email should be treated as an
 * article to save.
 *
 * The result is true when either:
 *  - `subject` contains the `ARTICLE_PREFIX` marker, or
 *  - a user whose email equals `email` (compared case-insensitively) exists.
 *
 * @param email address to look up among the stored users
 * @param subject subject line of the email
 * @returns true when the email looks like an article, false otherwise
 */
export const isProbablyArticle = async (
  email: string,
  subject: string
): Promise<boolean> => {
  // Cheap in-memory check first: skip the database round trip when the
  // subject alone already answers the question.
  if (subject.includes(ARTICLE_PREFIX)) {
    return true
  }

  // ILike is a pattern match, so `%` and `_` in the address would act as
  // wildcards (`_` is very common in email addresses). Escape them, and the
  // escape character itself, so the lookup is a case-insensitive exact match.
  // NOTE(review): relies on backslash being the LIKE escape character, which
  // is the PostgreSQL default; confirm against the configured database.
  const exactEmailPattern = email.replace(/[\\%_]/g, '\\$&')

  const user = await getRepository(User).findOneBy({
    email: ILike(exactEmailPattern),
  })
  return !!user
}

View File

@ -5,12 +5,15 @@ import 'chai/register-should'
import fs from 'fs'
import {
findNewsletterUrl,
isProbablyArticle,
isProbablyNewsletter,
parsePageMetadata,
parsePreparedContent,
} from '../../src/utils/parser'
import nock from 'nock'
import chaiAsPromised from 'chai-as-promised'
import { User } from '../../src/entity/user'
import { createTestUser, deleteTestUser } from '../db'
chai.use(chaiAsPromised)
@ -135,3 +138,25 @@ describe('parsePreparedContent', async () => {
)
})
})
// Exercises isProbablyArticle against a user created for this suite: both
// conditions that make it return true, plus the case where neither applies.
describe('isProbablyArticle', () => {
  let user: User

  // Create the user whose email the first test looks up.
  before(async () => {
    user = await createTestUser('fakeUser')
  })

  // Remove the user again so other suites are not affected.
  after(async () => {
    await deleteTestUser(user.name)
  })

  it('returns true when email is signed up with us', async () => {
    const email = user.email
    expect(await isProbablyArticle(email, 'test subject')).to.be.true
  })

  it('returns true when subject has omnivore: prefix', async () => {
    const subject = 'omnivore: test subject'
    expect(await isProbablyArticle('test-email', subject)).to.be.true
  })

  // Negative case: without it, an implementation that always returned true
  // would pass this suite.
  // NOTE(review): assumes no stored user has the email 'test-email'.
  it('returns false when email is not signed up and subject has no omnivore: prefix', async () => {
    expect(await isProbablyArticle('test-email', 'test subject')).to.be.false
  })
})

View File

@ -70,10 +70,10 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction(
const html = parsed.html
const text = parsed.text
const forwardedAddress = headers['x-forwarded-to']
const recipientAddress = forwardedAddress?.toString() || parsed.to
const forwardedAddress = headers['x-forwarded-to']?.toString()
const recipientAddress = forwardedAddress || parsed.to
const postHeader = headers['list-post']?.toString()
const unSubHeader = headers['list-unsubscribe'].toString()
const unSubHeader = headers['list-unsubscribe']?.toString()
try {
// check if it is a forwarding confirmation email or newsletter
@ -93,36 +93,43 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction(
from,
unSubHeader
)
} else {
console.log('non-newsletter email from:', from, recipientAddress)
if (isConfirmationEmail(from)) {
console.log('handleConfirmation', from)
await handleConfirmation(recipientAddress, subject)
} else if (pdfAttachment) {
console.log('handle PDF attachment', from, recipientAddress)
await handlePdfAttachment(
recipientAddress,
pdfAttachmentName,
pdfAttachment,
subject
)
}
const unsubscribe = parseUnsubscribe(unSubHeader)
// queue non-newsletter emails
await pubsub.topic(NON_NEWSLETTER_EMAIL_TOPIC).publishMessage({
json: {
from: from,
to: recipientAddress,
subject: subject,
html: html,
text: text,
unsubMailTo: unsubscribe.mailTo,
unsubHttpUrl: unsubscribe.httpUrl,
},
})
return res.send('ok')
}
console.log('non-newsletter email from:', from, recipientAddress)
if (isConfirmationEmail(from)) {
console.log('handleConfirmation', from)
await handleConfirmation(recipientAddress, subject)
return res.send('ok')
}
if (pdfAttachment) {
console.log('handle PDF attachment', from, recipientAddress)
await handlePdfAttachment(
recipientAddress,
pdfAttachmentName,
pdfAttachment,
subject
)
return res.send('ok')
}
const unsubscribe = parseUnsubscribe(unSubHeader)
// queue non-newsletter emails
await pubsub.topic(NON_NEWSLETTER_EMAIL_TOPIC).publishMessage({
json: {
from: from,
to: recipientAddress,
subject: subject,
html: html,
text: text,
unsubMailTo: unsubscribe.mailTo,
unsubHttpUrl: unsubscribe.httpUrl,
},
})
res.send('ok')
} catch (error) {
console.log(
'error handling emails, will forward.',
@ -141,8 +148,6 @@ export const inboundEmailHandler = Sentry.GCPFunction.wrapHttpFunction(
},
})
}
res.send('ok')
} catch (e) {
console.log(e)
res.send(e)