From bd8178a464539bb1e08fcc7b3b3dc5baf98badf4 Mon Sep 17 00:00:00 2001
From: Hongbo Wu
Date: Thu, 17 Feb 2022 13:19:58 +0800
Subject: [PATCH] add support for bloomberg newsletters

---
 packages/api/src/utils/bloomberg-handler.ts   | 50 ++++++++++++++++++++++++++++++++++
 .../inbound-email-handler/src/newsletter.ts   | 16 +++++++++++--
 .../test/newsletter.test.ts                   |  5 ++++
 3 files changed, 68 insertions(+), 3 deletions(-)
 create mode 100644 packages/api/src/utils/bloomberg-handler.ts

diff --git a/packages/api/src/utils/bloomberg-handler.ts b/packages/api/src/utils/bloomberg-handler.ts
new file mode 100644
index 000000000..a58597114
--- /dev/null
+++ b/packages/api/src/utils/bloomberg-handler.ts
@@ -0,0 +1,50 @@
+import { DOMWindow } from 'jsdom'
+
+/**
+ * Strips header and footer blocks from Bloomberg newsletter pages before the
+ * content is extracted.
+ *
+ * NOTE(review): the class is still called SubstackHandler although it handles
+ * bloomberg.com and lives in bloomberg-handler.ts; this looks like a copy/paste
+ * leftover. The name is kept so existing imports keep working.
+ */
+export class SubstackHandler {
+  name = 'bloomberg'
+
+  /**
+   * Returns true when the url host is bloomberg.com or one of its subdomains.
+   * `dom` is not used here.
+   */
+  shouldPrehandle = (url: URL, dom: DOMWindow): boolean => {
+    const host = this.name + '.com'
+    // compare on a label boundary so hosts like notbloomberg.com are rejected
+    return url.hostname === host || url.hostname.endsWith('.' + host)
+  }
+
+  /**
+   * Removes the first element matching each header/footer selector inside
+   * `.wrapper` (no-op when the wrapper or an element is missing) and resolves
+   * with the same, mutated dom.
+   */
+  prehandle = (url: URL, dom: DOMWindow): Promise<DOMWindow> => {
+    const body = dom.document.querySelector('.wrapper')
+    const selectors = [
+      // this removes header
+      '.sailthru-variables',
+      '.preview-text',
+      '.logo-wrapper',
+      '.by-the-number-wrapper',
+      // this removes footer
+      '.quote-box-wrapper',
+      '.header-wrapper',
+      '.component-wrapper',
+      '.footer',
+    ]
+
+    for (const selector of selectors) {
+      body?.querySelector(selector)?.remove()
+    }
+
+    return Promise.resolve(dom)
+  }
+}
diff --git a/packages/inbound-email-handler/src/newsletter.ts b/packages/inbound-email-handler/src/newsletter.ts
index b50354cf6..56a56a0be 100644
--- a/packages/inbound-email-handler/src/newsletter.ts
+++ b/packages/inbound-email-handler/src/newsletter.ts
@@ -6,9 +6,11 @@ const EMAIL_CONFIRMATION_CODE_RECEIVED_TOPIC = 'emailConfirmationCodeReceived'
 const EMAIL_FORWARDING_SENDER_ADDRESSES = [
   'Gmail Team ',
 ]
-const NEWSLETTER_SENDER_REGEX = '<.+@axios.com>'
+const NEWSLETTER_SENDER_REGEX =
+  '<.+@((axios\\.com)|(mail\\.bloombergbusiness\\.com))>'
 const CONFIRMATION_CODE_PATTERN = '^\\(#\\d+\\)'
 const AXIOS_URL_PATTERN = 'View in browser at <.+>'
+const BLOOMBERG_URL_PATTERN = '' // NOTE(review): empty pattern matches any input; the tag pattern looks lost - restore and confirm
 
 export const handleConfirmation = async (email: string, subject: string) => {
   console.log('confirmation email')
@@ -114,11 +116,19 @@ export const getNewsletterUrl = (
   }
 
   // axios newsletter url from html
-  const re = new RegExp(AXIOS_URL_PATTERN)
-  const matches = html.match(re)
+  let re = new RegExp(AXIOS_URL_PATTERN)
+  let matches = html.match(re)
   if (matches) {
     const match = matches[0]
     return match.slice(match.indexOf('>') + 1, match.lastIndexOf('<'))
   }
+
+  // bloomberg newsletter url: the href attribute value inside the matched text
+  re = new RegExp(BLOOMBERG_URL_PATTERN)
+  matches = html.match(re)
+  if (matches) {
+    const href = /href=["']([^"']*)["']/.exec(matches[0])
+    if (href) return href[1]
+  }
   return undefined
 }
diff --git a/packages/inbound-email-handler/test/newsletter.test.ts b/packages/inbound-email-handler/test/newsletter.test.ts
index 920262792..0640bea12 100644
--- a/packages/inbound-email-handler/test/newsletter.test.ts
+++ b/packages/inbound-email-handler/test/newsletter.test.ts
@@ -28,6 +28,11 @@ describe('Newsletter email test', () => {
 
       expect(isNewsletter('', from)).to.be.true
     })
+
+    it('should return true when email is from bloomberg', () => {
+      const from = 'From: Bloomberg <noreply@mail.bloombergbusiness.com>'
+      expect(isNewsletter('', from)).to.be.true
+    })
   })
 
   describe('#getNewsletterUrl()', () => {