// packages/api/src/utils/bloomberg-handler.ts
import { DOMWindow } from 'jsdom'

/**
 * Selectors stripped from a Bloomberg newsletter body, in removal order.
 * The original code labelled the first four as "header" and the last four
 * as "footer"; the order is kept because each lookup runs against the DOM
 * left behind by the previous removal.
 */
const BLOOMBERG_REMOVABLE_SELECTORS = [
  '.sailthru-variables',
  '.preview-text',
  '.logo-wrapper',
  '.by-the-number-wrapper',
  '.quote-box-wrapper',
  '.header-wrapper',
  '.component-wrapper',
  '.footer',
]

/**
 * Content handler that recognises Bloomberg pages/newsletters and strips
 * newsletter chrome from the DOM before the content is parsed.
 */
export class BloombergHandler {
  name = 'bloomberg'

  /**
   * Decides whether this handler applies to the given page.
   *
   * @param url - URL the content was fetched from.
   * @param dom - Parsed document window.
   * @returns `true` when the host is `bloomberg.com` or one of its
   *   subdomains, or when the document's first `.logo-image` element has an
   *   `alt` attribute equal to `bloomberg` (case-insensitive).
   */
  shouldPrehandle = (url: URL, dom: DOMWindow): boolean => {
    const host = this.name + '.com'
    const hostname = url.hostname

    // Require a label boundary: a bare `endsWith(host)` would also accept
    // unrelated domains such as `notbloomberg.com`.
    if (hostname === host || hostname.endsWith('.' + host)) {
      return true
    }

    const logoAlt = dom.document
      .querySelector('.logo-image')
      ?.getAttribute('alt')
      ?.toLowerCase()
    return logoAlt === this.name
  }

  /**
   * Removes the first element matching each known newsletter-chrome selector
   * from inside the `.wrapper` element. Does nothing when `.wrapper` (or any
   * individual selector) is absent.
   *
   * @param _url - Unused; kept for the shared handler signature.
   * @param dom - Parsed document window; mutated in place.
   * @returns A promise resolving to the same `dom` instance.
   */
  prehandle = (_url: URL, dom: DOMWindow): Promise<DOMWindow> => {
    const body = dom.document.querySelector('.wrapper')

    for (const selector of BLOOMBERG_REMOVABLE_SELECTORS) {
      body?.querySelector(selector)?.remove()
    }

    return Promise.resolve(dom)
  }
}

// ---------------------------------------------------------------------------
// Rest of the patch that shared this line (packages/api/src/utils/parser.ts):
//
//    import { WikipediaHandler } from './wikipedia-handler'
//    import { SubstackHandler } from './substack-handler'
//    import { AxiosHandler } from './axios-handler'
//   +import { BloombergHandler } from './bloomberg-handler'
//
//    const HANDLERS = [
//      new WikipediaHandler(),
//      new SubstackHandler(),
//      new AxiosHandler(),
//   +  new BloombergHandler(),
//    ]
//
// The next section of the patch (packages/inbound-email-handler/src/newsletter.ts)
// continues below.
// ---------------------------------------------------------------------------
b/packages/inbound-email-handler/src/newsletter.ts index b50354cf6..2c384c485 100644 --- a/packages/inbound-email-handler/src/newsletter.ts +++ b/packages/inbound-email-handler/src/newsletter.ts @@ -6,9 +6,12 @@ const EMAIL_CONFIRMATION_CODE_RECEIVED_TOPIC = 'emailConfirmationCodeReceived' const EMAIL_FORWARDING_SENDER_ADDRESSES = [ 'Gmail Team ', ] -const NEWSLETTER_SENDER_REGEX = '<.+@axios.com>' -const CONFIRMATION_CODE_PATTERN = '^\\(#\\d+\\)' -const AXIOS_URL_PATTERN = 'View in browser at <.+>' +const NEWSLETTER_SENDER_REGEX = + /<.+@((axios.com)|(mail.bloombergbusiness.com))>/ +const CONFIRMATION_CODE_PATTERN = /^\\(#\\d+\\)/ +const AXIOS_URL_PATTERN = /View in browser at (.*)<\/a>/ +const BLOOMBERG_URL_PATTERN = + / { console.log('confirmation email') @@ -114,11 +117,15 @@ export const getNewsletterUrl = ( } // axios newsletter url from html - const re = new RegExp(AXIOS_URL_PATTERN) - const matches = html.match(re) + let matches = html.match(AXIOS_URL_PATTERN) if (matches) { - const match = matches[0] - return match.slice(match.indexOf('>') + 1, match.lastIndexOf('<')) + return matches[1] + } + + // bloomberg newsletter url from html + matches = html.match(BLOOMBERG_URL_PATTERN) + if (matches) { + return matches[1] } return undefined } diff --git a/packages/inbound-email-handler/test/newsletter.test.ts b/packages/inbound-email-handler/test/newsletter.test.ts index 920262792..4fa65485b 100644 --- a/packages/inbound-email-handler/test/newsletter.test.ts +++ b/packages/inbound-email-handler/test/newsletter.test.ts @@ -28,6 +28,11 @@ describe('Newsletter email test', () => { expect(isNewsletter('', from)).to.be.true }) + + it('should return true when email is from bloomberg', () => { + const from = 'From: Bloomberg ' + expect(isNewsletter('', from)).to.be.true + }) }) describe('#getNewsletterUrl()', () => { @@ -41,10 +46,23 @@ describe('Newsletter email test', () => { it('returns url when email is from Axios', () => { const rawUrl = '' - const html = `View in 
browser at https://axios.com/blog/2019/02/28/the-best-way-to-build-a-web-app` + const html = `View in browser at https://axios.com/blog/the-best-way-to-build-a-web-app` expect(getNewsletterUrl(rawUrl, html)).to.equal( - 'https://axios.com/blog/2019/02/28/the-best-way-to-build-a-web-app' + 'https://axios.com/blog/the-best-way-to-build-a-web-app' + ) + }) + + it('returns url when email is from Bloomberg', () => { + const rawUrl = '' + const html = ` + + View in browser + + ` + + expect(getNewsletterUrl(rawUrl, html)).to.equal( + 'https://www.bloomberg.com/news/google-is-now-a-partner' ) }) })