Merge pull request #82 from omnivore-app/feature/bloomberg-newsletters

Add support for Bloomberg newsletter
This commit is contained in:
Jackson Harper
2022-02-18 01:08:46 +08:00
committed by GitHub
4 changed files with 70 additions and 9 deletions

View File

@ -0,0 +1,34 @@
import { DOMWindow } from 'jsdom'
/**
 * Content handler for Bloomberg newsletter pages.
 *
 * `shouldPrehandle` decides whether a page belongs to Bloomberg, and
 * `prehandle` strips newsletter chrome from the page's `.wrapper` element
 * before the DOM is handed back to the caller.
 */
export class BloombergHandler {
  name = 'bloomberg'

  /**
   * Returns true when the page is served from bloomberg.com (or one of its
   * subdomains), or when the document's `.logo-image` element carries an
   * `alt` attribute equal to "bloomberg" (case-insensitive).
   *
   * @param url - URL of the page being processed
   * @param dom - parsed page; only consulted when the hostname does not match
   */
  shouldPrehandle = (url: URL, dom: DOMWindow): boolean => {
    const host = this.name + '.com'
    const hostname = url.hostname
    // Accept the apex domain or a real subdomain only. A bare
    // `endsWith(host)` would also accept look-alike hosts such as
    // "notbloomberg.com".
    if (hostname === host || hostname.endsWith('.' + host)) {
      return true
    }
    const logoAlt = dom.document
      .querySelector('.logo-image')
      ?.getAttribute('alt')
    return logoAlt?.toLowerCase() === this.name
  }

  /**
   * Removes the first element matching each known chrome selector inside the
   * page's `.wrapper` element. If `.wrapper` is absent nothing is removed.
   *
   * @param _url - unused
   * @param dom - parsed page, mutated in place
   * @returns a promise resolving to the same `dom` instance
   */
  prehandle = (_url: URL, dom: DOMWindow): Promise<DOMWindow> => {
    const body = dom.document.querySelector('.wrapper')
    // Order is preserved from the original implementation: a later selector
    // is looked up only after earlier matches have been detached.
    const selectors = [
      // header
      '.sailthru-variables',
      '.preview-text',
      '.logo-wrapper',
      '.by-the-number-wrapper',
      // footer
      '.quote-box-wrapper',
      '.header-wrapper',
      '.component-wrapper',
      '.footer',
    ]
    for (const selector of selectors) {
      body?.querySelector(selector)?.remove()
    }
    return Promise.resolve(dom)
  }
}

View File

@ -11,6 +11,7 @@ import axios from 'axios'
import { WikipediaHandler } from './wikipedia-handler'
import { SubstackHandler } from './substack-handler'
import { AxiosHandler } from './axios-handler'
import { BloombergHandler } from './bloomberg-handler'
const logger = buildLogger('utils.parse')
@ -45,6 +46,7 @@ const HANDLERS = [
new WikipediaHandler(),
new SubstackHandler(),
new AxiosHandler(),
new BloombergHandler(),
]
/** Hook that prevents DOMPurify from removing youtube iframes */

View File

@ -6,9 +6,12 @@ const EMAIL_CONFIRMATION_CODE_RECEIVED_TOPIC = 'emailConfirmationCodeReceived'
const EMAIL_FORWARDING_SENDER_ADDRESSES = [
'Gmail Team <forwarding-noreply@google.com>',
]
const NEWSLETTER_SENDER_REGEX = '<.+@axios.com>'
const CONFIRMATION_CODE_PATTERN = '^\\(#\\d+\\)'
const AXIOS_URL_PATTERN = 'View in browser at <.+>'
// Matches a sender header whose address is on a supported newsletter domain.
// Dots are escaped so they match a literal '.' only.
const NEWSLETTER_SENDER_REGEX =
  /<.+@((axios\.com)|(mail\.bloombergbusiness\.com))>/
// Matches a subject starting with "(#<digits>)". In a regex literal a single
// backslash escapes the parenthesis; the doubled backslashes carried over from
// the former string form matched literal backslashes and never matched a
// real subject.
const CONFIRMATION_CODE_PATTERN = /^\(#\d+\)/
// Captures the text of the first anchor following "View in browser at".
// `[^>]*` and the lazy `.*?` keep the match inside that single anchor even
// when more links appear later on the same line.
const AXIOS_URL_PATTERN = /View in browser at <a[^>]*>(.*?)<\/a>/
// Captures the href of the anchor with class "view-in-browser__url".
const BLOOMBERG_URL_PATTERN =
  /<a class="view-in-browser__url" href=["']([^"']*)["']/
export const handleConfirmation = async (email: string, subject: string) => {
console.log('confirmation email')
@ -114,11 +117,15 @@ export const getNewsletterUrl = (
}
// axios newsletter url from html
const re = new RegExp(AXIOS_URL_PATTERN)
const matches = html.match(re)
let matches = html.match(AXIOS_URL_PATTERN)
if (matches) {
const match = matches[0]
return match.slice(match.indexOf('>') + 1, match.lastIndexOf('<'))
return matches[1]
}
// bloomberg newsletter url from html
matches = html.match(BLOOMBERG_URL_PATTERN)
if (matches) {
return matches[1]
}
return undefined
}

View File

@ -28,6 +28,11 @@ describe('Newsletter email test', () => {
expect(isNewsletter('', from)).to.be.true
})
// A sender header on the mail.bloombergbusiness.com domain must be classified
// as a newsletter.
it('should return true when email is from bloomberg', () => {
  const bloombergSender = 'From: Bloomberg <noreply@mail.bloombergbusiness.com>'
  const detected = isNewsletter('', bloombergSender)
  expect(detected).to.be.true
})
})
describe('#getNewsletterUrl()', () => {
@ -41,10 +46,23 @@ describe('Newsletter email test', () => {
it('returns url when email is from Axios', () => {
const rawUrl = ''
const html = `View in browser at <a>https://axios.com/blog/2019/02/28/the-best-way-to-build-a-web-app</a>`
const html = `View in browser at <a>https://axios.com/blog/the-best-way-to-build-a-web-app</a>`
expect(getNewsletterUrl(rawUrl, html)).to.equal(
'https://axios.com/blog/2019/02/28/the-best-way-to-build-a-web-app'
'https://axios.com/blog/the-best-way-to-build-a-web-app'
)
})
// With an empty rawUrl, getNewsletterUrl is expected to return the href of the
// anchor whose class is "view-in-browser__url" in the email HTML.
it('returns url when email is from Bloomberg', () => {
const rawUrl = ''
const html = `
<a class="view-in-browser__url" href="https://www.bloomberg.com/news/google-is-now-a-partner">
View in browser
</a>
`
// The expected value is the anchor's href, not its link text.
expect(getNewsletterUrl(rawUrl, html)).to.equal(
'https://www.bloomberg.com/news/google-is-now-a-partner'
)
})
})