Merge pull request #82 from omnivore-app/feature/bloomberg-newsletters
Add support for Bloomberg newsletter
This commit is contained in:
34
packages/api/src/utils/bloomberg-handler.ts
Normal file
34
packages/api/src/utils/bloomberg-handler.ts
Normal file
@ -0,0 +1,34 @@
|
||||
import { DOMWindow } from 'jsdom'
|
||||
|
||||
/**
 * Pre-parse handler for Bloomberg content.
 *
 * `shouldPrehandle` decides whether a page belongs to Bloomberg and
 * `prehandle` strips a fixed set of wrapper elements from the document
 * before it is parsed.
 */
export class BloombergHandler {
  name = 'bloomberg'

  /**
   * Tells whether this handler applies to the given page.
   *
   * @param url - URL of the page being parsed.
   * @param dom - Window whose document is inspected for the logo image.
   * @returns `true` when the hostname is `bloomberg.com` or one of its
   *   subdomains, or when the first `.logo-image` element has an `alt`
   *   attribute equal to the handler name (case-insensitive).
   */
  shouldPrehandle = (url: URL, dom: DOMWindow): boolean => {
    const host = this.name + '.com'
    const hostname = url.hostname

    // A bare `endsWith(host)` would also accept look-alike domains such as
    // "notbloomberg.com", so require an exact match or a dot-delimited
    // subdomain.
    if (hostname === host || hostname.endsWith('.' + host)) {
      return true
    }

    // Fall back to the logo's alt text when the hostname does not match.
    const logoAlt = dom.document
      .querySelector('.logo-image')
      ?.getAttribute('alt')

    return logoAlt?.toLowerCase() === this.name
  }

  /**
   * Removes known non-content elements from inside the `.wrapper` element.
   *
   * Only the first match of each selector is removed, and nothing happens
   * when `.wrapper` itself is missing. The document is mutated in place.
   *
   * @param _url - Unused.
   * @param dom - Window whose document is cleaned up.
   * @returns A promise resolved with the same `dom` instance.
   */
  prehandle = (_url: URL, dom: DOMWindow): Promise<DOMWindow> => {
    const body = dom.document.querySelector('.wrapper')

    const selectorsToRemove = [
      // this removes header
      '.sailthru-variables',
      '.preview-text',
      '.logo-wrapper',
      '.by-the-number-wrapper',
      // this removes footer
      '.quote-box-wrapper',
      '.header-wrapper',
      '.component-wrapper',
      '.footer',
    ]

    for (const selector of selectorsToRemove) {
      body?.querySelector(selector)?.remove()
    }

    return Promise.resolve(dom)
  }
}
|
||||
@ -11,6 +11,7 @@ import axios from 'axios'
|
||||
import { WikipediaHandler } from './wikipedia-handler'
|
||||
import { SubstackHandler } from './substack-handler'
|
||||
import { AxiosHandler } from './axios-handler'
|
||||
import { BloombergHandler } from './bloomberg-handler'
|
||||
|
||||
const logger = buildLogger('utils.parse')
|
||||
|
||||
@ -45,6 +46,7 @@ const HANDLERS = [
|
||||
new WikipediaHandler(),
|
||||
new SubstackHandler(),
|
||||
new AxiosHandler(),
|
||||
new BloombergHandler(),
|
||||
]
|
||||
|
||||
/** Hook that prevents DOMPurify from removing youtube iframes */
|
||||
|
||||
@ -6,9 +6,12 @@ const EMAIL_CONFIRMATION_CODE_RECEIVED_TOPIC = 'emailConfirmationCodeReceived'
|
||||
const EMAIL_FORWARDING_SENDER_ADDRESSES = [
|
||||
'Gmail Team <forwarding-noreply@google.com>',
|
||||
]
|
||||
const NEWSLETTER_SENDER_REGEX = '<.+@axios.com>'
|
||||
const CONFIRMATION_CODE_PATTERN = '^\\(#\\d+\\)'
|
||||
const AXIOS_URL_PATTERN = 'View in browser at <.+>'
|
||||
// Matches a sender header whose address is on a supported newsletter domain.
// Dots are escaped so that they only match a literal "." in the domain.
const NEWSLETTER_SENDER_REGEX =
  /<.+@((axios\.com)|(mail\.bloombergbusiness\.com))>/
// Matches a subject that starts with a parenthesised code such as "(#123456)".
// In a regex literal a single backslash escapes the parenthesis; the doubled
// backslashes needed in a string pattern would match literal "\" characters.
const CONFIRMATION_CODE_PATTERN = /^\(#\d+\)/
// Captures the text of the anchor that follows "View in browser at".
const AXIOS_URL_PATTERN = /View in browser at <a.*>(.*)<\/a>/
// Captures the href of the anchor with class "view-in-browser__url".
const BLOOMBERG_URL_PATTERN =
  /<a class="view-in-browser__url" href=["']([^"']*)["']/
|
||||
|
||||
export const handleConfirmation = async (email: string, subject: string) => {
|
||||
console.log('confirmation email')
|
||||
@ -114,11 +117,15 @@ export const getNewsletterUrl = (
|
||||
}
|
||||
|
||||
// axios newsletter url from html
|
||||
const re = new RegExp(AXIOS_URL_PATTERN)
|
||||
const matches = html.match(re)
|
||||
let matches = html.match(AXIOS_URL_PATTERN)
|
||||
if (matches) {
|
||||
const match = matches[0]
|
||||
return match.slice(match.indexOf('>') + 1, match.lastIndexOf('<'))
|
||||
return matches[1]
|
||||
}
|
||||
|
||||
// bloomberg newsletter url from html
|
||||
matches = html.match(BLOOMBERG_URL_PATTERN)
|
||||
if (matches) {
|
||||
return matches[1]
|
||||
}
|
||||
return undefined
|
||||
}
|
||||
|
||||
@ -28,6 +28,11 @@ describe('Newsletter email test', () => {
|
||||
|
||||
expect(isNewsletter('', from)).to.be.true
|
||||
})
|
||||
|
||||
it('should return true when email is from bloomberg', () => {
  // Sender header as it appears on a Bloomberg newsletter email.
  const sender = 'From: Bloomberg <noreply@mail.bloombergbusiness.com>'
  const detected = isNewsletter('', sender)

  expect(detected).to.be.true
})
|
||||
})
|
||||
|
||||
describe('#getNewsletterUrl()', () => {
|
||||
@ -41,10 +46,23 @@ describe('Newsletter email test', () => {
|
||||
|
||||
it('returns url when email is from Axios', () => {
|
||||
const rawUrl = ''
|
||||
const html = `View in browser at <a>https://axios.com/blog/2019/02/28/the-best-way-to-build-a-web-app</a>`
|
||||
const html = `View in browser at <a>https://axios.com/blog/the-best-way-to-build-a-web-app</a>`
|
||||
|
||||
expect(getNewsletterUrl(rawUrl, html)).to.equal(
|
||||
'https://axios.com/blog/2019/02/28/the-best-way-to-build-a-web-app'
|
||||
'https://axios.com/blog/the-best-way-to-build-a-web-app'
|
||||
)
|
||||
})
|
||||
|
||||
it('returns url when email is from Bloomberg', () => {
  // With an empty raw url, the url comes from the "view in browser" anchor.
  const rawUrl = ''
  const html = `
<a class="view-in-browser__url" href="https://www.bloomberg.com/news/google-is-now-a-partner">
View in browser
</a>
`
  const extractedUrl = getNewsletterUrl(rawUrl, html)

  expect(extractedUrl).to.equal(
    'https://www.bloomberg.com/news/google-is-now-a-partner'
  )
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user