Files
omnivore/packages/content-handler/src/bloomberg-handler.ts
2022-09-30 12:51:22 +08:00

42 lines
1.2 KiB
TypeScript

import axios from 'axios'
import { parseHTML } from 'linkedom'
import { ContentHandler, PreHandleResult } from './content-handler'
/**
 * Content handler for Bloomberg pages.
 *
 * Instead of using the page as supplied, the handler asks the ScrapingBee
 * HTTP API to fetch the page source for the URL and returns the title and
 * `<body>` markup parsed from that response.
 */
export class BloombergHandler extends ContentHandler {
  /**
   * Matches `http(s)://` URLs whose host is `bloomberg.<tld>`, optionally
   * prefixed with `www.`.
   *
   * The pattern is anchored at the start of the string and the dot after
   * `bloomberg` is escaped, so look-alike hosts (e.g. `bloombergxcom`) and
   * URLs that merely embed a Bloomberg link in their path or query string
   * are not matched. The regex has no `g`/`y` flag, so sharing one instance
   * across `test()` calls is stateless.
   */
  private static readonly BLOOMBERG_URL_MATCH =
    /^https?:\/\/(www\.)?bloomberg\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)/

  constructor() {
    super()
    this.name = 'Bloomberg'
  }

  /**
   * Reports whether this handler should pre-handle the given URL.
   *
   * @param url - Address of the page being processed.
   * @param dom - Unused by this handler; the decision is based on the URL only.
   * @returns `true` when `url` is a Bloomberg URL.
   */
  shouldPreHandle(url: string, dom?: Document): boolean {
    // `toString()` is retained from the original implementation; it is a
    // no-op for strings.
    return BloombergHandler.BLOOMBERG_URL_MATCH.test(url.toString())
  }

  /**
   * Fetches the page source through ScrapingBee and extracts its title and
   * body markup.
   *
   * The API key is read from the `SCRAPINGBEE_API_KEY` environment variable.
   *
   * @param url - Address of the page to fetch.
   * @param document - Unused by this handler; the page is always re-fetched.
   * @returns The parsed document title, the inner HTML of `<body>` (undefined
   *   when the response has no body element), and the original `url`.
   * @throws Re-throws any error raised while requesting or parsing the page,
   *   after logging it.
   */
  async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
    console.log('prehandling bloomberg url', url)

    try {
      const response = await axios.get('https://app.scrapingbee.com/api/v1', {
        params: {
          api_key: process.env.SCRAPINGBEE_API_KEY,
          url: url,
          return_page_source: true,
          block_ads: true,
          block_resources: false,
        },
      })

      const dom = parseHTML(response.data).document

      return {
        title: dom.title,
        content: dom.querySelector('body')?.innerHTML,
        url: url,
      }
    } catch (error) {
      console.error('error prehandling bloomberg url', error)
      throw error
    }
  }
}