Merge pull request #597 from omnivore-app/remove-chrome-aws-lambda
Optimize puppeteer and remove chrome-aws-lambda dependencies
This commit is contained in:
@ -9,11 +9,8 @@ const axios = require('axios');
|
||||
const { promisify } = require('util');
|
||||
const { DateTime } = require('luxon');
|
||||
const os = require('os');
|
||||
const jsdom = require("jsdom");
|
||||
const { Cipher } = require('crypto');
|
||||
const { JSDOM } = jsdom;
|
||||
|
||||
|
||||
const { parseHTML } = require('linkedom');
|
||||
|
||||
exports.appleNewsHandler = {
|
||||
|
||||
@ -30,10 +27,10 @@ exports.appleNewsHandler = {
|
||||
const response = await axios.get(url, { headers: { 'User-Agent': MOBILE_USER_AGENT } } );
|
||||
const data = response.data;
|
||||
|
||||
const dom = new JSDOM(data);
|
||||
const dom = parseHTML(data).document;
|
||||
|
||||
// make sure it's a valid URL by wrapping it in new URL()
|
||||
const u = new URL(dom.window.document.querySelector('span.click-here').parentNode.href);
|
||||
const u = new URL(dom.querySelector('span.click-here').parentNode.href);
|
||||
return { url: u.href };
|
||||
}
|
||||
}
|
||||
|
||||
@ -6,8 +6,7 @@
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const os = require('os');
|
||||
const jsdom = require("jsdom");
|
||||
const { JSDOM } = jsdom;
|
||||
const { parseHTML } = require('linkedom');
|
||||
|
||||
exports.bloombergHandler = {
|
||||
|
||||
@ -30,8 +29,8 @@ exports.bloombergHandler = {
|
||||
'block_resources': false,
|
||||
}
|
||||
})
|
||||
const dom = new JSDOM(response.data);
|
||||
return { title: dom.window.document.title, content: dom.window.document.querySelector('body').innerHTML, url: url }
|
||||
const dom = parseHTML(response.data).document;
|
||||
return { title: dom.title, content: dom.querySelector('body').innerHTML, url: url }
|
||||
} catch (error) {
|
||||
console.error('error prehandling bloomberg url', error)
|
||||
throw error
|
||||
|
||||
@ -5,8 +5,7 @@
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const jsdom = require("jsdom");
|
||||
const { JSDOM } = jsdom;
|
||||
const { parseHTML } = require('linkedom');
|
||||
|
||||
exports.derstandardHandler = {
|
||||
shouldPrehandle: (url, env) => {
|
||||
@ -23,10 +22,14 @@ exports.derstandardHandler = {
|
||||
});
|
||||
const content = response.data;
|
||||
|
||||
const dom = new JSDOM(content)
|
||||
const titleElement = dom.window.document.querySelector('.article-title')
|
||||
titleElement?.remove()
|
||||
// Pull the article title out of the fetched markup and detach its element
// from the parsed document before the body is serialized further down
// (presumably so the title is not repeated inside the returned content).
let title;
const dom = parseHTML(content).document;
const titleElement = dom.querySelector('.article-title');
// querySelector yields null when nothing matches, so only read and remove
// the element when it exists. The previous guard was inverted
// (`!titleElement`): it dereferenced null when the title was missing and
// skipped the extraction entirely when the title was present.
if (titleElement) {
  title = titleElement.textContent;
  titleElement.remove();
}
|
||||
|
||||
return { content: dom.window.document.body.outerHTML, title: titleElement?.textContent };
|
||||
return { content: dom.body.outerHTML, title: title };
|
||||
}
|
||||
}
|
||||
|
||||
@ -5,7 +5,6 @@
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const Url = require('url');
|
||||
const chromium = require('chrome-aws-lambda');
|
||||
const axios = require('axios');
|
||||
const jwt = require('jsonwebtoken');
|
||||
const { promisify } = require('util');
|
||||
@ -125,10 +124,34 @@ const userAgentForUrl = (url) => {
|
||||
// launch Puppeteer
|
||||
const getBrowserPromise = (async () => {
|
||||
return puppeteer.launch({
|
||||
args: chromium.args,
|
||||
args: [
|
||||
'--allow-running-insecure-content',
|
||||
'--autoplay-policy=user-gesture-required',
|
||||
'--disable-component-update',
|
||||
'--disable-domain-reliability',
|
||||
'--disable-features=AudioServiceOutOfProcess,IsolateOrigins,site-per-process',
|
||||
'--disable-print-preview',
|
||||
'--disable-setuid-sandbox',
|
||||
'--disable-site-isolation-trials',
|
||||
'--disable-speech-api',
|
||||
'--disable-web-security',
|
||||
'--disk-cache-size=33554432',
|
||||
'--enable-features=SharedArrayBuffer',
|
||||
'--hide-scrollbars',
|
||||
'--ignore-gpu-blocklist',
|
||||
'--in-process-gpu',
|
||||
'--mute-audio',
|
||||
'--no-default-browser-check',
|
||||
'--no-pings',
|
||||
'--no-sandbox',
|
||||
'--no-zygote',
|
||||
'--use-gl=swiftshader',
|
||||
'--window-size=1920,1080', // https://source.chromium.org/search?q=lang:cpp+symbol:kWindowSize&ss=chromium
|
||||
process.env.LAUNCH_HEADLESS ? '--single-process' : '--start-maximized',
|
||||
],
|
||||
defaultViewport: { height: 1080, width: 1920 },
|
||||
executablePath: process.env.CHROMIUM_PATH || (await chromium.executablePath),
|
||||
headless: process.env.LAUNCH_HEADLESS ? true : chromium.headless,
|
||||
executablePath: process.env.CHROMIUM_PATH,
|
||||
headless: !!process.env.LAUNCH_HEADLESS,
|
||||
timeout: 0,
|
||||
userDataDir: '/tmp/puppeteer',
|
||||
});
|
||||
@ -642,7 +665,7 @@ async function retrievePage(url) {
|
||||
await page.setRequestInterception(true);
|
||||
let requestCount = 0;
|
||||
page.on('request', request => {
|
||||
if (request.resourceType() === 'font' || request.resourceType() === 'image') {
|
||||
if (['font', 'image', 'media'].includes(request.resourceType())) {
|
||||
request.abort();
|
||||
return;
|
||||
}
|
||||
@ -655,9 +678,9 @@ async function retrievePage(url) {
|
||||
request.url().toLowerCase().indexOf('mathjax') > -1
|
||||
) {
|
||||
request.abort();
|
||||
} else {
|
||||
request.continue();
|
||||
return
|
||||
}
|
||||
request.continue();
|
||||
});
|
||||
|
||||
|
||||
|
||||
@ -6,8 +6,6 @@
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const os = require('os');
|
||||
const jsdom = require("jsdom");
|
||||
const { JSDOM } = jsdom;
|
||||
|
||||
exports.mediumHandler = {
|
||||
|
||||
|
||||
@ -8,10 +8,9 @@
|
||||
"@google-cloud/storage": "^5.18.1",
|
||||
"@sentry/serverless": "^6.13.3",
|
||||
"axios": "^0.26.0",
|
||||
"chrome-aws-lambda": "^10.1.0",
|
||||
"dotenv": "^8.2.0",
|
||||
"jsdom": "^19.0.0",
|
||||
"jsonwebtoken": "^8.5.1",
|
||||
"linkedom": "^0.14.9",
|
||||
"luxon": "^2.3.1",
|
||||
"puppeteer-core": "^13.7.0",
|
||||
"puppeteer-extra": "^3.2.3",
|
||||
|
||||
Reference in New Issue
Block a user