Add puppeteer-extra plugins

This commit is contained in:
Hongbo Wu
2022-11-16 10:37:14 +08:00
parent c8febe900f
commit 0261cafb7a
2 changed files with 12 additions and 4 deletions

View File

@ -15,12 +15,17 @@ const { DateTime } = require('luxon');
const os = require('os');
const { Storage } = require('@google-cloud/storage');
const { parseHTML } = require('linkedom');
const puppeteer = require('puppeteer-core');
const { preHandleContent } = require("@omnivore/content-handler");
const puppeteer = require('puppeteer-extra');
// Add stealth plugin to hide puppeteer usage
// const StealthPlugin = require('puppeteer-extra-plugin-stealth');
// puppeteer.use(StealthPlugin());
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
puppeteer.use(StealthPlugin());
// Add adblocker plugin to block all ads and trackers (saves bandwidth)
const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker')
puppeteer.use(AdblockerPlugin({ blockTrackers: true }));
const storage = new Storage();
const ALLOWED_ORIGINS = process.env.ALLOWED_ORIGINS ? process.env.ALLOWED_ORIGINS.split(',') : [];
@ -549,7 +554,7 @@ async function retrievePage(url) {
});
try {
const response = await page.goto(url, { timeout: 8 * 1000, waitUntil: ['networkidle2'] });
const response = await page.goto(url, { timeout: 30 * 1000, waitUntil: ['networkidle2'] });
const finalUrl = response.url();
const contentType = response.headers()['content-type'];

View File

@ -12,6 +12,9 @@
"linkedom": "^0.14.9",
"luxon": "^2.3.1",
"puppeteer-core": "^16.1.0",
"puppeteer-extra": "^3.3.4",
"puppeteer-extra-plugin-adblocker": "^2.13.5",
"puppeteer-extra-plugin-stealth": "^2.11.1",
"underscore": "^1.13.4",
"winston": "^3.3.3"
},