@ -15,17 +15,12 @@ const { DateTime } = require('luxon');
|
||||
const os = require('os');
|
||||
const { Storage } = require('@google-cloud/storage');
|
||||
const { parseHTML } = require('linkedom');
|
||||
const puppeteer = require('puppeteer-core');
|
||||
const { preHandleContent } = require("@omnivore/content-handler");
|
||||
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
|
||||
// Add stealth plugin to hide puppeteer usage
|
||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
puppeteer.use(StealthPlugin());
|
||||
|
||||
// Add adblocker plugin to block all ads and trackers (saves bandwidth)
|
||||
// Load the adblocker plugin factory; terminated explicitly rather than relying on ASI.
const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker');
|
||||
puppeteer.use(AdblockerPlugin({ blockTrackers: true }));
|
||||
// const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
// puppeteer.use(StealthPlugin());
|
||||
|
||||
// Google Cloud Storage client (from @google-cloud/storage), created with no explicit options.
// NOTE(review): project/credentials are presumably resolved from the environment — confirm.
const storage = new Storage();
|
||||
/**
 * Parse a comma-separated list of origins into an array.
 *
 * Entries are trimmed and empty entries are dropped, so values such as
 * "https://a.com, https://b.com" or a trailing comma do not produce entries
 * that can never exactly match an origin string.
 *
 * @param {string | undefined} rawOrigins - Comma-separated origins, or undefined/empty when unset.
 * @returns {string[]} The parsed origins; an empty array when nothing is configured.
 */
function parseAllowedOrigins(rawOrigins) {
  if (!rawOrigins) {
    return [];
  }
  return rawOrigins
    .split(',')
    .map((origin) => origin.trim())
    .filter((origin) => origin !== '');
}

// Origin allow-list read once at load time from the ALLOWED_ORIGINS environment variable.
const ALLOWED_ORIGINS = parseAllowedOrigins(process.env.ALLOWED_ORIGINS);
|
||||
@ -554,7 +549,7 @@ async function retrievePage(url) {
|
||||
});
|
||||
|
||||
try {
|
||||
const response = await page.goto(url, { timeout: 30 * 1000, waitUntil: ['networkidle2'] });
|
||||
const response = await page.goto(url, { timeout: 8 * 1000, waitUntil: ['networkidle2'] });
|
||||
const finalUrl = response.url();
|
||||
const contentType = response.headers()['content-type'];
|
||||
|
||||
|
||||
@ -12,9 +12,6 @@
|
||||
"linkedom": "^0.14.9",
|
||||
"luxon": "^2.3.1",
|
||||
"puppeteer-core": "^16.1.0",
|
||||
"puppeteer-extra": "^3.3.4",
|
||||
"puppeteer-extra-plugin-adblocker": "^2.13.5",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.1",
|
||||
"underscore": "^1.13.4",
|
||||
"winston": "^3.3.3"
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user