From 2447bd658ec359922901697f3edbe737c21d5a3f Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Fri, 15 Jul 2022 10:58:58 -0700 Subject: [PATCH 1/4] Use chrome-aws-lambda in GCF --- packages/puppeteer-parse/index.js | 68 +++++++++++++++++-------------- 1 file changed, 38 insertions(+), 30 deletions(-) diff --git a/packages/puppeteer-parse/index.js b/packages/puppeteer-parse/index.js index b71453387..50ab3edda 100644 --- a/packages/puppeteer-parse/index.js +++ b/packages/puppeteer-parse/index.js @@ -23,7 +23,8 @@ const { pdfHandler } = require('./pdf-handler'); const { mediumHandler } = require('./medium-handler'); const { derstandardHandler } = require('./derstandard-handler'); const { imageHandler } = require('./image-handler'); -const puppeteer = require('puppeteer-core'); +// const puppeteer = require('puppeteer-core'); +const chromium = require('chrome-aws-lambda'); // Add stealth plugin to hide puppeteer usage // const StealthPlugin = require('puppeteer-extra-plugin-stealth'); @@ -123,36 +124,43 @@ const userAgentForUrl = (url) => { // launch Puppeteer const getBrowserPromise = (async () => { - return puppeteer.launch({ - args: [ - '--allow-running-insecure-content', - '--autoplay-policy=user-gesture-required', - '--disable-component-update', - '--disable-domain-reliability', - '--disable-features=AudioServiceOutOfProcess,IsolateOrigins,site-per-process', - '--disable-print-preview', - '--disable-setuid-sandbox', - '--disable-site-isolation-trials', - '--disable-speech-api', - '--disable-web-security', - '--disk-cache-size=33554432', - '--enable-features=SharedArrayBuffer', - '--hide-scrollbars', - '--ignore-gpu-blocklist', - '--in-process-gpu', - '--mute-audio', - '--no-default-browser-check', - '--no-pings', - '--no-sandbox', - '--no-zygote', - '--use-gl=swiftshader', - '--window-size=1920,1080', - ].filter((item) => !!item), - defaultViewport: { height: 1080, width: 1920 }, - executablePath: process.env.CHROMIUM_PATH, - headless: !!process.env.LAUNCH_HEADLESS, - timeout: 0, + return chromium.puppeteer.launch({ + args: chromium.args, + defaultViewport: chromium.defaultViewport, + executablePath: await chromium.executablePath, + headless: chromium.headless, + ignoreHTTPSErrors: true, }); + // return puppeteer.launch({ + // args: [ + // '--allow-running-insecure-content', + // '--autoplay-policy=user-gesture-required', + // '--disable-component-update', + // '--disable-domain-reliability', + // '--disable-features=AudioServiceOutOfProcess,IsolateOrigins,site-per-process', + // '--disable-print-preview', + // '--disable-setuid-sandbox', + // '--disable-site-isolation-trials', + // '--disable-speech-api', + // '--disable-web-security', + // '--disk-cache-size=33554432', + // '--enable-features=SharedArrayBuffer', + // '--hide-scrollbars', + // '--ignore-gpu-blocklist', + // '--in-process-gpu', + // '--mute-audio', + // '--no-default-browser-check', + // '--no-pings', + // '--no-sandbox', + // '--no-zygote', + // '--use-gl=swiftshader', + // '--window-size=1920,1080', + // ].filter((item) => !!item), + // defaultViewport: { height: 1080, width: 1920 }, + // executablePath: process.env.CHROMIUM_PATH, + // headless: !!process.env.LAUNCH_HEADLESS, + // timeout: 0, + // }); })(); let logRecord, functionStartTime; From 38b2b0c27135d2301152af1e35777b3142a62a3b Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Fri, 15 Jul 2022 11:01:15 -0700 Subject: [PATCH 2/4] Remove puppeteer-core package in GCF --- packages/puppeteer-parse/package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/puppeteer-parse/package.json b/packages/puppeteer-parse/package.json index 273792ccc..241a18a62 100644 --- a/packages/puppeteer-parse/package.json +++ b/packages/puppeteer-parse/package.json @@ -13,7 +13,6 @@ "jsonwebtoken": "^8.5.1", "linkedom": "^0.14.9", "luxon": "^2.3.1", - "puppeteer-core": "^15.3.2", "winston": "^3.3.3" }, "devDependencies": { From 33f35092ba146b57e1399100824a6c4087b22352 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Fri, 15 Jul 2022 11:19:44 -0700 Subject: [PATCH 3/4] Add puppeteer-core package --- packages/puppeteer-parse/package.json | 1 + yarn.lock | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/packages/puppeteer-parse/package.json b/packages/puppeteer-parse/package.json index 241a18a62..45f7ceb9e 100644 --- a/packages/puppeteer-parse/package.json +++ b/packages/puppeteer-parse/package.json @@ -13,6 +13,7 @@ "jsonwebtoken": "^8.5.1", "linkedom": "^0.14.9", "luxon": "^2.3.1", + "puppeteer-core": "^15.4.0", "winston": "^3.3.3" }, "devDependencies": { diff --git a/yarn.lock b/yarn.lock index 681c3cec1..15ef4e82b 100644 --- a/yarn.lock +++ b/yarn.lock @@ -20890,6 +20890,24 @@ puppeteer-core@^15.3.2: unbzip2-stream "1.4.3" ws "8.8.0" +puppeteer-core@^15.4.0: + version "15.4.0" + resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-15.4.0.tgz#37536c973ea8920181effde47c22c67c36d1db21" + integrity sha512-nUu0aqeOsYnKJwKlHNNCU5cqVsJ+p1EPDzNRITcEV3n1Mz06Ev2DNsb7CTtGd6Sx2rjoseD6zZzEU7XZWocYwQ== + dependencies: + cross-fetch "3.1.5" + debug "4.3.4" + devtools-protocol "0.0.1011705" + extract-zip "2.0.1" + https-proxy-agent "5.0.1" + pkg-dir "4.2.0" + progress "2.0.3" + proxy-from-env "1.1.0" + rimraf "3.0.2" + tar-fs "2.1.1" + unbzip2-stream "1.4.3" + ws "8.8.0" + puppeteer@^10.1.0: version "10.4.0" resolved "https://registry.yarnpkg.com/puppeteer/-/puppeteer-10.4.0.tgz#a6465ff97fda0576c4ac29601406f67e6fea3dc7" From 2660262c69e0253d3a0e6c17eb13f1290458ee06 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Fri, 15 Jul 2022 11:43:55 -0700 Subject: [PATCH 4/4] Use puppeteer-core --- packages/puppeteer-parse/index.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/puppeteer-parse/index.js b/packages/puppeteer-parse/index.js index 50ab3edda..695b5a1aa 100644 --- a/packages/puppeteer-parse/index.js +++ b/packages/puppeteer-parse/index.js @@ -23,8 +23,9 @@ const { pdfHandler } = require('./pdf-handler'); const { mediumHandler } = require('./medium-handler'); const { derstandardHandler } = require('./derstandard-handler'); const { imageHandler } = require('./image-handler'); -// const puppeteer = require('puppeteer-core'); + const chromium = require('chrome-aws-lambda'); +const puppeteer = require('puppeteer-core'); // Add stealth plugin to hide puppeteer usage // const StealthPlugin = require('puppeteer-extra-plugin-stealth'); @@ -124,7 +125,7 @@ const userAgentForUrl = (url) => { // launch Puppeteer const getBrowserPromise = (async () => { - return chromium.puppeteer.launch({ + return puppeteer.launch({ args: chromium.args, defaultViewport: chromium.defaultViewport, executablePath: await chromium.executablePath,