diff --git a/packages/content-fetch/src/request_handler.ts b/packages/content-fetch/src/request_handler.ts index bbb98f8d2..81fc04de2 100644 --- a/packages/content-fetch/src/request_handler.ts +++ b/packages/content-fetch/src/request_handler.ts @@ -62,7 +62,10 @@ const storage = process.env.GCS_UPLOAD_SA_KEY_FILE_PATH : new Storage() const bucketName = process.env.GCS_UPLOAD_BUCKET || 'omnivore-files' -const NO_CACHE_URLS = ['https://deviceandbrowserinfo.com/are_you_a_bot'] +const NO_CACHE_URLS = [ + 'https://deviceandbrowserinfo.com/are_you_a_bot', + 'https://deviceandbrowserinfo.com/info_device', +] const uploadToBucket = async (filePath: string, data: string) => { await storage diff --git a/packages/puppeteer-parse/src/browser.ts b/packages/puppeteer-parse/src/browser.ts index b121a5dfa..ab6568759 100644 --- a/packages/puppeteer-parse/src/browser.ts +++ b/packages/puppeteer-parse/src/browser.ts @@ -34,7 +34,6 @@ export const getBrowser = async (): Promise => { '--disk-cache-size=33554432', '--enable-features=SharedArrayBuffer', '--hide-scrollbars', - '--disable-gpu', '--mute-audio', '--no-default-browser-check', '--no-pings', @@ -56,7 +55,7 @@ export const getBrowser = async (): Promise => { width: 1920, }, executablePath: process.env.CHROMIUM_PATH, - headless: !!process.env.LAUNCH_HEADLESS, + headless: process.env.LAUNCH_HEADLESS === 'true', timeout: 10_000, // 10 seconds dumpio: true, // show console logs in the terminal })) as Browser diff --git a/packages/puppeteer-parse/src/index.ts b/packages/puppeteer-parse/src/index.ts index b7b2af23e..812502409 100644 --- a/packages/puppeteer-parse/src/index.ts +++ b/packages/puppeteer-parse/src/index.ts @@ -242,13 +242,14 @@ async function retrievePage( // constraints for the generated fingerprint fingerprintOptions: { devices: ['desktop'], - operatingSystems: ['windows'], + operatingSystems: ['linux'], browsers: ['chrome'], locales: [locale || 'en-US'], screen: { maxWidth: 1920, maxHeight: 1080, }, + mockWebRTC: true, }, })) as Page @@ -259,12 +260,11 @@ async function retrievePage( if (!enableJavascriptForUrl(url)) { await page.setJavaScriptEnabled(false) } - // await page.setUserAgent(userAgentForUrl(url)) - // // set locale for the page - // if (locale) { - // await page.setExtraHTTPHeaders({ 'Accept-Language': locale }) - // } + // set locale for the page + if (locale) { + await page.setExtraHTTPHeaders({ 'Accept-Language': locale }) + } // set timezone for the page if (timezone) { @@ -364,7 +364,7 @@ async function retrievePage( const response = await page.goto(url, { timeout: 30 * 1000, - waitUntil: ['networkidle2'], + waitUntil: ['networkidle0'], }) if (!response) { throw new Error('No response from page')