rewrite puppeteer in typescript

2024-01-15 23:32:26 +08:00
parent 51e586ed3d
commit cd3402b98a
13 changed files with 678 additions and 330 deletions
--- a/packages/content-fetch/item.js
+++ b/packages/content-fetch/item.js
@ -0,0 +1,75 @@
+const { interfaces } = require('mocha');
+const { uploadPdf, sendSavePageMutation, sendCreateArticleMutation, sendImportStatusUpdate } = require('./api');
+
+interface Item {
+  url: string;
+  userId: string;
+  contentType: string;
+  articleSavingRequestId: string;
+  state: string;
+  labels: string[];
+  source: string;
+  folder: string;
+  rssFeedUrl: string;
+  savedAt: string;
+  publishedAt: string;
+  readabilityResult: string;
+}
+
+/**
+ * Saves a fetched item through the API helpers required from './api'.
+ *
+ * PDFs are uploaded with `uploadPdf` and registered with
+ * `sendCreateArticleMutation`; any other content type is stored with
+ * `sendSavePageMutation`.
+ *
+ * NOTE(review): `title`, `content`, `req` and `MAX_RETRY_COUNT` are used below
+ * but are not declared anywhere in this file. Until they are supplied, the
+ * `finally` block throws a ReferenceError on `req`, which replaces whatever
+ * the `try` block returned.
+ *
+ * @param item - the item to save (see the `Item` interface).
+ * @returns `false` when an API call yields no result, `true` when the API
+ *   reports `UNAUTHORIZED` (logged as "do not retry"), otherwise `undefined`.
+ */
+exports.saveItem = async (item: Item) => {
+  const {
+    url,
+    userId,
+    contentType,
+    articleSavingRequestId,
+    state,
+    labels,
+    source,
+    folder,
+    rssFeedUrl,
+    savedAt,
+    publishedAt,
+    readabilityResult,
+  } = item;
+  // tracked locally so the assignments below do not target an undeclared name
+  let importStatus: string | undefined;
+  try {
+    if (contentType === 'application/pdf') {
+      const uploadFileId = await uploadPdf(url, userId, articleSavingRequestId);
+      const uploadedPdf = await sendCreateArticleMutation(userId, {
+        url: encodeURI(url),
+        articleSavingRequestId,
+        uploadFileId,
+        state,
+        labels,
+        source,
+        folder,
+        rssFeedUrl,
+        savedAt,
+        publishedAt,
+      });
+      if (!uploadedPdf) {
+        console.error('error while saving uploaded pdf', url);
+        return false;
+      }
+    } else {
+      const apiResponse = await sendSavePageMutation(userId, {
+        url,
+        clientRequestId: articleSavingRequestId,
+        // NOTE(review): `title` and `content` are not fields of `Item` and are
+        // not declared in this file; confirm where they should come from
+        title,
+        originalContent: content,
+        parseResult: readabilityResult,
+        state,
+        labels,
+        rssFeedUrl,
+        savedAt,
+        publishedAt,
+        source,
+        folder,
+      });
+      if (!apiResponse) {
+        console.error('error while saving page', url);
+        return false;
+      } else if (apiResponse.error === 'UNAUTHORIZED') {
+        console.log('user is deleted, do not retry', userId);
+        return true;
+      } else {
+        importStatus = readabilityResult ? 'imported' : 'failed';
+      }
+    }
+  } catch (error) {
+    // there is no `logRecord` in this module, so log the failure directly
+    console.error('error while saving item', url, error);
+  } finally {
+    // mark import failed on the last failed retry
+    // NOTE(review): `req` and `MAX_RETRY_COUNT` are not declared in this file;
+    // the retry count has to be passed in (or this check moved to the caller)
+    const retryCount = req.headers['x-cloudtasks-taskretrycount'];
+    if (retryCount === MAX_RETRY_COUNT) {
+      console.log('max retry count reached');
+      importStatus = importStatus || 'failed';
+    }
+  }
+}
--- a/packages/content-handler/package.json
+++ b/packages/content-handler/package.json
@ -36,7 +36,7 @@
    "linkedom": "^0.14.16",
    "lodash": "^4.17.21",
    "luxon": "^3.0.4",
-    "puppeteer-core": "^19.1.1",
+    "puppeteer-core": "^20.9.0",
    "underscore": "^1.13.6",
    "uuid": "^9.0.0"
  },
--- a/packages/puppeteer-parse/.eslintignore
+++ b/packages/puppeteer-parse/.eslintignore
@ -0,0 +1,2 @@
+node_modules/
+build/
--- a/packages/puppeteer-parse/.eslintrc
+++ b/packages/puppeteer-parse/.eslintrc
@ -0,0 +1,14 @@
+{
+  "extends": "../../.eslintrc",
+  "parserOptions": {
+    "project": "tsconfig.json"
+  },
+  "rules": {
+    "@typescript-eslint/no-floating-promises": [
+      "error",
+      {
+        "ignoreIIFE": true
+      }
+    ]
+  }
+}
--- a/packages/puppeteer-parse/mocha-config.json
+++ b/packages/puppeteer-parse/mocha-config.json
@ -0,0 +1,5 @@
+{
+    "extension": ["ts"],
+    "spec": "test/**/*.test.ts",
+    "require": "test/babel-register.js"
+  }
--- a/packages/puppeteer-parse/package.json
+++ b/packages/puppeteer-parse/package.json
@ -2,10 +2,14 @@
  "name": "@omnivore/puppeteer-parse",
  "version": "1.0.0",
  "description": "Accepts URL of the article and parses its content",
-  "main": "index.js",
+  "main": "build/src/index.js",
+  "files": [
+    "build/src"
+  ],
  "dependencies": {
    "@omnivore/content-handler": "1.0.0",
    "@omnivore/readability": "1.0.0",
+    "axios": "^1.4.0",
    "crypto": "^1.0.1",
    "dompurify": "^2.4.1",
    "linkedom": "^0.14.9",
@ -20,7 +24,10 @@
    "mocha": "^10.0.0"
  },
  "scripts": {
-    "test": "mocha test/*.js"
+    "test": "yarn mocha -r ts-node/register --config mocha-config.json",
+    "test:typecheck": "tsc --noEmit",
+    "lint": "eslint src --ext ts,js,tsx,jsx",
+    "build": "tsc"
  },
  "volta": {
    "extends": "../../package.json"
--- a/packages/puppeteer-parse/src/index.ts
+++ b/packages/puppeteer-parse/src/index.ts
@ -1,99 +1,106 @@
-/* eslint-disable no-undef */
-/* eslint-disable no-empty */
-/* eslint-disable @typescript-eslint/explicit-function-return-type */
-/* eslint-disable @typescript-eslint/no-var-requires */
-/* eslint-disable @typescript-eslint/no-require-imports */
-const { encode } = require("urlsafe-base64");
-const crypto = require("crypto");
-
-const Url = require('url');
-const os = require('os');
+/* eslint-disable @typescript-eslint/no-unsafe-member-access */
+/* eslint-disable @typescript-eslint/no-unsafe-assignment */
+import { preHandleContent, preParseContent } from '@omnivore/content-handler'
+import { Readability } from '@omnivore/readability'
+import axios from 'axios'
+import crypto from 'crypto'
+import createDOMPurify, { SanitizeElementHookEvent } from 'dompurify'
 // const { Storage } = require('@google-cloud/storage');
-const { parseHTML } = require('linkedom');
-const { preHandleContent, preParseContent } = require("@omnivore/content-handler");
-const { Readability } = require("@omnivore/readability");
-
-const puppeteer = require('puppeteer-extra');
+import { parseHTML } from 'linkedom'
+import path from 'path'
+import { Browser, BrowserContext, Page, Protocol } from 'puppeteer-core'
+import puppeteer from 'puppeteer-extra'
+import AdblockerPlugin from 'puppeteer-extra-plugin-adblocker'
+import StealthPlugin from 'puppeteer-extra-plugin-stealth'
+import Url from 'url'
+import { encode } from 'urlsafe-base64'

 // Add stealth plugin to hide puppeteer usage
-const StealthPlugin = require('puppeteer-extra-plugin-stealth');
-puppeteer.use(StealthPlugin());
-
+puppeteer.use(StealthPlugin())
 // Add adblocker plugin to block all ads and trackers (saves bandwidth)
-const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker');
-puppeteer.use(AdblockerPlugin({ blockTrackers: true }));
-
-const createDOMPurify = require("dompurify");
+puppeteer.use(AdblockerPlugin({ blockTrackers: true }))

 // const storage = new Storage();
-const ALLOWED_ORIGINS = process.env.ALLOWED_ORIGINS ? process.env.ALLOWED_ORIGINS.split(',') : [];
+// const ALLOWED_ORIGINS = process.env.ALLOWED_ORIGINS
+//   ? process.env.ALLOWED_ORIGINS.split(',')
+//   : []
 // const previewBucket = process.env.PREVIEW_IMAGE_BUCKET ? storage.bucket(process.env.PREVIEW_IMAGE_BUCKET) : undefined;

-const filePath = `${os.tmpdir()}/previewImage.png`;
+// const filePath = `${os.tmpdir()}/previewImage.png`

-const MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.62 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
-const DESKTOP_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4372.0 Safari/537.36'
-const BOT_DESKTOP_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4372.0 Safari/537.36'
-const NON_BOT_DESKTOP_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4372.0 Safari/537.36'
+const MOBILE_USER_AGENT =
+  'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.62 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
+const DESKTOP_USER_AGENT =
+  'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4372.0 Safari/537.36'
+const BOT_DESKTOP_USER_AGENT =
+  'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4372.0 Safari/537.36'
+const NON_BOT_DESKTOP_USER_AGENT =
+  'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4372.0 Safari/537.36'
 const NON_BOT_HOSTS = ['bloomberg.com', 'forbes.com']
-const NON_SCRIPT_HOSTS= ['medium.com', 'fastcompany.com', 'fortelabs.com'];
+const NON_SCRIPT_HOSTS = ['medium.com', 'fastcompany.com', 'fortelabs.com']

-const ALLOWED_CONTENT_TYPES = ['text/html', 'application/octet-stream', 'text/plain', 'application/pdf'];
+const ALLOWED_CONTENT_TYPES = [
+  'text/html',
+  'application/octet-stream',
+  'text/plain',
+  'application/pdf',
+]
+const REQUEST_TIMEOUT = 30000

-const userAgentForUrl = (url) => {
+const userAgentForUrl = (url: string) => {
  try {
-    const u = new URL(url);
+    const u = new URL(url)
    for (const host of NON_BOT_HOSTS) {
      if (u.hostname.endsWith(host)) {
-        return NON_BOT_DESKTOP_USER_AGENT;
+        return NON_BOT_DESKTOP_USER_AGENT
      }
    }
  } catch (e) {
    console.log('error getting user agent for url', url, e)
  }
  return DESKTOP_USER_AGENT
-};
+}

-const fetchContentWithScrapingBee = async (url) => {
+const fetchContentWithScrapingBee = async (url: string) => {
  try {
    const response = await axios.get('https://app.scrapingbee.com/api/v1', {
      params: {
-        'api_key':  process.env.SCRAPINGBEE_API_KEY,
-        'url': url,
-        'render_js': 'false',
-        'premium_proxy': 'true',
-        'country_code':'us'
+        api_key: process.env.SCRAPINGBEE_API_KEY,
+        url: url,
+        render_js: 'false',
+        premium_proxy: 'true',
+        country_code: 'us',
      },
      timeout: REQUEST_TIMEOUT,
    })
-  
-    const dom = parseHTML(response.data).document;
+
+    const dom = parseHTML(response.data).document
    return { title: dom.title, domContent: dom.documentElement.outerHTML, url }
  } catch (e) {
-    console.error('error fetching with scrapingbee', e.message)
+    console.error('error fetching with scrapingbee', e)

    return { title: url, domContent: '', url }
  }
 }

-const enableJavascriptForUrl = (url) => {
+const enableJavascriptForUrl = (url: string) => {
  try {
-    const u = new URL(url);
+    const u = new URL(url)
    for (const host of NON_SCRIPT_HOSTS) {
      if (u.hostname.endsWith(host)) {
-        return false;
+        return false
      }
    }
  } catch (e) {
    console.log('error getting hostname for url', url, e)
  }
  return true
-};
+}

 // launch Puppeteer
 const getBrowserPromise = (async () => {
-  console.log("starting puppeteer browser")
-  return puppeteer.launch({
+  console.log('starting puppeteer browser')
+  return (await puppeteer.launch({
    args: [
      '--allow-running-insecure-content',
      '--autoplay-policy=user-gesture-required',
@ -123,112 +130,141 @@ const getBrowserPromise = (async () => {
      height: 1080,
      isLandscape: true,
      isMobile: false,
-      width: 1920
+      width: 1920,
    },
    executablePath: process.env.CHROMIUM_PATH,
    headless: !!process.env.LAUNCH_HEADLESS,
    timeout: 120000, // 2 minutes
-  });
-})();
+  })) as Browser
+})()

-async function fetchContent(url, locale, timezone) {
-  let functionStartTime = Date.now();
-  let logRecord = {
+export const fetchContent = async (
+  url: string,
+  locale: string,
+  timezone: string
+) => {
+  const functionStartTime = Date.now()
+  const logRecord = {
    url,
    functionStartTime,
    locale,
    timezone,
  }
-  console.log(`content-fetch request`, logRecord);
+  console.log(`content-fetch request`, logRecord)

-  let context, page, finalUrl, title, content, contentType, readabilityResult = null;
+  let context: BrowserContext | undefined,
+    page: Page | undefined,
+    finalUrl: string | undefined,
+    title: string | undefined,
+    content: string | undefined,
+    contentType: string | undefined,
+    readabilityResult: Readability.ParseResult | null | undefined
  try {
-    url = getUrl(url);
+    url = getUrl(url)
    if (!url) {
-      throw new Error('Valid URL to parse not specified');
+      throw new Error('Valid URL to parse not specified')
    }

    // pre handle url with custom handlers
    try {
-      const browser = await getBrowserPromise;
-      const result = await preHandleContent(url, browser);
+      const browser = await getBrowserPromise
+      const result = await preHandleContent(url, browser)
      if (result && result.url) {
-        validateUrlString(url);
-        url = result.url;
+        // validate the handler-supplied URL before adopting it; the incoming
+        // `url` has already been validated by getUrl() above
+        validateUrlString(result.url)
+        url = result.url
+      }
+      if (result && result.title) {
+        title = result.title
+      }
+      if (result && result.content) {
+        content = result.content
+      }
+      if (result && result.contentType) {
+        contentType = result.contentType
      }
-      if (result && result.title) { title = result.title }
-      if (result && result.content) { content = result.content }
-      if (result && result.contentType) { contentType = result.contentType }
    } catch (e) {
-      console.info('error with handler: ', e);
+      console.info('error with handler: ', e)
    }

    if ((!content || !title) && contentType !== 'application/pdf') {
-      const result = await retrievePage(url, logRecord, functionStartTime, locale, timezone);
-      if (result && result.context) { context = result.context }
-      if (result && result.page) { page = result.page }
-      if (result && result.finalUrl) { finalUrl = result.finalUrl }
-      if (result && result.contentType) { contentType = result.contentType }
+      const result = await retrievePage(
+        url,
+        logRecord,
+        functionStartTime,
+        locale,
+        timezone
+      )
+      if (result && result.context) {
+        context = result.context
+      }
+      if (result && result.page) {
+        page = result.page
+      }
+      if (result && result.finalUrl) {
+        finalUrl = result.finalUrl
+      }
+      if (result && result.contentType) {
+        contentType = result.contentType
+      }
    } else {
      finalUrl = url
    }

    if (contentType !== 'application/pdf') {
-      if (!content || !title) {
-        const result = await retrieveHtml(page, logRecord);
+      if (page && (!content || !title)) {
+        const result = await retrieveHtml(page, logRecord)
        if (result.isBlocked) {
          const sbResult = await fetchContentWithScrapingBee(url)
          title = sbResult.title
          content = sbResult.domContent
        } else {
-          title = result.title;
-          content = result.domContent;
+          title = result.title
+          content = result.domContent
        }
      } else {
-        console.info('using prefetched content and title');
+        console.info('using prefetched content and title')
      }
    }
  } catch (e) {
-    console.error(`Error while retrieving page ${url}`, e);
+    console.error(`Error while retrieving page ${url}`, e)

    // fallback to scrapingbee for non pdf content
    if (url && contentType !== 'application/pdf') {
-      console.info('fallback to scrapingbee', url);
+      console.info('fallback to scrapingbee', url)

-      const fetchStartTime = Date.now();
-      const sbResult = await fetchContentWithScrapingBee(url);
-      content = sbResult.domContent;
-      title = sbResult.title;
+      const sbResult = await fetchContentWithScrapingBee(url)
+      content = sbResult.domContent
+      title = sbResult.title
    } else {
-      throw e;
+      throw e
    }
  } finally {
    // close browser context if it was opened
    if (context) {
-      await context.close();
+      await context.close()
    }
    // save non pdf content
    if (url && contentType !== 'application/pdf') {
      // parse content if it is not empty
      if (content) {
-        let document = parseHTML(content).document;
+        let document = parseHTML(content).document
        // preParse content
        const preParsedDom = await preParseContent(url, document)
        if (preParsedDom) {
          document = preParsedDom
        }
-        readabilityResult = await getReadabilityResult(url, document);
+        readabilityResult = await getReadabilityResult(url, document)
      }
    }

-    console.info(`content-fetch result`, logRecord);
-
-    return { finalUrl, title, content, readabilityResult, contentType };
+    console.info(`content-fetch result`, logRecord)
  }
+
+  return { finalUrl, title, content, readabilityResult, contentType }
 }

-function validateUrlString(url) {
-  const u = new URL(url);
+function validateUrlString(url: string) {
+  const u = new URL(url)
  // Make sure the URL is http or https
  if (u.protocol !== 'http:' && u.protocol !== 'https:') {
    throw new Error('Invalid URL protocol check failed')
@ -243,60 +279,75 @@ function validateUrlString(url) {
  }
 }

-function tryParseUrl(urlStr) {
+function tryParseUrl(urlStr: string) {
  if (!urlStr) {
-    return null;
+    return null
  }
-  
+
  // a regular expression to match all URLs
-  const regex = /(https?:\/\/[^\s]+)/g;
-  
-  const matches = urlStr.match(regex);
-  
+  const regex = /(https?:\/\/[^\s]+)/g
+
+  const matches = urlStr.match(regex)
+
  if (matches) {
-    return matches[0]; // only return first match
+    return matches[0] // only return first match
  } else {
-    return null;
+    return null
  }
 }

-function getUrl(urlStr) {
+function getUrl(urlStr: string) {
  const url = tryParseUrl(urlStr)
  if (!url) {
-    throw new Error('No URL specified');
+    throw new Error('No URL specified')
  }

-  validateUrlString(url);
+  validateUrlString(url)

-  const parsed = Url.parse(url);
-  return parsed.href;
+  const parsed = Url.parse(url)
+  return parsed.href
 }

-async function retrievePage(url, logRecord, functionStartTime, locale, timezone) {
-  validateUrlString(url);
+async function retrievePage(
+  url: string,
+  logRecord: Record<string, any>,
+  functionStartTime: number,
+  locale: string,
+  timezone: string
+) {
+  validateUrlString(url)

-  const browser = await getBrowserPromise;
-  logRecord.timing = { ...logRecord.timing, browserOpened: Date.now() - functionStartTime };
+  const browser = await getBrowserPromise
+  logRecord.timing = {
+    ...logRecord.timing,
+    browserOpened: Date.now() - functionStartTime,
+  }

-  const context = await browser.createIncognitoBrowserContext();
+  const context = await browser.createIncognitoBrowserContext()
  const page = await context.newPage()

  if (!enableJavascriptForUrl(url)) {
-    await page.setJavaScriptEnabled(false);
+    await page.setJavaScriptEnabled(false)
  }
-  await page.setUserAgent(userAgentForUrl(url));
+  await page.setUserAgent(userAgentForUrl(url))

  // set locale for the page
  if (locale) {
-    await page.setExtraHTTPHeaders({ 'Accept-Language': locale });
+    await page.setExtraHTTPHeaders({ 'Accept-Language': locale })
  }

  // set timezone for the page
  if (timezone) {
-    await page.emulateTimezone(timezone);
+    await page.emulateTimezone(timezone)
  }

-  const client = await page.target().createCDPSession();
+  const client = await page.target().createCDPSession()
+
+  const downloadPath = path.resolve('./download_dir/')
+  await client.send('Page.setDownloadBehavior', {
+    behavior: 'allow',
+    downloadPath,
+  })

  // intercept request when response headers was received
  await client.send('Network.setRequestInterception', {
@ -307,107 +358,126 @@ async function retrievePage(url, logRecord, functionStartTime, locale, timezone)
        interceptionStage: 'HeadersReceived',
      },
    ],
-  });
-
-  const path = require('path');
-  const download_path = path.resolve('./download_dir/');
-
-  await client.send('Page.setDownloadBehavior', {
-    behavior: 'allow',
-    userDataDir: './',
-    downloadPath: download_path,
  })

-  client.on('Network.requestIntercepted', async e => {
-    const headers = e.responseHeaders || {};
+  client.on(
+    'Network.requestIntercepted',
+    (e: Protocol.Network.RequestInterceptedEvent) => {
+      ;(async () => {
+        const headers = e.responseHeaders || {}

-    const [contentType] = (headers['content-type'] || headers['Content-Type'] || '')
-      .toLowerCase()
-      .split(';');
-    const obj = { interceptionId: e.interceptionId };
+        const [contentType] = (
+          headers['content-type'] ||
+          headers['Content-Type'] ||
+          ''
+        )
+          .toLowerCase()
+          .split(';')
+        const obj: Protocol.Network.ContinueInterceptedRequestRequest = {
+          interceptionId: e.interceptionId,
+        }

-    if (e.responseStatusCode >= 200 && e.responseStatusCode < 300) {
-      // We only check content-type on success responses
-      // as it doesn't matter what the content type is for things
-      // like redirects
-      if (contentType && !ALLOWED_CONTENT_TYPES.includes(contentType)) {
-        obj['errorReason'] = 'BlockedByClient';
-      }
+        if (
+          e.responseStatusCode &&
+          e.responseStatusCode >= 200 &&
+          e.responseStatusCode < 300
+        ) {
+          // We only check content-type on success responses
+          // as it doesn't matter what the content type is for things
+          // like redirects
+          if (contentType && !ALLOWED_CONTENT_TYPES.includes(contentType)) {
+            obj['errorReason'] = 'BlockedByClient'
+          }
+        }
+
+        try {
+          await client.send('Network.continueInterceptedRequest', obj)
+        } catch {
+          // ignore
+        }
+      })()
    }
-
-    try {
-      await client.send('Network.continueInterceptedRequest', obj);
-      // eslint-disable-next-line no-empty
-    } catch {}
-  });
+  )

  /*
-    * Disallow MathJax from running in Puppeteer and modifying the document,
-    * we shall instead run it in our frontend application to transform any
-    * mathjax content when present.
-    */
-  await page.setRequestInterception(true);
-  let requestCount = 0;
-  page.on('request', request => {
-    if (request.resourceType() === 'font') {
-      // Disallow fonts from loading
-      request.abort();
-      return;
-    }
-    if (requestCount++ > 100) {
-      request.abort();
-      return;
-    }
-    if (
-      request.resourceType() === 'script' &&
-      request.url().toLowerCase().indexOf('mathjax') > -1
-    ) {
-      request.abort();
-      return
-    }
-    request.continue();
-  });
+   * Disallow MathJax from running in Puppeteer and modifying the document,
+   * we shall instead run it in our frontend application to transform any
+   * mathjax content when present.
+   */
+  await page.setRequestInterception(true)
+  let requestCount = 0
+  // Decide per request whether to abort (fonts, anything past the request
+  // budget, MathJax scripts) or let it continue. abort()/continue() can
+  // reject, so the whole decision is guarded: a rejection inside this
+  // fire-and-forget IIFE would otherwise surface as an unhandled rejection.
+  page.on('request', (request) => {
+    ;(async () => {
+      try {
+        if (request.resourceType() === 'font') {
+          // Disallow fonts from loading
+          return await request.abort()
+        }
+        if (requestCount++ > 100) {
+          return await request.abort()
+        }
+        if (
+          request.resourceType() === 'script' &&
+          request.url().toLowerCase().indexOf('mathjax') > -1
+        ) {
+          return await request.abort()
+        }
+
+        await request.continue()
+      } catch (e) {
+        console.log('error handling intercepted request', request.url(), e)
+      }
+    })()
+  })

  // Puppeteer fails during download of PDf files,
  // so record the failure and use those items
-  let lastPdfUrl = undefined;
-  page.on('response', response => {
+  let lastPdfUrl = undefined
+  page.on('response', (response) => {
    if (response.headers()['content-type'] === 'application/pdf') {
-      lastPdfUrl = response.url();
+      lastPdfUrl = response.url()
    }
-  });
+  })

  try {
-    const response = await page.goto(url, { timeout: 30 * 1000, waitUntil: ['networkidle2'] });
-    const finalUrl = response.url();
-    const contentType = response.headers()['content-type'];
+    const response = await page.goto(url, {
+      timeout: 30 * 1000,
+      waitUntil: ['networkidle2'],
+    })
+    if (!response) {
+      throw new Error('No response from page')
+    }

-    logRecord.finalUrl = response.url();
-    logRecord.contentType = response.headers()['content-type'];
+    const finalUrl = response.url()
+    const contentType = response.headers()['content-type']

-    return { context, page, response, finalUrl, contentType };
+    logRecord.finalUrl = response.url()
+    logRecord.contentType = response.headers()['content-type']
+
+    return { context, page, response, finalUrl, contentType }
  } catch (error) {
    if (lastPdfUrl) {
-      return { context, page, finalUrl: lastPdfUrl, contentType: 'application/pdf' };
+      return {
+        context,
+        page,
+        finalUrl: lastPdfUrl,
+        contentType: 'application/pdf',
+      }
    }
-    await context.close();
-    throw error;
+    await context.close()
+    throw error
  }
 }

-async function retrieveHtml(page, logRecord) {
-  let domContent = '', title;
+async function retrieveHtml(page: Page, logRecord: Record<string, any>) {
+  let domContent = '',
+    title
  try {
-    title = await page.title();
-    logRecord.title = title;
+    title = await page.title()
+    logRecord.title = title

-    const pageScrollingStart = Date.now();
+    const pageScrollingStart = Date.now()
    /* scroll with a 5 seconds timeout */
    await Promise.race([
-      new Promise(resolve => {
-        (async function () {
-          try {
-            await page.evaluate(`(async () => {
+      // not awaited here: awaiting inside the array would let the scroll run
+      // to completion before Promise.race starts, so the 5 second cap below
+      // would never take effect
+      page
+        .evaluate(
+          `(async () => {
                /* credit: https://github.com/puppeteer/puppeteer/issues/305 */
                return new Promise((resolve, reject) => {
                  let scrollHeight = document.body.scrollHeight;
@ -422,46 +492,56 @@ async function retrieveHtml(page, logRecord) {
                    }
                  }, 10);
                });
-              })()`);
-          } catch (e) {
-            logRecord.scrollError = true;
-          } finally {
-            resolve(true);
-          }
-        })();
-      }),
-      page.waitForTimeout(5000),
-    ]);
-    logRecord.timing = { ...logRecord.timing, pageScrolled: Date.now() - pageScrollingStart };
+              })()`
+        )
+        .catch((e) => {
+          console.log('error scrolling page', e)
+          logRecord.scrollError = true
+        }),
+      new Promise((r) => setTimeout(r, 5000)),
+    ])

-    const iframes = {};
-    const urls = [];
-    const framesPromises = [];
-    const allowedUrls = /instagram\.com/gi;
+    logRecord.timing = {
+      ...logRecord.timing,
+      pageScrolled: Date.now() - pageScrollingStart,
+    }
+
+    const iframes: Record<string, any> = {}
+    const urls: string[] = []
+    const framesPromises = []
+    // no `g` flag: a global regex keeps `lastIndex` between test() calls and
+    // can miss a match when it is reused across different frame URLs
+    const allowedUrls = /instagram\.com/i

    for (const frame of page.mainFrame().childFrames()) {
      if (frame.url() && allowedUrls.test(frame.url())) {
-        urls.push(frame.url());
-        framesPromises.push(frame.evaluate(el => el.innerHTML, await frame.$('body')));
+        urls.push(frame.url())
+        framesPromises.push(
+          frame.evaluate((el) => el?.innerHTML, await frame.$('body'))
+        )
      }
    }

-    (await Promise.all(framesPromises)).forEach((frame, index) => (iframes[urls[index]] = frame));
+    ;(await Promise.all(framesPromises)).forEach(
+      (frame, index) => (iframes[urls[index]] = frame)
+    )

-    const domContentCapturingStart = Date.now();
+    const domContentCapturingStart = Date.now()
    // get document body with all hidden elements removed
-    domContent = await page.evaluate(iframes => {
-      const BI_SRC_REGEXP = /url\("(.+?)"\)/gi;
+    domContent = await page.evaluate((iframes) => {
+      const BI_SRC_REGEXP = /url\("(.+?)"\)/gi

-      Array.from(document.body.getElementsByTagName('*')).forEach(el => {
-        const style = window.getComputedStyle(el);
+      Array.from(document.body.getElementsByTagName('*')).forEach((el) => {
+        const style = window.getComputedStyle(el)
+        const src = el.getAttribute('src')

        try {
          // Removing blurred images since they are mostly the copies of lazy loaded ones
-          if (el.tagName && ['img', 'image'].includes(el.tagName.toLowerCase())) {
-            const filter = style.getPropertyValue('filter');
+          if (
+            el.tagName &&
+            ['img', 'image'].includes(el.tagName.toLowerCase())
+          ) {
+            const filter = style.getPropertyValue('filter')
            if (filter && filter.startsWith('blur')) {
-              el.parentNode && el.parentNode.removeChild(el);
+              el.parentNode && el.parentNode.removeChild(el)
            }
          }
        } catch (err) {
@ -469,69 +549,80 @@ async function retrieveHtml(page, logRecord) {
        }

        // convert all nodes with background image to img nodes
-        if (!['', 'none'].includes(style.getPropertyValue('background-image'))) {
-          const filter = style.getPropertyValue('filter');
+        if (
+          !['', 'none'].includes(style.getPropertyValue('background-image'))
+        ) {
+          const filter = style.getPropertyValue('filter')
          // avoiding image nodes with a blur effect creation
          if (filter && filter.startsWith('blur')) {
-            el && el.parentNode && el.parentNode.removeChild(el);
+            el && el.parentNode && el.parentNode.removeChild(el)
          } else {
-            const matchedSRC = BI_SRC_REGEXP.exec(style.getPropertyValue('background-image'));
+            const matchedSRC = BI_SRC_REGEXP.exec(
+              style.getPropertyValue('background-image')
+            )
            // Using "g" flag with a regex we have to manually break down lastIndex to zero after every usage
            // More details here: https://stackoverflow.com/questions/1520800/why-does-a-regexp-with-global-flag-give-wrong-results
-            BI_SRC_REGEXP.lastIndex = 0;
+            BI_SRC_REGEXP.lastIndex = 0

-            if (matchedSRC && matchedSRC[1] && !el.src) {
+            if (matchedSRC && matchedSRC[1] && !src) {
              // Replacing element only of there are no content inside, b/c might remove important div with content.
              // Article example: http://www.josiahzayner.com/2017/01/genetic-designer-part-i.html
              // DIV with class "content-inner" has `url("https://resources.blogblog.com/blogblog/data/1kt/travel/bg_container.png")` background image.
              if (!el.textContent) {
-                const img = document.createElement('img');
-                img.src = matchedSRC[1];
-                el && el.parentNode && el.parentNode.replaceChild(img, el);
+                const img = document.createElement('img')
+                img.src = matchedSRC[1]
+                el && el.parentNode && el.parentNode.replaceChild(img, el)
              }
            }
          }
        }

        if (el.tagName === 'IFRAME') {
-          if (iframes[el.src]) {
-            const newNode = document.createElement('div');
-            newNode.className = 'omnivore-instagram-embed';
-            newNode.innerHTML = iframes[el.src];
-            el && el.parentNode && el.parentNode.replaceChild(newNode, el);
+          if (src && iframes[src]) {
+            const newNode = document.createElement('div')
+            newNode.className = 'omnivore-instagram-embed'
+            newNode.innerHTML = iframes[src]
+            el && el.parentNode && el.parentNode.replaceChild(newNode, el)
          }
        }
-      });
+      })

-      if (document.querySelector('[data-translate="managed_checking_msg"]') ||
-        document.getElementById('px-block-form-wrapper')) {
+      if (
+        document.querySelector('[data-translate="managed_checking_msg"]') ||
+        document.getElementById('px-block-form-wrapper')
+      ) {
        return 'IS_BLOCKED'
      }

-      return document.documentElement.outerHTML;
-    }, iframes);
-    logRecord.puppeteerSuccess = true;
+      return document.documentElement.outerHTML
+    }, iframes)
+    logRecord.puppeteerSuccess = true
    logRecord.timing = {
      ...logRecord.timing,
      contenCaptured: Date.now() - domContentCapturingStart,
-    };
+    }

    // [END puppeteer-block]
  } catch (e) {
-    if (e.message.startsWith('net::ERR_BLOCKED_BY_CLIENT at ')) {
-      logRecord.blockedByClient = true;
+    if (e instanceof Error) {
+      if (e.message.startsWith('net::ERR_BLOCKED_BY_CLIENT at ')) {
+        logRecord.blockedByClient = true
+      } else {
+        logRecord.puppeteerSuccess = false
+        logRecord.puppeteerError = {
+          message: e.message,
+          stack: e.stack,
+        }
+      }
    } else {
-      logRecord.puppeteerSuccess = false;
-      logRecord.puppeteerError = {
-        message: e.message,
-        stack: e.stack,
-      };
+      logRecord.puppeteerSuccess = false
+      logRecord.puppeteerError = e
    }
  }
  if (domContent === 'IS_BLOCKED') {
-    return { isBlocked: true };
+    return { isBlocked: true }
  }
-  return { domContent, title };
+  return { domContent, title }
 }

 // async function preview(req, res) {
@ -669,7 +760,7 @@ const DOM_PURIFY_CONFIG = {
  ],
 }

-function domPurifySanitizeHook(node, data) {
+function domPurifySanitizeHook(node: Element, data: SanitizeElementHookEvent) {
  if (data.tagName === 'iframe') {
    const urlRegex = /^(https?:)?\/\/www\.youtube(-nocookie)?\.com\/embed\//i
    const src = node.getAttribute('src') || ''
@ -688,7 +779,7 @@ function domPurifySanitizeHook(node, data) {
  }
 }

-function getPurifiedContent(html) {
+function getPurifiedContent(html: Document) {
  const newWindow = parseHTML('')
  const DOMPurify = createDOMPurify(newWindow)
  DOMPurify.addHook('uponSanitizeElement', domPurifySanitizeHook)
@ -696,13 +787,16 @@ function getPurifiedContent(html) {
  return parseHTML(clean).document
 }

-function signImageProxyUrl(url) {
+function signImageProxyUrl(url: string) {
  return encode(
-    crypto.createHmac('sha256', process.env.IMAGE_PROXY_SECRET).update(url).digest()
+    crypto
+      .createHmac('sha256', process.env.IMAGE_PROXY_SECRET || '')
+      .update(url)
+      .digest()
  )
 }

-function createImageProxyUrl(url, width = 0, height = 0) {
+function createImageProxyUrl(url: string, width = 0, height = 0) {
  if (!process.env.IMAGE_PROXY_URL || !process.env.IMAGE_PROXY_SECRET) {
    return url
  }
@ -713,7 +807,7 @@ function createImageProxyUrl(url, width = 0, height = 0) {
  return `${process.env.IMAGE_PROXY_URL}/${width}x${height},s${signature}/${url}`
 }

-async function getReadabilityResult(url, document) {
+async function getReadabilityResult(url: string, document: Document) {
  // First attempt to read the article as is.
  // if that fails attempt to purify then read
  const sources = [
@ -747,9 +841,3 @@ async function getReadabilityResult(url, document) {

  return null
 }
-
-module.exports = {
-  fetchContent,
-  // preview,
-};
-
--- a/packages/puppeteer-parse/src/readability.d.ts
+++ b/packages/puppeteer-parse/src/readability.d.ts
@ -0,0 +1,173 @@
+// Type definitions for non-npm package mozilla-readability 0.2
+// Project: https://github.com/mozilla/readability
+// Definitions by: Charles Vandevoorde <https://github.com/charlesvdv>, Alex Wendland <https://github.com/awendland>
+// Definitions: https://github.com/DefinitelyTyped/DefinitelyTyped
+// TypeScript Version: 2.2
+
+declare module '@omnivore/readability' {
+  /**
+   * A standalone version of the readability library used for Firefox Reader View.
+   *
+   * Note that isProbablyReaderable() was moved into a separate file in https://github.com/mozilla/readability/commit/2620542dd1e8380220d82afa97a2c283ae636e40
+   * and therefore is no longer part of the Readability class.
+   */
+  class Readability {
+    /**
+     * ## Usage on the web
+     *
+     * To parse a document, you must create a new Readability object from a
+     * DOM document object, then call parse() and await its result. Example:
+     *
+     * ```js
+     * var article = await new Readability(document).parse();
+     * ```
+     *
+     * If you're using Readability on the web, you will likely be able to
+     * use a document reference from elsewhere (e.g. fetched via XMLHttpRequest,
+     * in a same-origin <iframe> you have access to, etc.).
+     *
+     * ## Usage from node.js
+     *
+     * In node.js, you won't generally have a DOM document object. To obtain one, you can use external
+     * libraries like [jsdom](https://github.com/tmpvar/jsdom). While this repository contains a parser of
+     * its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do
+     * not recommend it for general use.
+     *
+     * If you're using `jsdom` to create a DOM object, you should ensure that the page doesn't run (page)
+     * scripts (avoid fetching remote resources etc.) as well as passing it the page's URI as the `url`
+     * property of the `options` object you pass the `JSDOM` constructor.
+     *
+     * ```js
+     * var JSDOM = require('jsdom').JSDOM;
+     * var doc = new JSDOM("<body>Here's a bunch of text</body>", {
+     *   url: "https://www.example.com/the-page-i-got-the-source-from",
+     * });
+     * let reader = new Readability(doc.window.document);
+     * let article = await reader.parse();
+     * ```
+     */
+    constructor(doc: Document, options?: Readability.Options)
+
+    /**
+     * Runs readability.
+     *
+     * ## Workflow:
+     *
+     *  1. Prep the document by removing script tags, css, etc.
+     *  2. Build readability's DOM tree.
+     *  3. Grab the article content from the current dom tree.
+     *  4. Replace the current DOM tree with the new one.
+     *  5. Read peacefully.
+     *
+     * ## Additional notes:
+     *
+     * Readability's parse() works by modifying the DOM. This removes some
+     * elements in the web page. You could avoid this by passing the clone
+     * of the document object while creating a Readability object.
+     *
+     * ```js
+     * var documentClone = document.cloneNode(true);
+     * var article = await new Readability(documentClone).parse();
+     * ```
+     *
+     * The promise resolves to null if the processing failed (https://github.com/mozilla/readability/blob/52ab9b5c8916c306a47b2119270dcdabebf9d203/Readability.js#L2038)
+     */
+    parse(): Promise<Readability.ParseResult | null>
+  }
+
+  namespace Readability {
+    interface Options {
+      /**
+       * Control whether log messages are sent to the console
+       */
+      debug?: boolean
+
+      /**
+       * Set a maximum size on the documents that will be processed. This size is
+       * checked before any parsing operations occur. If the number of elements in
+       * the document exceeds this threshold then an Error will be thrown.
+       *
+       * See implementation details at https://github.com/mozilla/readability/blob/52ab9b5c8916c306a47b2119270dcdabebf9d203/Readability.js#L2019
+       */
+      maxElemsToParse?: number
+
+      nbTopCandidates?: number
+
+      /**
+       * Minimum number of characters in the extracted textContent in order to
+       * consider the article correctly identified. If the threshold is not met then
+       * the extraction process will automatically run again with different flags.
+       *
+       * See implementation details at https://github.com/mozilla/readability/blob/52ab9b5c8916c306a47b2119270dcdabebf9d203/Readability.js#L1208
+       *
+       * Changed from wordThreshold in https://github.com/mozilla/readability/commit/3ff9a166fb27928f222c4c0722e730eda412658a
+       */
+      charThreshold?: number
+
+      /**
+       * parse() removes the class="" attribute from every element in the given
+       * subtree, except those that match CLASSES_TO_PRESERVE and
+       * the classesToPreserve array from the options object.
+       */
+      classesToPreserve?: string[]
+
+      /**
+       * By default Readability will strip all classes from the HTML elements in the
+       * processed article. By setting this to `true` the classes will be retained.
+       *
+       * This is a blanket alternative to `classesToPreserve`.
+       *
+       * Added in https://github.com/mozilla/readability/commit/2982216913af2c66b0690e88606b03116553ad92
+       */
+
+      keepClasses?: boolean
+      url?: string
+
+      /**
+       * Function that converts a regular image url into imageproxy url
+       * @param url string
+       */
+      createImageProxyUrl?: (
+        url: string,
+        width?: number,
+        height?: number
+      ) => string
+
+      /**
+       * By default, Readability will clean all tables from the HTML elements in the
+       * processed article. But newsletters in emails use tables to display their content.
+       * By setting this to `true`, these tables will be retained.
+       */
+      keepTables?: boolean
+      ignoreLinkDensity?: boolean
+    }
+
+    interface ParseResult {
+      /** Article title */
+      title: string
+      /** Author metadata */
+      byline?: string | null
+      /** Content direction */
+      dir?: string | null
+      /** HTML string of processed article content */
+      content: string
+      /** non-HTML version of `content`  */
+      textContent: string
+      /** Length of an article, in characters */
+      length: number
+      /** Article description, or short excerpt from the content */
+      excerpt: string
+      /** Article site name */
+      siteName?: string | null
+      /** Article site icon */
+      siteIcon?: string | null
+      /** Article preview image */
+      previewImage?: string | null
+      /** Article published date */
+      publishedDate?: Date | null
+      language?: string | null
+    }
+  }
+
+  export { Readability }
+}
--- a/packages/puppeteer-parse/test/babel-register.js
+++ b/packages/puppeteer-parse/test/babel-register.js
@ -0,0 +1,3 @@
+const register = require('@babel/register').default
+
+register({ extensions: ['.ts', '.tsx', '.js', '.jsx'] })
--- a/packages/puppeteer-parse/test/stub.test.js
+++ b/packages/puppeteer-parse/test/stub.test.js
@ -1,9 +0,0 @@
-const chai = require("chai");
-
-const expect = chai.expect;
-
-describe('Stub test', () => {
-  it('should pass', () => {
-    expect(true).to.be.true
-  })
-})
--- a/packages/puppeteer-parse/test/stub.test.ts
+++ b/packages/puppeteer-parse/test/stub.test.ts
@ -0,0 +1,8 @@
+import 'mocha'
+import { expect } from 'chai'
+
+describe('stub test', function () {
+  it('should pass', function () {
+    expect(true).to.equal(true)
+  })
+})
--- a/packages/puppeteer-parse/tsconfig.json
+++ b/packages/puppeteer-parse/tsconfig.json
@ -0,0 +1,8 @@
+{
+  "extends": "./../../tsconfig.json",
+  "compilerOptions": {
+    "outDir": "build",
+    "rootDir": "."
+  },
+  "include": ["src"]
+}
--- a/yarn.lock
+++ b/yarn.lock
@ -12416,13 +12416,6 @@ cron-parser@^4.6.0:
  dependencies:
    luxon "^3.2.1"

-cross-fetch@3.1.5, cross-fetch@^3.0.6, cross-fetch@^3.1.5:
-  version "3.1.5"
-  resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f"
-  integrity sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw==
-  dependencies:
-    node-fetch "2.6.7"
-
 cross-fetch@4.0.0:
  version "4.0.0"
  resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-4.0.0.tgz#f037aef1580bb3a1a35164ea2a848ba81b445983"
@ -12430,6 +12423,13 @@ cross-fetch@4.0.0:
  dependencies:
    node-fetch "^2.6.12"

+cross-fetch@^3.0.6, cross-fetch@^3.1.5:
+  version "3.1.5"
+  resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f"
+  integrity sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw==
+  dependencies:
+    node-fetch "2.6.7"
+
 cross-spawn@^6.0.0:
  version "6.0.5"
  resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4"
@ -13171,11 +13171,6 @@ detect-port@^1.3.0:
    address "^1.0.1"
    debug "^2.6.0"

-devtools-protocol@0.0.1045489:
-  version "0.0.1045489"
-  resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1045489.tgz#f959ad560b05acd72d55644bc3fb8168a83abf28"
-  integrity sha512-D+PTmWulkuQW4D1NTiCRCFxF7pQPn0hgp4YyX4wAQ6xYXKOadSWPR3ENGDQ47MW/Ewc9v2rpC/UEEGahgBYpSQ==
-
 devtools-protocol@0.0.1147663:
  version "0.0.1147663"
  resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1147663.tgz#4ec5610b39a6250d1f87e6b9c7e16688ed0ac78e"
@ -17239,14 +17234,6 @@ https-browserify@^1.0.0:
  resolved "https://registry.yarnpkg.com/https-browserify/-/https-browserify-1.0.0.tgz#ec06c10e0a34c0f2faf199f7fd7fc78fffd03c73"
  integrity sha1-7AbBDgo0wPL68Zn3/X/Hj//QPHM=

-https-proxy-agent@5.0.1, https-proxy-agent@^5.0.0:
-  version "5.0.1"
-  resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz#c59ef224a04fe8b754f3db0063a25ea30d0005d6"
-  integrity sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==
-  dependencies:
-    agent-base "6"
-    debug "4"
-
 https-proxy-agent@^4.0.0:
  version "4.0.0"
  resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-4.0.0.tgz#702b71fb5520a132a66de1f67541d9e62154d82b"
@ -17255,6 +17242,14 @@ https-proxy-agent@^4.0.0:
    agent-base "5"
    debug "4"

+https-proxy-agent@^5.0.0:
+  version "5.0.1"
+  resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz#c59ef224a04fe8b754f3db0063a25ea30d0005d6"
+  integrity sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==
+  dependencies:
+    agent-base "6"
+    debug "4"
+
 https-proxy-agent@^7.0.0, https-proxy-agent@^7.0.1:
  version "7.0.1"
  resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-7.0.1.tgz#0277e28f13a07d45c663633841e20a40aaafe0ab"
@ -24911,7 +24906,7 @@ proxy-from-env@1.0.0:
  resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.0.0.tgz#33c50398f70ea7eb96d21f7b817630a55791c7ee"
  integrity sha1-M8UDmPcOp+uW0h97gXYwpVeRx+4=

-proxy-from-env@1.1.0, proxy-from-env@^1.1.0:
+proxy-from-env@^1.1.0:
  version "1.1.0"
  resolved "https://registry.yarnpkg.com/proxy-from-env/-/proxy-from-env-1.1.0.tgz#e102f16ca355424865755d2c9e8ea4f24d58c3e2"
  integrity sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==
@ -25009,22 +25004,6 @@ pupa@^2.1.1:
  dependencies:
    escape-goat "^2.0.0"

-puppeteer-core@^19.1.1:
-  version "19.1.1"
-  resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-19.1.1.tgz#6416ff925a9cc78523c490482a17a2998f7c0626"
-  integrity sha512-jV26Ke0VFel4MoXLjqm50uAW2uwksTP6Md1tvtXqWqXM5FyboKI6E9YYJ1qEQilUAqlhgGq8xLN5+SL8bPz/kw==
-  dependencies:
-    cross-fetch "3.1.5"
-    debug "4.3.4"
-    devtools-protocol "0.0.1045489"
-    extract-zip "2.0.1"
-    https-proxy-agent "5.0.1"
-    proxy-from-env "1.1.0"
-    rimraf "3.0.2"
-    tar-fs "2.1.1"
-    unbzip2-stream "1.4.3"
-    ws "8.9.0"
-
 puppeteer-core@^20.9.0:
  version "20.9.0"
  resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-20.9.0.tgz#6f4b420001b64419deab38d398a4d9cd071040e6"
@ -26751,13 +26730,6 @@ rfdc@^1.3.0:
  resolved "https://registry.yarnpkg.com/rfdc/-/rfdc-1.3.0.tgz#d0b7c441ab2720d05dc4cf26e01c89631d9da08b"
  integrity sha512-V2hovdzFbOi77/WajaSMXk2OLm+xNIeQdMMuB7icj7bk6zi2F8GGAxigcnDFpJHbNyNcgyJDiP+8nOrY5cZGrA==

-rimraf@3.0.2, rimraf@^3.0.0, rimraf@^3.0.2:
-  version "3.0.2"
-  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
-  integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==
-  dependencies:
-    glob "^7.1.3"
-
 rimraf@^2.2.8, rimraf@^2.5.4, rimraf@^2.6.1, rimraf@^2.6.3:
  version "2.7.1"
  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-2.7.1.tgz#35797f13a7fdadc566142c29d4f07ccad483e3ec"
@ -26765,6 +26737,13 @@ rimraf@^2.2.8, rimraf@^2.5.4, rimraf@^2.6.1, rimraf@^2.6.3:
  dependencies:
    glob "^7.1.3"

+rimraf@^3.0.0, rimraf@^3.0.2:
+  version "3.0.2"
+  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-3.0.2.tgz#f1a5402ba6220ad52cc1282bac1ae3aa49fd061a"
+  integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==
+  dependencies:
+    glob "^7.1.3"
+
 rimraf@^4.4.1:
  version "4.4.1"
  resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-4.4.1.tgz#bd33364f67021c5b79e93d7f4fa0568c7c21b755"
@ -28533,16 +28512,6 @@ tapable@^2.0.0, tapable@^2.1.1, tapable@^2.2.0:
  resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0"
  integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==

-tar-fs@2.1.1, tar-fs@^2.0.0:
-  version "2.1.1"
-  resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784"
-  integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==
-  dependencies:
-    chownr "^1.1.1"
-    mkdirp-classic "^0.5.2"
-    pump "^3.0.0"
-    tar-stream "^2.1.4"
-
 tar-fs@3.0.4, tar-fs@^3.0.4:
  version "3.0.4"
  resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-3.0.4.tgz#a21dc60a2d5d9f55e0089ccd78124f1d3771dbbf"
@ -28552,6 +28521,16 @@ tar-fs@3.0.4, tar-fs@^3.0.4:
    pump "^3.0.0"
    tar-stream "^3.1.5"

+tar-fs@^2.0.0:
+  version "2.1.1"
+  resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784"
+  integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==
+  dependencies:
+    chownr "^1.1.1"
+    mkdirp-classic "^0.5.2"
+    pump "^3.0.0"
+    tar-stream "^2.1.4"
+
 tar-stream@^2.1.4, tar-stream@~2.2.0:
  version "2.2.0"
  resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-2.2.0.tgz#acad84c284136b060dc3faa64474aa9aebd77287"
@ -30935,11 +30914,6 @@ ws@8.13.0:
  resolved "https://registry.yarnpkg.com/ws/-/ws-8.13.0.tgz#9a9fb92f93cf41512a0735c8f4dd09b8a1211cd0"
  integrity sha512-x9vcZYTrFPC7aSIbj7sRCYo7L/Xb8Iy+pW0ng0wt2vCJv7M9HOMy0UoN3rr+IFC7hb7vXoqS+P9ktyLLLhO+LA==

-ws@8.9.0:
-  version "8.9.0"
-  resolved "https://registry.yarnpkg.com/ws/-/ws-8.9.0.tgz#2a994bb67144be1b53fe2d23c53c028adeb7f45e"
-  integrity sha512-Ja7nszREasGaYUYCI2k4lCKIRTt+y7XuqVoHR44YpI49TtryyqbqvDMn5eqfW7e6HzTukDRIsXqzVHScqRcafg==
-
 "ws@^5.2.0 || ^6.0.0 || ^7.0.0", ws@^7.3.1, ws@^7.4.6:
  version "7.5.7"
  resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.7.tgz#9e0ac77ee50af70d58326ecff7e85eb3fa375e67"