diff --git a/packages/content-fetch/package.json b/packages/content-fetch/package.json index 841f56f02..f2cfed276 100644 --- a/packages/content-fetch/package.json +++ b/packages/content-fetch/package.json @@ -11,6 +11,7 @@ "dotenv": "^8.2.0", "express": "^4.17.1", "ioredis": "^5.3.2", + "posthog-node": "^3.6.3", "@google-cloud/functions-framework": "^3.0.0", "@omnivore/puppeteer-parse": "^1.0.0", "@sentry/serverless": "^7.77.0" diff --git a/packages/content-fetch/src/analytics.ts b/packages/content-fetch/src/analytics.ts new file mode 100644 index 000000000..c964579c8 --- /dev/null +++ b/packages/content-fetch/src/analytics.ts @@ -0,0 +1,41 @@ +import { PostHog } from 'posthog-node' + +interface AnalyticEvent { + distinctId: string + event: string + properties?: Record +} + +interface AnalyticClient { + capture: (event: AnalyticEvent) => void + shutdownAsync?: () => Promise +} + +class PostHogClient implements AnalyticClient { + private client: PostHog + + constructor(apiKey: string) { + this.client = new PostHog(apiKey) + } + + capture({ distinctId, event, properties }: AnalyticEvent) { + // get client from request context + + this.client.capture({ + distinctId, + event, + properties: { + ...properties, + env: process.env.API_ENV || 'demo', + }, + }) + } + + async shutdownAsync() { + return this.client.shutdownAsync() + } +} + +export const analytics = new PostHogClient( + process.env.POSTHOG_API_KEY || 'test' +) diff --git a/packages/content-fetch/src/request_handler.ts b/packages/content-fetch/src/request_handler.ts index 9da92e464..bb86005b8 100644 --- a/packages/content-fetch/src/request_handler.ts +++ b/packages/content-fetch/src/request_handler.ts @@ -1,5 +1,6 @@ import { fetchContent } from '@omnivore/puppeteer-parse' import { RequestHandler } from 'express' +import { analytics } from './analytics' import { queueSavePageJob } from './job' import { redisDataSource } from './redis_data_source' @@ -147,11 +148,29 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => { logRecord.error = 'unknown error' } + // capture error event + users.forEach((user) => { + analytics.capture({ + distinctId: user.id, + event: 'content-fetch-failure', + properties: logRecord, + }) + }) + return res.sendStatus(500) } finally { logRecord.totalTime = Date.now() - functionStartTime console.log(`parse-page result`, logRecord) } + // capture success event + users.forEach((user) => { + analytics.capture({ + distinctId: user.id, + event: 'content-fetch-success', + properties: logRecord, + }) + }) + res.sendStatus(200) } diff --git a/packages/puppeteer-parse/package.json b/packages/puppeteer-parse/package.json index 5bcd43a9b..4ed611ab4 100644 --- a/packages/puppeteer-parse/package.json +++ b/packages/puppeteer-parse/package.json @@ -14,7 +14,7 @@ "crypto": "^1.0.1", "dompurify": "^2.4.1", "linkedom": "^0.14.9", - "puppeteer-core": "22.8.0", + "puppeteer-core": "^22.8.0", "puppeteer-extra": "^3.3.4", "puppeteer-extra-plugin-adblocker": "^2.13.5", "puppeteer-extra-plugin-stealth": "^2.11.1", diff --git a/packages/readabilityjs/package.json b/packages/readabilityjs/package.json index 0cccc78f5..8100134f3 100644 --- a/packages/readabilityjs/package.json +++ b/packages/readabilityjs/package.json @@ -32,7 +32,7 @@ "linkedom": "^0.14.9", "mocha": "^8.2.0", "nock": "^13.3.1", - "puppeteer-core": "^20.9.0", + "puppeteer-core": "^22.8.0", "puppeteer-extra": "^3.3.4", "puppeteer-extra-plugin-adblocker": "^2.13.5", "puppeteer-extra-plugin-stealth": "^2.11.1", diff --git a/yarn.lock b/yarn.lock index 56f68be83..a57d3a7ff 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4972,19 +4972,6 @@ resolved "https://registry.yarnpkg.com/@protobufjs/utf8/-/utf8-1.1.0.tgz#a777360b5b39a1a2e5106f8e858f2fd2d060c570" integrity sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw== -"@puppeteer/browsers@1.4.6": - version "1.4.6" - resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-1.4.6.tgz#1f70fd23d5d2ccce9d29b038e5039d7a1049ca77" - integrity sha512-x4BEjr2SjOPowNeiguzjozQbsc6h437ovD/wu+JpaenxVLm3jkgzHY2xOslMTp50HoTvQreMjiexiGQw1sqZlQ== - dependencies: - debug "4.3.4" - extract-zip "2.0.1" - progress "2.0.3" - proxy-agent "6.3.0" - tar-fs "3.0.4" - unbzip2-stream "1.4.3" - yargs "17.7.1" - "@puppeteer/browsers@2.2.3": version "2.2.3" resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-2.2.3.tgz#ad6b79129c50825e77ddaba082680f4dad0b674e" @@ -9517,7 +9504,7 @@ agent-base@^6.0.1, agent-base@^6.0.2: dependencies: debug "4" -agent-base@^7.0.1, agent-base@^7.0.2, agent-base@^7.1.0: +agent-base@^7.0.2, agent-base@^7.1.0: version "7.1.0" resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-7.1.0.tgz#536802b76bc0b34aa50195eb2442276d613e3434" integrity sha512-o/zjMZRhJxny7OyEF+Op8X+efiELC7k7yOjMzgfzVqOzXqkBkWI79YoTdOtsuWd5BWhAGAuOY/Xa6xpiaWXiNg== @@ -12060,13 +12047,6 @@ chrome-trace-event@^1.0.2: resolved "https://registry.yarnpkg.com/chrome-trace-event/-/chrome-trace-event-1.0.3.tgz#1015eced4741e15d06664a957dbbf50d041e26ac" integrity sha512-p3KULyQg4S7NIHixdwbGX+nFHkoBiA4YQmyWtjb8XngSKV124nJmRysgAeujbUVb15vh+RvFUfCPqU7rXk+hZg== -chromium-bidi@0.4.16: - version "0.4.16" - resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.16.tgz#8a67bfdf6bb8804efc22765a82859d20724b46ab" - integrity sha512-7ZbXdWERxRxSwo3txsBjjmc/NLxqb1Bk30mRb0BMS4YIaiV6zvKZqL/UAH+DdqcDYayDWk2n/y8klkBDODrPvA== - dependencies: - mitt "3.0.0" - chromium-bidi@0.5.19: version "0.5.19" resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.5.19.tgz#e4f4951b7d9b20d668d6b387839f7b7bf2d69ef4" @@ -13119,13 +13099,6 @@ cron-parser@^4.6.0: dependencies: luxon "^3.2.1" -cross-fetch@4.0.0: - version "4.0.0" - resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-4.0.0.tgz#f037aef1580bb3a1a35164ea2a848ba81b445983" - integrity sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g== - dependencies: - node-fetch "^2.6.12" - cross-fetch@^3.0.6, cross-fetch@^3.1.5: version "3.1.5" resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f" @@ -13884,11 +13857,6 @@ detect-port@^1.3.0: address "^1.0.1" debug "^2.6.0" -devtools-protocol@0.0.1147663: - version "0.0.1147663" - resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1147663.tgz#4ec5610b39a6250d1f87e6b9c7e16688ed0ac78e" - integrity sha512-hyWmRrexdhbZ1tcJUGpO95ivbRhWXz++F4Ko+n21AY5PNln2ovoJw+8ZMNDTtip+CNFQfrtLVh/w4009dXO/eQ== - devtools-protocol@0.0.1273771: version "0.0.1273771" resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1273771.tgz#46aeb5db41417e2c2ad3d8367c598c975290b1a5" @@ -22647,11 +22615,6 @@ mississippi@^3.0.0: stream-each "^1.1.0" through2 "^2.0.0" -mitt@3.0.0: - version "3.0.0" - resolved "https://registry.yarnpkg.com/mitt/-/mitt-3.0.0.tgz#69ef9bd5c80ff6f57473e8d89326d01c414be0bd" - integrity sha512-7dX2/10ITVyqh4aOSVI9gdape+t9l2/8QxHrFmUXu4EEUpdlxl6RudZUPZoc+zuY2hk1j7XxVroIVIan/pD/SQ== - mitt@3.0.1: version "3.0.1" resolved "https://registry.yarnpkg.com/mitt/-/mitt-3.0.1.tgz#ea36cf0cc30403601ae074c8f77b7092cdab36d1" @@ -23259,7 +23222,7 @@ node-fetch@2.6.7, node-fetch@^2.3.0, node-fetch@^2.6.0, node-fetch@^2.6.1, node- dependencies: whatwg-url "^5.0.0" -node-fetch@^2.6.12, node-fetch@^2.6.9: +node-fetch@^2.6.9: version "2.6.12" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.12.tgz#02eb8e22074018e3d5a83016649d04df0e348fba" integrity sha512-C/fGU2E8ToujUivIO0H+tpQ6HWo4eEmchoPIoXtxCrVghxdKq+QOHqEZW7tuP3KlV3bC8FRMO5nMCC7Zm1VP6g== @@ -24623,20 +24586,6 @@ p-waterfall@2.1.1: dependencies: p-reduce "^2.0.0" -pac-proxy-agent@^7.0.0: - version "7.0.0" - resolved "https://registry.yarnpkg.com/pac-proxy-agent/-/pac-proxy-agent-7.0.0.tgz#db42120c64292685dafaf2bd921e223c56bfb13b" - integrity sha512-t4tRAMx0uphnZrio0S0Jw9zg3oDbz1zVhQ/Vy18FjLfP1XOLNUEjaVxYCYRI6NS+BsMBXKIzV6cTLOkO9AtywA== - dependencies: - "@tootallnate/quickjs-emscripten" "^0.23.0" - agent-base "^7.0.2" - debug "^4.3.4" - get-uri "^6.0.1" - http-proxy-agent "^7.0.0" - https-proxy-agent "^7.0.0" - pac-resolver "^7.0.0" - socks-proxy-agent "^8.0.1" - pac-proxy-agent@^7.0.1: version "7.0.1" resolved "https://registry.yarnpkg.com/pac-proxy-agent/-/pac-proxy-agent-7.0.1.tgz#6b9ddc002ec3ff0ba5fdf4a8a21d363bcc612d75" @@ -25967,20 +25916,6 @@ proxy-addr@~2.0.7: forwarded "0.2.0" ipaddr.js "1.9.1" -proxy-agent@6.3.0: - version "6.3.0" - resolved "https://registry.yarnpkg.com/proxy-agent/-/proxy-agent-6.3.0.tgz#72f7bb20eb06049db79f7f86c49342c34f9ba08d" - integrity sha512-0LdR757eTj/JfuU7TL2YCuAZnxWXu3tkJbg4Oq3geW/qFNT/32T0sp2HnZ9O0lMR4q3vwAt0+xCA8SR0WAD0og== - dependencies: - agent-base "^7.0.2" - debug "^4.3.4" - http-proxy-agent "^7.0.0" - https-proxy-agent "^7.0.0" - lru-cache "^7.14.1" - pac-proxy-agent "^7.0.0" - proxy-from-env "^1.1.0" - socks-proxy-agent "^8.0.1" - proxy-agent@6.4.0: version "6.4.0" resolved "https://registry.yarnpkg.com/proxy-agent/-/proxy-agent-6.4.0.tgz#b4e2dd51dee2b377748aef8d45604c2d7608652d" @@ -26098,7 +26033,7 @@ pupa@^2.1.1: dependencies: escape-goat "^2.0.0" -puppeteer-core@22.8.0: +puppeteer-core@^22.8.0: version "22.8.0" resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-22.8.0.tgz#82c0e7ebf62ba5f34404394034e313b82014de5f" integrity sha512-S5bWx3g/fNuyFxjZX9TkZMN07CEH47+9Zm6IiTl1QfqI9pnVaShbwrD9kRe5vmz/XPp/jLGhhxRUj1sY4wObnA== @@ -26109,18 +26044,6 @@ puppeteer-core@22.8.0: devtools-protocol "0.0.1273771" ws "8.17.0" -puppeteer-core@^20.9.0: - version "20.9.0" - resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-20.9.0.tgz#6f4b420001b64419deab38d398a4d9cd071040e6" - integrity sha512-H9fYZQzMTRrkboEfPmf7m3CLDN6JvbxXA3qTtS+dFt27tR+CsFHzPsT6pzp6lYL6bJbAPaR0HaPO6uSi+F94Pg== - dependencies: - "@puppeteer/browsers" "1.4.6" - chromium-bidi "0.4.16" - cross-fetch "4.0.0" - debug "4.3.4" - devtools-protocol "0.0.1147663" - ws "8.13.0" - puppeteer-extra-plugin-adblocker@^2.13.5: version "2.13.5" resolved "https://registry.yarnpkg.com/puppeteer-extra-plugin-adblocker/-/puppeteer-extra-plugin-adblocker-2.13.5.tgz#c86ce94873bf6fe500555d3972eccdcca4914f6f" @@ -28705,15 +28628,6 @@ socks-proxy-agent@^7.0.0: debug "^4.3.3" socks "^2.6.2" -socks-proxy-agent@^8.0.1: - version "8.0.1" - resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-8.0.1.tgz#ffc5859a66dac89b0c4dab90253b96705f3e7120" - integrity sha512-59EjPbbgg8U3x62hhKOFVAmySQUcfRQ4C7Q/D5sEHnZTQRrQlNKINks44DMR1gwXp0p4LaVIeccX2KHTTcHVqQ== - dependencies: - agent-base "^7.0.1" - debug "^4.3.4" - socks "^2.7.1" - socks-proxy-agent@^8.0.2: version "8.0.3" resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-8.0.3.tgz#6b2da3d77364fde6292e810b496cb70440b9b89d" @@ -29671,15 +29585,6 @@ tapable@^2.0.0, tapable@^2.1.1, tapable@^2.2.0: resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0" integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ== -tar-fs@3.0.4, tar-fs@^3.0.4: - version "3.0.4" - resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-3.0.4.tgz#a21dc60a2d5d9f55e0089ccd78124f1d3771dbbf" - integrity sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w== - dependencies: - mkdirp-classic "^0.5.2" - pump "^3.0.0" - tar-stream "^3.1.5" - tar-fs@3.0.5: version "3.0.5" resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-3.0.5.tgz#f954d77767e4e6edf973384e1eb95f8f81d64ed9" @@ -29701,6 +29606,15 @@ tar-fs@^2.0.0: pump "^3.0.0" tar-stream "^2.1.4" +tar-fs@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-3.0.4.tgz#a21dc60a2d5d9f55e0089ccd78124f1d3771dbbf" + integrity sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w== + dependencies: + mkdirp-classic "^0.5.2" + pump "^3.0.0" + tar-stream "^3.1.5" + tar-stream@^2.1.4, tar-stream@~2.2.0: version "2.2.0" resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-2.2.0.tgz#acad84c284136b060dc3faa64474aa9aebd77287" @@ -32140,11 +32054,6 @@ write-pkg@4.0.0: type-fest "^0.4.1" write-json-file "^3.2.0" -ws@8.13.0: - version "8.13.0" - resolved "https://registry.yarnpkg.com/ws/-/ws-8.13.0.tgz#9a9fb92f93cf41512a0735c8f4dd09b8a1211cd0" - integrity sha512-x9vcZYTrFPC7aSIbj7sRCYo7L/Xb8Iy+pW0ng0wt2vCJv7M9HOMy0UoN3rr+IFC7hb7vXoqS+P9ktyLLLhO+LA== - ws@8.17.0: version "8.17.0" resolved "https://registry.yarnpkg.com/ws/-/ws-8.17.0.tgz#d145d18eca2ed25aaf791a183903f7be5e295fea" @@ -32325,19 +32234,6 @@ yargs@16.2.0, yargs@^16.0.0, yargs@^16.2.0: y18n "^5.0.5" yargs-parser "^20.2.2" -yargs@17.7.1: - version "17.7.1" - resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.1.tgz#34a77645201d1a8fc5213ace787c220eabbd0967" - integrity sha512-cwiTb08Xuv5fqF4AovYacTFNxk62th7LKJ6BL9IGUpTJrWoU7/7WdQGTP2SjKf1dUNBGzDd28p/Yfs/GI6JrLw== - dependencies: - cliui "^8.0.1" - escalade "^3.1.1" - get-caller-file "^2.0.5" - require-directory "^2.1.1" - string-width "^4.2.3" - y18n "^5.0.5" - yargs-parser "^21.1.1" - yargs@17.7.2, yargs@^17.5.1, yargs@^17.6.2: version "17.7.2" resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.2.tgz#991df39aca675a192b816e1e0363f9d75d2aa269"