From 01bce437813a41f2545f72536d0c49ebde327e8b Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Fri, 19 Jul 2024 17:12:40 +0800 Subject: [PATCH 1/4] create a cloud run service for the exporter --- packages/export-handler/.dockerignore | 8 + packages/export-handler/.eslintignore | 2 + packages/export-handler/.eslintrc | 6 + packages/export-handler/Dockerfile | 27 +++ packages/export-handler/mocha-config.json | 5 + packages/export-handler/package.json | 40 +++++ packages/export-handler/src/index.ts | 145 ++++++++++++++++ packages/export-handler/src/job.ts | 23 +++ packages/export-handler/test/stub.test.ts | 8 + packages/export-handler/tsconfig.json | 8 + packages/import-handler/.eslintignore | 4 +- packages/import-handler/Dockerfile | 1 - packages/import-handler/Dockerfile-collector | 1 - yarn.lock | 173 ++++++++++++++++++- 14 files changed, 445 insertions(+), 6 deletions(-) create mode 100644 packages/export-handler/.dockerignore create mode 100644 packages/export-handler/.eslintignore create mode 100644 packages/export-handler/.eslintrc create mode 100644 packages/export-handler/Dockerfile create mode 100644 packages/export-handler/mocha-config.json create mode 100644 packages/export-handler/package.json create mode 100644 packages/export-handler/src/index.ts create mode 100644 packages/export-handler/src/job.ts create mode 100644 packages/export-handler/test/stub.test.ts create mode 100644 packages/export-handler/tsconfig.json diff --git a/packages/export-handler/.dockerignore b/packages/export-handler/.dockerignore new file mode 100644 index 000000000..5be851c5b --- /dev/null +++ b/packages/export-handler/.dockerignore @@ -0,0 +1,8 @@ +node_modules +build +test +.env* +Dockerfile +.dockerignore +.eslintrc +.eslintignore diff --git a/packages/export-handler/.eslintignore b/packages/export-handler/.eslintignore new file mode 100644 index 000000000..b38db2f29 --- /dev/null +++ b/packages/export-handler/.eslintignore @@ -0,0 +1,2 @@ +node_modules/ +build/ diff --git a/packages/export-handler/.eslintrc b/packages/export-handler/.eslintrc new file mode 100644 index 000000000..e006282a6 --- /dev/null +++ b/packages/export-handler/.eslintrc @@ -0,0 +1,6 @@ +{ + "extends": "../../.eslintrc", + "parserOptions": { + "project": "tsconfig.json" + } +} \ No newline at end of file diff --git a/packages/export-handler/Dockerfile b/packages/export-handler/Dockerfile new file mode 100644 index 000000000..ca84cfc00 --- /dev/null +++ b/packages/export-handler/Dockerfile @@ -0,0 +1,27 @@ +FROM node:18.16-alpine + +WORKDIR /app + +RUN apk add g++ make python3 + +ENV PORT 8080 + +COPY package.json . +COPY yarn.lock . +COPY tsconfig.json . + +COPY /packages/export-handler/package.json ./packages/export-handler/package.json + +RUN yarn install --pure-lockfile + +COPY /packages/export-handler ./packages/export-handler +RUN yarn workspace @omnivore/export-handler build + +# After building, fetch the production dependencies +RUN rm -rf /app/packages/export-handler/node_modules +RUN rm -rf /app/node_modules +RUN yarn install --pure-lockfile --production + +EXPOSE 8080 + +ENTRYPOINT ["yarn", "workspace", "@omnivore/export-handler", "start"] diff --git a/packages/export-handler/mocha-config.json b/packages/export-handler/mocha-config.json new file mode 100644 index 000000000..8e24eb08b --- /dev/null +++ b/packages/export-handler/mocha-config.json @@ -0,0 +1,5 @@ +{ + "extension": ["ts"], + "spec": "test/**/*.test.ts", + "timeout": 10000 + } diff --git a/packages/export-handler/package.json b/packages/export-handler/package.json new file mode 100644 index 000000000..7e7f14fdf --- /dev/null +++ b/packages/export-handler/package.json @@ -0,0 +1,40 @@ +{ + "name": "@omnivore/export-handler", + "version": "1.0.0", + "description": "", + "main": "build/src/index.js", + "files": [ + "build/src" + ], + "keywords": [], + "license": "Apache-2.0", + "scripts": { + "test": "yarn mocha -r ts-node/register --config mocha-config.json", + "test:typecheck": "tsc --noEmit", + "lint": "eslint src --ext ts,js,tsx,jsx", + "compile": "tsc", + "build": "tsc", + "start": "functions-framework --target=exportHandler", + "dev": "concurrently \"tsc -w\" \"nodemon --watch ./build/ --exec npm run start\"" + }, + "devDependencies": { + "@types/chai": "^4.3.4", + "@types/mocha": "^10.0.1", + "eslint-plugin-prettier": "^4.0.0" + }, + "dependencies": { + "@google-cloud/functions-framework": "3.1.2", + "@google-cloud/storage": "^7.0.1", + "@omnivore-app/api": "^1.0.4", + "@omnivore/utils": "1.0.0", + "@sentry/serverless": "^7.77.0", + "csv-stringify": "^6.4.0", + "dotenv": "^16.0.1", + "jsonwebtoken": "^8.5.1", + "nodemon": "^2.0.15", + "uuid": "^8.3.1" + }, + "volta": { + "extends": "../../package.json" + } +} diff --git a/packages/export-handler/src/index.ts b/packages/export-handler/src/index.ts new file mode 100644 index 000000000..95c922953 --- /dev/null +++ b/packages/export-handler/src/index.ts @@ -0,0 +1,145 @@ +import { File, Storage } from '@google-cloud/storage' +import { Omnivore } from '@omnivore-app/api' +import { RedisDataSource } from '@omnivore/utils' +import * as Sentry from '@sentry/serverless' +import { stringify } from 'csv-stringify' +import * as dotenv from 'dotenv' +import * as jwt from 'jsonwebtoken' +import { v4 as uuidv4 } from 'uuid' +import { queueEmailJob } from './job' + +dotenv.config() + +Sentry.GCPFunction.init({ + dsn: process.env.SENTRY_DSN, + tracesSampleRate: 0, +}) + +interface Claims { + uid: string + token: string +} + +const storage = new Storage() +const GCS_BUCKET = process.env.GCS_UPLOAD_BUCKET || 'omnivore-export' + +const createGCSFile = (bucket: string, filename: string): File => { + return storage.bucket(bucket).file(filename) +} + +const createSignedUrl = async (file: File): Promise => { + const signedUrl = await file.getSignedUrl({ + action: 'read', + expires: Date.now() + 15 * 60 * 1000, // 15 minutes + }) + return signedUrl[0] +} + +export const sendExportCompletedEmail = async ( + redisDataSource: RedisDataSource, + emailAddress: string, + urlToDownload: string +) => { + return queueEmailJob(redisDataSource, { + to: emailAddress, + subject: 'Your Omnivore export is ready', + html: `

Your export is ready. You can download it from the following link: ${urlToDownload}

`, + }) +} + +export const exporter = Sentry.GCPFunction.wrapHttpFunction( + async (req, res) => { + console.log('start to export') + + const JWT_SECRET = process.env.JWT_SECRET + if (!JWT_SECRET) { + return res.status(500).send({ errorCode: 'ENV_NOT_CONFIGURED' }) + } + + const token = req.get('Omnivore-Authorization') + if (!token) { + return res.status(401).send({ errorCode: 'INVALID_TOKEN' }) + } + + let claims: Claims + try { + claims = jwt.verify(token, JWT_SECRET) as Claims + } catch (e) { + console.error(e) + return res.status(401).send({ errorCode: 'INVALID_TOKEN' }) + } + + const redisDataSource = new RedisDataSource({ + cache: { + url: process.env.REDIS_URL, + cert: process.env.REDIS_CERT, + }, + mq: { + url: process.env.MQ_REDIS_URL, + cert: process.env.MQ_REDIS_CERT, + }, + }) + + try { + // write the list of urls to a csv file and upload it to gcs + // path style: exports///.csv + const dateStr = new Date().toISOString() + const fileUuid = uuidv4() + const fullPath = `exports/${claims.uid}/${dateStr}/${fileUuid}.csv` + // open a write_stream to the file + const file = createGCSFile(GCS_BUCKET, fullPath) + const writeStream = file.createWriteStream({ + contentType: 'text/csv', + }) + // stringify the data and pipe it to the write_stream + const stringifier = stringify({ + header: true, + columns: ['url', 'state', 'labels'], + }) + stringifier.pipe(writeStream) + + // fetch data from the database + const omnivore = new Omnivore({ + apiKey: claims.token, + }) + + let cursor = 0 + do { + const response = await omnivore.items.search({ + first: 50, + after: cursor, + }) + + const items = response.edges.map((edge) => edge.node) + cursor = response.pageInfo.endCursor + ? parseInt(response.pageInfo.endCursor) + : 0 + + // write data to the csv file + if (items.length > 0) { + // write the list of urls, state and labels to the stream + items.forEach((row) => stringifier.write(row)) + } + } while (cursor) + + writeStream.end() + + // generate a temporary signed url for the csv file + const signedUrl = await createSignedUrl(file) + console.log('signed url', signedUrl) + + // TODO: get the user's email from the database + await sendExportCompletedEmail(redisDataSource, claims.uid, signedUrl) + + console.log('done') + } catch (err) { + console.error('export failed', err) + + return res.status(500).send({ errorCode: 'INTERNAL_SERVER_ERROR' }) + } finally { + await redisDataSource.shutdown() + } + + res.sendStatus(200) + } +) diff --git a/packages/export-handler/src/job.ts b/packages/export-handler/src/job.ts new file mode 100644 index 000000000..69ea6bb79 --- /dev/null +++ b/packages/export-handler/src/job.ts @@ -0,0 +1,23 @@ +import { RedisDataSource } from '@omnivore/utils' +import { Queue } from 'bullmq' + +const QUEUE_NAME = 'omnivore-backend-queue' +export const SEND_EMAIL_JOB = 'send-email' + +interface SendEmailJobData { + to: string + from?: string + subject?: string + html?: string +} + +export const queueEmailJob = async ( + redisDataSource: RedisDataSource, + data: SendEmailJobData +) => { + const queue = new Queue(QUEUE_NAME, { + connection: redisDataSource.queueRedisClient, + }) + + await queue.add(SEND_EMAIL_JOB, data) +} diff --git a/packages/export-handler/test/stub.test.ts b/packages/export-handler/test/stub.test.ts new file mode 100644 index 000000000..24ad25c8f --- /dev/null +++ b/packages/export-handler/test/stub.test.ts @@ -0,0 +1,8 @@ +import 'mocha' +import { expect } from 'chai' + +describe('stub test', () => { + it('should pass', () => { + expect(true).to.be.true + }) +}) diff --git a/packages/export-handler/tsconfig.json b/packages/export-handler/tsconfig.json new file mode 100644 index 000000000..f47439000 --- /dev/null +++ b/packages/export-handler/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "./../../tsconfig.json", + "compilerOptions": { + "declaration": true, + "outDir": "build" + }, + "include": ["src", "test"] +} diff --git a/packages/import-handler/.eslintignore b/packages/import-handler/.eslintignore index b741470fc..b38db2f29 100644 --- a/packages/import-handler/.eslintignore +++ b/packages/import-handler/.eslintignore @@ -1,4 +1,2 @@ node_modules/ -dist/ -readabilityjs/ -src/generated/ +build/ diff --git a/packages/import-handler/Dockerfile b/packages/import-handler/Dockerfile index 1b0dda2b5..cafb9007a 100644 --- a/packages/import-handler/Dockerfile +++ b/packages/import-handler/Dockerfile @@ -2,7 +2,6 @@ FROM node:18.16-alpine WORKDIR /app -ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true RUN apk add g++ make python3 ENV PORT 8080 diff --git a/packages/import-handler/Dockerfile-collector b/packages/import-handler/Dockerfile-collector index 0abe00d6a..6f3401a7b 100644 --- a/packages/import-handler/Dockerfile-collector +++ b/packages/import-handler/Dockerfile-collector @@ -2,7 +2,6 @@ FROM node:18.16-alpine WORKDIR /app -ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true RUN apk add g++ make python3 ENV PORT 8080 diff --git a/yarn.lock b/yarn.lock index fd5a95c26..4e8e37120 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2,6 +2,19 @@ # yarn lockfile v1 +"@0no-co/graphql.web@^1.0.1", "@0no-co/graphql.web@^1.0.5": + version "1.0.7" + resolved "https://registry.yarnpkg.com/@0no-co/graphql.web/-/graphql.web-1.0.7.tgz#c7a762c887b3482a79ffa68f63de5e96059a62e4" + integrity sha512-E3Qku4mTzdrlwVWGPxklDnME5ANrEGetvYw4i2GCRlppWXXE4QD66j7pwb8HelZwS6LnqEChhrSOGCXpbiu6MQ== + +"@0no-co/graphqlsp@^1.12.9": + version "1.12.11" + resolved "https://registry.yarnpkg.com/@0no-co/graphqlsp/-/graphqlsp-1.12.11.tgz#0742b44dccf79eb760d87943f3b5a5886b59812f" + integrity sha512-vLja9r7L6BBXwxW86Wyi5z5hjTHscH7qoQooy+MXHkM9srBB6ZuesYZq5DQ/+SErQrFyaxeY+hwv2qBAksxriw== + dependencies: + "@gql.tada/internal" "^1.0.0" + graphql "^15.5.0 || ^16.0.0 || ^17.0.0" + "@ampproject/remapping@^2.0.0": version "2.1.1" resolved "https://registry.yarnpkg.com/@ampproject/remapping/-/remapping-2.1.1.tgz#7922fb0817bf3166d8d9e258c57477e3fd1c3610" @@ -1272,6 +1285,11 @@ resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.23.0.tgz#da950e622420bf96ca0d0f2909cdddac3acd8719" integrity sha512-vvPKKdMemU85V9WE/l5wZEmImpCtLqbnTvqDS2U1fJ96KrxoW7KrXhNsNCblQlg8Ck4b85yxdTyelsMUgFUXiw== +"@babel/parser@^7.24.7": + version "7.24.8" + resolved "https://registry.yarnpkg.com/@babel/parser/-/parser-7.24.8.tgz#58a4dbbcad7eb1d48930524a3fd93d93e9084c6f" + integrity sha512-WzfbgXOkGzZiXXCqk43kKwZjzwx4oulxZi3nq2TYL9mOjQv6kYwul9mz6ID36njuL7Xkp6nJEfok848Zj10j/w== + "@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression@^7.16.7": version "7.16.7" resolved "https://registry.yarnpkg.com/@babel/plugin-bugfix-safari-id-destructuring-collision-in-function-expression/-/plugin-bugfix-safari-id-destructuring-collision-in-function-expression-7.16.7.tgz#4eda6d6c2a0aa79c70fa7b6da67763dfe2141050" @@ -2989,6 +3007,25 @@ dependencies: "@google-recaptcha/core" "*" +"@gql.tada/cli-utils@1.5.1": + version "1.5.1" + resolved "https://registry.yarnpkg.com/@gql.tada/cli-utils/-/cli-utils-1.5.1.tgz#c0932f866ac74b461091e3f47e031693c9e83102" + integrity sha512-JVLpoXLa4msrE7MHnmW/7fYnIl8dncLom8T/Ghsxu+Kz5iMGnzK2joJN5cZt4ewCAqfCV3HZZ0VH189OalGd9g== + dependencies: + "@0no-co/graphqlsp" "^1.12.9" + "@gql.tada/internal" "1.0.4" + "@vue/compiler-dom" "^3.4.23" + "@vue/language-core" "^2.0.17" + graphql "^15.5.0 || ^16.0.0 || ^17.0.0" + svelte2tsx "^0.7.6" + +"@gql.tada/internal@1.0.4", "@gql.tada/internal@^1.0.0": + version "1.0.4" + resolved "https://registry.yarnpkg.com/@gql.tada/internal/-/internal-1.0.4.tgz#88a4b472966866eb97192a032e07aac5e7ab35d3" + integrity sha512-tq0rgoqjhdVqKWEsbrkiX7Qpp5gA4/Br9r9TVBeh3WpJIcuGh5U48UjB4IOxtXBePZdX8E0oc07GjOid/P60Wg== + dependencies: + "@0no-co/graphql.web" "^1.0.5" + "@graphql-codegen/cli@^2.6.2": version "2.6.2" resolved "https://registry.yarnpkg.com/@graphql-codegen/cli/-/cli-2.6.2.tgz#a9aa4656141ee0998cae8c7ad7d0bf9ca8e0c9ae" @@ -4592,6 +4629,14 @@ dependencies: "@octokit/openapi-types" "^18.0.0" +"@omnivore-app/api@^1.0.4": + version "1.0.4" + resolved "https://registry.yarnpkg.com/@omnivore-app/api/-/api-1.0.4.tgz#1de62d3f1b253c013a1e59c361a2a2a18937ccb1" + integrity sha512-M3a0we1WryUt7qaBV4fI0CmTJaC7eeJ8TAbF5hVTcrIeIJU9I1Lwfdq/coSx5SIHSf2jxOaCUqn1YRB314euRg== + dependencies: + "@urql/core" "^4.3.0" + gql.tada "^1.3.1" + "@opentelemetry/api-metrics@0.27.0": version "0.27.0" resolved "https://registry.yarnpkg.com/@opentelemetry/api-metrics/-/api-metrics-0.27.0.tgz#d8eca344ed1155f3ea8a8133ade827b4bb90efbf" @@ -9107,6 +9152,64 @@ resolved "https://registry.yarnpkg.com/@ungap/promise-all-settled/-/promise-all-settled-1.1.2.tgz#aa58042711d6e3275dd37dc597e5d31e8c290a44" integrity sha512-sL/cEvJWAnClXw0wHk85/2L0G6Sj8UB0Ctc1TEMbKSsmpRosqhwj9gWgFRZSrBr2f9tiXISwNhCPmlfqUqyb9Q== +"@urql/core@^4.3.0": + version "4.3.0" + resolved "https://registry.yarnpkg.com/@urql/core/-/core-4.3.0.tgz#5e150412ed08d167861b05ceed417abbd048553f" + integrity sha512-wT+FeL8DG4x5o6RfHEnONNFVDM3616ouzATMYUClB6CB+iIu2mwfBKd7xSUxYOZmwtxna5/hDRQdMl3nbQZlnw== + dependencies: + "@0no-co/graphql.web" "^1.0.1" + wonka "^6.3.2" + +"@volar/language-core@~2.4.0-alpha.15": + version "2.4.0-alpha.16" + resolved "https://registry.yarnpkg.com/@volar/language-core/-/language-core-2.4.0-alpha.16.tgz#fd4d38ccbf5ad13ebb29eacfdda719807749ffac" + integrity sha512-oOTnIZlx0P/idFwVw+W0NbzKDtZAQMzXSdIFfTePCKcXlb4Ys12GaGkx8NF9dsvPYV3nbv3ZsSxnkZWBmNKd7A== + dependencies: + "@volar/source-map" "2.4.0-alpha.16" + +"@volar/source-map@2.4.0-alpha.16": + version "2.4.0-alpha.16" + resolved "https://registry.yarnpkg.com/@volar/source-map/-/source-map-2.4.0-alpha.16.tgz#3a86ffadbba6928cd3f6717220dd87f8c1522904" + integrity sha512-sL9vNG7iR2hiKZor7UkD5Sufu3QCia4cbp2gX/nGRNSdaPbhOpdAoavwlBm0PrVkpiA19NZuavZoobD8krviFg== + +"@vue/compiler-core@3.4.32": + version "3.4.32" + resolved "https://registry.yarnpkg.com/@vue/compiler-core/-/compiler-core-3.4.32.tgz#e5db56cf6ebb1971e757a809b0b59a589888c56b" + integrity sha512-8tCVWkkLe/QCWIsrIvExUGnhYCAOroUs5dzhSoKL5w4MJS8uIYiou+pOPSVIOALOQ80B0jBs+Ri+kd5+MBnCDw== + dependencies: + "@babel/parser" "^7.24.7" + "@vue/shared" "3.4.32" + entities "^4.5.0" + estree-walker "^2.0.2" + source-map-js "^1.2.0" + +"@vue/compiler-dom@^3.4.0", "@vue/compiler-dom@^3.4.23": + version "3.4.32" + resolved "https://registry.yarnpkg.com/@vue/compiler-dom/-/compiler-dom-3.4.32.tgz#da8955cb86423d0c300fa6bc778d3493b8f35833" + integrity sha512-PbSgt9KuYo4fyb90dynuPc0XFTfFPs3sCTbPLOLlo+PrUESW1gn/NjSsUvhR+mI2AmmEzexwYMxbHDldxSOr2A== + dependencies: + "@vue/compiler-core" "3.4.32" + "@vue/shared" "3.4.32" + +"@vue/language-core@^2.0.17": + version "2.0.26" + resolved "https://registry.yarnpkg.com/@vue/language-core/-/language-core-2.0.26.tgz#233793b2e0a9f33db6f4bdac030d9c164b3efc0f" + integrity sha512-/lt6SfQ3O1yDAhPsnLv9iSUgXd1dMHqUm/t3RctfqjuwQf1LnftZ414X3UBn6aXT4MiwXWtbNJ4Z0NZWwDWgJQ== + dependencies: + "@volar/language-core" "~2.4.0-alpha.15" + "@vue/compiler-dom" "^3.4.0" + "@vue/shared" "^3.4.0" + computeds "^0.0.1" + minimatch "^9.0.3" + muggle-string "^0.4.1" + path-browserify "^1.0.1" + vue-template-compiler "^2.7.14" + +"@vue/shared@3.4.32", "@vue/shared@^3.4.0": + version "3.4.32" + resolved "https://registry.yarnpkg.com/@vue/shared/-/shared-3.4.32.tgz#7d4d21693e37113d5f2b9f6622778515ce1b77b1" + integrity sha512-ep4mF1IVnX/pYaNwxwOpJHyBtOMKWoKZMbnUyd+z0udqIxLUh7YCCd/JfDna8aUrmnG9SFORyIq2HzEATRrQsg== + "@webassemblyjs/ast@1.11.1": version "1.11.1" resolved "https://registry.yarnpkg.com/@webassemblyjs/ast/-/ast-1.11.1.tgz#2bfd767eae1a6996f432ff7e8d7fc75679c0b6a7" @@ -12671,6 +12774,11 @@ compute-scroll-into-view@^1.0.17: resolved "https://registry.yarnpkg.com/compute-scroll-into-view/-/compute-scroll-into-view-1.0.17.tgz#6a88f18acd9d42e9cf4baa6bec7e0522607ab7ab" integrity sha512-j4dx+Fb0URmzbwwMUrhqWM2BEWHdFGx+qZ9qqASHRPqvTYdqvWnHg0H1hIbcyLnvgnoNAVMlwkepyqM3DaIFUg== +computeds@^0.0.1: + version "0.0.1" + resolved "https://registry.yarnpkg.com/computeds/-/computeds-0.0.1.tgz#215b08a4ba3e08a11ff6eee5d6d8d7166a97ce2e" + integrity sha512-7CEBgcMjVmitjYo5q8JTJVra6X5mQ20uTThdK+0kR7UEaDrAWEQcRiBtWJzga4eRpP6afNwwLsX2SET2JhVB1Q== + concat-map@0.0.1: version "0.0.1" resolved "https://registry.yarnpkg.com/concat-map/-/concat-map-0.0.1.tgz#d8a96bd77fd68df7793a73036a3ba0d5405d477b" @@ -13538,6 +13646,11 @@ dayjs@1.x, dayjs@^1.10.4, dayjs@^1.11.7: resolved "https://registry.yarnpkg.com/dayjs/-/dayjs-1.11.10.tgz#68acea85317a6e164457d6d6947564029a6a16a0" integrity sha512-vjAczensTgRcqDERK0SR2XMwsF/tSvnvlv6VcF2GIhg6Sx4yOIt/irsr1RDJsKiIyBzJDpCoXiWWq28MqH2cnQ== +de-indent@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/de-indent/-/de-indent-1.0.2.tgz#b2038e846dc33baa5796128d0804b455b8c1e21d" + integrity sha512-e/1zu3xH5MQryN2zdVaF0OrdNLUbvWxzMbi+iNA6Bky7l1RoP8a2fIbRocyHclXt/arDrrR6lL3TqFD9pMQTsg== + debounce@^1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/debounce/-/debounce-1.2.0.tgz#44a540abc0ea9943018dc0eaa95cce87f65cd131" @@ -13637,6 +13750,11 @@ decompress-response@^6.0.0: dependencies: mimic-response "^3.1.0" +dedent-js@^1.0.1: + version "1.0.1" + resolved "https://registry.yarnpkg.com/dedent-js/-/dedent-js-1.0.1.tgz#bee5fb7c9e727d85dffa24590d10ec1ab1255305" + integrity sha512-OUepMozQULMLUmhxS95Vudo0jb0UchLimi3+pQ2plj61Fcy8axbP9hbiD4Sz6DPqn6XG3kfmziVfQ1rSys5AJQ== + dedent@0.7.0, dedent@^0.7.0: version "0.7.0" resolved "https://registry.yarnpkg.com/dedent/-/dedent-0.7.0.tgz#2495ddbaf6eb874abb0e1be9df22d2e5a544326c" @@ -17313,6 +17431,16 @@ got@^9.6.0: to-readable-stream "^1.0.0" url-parse-lax "^3.0.0" +gql.tada@^1.3.1: + version "1.8.2" + resolved "https://registry.yarnpkg.com/gql.tada/-/gql.tada-1.8.2.tgz#18013995e189e53f6f77a27a43e7c8da096a5904" + integrity sha512-LLt+2RcLY6i+Rq+LQQwx3uiEAPfA+pmEaAo/bJjUdaV1CVJBy3Wowds6GHeerW5kvekRM/XdbPTJw5OvnLq/DQ== + dependencies: + "@0no-co/graphql.web" "^1.0.5" + "@0no-co/graphqlsp" "^1.12.9" + "@gql.tada/cli-utils" "1.5.1" + "@gql.tada/internal" "1.0.4" + graceful-fs@4.2.10, graceful-fs@^4.1.9: version "4.2.10" resolved "https://registry.yarnpkg.com/graceful-fs/-/graceful-fs-4.2.10.tgz#147d3a006da4ca3ce14728c7aefc287c367d7a6c" @@ -17417,6 +17545,11 @@ graphql@^15.3.0, graphql@^15.5.1, graphql@^15.6.1: resolved "https://registry.yarnpkg.com/graphql/-/graphql-15.8.0.tgz#33410e96b012fa3bdb1091cc99a94769db212b38" integrity sha512-5gghUc24tP9HRznNpV2+FIoq3xKkj5dTQqf4v0CpdPbFVwFkWoxOM+o+2OC9ZSvjEMTjfmG9QT+gcvggTwW1zw== +"graphql@^15.5.0 || ^16.0.0 || ^17.0.0": + version "16.9.0" + resolved "https://registry.yarnpkg.com/graphql/-/graphql-16.9.0.tgz#1c310e63f16a49ce1fbb230bd0a000e99f6f115f" + integrity sha512-GGTKBX4SD7Wdb8mqeDLni2oaRGYQWjWHGKPQ24ZMnUtKfcsVoiv4uX8+LJr1K6U5VW2Lu1BwJnj7uiori0YtRw== + growl@1.10.5: version "1.10.5" resolved "https://registry.yarnpkg.com/growl/-/growl-1.10.5.tgz#f2735dc2283674fa67478b10181059355c369e5e" @@ -22608,6 +22741,13 @@ minimatch@^9.0.0, minimatch@^9.0.1: dependencies: brace-expansion "^2.0.1" +minimatch@^9.0.3: + version "9.0.5" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.5.tgz#d74f9dd6b57d83d8e98cfb82133b03978bc929e5" + integrity sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow== + dependencies: + brace-expansion "^2.0.1" + minimist-options@4.1.0: version "4.1.0" resolved "https://registry.yarnpkg.com/minimist-options/-/minimist-options-4.1.0.tgz#c0655713c53a8a2ebd77ffa247d342c40f010619" @@ -23021,6 +23161,11 @@ msgpackr@^1.10.1: optionalDependencies: msgpackr-extract "^3.0.2" +muggle-string@^0.4.1: + version "0.4.1" + resolved "https://registry.yarnpkg.com/muggle-string/-/muggle-string-0.4.1.tgz#3b366bd43b32f809dc20659534dd30e7c8a0d328" + integrity sha512-VNTrAak/KhO2i8dqqnqnAHOa3cYBwXEZe9h+D5h/1ZqFSTEFHdM65lR7RoIqq3tBBYavsOXV84NoHXZ0AkPyqQ== + multicast-dns-service-types@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/multicast-dns-service-types/-/multicast-dns-service-types-1.1.0.tgz#899f11d9686e5e05cb91b35d5f0e63b773cfc901" @@ -25033,7 +25178,7 @@ pascal-case@^2.0.0: camel-case "^3.0.0" upper-case-first "^1.1.0" -pascal-case@^3.1.2: +pascal-case@^3.1.1, pascal-case@^3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/pascal-case/-/pascal-case-3.1.2.tgz#b48e0ef2b98e205e7c1dae747d0b1508237660eb" integrity sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g== @@ -28867,6 +29012,11 @@ source-map-js@^1.0.2: resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.0.2.tgz#adbc361d9c62df380125e7f161f71c826f1e490c" integrity sha512-R0XvVJ9WusLiqTCEiGCmICCMplcCkIwwR11mOSD9CR5u+IXYdiseeEuXCVAjS54zqwkLcPNnmU4OeJ6tUrWhDw== +source-map-js@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/source-map-js/-/source-map-js-1.2.0.tgz#16b809c162517b5b8c3e7dcd315a2a5c2612b2af" + integrity sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg== + source-map-resolve@^0.5.0: version "0.5.3" resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a" @@ -29728,6 +29878,14 @@ supports-preserve-symlinks-flag@^1.0.0: resolved "https://registry.yarnpkg.com/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz#6eda4bd344a3c94aea376d4cc31bc77311039e09" integrity sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w== +svelte2tsx@^0.7.6: + version "0.7.13" + resolved "https://registry.yarnpkg.com/svelte2tsx/-/svelte2tsx-0.7.13.tgz#d9f19277dd9f74b5894c77ddebcf09ed67c9f5e9" + integrity sha512-aObZ93/kGAiLXA/I/kP+x9FriZM+GboB/ReOIGmLNbVGEd2xC+aTCppm3mk1cc9I/z60VQf7b2QDxC3jOXu3yw== + dependencies: + dedent-js "^1.0.1" + pascal-case "^3.1.1" + swap-case@^1.1.0: version "1.1.2" resolved "https://registry.yarnpkg.com/swap-case/-/swap-case-1.1.2.tgz#c39203a4587385fad3c850a0bd1bcafa081974e3" @@ -31552,6 +31710,14 @@ voca@^1.4.0: resolved "https://registry.yarnpkg.com/voca/-/voca-1.4.0.tgz#e15ac58b38290b72acc0c330366b6cc7984924d7" integrity sha512-8Xz4H3vhYRGbFupLtl6dHwMx0ojUcjt0HYkqZ9oBCfipd/5mD7Md58m2/dq7uPuZU/0T3Gb1m66KS9jn+I+14Q== +vue-template-compiler@^2.7.14: + version "2.7.16" + resolved "https://registry.yarnpkg.com/vue-template-compiler/-/vue-template-compiler-2.7.16.tgz#c81b2d47753264c77ac03b9966a46637482bb03b" + integrity sha512-AYbUWAJHLGGQM7+cNTELw+KsOG9nl2CnSv467WobS5Cv9uk3wFcnr1Etsz2sEIHEZvw1U+o9mRlEO6QbZvUPGQ== + dependencies: + de-indent "^1.0.2" + he "^1.2.0" + w3c-hr-time@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/w3c-hr-time/-/w3c-hr-time-1.0.2.tgz#0a89cdf5cc15822df9c360543676963e0cc308cd" @@ -32145,6 +32311,11 @@ winston@^3.3.3: triple-beam "^1.3.0" winston-transport "^4.5.0" +wonka@^6.3.2: + version "6.3.4" + resolved "https://registry.yarnpkg.com/wonka/-/wonka-6.3.4.tgz#76eb9316e3d67d7febf4945202b5bdb2db534594" + integrity sha512-CjpbqNtBGNAeyNS/9W6q3kSkKE52+FjIj7AkFlLr11s/VWGUu6a2CdYSdGxocIhIVjaW/zchesBQUKPVU69Cqg== + word-wrap@^1.2.3, word-wrap@~1.2.3: version "1.2.4" resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.4.tgz#cb4b50ec9aca570abd1f52f33cd45b6c61739a9f" From f749090a7dc17024e2ae67d05dc248dca32e84b5 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Fri, 19 Jul 2024 21:44:54 +0800 Subject: [PATCH 2/4] close writestream on finish --- packages/export-handler/src/index.ts | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/packages/export-handler/src/index.ts b/packages/export-handler/src/index.ts index 95c922953..cac46cb92 100644 --- a/packages/export-handler/src/index.ts +++ b/packages/export-handler/src/index.ts @@ -81,22 +81,31 @@ export const exporter = Sentry.GCPFunction.wrapHttpFunction( }) try { - // write the list of urls to a csv file and upload it to gcs + // write the exported data to a csv file and upload it to gcs // path style: exports///.csv const dateStr = new Date().toISOString() const fileUuid = uuidv4() const fullPath = `exports/${claims.uid}/${dateStr}/${fileUuid}.csv` - // open a write_stream to the file const file = createGCSFile(GCS_BUCKET, fullPath) - const writeStream = file.createWriteStream({ - contentType: 'text/csv', - }) + // stringify the data and pipe it to the write_stream const stringifier = stringify({ header: true, - columns: ['url', 'state', 'labels'], + columns: ['id', 'title', 'description', 'state', 'labels'], }) - stringifier.pipe(writeStream) + + stringifier + .pipe( + file.createWriteStream({ + contentType: 'text/csv', + }) + ) + .on('error', (err) => { + console.error('error writing to file', err) + }) + .on('finish', () => { + console.log('done writing to file') + }) // fetch data from the database const omnivore = new Omnivore({ @@ -122,7 +131,7 @@ export const exporter = Sentry.GCPFunction.wrapHttpFunction( } } while (cursor) - writeStream.end() + stringifier.end() // generate a temporary signed url for the csv file const signedUrl = await createSignedUrl(file) From 95657e3d3ea2d01145db02120119e53cf463907e Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Sun, 21 Jul 2024 11:11:29 +0800 Subject: [PATCH 3/4] queue email sending jobs in exporter and importer --- packages/api/src/jobs/ai/create_digest.ts | 1 + packages/api/src/jobs/email/inbound_emails.ts | 1 + packages/api/src/jobs/email/send_email.ts | 14 +++++- .../api/src/resolvers/recent_emails/index.ts | 1 + packages/api/src/server.ts | 1 - packages/api/src/services/send_emails.ts | 3 ++ packages/import-handler/src/csv.ts | 13 ++++-- packages/import-handler/src/index.ts | 46 +++++++++++++------ packages/import-handler/src/job.ts | 23 ++++++++++ packages/import-handler/src/matterHistory.ts | 14 +++--- packages/import-handler/src/metrics.ts | 10 +++- packages/import-handler/test/csv/csv.test.ts | 2 +- .../test/matter/matter_importer.test.ts | 2 +- packages/import-handler/test/util.ts | 8 ++-- 14 files changed, 104 insertions(+), 35 deletions(-) create mode 100644 packages/import-handler/src/job.ts diff --git a/packages/api/src/jobs/ai/create_digest.ts b/packages/api/src/jobs/ai/create_digest.ts index 7bdbd8870..f265fd34d 100644 --- a/packages/api/src/jobs/ai/create_digest.ts +++ b/packages/api/src/jobs/ai/create_digest.ts @@ -623,6 +623,7 @@ const sendEmail = async (user: User, digest: Digest, channels: Channel[]) => { ` await enqueueSendEmail({ + userId: user.id, to: user.email, from: env.sender.message, subject: subTitle, diff --git a/packages/api/src/jobs/email/inbound_emails.ts b/packages/api/src/jobs/email/inbound_emails.ts index 6f1721660..5a047d51b 100644 --- a/packages/api/src/jobs/email/inbound_emails.ts +++ b/packages/api/src/jobs/email/inbound_emails.ts @@ -117,6 +117,7 @@ export const forwardEmailJob = async (data: EmailJobData) => { // forward non-newsletter emails to the registered email address const result = await enqueueSendEmail({ + userId: user.id, from: env.sender.message, to: user.email, subject: `Fwd: ${subject}`, diff --git a/packages/api/src/jobs/email/send_email.ts b/packages/api/src/jobs/email/send_email.ts index 130e2b14a..416cf94f0 100644 --- a/packages/api/src/jobs/email/send_email.ts +++ b/packages/api/src/jobs/email/send_email.ts @@ -1,5 +1,6 @@ import { env } from '../../env' import { sendWithMailJet } from '../../services/send_emails' +import { findActiveUser } from '../../services/user' import { Merge } from '../../util' import { logger } from '../../utils/logger' import { sendEmail } from '../../utils/sendEmail' @@ -9,7 +10,8 @@ export const SEND_EMAIL_JOB = 'send-email' type ContentType = { html: string } | { text: string } | { templateId: string } export type SendEmailJobData = Merge< { - to: string + userId: string + to?: string from?: string subject?: string html?: string @@ -22,6 +24,16 @@ export type SendEmailJobData = Merge< > export const sendEmailJob = async (data: SendEmailJobData) => { + if (!data.to) { + const user = await findActiveUser(data.userId) + if (!user) { + logger.error('user not found', data.userId) + return false + } + + data.to = user.email + } + if (process.env.USE_MAILJET && data.dynamicTemplateData) { return sendWithMailJet(data.to, data.dynamicTemplateData.link) } diff --git a/packages/api/src/resolvers/recent_emails/index.ts b/packages/api/src/resolvers/recent_emails/index.ts index cd19c0d11..de3538436 100644 --- a/packages/api/src/resolvers/recent_emails/index.ts +++ b/packages/api/src/resolvers/recent_emails/index.ts @@ -127,6 +127,7 @@ export const replyToEmailResolver = authorized< } const result = await enqueueSendEmail({ + userId: uid, to: recentEmail.replyTo || recentEmail.from, // send to the reply-to address if it exists or the from address subject: 'Re: ' + recentEmail.subject, text: reply, diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index 37df57fcc..13630af62 100755 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -95,7 +95,6 @@ export const createApp = (): Express => { app.use('/api/auth', authLimiter, authRouter()) app.use('/api/mobile-auth', authLimiter, mobileAuthRouter()) app.use('/api/page', pageRouter()) - app.use('/api/user', userRouter()) app.use('/api/shortcuts', shortcutsRouter()) app.use('/api/article', articleRouter()) app.use('/api/ai-summary', aiSummariesRouter()) diff --git a/packages/api/src/services/send_emails.ts b/packages/api/src/services/send_emails.ts index 69a537bdb..988ed9183 100644 --- a/packages/api/src/services/send_emails.ts +++ b/packages/api/src/services/send_emails.ts @@ -18,6 +18,7 @@ export const sendNewAccountVerificationEmail = async (user: { } const result = await enqueueSendEmail({ + userId: user.id, to: user.email, dynamicTemplateData: dynamicTemplateData, templateId: env.sendgrid.confirmationTemplateId, @@ -78,6 +79,7 @@ export const sendAccountChangeEmail = async (user: { } const result = await enqueueSendEmail({ + userId: user.id, to: user.email, dynamicTemplateData: dynamicTemplateData, templateId: env.sendgrid.verificationTemplateId, @@ -100,6 +102,7 @@ export const sendPasswordResetEmail = async (user: { } const result = await enqueueSendEmail({ + userId: user.id, to: user.email, dynamicTemplateData: dynamicTemplateData, templateId: env.sendgrid.resetPasswordTemplateId, diff --git a/packages/import-handler/src/csv.ts b/packages/import-handler/src/csv.ts index fa3d209e1..7acdff4ce 100644 --- a/packages/import-handler/src/csv.ts +++ b/packages/import-handler/src/csv.ts @@ -46,7 +46,12 @@ const parseDate = (date: string): Date => { export const importCsv = async (ctx: ImportContext, stream: Stream) => { // create metrics in redis - await createMetrics(ctx.redisClient, ctx.userId, ctx.taskId, ctx.source) + await createMetrics( + ctx.redisDataSource.cacheClient, + ctx.userId, + ctx.taskId, + ctx.source + ) const parser = parse({ headers: true, @@ -68,7 +73,7 @@ export const importCsv = async (ctx: ImportContext, stream: Stream) => { // update total counter await updateMetrics( - ctx.redisClient, + ctx.redisDataSource, ctx.userId, ctx.taskId, ImportStatus.TOTAL @@ -79,7 +84,7 @@ export const importCsv = async (ctx: ImportContext, stream: Stream) => { ctx.countImported += 1 // update started counter await updateMetrics( - ctx.redisClient, + ctx.redisDataSource, ctx.userId, ctx.taskId, ImportStatus.STARTED @@ -96,7 +101,7 @@ export const importCsv = async (ctx: ImportContext, stream: Stream) => { ctx.countFailed += 1 // update invalid counter await updateMetrics( - ctx.redisClient, + ctx.redisDataSource, ctx.userId, ctx.taskId, ImportStatus.INVALID diff --git a/packages/import-handler/src/index.ts b/packages/import-handler/src/index.ts index de3379a26..06617dfe2 100644 --- a/packages/import-handler/src/index.ts +++ b/packages/import-handler/src/index.ts @@ -4,13 +4,13 @@ import { RedisDataSource } from '@omnivore/utils' import * as Sentry from '@sentry/serverless' import axios from 'axios' import 'dotenv/config' -import Redis from 'ioredis' import * as jwt from 'jsonwebtoken' import { Stream } from 'node:stream' import * as path from 'path' import { promisify } from 'util' import { v4 as uuid } from 'uuid' import { importCsv } from './csv' +import { queueEmailJob } from './job' import { importMatterArchive } from './matterHistory' import { ImportStatus, updateMetrics } from './metrics' import { CONTENT_FETCH_URL, createCloudTask, emailUserUrl } from './task' @@ -57,7 +57,7 @@ export type ImportContext = { countFailed: number urlHandler: UrlHandler contentHandler: ContentHandler - redisClient: Redis + redisDataSource: RedisDataSource taskId: string source: string } @@ -140,32 +140,40 @@ const createEmailCloudTask = async (userId: string, payload: unknown) => { ) } -const sendImportFailedEmail = async (userId: string) => { - return createEmailCloudTask(userId, { +const sendImportFailedEmail = async ( + redisDataSource: RedisDataSource, + userId: string +) => { + return queueEmailJob(redisDataSource, { + userId, subject: 'Your Omnivore import failed.', - body: `There was an error importing your file. Please ensure you uploaded the correct file type, if you need help, please email feedback@omnivore.app`, + html: `There was an error importing your file. Please ensure you uploaded the correct file type, if you need help, please email feedback@omnivore.app`, }) } export const sendImportStartedEmail = async ( + redisDataSource: RedisDataSource, userId: string, urlsEnqueued: number, urlsFailed: number ) => { - return createEmailCloudTask(userId, { + return queueEmailJob(redisDataSource, { + userId, subject: 'Your Omnivore import has started', - body: `We have started processing ${urlsEnqueued} URLs. ${urlsFailed} URLs are invalid.`, + html: `We have started processing ${urlsEnqueued} URLs. ${urlsFailed} URLs are invalid.`, }) } export const sendImportCompletedEmail = async ( + redisDataSource: RedisDataSource, userId: string, urlsImported: number, urlsFailed: number ) => { - return createEmailCloudTask(userId, { + return queueEmailJob(redisDataSource, { + userId, subject: 'Your Omnivore import has finished', - body: `We have finished processing ${ + html: `We have finished processing ${ urlsImported + urlsFailed } URLs. ${urlsImported} URLs have been added to your library. ${urlsFailed} URLs failed to be parsed.`, }) @@ -298,7 +306,10 @@ const contentHandler = async ( return Promise.resolve() } -const handleEvent = async (data: StorageEvent, redisClient: Redis) => { +const handleEvent = async ( + data: StorageEvent, + redisDataSource: RedisDataSource +) => { if (shouldHandle(data)) { const handler = handlerForFile(data.name) if (!handler) { @@ -329,7 +340,7 @@ const handleEvent = async (data: StorageEvent, redisClient: Redis) => { countFailed: 0, urlHandler, contentHandler, - redisClient, + redisDataSource, taskId: data.name, source: importSource(data.name), } @@ -337,9 +348,14 @@ const handleEvent = async (data: StorageEvent, redisClient: Redis) => { await handler(ctx, stream) if (ctx.countImported > 0) { - await sendImportStartedEmail(userId, ctx.countImported, ctx.countFailed) + await sendImportStartedEmail( + ctx.redisDataSource, + userId, + ctx.countImported, + ctx.countFailed + ) } else { - await sendImportFailedEmail(userId) + await sendImportFailedEmail(ctx.redisDataSource, userId) } } } @@ -377,7 +393,7 @@ export const importHandler = Sentry.GCPFunction.wrapHttpFunction( }) try { - await handleEvent(obj, redisDataSource.cacheClient) + await handleEvent(obj, redisDataSource) } catch (err) { console.log('error handling event', { err, obj }) throw err @@ -436,7 +452,7 @@ export const importMetricsCollector = Sentry.GCPFunction.wrapHttpFunction( try { // update metrics await updateMetrics( - redisDataSource.cacheClient, + redisDataSource, userId, req.body.taskId, req.body.status diff --git a/packages/import-handler/src/job.ts b/packages/import-handler/src/job.ts new file mode 100644 index 000000000..0f040b8f3 --- /dev/null +++ b/packages/import-handler/src/job.ts @@ -0,0 +1,23 @@ +import { RedisDataSource } from '@omnivore/utils' +import { Queue } from 'bullmq' + +const QUEUE_NAME = 'omnivore-backend-queue' +export const SEND_EMAIL_JOB = 'send-email' + +interface SendEmailJobData { + userId: string + from?: string + subject?: string + html?: string +} + +export const queueEmailJob = async ( + redisDataSource: RedisDataSource, + data: SendEmailJobData +) => { + const queue = new Queue(QUEUE_NAME, { + connection: redisDataSource.queueRedisClient, + }) + + await queue.add(SEND_EMAIL_JOB, data) +} diff --git a/packages/import-handler/src/matterHistory.ts b/packages/import-handler/src/matterHistory.ts index 378f9d51e..2d7ee9e99 100644 --- a/packages/import-handler/src/matterHistory.ts +++ b/packages/import-handler/src/matterHistory.ts @@ -37,7 +37,7 @@ export const importMatterHistoryCsv = async ( const url = new URL(row['URL']) // update total counter await updateMetrics( - ctx.redisClient, + ctx.redisDataSource, ctx.userId, ctx.taskId, ImportStatus.TOTAL @@ -46,7 +46,7 @@ export const importMatterHistoryCsv = async ( ctx.countImported += 1 // update started counter await updateMetrics( - ctx.redisClient, + ctx.redisDataSource, ctx.userId, ctx.taskId, ImportStatus.STARTED @@ -219,7 +219,7 @@ const handleMatterHistoryRow = async ( ctx.countFailed += 1 // update failed counter await updateMetrics( - ctx.redisClient, + ctx.redisDataSource, ctx.userId, ctx.taskId, ImportStatus.FAILED @@ -254,7 +254,7 @@ export const importMatterArchive = async ( try { // create metrics in redis await createMetrics( - ctx.redisClient, + ctx.redisDataSource.cacheClient, ctx.userId, ctx.taskId, 'matter-importer' @@ -273,7 +273,7 @@ export const importMatterArchive = async ( try { // update total metrics await updateMetrics( - ctx.redisClient, + ctx.redisDataSource, ctx.userId, ctx.taskId, ImportStatus.TOTAL @@ -284,7 +284,7 @@ export const importMatterArchive = async ( ctx.countImported += 1 // update started metrics await updateMetrics( - ctx.redisClient, + ctx.redisDataSource, ctx.userId, ctx.taskId, ImportStatus.STARTED @@ -294,7 +294,7 @@ export const importMatterArchive = async ( ctx.countFailed += 1 // update failed metrics await updateMetrics( - ctx.redisClient, + ctx.redisDataSource, ctx.userId, ctx.taskId, ImportStatus.FAILED diff --git a/packages/import-handler/src/metrics.ts b/packages/import-handler/src/metrics.ts index 41e4c734c..c737527b6 100644 --- a/packages/import-handler/src/metrics.ts +++ b/packages/import-handler/src/metrics.ts @@ -47,13 +47,14 @@ export const createMetrics = async ( } export const updateMetrics = async ( - redisClient: Redis, + redisDataSource: RedisDataSource, userId: string, taskId: string, status: ImportStatus ) => { const key = `import:${userId}:${taskId}` + const redisClient = redisDataSource.cacheClient /** * Define our command */ @@ -109,7 +110,12 @@ export const updateMetrics = async ( if ((state as ImportTaskState) == ImportTaskState.FINISHED) { const metrics = await getMetrics(redisClient, userId, taskId) if (metrics) { - await sendImportCompletedEmail(userId, metrics.imported, metrics.failed) + await sendImportCompletedEmail( + redisDataSource, + userId, + metrics.imported, + metrics.failed + ) } } } catch (error) { diff --git a/packages/import-handler/test/csv/csv.test.ts b/packages/import-handler/test/csv/csv.test.ts index 582444cab..66de76b01 100644 --- a/packages/import-handler/test/csv/csv.test.ts +++ b/packages/import-handler/test/csv/csv.test.ts @@ -26,7 +26,7 @@ describe('Test csv importer', () => { }, }) - stub = stubImportCtx(redisDataSource.cacheClient) + stub = stubImportCtx(redisDataSource) }) afterEach(async () => { diff --git a/packages/import-handler/test/matter/matter_importer.test.ts b/packages/import-handler/test/matter/matter_importer.test.ts index 3d6a84c00..649bb8f0f 100644 --- a/packages/import-handler/test/matter/matter_importer.test.ts +++ b/packages/import-handler/test/matter/matter_importer.test.ts @@ -30,7 +30,7 @@ describe('matter importer', () => { }, }) - stub = stubImportCtx(redisDataSource.cacheClient) + stub = stubImportCtx(redisDataSource) }) afterEach(async () => { diff --git a/packages/import-handler/test/util.ts b/packages/import-handler/test/util.ts index 14550783d..dbdd948c7 100644 --- a/packages/import-handler/test/util.ts +++ b/packages/import-handler/test/util.ts @@ -1,8 +1,10 @@ import { Readability } from '@omnivore/readability' -import Redis from 'ioredis' +import { RedisDataSource } from '@omnivore/utils' import { ArticleSavingRequestStatus, ImportContext } from '../src' -export const stubImportCtx = (redisClient: Redis): ImportContext => { +export const stubImportCtx = ( + redisDataSource: RedisDataSource +): ImportContext => { return { userId: '', countImported: 0, @@ -24,7 +26,7 @@ export const stubImportCtx = (redisClient: Redis): ImportContext => { ): Promise => { return Promise.resolve() }, - redisClient, + redisDataSource, taskId: '', source: 'csv-importer', } From 110f537e72bfb37eb4f9712d0b6fac340933fdec Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 22 Jul 2024 18:36:11 +0800 Subject: [PATCH 4/4] sleep for 1 second to avoid rate limiting --- packages/export-handler/src/index.ts | 59 ++++++++++++++++++++++++---- packages/export-handler/src/job.ts | 2 +- packages/import-handler/src/index.ts | 24 +---------- packages/import-handler/src/task.ts | 8 ---- 4 files changed, 54 insertions(+), 39 deletions(-) diff --git a/packages/export-handler/src/index.ts b/packages/export-handler/src/index.ts index cac46cb92..1a3aafa27 100644 --- a/packages/export-handler/src/index.ts +++ b/packages/export-handler/src/index.ts @@ -37,11 +37,11 @@ const createSignedUrl = async (file: File): Promise => { export const sendExportCompletedEmail = async ( redisDataSource: RedisDataSource, - emailAddress: string, + userId: string, urlToDownload: string ) => { return queueEmailJob(redisDataSource, { - to: emailAddress, + userId, subject: 'Your Omnivore export is ready', html: `

Your export is ready. You can download it from the following link: ${urlToDownload}

`, }) @@ -91,7 +91,26 @@ export const exporter = Sentry.GCPFunction.wrapHttpFunction( // stringify the data and pipe it to the write_stream const stringifier = stringify({ header: true, - columns: ['id', 'title', 'description', 'state', 'labels'], + columns: [ + 'id', + 'title', + 'description', + 'labels', + 'author', + 'site_name', + 'original_url', + 'slug', + 'updated_at', + 'saved_at', + 'type', + 'published_at', + 'url', + 'thumbnail', + 'read_at', + 'word_count', + 'reading_progress_percent', + 'archived_at', + ], }) stringifier @@ -113,23 +132,50 @@ export const exporter = Sentry.GCPFunction.wrapHttpFunction( }) let cursor = 0 + let hasNext = false do { const response = await omnivore.items.search({ - first: 50, + first: 100, after: cursor, + includeContent: false, }) const items = response.edges.map((edge) => edge.node) cursor = response.pageInfo.endCursor ? parseInt(response.pageInfo.endCursor) : 0 + hasNext = response.pageInfo.hasNextPage // write data to the csv file if (items.length > 0) { // write the list of urls, state and labels to the stream - items.forEach((row) => stringifier.write(row)) + items.forEach((item) => + stringifier.write({ + id: item.id, + title: item.title, + description: item.description, + labels: item.labels?.map((label) => label.name).join(','), + author: item.author, + site_name: item.siteName, + original_url: item.originalArticleUrl, + slug: item.slug, + updated_at: item.updatedAt, + saved_at: item.savedAt, + type: item.pageType, + published_at: item.publishedAt, + url: item.url, + thumbnail: item.image, + read_at: item.readAt, + word_count: item.wordsCount, + reading_progress_percent: item.readingProgressPercent, + archived_at: item.archivedAt, + }) + ) + + // sleep for 1 second to avoid rate limiting + await new Promise((resolve) => setTimeout(resolve, 1000)) } - } while (cursor) + } while (hasNext) stringifier.end() @@ -137,7 +183,6 @@ export const exporter = Sentry.GCPFunction.wrapHttpFunction( const signedUrl = await createSignedUrl(file) console.log('signed url', signedUrl) - // TODO: get the user's email from the database await sendExportCompletedEmail(redisDataSource, claims.uid, signedUrl) console.log('done') diff --git a/packages/export-handler/src/job.ts b/packages/export-handler/src/job.ts index 69ea6bb79..0f040b8f3 100644 --- a/packages/export-handler/src/job.ts +++ b/packages/export-handler/src/job.ts @@ -5,7 +5,7 @@ const QUEUE_NAME = 'omnivore-backend-queue' export const SEND_EMAIL_JOB = 'send-email' interface SendEmailJobData { - to: string + userId: string from?: string subject?: string html?: string diff --git a/packages/import-handler/src/index.ts b/packages/import-handler/src/index.ts index 06617dfe2..672310954 100644 --- a/packages/import-handler/src/index.ts +++ b/packages/import-handler/src/index.ts @@ -13,7 +13,7 @@ import { importCsv } from './csv' import { queueEmailJob } from './job' import { importMatterArchive } from './matterHistory' import { ImportStatus, updateMetrics } from './metrics' -import { CONTENT_FETCH_URL, createCloudTask, emailUserUrl } from './task' +import { CONTENT_FETCH_URL, createCloudTask } from './task' export enum ArticleSavingRequestStatus { Failed = 'FAILED', @@ -118,28 +118,6 @@ const importURL = async ( }) } -const createEmailCloudTask = async (userId: string, payload: unknown) => { - if (!process.env.JWT_SECRET) { - throw 'Envrionment not setup correctly' - } - - const exp = Math.floor(Date.now() / 1000) + 60 * 60 * 24 // 1 day - const authToken = (await signToken( - { uid: userId, exp }, - process.env.JWT_SECRET - )) as string - const headers = { - 'Omnivore-Authorization': authToken, - } - - return createCloudTask( - emailUserUrl(), - payload, - headers, - 'omnivore-email-queue' - ) -} - const sendImportFailedEmail = async ( redisDataSource: RedisDataSource, userId: string diff --git a/packages/import-handler/src/task.ts b/packages/import-handler/src/task.ts index 94b3352cc..bccca7ecb 100644 --- a/packages/import-handler/src/task.ts +++ b/packages/import-handler/src/task.ts @@ -3,14 +3,6 @@ import { CloudTasksClient, protos } from '@google-cloud/tasks' const cloudTask = new CloudTasksClient() -export const emailUserUrl = () => { - const envar = process.env.INTERNAL_SVC_ENDPOINT - if (envar) { - return envar + 'api/user/email' - } - throw 'INTERNAL_SVC_ENDPOINT not set' -} - export const CONTENT_FETCH_URL = process.env.CONTENT_FETCH_GCF_URL export const createCloudTask = async (