From 19facec4e281d46b16d18a0389477c8f38d5e73b Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 25 May 2023 12:15:05 +0800 Subject: [PATCH] feat: add importer metrics collector --- packages/import-handler/package.json | 7 +- .../src/luaScripts/updateMetrics.lua | 31 ++++++ packages/import-handler/src/metrics.ts | 95 +++++++++++++++++++ packages/import-handler/src/redis.ts | 37 ++++++++ packages/puppeteer-parse/index.js | 7 +- yarn.lock | 51 ++++++++++ 6 files changed, 224 insertions(+), 4 deletions(-) create mode 100644 packages/import-handler/src/luaScripts/updateMetrics.lua create mode 100644 packages/import-handler/src/metrics.ts create mode 100644 packages/import-handler/src/redis.ts diff --git a/packages/import-handler/package.json b/packages/import-handler/package.json index c425e3764..d13ce1623 100644 --- a/packages/import-handler/package.json +++ b/packages/import-handler/package.json @@ -12,9 +12,10 @@ "test": "yarn mocha -r ts-node/register --config mocha-config.json", "lint": "eslint src --ext ts,js,tsx,jsx", "compile": "tsc", - "build": "tsc", + "build": "tsc && yarn copy-files", "start": "functions-framework --target=importHandler", - "dev": "concurrently \"tsc -w\" \"nodemon --watch ./build/ --exec npm run start\"" + "dev": "concurrently \"tsc -w\" \"nodemon --watch ./build/ --exec npm run start\"", + "copy-files": "copyfiles -u 1 src/**/*.lua build/src" }, "devDependencies": { "@types/chai": "^4.3.4", @@ -28,6 +29,7 @@ "@types/unzip-stream": "^0.3.1", "@types/urlsafe-base64": "^1.0.28", "@types/uuid": "^9.0.0", + "copyfiles": "^2.4.1", "eslint-plugin-prettier": "^4.0.0" }, "dependencies": { @@ -46,6 +48,7 @@ "jsonwebtoken": "^8.5.1", "linkedom": "^0.14.21", "nodemon": "^2.0.15", + "redis": "^4.6.6", "unzip-stream": "^0.3.1", "urlsafe-base64": "^1.0.0", "uuid": "^9.0.0" diff --git a/packages/import-handler/src/luaScripts/updateMetrics.lua b/packages/import-handler/src/luaScripts/updateMetrics.lua new file mode 100644 index 000000000..94c45e1ab --- /dev/null +++ b/packages/import-handler/src/luaScripts/updateMetrics.lua @@ -0,0 +1,31 @@ +local key = tostring(KEYS[1]); +local status = tostring(ARGV[1]); +local timestamp = tonumber(ARGV[2]); + +-- increment the status counter +redis.call('HINCRBY', key, status, 1); + +if (status == "imported" or status == "failed") then + -- get the current metrics + local bulk = redis.call('HGETALL', key); + -- get the total, imported and failed counters + local result = {} + local nextkey + for i, v in ipairs(bulk) do + if i % 2 == 1 then + nextkey = v + else + result[nextkey] = v + end + end + + local imported = tonumber(result['imported']) or 0; + local failed = tonumber(result['failed']) or 0; + local total = tonumber(result['total']) or 0; + local state = tonumber(result['state']) or 0; + if (state == 0 and imported + failed >= total) then + -- all the records have been processed + -- update the metrics + redis.call('HSET', key, 'end_time', timestamp, 'state', 1); + end +end diff --git a/packages/import-handler/src/metrics.ts b/packages/import-handler/src/metrics.ts new file mode 100644 index 000000000..7be5edff5 --- /dev/null +++ b/packages/import-handler/src/metrics.ts @@ -0,0 +1,95 @@ +import { createClient } from 'redis' +import { lua } from './redis' + +// explicitly create the return type of RedisClient +type RedisClient = ReturnType + +enum ImportStatus { + STARTED = 'started', + INVALID = 'invalid', + IMPORTED = 'imported', + FAILED = 'failed', + TOTAL = 'total', +} + +enum ImportTaskState { + STARTED, + FINISHED, +} + +interface ImportMetrics { + started: number + invalid: number + imported: number + failed: number + total: number + importer: string + state: ImportTaskState + startTime: number + endTime: number +} + +export const startImport = async ( + redisClient: RedisClient, + userId: string, + taskId: string, + importer: string +) => { + const key = `import:${userId}:${taskId}` + try { + // set multiple fields + await redisClient.hSet(key, { + ['start_time']: Date.now(), // unix timestamp in seconds + ['importer']: importer, + ['state']: ImportTaskState.STARTED, + }) + } catch (error) { + console.error('Redis Error', error) + } +} + +export const updateMetrics = async ( + redisClient: RedisClient, + userId: string, + taskId: string, + status: ImportStatus +) => { + const key = `import:${userId}:${taskId}` + + try { + // use lua script to increment hash field + await redisClient.evalSha(lua.sha, { + keys: [key], + arguments: [status, Date.now().toString()], + }) + } catch (error) { + console.error('Redis Error', error) + } +} + +export const getMetrics = async ( + redisClient: RedisClient, + userId: string, + taskId: string +): Promise => { + const key = `import:${userId}:${taskId}` + try { + const metrics = await redisClient.hGetAll(key) + + return { + // convert to integer + started: parseInt(metrics.started, 10), + invalid: parseInt(metrics.invalid, 10), + imported: parseInt(metrics.imported, 10), + failed: parseInt(metrics.failed, 10), + total: parseInt(metrics.total, 10), + importer: metrics.importer, + state: parseInt(metrics.state, 10), + startTime: parseInt(metrics.start_time, 10), + endTime: parseInt(metrics.end_time, 10), + } + } catch (error) { + console.error('Redis Error', error) + return null + } +} diff --git a/packages/import-handler/src/redis.ts b/packages/import-handler/src/redis.ts new file mode 100644 index 000000000..3804c36ed --- /dev/null +++ b/packages/import-handler/src/redis.ts @@ -0,0 +1,37 @@ +import fs from 'fs' +import { createClient } from 'redis' + +// load lua script +export const lua = { + script: fs.readFileSync('./luaScripts/updateMetrics.lua', 'utf8'), + sha: '', +} + +export const createRedisClient = async (url?: string, cert?: string) => { + const redisClient = createClient({ + url, + socket: { + tls: url?.startsWith('rediss://'), // rediss:// is the protocol for TLS + cert: cert?.replace(/\\n/g, '\n'), // replace \n with new line + rejectUnauthorized: false, // for self-signed certs + connectTimeout: 10000, // 10 seconds + reconnectStrategy(retries: number): number | Error { + if (retries > 10) { + return new Error('Retries exhausted') + } + return 1000 + }, + }, + }) + + redisClient.on('error', (err) => console.error('Redis Client Error', err)) + + await redisClient.connect() + console.log('Redis Client Connected:', url) + + // load script to redis + lua.sha = await redisClient.scriptLoad(lua.script) + console.log('Redis Lua Script Loaded', lua.sha) + + return redisClient +} diff --git a/packages/puppeteer-parse/index.js b/packages/puppeteer-parse/index.js index 352137ac8..b25498790 100644 --- a/packages/puppeteer-parse/index.js +++ b/packages/puppeteer-parse/index.js @@ -224,7 +224,7 @@ const sendSavePageMutation = async (userId, input) => { } }`, variables: { - input: Object.assign({}, input , { source: 'puppeteer-parse' }), + input, }, }); @@ -256,13 +256,14 @@ async function fetchContent(req, res) { const articleSavingRequestId = (req.query ? req.query.saveRequestId : undefined) || (req.body ? req.body.saveRequestId : undefined); const state = req.body.state const labels = req.body.labels + const source = req.body.source || 'puppeteer-parse'; let logRecord = { url, userId, articleSavingRequestId, labels: { - source: 'parseContent', + source, }, state, labelsToAdd: labels @@ -342,6 +343,7 @@ async function fetchContent(req, res) { parseResult: readabilityResult, state, labels, + source, }); logRecord.totalTime = Date.now() - functionStartTime; @@ -378,6 +380,7 @@ async function fetchContent(req, res) { parseResult: readabilityResult, state, labels, + source, }); logRecord.totalTime = Date.now() - functionStartTime; diff --git a/yarn.lock b/yarn.lock index 339090cfe..5ef541069 100644 --- a/yarn.lock +++ b/yarn.lock @@ -5431,6 +5431,11 @@ resolved "https://registry.yarnpkg.com/@redis/bloom/-/bloom-1.0.2.tgz#42b82ec399a92db05e29fffcdfd9235a5fc15cdf" integrity sha512-EBw7Ag1hPgFzdznK2PBblc1kdlj5B5Cw3XwI9/oG7tSn85/HKy3X9xHy/8tm/eNXJYHLXHJL/pkwBpFMVVefkw== +"@redis/bloom@1.2.0": + version "1.2.0" + resolved "https://registry.yarnpkg.com/@redis/bloom/-/bloom-1.2.0.tgz#d3fd6d3c0af3ef92f26767b56414a370c7b63b71" + integrity sha512-HG2DFjYKbpNmVXsa0keLHp/3leGJz1mjh09f2RLGGLQZzSHpkmZWuwJbAvo3QcRY8p80m5+ZdXZdYOSBLlp7Cg== + "@redis/client@1.3.0": version "1.3.0" resolved "https://registry.yarnpkg.com/@redis/client/-/client-1.3.0.tgz#c62ccd707f16370a2dc2f9e158a28b7da049fa77" @@ -5440,11 +5445,25 @@ generic-pool "3.8.2" yallist "4.0.0" +"@redis/client@1.5.7": + version "1.5.7" + resolved "https://registry.yarnpkg.com/@redis/client/-/client-1.5.7.tgz#92cc5c98c76f189e37d24f0e1e17e104c6af17d4" + integrity sha512-gaOBOuJPjK5fGtxSseaKgSvjiZXQCdLlGg9WYQst+/GRUjmXaiB5kVkeQMRtPc7Q2t93XZcJfBMSwzs/XS9UZw== + dependencies: + cluster-key-slot "1.1.2" + generic-pool "3.9.0" + yallist "4.0.0" + "@redis/graph@1.0.1": version "1.0.1" resolved "https://registry.yarnpkg.com/@redis/graph/-/graph-1.0.1.tgz#eabc58ba99cd70d0c907169c02b55497e4ec8a99" integrity sha512-oDE4myMCJOCVKYMygEMWuriBgqlS5FqdWerikMoJxzmmTUErnTRRgmIDa2VcgytACZMFqpAOWDzops4DOlnkfQ== +"@redis/graph@1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@redis/graph/-/graph-1.1.0.tgz#cc2b82e5141a29ada2cce7d267a6b74baa6dd519" + integrity sha512-16yZWngxyXPd+MJxeSr0dqh2AIOi8j9yXKcKCwVaKDbH3HTuETpDVPcLujhFYVPtYrngSco31BUcSa9TH31Gqg== + "@redis/json@1.0.4": version "1.0.4" resolved "https://registry.yarnpkg.com/@redis/json/-/json-1.0.4.tgz#f372b5f93324e6ffb7f16aadcbcb4e5c3d39bda1" @@ -5455,11 +5474,21 @@ resolved "https://registry.yarnpkg.com/@redis/search/-/search-1.1.0.tgz#7abb18d431f27ceafe6bcb4dd83a3fa67e9ab4df" integrity sha512-NyFZEVnxIJEybpy+YskjgOJRNsfTYqaPbK/Buv6W2kmFNaRk85JiqjJZA5QkRmWvGbyQYwoO5QfDi2wHskKrQQ== +"@redis/search@1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@redis/search/-/search-1.1.2.tgz#6a8f66ba90812d39c2457420f859ce8fbd8f3838" + integrity sha512-/cMfstG/fOh/SsE+4/BQGeuH/JJloeWuH+qJzM8dbxuWvdWibWAOAHHCZTMPhV3xIlH4/cUEIA8OV5QnYpaVoA== + "@redis/time-series@1.0.3": version "1.0.3" resolved "https://registry.yarnpkg.com/@redis/time-series/-/time-series-1.0.3.tgz#4cfca8e564228c0bddcdf4418cba60c20b224ac4" integrity sha512-OFp0q4SGrTH0Mruf6oFsHGea58u8vS/iI5+NpYdicaM+7BgqBZH8FFvNZ8rYYLrUO/QRqMq72NpXmxLVNcdmjA== +"@redis/time-series@1.0.4": + version "1.0.4" + resolved "https://registry.yarnpkg.com/@redis/time-series/-/time-series-1.0.4.tgz#af85eb080f6934580e4d3b58046026b6c2b18717" + integrity sha512-ThUIgo2U/g7cCuZavucQTQzA9g9JbDDY2f64u3AbAoz/8vE2lt2U37LamDUVChhaDA3IRT9R6VvJwqnUfTJzng== + "@remusao/guess-url-type@^1.1.2": version "1.2.1" resolved "https://registry.yarnpkg.com/@remusao/guess-url-type/-/guess-url-type-1.2.1.tgz#b3e7c32abdf98d0fb4f93cc67cad580b5fe4ba57" @@ -11812,6 +11841,11 @@ cluster-key-slot@1.1.0: resolved "https://registry.yarnpkg.com/cluster-key-slot/-/cluster-key-slot-1.1.0.tgz#30474b2a981fb12172695833052bc0d01336d10d" integrity sha512-2Nii8p3RwAPiFwsnZvukotvow2rIHM+yQ6ZcBXGHdniadkYGZYiGmkHJIbZPIV9nfv7m/U1IPMVVcAhoWFeklw== +cluster-key-slot@1.1.2: + version "1.1.2" + resolved "https://registry.yarnpkg.com/cluster-key-slot/-/cluster-key-slot-1.1.2.tgz#88ddaa46906e303b5de30d3153b7d9fe0a0c19ac" + integrity sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA== + cmd-shim@^4.1.0: version "4.1.0" resolved "https://registry.yarnpkg.com/cmd-shim/-/cmd-shim-4.1.0.tgz#b3a904a6743e9fede4148c6f3800bf2a08135bdd" @@ -15320,6 +15354,11 @@ generic-pool@3.8.2: resolved "https://registry.yarnpkg.com/generic-pool/-/generic-pool-3.8.2.tgz#aab4f280adb522fdfbdc5e5b64d718d3683f04e9" integrity sha512-nGToKy6p3PAbYQ7p1UlWl6vSPwfwU6TMSWK7TTu+WUY4ZjyZQGniGGt2oNVvyNSpyZYSB43zMXVLcBm08MTMkg== +generic-pool@3.9.0: + version "3.9.0" + resolved "https://registry.yarnpkg.com/generic-pool/-/generic-pool-3.9.0.tgz#36f4a678e963f4fdb8707eab050823abc4e8f5e4" + integrity sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g== + gensync@^1.0.0-beta.1, gensync@^1.0.0-beta.2: version "1.0.0-beta.2" resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.2.tgz#32a6ee76c3d7f52d46b2b1ae5d93fea8580a25e0" @@ -24101,6 +24140,18 @@ redis@^4.3.1: "@redis/search" "1.1.0" "@redis/time-series" "1.0.3" +redis@^4.6.6: + version "4.6.6" + resolved "https://registry.yarnpkg.com/redis/-/redis-4.6.6.tgz#46d4f2d149d1634d6ef53db5747412a0ef7974ec" + integrity sha512-aLs2fuBFV/VJ28oLBqYykfnhGGkFxvx0HdCEBYdJ99FFbSEMZ7c1nVKwR6ZRv+7bb7JnC0mmCzaqu8frgOYhpA== + dependencies: + "@redis/bloom" "1.2.0" + "@redis/client" "1.5.7" + "@redis/graph" "1.1.0" + "@redis/json" "1.0.4" + "@redis/search" "1.1.2" + "@redis/time-series" "1.0.4" + reflect-metadata@^0.1.13: version "0.1.13" resolved "https://registry.yarnpkg.com/reflect-metadata/-/reflect-metadata-0.1.13.tgz#67ae3ca57c972a2aa1642b10fe363fe32d49dc08"