From 813a90e3c956b8f80344e510ff89095e46ef8a93 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 25 May 2023 15:47:13 +0800 Subject: [PATCH] feat: send importer status update --- packages/import-handler/src/index.ts | 7 ++++- packages/puppeteer-parse/index.js | 40 ++++++++++++++++++++++++++-- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/packages/import-handler/src/index.ts b/packages/import-handler/src/index.ts index e0121f0a1..28f6dd958 100644 --- a/packages/import-handler/src/index.ts +++ b/packages/import-handler/src/index.ts @@ -40,6 +40,7 @@ const CONTENT_TYPES = ['text/csv', 'application/zip'] export type UrlHandler = ( ctx: ImportContext, url: URL, + taskId: string, state?: ArticleSavingRequestStatus, labels?: string[] ) => Promise @@ -96,6 +97,7 @@ const importURL = async ( userId: string, url: URL, source: string, + taskId: string, state?: ArticleSavingRequestStatus, labels?: string[] ): Promise => { @@ -173,6 +175,7 @@ const handlerForFile = (name: string): importHandlerFunc | undefined => { const urlHandler = async ( ctx: ImportContext, url: URL, + taskId: string, state?: ArticleSavingRequestStatus, labels?: string[] ): Promise => { @@ -182,6 +185,7 @@ const urlHandler = async ( ctx.userId, url, 'csv-importer', + taskId, state, labels && labels.length > 0 ? labels : undefined ) @@ -345,7 +349,7 @@ export const importMetricsCollector = Sentry.GCPFunction.wrapHttpFunction( console.error('JWT_SECRET not exists') return res.status(500).send({ errorCodes: 'JWT_SECRET_NOT_EXISTS' }) } - const token = (req.query.token || req.headers.authorization) as string + const token = req.headers.authorization if (!token) { return res.status(401).send({ errorCode: 'INVALID_TOKEN' }) } @@ -373,6 +377,7 @@ export const importMetricsCollector = Sentry.GCPFunction.wrapHttpFunction( // update metrics await updateMetrics(redisClient, userId, req.body.taskId, req.body.status) + await redisClient.quit() res.send('ok') diff --git a/packages/puppeteer-parse/index.js b/packages/puppeteer-parse/index.js index 405f0a219..09fc38e9e 100644 --- a/packages/puppeteer-parse/index.js +++ b/packages/puppeteer-parse/index.js @@ -44,6 +44,8 @@ const NON_SCRIPT_HOSTS= ['medium.com', 'fastcompany.com']; const ALLOWED_CONTENT_TYPES = ['text/html', 'application/octet-stream', 'text/plain', 'application/pdf']; +const IMPORTER_METRICS_COLLECTOR_URL = process.env.IMPORTER_METRICS_COLLECTOR_URL; + const userAgentForUrl = (url) => { try { const u = new URL(url); @@ -248,6 +250,25 @@ const saveUploadedPdf = async (userId, url, uploadFileId, articleSavingRequestId ); }; +const sendImportStatusUpdate = async (userId, taskId, status) => { + const auth = await signToken({ uid: userId }, process.env.JWT_SECRET); + + const response = await axios.post( + IMPORTER_METRICS_COLLECTOR_URL, + { + taskId, + status, + }, + { + headers: { + 'Authorization': auth, + 'Content-Type': 'application/json', + }, + }); + + return response.data; +}; + async function fetchContent(req, res) { let functionStartTime = Date.now(); @@ -257,6 +278,7 @@ async function fetchContent(req, res) { const state = req.body.state const labels = req.body.labels const source = req.body.source || 'parseContent'; + const taskId = req.body.taskId; // taskId is used to update import status let logRecord = { url, @@ -266,7 +288,8 @@ async function fetchContent(req, res) { source, }, state, - labelsToAdd: labels + labelsToAdd: labels, + taskId: taskId, }; console.info(`Article parsing request`, logRecord); @@ -278,7 +301,7 @@ async function fetchContent(req, res) { } // pre handle url with custom handlers - let title, content, contentType; + let title, content, contentType, importStatus; try { const browser = await getBrowserPromise; const result = await preHandleContent(url, browser); @@ -348,6 +371,8 @@ async function fetchContent(req, res) { logRecord.totalTime = Date.now() - functionStartTime; logRecord.result = apiResponse.createArticle; } + + importStatus = 'imported'; } catch (e) { logRecord.error = e.message; console.error(`Error while retrieving page`, logRecord); @@ -383,12 +408,23 @@ async function fetchContent(req, res) { logRecord.totalTime = Date.now() - functionStartTime; logRecord.result = apiResponse.createArticle; + + importStatus = 'failed'; } finally { if (context) { await context.close(); } console.info(`parse-page`, logRecord); + // send import status to update the metrics + if (taskId) { + try { + await sendImportStatusUpdate(userId, taskId, importStatus); + } catch (e) { + console.error('Error while sending import status update', e); + } + } + res.sendStatus(200); } }