feat: send importer status update

This commit is contained in:
Hongbo Wu
2023-05-25 15:47:13 +08:00
parent fba8e0424d
commit 813a90e3c9
2 changed files with 44 additions and 3 deletions

View File

@ -40,6 +40,7 @@ const CONTENT_TYPES = ['text/csv', 'application/zip']
export type UrlHandler = (
ctx: ImportContext,
url: URL,
taskId: string,
state?: ArticleSavingRequestStatus,
labels?: string[]
) => Promise<void>
@ -96,6 +97,7 @@ const importURL = async (
userId: string,
url: URL,
source: string,
taskId: string,
state?: ArticleSavingRequestStatus,
labels?: string[]
): Promise<string | undefined> => {
@ -173,6 +175,7 @@ const handlerForFile = (name: string): importHandlerFunc | undefined => {
const urlHandler = async (
ctx: ImportContext,
url: URL,
taskId: string,
state?: ArticleSavingRequestStatus,
labels?: string[]
): Promise<void> => {
@ -182,6 +185,7 @@ const urlHandler = async (
ctx.userId,
url,
'csv-importer',
taskId,
state,
labels && labels.length > 0 ? labels : undefined
)
@ -345,7 +349,7 @@ export const importMetricsCollector = Sentry.GCPFunction.wrapHttpFunction(
console.error('JWT_SECRET not exists')
return res.status(500).send({ errorCodes: 'JWT_SECRET_NOT_EXISTS' })
}
const token = (req.query.token || req.headers.authorization) as string
const token = req.headers.authorization
if (!token) {
return res.status(401).send({ errorCode: 'INVALID_TOKEN' })
}
@ -373,6 +377,7 @@ export const importMetricsCollector = Sentry.GCPFunction.wrapHttpFunction(
// update metrics
await updateMetrics(redisClient, userId, req.body.taskId, req.body.status)
await redisClient.quit()
res.send('ok')

View File

@ -44,6 +44,8 @@ const NON_SCRIPT_HOSTS= ['medium.com', 'fastcompany.com'];
const ALLOWED_CONTENT_TYPES = ['text/html', 'application/octet-stream', 'text/plain', 'application/pdf'];
const IMPORTER_METRICS_COLLECTOR_URL = process.env.IMPORTER_METRICS_COLLECTOR_URL;
const userAgentForUrl = (url) => {
try {
const u = new URL(url);
@ -248,6 +250,25 @@ const saveUploadedPdf = async (userId, url, uploadFileId, articleSavingRequestId
);
};
const sendImportStatusUpdate = async (userId, taskId, status) => {
const auth = await signToken({ uid: userId }, process.env.JWT_SECRET);
const response = await axios.post(
IMPORTER_METRICS_COLLECTOR_URL,
{
taskId,
status,
},
{
headers: {
'Authorization': auth,
'Content-Type': 'application/json',
},
});
return response.data;
};
async function fetchContent(req, res) {
let functionStartTime = Date.now();
@ -257,6 +278,7 @@ async function fetchContent(req, res) {
const state = req.body.state
const labels = req.body.labels
const source = req.body.source || 'parseContent';
const taskId = req.body.taskId; // taskId is used to update import status
let logRecord = {
url,
@ -266,7 +288,8 @@ async function fetchContent(req, res) {
source,
},
state,
labelsToAdd: labels
labelsToAdd: labels,
taskId: taskId,
};
console.info(`Article parsing request`, logRecord);
@ -278,7 +301,7 @@ async function fetchContent(req, res) {
}
// pre handle url with custom handlers
let title, content, contentType;
let title, content, contentType, importStatus;
try {
const browser = await getBrowserPromise;
const result = await preHandleContent(url, browser);
@ -348,6 +371,8 @@ async function fetchContent(req, res) {
logRecord.totalTime = Date.now() - functionStartTime;
logRecord.result = apiResponse.createArticle;
}
importStatus = 'imported';
} catch (e) {
logRecord.error = e.message;
console.error(`Error while retrieving page`, logRecord);
@ -383,12 +408,23 @@ async function fetchContent(req, res) {
logRecord.totalTime = Date.now() - functionStartTime;
logRecord.result = apiResponse.createArticle;
importStatus = 'failed';
} finally {
if (context) {
await context.close();
}
console.info(`parse-page`, logRecord);
// send import status to update the metrics
if (taskId) {
try {
await sendImportStatusUpdate(userId, taskId, importStatus);
} catch (e) {
console.error('Error while sending import status update', e);
}
}
res.sendStatus(200);
}
}