115 lines
3.7 KiB
JavaScript
115 lines
3.7 KiB
JavaScript
const { fetchContent } = require("@omnivore/puppeteer-parse");
|
|
const { uploadPdf, sendSavePageMutation, sendCreateArticleMutation, sendImportStatusUpdate } = require('./api');
|
|
|
|
// Value of the `x-cloudtasks-taskretrycount` request header at which the
// handler in this file marks an import as failed. Kept as a string because it
// is compared with `===` against the raw header value; falls back to '1' when
// the environment variable is unset or empty.
const MAX_RETRY_COUNT = process.env.MAX_RETRY_COUNT ? process.env.MAX_RETRY_COUNT : '1';
|
|
|
|
exports.contentFetchRequestHandler = async (req, res) => {
|
|
let functionStartTime = Date.now();
|
|
|
|
const userId = (req.query ? req.query.userId : undefined) || (req.body ? req.body.userId : undefined);
|
|
const articleSavingRequestId = (req.query ? req.query.saveRequestId : undefined) || (req.body ? req.body.saveRequestId : undefined);
|
|
const state = req.body.state
|
|
const labels = req.body.labels
|
|
const source = req.body.source || 'puppeteer-parse';
|
|
const taskId = req.body.taskId; // taskId is used to update import status
|
|
const url = (req.query ? req.query.url : undefined) || (req.body ? req.body.url : undefined);
|
|
const locale = (req.query ? req.query.locale : undefined) || (req.body ? req.body.locale : undefined);
|
|
const timezone = (req.query ? req.query.timezone : undefined) || (req.body ? req.body.timezone : undefined);
|
|
const rssFeedUrl = req.body.rssFeedUrl;
|
|
const savedAt = req.body.savedAt;
|
|
const publishedAt = req.body.publishedAt;
|
|
const folder = req.body.folder;
|
|
const users = req.body ? req.body.users : undefined; // users is used when saving article for multiple users
|
|
|
|
let logRecord = {
|
|
url,
|
|
userId,
|
|
articleSavingRequestId,
|
|
labels: {
|
|
source,
|
|
},
|
|
state,
|
|
labelsToAdd: labels,
|
|
taskId: taskId,
|
|
locale,
|
|
timezone,
|
|
rssFeedUrl,
|
|
savedAt,
|
|
publishedAt,
|
|
folder,
|
|
users,
|
|
};
|
|
|
|
console.log(`Article parsing request`, logRecord);
|
|
|
|
let importStatus, statusCode = 200;
|
|
|
|
try {
|
|
const { finalUrl, title, content, readabilityResult, contentType } = await fetchContent(url, locale, timezone);
|
|
if (contentType === 'application/pdf') {
|
|
const uploadFileId = await uploadPdf(finalUrl, userId, articleSavingRequestId);
|
|
const uploadedPdf = await sendCreateArticleMutation(userId, {
|
|
url: encodeURI(finalUrl),
|
|
articleSavingRequestId,
|
|
uploadFileId,
|
|
state,
|
|
labels,
|
|
source,
|
|
folder,
|
|
rssFeedUrl,
|
|
savedAt,
|
|
publishedAt,
|
|
});
|
|
if (!uploadedPdf) {
|
|
statusCode = 500;
|
|
logRecord.error = 'error while saving uploaded pdf';
|
|
} else {
|
|
importStatus = 'imported';
|
|
}
|
|
} else {
|
|
const apiResponse = await sendSavePageMutation(userId, {
|
|
url,
|
|
clientRequestId: articleSavingRequestId,
|
|
title,
|
|
originalContent: content,
|
|
parseResult: readabilityResult,
|
|
state,
|
|
labels,
|
|
rssFeedUrl,
|
|
savedAt,
|
|
publishedAt,
|
|
source,
|
|
folder,
|
|
});
|
|
if (!apiResponse) {
|
|
logRecord.error = 'error while saving page';
|
|
statusCode = 500;
|
|
} else if (apiResponse.error === 'UNAUTHORIZED') {
|
|
console.log('user is deleted, do not retry', logRecord);
|
|
return res.sendStatus(200);
|
|
} else {
|
|
importStatus = readabilityResult ? 'imported' : 'failed';
|
|
}
|
|
}
|
|
} catch (error) {
|
|
logRecord.error = error.message;
|
|
} finally {
|
|
logRecord.totalTime = Date.now() - functionStartTime;
|
|
console.log(`parse-page result`, logRecord);
|
|
|
|
// mark import failed on the last failed retry
|
|
const retryCount = req.headers['x-cloudtasks-taskretrycount'];
|
|
if (retryCount === MAX_RETRY_COUNT) {
|
|
console.log('max retry count reached');
|
|
importStatus = importStatus || 'failed';
|
|
}
|
|
|
|
// send import status to update the metrics
|
|
if (taskId && importStatus) {
|
|
await sendImportStatusUpdate(userId, taskId, importStatus);
|
|
}
|
|
|
|
res.sendStatus(statusCode);
|
|
}
|
|
}
|