Fix Readability not being run in puppeteer-parse
This commit is contained in:
@ -184,7 +184,8 @@ export const parsePreparedContent = async (
|
||||
labels: { source: 'parsePreparedContent' },
|
||||
}
|
||||
|
||||
let article = null
|
||||
// If we have a parse result, use it
|
||||
let article = parseResult || null
|
||||
let highlightData = undefined
|
||||
const { document, pageInfo } = preparedDocument
|
||||
|
||||
@ -205,14 +206,16 @@ export const parsePreparedContent = async (
|
||||
|
||||
let dom = parseHTML(document).document
|
||||
|
||||
// preParse content
|
||||
const preParsedDom = await preParseContent(url, dom)
|
||||
preParsedDom && (dom = preParsedDom)
|
||||
|
||||
try {
|
||||
article =
|
||||
parseResult ||
|
||||
(await getReadabilityResult(url, document, dom, isNewsletter))
|
||||
if (!article) {
|
||||
// Attempt to parse the article
|
||||
// preParse content
|
||||
const preParsedDom = await preParseContent(url, dom)
|
||||
preParsedDom && (dom = preParsedDom)
|
||||
|
||||
article = await getReadabilityResult(url, document, dom, isNewsletter)
|
||||
}
|
||||
|
||||
if (!article?.textContent && allowRetry) {
|
||||
const newDocument = {
|
||||
...preparedDocument,
|
||||
|
||||
@ -15,7 +15,7 @@ const signToken = promisify(jwt.sign);
|
||||
const os = require('os');
|
||||
const { Storage } = require('@google-cloud/storage');
|
||||
const { parseHTML } = require('linkedom');
|
||||
const { preHandleContent } = require("@omnivore/content-handler");
|
||||
const { preHandleContent, preParseContent } = require("@omnivore/content-handler");
|
||||
const { Readability } = require("@omnivore/readability");
|
||||
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
@ -314,7 +314,18 @@ async function fetchContent(req, res) {
|
||||
|
||||
logRecord.fetchContentTime = Date.now() - functionStartTime;
|
||||
|
||||
const readabilityResult = content ? (await getReadabilityResult(url, content)) : null;
|
||||
let readabilityResult = null;
|
||||
if (content) {
|
||||
let document = parseHTML(content).document;
|
||||
|
||||
// preParse content
|
||||
const preParsedDom = await preParseContent(url, document)
|
||||
if (preParsedDom) {
|
||||
document = preParsedDom
|
||||
}
|
||||
|
||||
readabilityResult = await getReadabilityResult(url, document);
|
||||
}
|
||||
|
||||
const apiResponse = await sendSavePageMutation(userId, {
|
||||
url: finalUrl,
|
||||
|
||||
Reference in New Issue
Block a user