diff --git a/packages/content-handler/src/websites/twitter-handler.ts b/packages/content-handler/src/websites/twitter-handler.ts index 46e635980..c745f6d66 100644 --- a/packages/content-handler/src/websites/twitter-handler.ts +++ b/packages/content-handler/src/websites/twitter-handler.ts @@ -274,7 +274,7 @@ const getTweetIds = async ( } window.scrollBy(0, distance) - await waitFor(100) + await waitFor(500) currentHeight += distance } @@ -364,6 +364,7 @@ export class TwitterHandler extends ContentHandler { ` const content = ` + @@ -375,7 +376,8 @@ export class TwitterHandler extends ContentHandler { ${tweetsContent} ${tweetUrl} - ` + +` return { content, url, title } } diff --git a/packages/puppeteer-parse/index.js b/packages/puppeteer-parse/index.js index 2eb417901..fa25b4732 100644 --- a/packages/puppeteer-parse/index.js +++ b/packages/puppeteer-parse/index.js @@ -348,7 +348,18 @@ async function fetchContent(req, res) { const content = sbResult.domContent; logRecord.fetchContentTime = Date.now() - functionStartTime; - const readabilityResult = content ? (await getReadabilityResult(url, content)) : null; + let readabilityResult = null; + if (content) { + let document = parseHTML(content).document; + + // preParse content + const preParsedDom = await preParseContent(sbUrl, document) + if (preParsedDom) { + document = preParsedDom + } + + readabilityResult = await getReadabilityResult(url, document); + } const apiResponse = await sendSavePageMutation(userId, { url: finalUrl,