From aae6759bcb2f7b71b40f3fef46d435b8777e7d5a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 13 Mar 2023 12:08:01 +0800 Subject: [PATCH] return published date if the class name is omnivore-published-date which we added when we scraped the article --- packages/puppeteer-parse/index.js | 9 ++++++++- packages/readabilityjs/Readability.js | 5 ++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/packages/puppeteer-parse/index.js b/packages/puppeteer-parse/index.js index 26dc7eef2..b39ec4e12 100644 --- a/packages/puppeteer-parse/index.js +++ b/packages/puppeteer-parse/index.js @@ -652,7 +652,14 @@ async function retrieveHtml(page, logRecord) { document.getElementById('px-block-form-wrapper')) { return 'IS_BLOCKED' } - + if (create_time) { + // create_time is a global variable set by WeChat when rendering the page + const date = new Date(create_time * 1000); + const dateNode = document.createElement('div'); + dateNode.className = 'omnivore-published-date'; + dateNode.innerHTML = date.toLocaleString(); + document.body.appendChild(dateNode); + } return document.documentElement.outerHTML; }, iframes); logRecord.puppeteerSuccess = true; diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index dd4b6446f..832f66327 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -1055,7 +1055,10 @@ Readability.prototype = { _checkPublishedDate: function (node, matchString) { // Skipping meta tags if (node.tagName.toLowerCase() === 'meta') return - + // return published date if the class name is 'omnivore-published-date' which we added when we scraped the article + if (node.className === 'omnivore-published-date' && this._isValidPublishedDate(node.textContent)) { + return new Date(node.textContent); + } // Searching for the real date in the text content let dateRegExpFound = this.REGEXPS.DATES_REGEXPS.find(regexp => regexp.test(node.textContent.trim())) dateRegExpFound && (dateRegExpFound = dateRegExpFound.exec(node.textContent.trim()))