Merge pull request #1902 from omnivore-app/fix/readability-published-date-timezone

Return the published date when the node's class name is `omnivore-published-date`, the marker class we add to the page when we scrape the article.
This commit is contained in:
Hongbo Wu
2023-03-13 14:18:30 +08:00
committed by GitHub
2 changed files with 12 additions and 2 deletions

View File

@ -652,7 +652,14 @@ async function retrieveHtml(page, logRecord) {
document.getElementById('px-block-form-wrapper')) {
return 'IS_BLOCKED'
}
// `create_time` is a global variable set by WeChat when rendering the page.
// On pages that never declare it, a bare `create_time` reference throws a
// ReferenceError, so the identifier is probed with `typeof` first.
if (typeof create_time !== 'undefined' && create_time) {
  // Scaled by 1000 to get the millisecond timestamp `Date` expects
  // (presumably the global holds seconds since the epoch - TODO confirm).
  const date = new Date(create_time * 1000);
  // Skip unparsable values so the page never receives an "Invalid Date" marker.
  if (!Number.isNaN(date.getTime())) {
    const dateNode = document.createElement('div');
    // Marker class that the article parser looks for to pick up the published date.
    dateNode.className = 'omnivore-published-date';
    // NOTE(review): toLocaleString() output depends on the browser's locale and
    // time zone and carries no UTC offset - confirm the consumer parses it as intended.
    // Plain text only, so textContent is used instead of innerHTML.
    dateNode.textContent = date.toLocaleString();
    document.body.appendChild(dateNode);
  }
}
return document.documentElement.outerHTML;
}, iframes);
logRecord.puppeteerSuccess = true;

View File

@ -1055,7 +1055,10 @@ Readability.prototype = {
_checkPublishedDate: function (node, matchString) {
// Skipping meta tags
if (node.tagName.toLowerCase() === 'meta') return
// return published date if the class name is 'omnivore-published-date' which we added when we scraped the article
if (node.className === 'omnivore-published-date' && this._isValidPublishedDate(node.textContent)) {
return new Date(node.textContent);
}
// Searching for the real date in the text content
let dateRegExpFound = this.REGEXPS.DATES_REGEXPS.find(regexp => regexp.test(node.textContent.trim()))
dateRegExpFound && (dateRegExpFound = dateRegExpFound.exec(node.textContent.trim()))