Return the published date if the node's class name is 'omnivore-published-date', which we added when we scraped the article
This commit is contained in:
@ -652,7 +652,14 @@ async function retrieveHtml(page, logRecord) {
|
||||
document.getElementById('px-block-form-wrapper')) {
|
||||
return 'IS_BLOCKED'
|
||||
}
|
||||
|
||||
if (create_time) {
|
||||
// create_time is a global variable set by WeChat when rendering the page
|
||||
const date = new Date(create_time * 1000);
|
||||
const dateNode = document.createElement('div');
|
||||
dateNode.className = 'omnivore-published-date';
|
||||
dateNode.innerHTML = date.toLocaleString();
|
||||
document.body.appendChild(dateNode);
|
||||
}
|
||||
return document.documentElement.outerHTML;
|
||||
}, iframes);
|
||||
logRecord.puppeteerSuccess = true;
|
||||
|
||||
@ -1055,7 +1055,10 @@ Readability.prototype = {
|
||||
_checkPublishedDate: function (node, matchString) {
|
||||
// Skipping meta tags
|
||||
if (node.tagName.toLowerCase() === 'meta') return
|
||||
|
||||
// return published date if the class name is 'omnivore-published-date' which we added when we scraped the article
|
||||
if (node.className === 'omnivore-published-date' && this._isValidPublishedDate(node.textContent)) {
|
||||
return new Date(node.textContent);
|
||||
}
|
||||
// Searching for the real date in the text content
|
||||
let dateRegExpFound = this.REGEXPS.DATES_REGEXPS.find(regexp => regexp.test(node.textContent.trim()))
|
||||
dateRegExpFound && (dateRegExpFound = dateRegExpFound.exec(node.textContent.trim()))
|
||||
|
||||
Reference in New Issue
Block a user