Get published date from the datetime attribute of time elements

This commit is contained in:
Hongbo Wu
2023-09-28 17:14:17 +08:00
parent c9b178ca5c
commit 45b7c2b619
4 changed files with 13 additions and 5 deletions

View File

@ -1081,6 +1081,18 @@ Readability.prototype = {
}
// we don't want to check for dates in URLs
if (node.tagName.toLowerCase() === 'a') return
// use the datetime attribute of a <time> element when it parses to a valid date
if (node.tagName.toLowerCase() === 'time') {
const datetime = node.getAttribute('datetime')
if (datetime) {
const date = new Date(datetime)
if (!isNaN(date)) {
this._articlePublishedDate = date
return true
}
}
}
// Searching for the real date in the text content
const content = node.textContent.trim()
let dateFound

View File

@ -4,7 +4,7 @@
"dir": null,
"excerpt": "The Sept. 27, 2022 episode of “The Ezra Klein Show”",
"siteName": "fakehost",
"siteIcon": "/vi-assets/static-assets/favicon-d2483f10ef688e6f89e23806b9700298.ico",
"siteIcon": "http://fakehost/vi-assets/static-assets/favicon-d2483f10ef688e6f89e23806b9700298.ico",
"previewImage": "https://static01.nyt.com/newsgraphics/images/icons/defaultPromoCrop.png",
"publishedDate": "2022-09-27T16:25:17.221Z",
"language": "English",

View File

@ -5,8 +5,6 @@
<article id="story">
<header>
<p> The Ezra Klein Show </p>
<p><time datetime="2022-09-27T12:25:17-04:00">Sept. 27, 2022</time>
</p>
</header>
<section name="articleBody">
<div>

View File

@ -30,8 +30,6 @@
</figcaption>
</figure>
</div>
<p><time datetime="2022-10-28T05:00:25-04:00"><span>Oct. 28, 2022</span></time>
</p>
</header>
<section name="articleBody">
<div>