From 45b7c2b619e64d3095477ef5bb1c908ac379a19a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 28 Sep 2023 17:14:17 +0800 Subject: [PATCH] get published date from time element --- packages/readabilityjs/Readability.js | 12 ++++++++++++ .../nytimes-podcasts/expected-metadata.json | 2 +- .../test/test-pages/nytimes-podcasts/expected.html | 2 -- .../test/test-pages/nytimes.com/expected.html | 2 -- 4 files changed, 13 insertions(+), 5 deletions(-) diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index abdbfa8d0..65de68275 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -1081,6 +1081,18 @@ Readability.prototype = { } // we don't want to check for dates in the URL's if (node.tagName.toLowerCase() === 'a') return + // get the datetime from time element + if (node.tagName.toLowerCase() === 'time') { + const datetime = node.getAttribute('datetime') + if (datetime) { + const date = new Date(datetime) + if (!isNaN(date)) { + this._articlePublishedDate = date + return true + } + } + } + // Searching for the real date in the text content const content = node.textContent.trim() let dateFound diff --git a/packages/readabilityjs/test/test-pages/nytimes-podcasts/expected-metadata.json b/packages/readabilityjs/test/test-pages/nytimes-podcasts/expected-metadata.json index 61fe09886..d2bc065e6 100644 --- a/packages/readabilityjs/test/test-pages/nytimes-podcasts/expected-metadata.json +++ b/packages/readabilityjs/test/test-pages/nytimes-podcasts/expected-metadata.json @@ -4,7 +4,7 @@ "dir": null, "excerpt": "The Sept. 27, 2022 episode of “The Ezra Klein Show”", "siteName": "fakehost", - "siteIcon": "/vi-assets/static-assets/favicon-d2483f10ef688e6f89e23806b9700298.ico", + "siteIcon": "http://fakehost/vi-assets/static-assets/favicon-d2483f10ef688e6f89e23806b9700298.ico", "previewImage": "https://static01.nyt.com/newsgraphics/images/icons/defaultPromoCrop.png", "publishedDate": "2022-09-27T16:25:17.221Z", "language": "English", diff --git a/packages/readabilityjs/test/test-pages/nytimes-podcasts/expected.html b/packages/readabilityjs/test/test-pages/nytimes-podcasts/expected.html index 8d79daf8c..725bef094 100644 --- a/packages/readabilityjs/test/test-pages/nytimes-podcasts/expected.html +++ b/packages/readabilityjs/test/test-pages/nytimes-podcasts/expected.html @@ -5,8 +5,6 @@

The Ezra Klein Show

-

-

diff --git a/packages/readabilityjs/test/test-pages/nytimes.com/expected.html b/packages/readabilityjs/test/test-pages/nytimes.com/expected.html index ceb0aa925..9777d24a6 100644 --- a/packages/readabilityjs/test/test-pages/nytimes.com/expected.html +++ b/packages/readabilityjs/test/test-pages/nytimes.com/expected.html @@ -30,8 +30,6 @@
-

-