From f77aae9810988ab8aaf61b209bb470bd8a364491 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 30 Mar 2023 21:55:41 +0800 Subject: [PATCH] Remove \n and extra spaces from author and trim it --- packages/readabilityjs/Readability.js | 3 ++- .../test/test-pages/fiercepharma/expected-metadata.json | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 5902ae973..5268ae574 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -3022,7 +3022,8 @@ Readability.prototype = { var textContent = articleContent.textContent; return { title: this._articleTitle, - byline: author, + // remove \n and extra spaces and trim the string + byline: author ? author.replace(/\n/g, ' ').replace(/\s+/g, ' ').trim() : null, dir: this._articleDir, content: this._serializer(articleContent), textContent: textContent, diff --git a/packages/readabilityjs/test/test-pages/fiercepharma/expected-metadata.json b/packages/readabilityjs/test/test-pages/fiercepharma/expected-metadata.json index 44d47041b..c15549daf 100644 --- a/packages/readabilityjs/test/test-pages/fiercepharma/expected-metadata.json +++ b/packages/readabilityjs/test/test-pages/fiercepharma/expected-metadata.json @@ -1,6 +1,6 @@ { "title": "Novavax, eyeing the COVID 'vaccine hesitant' and kids, unveils new education campaigns as Nuvaxovid nears US finish line", - "byline": " 03:23pm", + "byline": "03:23pm", "dir": null, "excerpt": "Pfizer, Moderna and Johnson & Johnson were quickest off the mark in getting COVID vaccines into American arms, but Novavax is hoping to add another pandemic vaccine to the U.S. | Pfizer, Moderna and Johnson & Johnson were quickest off the mark in getting COVID vaccines into American arms, but Novavax is hoping to add another pandemic vaccine to the U.S. mix soon—and it's pushing new campaigns to get the word out.", "siteName": "Fierce Pharma",