From 9d223bb4e237f0ba7c62f652a9c51e6cd4ed5b46 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 28 Sep 2022 16:31:35 +0800 Subject: [PATCH] Better handling of NYT podcast transcripts --- packages/readabilityjs/Readability.js | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 1c653ef36..dcc2db4a4 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -171,7 +171,7 @@ Readability.prototype = { // Readability-readerable.js. Please keep both copies in sync. articleNegativeLookBehindCandidates: /breadcrumbs|breadcrumb|utils|trilist/i, articleNegativeLookAheadCandidates: /outstream(.?)_|sub(.?)_|m_|omeda-promo-|in-article-advert|block-ad-.*/i, - unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs/i, + unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|margins-h/i, // okMaybeItsACandidate: /and|article(?!-breadcrumb)|body|column|content|main|shadow|post-header/i, get okMaybeItsACandidate() { return new RegExp(`and|(?