From f2b3a66b72c17411fd96e31d52ae3ed5ac72c938 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 18 Jul 2024 11:18:51 +0800 Subject: [PATCH] remove metadata and cover image from content --- .../src/websites/weixin-qq-handler.ts | 19 ++++++++++++++----- packages/readabilityjs/Readability.js | 2 ++ 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/packages/content-handler/src/websites/weixin-qq-handler.ts b/packages/content-handler/src/websites/weixin-qq-handler.ts index 454728655..8dac19e51 100644 --- a/packages/content-handler/src/websites/weixin-qq-handler.ts +++ b/packages/content-handler/src/websites/weixin-qq-handler.ts @@ -27,16 +27,25 @@ export class WeixinQqHandler extends ContentHandler { metaNode.setAttribute('content', publishTimeISO) dom.querySelector('head')?.appendChild(metaNode) } - // This replace the class name of the article info to preserve the block - dom - .querySelector('.rich_media_meta_list') - ?.setAttribute('class', '_omnivore_rich_media_meta_list') - // This removes the title + const author = ( + dom.querySelector('#js_author_name') || dom.querySelector('#js_name') + )?.textContent?.trim() + if (author) { + const authorNode = dom.createElement('meta') + authorNode.setAttribute('name', 'author') + authorNode.setAttribute('content', author) + dom.querySelector('head')?.appendChild(authorNode) + } + + // This removes the title, metadata and cover image dom.querySelector('.rich_media_title')?.remove() + dom.querySelector('.rich_media_meta_list')?.remove() + dom.querySelector('#js_row_immersive_cover_img')?.remove() // This removes the profile info dom.querySelector('.profile_container')?.remove() + dom.querySelector('.profile_card_container')?.remove() // This removes the footer dom.querySelector('#content_bottom_area')?.remove() diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 2d57ba5ce..c97260ed7 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -2004,6 +2004,8 @@ Readability.prototype = { metadata.byline = jsonld.byline || values["dc:creator"] || values["dcterm:creator"] || + values["og:article:author"] || + values["twitter:creator"] || values["author"]; // get description