From 94aead998809ad40dc1d50963cefb15af16d35bf Mon Sep 17 00:00:00 2001
From: Hongbo Wu
Date: Tue, 16 May 2023 14:46:44 +0800
Subject: [PATCH] Add content handler for weixin.qq.com

---
 packages/content-handler/src/index.ts         |  2 +
 .../src/websites/weixin-qq-handler.ts         | 48 +++++++++++++++++++
 2 files changed, 50 insertions(+)
 create mode 100644 packages/content-handler/src/websites/weixin-qq-handler.ts

diff --git a/packages/content-handler/src/index.ts b/packages/content-handler/src/index.ts
index 2745ee8ba..c8f5d7baf 100644
--- a/packages/content-handler/src/index.ts
+++ b/packages/content-handler/src/index.ts
@@ -33,6 +33,7 @@ import { ScrapingBeeHandler } from './websites/scrapingBee-handler'
 import { StackOverflowHandler } from './websites/stack-overflow-handler'
 import { TDotCoHandler } from './websites/t-dot-co-handler'
 import { TwitterHandler } from './websites/twitter-handler'
+import { WeixinQqHandler } from './websites/weixin-qq-handler'
 import { WikipediaHandler } from './websites/wikipedia-handler'
 import { YoutubeHandler } from './websites/youtube-handler'
 
@@ -75,6 +76,7 @@ const contentHandlers: ContentHandler[] = [
   new StackOverflowHandler(),
   new EnergyWorldHandler(),
   new PipedVideoHandler(),
+  new WeixinQqHandler(),
 ]
 
 const newsletterHandlers: ContentHandler[] = [
diff --git a/packages/content-handler/src/websites/weixin-qq-handler.ts b/packages/content-handler/src/websites/weixin-qq-handler.ts
new file mode 100644
index 000000000..feff90955
--- /dev/null
+++ b/packages/content-handler/src/websites/weixin-qq-handler.ts
@@ -0,0 +1,48 @@
+import { ContentHandler } from '../content-handler'
+
+/**
+ * Content handler for pages served from weixin.qq.com.
+ *
+ * Its pre-parse step edits the DOM in place: it renames the article info
+ * block's class and removes the title, profile and footer elements.
+ */
+export class WeixinQqHandler extends ContentHandler {
+  constructor() {
+    super()
+    this.name = 'Weixin QQ'
+  }
+
+  /**
+   * Returns true when `url` points at weixin.qq.com or one of its subdomains.
+   * Throws if `url` cannot be parsed by the `URL` constructor.
+   */
+  shouldPreParse(url: string, dom: Document): boolean {
+    const { hostname } = new URL(url)
+    // Require a label boundary so look-alike hosts such as
+    // `evilweixin.qq.com` are not matched.
+    return hostname === 'weixin.qq.com' || hostname.endsWith('.weixin.qq.com')
+  }
+
+  /**
+   * Mutates `dom` in place and resolves with the same document.
+   */
+  async preParse(url: string, dom: Document): Promise<Document> {
+    // Rename the class of the article info block to preserve the block
+    dom
+      .querySelector('.rich_media_meta_list')
+      ?.setAttribute('class', '_omnivore_rich_media_meta_list')
+
+    // Remove the title
+    dom.querySelector('.rich_media_title')?.remove()
+
+    // Remove the profile info
+    dom.querySelector('.profile_container')?.remove()
+
+    // Remove the footer
+    dom.querySelector('#content_bottom_area')?.remove()
+    dom.querySelector('.rich_media_area_extra')?.remove()
+    dom.querySelector('#js_pc_qr_code')?.remove()
+
+    return dom
+  }
+}