/*
 * Provenance: git patch c993fa11be43dc7745da193a86cf384b0f0cee73
 * Author:     Hongbo Wu
 * Date:       Tue, 29 Nov 2022 11:39:23 +0800
 * Subject:    Add stack-overflow content-handler
 *
 * Target file: packages/content-handler/src/websites/stack-overflow-handler.ts
 *
 * The same patch also changes packages/content-handler/src/index.ts:
 *   - adds `import { StackOverflowHandler } from './websites/stack-overflow-handler'`
 *   - appends `new StackOverflowHandler()` to the `contentHandlers` array
 */
import { ContentHandler } from '../content-handler'

/**
 * Content handler that rewrites a Stack Overflow question page in place:
 * vote counts are surfaced as a line at the top of each post, comment lists
 * are flattened into paragraphs, and answers are regrouped under a single
 * container headed by the answer count.
 */
export class StackOverflowHandler extends ContentHandler {
  constructor() {
    super()
    this.name = 'stackoverflow'
  }

  /**
   * Prepends a `{title}: {votes}votes` block to a post.
   *
   * Does nothing when the post has no `div[itemprop='upvoteCount']`.
   *
   * @param element - The question or answer element to annotate.
   * @param dom - Document used to create the new node.
   * @param title - Label shown before the vote count.
   */
  parseVotes(element: Element, dom: Document, title: string): void {
    const votes = element.querySelector(`div[itemprop='upvoteCount']`)
    if (!votes) {
      return
    }

    const newVotes = dom.createElement('div')
    // NOTE(review): the text is inserted without any wrapper markup; if the
    // upstream template wrapped it in a heading tag, that is not visible
    // here — confirm against the repository.
    newVotes.innerHTML = `\n\n${title}: ${votes.innerHTML}votes\n\n`
    element.prepend(newVotes)
  }

  /**
   * Replaces a post's `.comments` container with a simplified block: a
   * heading line followed by one `<p>` per comment in the form
   * `author: text - date`.
   *
   * The `span[itemprop='commentCount']` element is removed from the post
   * whether or not any comments were found.
   *
   * @param element - The question or answer element whose comments are rewritten.
   * @param dom - Document used to create the new nodes.
   */
  parseComments(element: Element, dom: Document): void {
    const commentsDiv = element.querySelector(`.comments`)
    if (commentsDiv) {
      const comments = commentsDiv.querySelectorAll(`.comment`)
      if (comments.length > 0) {
        const count = element.querySelector(
          `span[itemprop='commentCount']`
        )?.textContent

        const newComments = dom.createElement('div')
        newComments.innerHTML = `\n\n${
          count ? count + ' Comments' : 'Comment'
        }\n\n`

        comments.forEach((comment) => {
          const author = comment.querySelector(`.comment-user`)
          const text = comment.querySelector(`.comment-copy`)?.textContent
          const authorHref = author?.getAttribute('href')
          const date = comment.querySelector(`.relativetime-clean`)?.textContent
          const link = comment
            .querySelector(`.comment-link`)
            ?.getAttribute('href')

          // A comment is kept only when every piece is present. authorHref
          // and link gate inclusion but are not part of the rendered text.
          if (author && text && authorHref && date && link) {
            const newComment = dom.createElement('p')
            // The author markup is copied as HTML, but the comment body and
            // date come from textContent (entities already decoded), so they
            // are appended as a text node. Feeding them back through
            // innerHTML would re-parse any '<' or '&' as markup.
            newComment.innerHTML = author.innerHTML
            newComment.appendChild(dom.createTextNode(`: ${text} - ${date}`))
            newComments.appendChild(newComment)
          }
        })

        commentsDiv.parentNode?.replaceChild(newComments, commentsDiv)
      }
    }

    // remove comment count
    element.querySelector(`span[itemprop='commentCount']`)?.remove()
  }

  /**
   * Reports whether the URL belongs to stackoverflow.com or one of its
   * subdomains.
   *
   * The match is on the exact host or a `.stackoverflow.com` suffix, so
   * look-alike hosts such as `notstackoverflow.com` are rejected.
   *
   * @param url - Absolute URL of the page.
   * @param dom - Unused by this handler.
   * @returns `true` when this handler should pre-parse the page.
   * @throws TypeError when `url` is not a valid absolute URL (from `new URL`).
   */
  shouldPreParse(url: string, dom: Document): boolean {
    const { hostname } = new URL(url)
    return (
      hostname === 'stackoverflow.com' ||
      hostname.endsWith('.stackoverflow.com')
    )
  }

  /**
   * Rewrites the question and its answers inside
   * `div[itemprop='mainEntity']`, mutating `dom` in place.
   *
   * The document is returned untouched when that container is missing.
   *
   * @param url - Unused by this handler.
   * @param dom - Document to rewrite.
   * @returns The same `dom` instance that was passed in.
   */
  async preParse(url: string, dom: Document): Promise<Document> {
    const mainEntity = dom.querySelector(`div[itemprop='mainEntity']`)
    if (!mainEntity) {
      return dom
    }

    const question = mainEntity.querySelector('.question')
    if (question) {
      this.parseVotes(question, dom, 'Question')
      this.parseComments(question, dom)
    }

    const answersDiv = mainEntity.querySelector('#answers')
    if (answersDiv) {
      const count = mainEntity.querySelector(
        `span[itemprop='answerCount']`
      )?.textContent
      const newAnswers = dom.createElement('div')
      newAnswers.innerHTML = `\n\n${
        count ? count + ' Answers' : 'Answer'
      }\n\n`

      const answers = answersDiv.querySelectorAll(`.answer`)
      answers.forEach((answer) => {
        const title = answer.classList.contains('accepted-answer')
          ? 'Accepted Answer'
          : 'Answer'
        this.parseVotes(answer, dom, title)
        this.parseComments(answer, dom)
        // appendChild moves the answer out of #answers into the new container.
        newAnswers.appendChild(answer)
      })

      // Everything left in #answers is dropped in favour of the rebuilt container.
      answersDiv.replaceChildren(newAnswers)
    }

    return dom
  }
}