Add stack-overflow content-handler

This commit is contained in:
Hongbo Wu
2022-11-29 11:39:23 +08:00
parent 4f1b4e523c
commit c993fa11be
2 changed files with 95 additions and 0 deletions

View File

@ -29,6 +29,7 @@ import { parseHTML } from 'linkedom'
import { CooperPressHandler } from './newsletters/cooper-press-handler'
import { HeyWorldHandler } from './newsletters/hey-world-handler'
import { Browser } from 'puppeteer-core'
import { StackOverflowHandler } from './websites/stack-overflow-handler'
const validateUrlString = (url: string) => {
const u = new URL(url)
@ -64,6 +65,7 @@ const contentHandlers: ContentHandler[] = [
new MorningBrewHandler(),
new BloombergNewsletterHandler(),
new SubstackHandler(),
new StackOverflowHandler(),
]
const newsletterHandlers: ContentHandler[] = [

View File

@ -0,0 +1,93 @@
import { ContentHandler } from '../content-handler'
/**
 * Content handler that rewrites a Stack Overflow question page in place
 * before it is parsed: vote counts become headings, comment threads become
 * plain paragraphs, and all answers are regrouped under a single heading.
 */
export class StackOverflowHandler extends ContentHandler {
  constructor() {
    super()
    this.name = 'stackoverflow'
  }

  /**
   * Prepends a "<title>: <count> votes" heading to a post.
   *
   * Nothing is added when the post has no `div[itemprop='upvoteCount']`
   * element or when that element has no text.
   *
   * @param element the question or answer element to annotate
   * @param dom the document used to create new nodes
   * @param title label placed before the vote count
   */
  parseVotes(element: Element, dom: Document, title: string): void {
    const votes = element.querySelector(`div[itemprop='upvoteCount']`)
    // Trim so whitespace around the number in the markup does not leak into
    // the heading.
    const count = votes?.textContent?.trim()
    if (!count) {
      return
    }

    const heading = dom.createElement('h3')
    heading.textContent = `${title}: ${count} votes`

    const newVotes = dom.createElement('div')
    newVotes.appendChild(heading)
    element.prepend(newVotes)
  }

  /**
   * Replaces the `.comments` container of a post with a simplified list: a
   * heading followed by one paragraph per comment in the form
   * "author: text - date", where author and date are links.
   *
   * Comments missing any of author, author href, text, date or permalink are
   * left out. The post's `span[itemprop='commentCount']` is removed whether
   * or not any comments were found.
   *
   * @param element the question or answer element whose comments are rewritten
   * @param dom the document used to create new nodes
   */
  parseComments(element: Element, dom: Document): void {
    const commentsDiv = element.querySelector(`.comments`)
    if (commentsDiv) {
      const comments = commentsDiv.querySelectorAll(`.comment`)
      if (comments.length > 0) {
        const count = element.querySelector(
          `span[itemprop='commentCount']`
        )?.textContent

        const heading = dom.createElement('h3')
        heading.textContent = count ? `${count} Comments` : 'Comment'

        const newComments = dom.createElement('div')
        newComments.appendChild(heading)

        comments.forEach((comment) => {
          const author = comment.querySelector(`.comment-user`)
          const text = comment.querySelector(`.comment-copy`)?.textContent
          const authorHref = author?.getAttribute('href')
          const date = comment.querySelector(`.relativetime-clean`)?.textContent
          const link = comment
            .querySelector(`.comment-link`)
            ?.getAttribute('href')

          if (author && text && authorHref && date && link) {
            // Build the nodes explicitly rather than through an HTML string:
            // `text` and `date` come from textContent (entities already
            // decoded), so interpolating them into innerHTML would turn
            // comment text such as "List<T>" or "<script>" into live markup.
            const authorLink = dom.createElement('a')
            authorLink.setAttribute('href', authorHref)
            // The author markup is already serialized DOM, so copying it as
            // HTML keeps any nested formatting.
            authorLink.innerHTML = author.innerHTML

            const dateLink = dom.createElement('a')
            dateLink.setAttribute('href', link)
            dateLink.textContent = date

            const newComment = dom.createElement('p')
            newComment.appendChild(authorLink)
            newComment.appendChild(dom.createTextNode(`: ${text} - `))
            newComment.appendChild(dateLink)
            newComments.appendChild(newComment)
          }
        })

        commentsDiv.parentNode?.replaceChild(newComments, commentsDiv)
      }
    }

    // remove comment count
    element.querySelector(`span[itemprop='commentCount']`)?.remove()
  }

  /**
   * Reports whether the URL is on stackoverflow.com or one of its subdomains.
   *
   * The match is on the exact host or a ".stackoverflow.com" suffix, so that
   * unrelated hosts that merely end with the same characters (for example
   * "notstackoverflow.com") are not handled.
   *
   * @param url absolute URL of the page
   * @param dom unused; kept for signature compatibility
   * @throws TypeError if `url` is not a valid absolute URL
   */
  shouldPreParse(url: string, dom: Document): boolean {
    const { hostname } = new URL(url)
    return (
      hostname === 'stackoverflow.com' ||
      hostname.endsWith('.stackoverflow.com')
    )
  }

  /**
   * Rewrites the question and its answers inside `div[itemprop='mainEntity']`.
   *
   * The question gets a vote heading and simplified comments. Every `.answer`
   * under `#answers` gets the same treatment and is moved into a new
   * container, headed by the answer count, which replaces the previous
   * children of `#answers`. The document is left untouched when the main
   * entity element is missing.
   *
   * @param url URL of the page (not used by this handler)
   * @param dom the document to modify in place
   * @returns the same document instance
   */
  async preParse(url: string, dom: Document): Promise<Document> {
    const mainEntity = dom.querySelector(`div[itemprop='mainEntity']`)
    if (!mainEntity) {
      return dom
    }

    const question = mainEntity.querySelector('.question')
    if (question) {
      this.parseVotes(question, dom, 'Question')
      this.parseComments(question, dom)
    }

    const answersDiv = mainEntity.querySelector('#answers')
    if (answersDiv) {
      const count = mainEntity.querySelector(
        `span[itemprop='answerCount']`
      )?.textContent

      const heading = dom.createElement('h3')
      heading.textContent = count ? `${count} Answers` : 'Answer'

      const newAnswers = dom.createElement('div')
      newAnswers.appendChild(heading)

      answersDiv.querySelectorAll(`.answer`).forEach((answer) => {
        const title = answer.classList.contains('accepted-answer')
          ? 'Accepted Answer'
          : 'Answer'
        this.parseVotes(answer, dom, title)
        this.parseComments(answer, dom)
        // appendChild moves the answer out of its old position.
        newAnswers.appendChild(answer)
      })

      answersDiv.replaceChildren(newAnswers)
    }

    return dom
  }
}