Fix code blocks not being formatted correctly in articles from WeChat official accounts
This commit is contained in:
@ -307,23 +307,28 @@ export const parsePreparedContent = async (
|
||||
// Format code blocks
|
||||
// TODO: we probably want to move this type of thing
|
||||
// to the handlers, and have some concept of postHandle
|
||||
const codeBlocks = newDocumentElement.querySelectorAll<HTMLElement>(
|
||||
'pre[class^="prism-"], pre[class^="language-"], pre[class^="code-snippet"], code'
|
||||
const codeBlocks = newDocumentElement.querySelectorAll(
|
||||
'pre[class^="prism-"], pre[class^="language-"], code'
|
||||
)
|
||||
if (codeBlocks.length > 0) {
|
||||
codeBlocks.forEach((e) => {
|
||||
const att = hljs.highlightAuto(e.innerText)
|
||||
const code = document.createElement('code')
|
||||
const langClass =
|
||||
`hljs language-${att.language}` +
|
||||
(att.second_best?.language
|
||||
? ` language-${att.second_best?.language}`
|
||||
: '')
|
||||
code.setAttribute('class', langClass)
|
||||
code.innerHTML = att.value
|
||||
e.replaceWith(code)
|
||||
})
|
||||
}
|
||||
codeBlocks.forEach((e) => {
|
||||
if (!e.textContent) {
|
||||
return e.parentNode?.removeChild(e)
|
||||
}
|
||||
|
||||
// replace <br> or <p> or </p> with \n
|
||||
e.innerHTML = e.innerHTML.replace(/<(br|p|\/p)>/g, '\n')
|
||||
|
||||
const att = hljs.highlightAuto(e.textContent)
|
||||
const code = document.createElement('code')
|
||||
const langClass =
|
||||
`hljs language-${att.language}` +
|
||||
(att.second_best?.language
|
||||
? ` language-${att.second_best?.language}`
|
||||
: '')
|
||||
code.setAttribute('class', langClass)
|
||||
code.innerHTML = att.value
|
||||
e.replaceWith(code)
|
||||
})
|
||||
|
||||
highlightData = findEmbeddedHighlight(newDocumentElement)
|
||||
|
||||
|
||||
@ -206,7 +206,7 @@ Readability.prototype = {
|
||||
unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-head|post-tag|li-date|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h|subscribe-dialog|icon|bumped|dvz-social-media-buttons|post-toc|mobile-menu|mobile-navbar|tl_article_header|mvp(-post)*-(add-story|soc(-mob)*-wrap)|w-condition-invisible|rich-text-block main w-richtext|rich-text-block_ataglance at-a-glance test w-richtext|PostsPage-commentsSection|hide-text/i,
|
||||
// okMaybeItsACandidate: /and|article(?!-breadcrumb)|body|column|content|main|shadow|post-header/i,
|
||||
get okMaybeItsACandidate() {
|
||||
return new RegExp(`and|(?<!${this.articleNegativeLookAheadCandidates.source})article(?!-(${this.articleNegativeLookBehindCandidates.source}))|body|column|content|^(?!main-navigation|main-header)main|shadow|post-header|hfeed site|blog-posts hfeed|container-banners|menu-opacity|header-with-anchor-widget|commentOnSelection`, 'i')
|
||||
return new RegExp(`and|(?<!${this.articleNegativeLookAheadCandidates.source})article(?!-(${this.articleNegativeLookBehindCandidates.source}))|body|column|content|^(?!main-navigation|main-header)main|shadow|post-header|hfeed site|blog-posts hfeed|container-banners|menu-opacity|header-with-anchor-widget|commentOnSelection|highlight--with-header`, 'i')
|
||||
},
|
||||
|
||||
positive: /article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story|tweet(-\w+)?|instagram|image|container-banners|player|commentOnSelection/i,
|
||||
@ -261,7 +261,7 @@ Readability.prototype = {
|
||||
"SUP", "TEXTAREA", "TIME", "VAR", "WBR"
|
||||
],
|
||||
|
||||
// These are the classes that readability sets itself.
|
||||
// These are the classes that we want to keep.
|
||||
CLASSES_TO_PRESERVE: [
|
||||
"page", "twitter-tweet", "tweet-placeholder", "instagram-placeholder", "morning-brew-markets", "prism-code"
|
||||
],
|
||||
@ -3082,6 +3082,7 @@ Readability.prototype = {
|
||||
this._removeScripts(this._doc);
|
||||
|
||||
this._prepDocument();
|
||||
console.log(this._doc.body.innerHTML);
|
||||
|
||||
var metadata = this._getArticleMetadata(jsonLd);
|
||||
this._articleTitle = metadata.title;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user