From 2bf932cbdc06dbdc52ccc5ea5dc30f1c1e3fd4f5 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 18 May 2023 16:50:49 +0800 Subject: [PATCH 1/6] fix: &nbsp; breaks highlight --- packages/api/src/utils/parser.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/api/src/utils/parser.ts b/packages/api/src/utils/parser.ts index a3046d94f..a8df379ba 100644 --- a/packages/api/src/utils/parser.ts +++ b/packages/api/src/utils/parser.ts @@ -501,6 +501,7 @@ export const highlightTranslators: TranslatorConfigObject = { return { prefix: '==', postfix: '==', + content: node.innerHTML.trim(), } }, } From 48554be76d4e817557deb28942d4ca37d8dadcb3 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 18 May 2023 21:04:27 +0800 Subject: [PATCH 2/6] fix: wrap link with == --- packages/api/src/utils/highlightGenerator.ts | 7 +- packages/api/src/utils/parser.ts | 79 +++++++++++++++++++- 2 files changed, 81 insertions(+), 5 deletions(-) diff --git a/packages/api/src/utils/highlightGenerator.ts b/packages/api/src/utils/highlightGenerator.ts index e9c8e05b5..9b21c14df 100644 --- a/packages/api/src/utils/highlightGenerator.ts +++ b/packages/api/src/utils/highlightGenerator.ts @@ -393,7 +393,10 @@ export function makeHighlightNodeAttributes( let startingTextNodeIndex = textNodeIndex let quote = '' - while (highlightTextEnd > textNodes[startingTextNodeIndex].startIndex) { + while ( + startingTextNodeIndex < textNodes.length && + highlightTextEnd > textNodes[startingTextNodeIndex].startIndex + ) { const { node, textPartsToHighlight, isParagraphStart } = fillHighlight({ textNodes, startingTextNodeIndex, @@ -419,7 +422,7 @@ export function makeHighlightNodeAttributes( isParagraphStart && !i && quote && (quote += '\n') quote += text } - + console.log('quote', quote) const newHighlightSpan = document.createElement('span') newHighlightSpan.setAttribute(highlightIdAttribute, id) newHighlightSpan.appendChild(newTextNode) diff --git a/packages/api/src/utils/parser.ts 
b/packages/api/src/utils/parser.ts index a8df379ba..5e9a83969 100644 --- a/packages/api/src/utils/parser.ts +++ b/packages/api/src/utils/parser.ts @@ -11,6 +11,7 @@ import { decode } from 'html-entities' import * as jwt from 'jsonwebtoken' import { parseHTML } from 'linkedom' import { NodeHtmlMarkdown, TranslatorConfigObject } from 'node-html-markdown' +import { ElementNode } from 'node-html-markdown/dist/nodes' import { ILike } from 'typeorm' import { promisify } from 'util' import { v4 as uuid } from 'uuid' @@ -494,14 +495,86 @@ export const fetchFavicon = async ( // custom transformer to wrap tags in markdown highlight tags `==` export const highlightTranslators: TranslatorConfigObject = { + /* Link */ + a: ({ node, options, visitor }) => { + const href = node.getAttribute('href') + if (!href) return {} + + // Encodes symbols that can cause problems in markdown + let encodedHref = '' + for (const chr of href) { + switch (chr) { + case '(': + encodedHref += '%28' + break + case ')': + encodedHref += '%29' + break + case '_': + encodedHref += '%5F' + break + case '*': + encodedHref += '%2A' + break + default: + encodedHref += chr + } + } + + const title = node.getAttribute('title') + + let hasHighlight = false + // If the link is a highlight, wrap it in `==` tags + node.childNodes.forEach((child) => { + if ( + child.nodeType === 1 && + (child as ElementNode).getAttribute(highlightIdAttribute) + ) { + hasHighlight = true + return + } + }) + + // Inline link, when possible + // See: https://github.com/crosstype/node-html-markdown/issues/17 + if (node.textContent === href && options.useInlineLinks) + return { + prefix: hasHighlight ? '==' : undefined, + postfix: hasHighlight ? '==' : undefined, + content: `<${encodedHref}>`, + } + + const prefix = hasHighlight ? '==[' : '[' + const postfix = + ']' + + (!options.useLinkReferenceDefinitions + ? `(${encodedHref}${title ? ` "${title}"` : ''})` + : `[${visitor.addOrGetUrlDefinition(encodedHref)}]`) + + `${hasHighlight ? 
'==' : ''}` + + return { + postprocess: ({ content }) => content.replace(/(?:\r?\n)+/g, ' '), + childTranslators: visitor.instance.aTagTranslators, + prefix, + postfix, + } + }, + span: ({ node }) => { const id = node.getAttribute(highlightIdAttribute) if (!id) return {} + const hasLeadingSpace = node.innerHTML.startsWith(' ') + const hasTrailingSpace = node.innerHTML.endsWith(' ') + // remove the leading and trailing space + const content = node.innerHTML.trim() + const prefix = hasLeadingSpace ? ' ==' : '==' + const postfix = hasTrailingSpace ? '== ' : '==' + return { - prefix: '==', - postfix: '==', - content: node.innerHTML.trim(), + prefix, + postfix, + content, } }, } From 0adab7ad23b89cc25f577f3d9a090b134d5eef02 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 18 May 2023 21:07:51 +0800 Subject: [PATCH 3/6] fix: sentry report on unable to find highlight --- packages/api/src/utils/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/utils/parser.ts b/packages/api/src/utils/parser.ts index 5e9a83969..ed57f5eb7 100644 --- a/packages/api/src/utils/parser.ts +++ b/packages/api/src/utils/parser.ts @@ -625,7 +625,7 @@ export const htmlToHighlightedMarkdown = ( document ) } catch (err) { - console.error(err) + console.log(err) } }) html = document.documentElement.outerHTML From 331ddf69d45fbb87163a9e9a0a583e83454bc04f Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 18 May 2023 21:12:44 +0800 Subject: [PATCH 4/6] remove debugging log --- packages/api/src/utils/highlightGenerator.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/utils/highlightGenerator.ts b/packages/api/src/utils/highlightGenerator.ts index 9b21c14df..0146e4251 100644 --- a/packages/api/src/utils/highlightGenerator.ts +++ b/packages/api/src/utils/highlightGenerator.ts @@ -422,7 +422,7 @@ export function makeHighlightNodeAttributes( isParagraphStart && !i && quote && (quote += '\n') quote += text } - 
console.log('quote', quote) + const newHighlightSpan = document.createElement('span') newHighlightSpan.setAttribute(highlightIdAttribute, id) newHighlightSpan.appendChild(newTextNode) From 87f55eabf6a3056f6c8e49e67e3028f6bf749824 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Fri, 19 May 2023 10:54:43 +0800 Subject: [PATCH 5/6] fix: sentry report on node is null --- packages/api/src/utils/highlightGenerator.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/api/src/utils/highlightGenerator.ts b/packages/api/src/utils/highlightGenerator.ts index 0146e4251..58bcf325d 100644 --- a/packages/api/src/utils/highlightGenerator.ts +++ b/packages/api/src/utils/highlightGenerator.ts @@ -66,6 +66,8 @@ function getTextNodesBetween(rootNode: Node, startNode: Node, endNode: Node) { } function getTextNodes(node: Node) { + if (!node) return + if (node == startNode) { pastStartNode = true } @@ -384,7 +386,7 @@ export function makeHighlightNodeAttributes( patch: string, document: Document ) { - const rootNode = document.documentElement + const rootNode = document.getRootNode() const allArticleNodes = getTextNodesBetween(rootNode, rootNode, rootNode) const { highlightTextStart, highlightTextEnd, textNodes, textNodeIndex } = From 4cc95479355156f52b47db8512f281462e9b9d1a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Fri, 19 May 2023 16:55:50 +0800 Subject: [PATCH 6/6] fix: fallback to html if failed to convert html to markdown --- packages/api/src/resolvers/article/index.ts | 10 +++++++--- packages/api/src/utils/parser.ts | 17 +++++++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/packages/api/src/resolvers/article/index.ts b/packages/api/src/resolvers/article/index.ts index 0d46ef642..157e4b489 100644 --- a/packages/api/src/resolvers/article/index.ts +++ b/packages/api/src/resolvers/article/index.ts @@ -948,9 +948,13 @@ export const searchResolver = authorized< if (params.includeContent && r.content) { // convert html to the 
requested format const format = params.format || ArticleFormat.Html - const converter = contentConverter(format) - if (converter) { - r.content = converter(r.content, r.highlights) + try { + const converter = contentConverter(format) + if (converter) { + r.content = converter(r.content, r.highlights) + } + } catch (error) { + console.log('Error converting content', error) } } diff --git a/packages/api/src/utils/parser.ts b/packages/api/src/utils/parser.ts index ed57f5eb7..d3f05c1d2 100644 --- a/packages/api/src/utils/parser.ts +++ b/packages/api/src/utils/parser.ts @@ -609,11 +609,24 @@ export const htmlToHighlightedMarkdown = ( html: string, highlights?: Highlight[] ): string => { - if (!highlights) { + if (!highlights || highlights.length == 0) { + return nhm.translate(/* html */ html) + } + + let document: Document + + try { + document = parseHTML(html).document + + if (!document || !document.documentElement) { + // the html is invalid + throw new Error('Invalid html content') + } + } catch (err) { + console.log(err) return nhm.translate(/* html */ html) } - const document = parseHTML(html).document // wrap highlights in special tags highlights .filter((h) => h.type == 'HIGHLIGHT' && h.patch)