From 244fb4ccb53960b542eabc02d72e2e9b6bf4385f Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 26 Jun 2023 16:40:14 +0800 Subject: [PATCH] fix: removing node with background image --- packages/puppeteer-parse/index.js | 4 +- .../readabilityjs/test/generate-testcase.js | 4 +- .../expected-metadata.json | 11 + .../digital-garden-docs/expected.html | 52 + .../digital-garden-docs/source.html | 1259 +++++++++++++++++ .../test-pages/digital-garden-docs/url.txt | 1 + 6 files changed, 1327 insertions(+), 4 deletions(-) create mode 100644 packages/readabilityjs/test/test-pages/digital-garden-docs/expected-metadata.json create mode 100644 packages/readabilityjs/test/test-pages/digital-garden-docs/expected.html create mode 100644 packages/readabilityjs/test/test-pages/digital-garden-docs/source.html create mode 100644 packages/readabilityjs/test/test-pages/digital-garden-docs/url.txt diff --git a/packages/puppeteer-parse/index.js b/packages/puppeteer-parse/index.js index 92b273331..c172b546a 100644 --- a/packages/puppeteer-parse/index.js +++ b/packages/puppeteer-parse/index.js @@ -724,10 +724,10 @@ async function retrieveHtml(page, logRecord) { // Replacing element only of there are no content inside, b/c might remove important div with content. // Article example: http://www.josiahzayner.com/2017/01/genetic-designer-part-i.html // DIV with class "content-inner" has `url("https://resources.blogblog.com/blogblog/data/1kt/travel/bg_container.png")` background image. - if (el.innerHTML.length < 25) { + if (!el.textContent) { const img = document.createElement('img'); img.src = matchedSRC[1]; - el && el.parentNode && el.parentNode.removeChild(el); + el && el.parentNode && el.parentNode.replaceChild(img, el); } } } diff --git a/packages/readabilityjs/test/generate-testcase.js b/packages/readabilityjs/test/generate-testcase.js index 90a5525fa..05a978ca0 100644 --- a/packages/readabilityjs/test/generate-testcase.js +++ b/packages/readabilityjs/test/generate-testcase.js @@ -186,10 +186,10 @@ async function fetchSource(url, callbackFn) { // Replacing element only of there are no content inside, b/c might remove important div with content. // Article example: http://www.josiahzayner.com/2017/01/genetic-designer-part-i.html // DIV with class "content-inner" has `url("https://resources.blogblog.com/blogblog/data/1kt/travel/bg_container.png")` background image. - if (el.innerHTML.length < 25) { + if (!el.textContent) { const img = document.createElement('img'); img.src = matchedSRC[1]; - el && el.parentNode && el.parentNode.removeChild(el); + el && el.parentNode && el.parentNode.replaceChild(img, el); } } } diff --git a/packages/readabilityjs/test/test-pages/digital-garden-docs/expected-metadata.json b/packages/readabilityjs/test/test-pages/digital-garden-docs/expected-metadata.json new file mode 100644 index 000000000..c88096dbe --- /dev/null +++ b/packages/readabilityjs/test/test-pages/digital-garden-docs/expected-metadata.json @@ -0,0 +1,11 @@ +{ + "title": "Digital Garden Overview", + "byline": null, + "dir": null, + "excerpt": "The Obsidian Digital Garden Plugin is a free and open source publishing tool for Obsidian.", + "siteName": "fakehost", + "siteIcon": "http://fakehost/favicon.ico", + "publishedDate": null, + "language": "English", + "readerable": false +} diff --git a/packages/readabilityjs/test/test-pages/digital-garden-docs/expected.html b/packages/readabilityjs/test/test-pages/digital-garden-docs/expected.html new file mode 100644 index 000000000..f4215097d --- /dev/null +++ b/packages/readabilityjs/test/test-pages/digital-garden-docs/expected.html @@ -0,0 +1,52 @@ +
+
+
+
+

🏡

+

Obsidian Digital Garden

+

The Obsidian Digital Garden Plugin is a free and open source publishing tool for Obsidian.

+

Publish your notes directly from Obsidian to the internet. While feature packed, it is highly configurable and hackable. Enable and disable features on a per-note basis. Use it as a full fledged digital garden or as a simple note sharing solution.

+

👉 Getting Started
👉 Features +

+

Examples

+

This site is itself powered by the Digital Garden plugin.

+
+
+

Edav Garden +

+
+
+

ajy.co +

+
+
+

Hermitage +

+
+
+

That Other Dev +

+
+
+

Razvan Andrei Surdu +

+
+
+

IceWind.Quest +

+
+
+

hxhc +

+
+
+

notes.ole.dev +

+
+
+
+

+

+
+
+
\ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/digital-garden-docs/source.html b/packages/readabilityjs/test/test-pages/digital-garden-docs/source.html new file mode 100644 index 000000000..7c5a644a3 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/digital-garden-docs/source.html @@ -0,0 +1,1259 @@ + + + + + Digital Garden Overview + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ + +
+
+ +
+ + +
+
+
+
+
+
+

+ 🏡 +

+

+ Obsidian Digital Garden +

+

+ The Obsidian Digital Garden Plugin is a free and open source publishing tool for Obsidian. +

+

+ Publish your notes directly from Obsidian to the internet. While feature packed, it is highly configurable and hackable. Enable and disable features on a per-note basis. Use it as a full fledged digital garden or as a simple note sharing solution. +

+

+ 👉 Getting Started
+ 👉 Features +

+
+ +
+

+ Examples +

+

+ This site is itself powered by the Digital Garden plugin. +

+

+ Sites other people have created +

+
+
+ Edav Garden +
+
+ ajy.co +
+
+ Hermitage +
+
+ That Other Dev +
+
+ Razvan Andrei Surdu +
+
+ IceWind.Quest +
+
+ hxhc +
+
+ notes.ole.dev +
+
+
+ +
+
+ + + + + + + diff --git a/packages/readabilityjs/test/test-pages/digital-garden-docs/url.txt b/packages/readabilityjs/test/test-pages/digital-garden-docs/url.txt new file mode 100644 index 000000000..4140c4f6b --- /dev/null +++ b/packages/readabilityjs/test/test-pages/digital-garden-docs/url.txt @@ -0,0 +1 @@ +https://dg-docs.ole.dev/ \ No newline at end of file