From 417ed0a4eb7769b5dc10863255161735e47778cc Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 31 May 2022 20:02:54 +0800 Subject: [PATCH] Fetch tweet id from url --- packages/readabilityjs/Readability.js | 27 +- packages/readabilityjs/package.json | 3 +- .../expected-metadata.json | 10 + .../expected.html | 38 + .../substack-michaelshellenberger/source.html | 2000 +++++++++++++++++ .../substack-michaelshellenberger/url.txt | 1 + 6 files changed, 2073 insertions(+), 6 deletions(-) create mode 100644 packages/readabilityjs/test/test-pages/substack-michaelshellenberger/expected-metadata.json create mode 100644 packages/readabilityjs/test/test-pages/substack-michaelshellenberger/expected.html create mode 100644 packages/readabilityjs/test/test-pages/substack-michaelshellenberger/source.html create mode 100644 packages/readabilityjs/test/test-pages/substack-michaelshellenberger/url.txt diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 33e66d52d..f322a2113 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -22,6 +22,7 @@ var parseSrcset = require('parse-srcset'); var htmlEntities = require('html-entities') +const axios = require("axios"); /** Checks whether an element is a wrapper for tweet */ const hasTweetInChildren = element => { @@ -2204,15 +2205,15 @@ Readability.prototype = { } }, - _createPlaceholders: function (e) { - Array.from(e.getElementsByTagName('a')).forEach(element => { + _createPlaceholders: async function (e) { + for (const element of Array.from(e.getElementsByTagName('a'))) { if (this.isEmbed(element)) { - return; + continue; } // Create tweets placeholders from links - if (element.href.includes('twitter.com')) { + if (element.href.includes('twitter.com') || element.parentNode.className === 'tweet') { const link = element.href; const regex = /(https?:\/\/twitter\.com\/\w+\/status\/)(\d+)/gm; const match = regex.exec(link); @@ -2234,6 +2235,22 @@ Readability.prototype = { if (tweetParent && tweetParent.className.includes('twitter-tweet')) { tweetParent.parentNode.replaceChild(tweet, tweetParent); } + } else if (element.parentNode.className === 'tweet') { + // Create tweets placeholders from classname + try { + const response = await axios.get(link); + const tweetUrl = response.request.res.responseUrl; + const match = regex.exec(tweetUrl); + if (Array.isArray(match) && typeof match[2] === 'string') { + const tweet = this._doc.createElement('div'); + tweet.innerText = 'Tweet placeholder'; + tweet.className = 'tweet-placeholder'; + tweet.setAttribute('data-tweet-id', match[2]); + element.parentNode.replaceChild(tweet, element); + } + } catch (e) { + this.log('Error loading tweet: ', link, e); + } } } @@ -2247,7 +2264,7 @@ Readability.prototype = { this._createInstagramPostPlaceholder(element, match[2]); } } - }); + } Array.from(e.getElementsByTagName('iframe')).forEach(element => { diff --git a/packages/readabilityjs/package.json b/packages/readabilityjs/package.json index ca3df23c8..32a65ff40 100644 --- a/packages/readabilityjs/package.json +++ b/packages/readabilityjs/package.json @@ -30,7 +30,8 @@ "mocha": "^8.2.0", "puppeteer": "^10.1.0", "sinon": "^7.3.2", - "linkedom": "^0.14.9" + "linkedom": "^0.14.9", + "axios": "^0.26.0" }, "dependencies": { "html-entities": "^2.3.2", diff --git a/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/expected-metadata.json b/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/expected-metadata.json new file mode 100644 index 000000000..f41a6b334 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/expected-metadata.json @@ -0,0 +1,10 @@ +{ + "title": "The 2-minute 20-second video that changes everything", + "byline": "Michael Shellenberger", + "dir": null, + "excerpt": "For decades, people have claimed that homelessness is just a\n housing problem. Sure, many also have substance use and mental\n illness issues. But if we just give homeless people their own\n own studio apartments, and decriminalize public camping,\n drugs, and shoplifting, the problem will go away, many\n claimed.", + "siteName": null, + "publishedDate": "2001-05-25T16:00:00.000Z", + "language": "English", + "readerable": true +} diff --git a/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/expected.html b/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/expected.html new file mode 100644 index 000000000..f11abe4e0 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/expected.html @@ -0,0 +1,38 @@ +
+
+
+
+
+ + + + + + + + +
+ +
+
+
+

For decades, people have claimed that homelessness is just a housing problem. Sure, many also have substance use and mental illness issues. But if we just give homeless people their own own studio apartments, and decriminalize public camping, drugs, and shoplifting, the problem will go away, many claimed.

+

That hasn’t happened. Instead, the open drug scenes have worsened. Nationally, drug overdoses and poisonings increased from 17,000 in 2000 to 108,000 in 2021. And California, which pioneered the “Housing First”/decriminalization approach saw its homeless population increase 31% between 2011 and 2020, even as homelessness declined 18% in the rest of the country.

+

I debunked the lies about homelessness in San Fransicko, in hundreds of articles, and on dozens of TV and podcast appearances. But when it comes to educating the public, nothing has been more impactful than the video interviews of homeless people that I’ve conducted over the last few months with my friend Leighton Woodhouse, a documentary filmmaker, as part of my run to become governor of California.

+

Now Leighton has assembled those interviews into a two-minute 20 second video we’ve posted on Twitter. It’s a must-watch. It’s only been on-line for a few hours, and over 130,000 people have seen it.

+

I hope you’ll take a minute to watch it. And, after you do, please consider a donation to Shellenberger for Governor.

+

There’s just 12 days before the primary election. Anyone can vote for anyone. And anyone in the US can donate. Whatever happens, we will make history.

+

+ Donate to Shellenberger 2022 +

+
+
+

+ You’re a free subscriber to Michael Shellenberger. For the full experience, become a paid subscriber. +

+

+ Subscribe +

+
+
+
\ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/source.html b/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/source.html new file mode 100644 index 000000000..8ac94637e --- /dev/null +++ b/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/source.html @@ -0,0 +1,2000 @@ + + + The 2-minute 20-second video that changes everything + + + + +
+ For decades, people have claimed that homelessness is just a housing + problem. Sure, many also have substance use and mental illness issues. But + if we just give homeless people their own own studio apartments, and + decriminalize public camping, drugs, and shoplifting, the problem will go + away, many claimed. +  ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ +
+ + + + + + + + + + + + + + + diff --git a/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/url.txt b/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/url.txt new file mode 100644 index 000000000..2fa284abc --- /dev/null +++ b/packages/readabilityjs/test/test-pages/substack-michaelshellenberger/url.txt @@ -0,0 +1 @@ +https://michaelshellenberger.substack.com/p/the-2-minute-20-second-video-that?s=r \ No newline at end of file