Make fetching tweet URL async

2022-05-31 22:50:14 +08:00
parent 417ed0a4eb
commit 0b0edd3e69
1 changed files with 22 additions and 22 deletions
--- a/packages/readabilityjs/Readability.js
+++ b/packages/readabilityjs/Readability.js
@ -830,8 +830,8 @@ Readability.prototype = {
   * @param Element
   * @return void
   **/
-  _prepArticle: function (articleContent) {
-    this._createPlaceholders(articleContent);
+  _prepArticle: async function (articleContent) {
+    await this._createPlaceholders(articleContent);
    this._cleanStyles(articleContent);
    // Check for data tables before we continue, to avoid removing items in
    // those tables, which will often be isolated even though they're
@ -1105,7 +1105,7 @@ Readability.prototype = {
   * @param page a document to run upon. Needs to be a full document, complete with body.
   * @return Element
   **/
-  _grabArticle: function (page) {
+  _grabArticle: async function(page) {
    this.log("**** grabArticle ****");
    const doc = this._doc;
    const isPaging = page !== null;
@ -1148,13 +1148,13 @@ Readability.prototype = {
        if (shouldRemoveTitleHeader && this._headerDuplicatesTitle(node)) {
          const headingText = node.textContent.trim();
          const titleText = this._articleTitle.trim();
-          this.log("Removing header: ", {headingText, titleText});
+          this.log("Removing header: ", { headingText, titleText });
          shouldRemoveTitleHeader = false;
          // Replacing title with the heading if the title includes heading but heading is smaller
          // Example article: http://jsomers.net/i-should-have-loved-biology
          // Or if there is the specific attribute that we can lean on.
          // For example "headline" in this article - https://nymag.com/intelligencer/2020/12/four-seasons-total-landscaping-the-full-est-possible-story.html
-          if ((titleText !== headingText && titleText.includes(headingText)) || this._someNodeAttribute(node, ({value}) => value === 'headline')) {
+          if ((titleText !== headingText && titleText.includes(headingText)) || this._someNodeAttribute(node, ({ value }) => value === 'headline')) {
            this.log('Replacing title with heading')
            this._articleTitle = headingText;
          }
@ -1166,8 +1166,8 @@ Readability.prototype = {
        if (stripUnlikelyCandidates) {
          if (
            (this.REGEXPS.unlikelyCandidates.test(matchString) ||
-            // Checking for the "data-testid" attribute as well for the NYTimes articles
-            // Example article: https://www.nytimes.com/2021/03/31/world/americas/brazil-coronavirus-bolsonaro.html
+              // Checking for the "data-testid" attribute as well for the NYTimes articles
+              // Example article: https://www.nytimes.com/2021/03/31/world/americas/brazil-coronavirus-bolsonaro.html
              this.REGEXPS.unlikelyCandidates.test(node.dataset && node.dataset.testid)) &&
            !this.REGEXPS.okMaybeItsACandidate.test(matchString) &&
            !/tweet(-\w+)?/i.test(matchString) &&
@ -1204,8 +1204,8 @@ Readability.prototype = {

        // Remove DIV, SECTION, and HEADER nodes without any content(e.g. text, image, video, or iframe).
        if ((node.tagName === "DIV" || node.tagName === "SECTION" || node.tagName === "HEADER" ||
-          node.tagName === "H1" || node.tagName === "H2" || node.tagName === "H3" ||
-          node.tagName === "H4" || node.tagName === "H5" || node.tagName === "H6") &&
+            node.tagName === "H1" || node.tagName === "H2" || node.tagName === "H3" ||
+            node.tagName === "H4" || node.tagName === "H5" || node.tagName === "H6") &&
          this._isElementWithoutContent(node)) {
          node = this._removeAndGetNext(node);
          continue;
@ -1271,7 +1271,7 @@ Readability.prototype = {
       * A score is determined by things like number of commas, class names, etc. Maybe eventually link density.
       **/
      var candidates = [];
-      this._forEachNode(elementsToScore, function (elementToScore) {
+      this._forEachNode(elementsToScore, function(elementToScore) {
        if (!elementToScore.parentNode || typeof (elementToScore.parentNode.tagName) === "undefined")
          return;

@ -1297,7 +1297,7 @@ Readability.prototype = {
        contentScore += Math.min(Math.floor(innerText.length / 100), 3);

        // Initialize and score ancestors.
-        this._forEachNode(ancestors, function (ancestor, level) {
+        this._forEachNode(ancestors, function(ancestor, level) {
          if (!ancestor.tagName || !ancestor.parentNode || typeof (ancestor.parentNode.tagName) === "undefined")
            return;

@ -1526,7 +1526,7 @@ Readability.prototype = {
        const figures = this._getAllNodesWithTag(headerNode, ['FIGURE']);
        this._forEachNode(figures, figure => {
          if (!this._someNode(alreadyExistingFigures, existingFigure => existingFigure === figure)) {
-            this.log(`Prepending figure to the article`, {className: figure.className, scr: figure.src})
+            this.log(`Prepending figure to the article`, { className: figure.className, scr: figure.src })
            articleContent.prepend(figure)
          }
        })
@ -1535,7 +1535,7 @@ Readability.prototype = {
      if (this._debug)
        this.log("Article content pre-prep: ", { content: articleContent.innerHTML });
      // So we have all of the content that we need. Now we clean it up for presentation.
-      this._prepArticle(articleContent);
+      await this._prepArticle(articleContent);

      if (this._debug)
        this.log("Article content post-prep: ", { content: articleContent.innerHTML });
@ -1574,17 +1574,17 @@ Readability.prototype = {

        if (this._flagIsActive(this.FLAG_STRIP_UNLIKELYS)) {
          this._removeFlag(this.FLAG_STRIP_UNLIKELYS);
-          this._attempts.push({articleContent: articleContent, textLength: textLength});
+          this._attempts.push({ articleContent: articleContent, textLength: textLength });
        } else if (this._flagIsActive(this.FLAG_WEIGHT_CLASSES)) {
          this._removeFlag(this.FLAG_WEIGHT_CLASSES);
-          this._attempts.push({articleContent: articleContent, textLength: textLength});
+          this._attempts.push({ articleContent: articleContent, textLength: textLength });
        } else if (this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)) {
          this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY);
-          this._attempts.push({articleContent: articleContent, textLength: textLength});
+          this._attempts.push({ articleContent: articleContent, textLength: textLength });
        } else {
-          this._attempts.push({articleContent: articleContent, textLength: textLength});
+          this._attempts.push({ articleContent: articleContent, textLength: textLength });
          // No luck after removing flags, just return the longest text we found during the different loops
-          this._attempts.sort(function (a, b) {
+          this._attempts.sort(function(a, b) {
            return b.textLength - a.textLength;
          });

@ -1601,7 +1601,7 @@ Readability.prototype = {
      if (parseSuccessful) {
        // Find out text direction from ancestors of final top candidate.
        var ancestors = [parentOfTopCandidate, topCandidate].concat(this._getNodeAncestors(parentOfTopCandidate));
-        this._someNode(ancestors, function (ancestor) {
+        this._someNode(ancestors, function(ancestor) {
          if (!ancestor.tagName)
            return false;
          var articleDir = ancestor.getAttribute("dir");
@ -2246,7 +2246,7 @@ Readability.prototype = {
              tweet.innerText = 'Tweet placeholder';
              tweet.className = 'tweet-placeholder';
              tweet.setAttribute('data-tweet-id', match[2]);
-              element.parentNode.replaceChild(tweet, element);
+              element.parentNode.replaceWith(tweet);
            }
          } catch (e) {
            this.log('Error loading tweet: ', link, e);
@ -2880,7 +2880,7 @@ Readability.prototype = {
   *
   * @return void
   **/
-  parse: function () {
+  parse: async function() {
    // Avoid parsing too large documents, as per configuration option
    if (this._maxElemsToParse > 0) {
      var numTags = this._doc.getElementsByTagName("*").length;
@ -2905,7 +2905,7 @@ Readability.prototype = {
    var metadata = this._getArticleMetadata(jsonLd);
    this._articleTitle = metadata.title;

-    var articleContent = this._grabArticle();
+    var articleContent = await this._grabArticle();
    if (!articleContent)
      return null;