diff --git a/packages/api/package.json b/packages/api/package.json index 37976687a..00f11fe3a 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -89,6 +89,7 @@ "@types/analytics-node": "^3.1.7", "@types/bcryptjs": "^2.4.2", "@types/chai": "^4.2.18", + "@types/chai-as-promised": "^7.1.5", "@types/chai-string": "^1.4.2", "@types/cookie": "^0.4.0", "@types/cookie-parser": "^1.4.2", @@ -109,6 +110,7 @@ "@types/uuid": "^8.3.0", "@types/voca": "^1.4.0", "chai": "^4.3.4", + "chai-as-promised": "^7.1.1", "chai-string": "^1.5.0", "circular-dependency-plugin": "^5.2.0", "mocha": "^9.0.1", diff --git a/packages/api/src/routers/svc/emails.ts b/packages/api/src/routers/svc/emails.ts index 4bc000d81..3624cdeac 100644 --- a/packages/api/src/routers/svc/emails.ts +++ b/packages/api/src/routers/svc/emails.ts @@ -51,7 +51,7 @@ export function emailsServiceRouter() { return } - if (isProbablyNewsletter(data.html)) { + if (await isProbablyNewsletter(data.html)) { console.log('handling as newsletter', data) await saveNewsletterEmail({ email: data.to, diff --git a/packages/api/test/utils/parser.test.ts b/packages/api/test/utils/parser.test.ts index 87c48e1b0..8c053ca8f 100644 --- a/packages/api/test/utils/parser.test.ts +++ b/packages/api/test/utils/parser.test.ts @@ -1,54 +1,73 @@ import 'mocha' +import * as chai from 'chai' import { expect } from 'chai' import 'chai/register-should' import fs from 'fs' -import { findNewsletterUrl, isProbablyNewsletter, parsePageMetadata, parsePreparedContent } from '../../src/utils/parser' +import { + findNewsletterUrl, + isProbablyNewsletter, + parsePageMetadata, + parsePreparedContent, +} from '../../src/utils/parser' import nock from 'nock' +import chaiAsPromised from 'chai-as-promised' + +chai.use(chaiAsPromised) const load = (path: string): string => { return fs.readFileSync(path, 'utf8') } describe('isProbablyNewsletter', () => { - it('returns true for substack newsletter', () => { + it('returns true for substack newsletter', async () => { const html = load('./test/utils/data/substack-forwarded-newsletter.html') - isProbablyNewsletter(html).should.be.true + await expect(isProbablyNewsletter(html)).to.eventually.be.true }) - it('returns true for private forwarded substack newsletter', () => { - const html = load('./test/utils/data/substack-private-forwarded-newsletter.html') - isProbablyNewsletter(html).should.be.true + it('returns true for private forwarded substack newsletter', async () => { + const html = load( + './test/utils/data/substack-private-forwarded-newsletter.html' + ) + await expect(isProbablyNewsletter(html)).to.eventually.be.true }) - it('returns false for substack welcome email', () => { + it('returns false for substack welcome email', async () => { const html = load('./test/utils/data/substack-forwarded-welcome-email.html') - isProbablyNewsletter(html).should.be.false + await expect(isProbablyNewsletter(html)).to.eventually.be.false }) - it('returns true for beehiiv.com newsletter', () => { + it('returns true for beehiiv.com newsletter', async () => { const html = load('./test/utils/data/beehiiv-newsletter.html') - isProbablyNewsletter(html).should.be.true + await expect(isProbablyNewsletter(html)).to.eventually.be.true }) }) describe('findNewsletterUrl', async () => { it('gets the URL from the header if it is a substack newsletter', async () => { nock('https://newsletter.slowchinese.net') - .head('/p/companies-that-eat-people-217?token=eyJ1c2VyX2lkIjoxMTU0MzM0NSwicG9zdF9pZCI6NDg3MjA5NDAsImlhdCI6MTY0NTI1NzQ1MSwiaXNzIjoicHViLTI4MDUzMSIsInN1YiI6InBvc3QtcmVhY3Rpb24ifQ.l5F3Kx6K9tvy9cRAXx3MepobQBCJDJQgAxOpA0INIZA') - .reply(200, ''); + .head( + '/p/companies-that-eat-people-217?token=eyJ1c2VyX2lkIjoxMTU0MzM0NSwicG9zdF9pZCI6NDg3MjA5NDAsImlhdCI6MTY0NTI1NzQ1MSwiaXNzIjoicHViLTI4MDUzMSIsInN1YiI6InBvc3QtcmVhY3Rpb24ifQ.l5F3Kx6K9tvy9cRAXx3MepobQBCJDJQgAxOpA0INIZA' + ) + .reply(200, '') const html = load('./test/utils/data/substack-forwarded-newsletter.html') const url = await findNewsletterUrl(html) // Not sure if the redirects from substack expire, this test could eventually fail - expect(url).to.startWith('https://newsletter.slowchinese.net/p/companies-that-eat-people-217') + expect(url).to.startWith( + 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217' + ) }) it('gets the URL from the header if it is a beehiiv newsletter', async () => { nock('https://u23463625.ct.sendgrid.net') - .head('/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno') - .reply(302, undefined,{ - 'Location': 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' - }) - .get('/p/talked-guy-spent-30m-beeple') - .reply(200, ''); + .head( + '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno' + ) + .reply(302, undefined, { + Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple', + }) + .get('/p/talked-guy-spent-30m-beeple') + .reply(200, '') const html = load('./test/utils/data/beehiiv-newsletter.html') const url = await findNewsletterUrl(html) - expect(url).to.startWith('https://www.milkroad.com/p/talked-guy-spent-30m-beeple') + expect(url).to.startWith( + 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' + ) }) it('returns undefined if it is not a newsletter', async () => { const html = load('./test/utils/data/substack-forwarded-welcome-email.html') @@ -63,31 +82,35 @@ describe('parseMetadata', async () => { const metadata = await parsePageMetadata(html) expect(metadata?.author).to.deep.equal('Omnivore') expect(metadata?.title).to.deep.equal('Code Block Syntax Highlighting') - expect(metadata?.previewImage).to.deep.equal('https://cdn.substack.com/image/fetch/w_1200,h_600,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2F2ab1f7e8-2ca7-4011-8ccb-43d0b3bd244f_1490x2020.png') - expect(metadata?.description).to.deep.equal('Highlighted in Omnivore') + expect(metadata?.previewImage).to.deep.equal( + 'https://cdn.substack.com/image/fetch/w_1200,h_600,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2F2ab1f7e8-2ca7-4011-8ccb-43d0b3bd244f_1490x2020.png' + ) + expect(metadata?.description).to.deep.equal( + 'Highlighted in Omnivore' + ) }) }) describe('parsePreparedContent', async () => { it('gets published date when JSONLD fails to load', async () => { const html = load('./test/utils/data/stratechery-blog-post.html') - const result = await parsePreparedContent( - 'https://example.com/', - { - document: html, - pageInfo: { } - }, + const result = await parsePreparedContent('https://example.com/', { + document: html, + pageInfo: {}, + }) + expect(result.parsedContent?.publishedDate?.getTime()).to.equal( + new Date('2016-04-05T15:27:51+00:00').getTime() ) - expect(result.parsedContent?.publishedDate?.getTime()).to.equal(new Date('2016-04-05T15:27:51+00:00').getTime()) }) }) describe('parsePreparedContent', async () => { nock('https://oembeddata').get('/').reply(200, { - "version":"1.0", - "provider_name":"Hippocratic Adventures", - "provider_url":"https:\/\/www.hippocraticadventures.com", - "title":"The Ultimate Guide to Practicing Medicine in Singapore – Part 2" + version: '1.0', + provider_name: 'Hippocratic Adventures', + provider_url: 'https://www.hippocraticadventures.com', + title: + 'The Ultimate Guide to Practicing Medicine in Singapore – Part 2', }) it('gets metadata from external JSONLD if available', async () => { @@ -98,13 +121,12 @@ describe('parsePreparedContent', async () => { body ` - const result = await parsePreparedContent( - 'https://example.com/', - { - document: html, - pageInfo: { } - }, - ) - expect(result.parsedContent?.title).to.equal('The Ultimate Guide to Practicing Medicine in Singapore – Part 2') + const result = await parsePreparedContent('https://example.com/', { + document: html, + pageInfo: {}, + }) + expect(result.parsedContent?.title).to.equal( + 'The Ultimate Guide to Practicing Medicine in Singapore – Part 2' + ) }) }) diff --git a/packages/readabilityjs/package.json b/packages/readabilityjs/package.json index 32a65ff40..227de2c2a 100644 --- a/packages/readabilityjs/package.json +++ b/packages/readabilityjs/package.json @@ -24,14 +24,15 @@ "homepage": "https://github.com/mozilla/readability", "devDependencies": { "@c4312/matcha": "^1.3.1", + "axios": "^0.26.0", "chai": "^2.1.*", + "chai-as-promised": "^7.1.1", "htmltidy2": "^0.3.0", "js-beautify": "^1.13.0", + "linkedom": "^0.14.9", "mocha": "^8.2.0", "puppeteer": "^10.1.0", - "sinon": "^7.3.2", - "linkedom": "^0.14.9", - "axios": "^0.26.0" + "sinon": "^7.3.2" }, "dependencies": { "html-entities": "^2.3.2", diff --git a/packages/readabilityjs/test/test-readability.js b/packages/readabilityjs/test/test-readability.js index 14be0aa25..6db89071e 100644 --- a/packages/readabilityjs/test/test-readability.js +++ b/packages/readabilityjs/test/test-readability.js @@ -1,7 +1,9 @@ var chai = require("chai"); var sinon = require("sinon"); +var chaiAsPromised = require("chai-as-promised"); const { parseHTML } = require("linkedom"); +chai.use(chaiAsPromised); chai.config.includeStack = true; var expect = chai.expect; @@ -220,11 +222,11 @@ describe("Readability API", function() { describe("#parse", function() { var exampleSource = testPages[0].source; - it("shouldn't parse oversized documents as per configuration", function() { + it("shouldn't parse oversized documents as per configuration", async function() { var doc = new JSDOMParser().parse("
yo
"); - expect(async function() { - await (new Readability(doc, { maxElemsToParse: 1 })).parse(); - }).to.Throw("Aborting parsing document; 2 elements found"); + await expect( + (new Readability(doc, { maxElemsToParse: 1 })).parse() + ).to.be.rejectedWith("Aborting parsing document; 2 elements found"); }); it("should run _cleanClasses with default configuration", async function() { @@ -279,7 +281,7 @@ describe("Readability API", function() { var expected_xhtml = "
My cat: \"Red
"; - var content = await (new Readability(dom.document)).parse().content; + var content = (await (new Readability(dom.document)).parse()).content; expect(content).eql(expected_xhtml); }); @@ -296,11 +298,11 @@ describe("Readability API", function() { 'https://webkit.org/demos/srcset/image-2x.png 2x,' + 'https://webkit.org/demos/srcset/image-3x.png 3x,' + 'https://webkit.org/demos/srcset/image-4x.png 4x,">'; - var content = await (new Readability(dom.document, { + var content = (await (new Readability(dom.document, { createImageProxyUrl: function(url) { return url; } - })).parse().content; + })).parse()).content; expect(content).eql(expected_xhtml); }); @@ -309,11 +311,11 @@ describe("Readability API", function() { var expected_xhtml = '
' + 'My image: ' + '
'; - var content = await (new Readability(dom.document, { + var content = (await (new Readability(dom.document, { createImageProxyUrl: function(url) { return url; } - })).parse().content; + })).parse()).content; expect(content).eql(expected_xhtml); }); }); diff --git a/yarn.lock b/yarn.lock index d516c3c6f..2f5cc13e8 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7382,6 +7382,13 @@ resolved "https://registry.yarnpkg.com/@types/caseless/-/caseless-0.12.2.tgz#f65d3d6389e01eeb458bd54dc8f52b95a9463bc8" integrity sha512-6ckxMjBBD8URvjB6J3NcnuAn5Pkl7t3TizAg+xdlzzQGSPSmBcXf8KoIH0ua/i+tio+ZRUHEXp0HEmvaR4kt0w== +"@types/chai-as-promised@^7.1.5": + version "7.1.5" + resolved "https://registry.yarnpkg.com/@types/chai-as-promised/-/chai-as-promised-7.1.5.tgz#6e016811f6c7a64f2eed823191c3a6955094e255" + integrity sha512-jStwss93SITGBwt/niYrkf2C+/1KTeZCZl1LaeezTlqppAKeoQC7jxyqYuP72sxBGKCIbw7oHgbYssIRzT5FCQ== + dependencies: + "@types/chai" "*" + "@types/chai-string@^1.4.2": version "1.4.2" resolved "https://registry.yarnpkg.com/@types/chai-string/-/chai-string-1.4.2.tgz#0f116504a666b6c6a3c42becf86634316c9a19ac" @@ -10321,6 +10328,13 @@ ccount@^1.0.0: resolved "https://registry.yarnpkg.com/ccount/-/ccount-1.1.0.tgz#246687debb6014735131be8abab2d93898f8d043" integrity sha512-vlNK021QdI7PNeiUh/lKkC/mNHHfV0m/Ad5JoI0TYtlBnJAslM/JIkm/tGC88bkLIwO6OQ5uV6ztS6kVAtCDlg== +chai-as-promised@^7.1.1: + version "7.1.1" + resolved "https://registry.yarnpkg.com/chai-as-promised/-/chai-as-promised-7.1.1.tgz#08645d825deb8696ee61725dbf590c012eb00ca0" + integrity sha512-azL6xMoi+uxu6z4rhWQ1jbdUhOMhis2PvscD/xjLqNMkv3BPPp2JyyuTHOrf9BOosGpNQ11v6BKv/g57RXbiaA== + dependencies: + check-error "^1.0.2" + chai-string@^1.5.0: version "1.5.0" resolved "https://registry.yarnpkg.com/chai-string/-/chai-string-1.5.0.tgz#0bdb2d8a5f1dbe90bc78ec493c1c1c180dd4d3d2" @@ -17133,7 +17147,7 @@ lambdafs@^2.0.3: resolved "https://registry.yarnpkg.com/lambdafs/-/lambdafs-2.1.1.tgz#4bf8d3037b6c61bbb4a22ab05c73ee47964c25ed" integrity sha512-x5k8JcoJWkWLvCVBzrl4pzvkEHSgSBqFjg3Dpsc4AcTMq7oUMym4cL/gRTZ6VM4mUMY+M0dIbQ+V1c1tsqqanQ== dependencies: - tar-fs "*" + tar-fs "^2.1.1" language-subtag-registry@~0.3.2: version "0.3.21" @@ -23139,16 +23153,6 @@ tapable@^2.0.0, tapable@^2.1.1, tapable@^2.2.0: resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0" integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ== -tar-fs@*, tar-fs@2.1.1: - version "2.1.1" - resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784" - integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng== - dependencies: - chownr "^1.1.1" - mkdirp-classic "^0.5.2" - pump "^3.0.0" - tar-stream "^2.1.4" - tar-fs@2.0.0: version "2.0.0" resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.0.0.tgz#677700fc0c8b337a78bee3623fdc235f21d7afad" @@ -23159,6 +23163,16 @@ tar-fs@2.0.0: pump "^3.0.0" tar-stream "^2.0.0" +tar-fs@2.1.1, tar-fs@^2.1.1: + version "2.1.1" + resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784" + integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng== + dependencies: + chownr "^1.1.1" + mkdirp-classic "^0.5.2" + pump "^3.0.0" + tar-stream "^2.1.4" + tar-stream@^2.0.0, tar-stream@^2.1.4: version "2.2.0" resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-2.2.0.tgz#acad84c284136b060dc3faa64474aa9aebd77287"