Attempt to pull URLs for probable newsletter emails out of content

This commit is contained in:
Jackson Harper
2022-03-02 23:09:10 -08:00
parent 21329949e5
commit 65ce8353dc
5 changed files with 46 additions and 21 deletions

View File

@ -3,12 +3,13 @@ import { expect } from 'chai'
import 'chai/register-should'
import { JSDOM } from 'jsdom'
import fs from 'fs'
import { isProbablyNewsletter } from '../../src/utils/parser'
import { findNewsletterUrl, isProbablyNewsletter } from '../../src/utils/parser'
/**
 * Synchronously reads the file at `path` and returns its contents decoded as UTF-8.
 *
 * @param path - Location of the file to read, passed straight to `fs.readFileSync`.
 * @returns The full text of the file.
 */
function load(path: string): string {
  const contents = fs.readFileSync(path, 'utf8')
  return contents
}
describe('isProbablyNewsletter', () => {
const load = (path: string): string => {
return fs.readFileSync(path, 'utf8')
}
it('returns true for substack newsletter', () => {
const html = load('./test/utils/data/substack-forwarded-newsletter.html')
isProbablyNewsletter(html).should.be.true
@ -18,3 +19,17 @@ describe('isProbablyNewsletter', () => {
isProbablyNewsletter(html).should.be.false
})
})
// Tests for findNewsletterUrl: each case loads an HTML fixture, awaits the
// function, and checks the resolved value (a URL string or undefined).
//
// The suite callback is intentionally synchronous. Mocha (assumed to be the
// runner here) does not await a promise returned from describe(), so only
// the individual `it` callbacks are async.
describe('findNewsletterUrl', () => {
  it('gets the URL from the header if it is a newsletter', async () => {
    const html = load('./test/utils/data/substack-forwarded-newsletter.html')
    const url = await findNewsletterUrl(html)
    // Not sure if the redirects from Substack expire; if they do, this test
    // could eventually fail.
    // NOTE(review): `startWith` is not a core Chai assertion; this assumes a
    // plugin such as chai-string is registered elsewhere. TODO confirm.
    expect(url).to.startWith('https://newsletter.slowchinese.net/p/companies-that-eat-people-217')
  })

  it('returns undefined if it is not a newsletter', async () => {
    const html = load('./test/utils/data/substack-forwarded-welcome-email.html')
    const url = await findNewsletterUrl(html)
    expect(url).to.be.undefined
  })
})