New function to determine if an HTML blob is probably a newsletter based on its content

This commit is contained in:
Jackson Harper
2022-03-02 16:31:15 -08:00
parent ff871c35bc
commit f7f83fe080
4 changed files with 48 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
import 'mocha'
import { expect } from 'chai'
import 'chai/register-should'
import { JSDOM } from 'jsdom'
import fs from 'fs'
import { isProbablyNewsletter } from '../../src/utils/parser'
// Checks isProbablyNewsletter against saved HTML fixtures of forwarded
// Substack emails: one newsletter issue and one welcome email.
describe('isProbablyNewsletter', () => {
  /**
   * Reads an HTML fixture from disk and parses it into a JSDOM instance.
   * Relative paths are resolved by Node against the process working directory.
   */
  function load(path: string): JSDOM {
    return new JSDOM(fs.readFileSync(path, 'utf8'))
  }

  it('returns true for substack newsletter', () => {
    const fixture = load('./test/utils/data/substack-forwarded-newsletter.html')
    isProbablyNewsletter(fixture.window).should.be.true
  })

  it('returns false for substack welcome email', () => {
    const fixture = load('./test/utils/data/substack-forwarded-welcome-email.html')
    isProbablyNewsletter(fixture.window).should.be.false
  })
})