New function to determine if an HTML blob is probably a newsletter based on its content
This commit is contained in:
25
packages/api/src/utils/newsletter.ts
Normal file
25
packages/api/src/utils/newsletter.ts
Normal file
@ -0,0 +1,25 @@
|
||||
import { Readability } from '@omnivore/readability';
|
||||
import { DOMWindow } from 'jsdom'
|
||||
|
||||
|
||||
/**
 * Attempt to determine if an HTML blob is a newsletter based on its contents.
 *
 * The check has two stages:
 *  1. Readability must be able to extract article content from the document;
 *     if it cannot, the blob is not treated as a newsletter.
 *  2. The document must contain an `.email-body-container` element that has a
 *     `.post-meta` or `.post-cta` descendant (the Substack heuristic).
 *
 * @param dom - jsdom window whose `document` holds the parsed HTML blob.
 * @returns `true` when both stages pass, otherwise `false`.
 */
export const isProbablyNewsletter = (dom: DOMWindow): boolean => {
  // Parse the document that belongs to `dom`. The previous code passed the
  // bare global `document`, which is not the document under inspection and
  // does not exist as a global in a plain Node process.
  //
  // A deep clone is handed to Readability because upstream Readability
  // documents that parse() modifies the DOM it is given.
  // NOTE(review): presumably the @omnivore fork behaves the same — confirm.
  // Cloning keeps the caller's DOM, and the selector check below, untouched.
  const documentClone = dom.document.cloneNode(true) as Document
  const article = new Readability(documentClone, {
    debug: false,
    keepTables: true,
  }).parse()

  if (!article || !article.content) {
    return false
  }

  // Maybe it is a substack newsletter
  const body = dom.document.querySelector('.email-body-container')
  return Boolean(body?.querySelector('.post-meta, .post-cta'))
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
21
packages/api/test/utils/parser.test.ts
Normal file
21
packages/api/test/utils/parser.test.ts
Normal file
@ -0,0 +1,21 @@
|
||||
import 'mocha'
|
||||
import { expect } from 'chai'
|
||||
import 'chai/register-should'
|
||||
import { JSDOM } from 'jsdom'
|
||||
import fs from 'fs'
|
||||
import { isProbablyNewsletter } from '../../src/utils/parser'
|
||||
|
||||
// Exercises isProbablyNewsletter against saved HTML fixtures of forwarded
// Substack emails.
describe('isProbablyNewsletter', () => {
  // Reads the HTML file at `path` as UTF-8 and parses it into a JSDOM instance.
  const load = (path: string): JSDOM => new JSDOM(fs.readFileSync(path, 'utf8'))

  it('returns true for substack newsletter', () => {
    const fixture = load('./test/utils/data/substack-forwarded-newsletter.html')
    expect(isProbablyNewsletter(fixture.window)).to.be.true
  })

  it('returns false for substack welcome email', () => {
    const fixture = load('./test/utils/data/substack-forwarded-welcome-email.html')
    expect(isProbablyNewsletter(fixture.window)).to.be.false
  })
})
|
||||
Reference in New Issue
Block a user