Files
omnivore/packages/api/test/utils/parser.test.ts
Jackson Harper da28998130 Pull ul list out of newsletter blurb
next/react doesn't want child elements of the paragraphs

Improve formatting

Improve wording

Use buttons in the subscribe directly blocks

Simplify docs on setting up forwarding rules

Add extra padding on bottom of help docs

Remove unused style

Add emails help page

Improve formatting

Prefetch page content on iOS

Reduce the reader overly length now that items are precached

Add invalidation when highlights are added to items

fix missing index_settings.json file in api dockerfile for creating elastic index (#363)

Handle full email address objects in the to param from sendgrid

These come in a format like:

"jacksonh-dfdf@inbox.omnivore.app" <jacksonh-dfdf@inbox-demo.omnivore.app>

New IDs for short highlights because they don't cascade delete now

Testing CI issues

Simplify test

CI test

Use promises for async tests

Temporarily remove test to debug CI

Re-enable

re-enable test, return error

Specify a userId when looking up saved email pages

create a unique url for newsletters without a URL

Use 500ms on page test timeouts

Increase timeout

Don't use deep equal to match newsletter label

Run just the labels API

Run against just the newsletter emails

Run without the page tests

Fix

Set the allow uncaught flag

Remove highlight tests

Remove newsletters tests

more resolver tests

Remove newsletter tests

Comment out resolver tests

Use nock for external requests in tests

Specify puppeteer url for tests

Comment out more tests

uncomment tests

re-enable

re-enable email test

Re-disable

Re-enable one pdf attachment test

Re-disable pdf attachment test

Use promises on setTimeout tests

rm label tests

mv label tests into a context

Comment out pdf tests

Comment out pdf tests

Async test

Async wrappers

Delay when creating test pages

More debugging

Unique short ids

Remove potentially problematic test

Fetch page before returning for test

handler in before block

more debugging

More debugging

Move errors checks into contexts

Use a context when saving newsletters to force index refresh

Prettier fix

Fix newsletter label check, remove setTimeout

Re-enable test

timeout on pdf router handler

Fix method call

comment out PDF test

Unique fake username

Comment out PDF test

Debugging signed urls

Re-enable

New email

pdf test

PDF tests

Comment out pdf test

Add nock stubs for email URLs

Use full address for PDF test

Remove debug

Use full email addresses
2022-04-02 16:56:24 -07:00

111 lines
4.9 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import 'mocha'
import { expect } from 'chai'
import 'chai/register-should'
import fs from 'fs'
import { findNewsletterUrl, isProbablyNewsletter, parsePageMetadata, parsePreparedContent } from '../../src/utils/parser'
import nock from 'nock'
/**
 * Reads a file synchronously and returns its contents decoded as UTF-8.
 *
 * @param path - Location of the file, handed straight to `fs.readFileSync`.
 * @returns The file contents as a string.
 */
const load = (path: string): string =>
  fs.readFileSync(path, { encoding: 'utf8' })
// Checks isProbablyNewsletter against saved email fixtures. Each row names the
// test, the fixture it loads, and the verdict the classifier must return.
describe('isProbablyNewsletter', () => {
  const fixtures: ReadonlyArray<{
    title: string
    file: string
    expected: boolean
  }> = [
    {
      title: 'returns true for substack newsletter',
      file: './test/utils/data/substack-forwarded-newsletter.html',
      expected: true,
    },
    {
      title: 'returns true for private forwarded substack newsletter',
      file: './test/utils/data/substack-private-forwarded-newsletter.html',
      expected: true,
    },
    {
      title: 'returns false for substack welcome email',
      file: './test/utils/data/substack-forwarded-welcome-email.html',
      expected: false,
    },
    {
      title: 'returns true for beehiiv.com newsletter',
      file: './test/utils/data/beehiiv-newsletter.html',
      expected: true,
    },
  ]

  // Register one test per row, in the same order as the table.
  for (const { title, file, expected } of fixtures) {
    it(title, () => {
      const verdict = isProbablyNewsletter(load(file))
      if (expected) {
        verdict.should.be.true
      } else {
        verdict.should.be.false
      }
    })
  }
})
// Exercises findNewsletterUrl against saved email fixtures. The first two
// tests stub the outbound HTTP requests with nock.
//
// The suite callback is deliberately synchronous: Mocha ignores anything a
// `describe` callback returns, so only the `it` callbacks are async.
describe('findNewsletterUrl', () => {
  it('gets the URL from the header if it is a substack newsletter', async () => {
    // Stub the HEAD request for the newsletter link.
    nock('https://newsletter.slowchinese.net')
      .head('/p/companies-that-eat-people-217?token=eyJ1c2VyX2lkIjoxMTU0MzM0NSwicG9zdF9pZCI6NDg3MjA5NDAsImlhdCI6MTY0NTI1NzQ1MSwiaXNzIjoicHViLTI4MDUzMSIsInN1YiI6InBvc3QtcmVhY3Rpb24ifQ.l5F3Kx6K9tvy9cRAXx3MepobQBCJDJQgAxOpA0INIZA')
      .reply(200, '')
    const html = load('./test/utils/data/substack-forwarded-newsletter.html')
    const url = await findNewsletterUrl(html)
    // Not sure if the redirects from substack expire, this test could eventually fail
    expect(url).to.startWith('https://newsletter.slowchinese.net/p/companies-that-eat-people-217')
  })

  it('gets the URL from the header if it is a beehiiv newsletter', async () => {
    // The tracking link answers with a 302 whose Location is the real article.
    // NOTE(review): the follow-up `.get` below is registered on the sendgrid
    // scope, not on www.milkroad.com — confirm that is intended.
    nock('https://u23463625.ct.sendgrid.net')
      .head('/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno')
      .reply(302, undefined, {
        'Location': 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple'
      })
      .get('/p/talked-guy-spent-30m-beeple')
      .reply(200, '')
    const html = load('./test/utils/data/beehiiv-newsletter.html')
    const url = await findNewsletterUrl(html)
    expect(url).to.startWith('https://www.milkroad.com/p/talked-guy-spent-30m-beeple')
  })

  it('returns undefined if it is not a newsletter', async () => {
    const html = load('./test/utils/data/substack-forwarded-welcome-email.html')
    const url = await findNewsletterUrl(html)
    expect(url).to.be.undefined
  })
})
// Verifies that parsePageMetadata extracts author, title, preview image and
// description from a saved Substack post fixture.
//
// The suite callback is deliberately synchronous: Mocha ignores anything a
// `describe` callback returns, so only the `it` callback is async.
describe('parseMetadata', () => {
  it('gets author, title, image, description', async () => {
    const html = load('./test/utils/data/substack-post.html')
    const metadata = await parsePageMetadata(html)
    // All expected values are plain strings, so strict equality is sufficient.
    expect(metadata?.author).to.equal('Omnivore')
    expect(metadata?.title).to.equal('Code Block Syntax Highlighting')
    expect(metadata?.previewImage).to.equal('https://cdn.substack.com/image/fetch/w_1200,h_600,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2F2ab1f7e8-2ca7-4011-8ccb-43d0b3bd244f_1490x2020.png')
    expect(metadata?.description).to.equal('Highlighted <code> in Omnivore')
  })
})
// Verifies that parsePreparedContent still reports a published date for the
// Stratechery fixture in the case named by the test title.
//
// The suite callback is deliberately synchronous: Mocha ignores anything a
// `describe` callback returns, so only the `it` callback is async.
describe('parsePreparedContent', () => {
  it('gets published date when JSONLD fails to load', async () => {
    const html = load('./test/utils/data/stratechery-blog-post.html')
    const result = await parsePreparedContent(
      'https://example.com/',
      {
        document: html,
        pageInfo: { }
      },
    )
    // Compare epoch milliseconds so the check does not depend on Date identity.
    expect(result.parsedContent?.publishedDate?.getTime()).to.equal(new Date('2016-04-05T15:27:51+00:00').getTime())
  })
})
// Verifies that parsePreparedContent takes the page title from the document
// referenced by a <link type="application/json+oembed"> tag.
//
// The suite callback is deliberately synchronous: Mocha ignores anything a
// `describe` callback returns, so only hooks and `it` callbacks may be async.
describe('parsePreparedContent', () => {
  // Register the HTTP stub in a hook so it is installed when this suite runs,
  // not as a side effect of loading the test file.
  before(() => {
    nock('https://oembeddata').get('/').reply(200, {
      "version":"1.0",
      "provider_name":"Hippocratic Adventures",
      "provider_url":"https:\/\/www.hippocraticadventures.com",
      "title":"The Ultimate Guide to Practicing Medicine in Singapore &#8211; Part 2"
    })
  })

  it('gets metadata from external JSONLD if available', async () => {
    // The markup is kept verbatim (including the unterminated `</link`), and
    // flush-left so the template literal content is unchanged.
    const html = `<html>
<head>
<link rel="alternate" type="application/json+oembed" href="https://oembeddata">
</link
</head>
<body>body</body>
</html>`
    const result = await parsePreparedContent(
      'https://example.com/',
      {
        document: html,
        pageInfo: { }
      },
    )
    expect(result.parsedContent?.title).to.equal('The Ultimate Guide to Practicing Medicine in Singapore Part 2')
  })
})