Attempt to pull URLs for probable newsletter emails out of content
This commit is contained in:
@ -4,7 +4,7 @@ import { sendEmail } from '../../utils/sendEmail'
|
||||
import { analytics } from '../../utils/analytics'
|
||||
import { getNewsletterEmail } from '../../services/newsletters'
|
||||
import { env } from '../../env'
|
||||
import { isProbablyNewsletter } from '../../utils/parser'
|
||||
import { findNewsletterUrl, isProbablyNewsletter } from '../../utils/parser'
|
||||
import { saveNewsletterEmail } from '../../services/save_newsletter_email'
|
||||
|
||||
interface ForwardEmailMessage {
|
||||
@ -57,7 +57,7 @@ export function emailsServiceRouter() {
|
||||
title: data.subject,
|
||||
content: data.html,
|
||||
author: data.from,
|
||||
url: 'https://omnivore.app/no_url',
|
||||
url: (await findNewsletterUrl(data.html)) || 'https://omnivore.app/no_url',
|
||||
})
|
||||
res.status(200).send('Newsletter')
|
||||
return
|
||||
|
||||
@ -399,3 +399,27 @@ export const isProbablyNewsletter = (html: string): boolean => {
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
/**
 * Tries to find a canonical URL for a newsletter inside an HTML email body.
 *
 * The first link with an href inside an `<h1>` element is used. A HEAD
 * request is made to that link so the final, post-redirect URL can be
 * returned, since these links are usually behind tracking redirects. If the
 * request fails, times out, or exposes no final URL, the link's own href is
 * returned instead.
 *
 * @param html - Raw HTML of the email.
 * @returns The redirected URL, the link's href as a fallback, or `undefined`
 *   when there is no `<h1>` link with an absolute http(s) href.
 */
export const findNewsletterUrl = async (
  html: string
): Promise<string | undefined> => {
  // Upper bound for the best-effort HEAD request so a slow host cannot stall
  // the caller indefinitely (axios has no timeout by default).
  const headTimeoutMs = 5000

  const { document } = new JSDOM(html).window

  // If there is an <h1> element containing a link with a URL, use that
  const postLink = document.querySelector('h1 a[href]')
  const href = postLink ? (postLink.getAttribute('href') || '').trim() : ''
  if (!href) {
    return undefined
  }

  // Only absolute http(s) links are usable as a canonical URL. Relative
  // links and other schemes (mailto:, javascript:, ...) are rejected here
  // instead of being returned after a failed request.
  let target: URL
  try {
    target = new URL(href)
  } catch {
    return undefined
  }
  if (target.protocol !== 'http:' && target.protocol !== 'https:') {
    return undefined
  }

  // NOTE(review): href comes from untrusted email content, so this request
  // can be pointed at any host reachable from the server. Consider
  // restricting it to public addresses.
  try {
    // Try to make a HEAD request so we get the redirected URL, since these
    // will usually be behind tracking url redirects
    const response = await axios({
      method: 'HEAD',
      url: href,
      timeout: headTimeoutMs,
    })
    // A missing `request.res` throws here and is handled by the catch below.
    const finalUrl: unknown = response.request.res.responseUrl
    return typeof finalUrl === 'string' && finalUrl.length > 0
      ? finalUrl
      : href
  } catch {
    // Best effort only: fall back to the link exactly as found in the email.
    return href
  }
}
|
||||
|
||||
@ -94,6 +94,7 @@ const RESERVED_NAMES = new Set([
|
||||
'mine',
|
||||
'mis',
|
||||
'news',
|
||||
'no_url',
|
||||
'oauth',
|
||||
'oauth_clients',
|
||||
'offers',
|
||||
|
||||
@ -3,15 +3,8 @@ import { expect } from 'chai'
|
||||
import 'chai/register-should'
|
||||
import {
|
||||
createTestUser,
|
||||
createUserWithoutProfile,
|
||||
deleteTestUser,
|
||||
getProfile,
|
||||
} from '../db'
|
||||
import { createGroup } from '../../src/services/create_group'
|
||||
import {
|
||||
getUserFollowers,
|
||||
getUserFollowing,
|
||||
} from '../../src/services/followers'
|
||||
import { createNewsletterEmail } from '../../src/services/newsletters'
|
||||
import { saveNewsletterEmail } from '../../src/services/save_newsletter_email'
|
||||
import { getRepository } from 'typeorm'
|
||||
@ -23,14 +16,6 @@ describe('saveNewsletterEmail', () => {
|
||||
await deleteTestUser(username)
|
||||
})
|
||||
|
||||
/**
 * Shape of a newsletter message as declared for these tests.
 * All five fields are required strings.
 */
interface NewsletterMessage {
  /** Value of the message's `email` field. */
  email: string
  /** Value of the message's `title` field. */
  title: string
  /** Value of the message's `author` field. */
  author: string
  /** Value of the message's `url` field. */
  url: string
  /** Value of the message's `content` field. */
  content: string
}
|
||||
|
||||
it('adds the newsletter to the library', async () => {
|
||||
const user = await createTestUser(username)
|
||||
const email = await createNewsletterEmail(user.id)
|
||||
|
||||
@ -3,12 +3,13 @@ import { expect } from 'chai'
|
||||
import 'chai/register-should'
|
||||
import { JSDOM } from 'jsdom'
|
||||
import fs from 'fs'
|
||||
import { isProbablyNewsletter } from '../../src/utils/parser'
|
||||
import { findNewsletterUrl, isProbablyNewsletter } from '../../src/utils/parser'
|
||||
|
||||
/**
 * Reads the file at `path` synchronously and returns its contents decoded
 * as UTF-8 text.
 */
const load = (path: string): string => fs.readFileSync(path, 'utf8')
|
||||
|
||||
describe('isProbablyNewsletter', () => {
|
||||
const load = (path: string): string => {
|
||||
return fs.readFileSync(path, 'utf8')
|
||||
}
|
||||
it('returns true for substack newsletter', () => {
|
||||
const html = load('./test/utils/data/substack-forwarded-newsletter.html')
|
||||
isProbablyNewsletter(html).should.be.true
|
||||
@ -18,3 +19,17 @@ describe('isProbablyNewsletter', () => {
|
||||
isProbablyNewsletter(html).should.be.false
|
||||
})
|
||||
})
|
||||
|
||||
// Suite for findNewsletterUrl. The suite callback is synchronous: Mocha
// ignores a promise returned from `describe`, so only the individual tests
// are async.
describe('findNewsletterUrl', () => {
  it('gets the URL from the header if it is a newsletter', async () => {
    const html = load('./test/utils/data/substack-forwarded-newsletter.html')
    const url = await findNewsletterUrl(html)
    // Not sure if the redirects from substack expire, this test could eventually fail
    // Uses Chai's core `match` so the prefix check needs no assertion plugin.
    expect(url).to.match(
      /^https:\/\/newsletter\.slowchinese\.net\/p\/companies-that-eat-people-217/
    )
  })

  it('returns undefined if it is not a newsletter', async () => {
    const html = load('./test/utils/data/substack-forwarded-welcome-email.html')
    const url = await findNewsletterUrl(html)
    expect(url).to.be.undefined
  })
})
|
||||
|
||||
Reference in New Issue
Block a user