From 65ce8353dcd954e88dfa909a71196667253bb1c1 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 2 Mar 2022 23:09:10 -0800 Subject: [PATCH] Attempt to pull URLs for probable newsletter emails out of content --- packages/api/src/routers/svc/emails.ts | 4 ++-- packages/api/src/utils/parser.ts | 24 +++++++++++++++++++ packages/api/src/utils/usernamePolicy.ts | 1 + .../services/save_newsletter_email.test.ts | 15 ------------ packages/api/test/utils/parser.test.ts | 23 ++++++++++++++---- 5 files changed, 46 insertions(+), 21 deletions(-) diff --git a/packages/api/src/routers/svc/emails.ts b/packages/api/src/routers/svc/emails.ts index d1199f942..ce8f20e3e 100644 --- a/packages/api/src/routers/svc/emails.ts +++ b/packages/api/src/routers/svc/emails.ts @@ -4,7 +4,7 @@ import { sendEmail } from '../../utils/sendEmail' import { analytics } from '../../utils/analytics' import { getNewsletterEmail } from '../../services/newsletters' import { env } from '../../env' -import { isProbablyNewsletter } from '../../utils/parser' +import { findNewsletterUrl, isProbablyNewsletter } from '../../utils/parser' import { saveNewsletterEmail } from '../../services/save_newsletter_email' interface ForwardEmailMessage { @@ -57,7 +57,7 @@ export function emailsServiceRouter() { title: data.subject, content: data.html, author: data.from, - url: 'https://omnivore.app/no_url', + url: (await findNewsletterUrl(data.html)) || 'https://omnivore.app/no_url', }) res.status(200).send('Newsletter') return diff --git a/packages/api/src/utils/parser.ts b/packages/api/src/utils/parser.ts index 74f1565a9..964fea610 100644 --- a/packages/api/src/utils/parser.ts +++ b/packages/api/src/utils/parser.ts @@ -399,3 +399,27 @@ export const isProbablyNewsletter = (html: string): boolean => { return false } + +// Given an HTML blob tries to find a URL to use for +// a canonical URL. +export const findNewsletterUrl = async (html: string): Promise => { + const dom = new JSDOM(html).window + + // If there is an

res.request.res.responseUrl) + .catch((e) => href) + } + } + + return undefined +} diff --git a/packages/api/src/utils/usernamePolicy.ts b/packages/api/src/utils/usernamePolicy.ts index fa64c469e..20bd5fe32 100644 --- a/packages/api/src/utils/usernamePolicy.ts +++ b/packages/api/src/utils/usernamePolicy.ts @@ -94,6 +94,7 @@ const RESERVED_NAMES = new Set([ 'mine', 'mis', 'news', + 'no_url', 'oauth', 'oauth_clients', 'offers', diff --git a/packages/api/test/services/save_newsletter_email.test.ts b/packages/api/test/services/save_newsletter_email.test.ts index 8804329f7..bf29de3ac 100644 --- a/packages/api/test/services/save_newsletter_email.test.ts +++ b/packages/api/test/services/save_newsletter_email.test.ts @@ -3,15 +3,8 @@ import { expect } from 'chai' import 'chai/register-should' import { createTestUser, - createUserWithoutProfile, deleteTestUser, - getProfile, } from '../db' -import { createGroup } from '../../src/services/create_group' -import { - getUserFollowers, - getUserFollowing, -} from '../../src/services/followers' import { createNewsletterEmail } from '../../src/services/newsletters' import { saveNewsletterEmail } from '../../src/services/save_newsletter_email' import { getRepository } from 'typeorm' @@ -23,14 +16,6 @@ describe('saveNewsletterEmail', () => { await deleteTestUser(username) }) - interface NewsletterMessage { - email: string - content: string - url: string - title: string - author: string - } - it('adds the newsletter to the library', async () => { const user = await createTestUser(username) const email = await createNewsletterEmail(user.id) diff --git a/packages/api/test/utils/parser.test.ts b/packages/api/test/utils/parser.test.ts index 16d4bbca7..2a2fedcfb 100644 --- a/packages/api/test/utils/parser.test.ts +++ b/packages/api/test/utils/parser.test.ts @@ -3,12 +3,13 @@ import { expect } from 'chai' import 'chai/register-should' import { JSDOM } from 'jsdom' import fs from 'fs' -import { isProbablyNewsletter } from '../../src/utils/parser' +import { findNewsletterUrl, isProbablyNewsletter } from '../../src/utils/parser' + +const load = (path: string): string => { + return fs.readFileSync(path, 'utf8') +} describe('isProbablyNewsletter', () => { - const load = (path: string): string => { - return fs.readFileSync(path, 'utf8') - } it('returns true for substack newsletter', () => { const html = load('./test/utils/data/substack-forwarded-newsletter.html') isProbablyNewsletter(html).should.be.true @@ -18,3 +19,17 @@ describe('isProbablyNewsletter', () => { isProbablyNewsletter(html).should.be.false }) }) + +describe('findNewsletterUrl', async () => { + it('gets the URL from the header if it is a newsletter', async () => { + const html = load('./test/utils/data/substack-forwarded-newsletter.html') + const url = await findNewsletterUrl(html) + // Not sure if the redirects from substack expire, this test could eventually fail + expect(url).to.startWith('https://newsletter.slowchinese.net/p/companies-that-eat-people-217') + }) + it('returns undefined if it is not a newsletter', async () => { + const html = load('./test/utils/data/substack-forwarded-welcome-email.html') + const url = await findNewsletterUrl(html) + expect(url).to.be.undefined + }) +})