Add other newsletter handlers
This commit is contained in:
@ -15,7 +15,12 @@
|
||||
"build": "tsc"
|
||||
},
|
||||
"devDependencies": {
|
||||
"eslint-plugin-prettier": "^4.0.0"
|
||||
"chai": "^4.3.6",
|
||||
"chai-as-promised": "^7.1.1",
|
||||
"chai-string": "^1.5.0",
|
||||
"eslint-plugin-prettier": "^4.0.0",
|
||||
"mocha": "^10.0.0",
|
||||
"nock": "^13.2.9"
|
||||
},
|
||||
"dependencies": {
|
||||
"addressparser": "^1.0.1",
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
import addressparser from 'addressparser'
|
||||
import rfc2047 from 'rfc2047'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
import { v4 as uuid } from 'uuid'
|
||||
import { parseHTML } from 'linkedom'
|
||||
import axios from 'axios'
|
||||
|
||||
interface Unsubscribe {
|
||||
mailTo?: string
|
||||
@ -34,16 +36,17 @@ export interface PreHandleResult {
|
||||
dom?: Document
|
||||
}
|
||||
|
||||
/** Prefix shared by every placeholder URL produced by `generateUniqueUrl`. */
export const FAKE_URL_PREFIX = 'https://omnivore.app/no_url?q='

/**
 * Builds a placeholder URL by appending a freshly generated v4 UUID to
 * `FAKE_URL_PREFIX`, so that two calls never yield the same URL.
 *
 * @returns the placeholder URL
 */
export const generateUniqueUrl = (): string => `${FAKE_URL_PREFIX}${uuid()}`
|
||||
|
||||
export abstract class ContentHandler {
|
||||
protected senderRegex: RegExp
|
||||
protected urlRegex: RegExp
|
||||
protected defaultUrl: string
|
||||
public name: string
|
||||
name: string
|
||||
|
||||
protected constructor() {
|
||||
this.senderRegex = new RegExp(/NEWSLETTER_SENDER_REGEX/)
|
||||
this.urlRegex = new RegExp(/NEWSLETTER_URL_REGEX/)
|
||||
this.defaultUrl = 'NEWSLETTER_DEFAULT_URL'
|
||||
this.name = 'Handler name'
|
||||
}
|
||||
|
||||
@ -63,17 +66,57 @@ export abstract class ContentHandler {
|
||||
return Promise.resolve({ url, dom })
|
||||
}
|
||||
|
||||
isNewsletter(postHeader: string, from: string, unSubHeader: string): boolean {
|
||||
return false
|
||||
async isNewsletter(input: {
|
||||
postHeader: string
|
||||
from: string
|
||||
unSubHeader: string
|
||||
html?: string
|
||||
}): Promise<boolean> {
|
||||
const re = new RegExp(this.senderRegex)
|
||||
return Promise.resolve(
|
||||
re.test(input.from) && (!!input.postHeader || !!input.unSubHeader)
|
||||
)
|
||||
}
|
||||
|
||||
parseNewsletterUrl(_postHeader: string, html: string): string | undefined {
|
||||
findNewsletterHeaderHref(dom: Document): string | undefined {
|
||||
return undefined
|
||||
}
|
||||
|
||||
/**
 * Given an HTML blob, tries to find a URL to use as the canonical URL of
 * the newsletter.
 *
 * The link itself is located by `findNewsletterHeaderHref`, which concrete
 * handlers override. Such links are usually tracking redirects, so a HEAD
 * request is made to learn the final URL after redirects.
 *
 * @param html raw HTML body of the email
 * @returns the redirected URL when it can be determined, the header link
 *   itself when the HEAD request fails or reports no final URL, or
 *   `undefined` when the email contains no header link
 */
async findNewsletterUrl(html: string): Promise<string | undefined> {
  const dom = parseHTML(html).document

  // Ask the concrete handler for its "view online" header link
  const href = this.findNewsletterHeaderHref(dom)
  if (!href) {
    return undefined
  }

  // Try to make a HEAD request, so we get the redirected URL, since these
  // will usually be behind tracking url redirects
  try {
    const response = await axios.head(href, { timeout: 5000 })
    // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access
    const finalUrl = response.request?.res?.responseUrl as string | undefined
    // A successful response without a final URL must not discard the link
    // we already found, so fall back to the header href.
    return finalUrl || href
  } catch (e) {
    console.log('error making HEAD request', e)
    return href
  }
}
|
||||
|
||||
async parseNewsletterUrl(
|
||||
_postHeader: string,
|
||||
html: string
|
||||
): Promise<string | undefined> {
|
||||
// get newsletter url from html
|
||||
const matches = html.match(this.urlRegex)
|
||||
if (matches) {
|
||||
return matches[1]
|
||||
return Promise.resolve(matches[1])
|
||||
}
|
||||
return undefined
|
||||
return Promise.resolve(undefined)
|
||||
}
|
||||
|
||||
parseAuthor(from: string): string {
|
||||
@ -97,14 +140,14 @@ export abstract class ContentHandler {
|
||||
}
|
||||
}
|
||||
|
||||
handleNewsletter({
|
||||
async handleNewsletter({
|
||||
email,
|
||||
html,
|
||||
postHeader,
|
||||
title,
|
||||
from,
|
||||
unSubHeader,
|
||||
}: NewsletterInput): NewsletterResult {
|
||||
}: NewsletterInput): Promise<NewsletterResult> {
|
||||
console.log('handleNewsletter', email, postHeader, title, from)
|
||||
|
||||
if (!email || !html || !title || !from) {
|
||||
@ -115,8 +158,7 @@ export abstract class ContentHandler {
|
||||
// fallback to default url if newsletter url does not exist
|
||||
// assign a random uuid to the default url to avoid duplicate url
|
||||
const url =
|
||||
this.parseNewsletterUrl(postHeader, html) ||
|
||||
`${this.defaultUrl}?source=newsletters&id=${uuidv4()}`
|
||||
(await this.parseNewsletterUrl(postHeader, html)) || generateUniqueUrl()
|
||||
const author = this.parseAuthor(from)
|
||||
const unsubscribe = this.parseUnsubscribe(unSubHeader)
|
||||
|
||||
|
||||
@ -1,13 +1,14 @@
|
||||
import { AppleNewsHandler } from './content/apple-news-handler'
|
||||
import { BloombergHandler } from './content/bloomberg-handler'
|
||||
import { DerstandardHandler } from './content/derstandard-handler'
|
||||
import { ImageHandler } from './content/image-handler'
|
||||
import { MediumHandler } from './content/medium-handler'
|
||||
import { PdfHandler } from './content/pdf-handler'
|
||||
import { ScrapingBeeHandler } from './content/scrapingBee-handler'
|
||||
import { TDotCoHandler } from './content/t-dot-co-handler'
|
||||
import { TwitterHandler } from './content/twitter-handler'
|
||||
import { YoutubeHandler } from './content/youtube-handler'
|
||||
import { AppleNewsHandler } from './websites/apple-news-handler'
|
||||
import { BloombergHandler } from './websites/bloomberg-handler'
|
||||
import { DerstandardHandler } from './websites/derstandard-handler'
|
||||
import { ImageHandler } from './websites/image-handler'
|
||||
import { MediumHandler } from './websites/medium-handler'
|
||||
import { PdfHandler } from './websites/pdf-handler'
|
||||
import { ScrapingBeeHandler } from './websites/scrapingBee-handler'
|
||||
import { TDotCoHandler } from './websites/t-dot-co-handler'
|
||||
import { TwitterHandler } from './websites/twitter-handler'
|
||||
import { YoutubeHandler } from './websites/youtube-handler'
|
||||
import { WikipediaHandler } from './websites/wikipedia-handler'
|
||||
import {
|
||||
ContentHandler,
|
||||
NewsletterInput,
|
||||
@ -19,7 +20,9 @@ import { AxiosHandler } from './newsletters/axios-handler'
|
||||
import { GolangHandler } from './newsletters/golang-handler'
|
||||
import { MorningBrewHandler } from './newsletters/morning-brew-handler'
|
||||
import { BloombergNewsletterHandler } from './newsletters/bloomberg-newsletter-handler'
|
||||
import { WikipediaHandler } from './content/wikipedia-handler'
|
||||
import { BeehiivHandler } from './newsletters/beehiiv-handler'
|
||||
import { ConvertkitHandler } from './newsletters/convertkit-handler'
|
||||
import { RevueHandler } from './newsletters/revue-handler'
|
||||
|
||||
const validateUrlString = (url: string) => {
|
||||
const u = new URL(url)
|
||||
@ -57,6 +60,10 @@ const newsletterHandlers: ContentHandler[] = [
|
||||
new GolangHandler(),
|
||||
new SubstackHandler(),
|
||||
new MorningBrewHandler(),
|
||||
new SubstackHandler(),
|
||||
new BeehiivHandler(),
|
||||
new ConvertkitHandler(),
|
||||
new RevueHandler(),
|
||||
]
|
||||
|
||||
export const preHandleContent = async (
|
||||
@ -91,11 +98,11 @@ export const preHandleContent = async (
|
||||
return undefined
|
||||
}
|
||||
|
||||
export const handleNewsletter = (
|
||||
export const handleNewsletter = async (
|
||||
input: NewsletterInput
|
||||
): NewsletterResult | undefined => {
|
||||
): Promise<NewsletterResult | undefined> => {
|
||||
for (const handler of newsletterHandlers) {
|
||||
if (handler.isNewsletter(input.postHeader, input.from, input.unSubHeader)) {
|
||||
if (await handler.isNewsletter(input)) {
|
||||
return handler.handleNewsletter(input)
|
||||
}
|
||||
}
|
||||
|
||||
@ -5,7 +5,6 @@ export class AxiosHandler extends ContentHandler {
|
||||
super()
|
||||
this.senderRegex = /<.+@axios.com>/
|
||||
this.urlRegex = /View in browser at <a.*>(.*)<\/a>/
|
||||
this.defaultUrl = 'https://axios.com'
|
||||
this.name = 'axios'
|
||||
}
|
||||
|
||||
@ -44,10 +43,4 @@ export class AxiosHandler extends ContentHandler {
|
||||
|
||||
return Promise.resolve({ dom })
|
||||
}
|
||||
|
||||
isNewsletter(postHeader: string, from: string, unSubHeader: string): boolean {
|
||||
// Axios newsletter is from <xx@axios.com>
|
||||
const re = new RegExp(this.senderRegex)
|
||||
return re.test(from) && (!!postHeader || !!unSubHeader)
|
||||
}
|
||||
}
|
||||
|
||||
43
packages/content-handler/src/newsletters/beehiiv-handler.ts
Normal file
43
packages/content-handler/src/newsletters/beehiiv-handler.ts
Normal file
@ -0,0 +1,43 @@
|
||||
import { ContentHandler } from '../content-handler'
|
||||
import { parseHTML } from 'linkedom'
|
||||
|
||||
/**
 * Newsletter handler for emails sent through beehiiv.
 *
 * Detection is content based: an email is treated as a beehiiv newsletter
 * when it embeds an image served from `beehiiv.net` and carries a
 * "Read Online" link.
 */
export class BeehiivHandler extends ContentHandler {
  constructor() {
    super()
    this.name = 'beehiiv'
  }

  /**
   * Finds the "Read Online" link of the email.
   *
   * @param dom parsed email document
   * @returns the href of the last matching anchor that has a non-empty
   *   href, or `undefined` when there is none
   */
  findNewsletterHeaderHref(dom: Document): string | undefined {
    const candidates = dom.querySelectorAll('table tr td div a[class*="link"]')
    let href: string | undefined
    for (const anchor of Array.from(candidates)) {
      if (anchor.textContent !== 'Read Online') {
        continue
      }
      // Keep the previously found link when this anchor has no usable href,
      // instead of overwriting it with undefined.
      href = anchor.getAttribute('href') || href
    }
    return href
  }

  /**
   * Decides whether the email looks like a beehiiv newsletter.
   *
   * Only `input.html` is inspected; the header fields are ignored.
   *
   * @returns `true` when the HTML contains a beehiiv-hosted image and a
   *   "Read Online" link, `false` otherwise
   */
  async isNewsletter(input: {
    postHeader: string
    from: string
    unSubHeader: string
    html: string
  }): Promise<boolean> {
    // The base class declares `html` as optional, so callers going through
    // the base type may omit it.
    if (!input.html) {
      return false
    }
    const dom = parseHTML(input.html).document
    const hasBeehiivImage =
      dom.querySelectorAll('img[src*="beehiiv.net"]').length > 0
    return hasBeehiivImage && this.findNewsletterHeaderHref(dom) !== undefined
  }

  /**
   * Resolves the newsletter URL from the email HTML via `findNewsletterUrl`.
   * `postHeader` is not used by this handler.
   */
  async parseNewsletterUrl(
    postHeader: string,
    html: string
  ): Promise<string | undefined> {
    return this.findNewsletterUrl(html)
  }
}
|
||||
@ -5,7 +5,6 @@ export class BloombergNewsletterHandler extends ContentHandler {
|
||||
super()
|
||||
this.senderRegex = /<.+@mail.bloomberg.*.com>/
|
||||
this.urlRegex = /<a class="view-in-browser__url" href=["']([^"']*)["']/
|
||||
this.defaultUrl = 'https://www.bloomberg.com'
|
||||
this.name = 'bloomberg'
|
||||
}
|
||||
|
||||
@ -35,10 +34,4 @@ export class BloombergNewsletterHandler extends ContentHandler {
|
||||
|
||||
return Promise.resolve({ dom })
|
||||
}
|
||||
|
||||
isNewsletter(postHeader: string, from: string, unSubHeader: string): boolean {
|
||||
// Axios newsletter is from <xx@axios.com>
|
||||
const re = new RegExp(this.senderRegex)
|
||||
return re.test(from) && (!!postHeader || !!unSubHeader)
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,41 @@
|
||||
import { ContentHandler } from '../content-handler'
|
||||
import { parseHTML } from 'linkedom'
|
||||
|
||||
/**
 * Newsletter handler for emails sent through ConvertKit.
 *
 * Detection is content based: an email is treated as a ConvertKit
 * newsletter when it embeds an image served from `convertkit.com` or
 * `convertkit-mail.com`.
 */
export class ConvertkitHandler extends ContentHandler {
  constructor() {
    super()
    this.name = 'convertkit'
  }

  /**
   * Finds the "View this email in your browser" link of the email.
   *
   * @param dom parsed email document
   * @returns the href of the last matching anchor that has a non-empty
   *   href, or `undefined` when there is none
   */
  findNewsletterHeaderHref(dom: Document): string | undefined {
    const candidates = dom.querySelectorAll('table tr td a')
    let href: string | undefined
    for (const anchor of Array.from(candidates)) {
      if (anchor.textContent !== 'View this email in your browser') {
        continue
      }
      // Keep the previously found link when this anchor has no usable href,
      // instead of overwriting it with undefined.
      href = anchor.getAttribute('href') || href
    }
    return href
  }

  /**
   * Decides whether the email looks like a ConvertKit newsletter.
   *
   * Only `input.html` is inspected; the header fields are ignored. Unlike
   * the link lookup above, no "view in browser" link is required here.
   *
   * @returns `true` when the HTML contains a ConvertKit-hosted image
   */
  async isNewsletter(input: {
    postHeader: string
    from: string
    unSubHeader: string
    html: string
  }): Promise<boolean> {
    // The base class declares `html` as optional, so callers going through
    // the base type may omit it.
    if (!input.html) {
      return false
    }
    const dom = parseHTML(input.html).document
    const convertkitImages = dom.querySelectorAll(
      'img[src*="convertkit.com"], img[src*="convertkit-mail.com"]'
    )
    return convertkitImages.length > 0
  }

  /**
   * Resolves the newsletter URL from the email HTML via `findNewsletterUrl`.
   * `postHeader` is not used by this handler.
   */
  async parseNewsletterUrl(
    postHeader: string,
    html: string
  ): Promise<string | undefined> {
    return this.findNewsletterUrl(html)
  }
}
|
||||
@ -5,7 +5,6 @@ export class GolangHandler extends ContentHandler {
|
||||
super()
|
||||
this.senderRegex = /<.+@golangweekly.com>/
|
||||
this.urlRegex = /<a href=["']([^"']*)["'].*>Read on the Web<\/a>/
|
||||
this.defaultUrl = 'https://golangweekly.com'
|
||||
this.name = 'golangweekly'
|
||||
}
|
||||
|
||||
@ -25,10 +24,4 @@ export class GolangHandler extends ContentHandler {
|
||||
|
||||
return Promise.resolve({ dom })
|
||||
}
|
||||
|
||||
isNewsletter(postHeader: string, from: string, unSubHeader: string): boolean {
|
||||
// Axios newsletter is from <xx@axios.com>
|
||||
const re = new RegExp(this.senderRegex)
|
||||
return re.test(from) && (!!postHeader || !!unSubHeader)
|
||||
}
|
||||
}
|
||||
|
||||
@ -5,7 +5,6 @@ export class MorningBrewHandler extends ContentHandler {
|
||||
super()
|
||||
this.senderRegex = /Morning Brew <crew@morningbrew.com>/
|
||||
this.urlRegex = /<a.* href=["']([^"']*)["'].*>View Online<\/a>/
|
||||
this.defaultUrl = 'https://www.morningbrew.com'
|
||||
this.name = 'morningbrew'
|
||||
}
|
||||
|
||||
@ -33,10 +32,4 @@ export class MorningBrewHandler extends ContentHandler {
|
||||
|
||||
return Promise.resolve({ dom })
|
||||
}
|
||||
|
||||
isNewsletter(postHeader: string, from: string, unSubHeader: string): boolean {
|
||||
// Axios newsletter is from <xx@axios.com>
|
||||
const re = new RegExp(this.senderRegex)
|
||||
return re.test(from) && (!!postHeader || !!unSubHeader)
|
||||
}
|
||||
}
|
||||
|
||||
46
packages/content-handler/src/newsletters/revue-handler.ts
Normal file
46
packages/content-handler/src/newsletters/revue-handler.ts
Normal file
@ -0,0 +1,46 @@
|
||||
import { ContentHandler } from '../content-handler'
|
||||
import { parseHTML } from 'linkedom'
|
||||
|
||||
/**
 * Newsletter handler for emails sent through Revue.
 *
 * Detection is content based: an email is treated as a Revue newsletter
 * when it embeds an image served from `getrevue.co` or `revue.email` and
 * carries a "View online" link.
 */
export class RevueHandler extends ContentHandler {
  constructor() {
    super()
    this.name = 'revue'
  }

  /**
   * Finds the "View online" link of the email.
   *
   * @param dom parsed email document
   * @returns the href of the last matching anchor that has a non-empty
   *   href, or `undefined` when there is none
   */
  findNewsletterHeaderHref(dom: Document): string | undefined {
    const candidates = dom.querySelectorAll('table tr td a[target="_blank"]')
    let href: string | undefined
    for (const anchor of Array.from(candidates)) {
      if (anchor.textContent !== 'View online') {
        continue
      }
      // Keep the previously found link when this anchor has no usable href,
      // instead of overwriting it with undefined.
      href = anchor.getAttribute('href') || href
    }
    return href
  }

  /**
   * Decides whether the email looks like a Revue newsletter.
   *
   * Only `input.html` is inspected; the header fields are ignored.
   *
   * @returns `true` when the HTML contains a Revue-hosted image and a
   *   "View online" link, `false` otherwise
   */
  async isNewsletter(input: {
    postHeader: string
    from: string
    unSubHeader: string
    html: string
  }): Promise<boolean> {
    // The base class declares `html` as optional, so callers going through
    // the base type may omit it.
    if (!input.html) {
      return false
    }
    const dom = parseHTML(input.html).document
    const hasRevueImage =
      dom.querySelectorAll('img[src*="getrevue.co"], img[src*="revue.email"]')
        .length > 0
    return hasRevueImage && this.findNewsletterHeaderHref(dom) !== undefined
  }

  /**
   * Resolves the newsletter URL from the email HTML via `findNewsletterUrl`.
   * `postHeader` is not used by this handler.
   */
  async parseNewsletterUrl(
    postHeader: string,
    html: string
  ): Promise<string | undefined> {
    return this.findNewsletterUrl(html)
  }
}
|
||||
@ -1,10 +1,10 @@
|
||||
import addressparser from 'addressparser'
|
||||
import { ContentHandler, PreHandleResult } from '../content-handler'
|
||||
import { parseHTML } from 'linkedom'
|
||||
|
||||
export class SubstackHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.defaultUrl = 'https://www.substack.com'
|
||||
this.name = 'substack'
|
||||
}
|
||||
|
||||
@ -38,15 +38,53 @@ export class SubstackHandler extends ContentHandler {
|
||||
return Promise.resolve(dom)
|
||||
}
|
||||
|
||||
isNewsletter(postHeader: string, from: string, unSubHeader: string): boolean {
|
||||
return !!postHeader
|
||||
findNewsletterHeaderHref(dom: Document): string | undefined {
|
||||
// Substack header links
|
||||
const postLink = dom.querySelector('h1 a ')
|
||||
if (postLink) {
|
||||
return postLink.getAttribute('href') || undefined
|
||||
}
|
||||
|
||||
return undefined
|
||||
}
|
||||
|
||||
parseNewsletterUrl(postHeader: string, html: string): string | undefined {
|
||||
async isNewsletter({
|
||||
postHeader,
|
||||
html,
|
||||
}: {
|
||||
postHeader: string
|
||||
from: string
|
||||
unSubHeader: string
|
||||
html: string
|
||||
}): Promise<boolean> {
|
||||
if (postHeader) {
|
||||
return Promise.resolve(true)
|
||||
}
|
||||
const dom = parseHTML(html).document
|
||||
// substack newsletter emails have tables with a *post-meta class
|
||||
if (dom.querySelector('table[class$="post-meta"]')) {
|
||||
return true
|
||||
}
|
||||
// If the article has a header link, and substack icons its probably a newsletter
|
||||
const href = this.findNewsletterHeaderHref(dom)
|
||||
const heartIcon = dom.querySelector(
|
||||
'table tbody td span a img[src*="HeartIcon"]'
|
||||
)
|
||||
const recommendIcon = dom.querySelector(
|
||||
'table tbody td span a img[src*="RecommendIconRounded"]'
|
||||
)
|
||||
return Promise.resolve(!!(href && (heartIcon || recommendIcon)))
|
||||
}
|
||||
|
||||
async parseNewsletterUrl(
|
||||
postHeader: string,
|
||||
html: string
|
||||
): Promise<string | undefined> {
|
||||
// raw SubStack newsletter url is like <https://hongbo130.substack.com/p/tldr>
|
||||
// we need to get the real url from the raw url
|
||||
return addressparser(postHeader).length > 0
|
||||
? addressparser(postHeader)[0].name
|
||||
: undefined
|
||||
if (postHeader && addressparser(postHeader).length > 0) {
|
||||
return Promise.resolve(addressparser(postHeader)[0].name)
|
||||
}
|
||||
return this.findNewsletterUrl(html)
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { AppleNewsHandler } from '../src/content/apple-news-handler'
|
||||
import { AppleNewsHandler } from '../src/websites/apple-news-handler'
|
||||
|
||||
describe('open a simple web page', () => {
|
||||
it('should return a response', async () => {
|
||||
|
||||
15
packages/content-handler/test/data/beehiiv-newsletter.html
Normal file
15
packages/content-handler/test/data/beehiiv-newsletter.html
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,28 +1,45 @@
|
||||
import 'mocha'
|
||||
import * as chai from 'chai'
|
||||
import { expect } from 'chai'
|
||||
import chaiAsPromised from 'chai-as-promised'
|
||||
import chaiString from 'chai-string'
|
||||
import { SubstackHandler } from '../src/newsletters/substack-handler'
|
||||
import { AxiosHandler } from '../src/newsletters/axios-handler'
|
||||
import { BloombergNewsletterHandler } from '../src/newsletters/bloomberg-newsletter-handler'
|
||||
import { GolangHandler } from '../src/newsletters/golang-handler'
|
||||
import { MorningBrewHandler } from '../src/newsletters/morning-brew-handler'
|
||||
import nock from 'nock'
|
||||
import { generateUniqueUrl } from '../src/content-handler'
|
||||
import fs from 'fs'
|
||||
import { BeehiivHandler } from '../src/newsletters/beehiiv-handler'
|
||||
|
||||
chai.use(chaiAsPromised)
|
||||
chai.use(chaiString)
|
||||
|
||||
// Reads the file at `path` synchronously and returns its contents decoded as UTF-8.
const load = (path: string): string => fs.readFileSync(path, 'utf8')
|
||||
|
||||
describe('Newsletter email test', () => {
|
||||
describe('#getNewsletterUrl()', () => {
|
||||
it('returns url when email is from SubStack', () => {
|
||||
it('returns url when email is from SubStack', async () => {
|
||||
const rawUrl = '<https://hongbo130.substack.com/p/tldr>'
|
||||
|
||||
expect(new SubstackHandler().parseNewsletterUrl(rawUrl, '')).to.equal(
|
||||
'https://hongbo130.substack.com/p/tldr'
|
||||
)
|
||||
await expect(
|
||||
new SubstackHandler().parseNewsletterUrl(rawUrl, '')
|
||||
).to.eventually.equal('https://hongbo130.substack.com/p/tldr')
|
||||
})
|
||||
|
||||
it('returns url when email is from Axios', () => {
|
||||
it('returns url when email is from Axios', async () => {
|
||||
const url = 'https://axios.com/blog/the-best-way-to-build-a-web-app'
|
||||
const html = `View in browser at <a>${url}</a>`
|
||||
|
||||
expect(new AxiosHandler().parseNewsletterUrl('', html)).to.equal(url)
|
||||
await expect(
|
||||
new AxiosHandler().parseNewsletterUrl('', html)
|
||||
).to.eventually.equal(url)
|
||||
})
|
||||
|
||||
it('returns url when email is from Bloomberg', () => {
|
||||
it('returns url when email is from Bloomberg', async () => {
|
||||
const url = 'https://www.bloomberg.com/news/google-is-now-a-partner'
|
||||
const html = `
|
||||
<a class="view-in-browser__url" href="${url}">
|
||||
@ -30,29 +47,31 @@ describe('Newsletter email test', () => {
|
||||
</a>
|
||||
`
|
||||
|
||||
expect(
|
||||
await expect(
|
||||
new BloombergNewsletterHandler().parseNewsletterUrl('', html)
|
||||
).to.equal(url)
|
||||
).to.eventually.equal(url)
|
||||
})
|
||||
|
||||
it('returns url when email is from Golang Weekly', () => {
|
||||
it('returns url when email is from Golang Weekly', async () => {
|
||||
const url = 'https://www.golangweekly.com/first'
|
||||
const html = `
|
||||
<a href="${url}" style="text-decoration: none">Read on the Web</a>
|
||||
`
|
||||
|
||||
expect(new GolangHandler().parseNewsletterUrl('', html)).to.equal(url)
|
||||
await expect(
|
||||
new GolangHandler().parseNewsletterUrl('', html)
|
||||
).to.eventually.equal(url)
|
||||
})
|
||||
|
||||
it('returns url when email is from Morning Brew', () => {
|
||||
it('returns url when email is from Morning Brew', async () => {
|
||||
const url = 'https://www.morningbrew.com/daily/issues/first'
|
||||
const html = `
|
||||
<a style="color: #000000; text-decoration: none;" target="_blank" rel="noopener" href="${url}">View Online</a>
|
||||
`
|
||||
|
||||
expect(new MorningBrewHandler().parseNewsletterUrl('', html)).to.equal(
|
||||
url
|
||||
)
|
||||
await expect(
|
||||
new MorningBrewHandler().parseNewsletterUrl('', html)
|
||||
).to.eventually.equal(url)
|
||||
})
|
||||
})
|
||||
|
||||
@ -69,4 +88,104 @@ describe('Newsletter email test', () => {
|
||||
expect(new AxiosHandler().parseAuthor(from)).to.equal('Mike Allen')
|
||||
})
|
||||
})
|
||||
|
||||
describe('isProbablyNewsletter', () => {
|
||||
it('returns true for substack newsletter', async () => {
|
||||
const html = load('./test/data/substack-forwarded-newsletter.html')
|
||||
await expect(
|
||||
new SubstackHandler().isNewsletter({
|
||||
html,
|
||||
postHeader: '',
|
||||
from: '',
|
||||
unSubHeader: '',
|
||||
})
|
||||
).to.eventually.be.true
|
||||
})
|
||||
it('returns true for private forwarded substack newsletter', async () => {
|
||||
const html = load(
|
||||
'./test/data/substack-private-forwarded-newsletter.html'
|
||||
)
|
||||
await expect(
|
||||
new SubstackHandler().isNewsletter({
|
||||
html,
|
||||
postHeader: '',
|
||||
from: '',
|
||||
unSubHeader: '',
|
||||
})
|
||||
).to.eventually.be.true
|
||||
})
|
||||
it('returns false for substack welcome email', async () => {
|
||||
const html = load('./test/data/substack-forwarded-welcome-email.html')
|
||||
await expect(
|
||||
new SubstackHandler().isNewsletter({
|
||||
html,
|
||||
postHeader: '',
|
||||
from: '',
|
||||
unSubHeader: '',
|
||||
})
|
||||
).to.eventually.be.false
|
||||
})
|
||||
it('returns true for beehiiv.com newsletter', async () => {
|
||||
const html = load('./test/data/beehiiv-newsletter.html')
|
||||
await expect(
|
||||
new BeehiivHandler().isNewsletter({
|
||||
html,
|
||||
postHeader: '',
|
||||
from: '',
|
||||
unSubHeader: '',
|
||||
})
|
||||
).to.eventually.be.true
|
||||
})
|
||||
})
|
||||
|
||||
// Exercises findNewsletterUrl against saved newsletter fixtures. The HEAD
// requests to the tracking links are intercepted with nock, which answers
// with a 302 redirect to the expected article URL.
// The suite callback is deliberately synchronous: Mocha registers tests while
// the describe callback runs and does not wait for a returned promise.
describe('findNewsletterUrl', () => {
  it('gets the URL from the header if it is a substack newsletter', async () => {
    nock('https://email.mg2.substack.com')
      .head(
        '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY'
      )
      .reply(302, undefined, {
        Location:
          'https://newsletter.slowchinese.net/p/companies-that-eat-people-217',
      })
      .get('/p/companies-that-eat-people-217')
      .reply(200, '')
    const html = load('./test/data/substack-forwarded-newsletter.html')
    const url = await new SubstackHandler().findNewsletterUrl(html)
    // Not sure if the redirects from substack expire, this test could eventually fail
    expect(url).to.startWith(
      'https://newsletter.slowchinese.net/p/companies-that-eat-people-217'
    )
  })
  it('gets the URL from the header if it is a beehiiv newsletter', async () => {
    nock('https://u23463625.ct.sendgrid.net')
      .head(
        '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno'
      )
      .reply(302, undefined, {
        Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple',
      })
      .get('/p/talked-guy-spent-30m-beeple')
      .reply(200, '')
    const html = load('./test/data/beehiiv-newsletter.html')
    const url = await new BeehiivHandler().findNewsletterUrl(html)
    expect(url).to.startWith(
      'https://www.milkroad.com/p/talked-guy-spent-30m-beeple'
    )
  })
  it('returns undefined if it is not a newsletter', async () => {
    const html = load('./test/data/substack-forwarded-welcome-email.html')
    const url = await new SubstackHandler().findNewsletterUrl(html)
    expect(url).to.be.undefined
  })
})
|
||||
|
||||
describe('generateUniqueUrl', () => {
|
||||
it('generates a unique URL', () => {
|
||||
const url1 = generateUniqueUrl()
|
||||
const url2 = generateUniqueUrl()
|
||||
|
||||
expect(url1).to.not.eql(url2)
|
||||
})
|
||||
})
|
||||
})
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { expect } from 'chai'
|
||||
import 'mocha'
|
||||
import { getYoutubeVideoId } from '../src/content/youtube-handler'
|
||||
import { getYoutubeVideoId } from '../src/websites/youtube-handler'
|
||||
|
||||
describe('getYoutubeVideoId', () => {
|
||||
it('should parse video id out of a URL', async () => {
|
||||
|
||||
Reference in New Issue
Block a user