Remove parsing newsletter emails from forwarded emails
This commit is contained in:
@ -450,150 +450,6 @@ export const parseUrlMetadata = async (
|
||||
}
|
||||
}
|
||||
|
||||
// Heuristically decide whether an HTML blob is a newsletter,
// based on its contents.
// TODO: when we consolidate the handlers we could include this
// as a utility method on each one.
//
// Resolves to false when Readability yields no article content; otherwise
// resolves to true as soon as one of the provider-specific markup checks
// (substack, beehiiv, revue, convertkit) matches.
export const isProbablyNewsletter = async (html: string): Promise<boolean> => {
  const dom = parseHTML(html).document

  // Readability is handed a separately parsed copy of the document; every
  // selector check below queries `dom` itself.
  const readabilityDom = parseHTML(dom.documentElement.outerHTML).document
  const article = await new Readability(readabilityDom, {
    debug: false,
    keepTables: true,
  }).parse()

  if (!(article && article.content)) {
    return false
  }

  // True when at least one element in `dom` matches the selector.
  const hasMatch = (selector: string): boolean =>
    dom.querySelectorAll(selector).length > 0

  // substack newsletter emails have tables with a *post-meta class
  if (dom.querySelector('table[class$="post-meta"]')) {
    return true
  }

  // A header link combined with one of the substack icons is treated as
  // a newsletter.
  const headerHref = findNewsletterHeaderHref(dom)
  const heart = dom.querySelector(
    'table tbody td span a img[src*="HeartIcon"]'
  )
  const recommend = dom.querySelector(
    'table tbody td span a img[src*="RecommendIconRounded"]'
  )
  if (headerHref && (heart || recommend)) {
    return true
  }

  // beehiiv.net: needs both a beehiiv-hosted image and a "Read Online" link
  if (hasMatch('img[src*="beehiiv.net"]') && beehiivNewsletterHref(dom)) {
    return true
  }

  // revue: needs both a revue-hosted image and a "View online" link
  if (
    hasMatch('img[src*="getrevue.co"], img[src*="revue.email"]') &&
    revueNewsletterHref(dom)
  ) {
    return true
  }

  // convertkit.com: a convertkit-hosted image alone is enough
  return hasMatch(
    'img[src*="convertkit.com"], img[src*="convertkit-mail.com"]'
  )
}
|
||||
|
||||
// Looks for a beehiiv "Read Online" link inside the email's table layout
// and returns its href.
//
// The link text is trimmed before comparison so anchors whose markup
// contains surrounding whitespace or newlines are still recognised.
// When several links match, the last one wins; a matching link with a
// missing or empty href yields undefined.
const beehiivNewsletterHref = (dom: Document): string | undefined => {
  const candidates = Array.from(
    dom.querySelectorAll('table tr td div a[class*="link"]')
  )

  let href: string | undefined
  for (const link of candidates) {
    if ((link.textContent || '').trim() === 'Read Online') {
      href = link.getAttribute('href') || undefined
    }
  }
  return href
}
|
||||
|
||||
// Looks for a convertkit "View this email in your browser" link inside the
// email's table layout and returns its href.
//
// The link text is trimmed before comparison so anchors whose markup
// contains surrounding whitespace or newlines are still recognised.
// When several links match, the last one wins; a matching link with a
// missing or empty href yields undefined.
const convertkitNewsletterHref = (dom: Document): string | undefined => {
  const candidates = Array.from(dom.querySelectorAll('table tr td a'))

  let href: string | undefined
  for (const link of candidates) {
    if ((link.textContent || '').trim() === 'View this email in your browser') {
      href = link.getAttribute('href') || undefined
    }
  }
  return href
}
|
||||
|
||||
// Looks for a revue "View online" link (an anchor with target="_blank")
// inside the email's table layout and returns its href.
//
// The link text is trimmed before comparison so anchors whose markup
// contains surrounding whitespace or newlines are still recognised.
// When several links match, the last one wins; a matching link with a
// missing or empty href yields undefined.
const revueNewsletterHref = (dom: Document): string | undefined => {
  const candidates = Array.from(
    dom.querySelectorAll('table tr td a[target="_blank"]')
  )

  let href: string | undefined
  for (const link of candidates) {
    if ((link.textContent || '').trim() === 'View online') {
      href = link.getAttribute('href') || undefined
    }
  }
  return href
}
|
||||
|
||||
// Finds the link that points at the online version of a newsletter.
//
// A substack-style header link (an anchor inside an h1) takes precedence:
// when one exists its href is returned, or undefined if it has none, and
// no further lookup is attempted. Otherwise the beehiiv, revue and
// convertkit lookups are tried in that order and the first URL found is
// returned.
const findNewsletterHeaderHref = (dom: Document): string | undefined => {
  // Substack header links
  const headerLink = dom.querySelector('h1 a ')
  if (headerLink) {
    return headerLink.getAttribute('href') || undefined
  }

  // Provider-specific fallbacks; each returns either a non-empty URL or
  // undefined, so `||` selects the first hit.
  return (
    beehiivNewsletterHref(dom) ||
    revueNewsletterHref(dom) ||
    convertkitNewsletterHref(dom) ||
    undefined
  )
}
|
||||
|
||||
// Given an HTML blob, tries to find a URL to use as the canonical URL.
//
// Resolves to undefined when no newsletter header link is found in the
// HTML. Otherwise a HEAD request is made to the link so the redirected
// URL can be used, since these links will usually be behind tracking url
// redirects. If the request fails, or succeeds without exposing a final
// URL, the original link is returned instead of dropping it.
export const findNewsletterUrl = async (
  html: string
): Promise<string | undefined> => {
  const dom = parseHTML(html).document

  const href = findNewsletterHeaderHref(dom)
  if (!href) {
    return undefined
  }

  try {
    const response = await axios({
      method: 'HEAD',
      url: href,
    })
    // The final URL is read from the underlying request object; if that
    // object has a different shape the property access throws and the
    // catch below falls back to the original link.
    const redirectedUrl: unknown = response.request.res.responseUrl
    return typeof redirectedUrl === 'string' && redirectedUrl.length > 0
      ? redirectedUrl
      : href
  } catch {
    // Best effort only: keep the unresolved link when the request fails.
    return href
  }
}
|
||||
|
||||
export const isProbablyArticle = async (
|
||||
email: string,
|
||||
subject: string
|
||||
|
||||
Reference in New Issue
Block a user