Fix parsing authors from page metadata
This commit is contained in:
@ -374,7 +374,7 @@ export const parsePageMetadata = (html: string): Metadata | undefined => {
|
||||
|
||||
const author =
|
||||
window.document
|
||||
.querySelector("head meta[property='author']")
|
||||
.querySelector("head meta[name='author']")
|
||||
?.getAttribute('content') || undefined
|
||||
|
||||
// TODO: we should be able to apply the JSONLD metadata
|
||||
|
||||
@ -3,7 +3,7 @@ import { expect } from 'chai'
|
||||
import 'chai/register-should'
|
||||
import { JSDOM } from 'jsdom'
|
||||
import fs from 'fs'
|
||||
import { findNewsletterUrl, isProbablyNewsletter } from '../../src/utils/parser'
|
||||
import { findNewsletterUrl, isProbablyNewsletter, parsePageMetadata } from '../../src/utils/parser'
|
||||
|
||||
const load = (path: string): string => {
|
||||
return fs.readFileSync(path, 'utf8')
|
||||
@ -38,3 +38,14 @@ describe('findNewsletterUrl', async () => {
|
||||
})
|
||||
})
|
||||
|
||||
/**
 * Verifies that `parsePageMetadata` extracts the author, title, preview image
 * and description from the saved Substack post fixture.
 */
// The suite callback is intentionally not async: it only registers tests and
// contains nothing to await.
describe('parseMetadata', () => {
  it('gets author, title, image, description', () => {
    const html = load('./test/utils/data/substack-post.html')
    // parsePageMetadata is declared as returning `Metadata | undefined`
    // directly, so its result is used without `await`.
    const metadata = parsePageMetadata(html)

    // Every expectation compares against a string literal, so strict
    // equality is used rather than a deep comparison.
    expect(metadata?.author).to.equal('Omnivore')
    expect(metadata?.title).to.equal('Code Block Syntax Highlighting')
    expect(metadata?.previewImage).to.equal('https://cdn.substack.com/image/fetch/w_1200,h_600,c_fill,f_jpg,q_auto:good,fl_progressive:steep,g_auto/https%3A%2F%2Fbucketeer-e05bbc84-baa3-437e-9518-adb32be77984.s3.amazonaws.com%2Fpublic%2Fimages%2F2ab1f7e8-2ca7-4011-8ccb-43d0b3bd244f_1490x2020.png')
    expect(metadata?.description).to.equal('Highlighted <code> in Omnivore')
  })
})
|
||||
|
||||
Reference in New Issue
Block a user