Add a new HTML to SSML function
This commit is contained in:
178
packages/text-to-speech/src/htmlToSsml.ts
Normal file
178
packages/text-to-speech/src/htmlToSsml.ts
Normal file
@ -0,0 +1,178 @@
|
||||
|
||||
|
||||
|
||||
import { parseHTML } from 'linkedom'
|
||||
|
||||
// Attributes that identify dynamic elements which must not be counted as
// anchor-allowed elements when anchor indices are assigned.
// NOTE: this list needs to be kept in sync with the frontend code in
// useReadingProgressAnchor.
const ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES: string[] = [
  'omnivore-highlight-id',
  'data-twitter-tweet-id',
  'data-instagram-id',
]
|
||||
|
||||
/**
 * Picks the SSML tags that wrap the spoken content of a top-level element.
 *
 * Every element is currently wrapped in a plain SSML paragraph; `element` is
 * not inspected. (A previously disabled experiment wrapped BLOCKQUOTE content
 * in `<voice name="en-US-GuyNeural"><p>` ... `</p></voice>` instead.)
 *
 * @param element - the top-level element being emitted (currently unused)
 * @returns the opening and closing SSML tags for the element
 */
function ssmlTagsForTopLevelElement(element: Element) {
  const opening = '<p>'
  const closing = '</p>'
  return { opening, closing }
}
|
||||
|
||||
/**
 * Walks the DOM below `pageNode` depth-first in document order and stamps
 * every visited element with a `data-omnivore-anchor-idx` attribute.
 *
 * Non-element nodes are ignored. An element carrying any attribute listed in
 * ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES is skipped together with its whole
 * subtree, so dynamic elements are not counted as anchor-allowed elements.
 *
 * @param pageNode - root element of the page; it is traversed but does not
 *   receive an anchor index itself
 * @returns the indexed elements in document order; the element at position
 *   `k` carries anchor index `k + 1`. Empty when `pageNode` is missing or has
 *   no child nodes.
 */
function parseDomTree(pageNode: Element): Element[] {
  if (!pageNode || pageNode.childNodes.length === 0) {
    console.log(' no child nodes found')
    return []
  }

  // The stack holds every kind of child node (text, comments, ...), not only
  // elements; non-elements are filtered out when they are popped.
  const nodesToVisitStack: Node[] = [pageNode]
  const visitedNodeList: Element[] = []

  while (nodesToVisitStack.length > 0) {
    const currentNode = nodesToVisitStack.pop()
    if (currentNode === undefined || currentNode.nodeType !== 1 /* Node.ELEMENT_NODE */) {
      continue
    }

    const currentElement = currentNode as Element
    // Avoiding dynamic elements from being counted as anchor-allowed elements
    const isBlocked = ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES.some((attrib) =>
      currentElement.hasAttribute(attrib)
    )
    if (isBlocked) {
      continue
    }

    visitedNodeList.push(currentElement)

    // Push children last-to-first so the first child is popped (visited) next.
    for (const child of Array.from(currentElement.childNodes).reverse()) {
      nodesToVisitStack.push(child)
    }
  }

  // Drop the root itself: only its descendants receive anchor indices.
  visitedNodeList.shift()
  visitedNodeList.forEach((node, index) => {
    // start from index 1, index 0 reserved for anchor unknown.
    node.setAttribute('data-omnivore-anchor-idx', (index + 1).toString())
  })
  return visitedNodeList
}
|
||||
|
||||
/**
 * Appends one SSML fragment to the end of the output buffer.
 *
 * @param textItems - buffer of SSML fragments, mutated in place
 * @param text - the fragment to append
 */
function emit(textItems: string[], text: string): void {
  textItems.push(text)
}
|
||||
|
||||
/**
 * Produces the SSML-safe text of a DOM text node.
 *
 * Runs of whitespace are collapsed into a single space, and the XML special
 * characters `&`, `<` and `>` are replaced by their entities. `textContent`
 * is entity-decoded, so without escaping an ampersand or angle bracket in the
 * article text would end up as raw markup inside the generated SSML.
 *
 * @param textNode - the node whose text content is read
 * @returns the cleaned text as a primitive string; empty when the node has no
 *   text content
 */
function cleanTextNode(textNode: ChildNode): string {
  const collapsed = (textNode.textContent ?? '').replace(/\s+/g, ' ')
  // `&` must be escaped first so the entities added below are not re-escaped.
  return collapsed
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
}
|
||||
|
||||
/**
 * Emits the text of a text node, wrapping it in `<emphasis>` when the node's
 * direct parent is a `B` element.
 *
 * Nothing is emitted when `cleanedText` is empty.
 *
 * @param textItems - buffer of SSML fragments, appended to via `emit`
 * @param cleanedText - the already cleaned text of `textNode`
 * @param textNode - the source text node; only its parent's node name is read
 */
function emitTextNode(textItems: string[], cleanedText: String, textNode: ChildNode) {
  const wrapper = textNode.parentNode?.nodeName === 'B' ? 'emphasis' : undefined
  if (!cleanedText) {
    return
  }

  const fragments =
    wrapper === undefined
      ? [`${cleanedText}`]
      : [`<${wrapper}>`, `${cleanedText}`, `</${wrapper}>`]

  for (const fragment of fragments) {
    emit(textItems, fragment)
  }
}
|
||||
|
||||
/**
 * Recursively emits the SSML fragments for `element` and its descendants.
 *
 * Text nodes are emitted through `emitTextNode`, preceded by a
 * `<bookmark mark="…" />` carrying the anchor index of the element that
 * directly contains them. Children whose tag is in `SKIP_TAGS` are ignored
 * entirely. When `isTopLevel` is true the output is wrapped in the tags
 * returned by `ssmlTagsForTopLevelElement`.
 *
 * @param textItems - buffer of SSML fragments, appended to in place
 * @param element - the element to emit
 * @param isTopLevel - whether to wrap the output in the top-level tags
 * @returns the highest anchor index among `element` and the descendant
 *   elements that were emitted (0 if none of them carries an index); the
 *   caller uses it to skip nodes that were already emitted
 */
function emitElement(textItems: string[], element: Element, isTopLevel: boolean): number {
  const SKIP_TAGS = ['SCRIPT', 'STYLE', 'IMG', 'FIGURE', 'FIGCAPTION', 'IFRAME']

  const topLevelTags = ssmlTagsForTopLevelElement(element)
  const idx = element.getAttribute('data-omnivore-anchor-idx')

  // Elements without an anchor index yield Number(null) === 0; a malformed
  // attribute would yield NaN, which is treated as "no index" as well.
  const ownIdx = Number(idx)
  let maxVisitedIdx = Number.isFinite(ownIdx) ? ownIdx : 0

  if (isTopLevel) {
    emit(textItems, topLevelTags.opening)
  }

  for (const child of Array.from(element.childNodes)) {
    if (SKIP_TAGS.includes(child.nodeName)) {
      continue
    }

    if (child.nodeType === 3 /* Node.TEXT_NODE */ && (child.textContent?.length ?? 0) > 0) {
      const cleanedText = cleanTextNode(child)
      // Only bookmark real text (more than just a space), and only when this
      // element has an anchor index; otherwise the mark would be "null".
      if (idx !== null && cleanedText.length > 1) {
        emit(textItems, `<bookmark mark="${idx}" />`)
      }
      emitTextNode(textItems, cleanedText, child)
    } else if (child.nodeType === 1 /* Node.ELEMENT_NODE */) {
      // Keep the maximum: a trailing child without an index returns 0 and
      // must not reset the progress made by earlier siblings.
      maxVisitedIdx = Math.max(
        maxVisitedIdx,
        emitElement(textItems, child as Element, false)
      )
    }
  }

  if (isTopLevel) {
    emit(textItems, topLevelTags.closing)
  }

  return maxVisitedIdx
}
|
||||
|
||||
/**
 * One SSML document, kept in pieces: the opening markup, the body fragments
 * and the closing markup. `ssmlItemText` joins them in that order.
 */
export type SSMLItem = {
  /** Markup placed before the body fragments. */
  open: string
  /** Body fragments, in the order they were emitted. */
  textItems: string[]
  /** Markup placed after the body fragments. */
  close: string
}
|
||||
|
||||
/**
 * Names of the voices used when generating SSML.
 */
export type VoiceOptions = {
  /** Voice used for every element other than BLOCKQUOTE. */
  primary: string
  /** Voice used for BLOCKQUOTE elements. */
  secondary: string
}
|
||||
|
||||
/**
 * Builds the opening SSML markup (`<speak>`, `<voice>` and `<prosody>`) for a
 * top-level element.
 *
 * @param element - the element about to be emitted; BLOCKQUOTE elements are
 *   given the secondary voice, every other element the primary one
 * @param voices - the voice names to choose from
 * @returns the opening markup, surrounded by whitespace from the template
 */
const startSsml = (element: Element, voices: VoiceOptions): string => {
  const isBlockquote = element.nodeName === 'BLOCKQUOTE'
  const voice = isBlockquote ? voices.secondary : voices.primary

  return `
    <speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US"><voice name="${voice}"><prosody rate="0%" pitch="0%">
  `
}
|
||||
|
||||
/**
 * Builds the closing SSML markup, closing `<prosody>`, `<voice>` and
 * `<speak>` in that order.
 *
 * @returns the closing markup
 */
const endSsml = (): string => '</prosody></voice></speak>'
|
||||
|
||||
/**
 * Serializes an SSML item into a single string: the opening markup, every
 * body fragment and the closing markup, concatenated without separators.
 *
 * @param item - the item to serialize
 * @returns the complete SSML text of the item
 */
export const ssmlItemText = (item: SSMLItem): string => {
  const parts = [item.open].concat(item.textItems, [item.close])
  return parts.join('')
}
|
||||
|
||||
/**
 * Converts a readability-processed HTML page into a list of SSML items, one
 * per top-level chunk of content.
 *
 * The page content is looked up under `#readability-page-1`, its elements are
 * indexed by `parseDomTree`, and each element that was not already emitted as
 * part of an earlier element becomes its own item.
 *
 * @param html - the HTML document to convert
 * @param voices - voice names passed to `startSsml` for each item
 * @returns the SSML items in document order
 * @throws Error when the page has no `#readability-page-1` element
 * @throws Error when no elements were found below it
 */
export const htmlToSsml = (html: string, voices: VoiceOptions): SSMLItem[] => {
  const dom = parseHTML(html)
  const body = dom.document.querySelector('#readability-page-1')
  if (!body) {
    throw new Error('Unable to parse HTML document')
  }

  const parsedNodes = parseDomTree(body)
  if (parsedNodes.length < 1) {
    throw new Error('No HTML nodes found')
  }

  const items: SSMLItem[] = []
  // parsedNodes[k] carries anchor index k + 1, so `i` is always the anchor
  // index of the node being emitted.
  for (let i = 1; i <= parsedNodes.length; i++) {
    const textItems: string[] = []
    const node = parsedNodes[i - 1]

    const lastEmittedIdx = emitElement(textItems, node, true)
    items.push({
      open: startSsml(node, voices),
      close: endSsml(),
      textItems: textItems,
    })

    // Skip the descendants that were emitted as part of this node, but never
    // move backwards: rewinding `i` would re-emit nodes and loop forever.
    if (lastEmittedIdx > i) {
      i = lastEmittedIdx
    }
  }

  return items
}
|
||||
75
packages/text-to-speech/test/htmlToSsml.test.ts
Normal file
75
packages/text-to-speech/test/htmlToSsml.test.ts
Normal file
@ -0,0 +1,75 @@
|
||||
import 'mocha'
|
||||
import { expect } from 'chai'
|
||||
import { htmlToSsml } from '../src/htmlToSsml'
|
||||
|
||||
// Tests for htmlToSsml: each case feeds a small readability-style page and
// checks the generated SSML fragments (and, for blockquotes, the voices).
describe('htmlToSsml', () => {
  const TEST_VOICES = { primary: 'test-primary', secondary: 'test-secondary' }

  describe('a simple html file', () => {
    it('should convert Html to SSML', () => {
      const ssml = htmlToSsml(
        `
        <div class="page" id="readability-page-1">
          <p data-omnivore-anchor-idx="1">this is some text</p>
        </div>
      `,
        TEST_VOICES
      )
      const text = ssml[0].textItems.join('').trim()
      expect(text).to.equal(`<p><bookmark mark="1" />this is some text</p>`)
    })
  })

  describe('a file with nested elements', () => {
    it('should convert Html to SSML', () => {
      const ssml = htmlToSsml(
        `
        <div class="page" id="readability-page-1">
          <p>
            this is in the first paragraph
            <span>this is in the second span</span>
            this is also in the first paragraph
          </p>
        </div>
      `,
        TEST_VOICES
      )
      // Text after the nested span is bookmarked with the paragraph's index again.
      const text = ssml[0].textItems.join('').trim()
      expect(text).to.equal(
        `<p><bookmark mark="1" /> this is in the first paragraph <bookmark mark="2" />this is in the second span<bookmark mark="1" /> this is also in the first paragraph </p>`.trim()
      )
    })
  })

  describe('a file with blockquotes', () => {
    it('should convert Html to SSML with complementary voices', () => {
      const ssml = htmlToSsml(
        `
        <div class="page" id="readability-page-1">
          <p>first</p>
          <blockquote>second</blockquote>
          <p>third</p>
        </div>
      `,
        TEST_VOICES
      )
      const first = ssml[0].textItems.join('').trim()
      const second = ssml[1].textItems.join('').trim()
      const third = ssml[2].textItems.join('').trim()

      expect(first).to.equal(`<p><bookmark mark="1" />first</p>`)
      expect(second).to.equal(`<p><bookmark mark="2" />second</p>`)
      expect(third).to.equal(`<p><bookmark mark="3" />third</p>`)

      // Paragraphs use the primary voice, the blockquote the secondary one.
      expect(ssml[0].open.trim()).to.equal(
        `<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US"><voice name="test-primary"><prosody rate="0%" pitch="0%">`
      )
      expect(ssml[1].open.trim()).to.equal(
        `<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US"><voice name="test-secondary"><prosody rate="0%" pitch="0%">`
      )
      expect(ssml[2].open.trim()).to.equal(
        `<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US"><voice name="test-primary"><prosody rate="0%" pitch="0%">`
      )
    })
  })
})
|
||||
Reference in New Issue
Block a user