diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts
new file mode 100644
index 000000000..581959543
--- /dev/null
+++ b/packages/text-to-speech/src/htmlToSsml.ts
@@ -0,0 +1,178 @@
+
+
+
+import { parseHTML } from 'linkedom'
+
+// NOTE: this code must be kept in sync with the frontend code in
+// useReadingProgressAnchor.
+
+const ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES = [ // attribute names checked by parseDomTree: an element carrying any of them is skipped before its children are queued, so neither it nor its subtree gets an anchor index
+ 'omnivore-highlight-id', // presumably marks Omnivore highlight wrappers — TODO confirm against the frontend
+ 'data-twitter-tweet-id', // presumably marks embedded tweets — TODO confirm
+ 'data-instagram-id', // presumably marks embedded Instagram posts — TODO confirm
+]
+
+function ssmlTagsForTopLevelElement(element: Element) {
+ // if (element.nodeName == 'BLOCKQUOTE') {
+ // return {
+ // opening: ` `,
+ // closing: `
`, + closing: `
` + } +} + +function parseDomTree(pageNode: Element) { + if (!pageNode || pageNode.childNodes.length == 0) { + console.log(' no child nodes found') + return [] + } + + const nodesToVisitStack = [pageNode] + const visitedNodeList = [] + + while (nodesToVisitStack.length > 0) { + const currentNode = nodesToVisitStack.pop() + if ( + currentNode?.nodeType !== 1 /* Node.ELEMENT_NODE */ || + // Avoiding dynamic elements from being counted as anchor-allowed elements + ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES.some((attrib) => + currentNode.hasAttribute(attrib) + ) + ) { + continue + } + + visitedNodeList.push(currentNode) + ;[].slice + .call(currentNode.childNodes) + .reverse() + .forEach(function (node) { + nodesToVisitStack.push(node) + }) + } + + visitedNodeList.shift() + visitedNodeList.forEach((node, index) => { + // start from index 1, index 0 reserved for anchor unknown. + node.setAttribute('data-omnivore-anchor-idx', (index + 1).toString()) + }) + return visitedNodeList +} + +function emit(textItems: string[], text: string) { + textItems.push(text) +} + +function cleanTextNode(textNode: ChildNode): String { + return (textNode.textContent ?? '').replace(/\s+/g, ' ') +} + +function emitTextNode(textItems: string[], cleanedText: String, textNode: ChildNode) { + const ssmlElement = textNode.parentNode?.nodeName === 'B' ? 
"emphasis" : undefined + if (!cleanedText) { return } + + if (ssmlElement) { + emit(textItems, `<${ssmlElement}>`) + } + emit(textItems, `${cleanedText}`) + if (ssmlElement) { + emit(textItems, `${ssmlElement}>`) + } +} + +function emitElement(textItems: string[], element: Element, isTopLevel: Boolean) { + const SKIP_TAGS = ['SCRIPT', 'STYLE', 'IMG', 'FIGURE', 'FIGCAPTION', 'IFRAME'] + + const topLevelTags = ssmlTagsForTopLevelElement(element) + const idx = element.getAttribute('data-omnivore-anchor-idx') + var maxVisitedIdx = Number(idx) + + if (isTopLevel) { + emit(textItems, topLevelTags.opening) + } + + for (const child of Array.from(element.childNodes)) { + if (SKIP_TAGS.indexOf(child.nodeName) >= 0) { + continue + } + + if (child.nodeType == 3 /* Node.TEXT_NODE */ && (child.textContent?.length ?? 0) > 0 ) { + const cleanedText = cleanTextNode(child) + if (cleanedText.length > 1) { // Make sure its more than just a space + emit(textItems, `this is some text
++this is in the first paragraph +this is in the second span +this is also in the first paragraph +
+first
+second+
third
+