diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts
new file mode 100644
index 000000000..581959543
--- /dev/null
+++ b/packages/text-to-speech/src/htmlToSsml.ts
@@ -0,0 +1,178 @@
+
+
+
+import { parseHTML } from 'linkedom'
+
+// This code must be kept in sync with the
+// frontend code in useReadingProgressAnchor.
+
+// Attribute names that mark dynamic elements; parseDomTree skips any
+// element carrying one of them, so it never receives an anchor index.
+const ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES = [
+  'omnivore-highlight-id', 'data-twitter-tweet-id', 'data-instagram-id',
+]
+
+/**
+ * Returns the SSML tags that wrap the content of a top-level element.
+ *
+ * Every element is currently wrapped in a plain `<p>` paragraph; `element`
+ * is accepted but not inspected.
+ *
+ * @param element - the top-level element being converted (currently unused)
+ * @returns the `opening` and `closing` tag strings
+ */
+function ssmlTagsForTopLevelElement(element: Element) {
+  return { opening: '<p>', closing: '</p>' }
+}
+
+/**
+ * Walks the tree below `pageNode` depth-first in document order and stamps
+ * each visited descendant element with a `data-omnivore-anchor-idx` attribute.
+ * Indexes start at 1 because index 0 is reserved for "anchor unknown".
+ *
+ * Non-element nodes are ignored, and an element carrying any attribute from
+ * ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES is skipped together with its subtree.
+ *
+ * @param pageNode - root of the traversal; it is visited but not indexed
+ * @returns the indexed elements in visit order; `[]` if the root is missing
+ *   or has no child nodes
+ */
+function parseDomTree(pageNode: Element) {
+  if (!pageNode || pageNode.childNodes.length === 0) {
+    console.log(' no child nodes found')
+    return []
+  }
+
+  const pending: Node[] = [pageNode]
+  const indexed: Element[] = []
+  while (pending.length > 0) {
+    const candidate = pending.pop()
+    if (candidate?.nodeType !== 1 /* Node.ELEMENT_NODE */) continue
+    const element = candidate as Element // safe: nodeType was just checked
+    // Dynamic elements must not be counted as anchor-allowed elements.
+    if (ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES.some((name) => element.hasAttribute(name))) continue
+    indexed.push(element)
+    // Push children last-to-first so that the first child is popped next.
+    const children = Array.from(element.childNodes)
+    for (let k = children.length - 1; k >= 0; k--) pending.push(children[k])
+  }
+
+  // The root was visited first; drop it so numbering covers descendants only.
+  indexed.shift()
+  indexed.forEach((el, pos) => el.setAttribute('data-omnivore-anchor-idx', String(pos + 1)))
+  return indexed
+}
+
+/** Appends `text` to the end of the `textItems` output buffer, mutating it in place. */
+function emit(textItems: string[], text: string): void {
+  textItems.push(text)
+}
+/** Returns the node's text ('' when null) with each whitespace run collapsed to one space. */
+function cleanTextNode(textNode: ChildNode): string {
+  return (textNode.textContent ?? '').replace(/\s+/g, ' ')
+}
+/**
+ * Emits a cleaned text fragment, wrapped in `<emphasis>` tags when the text
+ * node's direct parent is a `B` element. Falsy (empty) text emits nothing.
+ */
+function emitTextNode(textItems: string[], cleanedText: String, textNode: ChildNode) {
+  const isBold = textNode.parentNode?.nodeName === 'B'
+  if (!cleanedText) return
+
+  const fragment = `${cleanedText}`
+  const pieces = isBold ? ['<emphasis>', fragment, '</emphasis>'] : [fragment]
+  for (const piece of pieces) emit(textItems, piece)
+}
+
+/** Highest `data-omnivore-anchor-idx` within `root`'s subtree, `root` included (0 if none). */
+function maxAnchorIdxInSubtree(root: Element): number {
+  const own = Number(root.getAttribute('data-omnivore-anchor-idx')) || 0
+  const elements = Array.from(root.childNodes).filter((n) => n.nodeType === 1 /* Node.ELEMENT_NODE */)
+  return Math.max(own, ...elements.map((el) => maxAnchorIdxInSubtree(el as Element)))
+}
+/**
+ * Emits SSML for `element` and its descendants into `textItems`. Text longer
+ * than one character is preceded by a `<bookmark>` holding the element's
+ * anchor index; element children named in SKIP_TAGS are not emitted.
+ * @returns the highest anchor index in the subtree (skipped children
+ *   included), never lower than the element's own index
+ */
+function emitElement(textItems: string[], element: Element, isTopLevel: boolean): number {
+  const SKIP_TAGS = ['SCRIPT', 'STYLE', 'IMG', 'FIGURE', 'FIGCAPTION', 'IFRAME']
+  const topLevelTags = ssmlTagsForTopLevelElement(element)
+  const idx = element.getAttribute('data-omnivore-anchor-idx')
+  let maxVisitedIdx = Number(idx) || 0
+  if (isTopLevel) emit(textItems, topLevelTags.opening)
+  for (const child of Array.from(element.childNodes)) {
+    if (child.nodeType === 1 /* Node.ELEMENT_NODE */) {
+      // Fold with max so a skipped or un-indexed child (Number(null) is 0) can never lower the result.
+      const skipped = SKIP_TAGS.indexOf(child.nodeName) >= 0
+      const childMax = skipped ? maxAnchorIdxInSubtree(child as Element) : emitElement(textItems, child as Element, false)
+      maxVisitedIdx = Math.max(maxVisitedIdx, childMax)
+    } else if (child.nodeType === 3 /* Node.TEXT_NODE */ && (child.textContent?.length ?? 0) > 0) {
+      const cleanedText = cleanTextNode(child)
+      if (cleanedText.length > 1) emit(textItems, `<bookmark mark="${idx}" />`) // skip lone spaces
+      emitTextNode(textItems, cleanedText, child)
+    }
+  }
+  if (isTopLevel) emit(textItems, topLevelTags.closing)
+  return maxVisitedIdx
+}
+
+/**
+ * One SSML document to synthesize: `open` and `close` hold the wrapping
+ * markup and `textItems` the ordered fragments between them.
+ */
+export type SSMLItem = { open: string; close: string; textItems: string[] }
+
+/**
+ * Voice names: startSsml uses `secondary` for BLOCKQUOTE elements, `primary` otherwise.
+ */
+export type VoiceOptions = { primary: string; secondary: string }
+
+// Builds the opening <speak>/<voice>/<prosody> markup for one item; the
+// secondary voice is used when `element` is a BLOCKQUOTE.
+const startSsml = (element: Element, voices: VoiceOptions): string => {
+  const voice = voices[element.nodeName === 'BLOCKQUOTE' ? 'secondary' : 'primary']
+  return `\n<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US"><voice name="${voice}"><prosody rate="0%" pitch="0%">\n  `
+}
+
+// Closing counterpart of startSsml: closes the <prosody>, <voice> and
+// <speak> elements, in that order.
+const endSsml = (): string => '</prosody></voice></speak>'
+
+/**
+ * Serializes an SSML item into one string: `open`, then every entry of
+ * `textItems` in order, then `close`, concatenated without separators.
+ */
+export const ssmlItemText = (item: SSMLItem): string => {
+  return [item.open].concat(item.textItems, item.close).join('')
+}
+
+/**
+ * Converts an HTML document into SSML items. Items are produced in document
+ * order; each item covers one element plus everything emitElement consumed.
+ *
+ * @param html - markup containing an element with id `readability-page-1`
+ * @param voices - voice names; `secondary` is used for BLOCKQUOTE items
+ * @throws Error if that element is missing or no elements were indexed
+ */
+export const htmlToSsml = (html: string, voices: VoiceOptions): SSMLItem[] => {
+  const body = parseHTML(html).document.querySelector('#readability-page-1')
+  if (!body) throw new Error('Unable to parse HTML document')
+  const parsedNodes = parseDomTree(body)
+  if (parsedNodes.length < 1) throw new Error('No HTML nodes found')
+
+  const items: SSMLItem[] = []
+  // Anchor indexes are 1-based: parsedNodes[idx - 1] carries index `idx`.
+  for (let idx = 1; idx <= parsedNodes.length; idx++) {
+    const node = parsedNodes[idx - 1]
+    const textItems: string[] = []
+    // Jump past everything emitElement consumed, but never backwards: a
+    // lower (or NaN) result would otherwise re-emit nodes or loop forever.
+    const lastIdx = emitElement(textItems, node, true)
+    if (lastIdx > idx) idx = lastIdx
+    items.push({ open: startSsml(node, voices), close: endSsml(), textItems })
+  }
+  return items
+}
diff --git a/packages/text-to-speech/test/htmlToSsml.test.ts b/packages/text-to-speech/test/htmlToSsml.test.ts
new file mode 100644
index 000000000..424de5d9d
--- /dev/null
+++ b/packages/text-to-speech/test/htmlToSsml.test.ts
@@ -0,0 +1,75 @@
+import 'mocha'
+import { expect } from 'chai'
+import { htmlToSsml } from '../src/htmlToSsml'
+
+describe('htmlToSsml', () => {
+  const TEST_VOICES = { primary: 'test-primary', secondary: 'test-secondary' }
+  // Joins an item's fragments into its trimmed SSML body.
+  const bodyOf = (item: { textItems: string[] }): string => item.textItems.join('').trim()
+  // Opening markup expected for an item that is spoken with `voice`.
+  const openFor = (voice: string): string =>
+    `<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US"><voice name="${voice}"><prosody rate="0%" pitch="0%">`
+
+  describe('a simple html file', () => {
+    it('should convert Html to SSML', async () => {
+      const html = `
+        <div class="page" id="readability-page-1">
+          <p data-omnivore-anchor-idx="1">this is some text</p>
+        </div>
+      `
+      const ssml = htmlToSsml(html, TEST_VOICES)
+
+      // The single paragraph is wrapped in <p> and bookmarked with index 1.
+      expect(bodyOf(ssml[0])).to.equal(
+        `<p><bookmark mark="1" />this is some text</p>`
+      )
+    })
+  })
+
+  describe('a file with nested elements', () => {
+    it('should convert Html to SSML', async () => {
+      const html = `
+<div class="page" id="readability-page-1">
+<p>
+this is in the first paragraph
+<span>this is in the second span</span>
+this is also in the first paragraph
+</p>
+</div>
+      `
+      const ssml = htmlToSsml(html, TEST_VOICES)
+
+      // Text after the nested span is bookmarked with the paragraph's index again.
+      expect(bodyOf(ssml[0])).to.equal(
+        `<p><bookmark mark="1" /> this is in the first paragraph <bookmark mark="2" />this is in the second span<bookmark mark="1" /> this is also in the first paragraph </p>`
+      )
+    })
+  })
+
+  describe('a file with blockquotes', () => {
+    it('should convert Html to SSML with complimentary voices', async () => {
+      const html = `
+<div class="page" id="readability-page-1">
+<p>first</p>
+<blockquote>second</blockquote>
+<p>third</p>
+</div>
+      `
+      const ssml = htmlToSsml(html, TEST_VOICES)
+
+      // Every top-level element becomes its own item with its own bookmark index.
+      const expectedBodies = [
+        `<p><bookmark mark="1" />first</p>`,
+        `<p><bookmark mark="2" />second</p>`,
+        `<p><bookmark mark="3" />third</p>`,
+      ]
+      expectedBodies.forEach((body, i) => expect(bodyOf(ssml[i])).to.equal(body))
+
+      // Only the blockquote is spoken with the secondary voice.
+      const expectedVoices = ['test-primary', 'test-secondary', 'test-primary']
+      expectedVoices.forEach((voice, i) => {
+        expect(ssml[i].open.trim()).to.equal(openFor(voice))
+      })
+    })
+  })
+})