From 8c8734d15327fc47bcc2ddfe219398a0a121ecb3 Mon Sep 17 00:00:00 2001
From: Hongbo Wu <hongbo@omnivore.app>
Date: Mon, 29 Aug 2022 22:09:09 +0800
Subject: [PATCH 1/5] Re-enable auto synthesis in the backend

---
 packages/api/src/services/speech.ts | 61 ++++++++++++++---------------
 1 file changed, 30 insertions(+), 31 deletions(-)
diff --git a/packages/api/src/services/speech.ts b/packages/api/src/services/speech.ts
index f127fa39e..8d390c374 100644
--- a/packages/api/src/services/speech.ts
+++ b/packages/api/src/services/speech.ts
@@ -21,39 +21,38 @@ export const shouldSynthesize = async (
   userId: string,
   page: Page
 ): Promise<boolean> => {
-  return Promise.resolve(false)
-  // if (page.pageType === PageType.File || !page.content) {
-  //   // we don't synthesize files for now
-  //   return false
-  // }
+  if (page.pageType === PageType.File || !page.content) {
+    // we don't synthesize files for now
+    return false
+  }
 
-  // if (process.env.TEXT_TO_SPEECH_BETA_TEST) {
-  //   return true
-  // }
+  if (process.env.TEXT_TO_SPEECH_BETA_TEST) {
+    return true
+  }
 
-  // const [recentListenedPage, count] = (await searchPages(
-  //   {
-  //     dateFilters: [
-  //       {
-  //         field: 'listenedAt',
-  //         startDate: new Date(Date.now() - 30 * 24 * 60 * 60 * 1000),
-  //       },
-  //     ],
-  //     sort: {
-  //       by: SortBy.LISTENED,
-  //       order: SortOrder.DESCENDING,
-  //     },
-  //     size: 1,
-  //   },
-  //   userId
-  // )) || [[], 0]
-  // if (count === 0) {
-  //   return false
-  // }
-  // return (
-  //   !!recentListenedPage[0].listenedAt &&
-  //   page.savedAt < recentListenedPage[0].listenedAt
-  // )
+  const [recentListenedPage, count] = (await searchPages(
+    {
+      dateFilters: [
+        {
+          field: 'listenedAt',
+          startDate: new Date(Date.now() - 30 * 24 * 60 * 60 * 1000),
+        },
+      ],
+      sort: {
+        by: SortBy.LISTENED,
+        order: SortOrder.DESCENDING,
+      },
+      size: 1,
+    },
+    userId
+  )) || [[], 0]
+  if (count === 0) {
+    return false
+  }
+  return (
+    !!recentListenedPage[0].listenedAt &&
+    page.savedAt < recentListenedPage[0].listenedAt
+  )
 }
 
 export const synthesize = async (page: Page, speech: Speech): Promise<void> => {

From d085c86bb60077ef41c46ccd27f283bb3fff3fde Mon Sep 17 00:00:00 2001
From: Hongbo Wu <hongbo@omnivore.app>
Date: Mon, 29 Aug 2022 22:23:07 +0800
Subject: [PATCH 2/5] Enqueue text to speech tasks

---
 packages/api/src/routers/text_to_speech.ts | 15 ++++++--
 packages/api/src/services/speech.ts        | 45 ----------------------
 2 files changed, 12 insertions(+), 48 deletions(-)

diff --git a/packages/api/src/routers/text_to_speech.ts b/packages/api/src/routers/text_to_speech.ts
index 70e636e0b..dab71ac4e 100644
--- a/packages/api/src/routers/text_to_speech.ts
+++ b/packages/api/src/routers/text_to_speech.ts
@@ -9,9 +9,10 @@ import { getPageById } from '../elastic/pages'
 import { Speech, SpeechState } from '../entity/speech'
 import { buildLogger } from '../utils/logger'
 import { getClaimsByToken } from '../utils/auth'
-import { shouldSynthesize, synthesize } from '../services/speech'
+import { shouldSynthesize } from '../services/speech'
 import { readPushSubscription } from '../datalayer/pubsub'
 import { AppDataSource } from '../server'
+import { enqueueTextToSpeech } from '../utils/createTask'
 
 const logger = buildLogger('app.dispatch')
 
@@ -62,8 +63,16 @@ export function textToSpeechRouter() {
           state: SpeechState.INITIALIZED,
           voice: 'en-US-JennyNeural',
         })
-        await synthesize(page, speech)
-        logger.info('page synthesized')
+        // enqueue a task to convert text to speech
+        const taskName = await enqueueTextToSpeech({
+          userId,
+          speechId: speech.id,
+          text: page.content,
+          voice: speech.voice,
+          priority: 'low',
+        })
+        logger.info('Start Text to speech task', { taskName })
+        return res.status(202).send('Text to speech task started')
       }
 
       res.status(200).send('Page should not synthesize')
diff --git a/packages/api/src/services/speech.ts b/packages/api/src/services/speech.ts
index 8d390c374..380a14a36 100644
--- a/packages/api/src/services/speech.ts
+++ b/packages/api/src/services/speech.ts
@@ -1,16 +1,6 @@
-import { getRepository } from '../entity/utils'
-import { Speech, SpeechState } from '../entity/speech'
 import { searchPages } from '../elastic/pages'
 import { Page, PageType } from '../elastic/types'
 import { SortBy, SortOrder } from '../utils/search'
-import { synthesizeTextToSpeech } from '../utils/textToSpeech'
-
-export const setSpeechFailure = async (id: string) => {
-  // update state
-  await getRepository(Speech).update(id, {
-    state: SpeechState.FAILED,
-  })
-}
 
 /*
  * We should not synthesize the page when:
@@ -54,38 +44,3 @@ export const shouldSynthesize = async (
     page.savedAt < recentListenedPage[0].listenedAt
   )
 }
-
-export const synthesize = async (page: Page, speech: Speech): Promise<void> => {
-  try {
-    if (page.pageType === PageType.File || !page.content) {
-      // we don't synthesize files for now
-      return
-    }
-
-    console.log('Start synthesizing', { pageId: page.id, speechId: speech.id })
-    const startTime = Date.now()
-    const speechOutput = await synthesizeTextToSpeech({
-      id: speech.id,
-      text: page.content,
-      languageCode: page.language,
-      voice: speech.voice,
-      textType: 'ssml',
-    })
-    console.log('Synthesized article', {
-      audioFileName: speechOutput.audioFileName,
-      speechMarksFileName: speechOutput.speechMarksFileName,
-      duration: Date.now() - startTime,
-    })
-
-    // set state to completed
-    await getRepository(Speech).update(speech.id, {
-      audioFileName: speechOutput.audioFileName,
-      speechMarksFileName: speechOutput.speechMarksFileName,
-      state: SpeechState.COMPLETED,
-    })
-  } catch (error) {
-    console.log('Error synthesize article', error)
-    await setSpeechFailure(speech.id)
-    throw error
-  }
-}

From 7353c328d974647cd34790a999782f9886cbb8f3 Mon Sep 17 00:00:00 2001
From: Hongbo Wu <hongbo@omnivore.app>
Date: Mon, 29 Aug 2022 22:29:40 +0800
Subject: [PATCH 3/5] Remove unused text-to-speech code

---
 packages/api/src/utils/textToSpeech.ts        | 334 ------------------
 .../api/test/utils/data/text-to-speech.html   |   1 -
 packages/api/test/utils/textToSpeech.test.ts  |  44 ---
 3 files changed, 379 deletions(-)
 delete mode 100644 packages/api/src/utils/textToSpeech.ts
 delete mode 100644 packages/api/test/utils/data/text-to-speech.html
 delete mode 100644 packages/api/test/utils/textToSpeech.test.ts

diff --git a/packages/api/src/utils/textToSpeech.ts b/packages/api/src/utils/textToSpeech.ts
deleted file mode 100644
index c0327ab63..000000000
--- a/packages/api/src/utils/textToSpeech.ts
+++ /dev/null
@@ -1,334 +0,0 @@
-import { buildLogger } from './logger'
-import { createGCSFile, uploadToBucket } from './uploads'
-import {
-  CancellationDetails,
-  CancellationReason,
-  ResultReason,
-  SpeechConfig,
-  SpeechSynthesisOutputFormat,
-  SpeechSynthesisResult,
-  SpeechSynthesizer,
-} from 'microsoft-cognitiveservices-speech-sdk'
-import { env } from '../env'
-import { parseHTML } from 'linkedom'
-
-export interface TextToSpeechInput {
-  id: string
-  text: string
-  voice?: string
-  languageCode?: string
-  textType?: 'text' | 'ssml'
-  rate?: number
-  volume?: number
-  complimentaryVoice?: string
-}
-
-export interface TextToSpeechOutput {
-  audioFileName: string
-  speechMarksFileName: string
-}
-
-export interface SpeechMark {
-  time: number
-  start?: number
-  length?: number
-  word: string
-  type: 'word' | 'bookmark'
-}
-
-const logger = buildLogger('app.dispatch')
-
-export const synthesizeTextToSpeech = async (
-  input: TextToSpeechInput
-): Promise<TextToSpeechOutput> => {
-  const audioFileName = `speech/${input.id}.mp3`
-  const audioFile = createGCSFile(audioFileName)
-  const writeStream = audioFile.createWriteStream({
-    resumable: true,
-  })
-  const speechConfig = SpeechConfig.fromSubscription(
-    env.azure.speechKey,
-    env.azure.speechRegion
-  )
-  const textType = input.textType || 'text'
-  if (textType === 'text') {
-    speechConfig.speechSynthesisLanguage = input.languageCode || 'en-US'
-    speechConfig.speechSynthesisVoiceName = input.voice || 'en-US-JennyNeural'
-  }
-  speechConfig.speechSynthesisOutputFormat =
-    SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
-
-  // Create the speech synthesizer.
-  const synthesizer = new SpeechSynthesizer(speechConfig)
-  const speechMarks: SpeechMark[] = []
-  let timeOffset = 0
-  let characterOffset = 0
-
-  synthesizer.synthesizing = function (s, e) {
-    // convert arrayBuffer to stream and write to gcs file
-    writeStream.write(Buffer.from(e.result.audioData))
-  }
-
-  // The event synthesis completed signals that the synthesis is completed.
-  synthesizer.synthesisCompleted = (s, e) => {
-    logger.info(
-      `(synthesized) Reason: ${ResultReason[e.result.reason]} Audio length: ${
-        e.result.audioData.byteLength
-      }`
-    )
-  }
-
-  // The synthesis started event signals that the synthesis is started.
-  synthesizer.synthesisStarted = (s, e) => {
-    logger.info('(synthesis started)')
-  }
-
-  // The event signals that the service has stopped processing speech.
-  // This can happen when an error is encountered.
-  synthesizer.SynthesisCanceled = (s, e) => {
-    const cancellationDetails = CancellationDetails.fromResult(e.result)
-    let str =
-      '(cancel) Reason: ' + CancellationReason[cancellationDetails.reason]
-    if (cancellationDetails.reason === CancellationReason.Error) {
-      str += ': ' + e.result.errorDetails
-    }
-    logger.info(str)
-  }
-
-  // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds.
-  synthesizer.wordBoundary = (s, e) => {
-    speechMarks.push({
-      word: e.text,
-      time: (timeOffset + e.audioOffset) / 10000,
-      start: characterOffset + e.textOffset,
-      length: e.wordLength,
-      type: 'word',
-    })
-  }
-
-  synthesizer.bookmarkReached = (s, e) => {
-    logger.debug(
-      `(Bookmark reached), Audio offset: ${
-        e.audioOffset / 10000
-      }ms, bookmark text: ${e.text}`
-    )
-    speechMarks.push({
-      word: e.text,
-      time: (timeOffset + e.audioOffset) / 10000,
-      type: 'bookmark',
-    })
-  }
-
-  const speakTextAsyncPromise = (
-    text: string
-  ): Promise<SpeechSynthesisResult> => {
-    return new Promise((resolve, reject) => {
-      synthesizer.speakTextAsync(
-        text,
-        (result) => {
-          resolve(result)
-        },
-        (error) => {
-          reject(error)
-        }
-      )
-    })
-  }
-
-  const speakSsmlAsyncPromise = (
-    text: string
-  ): Promise<SpeechSynthesisResult> => {
-    return new Promise((resolve, reject) => {
-      synthesizer.speakSsmlAsync(
-        text,
-        (result) => {
-          resolve(result)
-        },
-        (error) => {
-          reject(error)
-        }
-      )
-    })
-  }
-
-  if (textType === 'text') {
-    // slice the text into chunks of 5,000 characters
-    let currentTextChunk = ''
-    const textChunks = input.text.split('\n')
-    for (let i = 0; i < textChunks.length; i++) {
-      currentTextChunk += textChunks[i] + '\n'
-      if (currentTextChunk.length < 5000 && i < textChunks.length - 1) {
-        continue
-      }
-      logger.debug(`synthesizing ${currentTextChunk}`)
-      const result = await speakTextAsyncPromise(currentTextChunk)
-      timeOffset = timeOffset + result.audioDuration
-      characterOffset = characterOffset + currentTextChunk.length
-      currentTextChunk = ''
-    }
-  } else {
-    const document = parseHTML(input.text).document
-    const elements = document.querySelectorAll(
-      'h1, h2, h3, p, ul, ol, blockquote'
-    )
-    // convert html elements to the ssml document
-    for (const e of Array.from(elements)) {
-      const htmlElement = e as HTMLElement
-      if (htmlElement.innerText) {
-        // use complimentary voice for blockquote, hardcoded for now
-        const voice =
-          htmlElement.tagName.toLowerCase() === 'blockquote'
-            ? input.complimentaryVoice || 'en-US-AriaNeural'
-            : input.voice
-        const ssml = htmlElementToSsml({
-          htmlElement: e,
-          language: input.languageCode,
-          rate: input.rate,
-          volume: input.volume,
-          voice,
-        })
-        logger.debug(`synthesizing ${ssml}`)
-        const result = await speakSsmlAsyncPromise(ssml)
-        // if (result.reason === ResultReason.Canceled) {
-        //   synthesizer.close()
-        //   throw new Error(result.errorDetails)
-        // }
-        timeOffset = timeOffset + result.audioDuration
-        // characterOffset = characterOffset + htmlElement.innerText.length
-      }
-    }
-  }
-  writeStream.end()
-  synthesizer.close()
-
-  logger.debug(`audio file: ${audioFileName}`)
-
-  // upload Speech Marks file to GCS
-  const speechMarksFileName = `speech/${input.id}.json`
-  await uploadToBucket(
-    speechMarksFileName,
-    Buffer.from(JSON.stringify(speechMarks))
-  )
-
-  return {
-    audioFileName,
-    speechMarksFileName,
-  }
-}
-
-export const htmlElementToSsml = ({
-  htmlElement,
-  language = 'en-US',
-  voice = 'en-US-JennyNeural',
-  rate = 1,
-  volume = 100,
-}: {
-  htmlElement: Element
-  language?: string
-  voice?: string
-  rate?: number
-  volume?: number
-}): string => {
-  const replaceElement = (newElement: Element, oldElement: Element) => {
-    const id = oldElement.getAttribute('data-omnivore-anchor-idx')
-    if (id) {
-      const e = htmlElement.querySelector(`[data-omnivore-anchor-idx="${id}"]`)
-      e?.parentNode?.replaceChild(newElement, e)
-    }
-  }
-
-  const appendBookmarkElement = (parent: Element, element: Element) => {
-    const id = element.getAttribute('data-omnivore-anchor-idx')
-    if (id) {
-      const bookMark = ssml.createElement('bookmark')
-      bookMark.setAttribute('mark', `data-omnivore-anchor-idx-${id}`)
-      parent.appendChild(bookMark)
-    }
-  }
-
-  const replaceWithEmphasis = (element: Element, level: string) => {
-    const parent = ssml.createDocumentFragment() as unknown as Element
-    appendBookmarkElement(parent, element)
-    const emphasisElement = ssml.createElement('emphasis')
-    emphasisElement.setAttribute('level', level)
-    emphasisElement.innerHTML = element.innerHTML.trim()
-    parent.appendChild(emphasisElement)
-    replaceElement(parent, element)
-  }
-
-  const replaceWithSentence = (element: Element) => {
-    const parent = ssml.createDocumentFragment() as unknown as Element
-    appendBookmarkElement(parent, element)
-    const sentenceElement = ssml.createElement('s')
-    sentenceElement.innerHTML = element.innerHTML.trim()
-    parent.appendChild(sentenceElement)
-    replaceElement(parent, element)
-  }
-
-  // create new ssml document
-  const ssml = parseHTML('').document
-  const speakElement = ssml.createElement('speak')
-  speakElement.setAttribute('version', '1.0')
-  speakElement.setAttribute('xmlns', 'http://www.w3.org/2001/10/synthesis')
-  speakElement.setAttribute('xml:lang', language)
-  const voiceElement = ssml.createElement('voice')
-  voiceElement.setAttribute('name', voice)
-  speakElement.appendChild(voiceElement)
-  const prosodyElement = ssml.createElement('prosody')
-  prosodyElement.setAttribute('rate', `${rate}`)
-  prosodyElement.setAttribute('volume', volume.toString())
-  voiceElement.appendChild(prosodyElement)
-  // add each paragraph to the ssml document
-  appendBookmarkElement(prosodyElement, htmlElement)
-  // replace emphasis elements with ssml
-  htmlElement.querySelectorAll('*').forEach((e) => {
-    switch (e.tagName.toLowerCase()) {
-      case 's':
-        replaceWithEmphasis(e, 'moderate')
-        break
-      case 'sub':
-        if (e.getAttribute('alias') === null) {
-          replaceWithEmphasis(e, 'moderate')
-        }
-        break
-      case 'i':
-      case 'em':
-      case 'q':
-      case 'blockquote':
-      case 'cite':
-      case 'del':
-      case 'strike':
-      case 'sup':
-      case 'summary':
-      case 'caption':
-      case 'figcaption':
-        replaceWithEmphasis(e, 'moderate')
-        break
-      case 'b':
-      case 'strong':
-      case 'dt':
-      case 'dfn':
-      case 'u':
-      case 'mark':
-      case 'th':
-      case 'title':
-      case 'var':
-        replaceWithEmphasis(e, 'moderate')
-        break
-      case 'li':
-        replaceWithSentence(e)
-        break
-      default: {
-        const parent = ssml.createDocumentFragment() as unknown as Element
-        appendBookmarkElement(parent, e)
-        const text = (e as HTMLElement).innerText.trim()
-        const textElement = ssml.createTextNode(text)
-        parent.appendChild(textElement)
-        replaceElement(parent, e)
-      }
-    }
-  })
-  prosodyElement.appendChild(htmlElement)
-
-  return speakElement.outerHTML.replace(/&nbsp;|\n/g, '')
-}
diff --git a/packages/api/test/utils/data/text-to-speech.html b/packages/api/test/utils/data/text-to-speech.html
deleted file mode 100644
index 65245fe69..000000000
--- a/packages/api/test/utils/data/text-to-speech.html
+++ /dev/null
@@ -1 +0,0 @@
-<DIV id="readability-content"><DIV class="page" id="readability-page-1"><div data-omnivore-anchor-idx="1" dir="ltr" lang="en" id="mw-content-text"> <p data-omnivore-anchor-idx="2"><i data-omnivore-anchor-idx="3"><b data-omnivore-anchor-idx="4">An Instinct for Dragons</b></i> is a book by <a data-omnivore-anchor-idx="5" href="https://en.wikipedia.org/wiki/University_of_Central_Florida" title="University of Central Florida">University of Central Florida</a> <a data-omnivore-anchor-idx="6" href="https://en.wikipedia.org/wiki/Anthropologist" title="Anthropologist">anthropologist</a>, David E. Jones, in which he seeks to explain the universality of <a data-omnivore-anchor-idx="7" href="https://en.wikipedia.org/wiki/Dragon" title="Dragon">dragon</a> images in the <a data-omnivore-anchor-idx="8" href="https://en.wikipedia.org/wiki/Folklore" title="Folklore">folklore</a> of human societies. In the introduction, Jones conducts a survey of dragon myths from cultures around the world and argues that certain aspects of dragons or dragon-like mythical creatures are found very widely. He claims that even the <a data-omnivore-anchor-idx="9" href="https://en.wikipedia.org/wiki/Inuit" title="Inuit">Inuit</a> have a reptilian dragon-like monster, even though (living in a frigid environment unsuited for cold-blooded animals) they had never seen an actual <a data-omnivore-anchor-idx="10" href="https://en.wikipedia.org/wiki/Reptile" title="Reptile">reptile</a>. </p><p data-omnivore-anchor-idx="11">Jones then argues against the common <a data-omnivore-anchor-idx="12" href="https://en.wikipedia.org/wiki/Hypothesis" title="Hypothesis">hypothesis</a> that dragon myths might be motivated by primitive discoveries of <a data-omnivore-anchor-idx="13" href="https://en.wikipedia.org/wiki/Dinosaur" title="Dinosaur">dinosaur</a> <a data-omnivore-anchor-idx="14" href="https://en.wikipedia.org/wiki/Fossil" title="Fossil">fossils</a> (he argues that there are widespread traits of dragons in folklore which are not observable from fossils), and claims that the common traits of dragons seem to be an amalgam of the principal predators of our ancestral <a data-omnivore-anchor-idx="15" href="https://en.wikipedia.org/wiki/Hominid" title="Hominid">hominids</a>, which he names as the <a data-omnivore-anchor-idx="16" href="https://en.wikipedia.org/wiki/Bird_of_prey" title="Bird of prey">raptors</a>, great cats (especially <a data-omnivore-anchor-idx="17" href="https://en.wikipedia.org/wiki/Leopard" title="Leopard">leopards</a>) and <a data-omnivore-anchor-idx="18" href="https://en.wikipedia.org/wiki/Pythonidae" title="Pythonidae">pythons</a>. </p><p data-omnivore-anchor-idx="19">The hypothesis to which Jones conforms is that over millions of years of <a data-omnivore-anchor-idx="20" href="https://en.wikipedia.org/wiki/Evolution" title="Evolution">evolution</a>, members of a species will evolve an <a data-omnivore-anchor-idx="21" href="https://en.wikipedia.org/wiki/Instinct" title="Instinct">instinctive</a> fear of their <a data-omnivore-anchor-idx="22" href="https://en.wikipedia.org/wiki/Predator" title="Predator">predators</a>, and he proposes ways in which these fearful images may be merged in artistic or cultural expression to create the dragon image and, perhaps, other kinds of hybrid monster. </p><p data-omnivore-anchor-idx="23">Finally he suggests sociological reasons for why such images may be perceived differently at different stages of a culture to try to explain why <a data-omnivore-anchor-idx="24" href="https://en.wikipedia.org/wiki/Chinese_dragon" title="Chinese dragon">Chinese dragons</a> are considered basically good and representative of government, but the great majority (although not all) <a data-omnivore-anchor-idx="25" href="https://en.wikipedia.org/wiki/European_dragon" title="European dragon">European dragons</a> are evil and often represent chaos. </p> <h2 data-omnivore-anchor-idx="26"><span data-omnivore-anchor-idx="27" id="Reception">Reception</span></h2> <p data-omnivore-anchor-idx="28">Jones' theory was opposed in an article by Paul Jordan-Smith in the Spring 2002 issue of <i data-omnivore-anchor-idx="29">Western Folklore</i> and by other authors. Jordan-Smith criticized the lack of evidence given to prove why dragon myths could not have been passed from culture to culture. He also notes that it cannot be demonstrated that the fears of ancestral hominids are coded into the human brain. He concludes his review by writing "One is tempted to say, as <a data-omnivore-anchor-idx="30" href="https://en.wikipedia.org/wiki/Dorothy_Parker" title="Dorothy Parker">Dorothy Parker</a> once did, that this is a book not to be tossed aside lightly but thrown violently. But no, it is not worth spending even that much energy on."<sup data-omnivore-anchor-idx="31" id="cite_ref-1"><a data-omnivore-anchor-idx="32" href="#cite_note-1">[1]</a></sup> </p><p data-omnivore-anchor-idx="33">D. Ogden writes that Jones' ideas "might offer pause for thought given the universality of dragon-slaying narratives". He adds, though, that the compound cat, snake, raptor creature imagined by Jones is mostly the Western stereotype based on <a data-omnivore-anchor-idx="34" href="https://en.wikipedia.org/wiki/European_dragon#Middle_Ages" title="European dragon">mediaeval imagery</a>, and that Jones has sought out similar images in a way that lacks <a data-omnivore-anchor-idx="35" href="https://en.wikipedia.org/wiki/Scholarly_method" title="Scholarly method">rigor</a>. In particular, Ogden notes that the <a data-omnivore-anchor-idx="36" href="https://en.wikipedia.org/wiki/Dragons_in_Greek_mythology" title="Dragons in Greek mythology">dragons of Graeco-Roman myth</a> do not fit with Jones's prototype, typically lacking one or more of the hybrid components (with the exception of <a data-omnivore-anchor-idx="37" href="https://en.wikipedia.org/wiki/Typhon" title="Typhon">Typhon</a>, who, however, combines many more animals than Jones's three).<sup data-omnivore-anchor-idx="38" id="cite_ref-drakon_2-0"><a data-omnivore-anchor-idx="39" href="#cite_note-drakon-2">[2]</a></sup> </p> <h2 data-omnivore-anchor-idx="40"><span data-omnivore-anchor-idx="41" id="References">References</span></h2> <div data-omnivore-anchor-idx="42"><ol data-omnivore-anchor-idx="43"> <li data-omnivore-anchor-idx="44" id="cite_note-1"><span data-omnivore-anchor-idx="45"><b data-omnivore-anchor-idx="46"><a data-omnivore-anchor-idx="47" href="#cite_ref-1" aria-label="Jump up" title="Jump up">^</a></b></span> <span data-omnivore-anchor-idx="48"><cite data-omnivore-anchor-idx="49" id="CITEREFJordan-Smith2002">Jordan-Smith, Paul (2002). "Review: <i data-omnivore-anchor-idx="50">An Instinct for Dragons</i>". <i data-omnivore-anchor-idx="51">Western Folklore</i>. <a data-omnivore-anchor-idx="52" href="https://en.wikipedia.org/wiki/JSTOR_(identifier)" title="JSTOR (identifier)">JSTOR</a>&nbsp;.</cite><span data-omnivore-anchor-idx="53" title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Ajournal&rft.genre=article&rft.jtitle=Western+Folklore&rft.atitle=Review%3A+An+Instinct+for+Dragons&rft.date=2002&rft_id=%2F%2Fwww.jstor.org%2Fstable%2F1500302%23id-name%3DJSTOR&rft.aulast=Jordan-Smith&rft.aufirst=Paul&rfr_id=info%3Asid%2Fen.wikipedia.org%3AAn+Instinct+for+Dragons"></span></span> </li> <li data-omnivore-anchor-idx="54" id="cite_note-drakon-2"><span data-omnivore-anchor-idx="55"><b data-omnivore-anchor-idx="56"><a data-omnivore-anchor-idx="57" href="#cite_ref-drakon_2-0" aria-label="Jump up" title="Jump up">^</a></b></span> <span data-omnivore-anchor-idx="58"><cite data-omnivore-anchor-idx="59" id="CITEREFOgden2013">Ogden, Daniel (2013). <a data-omnivore-anchor-idx="60" rel="nofollow" href="https://books.google.com/books?id=FQ2pAK9luwkC&pg=PA24"><i data-omnivore-anchor-idx="61">Drakon: Dragon Myth and Serpent Cult in the Greek and Roman Worlds</i></a>. Oxford University Press. pp.&nbsp;24–25.</cite><span data-omnivore-anchor-idx="62" title="ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook&rft.genre=book&rft.btitle=Drakon%3A+Dragon+Myth+and+Serpent+Cult+in+the+Greek+and+Roman+Worlds&rft.pages=24-25&rft.pub=Oxford+University+Press&rft.date=2013&rft.aulast=Ogden&rft.aufirst=Daniel&rft_id=https%3A%2F%2Fbooks.google.com%2Fbooks%3Fid%3DFQ2pAK9luwkC%26pg%3DPA24&rfr_id=info%3Asid%2Fen.wikipedia.org%3AAn+Instinct+for+Dragons"></span></span> </li> </ol></div> <!-- NewPP limit report Parsed by mw1331 Cached time: 20220804021123 Cache expiry: 1814400 Reduced expiry: false Complications: [vary‐revision‐sha1] CPU time usage: 0.304 seconds Real time usage: 0.374 seconds Preprocessor visited node count: 953/1000000 Post‐expand include size: 9193/2097152 bytes Template argument size: 1243/2097152 bytes Highest expansion depth: 19/100 Expensive parser function count: 0/500 Unstrip recursion depth: 1/20 Unstrip post‐expand size: 9154/5000000 bytes Lua time usage: 0.174/10.000 seconds Lua memory usage: 3082804/52428800 bytes Number of Wikibase entities loaded: 1/400 --> <!-- Transclusion expansion time report (%,ms,calls,template) 100.00% 342.913 1 -total 65.80% 225.643 1 Template:Infobox_book 55.47% 190.209 1 Template:Infobox 33.32% 114.269 1 Template:Reflist 28.73% 98.524 1 Template:ISBNT 22.38% 76.759 1 Template:Cite_journal 16.51% 56.610 1 Template:Catalog_lookup_link 7.03% 24.123 1 Template:Wikidata_image 3.52% 12.062 1 Template:Error-small 2.85% 9.786 1 Template:Small --> <!-- Saved in parser cache with key enwiki:pcache:idhash:1928876-0!canonical and timestamp 20220804021123 and revision id 992642852. --> </div></DIV></DIV>
diff --git a/packages/api/test/utils/textToSpeech.test.ts b/packages/api/test/utils/textToSpeech.test.ts
deleted file mode 100644
index e78492cca..000000000
--- a/packages/api/test/utils/textToSpeech.test.ts
+++ /dev/null
@@ -1,44 +0,0 @@
-import 'mocha'
-import {
-  htmlElementToSsml,
-  synthesizeTextToSpeech,
-  TextToSpeechInput,
-} from '../../src/utils/textToSpeech'
-import { expect } from 'chai'
-import { generateFakeUuid } from '../util'
-import { parseHTML } from 'linkedom'
-import fs from 'fs'
-
-describe('textToSpeech', () => {
-  const load = (path: string): string => {
-    return fs.readFileSync(path, 'utf8')
-  }
-
-  describe('synthesizeTextToSpeech', () => {
-    xit('should create an audio file with speech marks', async () => {
-      const html = load('./test/utils/data/text-to-speech.html')
-      const input: TextToSpeechInput = {
-        id: generateFakeUuid(),
-        text: html,
-        languageCode: 'en-US',
-        voice: 'en-US-JennyNeural',
-        textType: 'ssml',
-      }
-      const output = await synthesizeTextToSpeech(input)
-      expect(output.audioFileName).to.be.a('string')
-      expect(output.speechMarksFileName).to.be.a('string')
-    })
-  })
-
-  describe('htmlElementToSsml', () => {
-    it('should convert Html Element to SSML', async () => {
-      const htmlElement = parseHTML(
-        `<p data-omnivore-anchor-idx="1">Marry had a little lamb</p>`
-      ).document.documentElement
-      const ssml = htmlElementToSsml({ htmlElement })
-      expect(ssml).to.equal(
-        `<speak xml:lang="en-US" xmlns="http://www.w3.org/2001/10/synthesis" version="1.0"><voice name="en-US-JennyNeural"><prosody volume="100" rate="1"><bookmark mark="data-omnivore-anchor-idx-1"></bookmark><p data-omnivore-anchor-idx="1">Marry had a little lamb</p></prosody></voice></speak>`
-      )
-    })
-  })
-})

From fe30beafe6baa331cbc89177dd508e38d1807c5d Mon Sep 17 00:00:00 2001
From: Hongbo Wu <hongbo@omnivore.app>
Date: Mon, 29 Aug 2022 22:59:09 +0800
Subject: [PATCH 4/5] Throw error if synthesis is canceled

---
 packages/text-to-speech/src/index.ts | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/packages/text-to-speech/src/index.ts b/packages/text-to-speech/src/index.ts
index 2ff849856..41964b09d 100644
--- a/packages/text-to-speech/src/index.ts
+++ b/packages/text-to-speech/src/index.ts
@@ -19,6 +19,7 @@ import axios from 'axios'
 import * as jwt from 'jsonwebtoken'
 import * as dotenv from 'dotenv' // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import
 import { htmlToSsml, ssmlItemText } from './htmlToSsml'
+
 dotenv.config()
 
 interface TextToSpeechInput {
@@ -227,10 +228,11 @@ const synthesizeTextToSpeech = async (
       const ssml = ssmlItemText(ssmlItem)
       console.debug(`synthesizing ${ssml}`)
       const result = await speakSsmlAsyncPromise(ssml)
-      // if (result.reason === ResultReason.Canceled) {
-      //   synthesizer.close()
-      //   throw new Error(result.errorDetails)
-      // }
+      if (result.reason === ResultReason.Canceled) {
+        writeStream.end()
+        synthesizer.close()
+        throw new Error(result.errorDetails)
+      }
       timeOffset = timeOffset + result.audioDuration
       // characterOffset = characterOffset + htmlElement.innerText.length
     }

From 9b736cbf1c81531a064dfa6c68249ee32598b9f9 Mon Sep 17 00:00:00 2001
From: Hongbo Wu <hongbo@omnivore.app>
Date: Tue, 30 Aug 2022 11:07:19 +0800
Subject: [PATCH 5/5] Add entrypoint for text-to-speech cloud function

---
 packages/text-to-speech/package.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/text-to-speech/package.json b/packages/text-to-speech/package.json
index fa9db0f24..0e839aa7a 100644
--- a/packages/text-to-speech/package.json
+++ b/packages/text-to-speech/package.json
@@ -16,7 +16,7 @@
     "build": "tsc",
     "start": "functions-framework --source=build/src/ --target=textToSpeechHandler",
     "dev": "concurrently \"tsc -w\" \"nodemon --watch ./build/ --exec npm run start\"",
-    "gcloud-deploy": "gcloud functions deploy text-to-speech --gen2 --trigger-http --allow-unauthenticated --region=us-west2 --runtime nodejs14",
+    "gcloud-deploy": "gcloud functions deploy text-to-speech --gen2 --entry-point=textToSpeechHandler --trigger-http --allow-unauthenticated --region=us-west2 --runtime nodejs14",
     "deploy": "yarn build && yarn gcloud-deploy"
   },
   "devDependencies": {