Replace SentenceTokenizerNew with SentenceTokenizer

2023-04-18 09:33:48 +08:00
parent c697b0cb4d
commit c63ecc94fb
1 changed file with 9 additions and 9 deletions
--- a/packages/text-to-speech/src/htmlToSsml.ts
+++ b/packages/text-to-speech/src/htmlToSsml.ts
@ -262,14 +262,14 @@ export const stripEmojis = (text: string): string => {
 }

 const textToUtterances = ({
-  tokenizer,
+  wordTokenizer,
  idx,
  textItems,
  wordOffset,
  voice,
  isHtml = true,
 }: {
-  tokenizer: WordPunctTokenizer
+  wordTokenizer: WordPunctTokenizer
  idx: string
  textItems: string[]
  wordOffset: number
@ -284,7 +284,7 @@ const textToUtterances = ({
        idx,
        text,
        wordOffset,
-        wordCount: tokenizer.tokenize(text).length,
+        wordCount: wordTokenizer.tokenize(text).length,
        voice,
      },
    ]
@ -318,7 +318,7 @@ const textToUtterances = ({
    const nextText = currentText + sentence
    if (nextText.length > MAX_CHARS) {
      if (currentText.length > 0) {
-        const wordCount = tokenizer.tokenize(currentText).length
+        const wordCount = wordTokenizer.tokenize(currentText).length
        utterances.push({
          idx,
          text: currentText,
@ -329,7 +329,7 @@ const textToUtterances = ({
        wordOffset += wordCount
        currentText = sentence
      } else {
-        const wordCount = tokenizer.tokenize(sentence).length
+        const wordCount = wordTokenizer.tokenize(sentence).length
        utterances.push({
          idx,
          text: sentence,
@ -347,7 +347,7 @@ const textToUtterances = ({
        idx,
        text: currentText,
        wordOffset,
-        wordCount: tokenizer.tokenize(currentText).length,
+        wordCount: wordTokenizer.tokenize(currentText).length,
        voice,
      })
    }
@ -385,13 +385,13 @@ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => {
    }
  }

-  const tokenizer = new WordPunctTokenizer()
+  const wordTokenizer = new WordPunctTokenizer()
  const utterances: Utterance[] = []
  let wordOffset = 0
  if (title) {
    // first utterances is the title
    const titleUtterance = textToUtterances({
-      tokenizer,
+      wordTokenizer,
      idx: '',
      textItems: [stripEmojis(title)], // title could have emoji
      wordOffset,
@ -412,7 +412,7 @@ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => {
      const idx = i.toString()
      i = emitElement(textItems, node, true)
      const newUtterances = textToUtterances({
-        tokenizer,
+        wordTokenizer,
        idx,
        textItems,
        wordOffset,