diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index 7d9dc0daf..00cc06c58 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -59,7 +59,7 @@ export const synthesizeTextToSpeech = async ( const synthesizer = new SpeechSynthesizer(speechConfig) const speechMarks: SpeechMark[] = [] let timeOffset = 0 - let wordOffset = 0 + // let wordOffset = 0 synthesizer.synthesizing = function (s, e) { // convert arrayBuffer to stream and write to stream @@ -98,7 +98,7 @@ export const synthesizeTextToSpeech = async ( speechMarks.push({ word: e.text, time: (timeOffset + e.audioOffset) / 10000, - start: wordOffset + e.textOffset, + start: e.textOffset, length: e.text.length, type: 'sentence', }) @@ -151,7 +151,7 @@ export const synthesizeTextToSpeech = async ( const text = _.escape(input.text) const ssml = `${startSsmlTag}${text}${endSsml()}` // set the text offset to be the end of SSML start tag - wordOffset -= startSsmlTag.length + // wordOffset -= startSsmlTag.length const result = await speakSsmlAsyncPromise(ssml) if (result.reason === ResultReason.Canceled) { throw new Error(result.errorDetails)