diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts
index c7c35fed3..2c72a4852 100644
--- a/packages/text-to-speech/src/textToSpeech.ts
+++ b/packages/text-to-speech/src/textToSpeech.ts
@@ -8,6 +8,7 @@ import {
   SpeechSynthesizer,
 } from 'microsoft-cognitiveservices-speech-sdk'
 import { endSsml, htmlToSsmlItems, ssmlItemText, startSsml } from './htmlToSsml'
+import * as _ from 'underscore'
 
 export interface TextToSpeechInput {
   text: string
@@ -139,7 +140,8 @@ export const synthesizeTextToSpeech = async (
     }
     // for ssml
     const startSsmlTag = startSsml(ssmlOptions)
-    const ssml = `${startSsmlTag}${input.text}${endSsml()}`
+    const text = _.escape(input.text)
+    const ssml = `${startSsmlTag}${text}${endSsml()}`
     // set the text offset to be the end of SSML start tag
     wordOffset -= startSsmlTag.length
     const result = await speakSsmlAsyncPromise(ssml)