From 303effe849d4c6541b4b5e299326d2f2b233bb65 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 15 Sep 2022 10:44:25 +0800 Subject: [PATCH 1/5] Log request body only in tts cloud functions --- packages/text-to-speech/src/htmlToSsml.ts | 6 +++--- packages/text-to-speech/src/index.ts | 4 ++-- packages/text-to-speech/src/textToSpeech.ts | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index 9999e9f24..d506f1432 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -75,7 +75,7 @@ const TOP_LEVEL_TAGS = [ function parseDomTree(pageNode: Element) { if (!pageNode || pageNode.childNodes.length == 0) { - console.log(' no child nodes found') + console.log('no child nodes found') return [] } @@ -282,7 +282,7 @@ const textToUtterance = ({ console.error('Unable to convert HTML to text', { text, err }) textWithWordOffset = parseHTML(text).document.documentElement.textContent ?? text - console.debug('Converted HTML to text', { textWithWordOffset }) + console.info('Converted HTML to text:', textWithWordOffset) } } const wordCount = tokenizer.tokenize(textWithWordOffset).length @@ -297,7 +297,7 @@ const textToUtterance = ({ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => { const { title, content, options } = htmlInput - console.debug('creating speech file with options', options) + console.log('creating speech file with options', options) const dom = parseHTML(content) const body = dom.document.querySelector('#readability-page-1') diff --git a/packages/text-to-speech/src/index.ts b/packages/text-to-speech/src/index.ts index 63f9a6631..643148ce9 100644 --- a/packages/text-to-speech/src/index.ts +++ b/packages/text-to-speech/src/index.ts @@ -75,7 +75,7 @@ const updateSpeech = async ( export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( async (req, res) => { - console.info('Text to speech request received') + console.info('Text to speech request body:', req.body) const token = req.query.token as string if (!process.env.JWT_SECRET) { console.error('JWT_SECRET not exists') @@ -142,7 +142,7 @@ export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( export const textToSpeechStreamingHandler = Sentry.GCPFunction.wrapHttpFunction( async (req, res) => { - console.debug('Text to speech steaming request', req) + console.log('Text to speech steaming request body:', req.body) if (!process.env.JWT_SECRET) { console.error('JWT_SECRET not exists') return res.status(500).send({ errorCodes: 'JWT_SECRET_NOT_EXISTS' }) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index 05df7807b..2d590bc0e 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -160,9 +160,9 @@ export const synthesizeTextToSpeech = async ( console.error('synthesis error', error) throw error } finally { - console.debug('closing synthesizer') + console.log('closing synthesizer') audioStream?.end() synthesizer.close() - console.debug('synthesizer closed') + console.log('synthesizer closed') } } From f0b01e6ebadf2d832063ef659b236ffa104f344a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 15 Sep 2022 10:57:13 +0800 Subject: [PATCH 2/5] Improve error logging in tts cloud functions --- packages/text-to-speech/src/htmlToSsml.ts | 7 ++++++- packages/text-to-speech/src/index.ts | 14 +++++++------- packages/text-to-speech/src/textToSpeech.ts | 4 ++-- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index d506f1432..8e0ec10a5 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -279,7 +279,12 @@ const textToUtterance = ({ try { textWithWordOffset = htmlToText(text, { wordwrap: false }) } catch (err) { - console.error('Unable to convert HTML to text', { text, err }) + console.error( + 'Unable to convert HTML to text, html:', + text, + ', error:', + err + ) textWithWordOffset = parseHTML(text).document.documentElement.textContent ?? text console.info('Converted HTML to text:', textWithWordOffset) diff --git a/packages/text-to-speech/src/index.ts b/packages/text-to-speech/src/index.ts index 643148ce9..bcc124a39 100644 --- a/packages/text-to-speech/src/index.ts +++ b/packages/text-to-speech/src/index.ts @@ -84,7 +84,7 @@ export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( try { jwt.verify(token, process.env.JWT_SECRET) } catch (e) { - console.error(e) + console.error('Authentication error:', e) return res.status(200).send('UNAUTHENTICATED') } // validate input @@ -92,7 +92,7 @@ export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( const id = input.id const bucket = input.bucket if (!id || !bucket) { - return res.status(200).send('Invalid data') + return res.status(200).send('INVALID_INPUT') } try { // audio file to be saved in GCS @@ -133,7 +133,7 @@ export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( console.info('Text to speech cloud function completed') res.send('OK') } catch (e) { - console.error('Text to speech cloud function error', e) + console.error('Text to speech cloud function error:', e) await updateSpeech(id, token, 'FAILED') return res.status(500).send({ errorCodes: 'SYNTHESIZER_ERROR' }) } @@ -149,13 +149,13 @@ export const textToSpeechStreamingHandler = Sentry.GCPFunction.wrapHttpFunction( } const token = (req.query.token || req.headers.authorization) as string if (!token) { - return res.status(401).send({ errorCode: 'UNAUTHORIZED' }) + return res.status(401).send({ errorCode: 'INVALID_TOKEN' }) } try { jwt.verify(token, process.env.JWT_SECRET) } catch (e) { - console.error(e) - return res.status(401).send({ errorCode: 'UNAUTHORIZED' }) + console.error('Authentication error:', e) + return res.status(401).send({ errorCode: 'UNAUTHENTICATED' }) } try { @@ -174,7 +174,7 @@ export const textToSpeechStreamingHandler = Sentry.GCPFunction.wrapHttpFunction( speechMarks, }) } catch (e) { - console.error('Text to speech streaming error', e) + console.error('Text to speech streaming error:', e) return res.status(500).send({ errorCodes: 'SYNTHESIZER_ERROR' }) } } diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index 2d590bc0e..f7144d20f 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -81,7 +81,7 @@ export const synthesizeTextToSpeech = async ( if (cancellationDetails.reason === CancellationReason.Error) { str += ': ' + e.result.errorDetails } - console.error(str) + console.error('synthesis error:', str) } // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds. @@ -157,7 +157,7 @@ export const synthesizeTextToSpeech = async ( speechMarks, } } catch (error) { - console.error('synthesis error', error) + console.error('synthesis error:', error) throw error } finally { console.log('closing synthesizer') From e4fc10e09b43c71660f5eb7234b279c0a848fa48 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 15 Sep 2022 10:58:27 +0800 Subject: [PATCH 3/5] Remove some debugging logs --- packages/text-to-speech/src/textToSpeech.ts | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index f7144d20f..604835549 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -86,11 +86,6 @@ export const synthesizeTextToSpeech = async ( // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds. synthesizer.wordBoundary = (s, e) => { - console.debug( - `(word boundary) Audio offset: ${e.audioOffset / 10000}ms, text: ${ - e.text - }` - ) speechMarks.push({ word: e.text, time: (timeOffset + e.audioOffset) / 10000, @@ -101,11 +96,6 @@ export const synthesizeTextToSpeech = async ( } synthesizer.bookmarkReached = (s, e) => { - console.debug( - `(bookmark reached) Audio offset: ${ - e.audioOffset / 10000 - }ms, bookmark text: ${e.text}` - ) speechMarks.push({ word: e.text, time: (timeOffset + e.audioOffset) / 10000, From 8ce9c37314a62f85dcfdc3a6759c60ae483cfda2 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 15 Sep 2022 11:08:56 +0800 Subject: [PATCH 4/5] Log synthesis cancel reason --- packages/text-to-speech/src/textToSpeech.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index 604835549..a3ad34f29 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -81,7 +81,7 @@ export const synthesizeTextToSpeech = async ( if (cancellationDetails.reason === CancellationReason.Error) { str += ': ' + e.result.errorDetails } - console.error('synthesis error:', str) + console.log(str) } // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds. @@ -142,6 +142,9 @@ export const synthesizeTextToSpeech = async ( wordOffset = -start.length const ssml = `${start}${input.text}${endSsml()}` const result = await speakSsmlAsyncPromise(ssml) + if (result.reason === ResultReason.Canceled) { + throw new Error(result.errorDetails) + } return { audioData: Buffer.from(result.audioData), speechMarks, From ceed42f247a2553262e03a5004247bc416959d4a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 15 Sep 2022 11:14:35 +0800 Subject: [PATCH 5/5] Add colon in logging text --- packages/text-to-speech/src/htmlToSsml.ts | 2 +- packages/text-to-speech/src/textToSpeech.ts | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index 8e0ec10a5..817049114 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -302,7 +302,7 @@ const textToUtterance = ({ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => { const { title, content, options } = htmlInput - console.log('creating speech file with options', options) + console.log('creating speech file with options:', options) const dom = parseHTML(content) const body = dom.document.querySelector('#readability-page-1') diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index a3ad34f29..27508175e 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -153,7 +153,6 @@ export const synthesizeTextToSpeech = async ( console.error('synthesis error:', error) throw error } finally { - console.log('closing synthesizer') audioStream?.end() synthesizer.close() console.log('synthesizer closed')