diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index 9999e9f24..817049114 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -75,7 +75,7 @@ const TOP_LEVEL_TAGS = [ function parseDomTree(pageNode: Element) { if (!pageNode || pageNode.childNodes.length == 0) { - console.log(' no child nodes found') + console.log('no child nodes found') return [] } @@ -279,10 +279,15 @@ const textToUtterance = ({ try { textWithWordOffset = htmlToText(text, { wordwrap: false }) } catch (err) { - console.error('Unable to convert HTML to text', { text, err }) + console.error( + 'Unable to convert HTML to text, html:', + text, + ', error:', + err + ) textWithWordOffset = parseHTML(text).document.documentElement.textContent ?? text - console.debug('Converted HTML to text', { textWithWordOffset }) + console.info('Converted HTML to text:', textWithWordOffset) } } const wordCount = tokenizer.tokenize(textWithWordOffset).length @@ -297,7 +302,7 @@ const textToUtterance = ({ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => { const { title, content, options } = htmlInput - console.debug('creating speech file with options', options) + console.log('creating speech file with options:', options) const dom = parseHTML(content) const body = dom.document.querySelector('#readability-page-1') diff --git a/packages/text-to-speech/src/index.ts b/packages/text-to-speech/src/index.ts index 63f9a6631..bcc124a39 100644 --- a/packages/text-to-speech/src/index.ts +++ b/packages/text-to-speech/src/index.ts @@ -75,7 +75,7 @@ const updateSpeech = async ( export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( async (req, res) => { - console.info('Text to speech request received') + console.info('Text to speech request body:', req.body) const token = req.query.token as string if (!process.env.JWT_SECRET) { console.error('JWT_SECRET not exists') @@ -84,7 +84,7 @@ export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( try { jwt.verify(token, process.env.JWT_SECRET) } catch (e) { - console.error(e) + console.error('Authentication error:', e) return res.status(200).send('UNAUTHENTICATED') } // validate input @@ -92,7 +92,7 @@ export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( const id = input.id const bucket = input.bucket if (!id || !bucket) { - return res.status(200).send('Invalid data') + return res.status(200).send('INVALID_INPUT') } try { // audio file to be saved in GCS @@ -133,7 +133,7 @@ export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( console.info('Text to speech cloud function completed') res.send('OK') } catch (e) { - console.error('Text to speech cloud function error', e) + console.error('Text to speech cloud function error:', e) await updateSpeech(id, token, 'FAILED') return res.status(500).send({ errorCodes: 'SYNTHESIZER_ERROR' }) } @@ -142,20 +142,20 @@ export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction( export const textToSpeechStreamingHandler = Sentry.GCPFunction.wrapHttpFunction( async (req, res) => { - console.debug('Text to speech steaming request', req) + console.log('Text to speech steaming request body:', req.body) if (!process.env.JWT_SECRET) { console.error('JWT_SECRET not exists') return res.status(500).send({ errorCodes: 'JWT_SECRET_NOT_EXISTS' }) } const token = (req.query.token || req.headers.authorization) as string if (!token) { - return res.status(401).send({ errorCode: 'UNAUTHORIZED' }) + return res.status(401).send({ errorCode: 'INVALID_TOKEN' }) } try { jwt.verify(token, process.env.JWT_SECRET) } catch (e) { - console.error(e) - return res.status(401).send({ errorCode: 'UNAUTHORIZED' }) + console.error('Authentication error:', e) + return res.status(401).send({ errorCode: 'UNAUTHENTICATED' }) } try { @@ -174,7 +174,7 @@ export const textToSpeechStreamingHandler = Sentry.GCPFunction.wrapHttpFunction( speechMarks, }) } catch (e) { - console.error('Text to speech streaming error', e) + console.error('Text to speech streaming error:', e) return res.status(500).send({ errorCodes: 'SYNTHESIZER_ERROR' }) } } diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index 05df7807b..27508175e 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -81,16 +81,11 @@ export const synthesizeTextToSpeech = async ( if (cancellationDetails.reason === CancellationReason.Error) { str += ': ' + e.result.errorDetails } - console.error(str) + console.log(str) } // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds. synthesizer.wordBoundary = (s, e) => { - console.debug( - `(word boundary) Audio offset: ${e.audioOffset / 10000}ms, text: ${ - e.text - }` - ) speechMarks.push({ word: e.text, time: (timeOffset + e.audioOffset) / 10000, @@ -101,11 +96,6 @@ export const synthesizeTextToSpeech = async ( } synthesizer.bookmarkReached = (s, e) => { - console.debug( - `(bookmark reached) Audio offset: ${ - e.audioOffset / 10000 - }ms, bookmark text: ${e.text}` - ) speechMarks.push({ word: e.text, time: (timeOffset + e.audioOffset) / 10000, @@ -152,17 +142,19 @@ export const synthesizeTextToSpeech = async ( wordOffset = -start.length const ssml = `${start}${input.text}${endSsml()}` const result = await speakSsmlAsyncPromise(ssml) + if (result.reason === ResultReason.Canceled) { + throw new Error(result.errorDetails) + } return { audioData: Buffer.from(result.audioData), speechMarks, } } catch (error) { - console.error('synthesis error', error) + console.error('synthesis error:', error) throw error } finally { - console.debug('closing synthesizer') audioStream?.end() synthesizer.close() - console.debug('synthesizer closed') + console.log('synthesizer closed') } }