Merge pull request #1144 from omnivore-app/tts-cloud-function
Setup new cloud function for the text to speech service
This commit is contained in:
@ -153,7 +153,14 @@ export function articleRouter() {
|
||||
voice: voice || userPersonalization?.speechVoice || 'en-US-JennyNeural',
|
||||
})
|
||||
// enqueue a task to convert text to speech
|
||||
const taskName = await enqueueTextToSpeech(uid, speech.id)
|
||||
const taskName = await enqueueTextToSpeech(
|
||||
uid,
|
||||
speech.id,
|
||||
page.content,
|
||||
'ssml',
|
||||
speech.voice,
|
||||
env.fileUpload.gcsUploadBucket
|
||||
)
|
||||
logger.info('Start Text to speech task', { taskName })
|
||||
res.status(202).send('Text to speech task started')
|
||||
}
|
||||
|
||||
@ -4,17 +4,14 @@
|
||||
import express from 'express'
|
||||
import cors from 'cors'
|
||||
import { corsConfig } from '../../utils/corsConfig'
|
||||
import { getRepository } from '../../entity/utils'
|
||||
import { getRepository, setClaims } from '../../entity/utils'
|
||||
import { getPageById } from '../../elastic/pages'
|
||||
import { Speech, SpeechState } from '../../entity/speech'
|
||||
import { buildLogger } from '../../utils/logger'
|
||||
import { getClaimsByToken } from '../../utils/auth'
|
||||
import {
|
||||
setSpeechFailure,
|
||||
shouldSynthesize,
|
||||
synthesize,
|
||||
} from '../../services/speech'
|
||||
import { shouldSynthesize, synthesize } from '../../services/speech'
|
||||
import { readPushSubscription } from '../../datalayer/pubsub'
|
||||
import { AppDataSource } from '../../server'
|
||||
|
||||
const logger = buildLogger('app.dispatch')
|
||||
|
||||
@ -79,58 +76,46 @@ export function speechServiceRouter() {
|
||||
router.options('/', cors<express.Request>({ ...corsConfig, maxAge: 600 }))
|
||||
// eslint-disable-next-line @typescript-eslint/no-misused-promises
|
||||
router.post('/', async (req, res) => {
|
||||
logger.info('Synthesize svc request', {
|
||||
logger.info('Updating speech', {
|
||||
// eslint-disable-next-line @typescript-eslint/no-unsafe-assignment
|
||||
body: req.body,
|
||||
})
|
||||
let userId: string
|
||||
const token = req.query.token as string
|
||||
try {
|
||||
if (!(await getClaimsByToken(token))) {
|
||||
const claims = await getClaimsByToken(token)
|
||||
if (!claims) {
|
||||
logger.info('Unauthorized request', { token })
|
||||
return res.status(200).send('UNAUTHORIZED')
|
||||
return res.status(401).send('UNAUTHORIZED')
|
||||
}
|
||||
userId = claims.uid
|
||||
} catch (error) {
|
||||
logger.error('Unauthorized request', { token, error })
|
||||
return res.status(200).send('UNAUTHORIZED')
|
||||
return res.status(401).send('UNAUTHORIZED')
|
||||
}
|
||||
|
||||
const { userId, speechId } = req.body as {
|
||||
userId: string
|
||||
speechId: string
|
||||
}
|
||||
if (!userId || !speechId) {
|
||||
return res.status(200).send('Invalid data')
|
||||
const { speechId, audioFileName, speechMarksFileName, state } =
|
||||
req.body as {
|
||||
speechId: string
|
||||
audioFileName: string
|
||||
speechMarksFileName: string
|
||||
state: SpeechState
|
||||
}
|
||||
if (!speechId) {
|
||||
return res.status(400).send('Invalid data')
|
||||
}
|
||||
|
||||
logger.info(`Create article speech`, {
|
||||
body: {
|
||||
userId,
|
||||
speechId,
|
||||
},
|
||||
labels: {
|
||||
source: 'CreateArticleSpeech',
|
||||
},
|
||||
// set state to completed
|
||||
await AppDataSource.transaction(async (t) => {
|
||||
await setClaims(t, userId)
|
||||
await t.getRepository(Speech).update(speechId, {
|
||||
audioFileName: audioFileName,
|
||||
speechMarksFileName: speechMarksFileName,
|
||||
state,
|
||||
})
|
||||
})
|
||||
const speech = await getRepository(Speech).findOneBy({
|
||||
id: speechId,
|
||||
user: { id: userId },
|
||||
})
|
||||
if (!speech) {
|
||||
return res.status(200).send('Speech not found')
|
||||
}
|
||||
|
||||
const page = await getPageById(speech.elasticPageId)
|
||||
if (!page) {
|
||||
await setSpeechFailure(speech.id)
|
||||
return res.status(200).send('Page not found')
|
||||
}
|
||||
|
||||
try {
|
||||
await synthesize(page, speech)
|
||||
} catch (error) {
|
||||
logger.error(`Error synthesizing article`, { error })
|
||||
res.status(500).send('Error synthesizing article')
|
||||
}
|
||||
res.send('OK')
|
||||
})
|
||||
|
||||
return router
|
||||
|
||||
@ -330,12 +330,19 @@ export const enqueueSyncWithIntegration = async (
|
||||
|
||||
export const enqueueTextToSpeech = async (
|
||||
userId: string,
|
||||
speechId: string
|
||||
speechId: string,
|
||||
text: string,
|
||||
textType: 'text' | 'ssml',
|
||||
voice: string,
|
||||
bucket: string
|
||||
): Promise<string> => {
|
||||
const { GOOGLE_CLOUD_PROJECT } = process.env
|
||||
const payload = {
|
||||
userId,
|
||||
speechId,
|
||||
id: speechId,
|
||||
text,
|
||||
voice,
|
||||
bucket,
|
||||
textType,
|
||||
}
|
||||
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
|
||||
// @ts-ignore
|
||||
|
||||
11
packages/db/migrations/0095.do.add_rls_to_speech.sql
Executable file
11
packages/db/migrations/0095.do.add_rls_to_speech.sql
Executable file
@ -0,0 +1,11 @@
|
||||
-- Type: DO
|
||||
-- Name: add_rls_to_speech
|
||||
-- Description: Add Row level security to speech table
|
||||
|
||||
BEGIN;
|
||||
|
||||
CREATE POLICY update_speech on omnivore.speech
|
||||
FOR UPDATE TO omnivore_user
|
||||
USING (user_id = omnivore.get_current_user_id());
|
||||
|
||||
COMMIT;
|
||||
9
packages/db/migrations/0095.undo.add_rls_to_speech.sql
Executable file
9
packages/db/migrations/0095.undo.add_rls_to_speech.sql
Executable file
@ -0,0 +1,9 @@
|
||||
-- Type: UNDO
|
||||
-- Name: add_rls_to_speech
|
||||
-- Description: Add Row level security to speech table
|
||||
|
||||
BEGIN;
|
||||
|
||||
DROP POLICY IF EXISTS update_speech ON omnivore.speech;
|
||||
|
||||
COMMIT;
|
||||
4
packages/text-to-speech/.eslintignore
Normal file
4
packages/text-to-speech/.eslintignore
Normal file
@ -0,0 +1,4 @@
|
||||
node_modules/
|
||||
dist/
|
||||
readabilityjs/
|
||||
src/generated/
|
||||
6
packages/text-to-speech/.eslintrc
Normal file
6
packages/text-to-speech/.eslintrc
Normal file
@ -0,0 +1,6 @@
|
||||
{
|
||||
"extends": "../../.eslintrc",
|
||||
"parserOptions": {
|
||||
"project": "tsconfig.json"
|
||||
}
|
||||
}
|
||||
16
packages/text-to-speech/.gcloudignore
Normal file
16
packages/text-to-speech/.gcloudignore
Normal file
@ -0,0 +1,16 @@
|
||||
# This file specifies files that are *not* uploaded to Google Cloud Platform
|
||||
# using gcloud. It follows the same syntax as .gitignore, with the addition of
|
||||
# "#!include" directives (which insert the entries of the given .gitignore-style
|
||||
# file at that point).
|
||||
#
|
||||
# For more information, run:
|
||||
# $ gcloud topic gcloudignore
|
||||
#
|
||||
.gcloudignore
|
||||
# If you would like to upload your .git directory, .gitignore file or files
|
||||
# from your .gitignore file, remove the corresponding line
|
||||
# below:
|
||||
.git
|
||||
.gitignore
|
||||
|
||||
node_modules
|
||||
5
packages/text-to-speech/mocha-config.json
Normal file
5
packages/text-to-speech/mocha-config.json
Normal file
@ -0,0 +1,5 @@
|
||||
{
|
||||
"extension": ["ts"],
|
||||
"spec": "test/**/*.test.ts",
|
||||
"require": "test/babel-register.js"
|
||||
}
|
||||
36
packages/text-to-speech/package.json
Normal file
36
packages/text-to-speech/package.json
Normal file
@ -0,0 +1,36 @@
|
||||
{
|
||||
"name": "@omnivore/text-to-speech-handler",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "build/src/index.js",
|
||||
"types": "build/src/index.d.ts",
|
||||
"files": [
|
||||
"build/src"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"keywords": [],
|
||||
"scripts": {
|
||||
"test": "yarn mocha -r ts-node/register --config mocha-config.json",
|
||||
"lint": "eslint src --ext ts,js,tsx,jsx",
|
||||
"compile": "tsc",
|
||||
"build": "tsc",
|
||||
"start": "functions-framework --source=build/src/ --target=textToSpeechHandler",
|
||||
"dev": "concurrently \"tsc -w\" \"nodemon --watch ./build/ --exec npm run start\"",
|
||||
"gcloud-deploy": "gcloud functions deploy text-to-speech --gen2 --trigger-http --allow-unauthenticated --region=us-west2 --runtime nodejs14",
|
||||
"deploy": "yarn build && yarn gcloud-deploy"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^14.11.2",
|
||||
"eslint-plugin-prettier": "^4.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@google-cloud/functions-framework": "3.1.2",
|
||||
"@google-cloud/storage": "^6.4.1",
|
||||
"@sentry/serverless": "^6.16.1",
|
||||
"axios": "^0.27.2",
|
||||
"dotenv": "^16.0.1",
|
||||
"jsonwebtoken": "^8.5.1",
|
||||
"linkedom": "^0.14.12",
|
||||
"microsoft-cognitiveservices-speech-sdk": "^1.22.0"
|
||||
}
|
||||
}
|
||||
422
packages/text-to-speech/src/index.ts
Normal file
422
packages/text-to-speech/src/index.ts
Normal file
@ -0,0 +1,422 @@
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
|
||||
/* eslint-disable @typescript-eslint/no-explicit-any */
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-argument */
|
||||
/* eslint-disable @typescript-eslint/no-unused-vars */
|
||||
|
||||
import * as Sentry from '@sentry/serverless'
|
||||
import { parseHTML } from 'linkedom'
|
||||
import { File, Storage } from '@google-cloud/storage'
|
||||
import {
|
||||
CancellationDetails,
|
||||
CancellationReason,
|
||||
ResultReason,
|
||||
SpeechConfig,
|
||||
SpeechSynthesisOutputFormat,
|
||||
SpeechSynthesisResult,
|
||||
SpeechSynthesizer,
|
||||
} from 'microsoft-cognitiveservices-speech-sdk'
|
||||
import axios from 'axios'
|
||||
import * as jwt from 'jsonwebtoken'
|
||||
import * as dotenv from 'dotenv' // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import
|
||||
dotenv.config()
|
||||
|
||||
/**
 * Request body accepted by the text-to-speech cloud function.
 */
interface TextToSpeechInput {
  /** Identifier of the speech; used to name the generated `speech/<id>.*` files. */
  id: string
  /** Content to synthesize: plain text, or HTML when `textType` is `'ssml'`. */
  text: string
  /** Name of the GCS bucket that receives the audio and speech-mark files. */
  bucket: string
  /** How `text` is interpreted; treated as `'text'` when omitted. */
  textType?: 'text' | 'ssml'
  /** Voice name handed to the speech service. */
  voice?: string
  /** Voice used for `blockquote` elements when synthesizing HTML. */
  complimentaryVoice?: string
  /** Language code handed to the speech service. */
  languageCode?: string
  /** Value written to the SSML `prosody` element's `rate` attribute. */
  rate?: number
  /** Value written to the SSML `prosody` element's `volume` attribute. */
  volume?: number
}
|
||||
|
||||
/**
 * Names of the objects produced by a synthesis run.
 */
interface TextToSpeechOutput {
  /** Object name of the generated audio file. */
  audioFileName: string
  /** Object name of the JSON file holding the collected speech marks. */
  speechMarksFileName: string
}
|
||||
|
||||
/**
 * A single timing marker collected while audio is being synthesized.
 */
interface SpeechMark {
  /** Whether the marker comes from a word boundary or an SSML bookmark. */
  type: 'word' | 'bookmark'
  /** The spoken word, or the bookmark text. */
  word: string
  /** Position of the marker in the audio. */
  time: number
  /** Character offset of the word in the input; only set for word markers. */
  start?: number
  /** Length of the word; only set for word markers. */
  length?: number
}
|
||||
|
||||
const storage = new Storage()
|
||||
|
||||
/**
 * Stores an in-memory buffer as an object in a Google Cloud Storage bucket.
 *
 * @param filePath - object name (path) inside the bucket
 * @param data - bytes to store
 * @param bucket - name of the destination bucket
 * @param options - forwarded unchanged to `File.save`
 */
const uploadToBucket = async (
  filePath: string,
  data: Buffer,
  bucket: string,
  options?: { contentType?: string; public?: boolean }
): Promise<void> => {
  const targetFile = storage.bucket(bucket).file(filePath)
  await targetFile.save(data, options)
}
|
||||
|
||||
/**
 * Returns a handle to the object `filename` in `bucket`. No network call is
 * made here; the handle is obtained from the module-level `storage` client.
 */
const createGCSFile = (bucket: string, filename: string): File =>
  storage.bucket(bucket).file(filename)
|
||||
|
||||
/**
 * Reports the outcome of a synthesis run to the backend REST API.
 *
 * Posts to `<REST_BACKEND_ENDPOINT>/svc/text-to-speech` with the token passed
 * as a query parameter.
 *
 * @param speechId - identifier of the speech being updated
 * @param token - token appended to the callback URL
 * @param state - final state of the speech
 * @param audioFileName - object name of the audio file, if one was produced
 * @param speechMarksFileName - object name of the speech marks file, if any
 * @returns `true` when the backend answered with HTTP 200
 * @throws Error when `REST_BACKEND_ENDPOINT` is not configured
 */
const updateSpeech = async (
  speechId: string,
  token: string,
  state: 'COMPLETED' | 'FAILED',
  audioFileName?: string,
  speechMarksFileName?: string
): Promise<boolean> => {
  const backendEndpoint = process.env.REST_BACKEND_ENDPOINT
  if (!backendEndpoint) {
    throw new Error('backend rest api endpoint not exists')
  }

  const callbackUrl = `${backendEndpoint}/svc/text-to-speech?token=${token}`
  const payload = { speechId, audioFileName, speechMarksFileName, state }
  const { status } = await axios.post(callbackUrl, payload)

  return status === 200
}
|
||||
|
||||
/**
 * Synthesizes `input.text` with the Azure speech SDK, streams the resulting
 * MP3 audio into `speech/<id>.mp3` in `input.bucket`, and uploads the speech
 * marks collected during synthesis as `speech/<id>.json`.
 *
 * The returned promise only resolves after the audio upload stream has
 * finished, so callers can rely on both objects existing. The synthesizer is
 * closed whether synthesis succeeds or fails.
 *
 * @param input - text (or HTML when `textType` is `'ssml'`) and voice settings
 * @returns object names of the audio file and the speech marks file
 * @throws Error when `AZURE_SPEECH_KEY` / `AZURE_SPEECH_REGION` are not set,
 *         when synthesis rejects, or when the audio upload stream errors
 */
const synthesizeTextToSpeech = async (
  input: TextToSpeechInput
): Promise<TextToSpeechOutput> => {
  if (!process.env.AZURE_SPEECH_KEY || !process.env.AZURE_SPEECH_REGION) {
    throw new Error('Azure Speech Key or Region not set')
  }
  const audioFileName = `speech/${input.id}.mp3`
  const audioFile = createGCSFile(input.bucket, audioFileName)
  const writeStream = audioFile.createWriteStream({
    resumable: true,
  })
  // Remember stream errors that happen while synthesis is still running; an
  // 'error' event without a listener would otherwise crash the process.
  const upload: { error?: Error } = {}
  writeStream.on('error', (err: Error) => {
    upload.error = err
  })

  const speechConfig = SpeechConfig.fromSubscription(
    process.env.AZURE_SPEECH_KEY,
    process.env.AZURE_SPEECH_REGION
  )
  const textType = input.textType || 'text'
  if (textType === 'text') {
    // For SSML the language and voice are carried by the document itself.
    speechConfig.speechSynthesisLanguage = input.languageCode || 'en-US'
    speechConfig.speechSynthesisVoiceName = input.voice || 'en-US-JennyNeural'
  }
  speechConfig.speechSynthesisOutputFormat =
    SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3

  // Create the speech synthesizer.
  const synthesizer = new SpeechSynthesizer(speechConfig)
  const speechMarks: SpeechMark[] = []
  // Running offsets so marks of later chunks continue where earlier ones ended.
  let timeOffset = 0
  let characterOffset = 0

  synthesizer.synthesizing = function (s, e) {
    // convert arrayBuffer to stream and write to gcs file
    writeStream.write(Buffer.from(e.result.audioData))
  }

  // The event synthesis completed signals that the synthesis is completed.
  synthesizer.synthesisCompleted = (s, e) => {
    console.info(
      `(synthesized) Reason: ${ResultReason[e.result.reason]} Audio length: ${
        e.result.audioData.byteLength
      }`
    )
  }

  // The synthesis started event signals that the synthesis is started.
  synthesizer.synthesisStarted = (s, e) => {
    console.info('(synthesis started)')
  }

  // The event signals that the service has stopped processing speech.
  // This can happen when an error is encountered.
  synthesizer.SynthesisCanceled = (s, e) => {
    const cancellationDetails = CancellationDetails.fromResult(e.result)
    let str =
      '(cancel) Reason: ' + CancellationReason[cancellationDetails.reason]
    if (cancellationDetails.reason === CancellationReason.Error) {
      str += ': ' + e.result.errorDetails
    }
    console.info(str)
  }

  // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds.
  synthesizer.wordBoundary = (s, e) => {
    speechMarks.push({
      word: e.text,
      time: (timeOffset + e.audioOffset) / 10000,
      start: characterOffset + e.textOffset,
      length: e.wordLength,
      type: 'word',
    })
  }

  synthesizer.bookmarkReached = (s, e) => {
    console.debug(
      `(Bookmark reached), Audio offset: ${
        e.audioOffset / 10000
      }ms, bookmark text: ${e.text}`
    )
    speechMarks.push({
      word: e.text,
      time: (timeOffset + e.audioOffset) / 10000,
      type: 'bookmark',
    })
  }

  // Promise wrapper around the callback-based speakTextAsync.
  const speakTextAsyncPromise = (
    text: string
  ): Promise<SpeechSynthesisResult> => {
    return new Promise((resolve, reject) => {
      synthesizer.speakTextAsync(
        text,
        (result) => {
          resolve(result)
        },
        (error) => {
          reject(error)
        }
      )
    })
  }

  // Promise wrapper around the callback-based speakSsmlAsync.
  const speakSsmlAsyncPromise = (
    text: string
  ): Promise<SpeechSynthesisResult> => {
    return new Promise((resolve, reject) => {
      synthesizer.speakSsmlAsync(
        text,
        (result) => {
          resolve(result)
        },
        (error) => {
          reject(error)
        }
      )
    })
  }

  try {
    if (textType === 'text') {
      // Accumulate whole lines until the chunk reaches 5,000 characters (or
      // the input ends), then synthesize the chunk.
      let currentTextChunk = ''
      const textChunks = input.text.split('\n')
      for (let i = 0; i < textChunks.length; i++) {
        currentTextChunk += textChunks[i] + '\n'
        if (currentTextChunk.length < 5000 && i < textChunks.length - 1) {
          continue
        }
        console.debug(`synthesizing ${currentTextChunk}`)
        const result = await speakTextAsyncPromise(currentTextChunk)
        timeOffset = timeOffset + result.audioDuration
        characterOffset = characterOffset + currentTextChunk.length
        currentTextChunk = ''
      }
    } else {
      const document = parseHTML(input.text).document
      const elements = document.querySelectorAll(
        'h1, h2, h3, p, ul, ol, blockquote'
      )
      // convert html elements to the ssml document
      for (const e of Array.from(elements)) {
        const htmlElement = e as HTMLElement
        if (htmlElement.innerText) {
          // use complimentary voice for blockquote, hardcoded for now
          const voice =
            htmlElement.tagName.toLowerCase() === 'blockquote'
              ? input.complimentaryVoice || 'en-US-AriaNeural'
              : input.voice
          const ssml = htmlElementToSsml({
            htmlElement: e,
            language: input.languageCode,
            rate: input.rate,
            volume: input.volume,
            voice,
          })
          console.debug(`synthesizing ${ssml}`)
          // NOTE: a result with reason Canceled is not treated as a failure
          // here; it is only logged by the SynthesisCanceled handler above.
          const result = await speakSsmlAsyncPromise(ssml)
          timeOffset = timeOffset + result.audioDuration
        }
      }
    }
  } catch (error) {
    // Abandon the partially written audio object instead of leaving the
    // upload stream open.
    writeStream.destroy()
    throw error
  } finally {
    synthesizer.close()
  }

  // Flush the audio and wait until the upload has actually completed; without
  // this the caller could report success before the object exists.
  await new Promise<void>((resolve, reject) => {
    if (upload.error) {
      reject(upload.error)
      return
    }
    writeStream.once('finish', () => resolve())
    writeStream.once('error', reject)
    writeStream.end()
  })

  console.debug(`audio file: ${audioFileName}`)

  // upload Speech Marks file to GCS
  const speechMarksFileName = `speech/${input.id}.json`
  await uploadToBucket(
    speechMarksFileName,
    Buffer.from(JSON.stringify(speechMarks)),
    input.bucket
  )

  return {
    audioFileName,
    speechMarksFileName,
  }
}
|
||||
|
||||
/**
 * Converts one HTML element into an SSML document string of the shape
 * `<speak><voice><prosody>…</prosody></voice></speak>`.
 *
 * Descendant elements are rewritten in place: inline/emphasis-like tags become
 * `<emphasis>`, `li` becomes `<s>`, and everything else is reduced to its
 * text. Elements carrying a `data-omnivore-anchor-idx` attribute get a
 * `<bookmark>` inserted in front of them. NOTE: `htmlElement` is mutated and
 * moved into the generated document.
 *
 * @param htmlElement - element to convert
 * @param language - value of the `xml:lang` attribute (default `en-US`)
 * @param voice - value of the `voice` element's `name` (default `en-US-JennyNeural`)
 * @param rate - value of the `prosody` `rate` attribute (default `1`)
 * @param volume - value of the `prosody` `volume` attribute (default `100`)
 * @returns the serialized SSML with `&nbsp;` entities, non-breaking spaces and
 *          newlines removed
 */
const htmlElementToSsml = ({
  htmlElement,
  language = 'en-US',
  voice = 'en-US-JennyNeural',
  rate = 1,
  volume = 100,
}: {
  htmlElement: Element
  language?: string
  voice?: string
  rate?: number
  volume?: number
}): string => {
  // Swaps `oldElement` (looked up again by its anchor index inside
  // `htmlElement`) for `newElement`; elements without an anchor index are
  // left untouched.
  const replaceElement = (newElement: Element, oldElement: Element) => {
    const id = oldElement.getAttribute('data-omnivore-anchor-idx')
    if (id) {
      const e = htmlElement.querySelector(`[data-omnivore-anchor-idx="${id}"]`)
      e?.parentNode?.replaceChild(newElement, e)
    }
  }

  // Appends a <bookmark> for `element` to `parent` when it has an anchor index.
  const appendBookmarkElement = (parent: Element, element: Element) => {
    const id = element.getAttribute('data-omnivore-anchor-idx')
    if (id) {
      const bookMark = ssml.createElement('bookmark')
      bookMark.setAttribute('mark', `data-omnivore-anchor-idx-${id}`)
      parent.appendChild(bookMark)
    }
  }

  // Replaces `element` with bookmark + <emphasis level=…> wrapping its markup.
  const replaceWithEmphasis = (element: Element, level: string) => {
    const parent = ssml.createDocumentFragment() as unknown as Element
    appendBookmarkElement(parent, element)
    const emphasisElement = ssml.createElement('emphasis')
    emphasisElement.setAttribute('level', level)
    emphasisElement.innerHTML = element.innerHTML.trim()
    parent.appendChild(emphasisElement)
    replaceElement(parent, element)
  }

  // Replaces `element` with bookmark + <s> (sentence) wrapping its markup.
  const replaceWithSentence = (element: Element) => {
    const parent = ssml.createDocumentFragment() as unknown as Element
    appendBookmarkElement(parent, element)
    const sentenceElement = ssml.createElement('s')
    sentenceElement.innerHTML = element.innerHTML.trim()
    parent.appendChild(sentenceElement)
    replaceElement(parent, element)
  }

  // create new ssml document
  const ssml = parseHTML('').document
  const speakElement = ssml.createElement('speak')
  speakElement.setAttribute('version', '1.0')
  speakElement.setAttribute('xmlns', 'http://www.w3.org/2001/10/synthesis')
  speakElement.setAttribute('xml:lang', language)
  const voiceElement = ssml.createElement('voice')
  voiceElement.setAttribute('name', voice)
  speakElement.appendChild(voiceElement)
  const prosodyElement = ssml.createElement('prosody')
  prosodyElement.setAttribute('rate', `${rate}`)
  prosodyElement.setAttribute('volume', volume.toString())
  voiceElement.appendChild(prosodyElement)
  // add each paragraph to the ssml document
  appendBookmarkElement(prosodyElement, htmlElement)
  // replace emphasis elements with ssml
  htmlElement.querySelectorAll('*').forEach((e) => {
    switch (e.tagName.toLowerCase()) {
      case 's':
        replaceWithEmphasis(e, 'moderate')
        break
      case 'sub':
        // a <sub alias="…"> is left as it is
        if (e.getAttribute('alias') === null) {
          replaceWithEmphasis(e, 'moderate')
        }
        break
      case 'i':
      case 'em':
      case 'q':
      case 'blockquote':
      case 'cite':
      case 'del':
      case 'strike':
      case 'sup':
      case 'summary':
      case 'caption':
      case 'figcaption':
        replaceWithEmphasis(e, 'moderate')
        break
      case 'b':
      case 'strong':
      case 'dt':
      case 'dfn':
      case 'u':
      case 'mark':
      case 'th':
      case 'title':
      case 'var':
        replaceWithEmphasis(e, 'moderate')
        break
      case 'li':
        replaceWithSentence(e)
        break
      default: {
        // any other element is flattened to its trimmed text
        const parent = ssml.createDocumentFragment() as unknown as Element
        appendBookmarkElement(parent, e)
        const text = (e as HTMLElement).innerText.trim()
        const textElement = ssml.createTextNode(text)
        parent.appendChild(textElement)
        replaceElement(parent, e)
      }
    }
  })
  prosodyElement.appendChild(htmlElement)

  // Strip non-breaking spaces (as entity or raw character) and newlines.
  // Ordinary spaces must be kept: they separate the attributes of the
  // serialized elements.
  return speakElement.outerHTML.replace(/&nbsp;|\u00A0|\n/g, '')
}
|
||||
|
||||
/**
 * HTTP entry point of the text-to-speech cloud function.
 *
 * Verifies the JWT passed in the `token` query parameter, synthesizes the
 * request body and reports the result to the backend via `updateSpeech`.
 *
 * Responses:
 * - 500 `JWT_SECRET not exists` when the secret is not configured
 * - 200 `UNAUTHENTICATED` when the token does not verify
 *   (NOTE(review): presumably 200 is used so the caller does not retry — confirm)
 * - 500 `Failed to update speech` when the backend did not accept the update
 * - 500 `Failed to synthesize` when synthesis or the update threw
 * - 200 `OK` on success
 */
export const textToSpeechHandler = Sentry.GCPFunction.wrapHttpFunction(
  async (req, res) => {
    // Log only the speech id: the full request object contains the auth token
    // (query string) and the complete article text.
    console.debug('New text to speech request', {
      id: (req.body as Partial<TextToSpeechInput> | undefined)?.id,
    })
    const token = req.query.token as string
    if (!process.env.JWT_SECRET) {
      console.error('JWT_SECRET not exists')
      return res.status(500).send('JWT_SECRET not exists')
    }
    try {
      jwt.verify(token, process.env.JWT_SECRET)
    } catch (e) {
      console.error(e)
      return res.status(200).send('UNAUTHENTICATED')
    }
    const input = req.body as TextToSpeechInput
    try {
      const { audioFileName, speechMarksFileName } =
        await synthesizeTextToSpeech(input)
      const updated = await updateSpeech(
        input.id,
        token,
        'COMPLETED',
        audioFileName,
        speechMarksFileName
      )

      if (!updated) {
        return res.status(500).send('Failed to update speech')
      }
    } catch (e) {
      console.error(e)
      // Reporting the failure is best effort: if the backend is unreachable
      // the request must still be answered instead of throwing out of the
      // handler.
      try {
        await updateSpeech(input.id, token, 'FAILED')
      } catch (updateError) {
        console.error('Failed to mark speech as failed', updateError)
      }
      return res.status(500).send('Failed to synthesize')
    }

    res.send('OK')
  }
)
|
||||
3
packages/text-to-speech/test/babel-register.js
Normal file
3
packages/text-to-speech/test/babel-register.js
Normal file
@ -0,0 +1,3 @@
|
||||
const register = require('@babel/register').default
|
||||
|
||||
register({ extensions: ['.ts', '.tsx', '.js', '.jsx'] })
|
||||
13
packages/text-to-speech/test/stub.test.ts
Normal file
13
packages/text-to-speech/test/stub.test.ts
Normal file
@ -0,0 +1,13 @@
|
||||
import 'mocha'
|
||||
import * as chai from 'chai'
|
||||
import { expect } from 'chai'
|
||||
import 'chai/register-should'
|
||||
import chaiString from 'chai-string'
|
||||
|
||||
chai.use(chaiString)
|
||||
|
||||
// Placeholder suite so the package's mocha run has at least one spec.
describe('Stub test', function () {
  it('should pass', function () {
    const alwaysTrue = true
    expect(alwaysTrue).to.be.true
  })
})
|
||||
9
packages/text-to-speech/tsconfig.json
Normal file
9
packages/text-to-speech/tsconfig.json
Normal file
@ -0,0 +1,9 @@
|
||||
{
|
||||
"extends": "@tsconfig/node14/tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "build",
|
||||
"rootDir": ".",
|
||||
"lib": ["dom"]
|
||||
},
|
||||
"include": ["src", "test"]
|
||||
}
|
||||
68
yarn.lock
68
yarn.lock
@ -2458,7 +2458,7 @@
|
||||
resolved "https://registry.yarnpkg.com/@google-cloud/opentelemetry-resource-util/-/opentelemetry-resource-util-1.1.0.tgz#0bd1fe708ba27288f6efc9712fbd3705fd325540"
|
||||
integrity sha512-AXfQiqIxeespEYcRNaotC05ddiy2Vgk2yqY73b7Hl1UoJ75Gt4kSRcswrVn18eoDI0YQkSTBh7Ye9ugfFLN5HA==
|
||||
|
||||
"@google-cloud/paginator@^3.0.0", "@google-cloud/paginator@^3.0.6":
|
||||
"@google-cloud/paginator@^3.0.0", "@google-cloud/paginator@^3.0.6", "@google-cloud/paginator@^3.0.7":
|
||||
version "3.0.7"
|
||||
resolved "https://registry.yarnpkg.com/@google-cloud/paginator/-/paginator-3.0.7.tgz#fb6f8e24ec841f99defaebf62c75c2e744dd419b"
|
||||
integrity sha512-jJNutk0arIQhmpUUQJPJErsojqo834KcyB6X7a1mxuic8i1tKXxde8E69IZxNZawRIlZdIK2QY4WALvlK5MzYQ==
|
||||
@ -2548,6 +2548,30 @@
|
||||
stream-events "^1.0.4"
|
||||
xdg-basedir "^4.0.0"
|
||||
|
||||
"@google-cloud/storage@^6.4.1":
|
||||
version "6.4.1"
|
||||
resolved "https://registry.yarnpkg.com/@google-cloud/storage/-/storage-6.4.1.tgz#83334150d4e224cb48691de4d7f9c38e143a0970"
|
||||
integrity sha512-lAddmRJ8tvxPykUqJfONBQA5XGwGk0vut1POXublc64+nCdB5aQMxwuBMf7J1zubx19QGpYPQwW6wR7YTWrvLw==
|
||||
dependencies:
|
||||
"@google-cloud/paginator" "^3.0.7"
|
||||
"@google-cloud/projectify" "^3.0.0"
|
||||
"@google-cloud/promisify" "^3.0.0"
|
||||
abort-controller "^3.0.0"
|
||||
arrify "^2.0.0"
|
||||
async-retry "^1.3.3"
|
||||
compressible "^2.0.12"
|
||||
duplexify "^4.0.0"
|
||||
ent "^2.2.0"
|
||||
extend "^3.0.2"
|
||||
gaxios "^5.0.0"
|
||||
google-auth-library "^8.0.1"
|
||||
mime "^3.0.0"
|
||||
mime-types "^2.0.8"
|
||||
p-limit "^3.0.1"
|
||||
retry-request "^5.0.0"
|
||||
teeny-request "^8.0.0"
|
||||
uuid "^8.0.0"
|
||||
|
||||
"@google-cloud/tasks@^2.3.0":
|
||||
version "2.5.0"
|
||||
resolved "https://registry.yarnpkg.com/@google-cloud/tasks/-/tasks-2.5.0.tgz#e6c2598038001550c408845e91570d176c18a25a"
|
||||
@ -12450,6 +12474,11 @@ dotenv@^16.0.0:
|
||||
resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-16.0.0.tgz#c619001253be89ebb638d027b609c75c26e47411"
|
||||
integrity sha512-qD9WU0MPM4SWLPJy/r2Be+2WgQj8plChsyrCNQzW/0WjvcJQiKQJ9mH3ZgB3fxbUUxgc/11ZJ0Fi5KiimWGz2Q==
|
||||
|
||||
dotenv@^16.0.1:
|
||||
version "16.0.1"
|
||||
resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-16.0.1.tgz#8f8f9d94876c35dac989876a5d3a82a267fdce1d"
|
||||
integrity sha512-1K6hR6wtk2FviQ4kEiSjFiH5rpzEVi8WW0x96aztHVMhEspNpc4DVOUTEHtEva5VThQ8IaBX1Pe4gSzpVVUsKQ==
|
||||
|
||||
dotenv@^8.0.0, dotenv@^8.2.0:
|
||||
version "8.6.0"
|
||||
resolved "https://registry.yarnpkg.com/dotenv/-/dotenv-8.6.0.tgz#061af664d19f7f4d8fc6e4ff9b584ce237adcb8b"
|
||||
@ -14122,7 +14151,7 @@ gaxios@^4.0.0:
|
||||
is-stream "^2.0.0"
|
||||
node-fetch "^2.3.0"
|
||||
|
||||
gaxios@^5.0.0:
|
||||
gaxios@^5.0.0, gaxios@^5.0.1:
|
||||
version "5.0.1"
|
||||
resolved "https://registry.yarnpkg.com/gaxios/-/gaxios-5.0.1.tgz#50fc76a2d04bc1700ed8c3ff1561e52255dfc6e0"
|
||||
integrity sha512-keK47BGKHyyOVQxgcUaSaFvr3ehZYAlvhvpHXy0YB2itzZef+GqZR8TBsfVRWghdwlKrYsn+8L8i3eblF7Oviw==
|
||||
@ -14512,6 +14541,21 @@ google-auth-library@^7.0.0, google-auth-library@^7.6.1, google-auth-library@^7.9
|
||||
jws "^4.0.0"
|
||||
lru-cache "^6.0.0"
|
||||
|
||||
google-auth-library@^8.0.1:
|
||||
version "8.4.0"
|
||||
resolved "https://registry.yarnpkg.com/google-auth-library/-/google-auth-library-8.4.0.tgz#3a5414344bb313ee64ceeef1f7e5162cc1fdf04b"
|
||||
integrity sha512-cg/usxyQEmq4PPDBQRt+kGIrfL3k+mOrAoS9Xv1hitQL66AoY7iWvRBcYo3Rb0w4V1t9e/GqW2/D4honlAtMDg==
|
||||
dependencies:
|
||||
arrify "^2.0.0"
|
||||
base64-js "^1.3.0"
|
||||
ecdsa-sig-formatter "^1.0.11"
|
||||
fast-text-encoding "^1.0.0"
|
||||
gaxios "^5.0.0"
|
||||
gcp-metadata "^5.0.0"
|
||||
gtoken "^6.1.0"
|
||||
jws "^4.0.0"
|
||||
lru-cache "^6.0.0"
|
||||
|
||||
google-auth-library@^8.0.2:
|
||||
version "8.1.0"
|
||||
resolved "https://registry.yarnpkg.com/google-auth-library/-/google-auth-library-8.1.0.tgz#879e8d2e90a9d47e6eab32fd1d5fd9ed52d7d441"
|
||||
@ -14744,6 +14788,15 @@ gtoken@^6.0.0:
|
||||
google-p12-pem "^4.0.0"
|
||||
jws "^4.0.0"
|
||||
|
||||
gtoken@^6.1.0:
|
||||
version "6.1.1"
|
||||
resolved "https://registry.yarnpkg.com/gtoken/-/gtoken-6.1.1.tgz#29ebf3e6893719176d180f5694f1cad525ce3c04"
|
||||
integrity sha512-HPM4VzzPEGxjQ7T2xLrdSYBs+h1c0yHAUiN+8RHPDoiZbndlpg9Sx3SjWcrTt9+N3FHsSABEpjvdQVan5AAuZQ==
|
||||
dependencies:
|
||||
gaxios "^5.0.1"
|
||||
google-p12-pem "^4.0.0"
|
||||
jws "^4.0.0"
|
||||
|
||||
gzip-size@^6.0.0:
|
||||
version "6.0.0"
|
||||
resolved "https://registry.yarnpkg.com/gzip-size/-/gzip-size-6.0.0.tgz#065367fd50c239c0671cbcbad5be3e2eeb10e462"
|
||||
@ -17490,6 +17543,17 @@ lines-and-columns@^1.1.6:
|
||||
resolved "https://registry.yarnpkg.com/lines-and-columns/-/lines-and-columns-1.1.6.tgz#1c00c743b433cd0a4e80758f7b64a57440d9ff00"
|
||||
integrity sha1-HADHQ7QzzQpOgHWPe2SldEDZ/wA=
|
||||
|
||||
linkedom@^0.14.12:
|
||||
version "0.14.12"
|
||||
resolved "https://registry.yarnpkg.com/linkedom/-/linkedom-0.14.12.tgz#3b19442e41de33a9ef9b035ccdd97bf5b66c77e1"
|
||||
integrity sha512-8uw8LZifCwyWeVWr80T79sQTMmNXt4Da7oN5yH5gTXRqQM+TuZWJyBqRMcIp32zx/f8anHNHyil9Avw9y76ziQ==
|
||||
dependencies:
|
||||
css-select "^5.1.0"
|
||||
cssom "^0.5.0"
|
||||
html-escaper "^3.0.3"
|
||||
htmlparser2 "^8.0.1"
|
||||
uhyphen "^0.1.0"
|
||||
|
||||
linkedom@^0.14.9:
|
||||
version "0.14.9"
|
||||
resolved "https://registry.yarnpkg.com/linkedom/-/linkedom-0.14.9.tgz#34c6f15eddc809406f42d8ee48cd30b0222eccb0"
|
||||
|
||||
Reference in New Issue
Block a user