Merge pull request #1203 from omnivore-app/feature/cache-tts-audio

feature/cache tts audio
This commit is contained in:
Hongbo Wu
2022-09-21 12:24:15 +08:00
committed by GitHub
7 changed files with 148 additions and 22 deletions

View File

@ -66,6 +66,17 @@ services:
ports:
- "5601:5601"
redis:
image: "redis:6.2.7"
container_name: "omnivore-redis"
healthcheck:
test: "exit 0"
interval: 2s
timeout: 12s
retries: 3
ports:
- "6379:6379"
api:
build:
context: .
@ -100,6 +111,8 @@ services:
condition: service_completed_successfully
elastic:
condition: service_healthy
redis:
condition: service_healthy
web:
build:

View File

@ -76,6 +76,7 @@
"pg": "^8.3.3",
"postgrator": "^4.2.0",
"private-ip": "^2.3.3",
"redis": "^4.3.1",
"sanitize-html": "^2.3.2",
"search-query-parser": "^1.6.0",
"snake-case": "^3.0.3",

View File

@ -44,6 +44,7 @@ export type SSMLOptions = {
const DEFAULT_LANGUAGE = 'en-US'
const DEFAULT_VOICE = 'en-US-JennyNeural'
const DEFAULT_SECONDARY_VOICE = 'en-US-GuyNeural'
const DEFAULT_RATE = '1.0'
const ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES = [
@ -190,13 +191,11 @@ function emitElement(
export const startSsml = (options: SSMLOptions, element?: Element): string => {
const voice =
element?.nodeName === 'BLOCKQUOTE'
? options.secondaryVoice
: options.primaryVoice
? options.secondaryVoice ?? DEFAULT_SECONDARY_VOICE
: options.primaryVoice ?? DEFAULT_VOICE
return `<speak xmlns="http://www.w3.org/2001/10/synthesis" version="1.0" xml:lang="${
options.language || DEFAULT_LANGUAGE
}"><voice name="${voice || DEFAULT_VOICE}"><prosody rate="${
options.rate || DEFAULT_RATE
}">`
}"><voice name="${voice}"><prosody rate="${options.rate || DEFAULT_RATE}">`
}
export const endSsml = (): string => {

View File

@ -7,9 +7,15 @@ import * as Sentry from '@sentry/serverless'
import axios from 'axios'
import * as jwt from 'jsonwebtoken'
import * as dotenv from 'dotenv' // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import
import { synthesizeTextToSpeech, TextToSpeechInput } from './textToSpeech'
import {
SpeechMark,
synthesizeTextToSpeech,
TextToSpeechInput,
} from './textToSpeech'
import { File, Storage } from '@google-cloud/storage'
import { htmlToSpeechFile } from './htmlToSsml'
import { endSsml, htmlToSpeechFile, startSsml } from './htmlToSsml'
import crypto from 'crypto'
import { createRedisClient } from './redis'
interface UtteranceInput {
voice?: string
@ -29,6 +35,11 @@ interface HTMLInput {
bucket: string
}
interface CacheResult {
audioDataString: string
speechMarks: SpeechMark[]
}
dotenv.config()
Sentry.GCPFunction.init({
dsn: process.env.SENTRY_DSN,
@ -160,17 +171,56 @@ export const textToSpeechStreamingHandler = Sentry.GCPFunction.wrapHttpFunction(
try {
const utteranceInput = req.body as UtteranceInput
const ssmlOptions = {
primaryVoice: utteranceInput.voice,
secondaryVoice: utteranceInput.voice,
language: utteranceInput.language,
rate: utteranceInput.rate,
}
// for utterance, assemble the ssml and pass it through
const ssml = `${startSsml(ssmlOptions)}${utteranceInput.text}${endSsml()}`
// hash ssml to get the cache key
const cacheKey = crypto.createHash('md5').update(ssml).digest('hex')
const redisClient = await createRedisClient()
// find audio data in cache
const cacheResult = await redisClient.get(cacheKey)
if (cacheResult) {
console.log('Cache hit')
const { audioDataString, speechMarks }: CacheResult =
JSON.parse(cacheResult)
res.send({
idx: utteranceInput.idx,
audioData: audioDataString,
speechMarks,
})
return
}
console.log('Cache miss')
// synthesize text to speech if cache miss
const input: TextToSpeechInput = {
...utteranceInput,
textType: 'utterance',
textType: 'ssml',
text: ssml,
}
const { audioData, speechMarks } = await synthesizeTextToSpeech(input)
if (!audioData) {
return res.status(500).send({ errorCode: 'SYNTHESIZER_ERROR' })
}
const audioDataString = audioData.toString('hex')
// save audio data to cache for 1 hour
await redisClient.set(
cacheKey,
JSON.stringify({ audioDataString, speechMarks }),
{
EX: 3600, // in seconds
NX: true,
}
)
console.log('Cache saved')
res.send({
idx: utteranceInput.idx,
audioData: audioData.toString('hex'),
audioData: audioDataString,
speechMarks,
})
} catch (e) {

View File

@ -0,0 +1,11 @@
import { createClient } from 'redis'
/**
 * Creates a Redis client, connects it, and hands it back to the caller.
 *
 * The server address is read from the `REDIS_URL` environment variable.
 * Every call builds and connects a fresh client; nothing is cached here.
 *
 * @returns the client once `connect()` has resolved
 */
export const createRedisClient = async () => {
  const { REDIS_URL: url } = process.env

  // Errors emitted by the client are only logged, not rethrown.
  const logClientError = (err: unknown): void =>
    console.error('Redis Client Error', err)

  const client = createClient({ url })
  client.on('error', logClientError)

  await client.connect()
  return client
}

View File

@ -7,13 +7,13 @@ import {
SpeechSynthesisResult,
SpeechSynthesizer,
} from 'microsoft-cognitiveservices-speech-sdk'
import { endSsml, htmlToSsmlItems, ssmlItemText, startSsml } from './htmlToSsml'
import { htmlToSsmlItems, ssmlItemText } from './htmlToSsml'
export interface TextToSpeechInput {
text: string
voice?: string
language?: string
textType?: 'html' | 'utterance'
textType?: 'html' | 'ssml'
rate?: string
secondaryVoice?: string
audioStream?: NodeJS.ReadWriteStream
@ -51,7 +51,7 @@ export const synthesizeTextToSpeech = async (
const synthesizer = new SpeechSynthesizer(speechConfig)
const speechMarks: SpeechMark[] = []
let timeOffset = 0
let wordOffset = 0
const wordOffset = 0
synthesizer.synthesizing = function (s, e) {
// convert arrayBuffer to stream and write to stream
@ -137,11 +137,7 @@ export const synthesizeTextToSpeech = async (
speechMarks,
}
}
// for utterance, just assemble the ssml and pass it through
const start = startSsml(ssmlOptions)
wordOffset = -start.length
const ssml = `${start}${input.text}${endSsml()}`
const result = await speakSsmlAsyncPromise(ssml)
const result = await speakSsmlAsyncPromise(input.text)
if (result.reason === ResultReason.Canceled) {
throw new Error(result.errorDetails)
}

View File

@ -5022,6 +5022,40 @@
tiny-warning "^1.0.3"
tslib "^2.3.0"
"@redis/bloom@1.0.2":
version "1.0.2"
resolved "https://registry.yarnpkg.com/@redis/bloom/-/bloom-1.0.2.tgz#42b82ec399a92db05e29fffcdfd9235a5fc15cdf"
integrity sha512-EBw7Ag1hPgFzdznK2PBblc1kdlj5B5Cw3XwI9/oG7tSn85/HKy3X9xHy/8tm/eNXJYHLXHJL/pkwBpFMVVefkw==
"@redis/client@1.3.0":
version "1.3.0"
resolved "https://registry.yarnpkg.com/@redis/client/-/client-1.3.0.tgz#c62ccd707f16370a2dc2f9e158a28b7da049fa77"
integrity sha512-XCFV60nloXAefDsPnYMjHGtvbtHR8fV5Om8cQ0JYqTNbWcQo/4AryzJ2luRj4blveWazRK/j40gES8M7Cp6cfQ==
dependencies:
cluster-key-slot "1.1.0"
generic-pool "3.8.2"
yallist "4.0.0"
"@redis/graph@1.0.1":
version "1.0.1"
resolved "https://registry.yarnpkg.com/@redis/graph/-/graph-1.0.1.tgz#eabc58ba99cd70d0c907169c02b55497e4ec8a99"
integrity sha512-oDE4myMCJOCVKYMygEMWuriBgqlS5FqdWerikMoJxzmmTUErnTRRgmIDa2VcgytACZMFqpAOWDzops4DOlnkfQ==
"@redis/json@1.0.4":
version "1.0.4"
resolved "https://registry.yarnpkg.com/@redis/json/-/json-1.0.4.tgz#f372b5f93324e6ffb7f16aadcbcb4e5c3d39bda1"
integrity sha512-LUZE2Gdrhg0Rx7AN+cZkb1e6HjoSKaeeW8rYnt89Tly13GBI5eP4CwDVr+MY8BAYfCg4/N15OUrtLoona9uSgw==
"@redis/search@1.1.0":
version "1.1.0"
resolved "https://registry.yarnpkg.com/@redis/search/-/search-1.1.0.tgz#7abb18d431f27ceafe6bcb4dd83a3fa67e9ab4df"
integrity sha512-NyFZEVnxIJEybpy+YskjgOJRNsfTYqaPbK/Buv6W2kmFNaRk85JiqjJZA5QkRmWvGbyQYwoO5QfDi2wHskKrQQ==
"@redis/time-series@1.0.3":
version "1.0.3"
resolved "https://registry.yarnpkg.com/@redis/time-series/-/time-series-1.0.3.tgz#4cfca8e564228c0bddcdf4418cba60c20b224ac4"
integrity sha512-OFp0q4SGrTH0Mruf6oFsHGea58u8vS/iI5+NpYdicaM+7BgqBZH8FFvNZ8rYYLrUO/QRqMq72NpXmxLVNcdmjA==
"@rushstack/eslint-patch@^1.0.8":
version "1.1.0"
resolved "https://registry.yarnpkg.com/@rushstack/eslint-patch/-/eslint-patch-1.1.0.tgz#7f698254aadf921e48dda8c0a6b304026b8a9323"
@ -10957,6 +10991,11 @@ clsx@^1.1.1:
resolved "https://registry.yarnpkg.com/clsx/-/clsx-1.1.1.tgz#98b3134f9abbdf23b2663491ace13c5c03a73188"
integrity sha512-6/bPho624p3S2pMyvP5kKBPXnI3ufHLObBFCfgx+LkeR5lg2XYy2hqZqUf45ypD8COn2bhgGJSUE+l5dhNBieA==
cluster-key-slot@1.1.0:
version "1.1.0"
resolved "https://registry.yarnpkg.com/cluster-key-slot/-/cluster-key-slot-1.1.0.tgz#30474b2a981fb12172695833052bc0d01336d10d"
integrity sha512-2Nii8p3RwAPiFwsnZvukotvow2rIHM+yQ6ZcBXGHdniadkYGZYiGmkHJIbZPIV9nfv7m/U1IPMVVcAhoWFeklw==
cmd-shim@^4.1.0:
version "4.1.0"
resolved "https://registry.yarnpkg.com/cmd-shim/-/cmd-shim-4.1.0.tgz#b3a904a6743e9fede4148c6f3800bf2a08135bdd"
@ -14219,6 +14258,11 @@ gcp-metadata@^5.0.0:
gaxios "^5.0.0"
json-bigint "^1.0.0"
generic-pool@3.8.2:
version "3.8.2"
resolved "https://registry.yarnpkg.com/generic-pool/-/generic-pool-3.8.2.tgz#aab4f280adb522fdfbdc5e5b64d718d3683f04e9"
integrity sha512-nGToKy6p3PAbYQ7p1UlWl6vSPwfwU6TMSWK7TTu+WUY4ZjyZQGniGGt2oNVvyNSpyZYSB43zMXVLcBm08MTMkg==
gensync@^1.0.0-beta.1, gensync@^1.0.0-beta.2:
version "1.0.0-beta.2"
resolved "https://registry.yarnpkg.com/gensync/-/gensync-1.0.0-beta.2.tgz#32a6ee76c3d7f52d46b2b1ae5d93fea8580a25e0"
@ -21734,6 +21778,18 @@ redent@^3.0.0:
indent-string "^4.0.0"
strip-indent "^3.0.0"
redis@^4.3.1:
version "4.3.1"
resolved "https://registry.yarnpkg.com/redis/-/redis-4.3.1.tgz#290532a0c22221e05e991162ac4dca1e1b2ff6da"
integrity sha512-cM7yFU5CA6zyCF7N/+SSTcSJQSRMEKN0k0Whhu6J7n9mmXRoXugfWDBo5iOzGwABmsWKSwGPTU5J4Bxbl+0mrA==
dependencies:
"@redis/bloom" "1.0.2"
"@redis/client" "1.3.0"
"@redis/graph" "1.0.1"
"@redis/json" "1.0.4"
"@redis/search" "1.1.0"
"@redis/time-series" "1.0.3"
reflect-metadata@^0.1.13:
version "0.1.13"
resolved "https://registry.yarnpkg.com/reflect-metadata/-/reflect-metadata-0.1.13.tgz#67ae3ca57c972a2aa1642b10fe363fe32d49dc08"
@ -25685,6 +25741,11 @@ y18n@^5.0.5:
resolved "https://registry.yarnpkg.com/y18n/-/y18n-5.0.8.tgz#7f4934d0f7ca8c56f95314939ddcd2dd91ce1d55"
integrity sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==
yallist@4.0.0, yallist@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72"
integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==
yallist@^2.0.0, yallist@^2.1.2:
version "2.1.2"
resolved "https://registry.yarnpkg.com/yallist/-/yallist-2.1.2.tgz#1c11f9218f076089a47dd512f93c6699a6a81d52"
@ -25695,11 +25756,6 @@ yallist@^3.0.0, yallist@^3.0.2, yallist@^3.1.1:
resolved "https://registry.yarnpkg.com/yallist/-/yallist-3.1.1.tgz#dbb7daf9bfd8bac9ab45ebf602b8cbad0d5d08fd"
integrity sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==
yallist@^4.0.0:
version "4.0.0"
resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72"
integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==
yaml-ast-parser@^0.0.43:
version "0.0.43"
resolved "https://registry.yarnpkg.com/yaml-ast-parser/-/yaml-ast-parser-0.0.43.tgz#e8a23e6fb4c38076ab92995c5dca33f3d3d7c9bb"