map realistic voice to openai voice in tts-handler

This commit is contained in:
Hongbo Wu
2024-07-04 16:24:35 +08:00
parent 8c2a59eb1f
commit a841f5748e
3 changed files with 74 additions and 117 deletions

View File

@ -13,7 +13,6 @@ import * as jwt from 'jsonwebtoken'
import { AzureTextToSpeech } from './azureTextToSpeech'
import { endSsml, htmlToSpeechFile, startSsml } from './htmlToSsml'
import { OpenAITextToSpeech } from './openaiTextToSpeech'
import { RealisticTextToSpeech } from './realisticTextToSpeech'
import {
SpeechMark,
TextToSpeechInput,
@ -59,11 +58,7 @@ Sentry.GCPFunction.init({
const MAX_CHARACTER_COUNT = 50000
const storage = new Storage()
const textToSpeechHandlers = [
new OpenAITextToSpeech(),
new AzureTextToSpeech(),
new RealisticTextToSpeech(),
]
const textToSpeechHandlers = [new OpenAITextToSpeech(), new AzureTextToSpeech()]
const synthesizeTextToSpeech = async (
input: TextToSpeechInput

View File

@ -1,19 +1,82 @@
import axios from 'axios'
import { stripEmojis } from './htmlToSsml'
import {
TextToSpeech,
TextToSpeechInput,
TextToSpeechOutput,
} from './textToSpeech'
import axios from 'axios'
import { stripEmojis } from './htmlToSsml'
// Voice names accepted as-is by the OpenAI text-to-speech handler.
const OPEN_AI_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
// Prefix clients use to request an OpenAI voice explicitly, e.g. "openai-nova".
const OPENAI_VOICE_PREFIX = 'openai-'

// Realistic voice name -> OpenAI voice that replaces it.
// The keys are the names the realistic voice handler used to resolve; the
// values keep the pairing expressed by the previous id-based table.
const REALISTIC_TO_OPENAI_VOICE: ReadonlyMap<string, string> = new Map([
  ['Antoni', 'echo'],
  ['Serena', 'alloy'],
  ['Daniel', 'onyx'],
  ['Dorothy', 'fable'],
  ['Michael', 'onyx'],
  ['Matilda', 'shimmer'],
  ['Rachel', 'nova'],
  ['Bella', 'alloy'],
  ['Elli', 'shimmer'],
  ['Josh', 'echo'],
  ['Arnold', 'nova'],
  ['Adam', 'fable'],
])

/**
 * Resolves the voice requested by the client to an OpenAI voice name.
 *
 * Resolution order:
 *  1. `openai-<voice>`  -> `<voice>` (prefix stripped, not validated here)
 *  2. a bare OpenAI voice name (e.g. `nova`) -> returned unchanged
 *  3. a realistic voice name (e.g. `Antoni`) -> its mapped OpenAI voice
 *
 * @param name voice name from the request; may be missing
 * @returns the OpenAI voice name, or `undefined` when `name` is empty or
 *          cannot be resolved
 */
const getVoiceId = (name: string | undefined): string | undefined => {
  if (!name) {
    return undefined
  }

  if (name.startsWith(OPENAI_VOICE_PREFIX)) {
    return name.substring(OPENAI_VOICE_PREFIX.length)
  }

  if (OPEN_AI_VOICES.includes(name)) {
    return name
  }

  // map realistic voice name to openai voice id
  return REALISTIC_TO_OPENAI_VOICE.get(name)
}
export class OpenAITextToSpeech implements TextToSpeech {
synthesizeTextToSpeech = async (
input: TextToSpeechInput
): Promise<TextToSpeechOutput> => {
const apiKey = process.env.OPENAI_API_KEY
const voice = input.voice?.substring('openai-'.length)
const voice = getVoiceId(input.voice)
if (!apiKey) {
throw new Error('API credentials not set')
@ -51,6 +114,12 @@ export class OpenAITextToSpeech implements TextToSpeech {
if (input.voice?.startsWith('openai-')) {
return true
}
// Use OpenAI voice for ultra realistic voice
if (input.isUltraRealisticVoice) {
return true
}
return false
}
}

View File

@ -1,107 +0,0 @@
import {
TextToSpeech,
TextToSpeechInput,
TextToSpeechOutput,
} from './textToSpeech'
import axios from 'axios'
import { stripEmojis } from './htmlToSsml'
/**
 * Looks up the provider voice id registered for a realistic voice name.
 *
 * The comparison is exact (case-sensitive).
 *
 * @param name realistic voice name, e.g. `Antoni`; may be missing
 * @returns the matching voice id, or `undefined` when the name is missing
 *          or not in the table
 */
const getRealisticVoiceId = (name: string | undefined) => {
  // [voice name, voice id] pairs
  const knownVoices: ReadonlyArray<readonly [string, string]> = [
    ['Antoni', 'ErXwobaYiN019PkySvjV'],
    ['Serena', 'pMsXgVXv3BLzUgSXRplE'],
    ['Daniel', 'onwK4e9ZLuTAKqWW03F9'],
    ['Dorothy', 'ThT5KcBeYPX3keUQqHPh'],
    ['Michael', 'flq6f7yk4E4fJM5XTYuZ'],
    ['Matilda', 'XrExE9yKIg1WjnnlVkGX'],
    ['Rachel', '21m00Tcm4TlvDq8ikWAM'],
    ['Bella', 'EXAVITQu4vr4xnSDxMaL'],
    ['Elli', 'MF3mGyEYCl7XYWbV9V6O'],
    ['Josh', 'TxGEqnHWrfWFTfGW9XjX'],
    ['Arnold', 'VR6AewLTigWG4xSOukaG'],
    ['Adam', 'pNInz6obpgDQGcFmaJgB'],
  ]

  for (const [voiceName, voiceId] of knownVoices) {
    if (voiceName === name) {
      return voiceId
    }
  }
  return undefined
}
/**
 * Text-to-speech handler that sends the text to the realistic voice HTTP API
 * configured through `REALISTIC_VOICE_API_ENDPOINT` and
 * `REALISTIC_VOICE_API_KEY`.
 */
export class RealisticTextToSpeech implements TextToSpeech {
  /**
   * Synthesizes `input.text` (with emojis stripped) using the voice named by
   * `input.voice`.
   *
   * @param input text and voice selection for the request
   * @returns the audio bytes returned by the API; `speechMarks` is always empty
   * @throws Error when the endpoint or API key is not configured, when the
   *         voice name is not recognized, or when the API returns an empty body
   */
  synthesizeTextToSpeech = async (
    input: TextToSpeechInput
  ): Promise<TextToSpeechOutput> => {
    const apiKey = process.env.REALISTIC_VOICE_API_KEY
    const apiEndpoint = process.env.REALISTIC_VOICE_API_ENDPOINT
    if (!apiEndpoint || !apiKey) {
      throw new Error('API credentials not set')
    }

    // Report an unrecognized voice separately so it is not mistaken for a
    // configuration problem.
    const voiceId = getRealisticVoiceId(input.voice)
    if (!voiceId) {
      throw new Error(`Unknown realistic voice: ${input.voice ?? '(none)'}`)
    }

    const HEADERS = {
      'xi-api-key': apiKey,
      voice_id: voiceId,
      'Content-Type': 'application/json',
    }

    // The voice id is appended directly to the configured endpoint.
    const requestUrl = `${apiEndpoint}${voiceId}`
    const response = await axios.post<Buffer>(
      requestUrl,
      {
        text: stripEmojis(input.text),
      },
      {
        headers: HEADERS,
        responseType: 'arraybuffer',
      }
    )

    if (response.data.length === 0) {
      // Log only the status: the full response object carries the request
      // config, including the API key header.
      console.log('No payload returned: ', response.status, response.statusText)
      throw new Error('No payload returned')
    }

    return {
      speechMarks: [],
      audioData: response.data,
    }
  }

  /**
   * @returns true when the request asks for an ultra realistic voice
   */
  use(input: TextToSpeechInput): boolean {
    return !!input.isUltraRealisticVoice
  }
}