map realistic voice to openai voice in tts-handler
This commit is contained in:
@ -13,7 +13,6 @@ import * as jwt from 'jsonwebtoken'
|
||||
import { AzureTextToSpeech } from './azureTextToSpeech'
|
||||
import { endSsml, htmlToSpeechFile, startSsml } from './htmlToSsml'
|
||||
import { OpenAITextToSpeech } from './openaiTextToSpeech'
|
||||
import { RealisticTextToSpeech } from './realisticTextToSpeech'
|
||||
import {
|
||||
SpeechMark,
|
||||
TextToSpeechInput,
|
||||
@ -59,11 +58,7 @@ Sentry.GCPFunction.init({
|
||||
const MAX_CHARACTER_COUNT = 50000
|
||||
const storage = new Storage()
|
||||
|
||||
const textToSpeechHandlers = [
|
||||
new OpenAITextToSpeech(),
|
||||
new AzureTextToSpeech(),
|
||||
new RealisticTextToSpeech(),
|
||||
]
|
||||
const textToSpeechHandlers = [new OpenAITextToSpeech(), new AzureTextToSpeech()]
|
||||
|
||||
const synthesizeTextToSpeech = async (
|
||||
input: TextToSpeechInput
|
||||
|
||||
@ -1,19 +1,82 @@
|
||||
import axios from 'axios'
|
||||
import { stripEmojis } from './htmlToSsml'
|
||||
import {
|
||||
TextToSpeech,
|
||||
TextToSpeechInput,
|
||||
TextToSpeechOutput,
|
||||
} from './textToSpeech'
|
||||
import axios from 'axios'
|
||||
import { stripEmojis } from './htmlToSsml'
|
||||
|
||||
const OPEN_AI_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
const OPENAI_VOICE_PREFIX = 'openai-'

/**
 * Legacy "realistic" voices and the OpenAI voice that stands in for each one.
 *
 * `name` is the legacy voice name, `voiceId` is the id that the legacy
 * realistic-voice provider used for it, and `openAIVoice` is the replacement
 * OpenAI voice.
 */
const REALISTIC_TO_OPENAI_VOICES = [
  { name: 'Antoni', voiceId: 'ErXwobaYiN019PkySvjV', openAIVoice: 'echo' },
  { name: 'Serena', voiceId: 'pMsXgVXv3BLzUgSXRplE', openAIVoice: 'alloy' },
  { name: 'Daniel', voiceId: 'onwK4e9ZLuTAKqWW03F9', openAIVoice: 'onyx' },
  { name: 'Dorothy', voiceId: 'ThT5KcBeYPX3keUQqHPh', openAIVoice: 'fable' },
  { name: 'Michael', voiceId: 'flq6f7yk4E4fJM5XTYuZ', openAIVoice: 'onyx' },
  { name: 'Matilda', voiceId: 'XrExE9yKIg1WjnnlVkGX', openAIVoice: 'shimmer' },
  { name: 'Rachel', voiceId: '21m00Tcm4TlvDq8ikWAM', openAIVoice: 'nova' },
  { name: 'Bella', voiceId: 'EXAVITQu4vr4xnSDxMaL', openAIVoice: 'alloy' },
  { name: 'Elli', voiceId: 'MF3mGyEYCl7XYWbV9V6O', openAIVoice: 'shimmer' },
  { name: 'Josh', voiceId: 'TxGEqnHWrfWFTfGW9XjX', openAIVoice: 'echo' },
  { name: 'Arnold', voiceId: 'VR6AewLTigWG4xSOukaG', openAIVoice: 'nova' },
  { name: 'Adam', voiceId: 'pNInz6obpgDQGcFmaJgB', openAIVoice: 'fable' },
]

/**
 * Resolves the voice requested by a client to an OpenAI voice id.
 *
 * @param name - the requested voice: either an `openai-`-prefixed OpenAI
 *   voice, a legacy realistic voice name (e.g. `Antoni`), or a legacy
 *   realistic voice id.
 * @returns the OpenAI voice id, or `undefined` when `name` is empty or is
 *   not a known voice.
 */
const getVoiceId = (name: string | undefined): string | undefined => {
  if (!name) {
    return undefined
  }

  if (name.startsWith(OPENAI_VOICE_PREFIX)) {
    return name.substring(OPENAI_VOICE_PREFIX.length)
  }

  // Map the realistic voice to its OpenAI replacement. The result must be the
  // OpenAI voice, never the legacy provider id, because it is used as the
  // voice of the OpenAI request.
  return REALISTIC_TO_OPENAI_VOICES.find(
    (voice) => voice.name === name || voice.voiceId === name
  )?.openAIVoice
}
|
||||
|
||||
export class OpenAITextToSpeech implements TextToSpeech {
|
||||
synthesizeTextToSpeech = async (
|
||||
input: TextToSpeechInput
|
||||
): Promise<TextToSpeechOutput> => {
|
||||
const apiKey = process.env.OPENAI_API_KEY
|
||||
const voice = input.voice?.substring('openai-'.length)
|
||||
const voice = getVoiceId(input.voice)
|
||||
|
||||
if (!apiKey) {
|
||||
throw new Error('API credentials not set')
|
||||
@ -51,6 +114,12 @@ export class OpenAITextToSpeech implements TextToSpeech {
|
||||
if (input.voice?.startsWith('openai-')) {
|
||||
return true
|
||||
}
|
||||
|
||||
// Use OpenAI voice for ultra realistic voice
|
||||
if (input.isUltraRealisticVoice) {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,107 +0,0 @@
|
||||
import {
|
||||
TextToSpeech,
|
||||
TextToSpeechInput,
|
||||
TextToSpeechOutput,
|
||||
} from './textToSpeech'
|
||||
import axios from 'axios'
|
||||
import { stripEmojis } from './htmlToSsml'
|
||||
|
||||
/**
 * Looks up the provider voice id for a realistic voice name.
 *
 * The match is exact (case-sensitive). Returns `undefined` when `name` is
 * missing or is not one of the known voices.
 */
const getRealisticVoiceId = (name: string | undefined) => {
  // [voice name, provider voice id]
  const knownVoices: ReadonlyArray<readonly [string, string]> = [
    ['Antoni', 'ErXwobaYiN019PkySvjV'],
    ['Serena', 'pMsXgVXv3BLzUgSXRplE'],
    ['Daniel', 'onwK4e9ZLuTAKqWW03F9'],
    ['Dorothy', 'ThT5KcBeYPX3keUQqHPh'],
    ['Michael', 'flq6f7yk4E4fJM5XTYuZ'],
    ['Matilda', 'XrExE9yKIg1WjnnlVkGX'],
    ['Rachel', '21m00Tcm4TlvDq8ikWAM'],
    ['Bella', 'EXAVITQu4vr4xnSDxMaL'],
    ['Elli', 'MF3mGyEYCl7XYWbV9V6O'],
    ['Josh', 'TxGEqnHWrfWFTfGW9XjX'],
    ['Arnold', 'VR6AewLTigWG4xSOukaG'],
    ['Adam', 'pNInz6obpgDQGcFmaJgB'],
  ]

  for (const [voiceName, id] of knownVoices) {
    if (voiceName === name) {
      return id
    }
  }
  return undefined
}
|
||||
|
||||
/**
 * Text-to-speech handler that sends the text to the HTTP API configured via
 * the `REALISTIC_VOICE_API_ENDPOINT` and `REALISTIC_VOICE_API_KEY`
 * environment variables.
 */
export class RealisticTextToSpeech implements TextToSpeech {
  /**
   * Synthesizes `input.text` (with emojis stripped) using the realistic voice
   * named by `input.voice`.
   *
   * @returns the audio bytes returned by the API and an empty list of speech
   *   marks.
   * @throws Error when the endpoint or API key is not configured, when
   *   `input.voice` is not a known realistic voice, or when the API returns
   *   an empty payload. Request failures raised by axios propagate unchanged.
   */
  synthesizeTextToSpeech = async (
    input: TextToSpeechInput
  ): Promise<TextToSpeechOutput> => {
    const apiKey = process.env.REALISTIC_VOICE_API_KEY
    const apiEndpoint = process.env.REALISTIC_VOICE_API_ENDPOINT
    if (!apiEndpoint || !apiKey) {
      throw new Error('API credentials not set')
    }

    // Report an unknown voice as such instead of as a credentials problem.
    const voiceId = getRealisticVoiceId(input.voice)
    if (!voiceId) {
      throw new Error(`Unknown realistic voice: ${String(input.voice)}`)
    }

    const HEADERS = {
      'xi-api-key': apiKey,
      voice_id: voiceId,
      'Content-Type': 'application/json',
    }

    // The voice id is appended directly to the configured endpoint.
    const requestUrl = `${apiEndpoint}${voiceId}`
    const response = await axios.post<Buffer>(
      requestUrl,
      {
        text: stripEmojis(input.text),
      },
      {
        headers: HEADERS,
        responseType: 'arraybuffer',
      }
    )

    if (response.data.length === 0) {
      // Log only the status: the full axios response carries the request
      // config, including the API key header.
      console.log('No payload returned: ', {
        status: response.status,
        statusText: response.statusText,
      })
      throw new Error('No payload returned')
    }

    return {
      speechMarks: [],
      audioData: response.data,
    }
  }

  /** This handler applies only to requests flagged as ultra realistic voice. */
  use(input: TextToSpeechInput): boolean {
    return !!input.isUltraRealisticVoice
  }
}
|
||||
Reference in New Issue
Block a user