Add OpenAI voices

This commit is contained in:
Jackson Harper
2023-11-08 19:33:36 +08:00
parent 98dd809337
commit e395036e61
5 changed files with 124 additions and 1 deletions

View File

@ -0,0 +1,64 @@
//
// File.swift
//
//
// Created by Jackson Harper on 11/8/23.
//
import SwiftUI
/// A drop-in replacement for SwiftUI's `AsyncImage` that serves previously
/// loaded images from an in-memory, URL-keyed cache (`ImageCache`) instead of
/// re-requesting them every time the view hierarchy is rebuilt.
struct CachedAsyncImage<Content>: View where Content: View {
    /// Remote image location; doubles as the cache key.
    private let url: URL
    /// Scale factor forwarded to `AsyncImage`.
    private let scale: CGFloat
    /// Transaction (animation context) forwarded to `AsyncImage`.
    private let transaction: Transaction
    /// Caller-supplied builder invoked with the current load phase.
    private let content: (AsyncImagePhase) -> Content

    /// Mirrors `AsyncImage.init(url:scale:transaction:content:)` so call
    /// sites can switch between the two without other changes.
    /// - Parameters:
    ///   - url: Location of the image to load (also the cache key).
    ///   - scale: Display scale of the image; defaults to 1.0.
    ///   - transaction: Transaction to use when the phase changes.
    ///   - content: Builds the view shown for each `AsyncImagePhase`.
    init(
        url: URL,
        scale: CGFloat = 1.0,
        transaction: Transaction = Transaction(),
        @ViewBuilder content: @escaping (AsyncImagePhase) -> Content
    ) {
        self.url = url
        self.scale = scale
        self.transaction = transaction
        self.content = content
    }

    var body: some View {
        if let cached = ImageCache[url] {
            // Cache hit: render immediately as a synthetic `.success` phase,
            // bypassing AsyncImage (and the network) entirely.
            // _ = print("cached \(url.absoluteString)")
            content(.success(cached))
        } else {
            // Cache miss: delegate to AsyncImage and capture the result
            // as each phase arrives.
            // _ = print("request \(url.absoluteString)")
            AsyncImage(
                url: url,
                scale: scale,
                transaction: transaction
            ) { phase in
                cacheAndRender(phase: phase)
            }
        }
    }

    /// Stores a successfully loaded image in `ImageCache`, then renders the
    /// phase through the caller's `content` builder.
    /// NOTE(review): this mutates the shared cache from inside view
    /// evaluation — presumably always on the main thread; confirm before
    /// using this view off the main actor.
    func cacheAndRender(phase: AsyncImagePhase) -> some View {
        if case let .success(image) = phase {
            ImageCache[url] = image
        }
        return content(phase)
    }
}
/// Process-wide, in-memory store of decoded images keyed by their source URL.
/// Backing storage for `CachedAsyncImage`; entries live for the process
/// lifetime (the cache is never evicted).
/// NOTE(review): access is unsynchronized — assumes main-thread-only use
/// from SwiftUI view bodies; confirm before touching from other contexts.
private enum ImageCache {
    /// URL -> rendered image.
    private static var storage: [URL: Image] = [:]

    /// Reads or writes the cached image for `url`; `nil` when absent.
    static subscript(url: URL) -> Image? {
        get { storage[url] }
        set { storage[url] = newValue }
    }
}

View File

@ -18,7 +18,7 @@ import {
export class AzureTextToSpeech implements TextToSpeech {
use(input: TextToSpeechInput): boolean {
return !input.isUltraRealisticVoice
return !input.isUltraRealisticVoice && !input.isOpenAIVoice
}
synthesizeTextToSpeech = async (

View File

@ -19,6 +19,7 @@ import {
} from './textToSpeech'
import { createClient } from 'redis'
import { RealisticTextToSpeech } from './realisticTextToSpeech'
import { OpenAITextToSpeech } from './openaiTextToSpeech'
// explicitly create the return type of RedisClient
type RedisClient = ReturnType<typeof createClient>
@ -63,6 +64,7 @@ const MAX_CHARACTER_COUNT = 50000
// Shared Storage client created once at module load.
// NOTE(review): presumably the cloud storage client imported above — the
// import is outside this hunk; confirm against the file header.
const storage = new Storage()

// Text-to-speech backends, checked in order; presumably the first handler
// whose use(input) returns true synthesizes the request, so OpenAI voices
// are claimed before Azure and Ultra Realistic fall through.
// TODO(review): confirm selection logic at the dispatch site.
const textToSpeechHandlers = [
  new OpenAITextToSpeech(),
  new AzureTextToSpeech(),
  new RealisticTextToSpeech(),
]

View File

@ -0,0 +1,56 @@
import {
TextToSpeech,
TextToSpeechInput,
TextToSpeechOutput,
} from './textToSpeech'
import axios from 'axios'
import { stripEmojis } from './htmlToSsml'
const OPEN_AI_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
export class OpenAITextToSpeech implements TextToSpeech {
synthesizeTextToSpeech = async (
input: TextToSpeechInput
): Promise<TextToSpeechOutput> => {
const apiKey = process.env.OPENAI_API_KEY
const voice = input.voice?.substring('openai-'.length)
if (!apiKey) {
throw new Error('API credentials not set')
}
const HEADERS = {
Authorization: `Bearer ${apiKey}`,
'Content-Type': 'application/json',
}
const payload = {
model: 'tts-1',
voice: voice,
input: stripEmojis(input.text),
}
const requestUrl = `https://api.openai.com/v1/audio/speech`
const response = await axios.post<Buffer>(requestUrl, payload, {
headers: HEADERS,
responseType: 'arraybuffer',
})
if (response.data.length === 0) {
console.log('No payload returned: ', response)
throw new Error('No payload returned')
}
return {
speechMarks: [],
audioData: response.data,
}
}
use(input: TextToSpeechInput): boolean {
if (input.voice?.startsWith('openai-')) {
return true
}
return false
}
}

View File

@ -7,6 +7,7 @@ export interface TextToSpeechInput {
rate?: string
secondaryVoice?: string
audioStream?: NodeJS.ReadWriteStream
isOpenAIVoice?: boolean
isUltraRealisticVoice?: boolean
}