diff --git a/apple/OmnivoreKit/Sources/Views/Images/File.swift b/apple/OmnivoreKit/Sources/Views/Images/File.swift
new file mode 100644
--- /dev/null
+++ b/apple/OmnivoreKit/Sources/Views/Images/File.swift
@@ -0,0 +1,70 @@
+//
+//  File.swift
+//
+//
+//  Created by Jackson Harper on 11/8/23.
+//
+
+import SwiftUI
+
+/// An `AsyncImage` wrapper that remembers successfully loaded images in
+/// `ImageCache`, so a URL that has already loaded renders without a new request.
+struct CachedAsyncImage<Content>: View where Content: View {
+  private let url: URL
+  private let scale: CGFloat
+  private let transaction: Transaction
+  private let content: (AsyncImagePhase) -> Content
+
+  /// - Parameters:
+  ///   - url: Image location; also used as the cache key.
+  ///   - scale: Forwarded to `AsyncImage`.
+  ///   - transaction: Forwarded to `AsyncImage`.
+  ///   - content: Builds the view for each loading phase.
+  init(
+    url: URL,
+    scale: CGFloat = 1.0,
+    transaction: Transaction = Transaction(),
+    @ViewBuilder content: @escaping (AsyncImagePhase) -> Content
+  ) {
+    self.url = url
+    self.scale = scale
+    self.transaction = transaction
+    self.content = content
+  }
+
+  var body: some View {
+    if let cached = ImageCache[url] {
+      // Cache hit: render the stored image as a finished load.
+      content(.success(cached))
+    } else {
+      // Cache miss: let AsyncImage load the URL, caching on success.
+      AsyncImage(
+        url: url,
+        scale: scale,
+        transaction: transaction
+      ) { phase in
+        cacheAndRender(phase: phase)
+      }
+    }
+  }
+
+  /// Stores the image when `phase` is `.success`, then returns `content(phase)`.
+  func cacheAndRender(phase: AsyncImagePhase) -> some View {
+    if case let .success(image) = phase {
+      ImageCache[url] = image
+    }
+
+    return content(phase)
+  }
+}
+
+/// In-memory store of loaded images keyed by URL, held in a static dictionary.
+/// NOTE(review): nothing in this file removes entries, so the dictionary only grows.
+private enum ImageCache {
+  private static var cache: [URL: Image] = [:]
+
+  static subscript(url: URL) -> Image? {
+    get { cache[url] }
+    set { cache[url] = newValue }
+  }
+}
diff --git a/packages/text-to-speech/src/azureTextToSpeech.ts b/packages/text-to-speech/src/azureTextToSpeech.ts
index 1d972ec1c..fe43b826f 100644
--- a/packages/text-to-speech/src/azureTextToSpeech.ts
+++ b/packages/text-to-speech/src/azureTextToSpeech.ts
@@ -18,7 +18,7 @@ import {
 
 export class AzureTextToSpeech implements TextToSpeech {
   use(input: TextToSpeechInput): boolean {
-    return !input.isUltraRealisticVoice
+    return !input.isUltraRealisticVoice && !input.isOpenAIVoice
   }
 
   synthesizeTextToSpeech = async (
diff --git a/packages/text-to-speech/src/index.ts b/packages/text-to-speech/src/index.ts
index 992111ef7..9ff7abf4a 100644
--- a/packages/text-to-speech/src/index.ts
+++ b/packages/text-to-speech/src/index.ts
@@ -19,6 +19,7 @@ import {
 } from './textToSpeech'
 import { createClient } from 'redis'
 import { RealisticTextToSpeech } from './realisticTextToSpeech'
+import { OpenAITextToSpeech } from './openaiTextToSpeech'
 
 // explicitly create the return type of RedisClient
 type RedisClient = ReturnType<typeof createClient>
@@ -63,6 +64,7 @@ const MAX_CHARACTER_COUNT = 50000
 const storage = new Storage()
 
 const textToSpeechHandlers = [
+  new OpenAITextToSpeech(),
   new AzureTextToSpeech(),
   new RealisticTextToSpeech(),
 ]
diff --git a/packages/text-to-speech/src/openaiTextToSpeech.ts b/packages/text-to-speech/src/openaiTextToSpeech.ts
new file mode 100644
--- /dev/null
+++ b/packages/text-to-speech/src/openaiTextToSpeech.ts
@@ -0,0 +1,64 @@
+import {
+  TextToSpeech,
+  TextToSpeechInput,
+  TextToSpeechOutput,
+} from './textToSpeech'
+import axios from 'axios'
+import { stripEmojis } from './htmlToSsml'
+
+// NOTE(review): not referenced anywhere in this file; confirm whether voice validation was intended.
+const OPEN_AI_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
+// Prefix that marks a requested voice as an OpenAI voice, e.g. `openai-alloy`.
+const OPEN_AI_VOICE_PREFIX = 'openai-'
+
+/** Text-to-speech handler backed by the OpenAI speech endpoint. */
+export class OpenAITextToSpeech implements TextToSpeech {
+  /**
+   * Posts the input text (passed through stripEmojis) to OpenAI and returns the audio bytes.
+   * Throws when OPENAI_API_KEY is unset or the response body is empty.
+   */
+  synthesizeTextToSpeech = async (
+    input: TextToSpeechInput
+  ): Promise<TextToSpeechOutput> => {
+    const apiKey = process.env.OPENAI_API_KEY
+    const voice = input.voice?.substring(OPEN_AI_VOICE_PREFIX.length)
+
+    if (!apiKey) {
+      throw new Error('API credentials not set')
+    }
+
+    const headers = {
+      Authorization: `Bearer ${apiKey}`,
+      'Content-Type': 'application/json',
+    }
+
+    const payload = {
+      model: 'tts-1',
+      voice: voice,
+      input: stripEmojis(input.text),
+    }
+
+    const requestUrl = `https://api.openai.com/v1/audio/speech`
+    const response = await axios.post(requestUrl, payload, {
+      headers,
+      responseType: 'arraybuffer',
+    })
+
+    if (response.data.length === 0) {
+      // Log only the status: the full response object carries the request
+      // config, including the Authorization header with the API key.
+      console.log('No payload returned: ', response.status, response.statusText)
+      throw new Error('No payload returned')
+    }
+
+    return {
+      speechMarks: [],
+      audioData: response.data,
+    }
+  }
+
+  /** Returns true when the requested voice starts with the `openai-` prefix. */
+  use(input: TextToSpeechInput): boolean {
+    return input.voice?.startsWith(OPEN_AI_VOICE_PREFIX) ?? false
+  }
+}
diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts
index a446b18b3..ed385a29e 100644
--- a/packages/text-to-speech/src/textToSpeech.ts
+++ b/packages/text-to-speech/src/textToSpeech.ts
@@ -7,6 +7,7 @@ export interface TextToSpeechInput {
   rate?: string
   secondaryVoice?: string
   audioStream?: NodeJS.ReadWriteStream
+  isOpenAIVoice?: boolean
   isUltraRealisticVoice?: boolean
 }
 