Add OpenAI voices

This commit is contained in:
Jackson Harper
2023-11-08 19:33:36 +08:00
parent 98dd809337
commit e395036e61
5 changed files with 124 additions and 1 deletions

View File

@ -0,0 +1,64 @@
//
// File.swift
//
//
// Created by Jackson Harper on 11/8/23.
//
import SwiftUI
/// A drop-in replacement for SwiftUI's `AsyncImage` that serves previously
/// loaded images from an in-memory, URL-keyed cache (`ImageCache`) instead of
/// re-requesting them every time the view hierarchy is rebuilt.
struct CachedAsyncImage<Content>: View where Content: View {
    /// Remote image location; doubles as the cache key.
    private let url: URL
    /// Scale factor forwarded to `AsyncImage`.
    private let scale: CGFloat
    /// Transaction (animation context) forwarded to `AsyncImage`.
    private let transaction: Transaction
    /// Caller-supplied builder invoked with the current load phase.
    private let content: (AsyncImagePhase) -> Content

    /// Mirrors `AsyncImage.init(url:scale:transaction:content:)` so call
    /// sites can switch between the two without other changes.
    /// - Parameters:
    ///   - url: Location of the image to load (also the cache key).
    ///   - scale: Display scale of the image; defaults to 1.0.
    ///   - transaction: Transaction to use when the phase changes.
    ///   - content: Builds the view shown for each `AsyncImagePhase`.
    init(
        url: URL,
        scale: CGFloat = 1.0,
        transaction: Transaction = Transaction(),
        @ViewBuilder content: @escaping (AsyncImagePhase) -> Content
    ) {
        self.url = url
        self.scale = scale
        self.transaction = transaction
        self.content = content
    }

    var body: some View {
        if let cached = ImageCache[url] {
            // Cache hit: render immediately as a synthetic `.success` phase,
            // bypassing AsyncImage (and the network) entirely.
            // _ = print("cached \(url.absoluteString)")
            content(.success(cached))
        } else {
            // Cache miss: delegate to AsyncImage and capture the result
            // as each phase arrives.
            // _ = print("request \(url.absoluteString)")
            AsyncImage(
                url: url,
                scale: scale,
                transaction: transaction
            ) { phase in
                cacheAndRender(phase: phase)
            }
        }
    }

    /// Stores a successfully loaded image in `ImageCache`, then renders the
    /// phase through the caller's `content` builder.
    /// NOTE(review): this mutates the shared cache from inside view
    /// evaluation — presumably always on the main thread; confirm before
    /// using this view off the main actor.
    func cacheAndRender(phase: AsyncImagePhase) -> some View {
        if case let .success(image) = phase {
            ImageCache[url] = image
        }
        return content(phase)
    }
}
/// Process-wide, in-memory store of decoded images keyed by their source URL.
/// Backing storage for `CachedAsyncImage`; entries live for the process
/// lifetime (the cache is never evicted).
/// NOTE(review): access is unsynchronized — assumes main-thread-only use
/// from SwiftUI view bodies; confirm before touching from other contexts.
private enum ImageCache {
    /// URL -> rendered image.
    private static var storage: [URL: Image] = [:]

    /// Reads or writes the cached image for `url`; `nil` when absent.
    static subscript(url: URL) -> Image? {
        get { storage[url] }
        set { storage[url] = newValue }
    }
}

View File

@ -18,7 +18,7 @@ import {
export class AzureTextToSpeech implements TextToSpeech {
use(input: TextToSpeechInput): boolean {
return !input.isUltraRealisticVoice
return !input.isUltraRealisticVoice && !input.isOpenAIVoice
}
synthesizeTextToSpeech = async (

View File

@ -19,6 +19,7 @@ import {
} from './textToSpeech'
import { createClient } from 'redis'
import { RealisticTextToSpeech } from './realisticTextToSpeech'
import { OpenAITextToSpeech } from './openaiTextToSpeech'
// explicitly create the return type of RedisClient
type RedisClient = ReturnType<typeof createClient>
@ -63,6 +64,7 @@ const MAX_CHARACTER_COUNT = 50000
// Shared Storage client created once at module load.
// NOTE(review): presumably the cloud storage client imported above — the
// import is outside this hunk; confirm against the file header.
const storage = new Storage()

// Text-to-speech backends, checked in order; presumably the first handler
// whose use(input) returns true synthesizes the request, so OpenAI voices
// are claimed before Azure and Ultra Realistic fall through.
// TODO(review): confirm selection logic at the dispatch site.
const textToSpeechHandlers = [
  new OpenAITextToSpeech(),
  new AzureTextToSpeech(),
  new RealisticTextToSpeech(),
]

View File

@ -0,0 +1,56 @@
import {
TextToSpeech,
TextToSpeechInput,
TextToSpeechOutput,
} from './textToSpeech'
import axios from 'axios'
import { stripEmojis } from './htmlToSsml'
const OPEN_AI_VOICES = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
export class OpenAITextToSpeech implements TextToSpeech {
synthesizeTextToSpeech = async (
input: TextToSpeechInput
): Promise<TextToSpeechOutput> => {
const apiKey = process.env.OPENAI_API_KEY
const voice = input.voice?.substring('openai-'.length)
if (!apiKey) {
throw new Error('API credentials not set')
}
const HEADERS = {
Authorization: `Bearer ${apiKey}`,
'Content-Type': 'application/json',
}
const payload = {
model: 'tts-1',
voice: voice,
input: stripEmojis(input.text),
}
const requestUrl = `https://api.openai.com/v1/audio/speech`
const response = await axios.post<Buffer>(requestUrl, payload, {
headers: HEADERS,
responseType: 'arraybuffer',
})
if (response.data.length === 0) {
console.log('No payload returned: ', response)
throw new Error('No payload returned')
}
return {
speechMarks: [],
audioData: response.data,
}
}
use(input: TextToSpeechInput): boolean {
if (input.voice?.startsWith('openai-')) {
return true
}
return false
}
}

View File

@ -7,6 +7,7 @@ export interface TextToSpeechInput {
rate?: string
secondaryVoice?: string
audioStream?: NodeJS.ReadWriteStream
isOpenAIVoice?: boolean
isUltraRealisticVoice?: boolean
}