diff --git a/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/ExpandedPlayer.swift b/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/ExpandedPlayer.swift index 39384a6cf..372d8edc2 100644 --- a/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/ExpandedPlayer.swift +++ b/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/ExpandedPlayer.swift @@ -146,7 +146,7 @@ + Text(audioController.unreadText) .font(.textToSpeechRead.leading(.loose)) - .foregroundColor(Color.appGrayText) + .foregroundColor(audioController.useUltraRealisticVoices ? Color.appGrayTextContrast : Color.appGrayText) } } } diff --git a/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/MiniPlayer.swift b/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/MiniPlayer.swift index 3021943de..1747c1e67 100644 --- a/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/MiniPlayer.swift +++ b/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/MiniPlayer.swift @@ -21,7 +21,11 @@ } var isPresented: Bool { - audioController.itemAudioProperties != nil && audioController.state != .stopped + let presented = audioController.itemAudioProperties != nil && audioController.state != .stopped + if !presented { + print("isPresented: ", audioController.itemAudioProperties, audioController.state) + } + return presented } var playPauseButtonImage: String { @@ -172,6 +176,9 @@ } } } + }.alert("There was an error playing back your audio.", + isPresented: $audioController.playbackError) { + Button("Dismiss", role: .none) {} } } } diff --git a/apple/OmnivoreKit/Sources/App/Views/Profile/TextToSpeechVoiceSelectionView.swift b/apple/OmnivoreKit/Sources/App/Views/Profile/TextToSpeechVoiceSelectionView.swift index 34549ef02..deddc99dd 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Profile/TextToSpeechVoiceSelectionView.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Profile/TextToSpeechVoiceSelectionView.swift @@ -17,36 +17,39 @@ var body: some View { Group { Form { - if showLanguageChanger { - Section("Language") { - NavigationLink(destination: 
TextToSpeechLanguageView().navigationTitle("Language")) { - Text(audioController.currentVoiceLanguage.name) + Toggle("Use Ultra Realistic Voices", isOn: $audioController.useUltraRealisticVoices) + .accentColor(Color.green) + + if audioController.useUltraRealisticVoices { + Section { + Text("Ultra realistic voices take longer to generate and do not offer a follow along user interface.") + .multilineTextAlignment(.leading) + } + ultraRealisticVoices + } else { + if showLanguageChanger { + Section("Language") { + NavigationLink(destination: TextToSpeechLanguageView().navigationTitle("Language")) { + Text(audioController.currentVoiceLanguage.name) + } } } + standardVoices } - innerBody } } .navigationTitle("Choose a Voice") } - private var innerBody: some View { + private var standardVoices: some View { ForEach(language.categories, id: \.self) { category in Section(category.rawValue) { ForEach(audioController.voiceList?.filter { $0.category == category } ?? [], id: \.key.self) { voice in HStack { - // Voice samples are not working yet -// Button(action: { -// audioController.playVoiceSample(voice: voice.key) -// }) { -// Image(systemName: "play.circle").font(.appTitleTwo) -// } -// .buttonStyle(PlainButtonStyle()) - Button(action: { audioController.setPreferredVoice(voice.key, forLanguage: language.key) audioController.currentVoice = voice.key - }) { + }, label: { HStack { Text(voice.name) Spacer() @@ -60,12 +63,46 @@ } } .contentShape(Rectangle()) - } - .buttonStyle(PlainButtonStyle()) + }) + .buttonStyle(PlainButtonStyle()) } } } } } + + private var ultraRealisticVoices: some View { + ForEach([VoiceCategory.enUS, VoiceCategory.enCA, VoiceCategory.enUK], id: \.self) { category in + Section(category.rawValue) { + ForEach(audioController.realisticVoiceList?.filter { $0.category == category } ?? 
[], id: \.key.self) { voice in + voiceRow(for: voice) + } + } + } + } + + func voiceRow(for voice: VoiceItem) -> some View { + HStack { + Button(action: { + audioController.setPreferredVoice(voice.key, forLanguage: language.key) + audioController.currentVoice = voice.key + }, label: { + HStack { + Text(voice.name) + Spacer() + + if voice.selected { + if audioController.isPlaying, audioController.isLoading { + ProgressView() + } else { + Image(systemName: "checkmark") + } + } + } + .contentShape(Rectangle()) + }) + .buttonStyle(PlainButtonStyle()) + } + } } #endif diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift index fdb4a686a..ec0726c62 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift @@ -59,6 +59,9 @@ let duration = CMTimeGetSeconds(item.duration) item.session.updateDuration(forItem: item.speechItem, newDuration: duration) } + if item.status == .failed { + item.session.stopWithError() + } } NotificationCenter.default.addObserver( @@ -119,23 +122,35 @@ Task { guard let speechItem = self.owner?.speechItem else { // This probably can't happen, but if it does, just returning should - // let AVPlayer try again. - print("No speech item found: ", self.owner?.speechItem) + DispatchQueue.main.async { + self.processPlaybackError(error: BasicError.message(messageText: "No speech item found.")) + } return } - // TODO: how do we want to propogate this and handle it in the player - let speechData = try? 
await SpeechSynthesizer.download(speechItem: speechItem, session: self.session) - DispatchQueue.main.async { - if speechData == nil { - self.session = nil - } - if let owner = self.owner, let speechData = speechData { - owner.speechMarks = speechData.speechMarks - } - self.mediaData = speechData?.audioData + do { + let speechData = try await SpeechSynthesizer.download(speechItem: speechItem, session: self.session ?? URLSession.shared) - self.processPendingRequests() + DispatchQueue.main.async { + if speechData == nil { + self.session = nil + self.processPlaybackError(error: BasicError.message(messageText: "Unable to download speech data.")) + return + } + + if let owner = self.owner, let speechData = speechData { + owner.speechMarks = speechData.speechMarks + } + self.mediaData = speechData?.audioData + + self.processPendingRequests() + } + } catch URLError.cancelled { + print("cancelled request error being ignored") + } catch { + DispatchQueue.main.async { + self.processPlaybackError(error: error) + } } } } @@ -158,6 +173,15 @@ _ = requestsFulfilled.map { self.pendingRequests.remove($0) } } + func processPlaybackError(error: Error?) { + for pendingRequest in pendingRequests { + pendingRequest.finishLoading(with: error) + } + + pendingRequests.removeAll() + } + + func fillInContentInformationRequest(_ contentInformationRequest: AVAssetResourceLoadingContentInformationRequest?) { contentInformationRequest?.contentType = UTType.mp3.identifier @@ -205,10 +229,13 @@ @Published public var duration: TimeInterval = 0 @Published public var timeElapsedString: String? @Published public var durationString: String? - @Published public var voiceList: [(name: String, key: String, category: VoiceCategory, selected: Bool)]? + @Published public var voiceList: [VoiceItem]? + @Published public var realisticVoiceList: [VoiceItem]? @Published public var textItems: [String]? 
+ @Published public var playbackError: Bool = false + let dataService: DataService var timer: Timer? @@ -224,6 +251,7 @@ super.init() self.voiceList = generateVoiceList() + self.realisticVoiceList = generateRealisticVoiceList() } deinit { @@ -277,11 +305,25 @@ } } - public func generateVoiceList() -> [(name: String, key: String, category: VoiceCategory, selected: Bool)] { + public func stopWithError() { + stop() + playbackError = true + } + + public func generateVoiceList() -> [VoiceItem] { Voices.Pairs.flatMap { voicePair in [ - (name: voicePair.firstName, key: voicePair.firstKey, category: voicePair.category, selected: voicePair.firstKey == currentVoice), - (name: voicePair.secondName, key: voicePair.secondKey, category: voicePair.category, selected: voicePair.secondKey == currentVoice) + VoiceItem(name: voicePair.firstName, key: voicePair.firstKey, category: voicePair.category, selected: voicePair.firstKey == currentVoice), + VoiceItem(name: voicePair.secondName, key: voicePair.secondKey, category: voicePair.category, selected: voicePair.secondKey == currentVoice) + ] + }.sorted { $0.name.lowercased() < $1.name.lowercased() } + } + + public func generateRealisticVoiceList() -> [VoiceItem] { + Voices.UltraPairs.flatMap { voicePair in + [ + VoiceItem(name: voicePair.firstName, key: voicePair.firstKey, category: voicePair.category, selected: voicePair.firstKey == currentVoice), + VoiceItem(name: voicePair.secondName, key: voicePair.secondKey, category: voicePair.category, selected: voicePair.secondKey == currentVoice) ] }.sorted { $0.name.lowercased() < $1.name.lowercased() } } @@ -419,6 +461,8 @@ @AppStorage(UserDefaultKey.textToSpeechPreloadEnabled.rawValue) public var preloadEnabled = false + @AppStorage(UserDefaultKey.textToSpeechUseUltraRealisticVoices.rawValue) public var useUltraRealisticVoices = false + public var currentVoiceLanguage: VoiceLanguage { Voices.Languages.first(where: { $0.key == currentLanguage }) ?? 
Voices.English } @@ -458,6 +502,7 @@ set { _currentVoice = newValue voiceList = generateVoiceList() + realisticVoiceList = generateRealisticVoiceList() var currentIdx = 0 var currentOffset = 0.0 diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift index 370d1f8e9..fab83d43c 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift @@ -16,6 +16,7 @@ struct UtteranceRequest: Codable { let voice: String let language: String let rate: String + let isUltraRealisticVoice: Bool } struct Utterance: Decodable { @@ -26,10 +27,12 @@ struct Utterance: Decodable { public let wordCount: Double func toSSML(document: SpeechDocument) throws -> Data? { + let usedVoice = voice ?? document.defaultVoice let request = UtteranceRequest(text: text, - voice: voice ?? document.defaultVoice, + voice: usedVoice, language: document.language, - rate: "1.1") + rate: "1.1", + isUltraRealisticVoice: Voices.isUltraRealisticVoice(usedVoice)) return try JSONEncoder().encode(request) } } @@ -120,7 +123,7 @@ struct SpeechSynthesizer { func createPlayerItems(from: Int) -> [SpeechItem] { var result: [SpeechItem] = [] - for idx in from ..< min(7, document.utterances.count) { + for idx in from ..< document.utterances.count { let utterance = document.utterances[idx] let voiceStr = utterance.voice ?? document.defaultVoice let segmentStr = String(format: "%04d", arguments: [idx]) @@ -159,9 +162,31 @@ struct SpeechSynthesizer { return request } + static func downloadData(session: URLSession, request: URLRequest) async throws -> Data { + do { + let result: (Data, URLResponse)? = try await session.data(for: request) + guard let httpResponse = result?.1 as? 
HTTPURLResponse, 200 ..< 300 ~= httpResponse.statusCode else { + print("error: ", result?.1) + throw BasicError.message(messageText: "audioFetch failed. no response or bad status code.") + } + + guard let data = result?.0 else { + throw BasicError.message(messageText: "audioFetch failed. no data received.") + } + + return data + } catch URLError.cancelled { + print("canceled request error being ignored") + return Data() + } catch { + print("ERROR DOWNLOADING AUDIO DATA", error) + throw error + } + } + static func download(speechItem: SpeechItem, redownloadCached: Bool = false, - session: URLSession? = URLSession.shared) async throws -> SynthesizeData? + session: URLSession = URLSession.shared) async throws -> SynthesizeData? { let decoder = JSONDecoder() @@ -174,16 +199,7 @@ struct SpeechSynthesizer { } } - let request = speechItem.urlRequest - let result: (Data, URLResponse)? = try? await (session ?? URLSession.shared).data(for: request) - guard let httpResponse = result?.1 as? HTTPURLResponse, 200 ..< 300 ~= httpResponse.statusCode else { - print("error: ", result?.1 as Any) - throw BasicError.message(messageText: "audioFetch failed. no response or bad status code.") - } - - guard let data = result?.0 else { - throw BasicError.message(messageText: "audioFetch failed. no data received.") - } + let data = try await downloadData(session: session, request: speechItem.urlRequest) let tempPath = FileManager.default .urls(for: .cachesDirectory, in: .userDomainMask)[0] @@ -204,8 +220,6 @@ struct SpeechSynthesizer { try? FileManager.default.removeItem(at: speechItem.localAudioURL) try FileManager.default.moveItem(at: tempPath, to: speechItem.localAudioURL) - let savedData = try? 
Data(contentsOf: speechItem.localAudioURL) - let encoder = JSONEncoder() let speechMarksData = try encoder.encode(jsonData.speechMarks) try speechMarksData.write(to: tempSMPath) @@ -214,6 +228,7 @@ struct SpeechSynthesizer { return SynthesizeData(audioData: audioData, speechMarks: jsonData.speechMarks) } catch { + print("ERROR DOWNLOADING SPEECH DATA:", error) let errorMessage = "audioFetch failed. could not write MP3 data to disk" throw BasicError.message(messageText: errorMessage) } @@ -222,12 +237,12 @@ struct SpeechSynthesizer { struct SynthesizeResult: Decodable { let audioData: String - let speechMarks: [SpeechMark] + let speechMarks: [SpeechMark]? } struct SynthesizeData: Decodable { let audioData: Data - let speechMarks: [SpeechMark] + let speechMarks: [SpeechMark]? } extension Data { diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/Voices.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/Voices.swift index 71c09b185..a314d2b88 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/Voices.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/Voices.swift @@ -14,6 +14,13 @@ public struct VoiceLanguage { public let categories: [VoiceCategory] } +public struct VoiceItem { + public let name: String + public let key: String + public let category: VoiceCategory + public let selected: Bool +} + public enum VoiceCategory: String, CaseIterable { case enUS = "English (US)" case enAU = "English (Australia)" @@ -40,6 +47,12 @@ public struct VoicePair { } public enum Voices { + public static func isUltraRealisticVoice(_ voiceKey: String) -> Bool { + UltraPairs.contains(where: { voice in + voice.firstKey == voiceKey || voice.secondKey == voiceKey + }) + } + public static let English = VoiceLanguage(key: "en", name: "English", defaultVoice: "en-US-ChristopherNeural", @@ -72,4 +85,37 @@ public enum Voices { VoicePair(firstKey: "de-DE-ChristophNeural", secondKey: "de-DE-LouisaNeural", firstName: "Christoph", secondName: "Louisa", language: 
"de-DE", category: .deDE), VoicePair(firstKey: "ja-JP-NanamiNeural", secondKey: "ja-JP-KeitaNeural", firstName: "Nanami", secondName: "Keita", language: "ja-JP", category: .jaJP) ] + + public static let UltraPairs = [ + VoicePair(firstKey: "Larry", secondKey: "susan", firstName: "Larry", secondName: "Susan", language: "en-US", category: .enUS), + + VoicePair(firstKey: "Jordan", secondKey: "William", firstName: "Jordan", secondName: "William", language: "en-US", category: .enUS), + VoicePair(firstKey: "Adrian", secondKey: "Anthony", firstName: "Adrian", secondName: "Anthony", language: "en-US", category: .enUS), + + VoicePair(firstKey: "Oliver", secondKey: "Arthur", firstName: "Oliver", secondName: "Arthur", language: "en-UK", category: .enUK), + + VoicePair(firstKey: "Daniel", secondKey: "Charlotte", firstName: "Daniel", secondName: "Charlotte", language: "en-CA", category: .enCA), + + VoicePair(firstKey: "Alexander", secondKey: "Aurora", firstName: "Alexander", secondName: "Aurora", language: "en-UK", category: .enUK), + + VoicePair(firstKey: "Axel", secondKey: "Carter", firstName: "Axel", secondName: "Carter", language: "en-US", category: .enUS), + + VoicePair(firstKey: "Ellie", secondKey: "Evelyn", firstName: "Ellie", secondName: "Evelyn", language: "en-US", category: .enUS), + + VoicePair(firstKey: "Frankie", secondKey: "Harrison", firstName: "Frankie", secondName: "Harrison", language: "en-US", category: .enUS), + + VoicePair(firstKey: "Frederick", secondKey: "Hunter", firstName: "Frederick", secondName: "Hunter", language: "en-UK", category: .enUK), + + VoicePair(firstKey: "Lillian", secondKey: "Lottie", firstName: "Lillian", secondName: "Lottie", language: "en-UK", category: .enUK), + + VoicePair(firstKey: "Nolan", secondKey: "Phoebe", firstName: "Nolan", secondName: "Phoebe", language: "en-UK", category: .enUK), + + VoicePair(firstKey: "Daisy", secondKey: "Stella", firstName: "Daisy", secondName: "Stella", language: "en-UK", category: .enUK), + + 
VoicePair(firstKey: "Maverick", secondKey: "Natalie", firstName: "Maverick", secondName: "Natalie", language: "en-US", category: .enUS), + + VoicePair(firstKey: "Nova", secondKey: "Owen", firstName: "Nova", secondName: "Owen", language: "en-US", category: .enUS) + + // VoicePair(firstKey: "Theodore", secondKey: "Theodore", firstName: "Theodore", secondName: "Theodore", language: "en-US", category: .enUS) + ] } diff --git a/apple/OmnivoreKit/Sources/Utils/UserDefaultKeys.swift b/apple/OmnivoreKit/Sources/Utils/UserDefaultKeys.swift index f61b0742c..7ab00c494 100644 --- a/apple/OmnivoreKit/Sources/Utils/UserDefaultKeys.swift +++ b/apple/OmnivoreKit/Sources/Utils/UserDefaultKeys.swift @@ -17,6 +17,7 @@ public enum UserDefaultKey: String { case textToSpeechPreferredVoice case textToSpeechDefaultLanguage case textToSpeechPreloadEnabled + case textToSpeechUseUltraRealisticVoices case recentSearchTerms case audioPlayerExpanded case themeName