diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift index e91017126..d48023ab9 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift @@ -1,5 +1,4 @@ // swiftlint:disable file_length type_body_length - #if os(iOS) import AVFoundation @@ -39,23 +38,23 @@ public struct DigestAudioItem: AudioItemProperties { public var language: String? public var startIndex: Int = 0 public var startOffset: Double = 0.0 - + public init(digest: DigestResult) { self.digest = digest self.itemID = digest.id self.title = digest.title self.startIndex = 0 self.startOffset = 0 - + self.imageURL = nil - + if let first = digest.speechFiles.first { self.language = first.language self.byline = digest.byline } } } - + // swiftlint:disable all @MainActor public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate { @@ -108,7 +107,7 @@ public struct DigestAudioItem: AudioItemProperties { playbackError = false self.itemAudioProperties = itemAudioProperties startAudio(atIndex: itemAudioProperties.startIndex, andOffset: itemAudioProperties.startOffset) - + EventTracker.track( .audioSessionStart( linkID: itemAudioProperties.itemID, @@ -319,7 +318,7 @@ public struct DigestAudioItem: AudioItemProperties { public func seek(toIdx: Int) { let before = durationBefore(playerIndex: toIdx) let remainder = 0.0 - + // if the foundIdx happens to be the current item, we just set the position if let playerItem = player?.currentItem as? SpeechPlayerItem { if playerItem.speechItem.audioIdx == toIdx { @@ -666,8 +665,10 @@ public struct DigestAudioItem: AudioItemProperties { player = AVQueuePlayer(items: []) if let player = player { observer = player.observe(\.currentItem, options: [.new]) { _, _ in - self.currentAudioIndex = (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx ?? 0 - self.updateReadText() + DispatchQueue.main.async { + self.currentAudioIndex = (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx ?? 0 + self.updateReadText() + } } } @@ -683,8 +684,8 @@ public struct DigestAudioItem: AudioItemProperties { func synthesizeFrom(start: Int, playWhenReady: Bool, atOffset: Double = 0.0) { if let synthesizer = self.synthesizer, let items = self.synthesizer?.createPlayerItems(from: start) { let prefetchQueue = OperationQueue() - prefetchQueue.maxConcurrentOperationCount = 5 - + prefetchQueue.maxConcurrentOperationCount = 1 + for speechItem in items { let isLast = speechItem.audioIdx == synthesizer.document.utterances.count - 1 let playerItem = SpeechPlayerItem(session: self, prefetchQueue: prefetchQueue, speechItem: speechItem) { diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechPlayerItem.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechPlayerItem.swift index 90a31ff46..456b2e5c4 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechPlayerItem.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechPlayerItem.swift @@ -40,12 +40,11 @@ class SpeechPlayerItem: AVPlayerItem { resourceLoaderDelegate.owner = self self.observer = observe(\.status, options: [.new]) { item, _ in - if item.status == .readyToPlay { - let duration = CMTimeGetSeconds(item.duration) - item.session.updateDuration(forItem: item.speechItem, newDuration: duration) - } - if item.status == .failed { - item.session.stopWithError() + DispatchQueue.main.async { + if item.status == .readyToPlay { + let duration = CMTimeGetSeconds(item.duration) + item.session.updateDuration(forItem: item.speechItem, newDuration: duration) + } } } @@ -55,11 +54,29 @@ class SpeechPlayerItem: AVPlayerItem { ) { [weak self] _ in guard let self = self else { return } self.completed() + self.checkPrefetchQueue(prefetchQueue: prefetchQueue) } self.prefetchOperation = PrefetchSpeechItemOperation(speechItem: speechItem) if let prefetchOperation = self.prefetchOperation { prefetchQueue.addOperation(prefetchOperation) + prefetchOperation.completionBlock = { + self.checkPrefetchQueue(prefetchQueue: prefetchQueue) + } + } + } + + func checkPrefetchQueue(prefetchQueue: OperationQueue) { + DispatchQueue.main.async { + if self.speechItem.audioIdx > self.session.currentAudioIndex + 5 { + // prefetch has gotten too far ahead of the audio. Pause the prefetch queue + print("PAUSING PREFETCH QUEUE", self.speechItem.audioIdx, self.session.currentAudioIndex + 10, self.speechItem.text) + prefetchQueue.isSuspended = true + } + if self.speechItem.audioIdx < self.session.currentAudioIndex + 5 { + print("RESUMING PREFETCH QUEUE", self.speechItem.audioIdx, self.session.currentAudioIndex + 5) + prefetchQueue.isSuspended = false + } } } diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift index a48342668..2a723fe45 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift @@ -222,9 +222,13 @@ struct SpeechSynthesizer { do { let jsonData = try decoder.decode(SynthesizeResult.self, from: data) as SynthesizeResult - let audioData = Data(fromHexEncodedString: jsonData.audioData)! + var audioData = Data(fromHexEncodedString: jsonData.audioData)! if audioData.count < 1 { - throw BasicError.message(messageText: "Audio data is empty") + if let silence = generateSilentAudioBuffer() { + audioData = silence + } else { + throw BasicError.message(messageText: "Audio data is empty") + } } try audioData.write(to: tempPath) @@ -244,6 +248,34 @@ struct SpeechSynthesizer { throw BasicError.message(messageText: errorMessage) } } + + static func generateSilentAudioBuffer() -> Data? { + let audioFormat = AVAudioFormat(standardFormatWithSampleRate: 44100, channels: 1)! + let frameCount = UInt32(audioFormat.sampleRate * 0.001) // 1 millisecond of frames + guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameCount) else { + return nil + } + buffer.frameLength = buffer.frameCapacity + return bufferToData(buffer: buffer) + } + + static func bufferToData(buffer: AVAudioPCMBuffer) -> Data { + let channelCount = Int(buffer.format.channelCount) + let frames = Int(buffer.frameLength) + let channels = UnsafeBufferPointer(start: buffer.floatChannelData, count: channelCount) + + var data = Data() + + for frame in 0.. String { let encoder = JSONEncoder() let explainRequest = ExplainRequest(text: text, libraryItemId: libraryItemId)