Merge pull request #3878 from omnivore-app/fix/ios-audio-reliability

Add prefetch-queue dampening for audio playback, and better handling of segments whose audio data is empty
This commit is contained in:
Jackson Harper
2024-05-02 12:28:04 +08:00
committed by GitHub
4 changed files with 70 additions and 20 deletions

View File

@ -1,5 +1,4 @@
// swiftlint:disable file_length type_body_length
#if os(iOS)
import AVFoundation
@ -39,23 +38,23 @@ public struct DigestAudioItem: AudioItemProperties {
public var language: String?
public var startIndex: Int = 0
public var startOffset: Double = 0.0
/// Builds the audio-item properties for a digest playback session.
/// Index/offset start at the beginning; there is no cover image for digests.
public init(digest: DigestResult) {
  self.digest = digest
  itemID = digest.id
  title = digest.title
  startIndex = 0
  startOffset = 0
  imageURL = nil

  // Language comes from the first speech file, when one exists.
  // NOTE(review): `byline` is also only assigned inside this branch, so a
  // digest with no speech files ends up with no byline — confirm intended.
  guard let firstSpeechFile = digest.speechFiles.first else { return }
  language = firstSpeechFile.language
  byline = digest.byline
}
}
// swiftlint:disable all
@MainActor
public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate {
@ -108,7 +107,7 @@ public struct DigestAudioItem: AudioItemProperties {
playbackError = false
self.itemAudioProperties = itemAudioProperties
startAudio(atIndex: itemAudioProperties.startIndex, andOffset: itemAudioProperties.startOffset)
EventTracker.track(
.audioSessionStart(
linkID: itemAudioProperties.itemID,
@ -319,7 +318,7 @@ public struct DigestAudioItem: AudioItemProperties {
public func seek(toIdx: Int) {
let before = durationBefore(playerIndex: toIdx)
let remainder = 0.0
// if the foundIdx happens to be the current item, we just set the position
if let playerItem = player?.currentItem as? SpeechPlayerItem {
if playerItem.speechItem.audioIdx == toIdx {
@ -666,8 +665,10 @@ public struct DigestAudioItem: AudioItemProperties {
player = AVQueuePlayer(items: [])
if let player = player {
observer = player.observe(\.currentItem, options: [.new]) { _, _ in
self.currentAudioIndex = (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx ?? 0
self.updateReadText()
DispatchQueue.main.async {
self.currentAudioIndex = (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx ?? 0
self.updateReadText()
}
}
}
@ -683,8 +684,8 @@ public struct DigestAudioItem: AudioItemProperties {
func synthesizeFrom(start: Int, playWhenReady: Bool, atOffset: Double = 0.0) {
if let synthesizer = self.synthesizer, let items = self.synthesizer?.createPlayerItems(from: start) {
let prefetchQueue = OperationQueue()
prefetchQueue.maxConcurrentOperationCount = 5
prefetchQueue.maxConcurrentOperationCount = 1
for speechItem in items {
let isLast = speechItem.audioIdx == synthesizer.document.utterances.count - 1
let playerItem = SpeechPlayerItem(session: self, prefetchQueue: prefetchQueue, speechItem: speechItem) {

View File

@ -40,12 +40,11 @@ class SpeechPlayerItem: AVPlayerItem {
resourceLoaderDelegate.owner = self
self.observer = observe(\.status, options: [.new]) { item, _ in
if item.status == .readyToPlay {
let duration = CMTimeGetSeconds(item.duration)
item.session.updateDuration(forItem: item.speechItem, newDuration: duration)
}
if item.status == .failed {
item.session.stopWithError()
DispatchQueue.main.async {
if item.status == .readyToPlay {
let duration = CMTimeGetSeconds(item.duration)
item.session.updateDuration(forItem: item.speechItem, newDuration: duration)
}
}
}
@ -55,11 +54,29 @@ class SpeechPlayerItem: AVPlayerItem {
) { [weak self] _ in
guard let self = self else { return }
self.completed()
self.checkPrefetchQueue(prefetchQueue: prefetchQueue)
}
self.prefetchOperation = PrefetchSpeechItemOperation(speechItem: speechItem)
if let prefetchOperation = self.prefetchOperation {
prefetchQueue.addOperation(prefetchOperation)
prefetchOperation.completionBlock = {
self.checkPrefetchQueue(prefetchQueue: prefetchQueue)
}
}
}
/// Dampens prefetching so synthesis never runs too far ahead of playback.
///
/// Suspends `prefetchQueue` once this item is more than `maxPrefetchLookahead`
/// utterances ahead of the currently playing index, and resumes it once
/// playback has caught back up. Dispatched to the main queue because
/// `session.currentAudioIndex` is main-actor state.
/// - Parameter prefetchQueue: The operation queue driving speech prefetch.
func checkPrefetchQueue(prefetchQueue: OperationQueue) {
  DispatchQueue.main.async {
    // Single source of truth for the lookahead window (the original code
    // repeated the literal 5 and logged a mismatched 10).
    let maxPrefetchLookahead = 5
    let threshold = self.session.currentAudioIndex + maxPrefetchLookahead
    if self.speechItem.audioIdx > threshold {
      // Prefetch has gotten too far ahead of the audio; pause the queue.
      prefetchQueue.isSuspended = true
    } else if self.speechItem.audioIdx < threshold {
      prefetchQueue.isSuspended = false
    }
    // At exactly `threshold` the suspended state is left unchanged,
    // matching the original behavior (neither condition fired).
  }
}

View File

@ -222,9 +222,13 @@ struct SpeechSynthesizer {
do {
let jsonData = try decoder.decode(SynthesizeResult.self, from: data) as SynthesizeResult
let audioData = Data(fromHexEncodedString: jsonData.audioData)!
var audioData = Data(fromHexEncodedString: jsonData.audioData)!
if audioData.count < 1 {
throw BasicError.message(messageText: "Audio data is empty")
if let silence = generateSilentAudioBuffer() {
audioData = silence
} else {
throw BasicError.message(messageText: "Audio data is empty")
}
}
try audioData.write(to: tempPath)
@ -244,6 +248,34 @@ struct SpeechSynthesizer {
throw BasicError.message(messageText: errorMessage)
}
}
/// Generates roughly one millisecond of silent mono PCM data (44.1 kHz),
/// used as a stand-in when the synthesis backend returns empty audio.
/// - Returns: Raw sample data for the silent buffer, or `nil` if the audio
///   format or buffer could not be created.
static func generateSilentAudioBuffer() -> Data? {
  // Avoid the force-unwrap: this function already reports failure via nil,
  // so a nil format should follow the same path instead of crashing.
  guard let audioFormat = AVAudioFormat(standardFormatWithSampleRate: 44100, channels: 1) else {
    return nil
  }
  let frameCount = UInt32(audioFormat.sampleRate * 0.001) // ~1 ms of frames (44 at 44.1 kHz)
  guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameCount) else {
    return nil
  }
  // A fresh AVAudioPCMBuffer is zero-filled, so claiming the full capacity
  // yields silence without writing any samples.
  buffer.frameLength = buffer.frameCapacity
  return bufferToData(buffer: buffer)
}
/// Serializes a float PCM buffer into interleaved raw `Float32` bytes.
///
/// NOTE(review): the output is headerless PCM (no WAV/RIFF header) — callers
/// presumably expect raw sample data; confirm the player accepts that.
/// - Parameter buffer: A buffer in a float (non-interleaved) PCM format.
/// - Returns: Frame-major, channel-interleaved sample bytes; empty if the
///   buffer has no float channel data.
static func bufferToData(buffer: AVAudioPCMBuffer) -> Data {
  let channelCount = Int(buffer.format.channelCount)
  let frames = Int(buffer.frameLength)
  var data = Data(capacity: frames * channelCount * MemoryLayout<Float>.size)
  // `floatChannelData` is optional (nil for non-float formats); the original
  // force-used it via an UnsafeBufferPointer and would have crashed.
  guard let channelData = buffer.floatChannelData else { return data }
  for frame in 0..<frames {
    for channel in 0..<channelCount {
      let sample = channelData[channel][frame]
      // `withUnsafeBytes(of:)` keeps the pointer valid for the whole append,
      // unlike the original `UnsafeBufferPointer(start: &temp, ...)`, whose
      // pointer was only guaranteed valid for the initializer call itself.
      withUnsafeBytes(of: sample) { data.append(contentsOf: $0) }
    }
  }
  return data
}
}
struct SynthesizeResult: Decodable {

View File

@ -172,7 +172,7 @@ extension DataService {
try? data.write(to: localPath)
}
}
public func explain(text: String, libraryItemId: String) async throws -> String {
let encoder = JSONEncoder()
let explainRequest = ExplainRequest(text: text, libraryItemId: libraryItemId)