Add prefetch queue dampening on audio, better handle segments with empty audio data
This commit is contained in:
@ -1,5 +1,4 @@
|
||||
// swiftlint:disable file_length type_body_length
|
||||
|
||||
#if os(iOS)
|
||||
|
||||
import AVFoundation
|
||||
@ -39,23 +38,23 @@ public struct DigestAudioItem: AudioItemProperties {
|
||||
public var language: String?
|
||||
public var startIndex: Int = 0
|
||||
public var startOffset: Double = 0.0
|
||||
|
||||
|
||||
/// Creates an audio item backed by the given digest.
///
/// Playback always begins at the first segment with no offset, and no image is attached.
///
/// - Parameter digest: The digest supplying the item's id, title, byline and speech files.
public init(digest: DigestResult) {
    self.digest = digest
    itemID = digest.id
    title = digest.title
    startIndex = 0
    startOffset = 0
    imageURL = nil

    // Language and byline are only populated when the digest has at least one speech file.
    guard let leadingSpeechFile = digest.speechFiles.first else { return }
    language = leadingSpeechFile.language
    byline = digest.byline
}
|
||||
}
|
||||
|
||||
|
||||
// swiftlint:disable all
|
||||
@MainActor
|
||||
public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate {
|
||||
@ -108,7 +107,7 @@ public struct DigestAudioItem: AudioItemProperties {
|
||||
playbackError = false
|
||||
self.itemAudioProperties = itemAudioProperties
|
||||
startAudio(atIndex: itemAudioProperties.startIndex, andOffset: itemAudioProperties.startOffset)
|
||||
|
||||
|
||||
EventTracker.track(
|
||||
.audioSessionStart(
|
||||
linkID: itemAudioProperties.itemID,
|
||||
@ -319,7 +318,7 @@ public struct DigestAudioItem: AudioItemProperties {
|
||||
public func seek(toIdx: Int) {
|
||||
let before = durationBefore(playerIndex: toIdx)
|
||||
let remainder = 0.0
|
||||
|
||||
|
||||
// if the foundIdx happens to be the current item, we just set the position
|
||||
if let playerItem = player?.currentItem as? SpeechPlayerItem {
|
||||
if playerItem.speechItem.audioIdx == toIdx {
|
||||
@ -666,8 +665,10 @@ public struct DigestAudioItem: AudioItemProperties {
|
||||
player = AVQueuePlayer(items: [])
|
||||
if let player = player {
|
||||
observer = player.observe(\.currentItem, options: [.new]) { _, _ in
|
||||
self.currentAudioIndex = (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx ?? 0
|
||||
self.updateReadText()
|
||||
DispatchQueue.main.async {
|
||||
self.currentAudioIndex = (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx ?? 0
|
||||
self.updateReadText()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -683,8 +684,8 @@ public struct DigestAudioItem: AudioItemProperties {
|
||||
func synthesizeFrom(start: Int, playWhenReady: Bool, atOffset: Double = 0.0) {
|
||||
if let synthesizer = self.synthesizer, let items = self.synthesizer?.createPlayerItems(from: start) {
|
||||
let prefetchQueue = OperationQueue()
|
||||
prefetchQueue.maxConcurrentOperationCount = 5
|
||||
|
||||
prefetchQueue.maxConcurrentOperationCount = 1
|
||||
|
||||
for speechItem in items {
|
||||
let isLast = speechItem.audioIdx == synthesizer.document.utterances.count - 1
|
||||
let playerItem = SpeechPlayerItem(session: self, prefetchQueue: prefetchQueue, speechItem: speechItem) {
|
||||
|
||||
@ -40,12 +40,11 @@ class SpeechPlayerItem: AVPlayerItem {
|
||||
resourceLoaderDelegate.owner = self
|
||||
|
||||
self.observer = observe(\.status, options: [.new]) { item, _ in
|
||||
if item.status == .readyToPlay {
|
||||
let duration = CMTimeGetSeconds(item.duration)
|
||||
item.session.updateDuration(forItem: item.speechItem, newDuration: duration)
|
||||
}
|
||||
if item.status == .failed {
|
||||
item.session.stopWithError()
|
||||
DispatchQueue.main.async {
|
||||
if item.status == .readyToPlay {
|
||||
let duration = CMTimeGetSeconds(item.duration)
|
||||
item.session.updateDuration(forItem: item.speechItem, newDuration: duration)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -55,11 +54,29 @@ class SpeechPlayerItem: AVPlayerItem {
|
||||
) { [weak self] _ in
|
||||
guard let self = self else { return }
|
||||
self.completed()
|
||||
self.checkPrefetchQueue(prefetchQueue: prefetchQueue)
|
||||
}
|
||||
|
||||
self.prefetchOperation = PrefetchSpeechItemOperation(speechItem: speechItem)
|
||||
if let prefetchOperation = self.prefetchOperation {
|
||||
prefetchQueue.addOperation(prefetchOperation)
|
||||
prefetchOperation.completionBlock = {
|
||||
self.checkPrefetchQueue(prefetchQueue: prefetchQueue)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Suspends or resumes the shared prefetch queue depending on how far this item is
/// ahead of the item the session is currently playing.
///
/// The comparison runs asynchronously on the main queue, where the session's
/// `currentAudioIndex` is read.
///
/// - Parameter prefetchQueue: The operation queue whose `isSuspended` flag is toggled.
func checkPrefetchQueue(prefetchQueue: OperationQueue) {
    // Number of items the prefetcher may run ahead of the currently playing item.
    let maxLookahead = 5

    DispatchQueue.main.async {
        let itemIndex = self.speechItem.audioIdx
        let lookaheadLimit = self.session.currentAudioIndex + maxLookahead

        if itemIndex > lookaheadLimit {
            // prefetch has gotten too far ahead of the audio. Pause the prefetch queue
            // The logged limit is the one actually used for the comparison.
            print("PAUSING PREFETCH QUEUE", itemIndex, lookaheadLimit, self.speechItem.text)
            prefetchQueue.isSuspended = true
        } else if itemIndex < lookaheadLimit {
            print("RESUMING PREFETCH QUEUE", itemIndex, lookaheadLimit)
            prefetchQueue.isSuspended = false
        }
        // When itemIndex == lookaheadLimit the queue keeps its current state.
    }
}
|
||||
|
||||
|
||||
@ -222,9 +222,13 @@ struct SpeechSynthesizer {
|
||||
|
||||
do {
|
||||
let jsonData = try decoder.decode(SynthesizeResult.self, from: data) as SynthesizeResult
|
||||
let audioData = Data(fromHexEncodedString: jsonData.audioData)!
|
||||
var audioData = Data(fromHexEncodedString: jsonData.audioData)!
|
||||
if audioData.count < 1 {
|
||||
throw BasicError.message(messageText: "Audio data is empty")
|
||||
if let silence = generateSilentAudioBuffer() {
|
||||
audioData = silence
|
||||
} else {
|
||||
throw BasicError.message(messageText: "Audio data is empty")
|
||||
}
|
||||
}
|
||||
|
||||
try audioData.write(to: tempPath)
|
||||
@ -244,6 +248,34 @@ struct SpeechSynthesizer {
|
||||
throw BasicError.message(messageText: errorMessage)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a very short PCM buffer (about one millisecond at 44.1 kHz, mono) and returns
/// its samples as raw bytes.
///
/// - Returns: The buffer's sample bytes as produced by `bufferToData(buffer:)`, or `nil`
///   when the audio format or the PCM buffer cannot be created.
static func generateSilentAudioBuffer() -> Data? {
    // The format initializer is failable; report failure through the optional return
    // instead of crashing on a force-unwrap.
    guard let audioFormat = AVAudioFormat(standardFormatWithSampleRate: 44100, channels: 1) else {
        return nil
    }

    let frameCount = UInt32(audioFormat.sampleRate * 0.001) // 1 millisecond of frames
    guard let buffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: frameCount) else {
        return nil
    }

    // Mark the whole capacity as valid frames so every sample is exported.
    // NOTE(review): no samples are written here, so silence relies on the freshly
    // allocated buffer being zero-filled — confirm.
    buffer.frameLength = buffer.frameCapacity
    return bufferToData(buffer: buffer)
}
|
||||
|
||||
/// Serializes the float samples of a PCM buffer into `Data`, frame by frame with the
/// channels of each frame written one after another.
///
/// - Parameter buffer: The PCM buffer to read; only `frameLength` frames are exported.
/// - Returns: The samples as native-endian `Float` bytes, or empty `Data` when the buffer
///   exposes no float channel data or contains no frames.
static func bufferToData(buffer: AVAudioPCMBuffer) -> Data {
    let channelCount = Int(buffer.format.channelCount)
    let frames = Int(buffer.frameLength)

    // `floatChannelData` is nil for buffers that do not hold float samples; return an
    // empty result rather than trapping on a nil base pointer.
    guard let channelData = buffer.floatChannelData, channelCount > 0, frames > 0 else {
        return Data()
    }

    var data = Data()
    data.reserveCapacity(frames * channelCount * MemoryLayout<Float>.size)

    for frame in 0..<frames {
        for channel in 0..<channelCount {
            // Copy the sample's bytes inside a scoped closure so no pointer to a
            // temporary outlives the call that produced it.
            withUnsafeBytes(of: channelData[channel][frame]) { sampleBytes in
                data.append(contentsOf: sampleBytes)
            }
        }
    }

    return data
}
|
||||
}
|
||||
|
||||
struct SynthesizeResult: Decodable {
|
||||
|
||||
@ -172,7 +172,7 @@ extension DataService {
|
||||
try? data.write(to: localPath)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public func explain(text: String, libraryItemId: String) async throws -> String {
|
||||
let encoder = JSONEncoder()
|
||||
let explainRequest = ExplainRequest(text: text, libraryItemId: libraryItemId)
|
||||
|
||||
Reference in New Issue
Block a user