From 9796646742234b0ecb4e935b51feb9237b8adcd9 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Fri, 9 Sep 2022 22:03:05 +0800 Subject: [PATCH] Start preloading audio speechfiles, use new domains --- .../Sources/Models/AppEnvironment.swift | 20 ++- .../AudioSession/AudioController.swift | 122 ++++++------------ .../AudioSession/SpeechSynthesizer.swift | 28 ++-- 3 files changed, 70 insertions(+), 100 deletions(-) diff --git a/apple/OmnivoreKit/Sources/Models/AppEnvironment.swift b/apple/OmnivoreKit/Sources/Models/AppEnvironment.swift index b6c94f86e..1c5cc0c6c 100644 --- a/apple/OmnivoreKit/Sources/Models/AppEnvironment.swift +++ b/apple/OmnivoreKit/Sources/Models/AppEnvironment.swift @@ -25,14 +25,13 @@ private let devBaseURL = "https://api-dev.omnivore.app" private let demoBaseURL = "https://api-demo.omnivore.app" private let prodBaseURL = "https://api-prod.omnivore.app" +private let demoTtsURL = "https://tts-demo.omnivore.app" +private let prodTtsURL = "https://tts-prod.omnivore.app" + private let devWebURL = "https://web-dev.omnivore.app" private let demoWebURL = "https://demo.omnivore.app" private let prodWebURL = "https://omnivore.app" -private let devHighlightsServerURL = "https://highlights-dev.omnivore.app" -private let demoHighlightsServerURL = "https://highlights-demo.omnivore.app" -private let prodHighlightsServerURL = "https://highlights.omnivore.app" - public extension AppEnvironment { var graphqlPath: String { "\(serverBaseURL.absoluteString)/api/graphql" @@ -63,4 +62,17 @@ public extension AppEnvironment { return URL(string: "http://localhost:3000")! } } + + var ttsBaseURL: URL { + switch self { + case .dev: + return URL(string: "notimplemented")! + case .demo: + return URL(string: demoTtsURL)! + case .prod: + return URL(string: prodTtsURL)! + case .test, .local: + return URL(string: "http://localhost:4000")! + } + } } diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift index b123accb5..c6e2ccdd6 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift @@ -26,11 +26,6 @@ public enum PlayerScrubState { case scrubEnded(TimeInterval) } -enum DownloadType: String { - case mp3 - case speechMarks -} - enum DownloadPriority: String { case low case high @@ -141,46 +136,12 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate } } - public func preload(itemIDs _: [String], retryCount _: Int = 0) async -> Bool { -// var pendingList = [String]() -// -// for pageId in itemIDs { -// let permFile = pathForAudioFile(pageId: pageId) -// if FileManager.default.fileExists(atPath: permFile.path) { -// print("audio file already downloaded: ", permFile) -// continue -// } -// -// // Attempt to fetch the file if not downloaded already -// let result = try? await downloadAudioFile(pageId: pageId, type: .mp3, priority: .low) -// if result == nil { -// print("audio file had error downloading: ", pageId) -// pendingList.append(pageId) -// } -// -// if let result = result, result.pending { -// print("audio file is pending download: ", pageId) -// pendingList.append(pageId) -// } else { -// print("audio file is downloaded: ", pageId) -// } -// } -// -// print("audio files pending download: ", pendingList) -// if pendingList.isEmpty { -// return true -// } -// -// if retryCount > 5 { -// print("reached max preload depth, stopping preloading") -// return false -// } -// -// let retryDelayInNanoSeconds = UInt64(retryCount * 2 * 1_000_000_000) -// try? await Task.sleep(nanoseconds: retryDelayInNanoSeconds) -// -// return await preload(itemIDs: pendingList, retryCount: retryCount + 1) - true + public func preload(itemIDs: [String], retryCount _: Int = 0) async -> Bool { + for pageId in itemIDs { + print("preloading speech file: ", pageId) + _ = try? await downloadSpeechFile(pageId: pageId, priority: .low) + } + return true } public var scrubState: PlayerScrubState = .reset { @@ -281,21 +242,27 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate pageId + "-" + currentVoice + ".mp3" } - public func pathForAudioFile(pageId: String) -> URL { + public func pathForAudioDirectory(pageId: String) -> URL { FileManager.default .urls(for: .documentDirectory, in: .userDomainMask)[0] - .appendingPathComponent(fileNameForAudioFile(pageId)) + .appendingPathComponent("audio-\(pageId)/") + } + + public func pathForSpeechFile(pageId: String) -> URL { + pathForAudioDirectory(pageId: pageId) + .appendingPathComponent("speech-\(currentVoice).json") } public func startAudio() { state = .loading setupNotifications() - let pageId = item!.unwrappedID - Task { - self.document = try? await downloadAudioFile(pageId: pageId, type: .mp3, priority: .high) - DispatchQueue.main.async { - self.startStreamingAudio(pageId: pageId) + if let pageId = item?.id { + Task { + self.document = try? await downloadSpeechFile(pageId: pageId, priority: .high) + DispatchQueue.main.async { + self.startStreamingAudio(pageId: pageId) + } } } } @@ -311,8 +278,7 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate } player = AVQueuePlayer(items: []) - let synthesizer = SpeechSynthesizer(networker: networker, document: document!) - synthesizer.prepare() + let synthesizer = SpeechSynthesizer(appEnvironment: appEnvironment, networker: networker, document: document!) durations = synthesizer.estimatedDurations(forSpeed: 1.0) self.synthesizer = synthesizer @@ -492,12 +458,19 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate } } - func downloadAudioFile(pageId: String, type _: DownloadType, priority: DownloadPriority) async throws -> SpeechDocument { -// let audioUrl = pathForAudioFile(pageId: pageId) -// -// if FileManager.default.fileExists(atPath: audioUrl.path) { -// return (pending: false, url: audioUrl) -// } + func downloadSpeechFile(pageId: String, priority: DownloadPriority) async throws -> SpeechDocument { + let decoder = JSONDecoder() + let speechFileUrl = pathForSpeechFile(pageId: pageId) + + if FileManager.default.fileExists(atPath: speechFileUrl.path) { + print("SPEECH FILE ALREADY EXISTS: ", speechFileUrl.path) + let data = try Data(contentsOf: speechFileUrl) + document = try decoder.decode(SpeechDocument.self, from: data) + // If we can't load it from disk we make the API call + if let document = document { + return document + } + } let path = "/api/article/\(pageId)/speech?voice=\(currentVoice)&secondaryVoice=\(secondaryVoice)&priority=\(priority)" guard let url = URL(string: path, relativeTo: appEnvironment.serverBaseURL) else { @@ -525,28 +498,15 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate let document = try! JSONDecoder().decode(SpeechDocument.self, from: data) + // Cache the file + do { + try? FileManager.default.createDirectory(at: document.audioDirectory, withIntermediateDirectories: true) + try data.write(to: speechFileUrl) + } catch { + print("error writing file", error) + } + return document - //// let tempPath = FileManager.default - //// .urls(for: .cachesDirectory, in: .userDomainMask)[0] - //// .appendingPathComponent(UUID().uuidString + ".mp3") -// -// do { - //// if let googleHash = httpResponse.value(forHTTPHeaderField: "x-goog-hash") { - //// let hash = Data(Insecure.MD5.hash(data: data)).base64EncodedString() - //// if !googleHash.contains("md5=\(hash)") { - //// print("Downloaded mp3 file hashes do not match: returned: \(googleHash) v computed: \(hash)") - //// throw BasicError.message(messageText: "Downloaded mp3 file hashes do not match: returned: \(googleHash) v computed: \(hash)") - //// } - //// } - //// - //// try data.write(to: tempPath) - //// try? FileManager.default.removeItem(at: audioUrl) - //// try FileManager.default.moveItem(at: tempPath, to: audioUrl) -// } catch { -// print("error writing file: ", error) -// let errorMessage = "audioFetch failed. could not write MP3 data to disk" -// throw BasicError.message(messageText: errorMessage) -// } } public func audioPlayerDidFinishPlaying(_ player: AVAudioPlayer, successfully _: Bool) { diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift index a4c53fe0a..816add96c 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift @@ -62,18 +62,15 @@ struct SpeechItem { struct SpeechSynthesizer { typealias Element = SpeechItem let document: SpeechDocument + let appEnvironment: AppEnvironment let networker: Networker - init(networker: Networker, document: SpeechDocument) { + init(appEnvironment: AppEnvironment, networker: Networker, document: SpeechDocument) { + self.appEnvironment = appEnvironment self.networker = networker self.document = document } - func prepare() { - // We will fail later on if this doesn't happen, so just use try? here - try? FileManager.default.createDirectory(at: document.audioDirectory, withIntermediateDirectories: true) - } - func estimatedDurations(forSpeed speed: Double) -> [Double] { document.utterances.map { document.estimatedDuration(utterance: $0, speed: speed) } } @@ -117,7 +114,8 @@ struct SpeechSynthesisFetcher: AsyncSequence { } let utterance = synthesizer.document.utterances[currentIdx] - let fetched = try? await fetchUtterance(networker: synthesizer.networker, + let fetched = try? await fetchUtterance(appEnvironment: synthesizer.appEnvironment, + networker: synthesizer.networker, document: synthesizer.document, segmentIdx: currentIdx, utterance: utterance) @@ -158,33 +156,33 @@ extension Data { self.init(capacity: string.utf8.count / 2) var iter = string.utf8.makeIterator() - while let c1 = iter.next() { + while let char1 = iter.next() { guard - let val1 = decodeNibble(nibble: c1), - let c2 = iter.next(), - let val2 = decodeNibble(nibble: c2) + let val1 = decodeNibble(nibble: char1), + let char2 = iter.next(), + let val2 = decodeNibble(nibble: char2) else { return nil } append(val1 << 4 + val2) } } } -func fetchUtterance(networker: Networker, +func fetchUtterance(appEnvironment: AppEnvironment, + networker: Networker, document: SpeechDocument, segmentIdx: Int, utterance: Utterance) async throws -> URL { let voiceStr = utterance.voice ?? document.defaultVoice let segmentStr = String(format: "%04d", arguments: [segmentIdx]) - let audioPath = document.audioDirectory.appendingPathComponent("audio-\(voiceStr)-\(segmentStr).mp3") - let url = URL(string: "https://text-to-speech-streaming-bryle2uxwq-wl.a.run.app/")! + let audioPath = document.audioDirectory.appendingPathComponent("\(segmentStr)-\(voiceStr).mp3") if FileManager.default.fileExists(atPath: audioPath.path) { print("audio file already downloaded: ", audioPath.path) return audioPath } - var request = URLRequest(url: url) + var request = URLRequest(url: appEnvironment.ttsBaseURL) request.httpMethod = "POST" request.timeoutInterval = 600