From ca725b4908d01fbc89e3b3d22ad2f60b3c2c8fe3 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 5 Oct 2022 15:14:00 +0800 Subject: [PATCH] Improve text to speech follow UX --- .../App/Views/AudioPlayer/MiniPlayer.swift | 198 +++++++++--------- .../AudioPlayer/ScrollingStackModifier.swift | 78 ------- .../AudioSession/AudioController.swift | 89 +++----- .../AudioSession/SpeechSynthesizer.swift | 16 +- apple/OmnivoreKit/Sources/Views/Fonts.swift | 6 +- 5 files changed, 135 insertions(+), 252 deletions(-) delete mode 100644 apple/OmnivoreKit/Sources/App/Views/AudioPlayer/ScrollingStackModifier.swift diff --git a/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/MiniPlayer.swift b/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/MiniPlayer.swift index fb669c83b..ebb7225d9 100644 --- a/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/MiniPlayer.swift +++ b/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/MiniPlayer.swift @@ -20,6 +20,8 @@ public struct MiniPlayer: View { @State var offset: CGFloat = 0 @State var showVoiceSheet = false @State var showLanguageSheet = false + + @State var tabIndex: Int = 0 @Namespace private var animation let minExpandedHeight = UIScreen.main.bounds.height / 3 @@ -85,26 +87,6 @@ public struct MiniPlayer: View { ) } -// var shareButton: some View { -// Button( -// action: { -// let shareActivity = UIActivityViewController(activityItems: [self.audioSession.localAudioUrl], applicationActivities: nil) -// if let vc = UIApplication.shared.windows.first?.rootViewController { -// shareActivity.popoverPresentationController?.sourceView = vc.view -// // Setup share activity position on screen on bottom center -// shareActivity.popoverPresentationController?.sourceRect = CGRect(x: UIScreen.main.bounds.width / 2, y: UIScreen.main.bounds.height, width: 0, height: 0) -// shareActivity.popoverPresentationController?.permittedArrowDirections = UIPopoverArrowDirection.down -// vc.present(shareActivity, animated: true, completion: nil) -// } -// }, -// label: { -// Image(systemName: "square.and.arrow.up") -// .font(.appCallout) -// .tint(.appGrayText) -// } -// ) -// } - var closeButton: some View { Button( action: { @@ -114,10 +96,12 @@ public struct MiniPlayer: View { }, label: { Image(systemName: "chevron.down") - .font(.appCallout) - .tint(.appGrayText) + .font(.appTitleTwo) + .tint(.appGrayTextContrast) } ) + + .contentShape(Rectangle()) } func viewArticle() { @@ -142,6 +126,36 @@ public struct MiniPlayer: View { } } + struct SpeechCard: View { + let id: Int + @EnvironmentObject var audioController: AudioController + + var body: some View { + Group { + if id != self.audioController.currentAudioIndex || self.audioController.isLoading { + Text(self.audioController.textItems?[id] ?? "\(id)") + .font(.textToSpeechRead.leading(.loose)) + .foregroundColor(Color.appGrayTextContrast) + } else { + Group { + Text(audioController.readText) + .font(.textToSpeechRead.leading(.loose)) + .foregroundColor(Color.appGrayTextContrast) + + + Text(audioController.unreadText) + .font(.textToSpeechRead.leading(.loose)) + .foregroundColor(Color.appGrayText) + } + } + } + .padding(16) + } + + init(id: Int) { + self.id = id + } + } + // swiftlint:disable:next function_body_length func playerContent(_ itemAudioProperties: LinkedItemAudioProperties) -> some View { GeometryReader { geom in @@ -150,12 +164,9 @@ public struct MiniPlayer: View { ZStack { closeButton .padding(.top, 24) + .padding(.leading, 16) .frame(maxWidth: .infinity, alignment: .leading) - // shareButton - // .padding(.top, 8) - // .frame(maxWidth: .infinity, alignment: .trailing) - Capsule() .fill(.gray) .frame(width: 60, height: 4) @@ -164,8 +175,7 @@ public struct MiniPlayer: View { } } else { HStack(alignment: .center) { - let maxSize = 2 * (min(geom.size.width, geom.size.height) / 3) - let dim = 64.0 // expanded ? maxSize : 64 + let dim = 64.0 if let imageURL = itemAudioProperties.imageURL { AsyncImage(url: imageURL) { phase in @@ -187,14 +197,12 @@ public struct MiniPlayer: View { defaultArtwork(forDimensions: dim) } - // if !expanded { VStack { Text(itemAudioProperties.title) .font(.appCallout) .foregroundColor(.appGrayTextContrast) .fixedSize(horizontal: false, vertical: false) .frame(maxWidth: .infinity, alignment: .leading) - .matchedGeometryEffect(id: "ArticleTitle", in: animation) if let byline = itemAudioProperties.byline { Text(byline) @@ -203,7 +211,6 @@ public struct MiniPlayer: View { .foregroundColor(.appGrayText) .fixedSize(horizontal: false, vertical: false) .frame(maxWidth: .infinity, alignment: .leading) - // .matchedGeometryEffect(id: "ArticleTitle", in: animation) } } @@ -215,82 +222,73 @@ public struct MiniPlayer: View { }.frame(maxHeight: .infinity) } - // Spacer(minLength: 0) - if expanded { -// Marquee(text: itemAudioProperties.title, font: UIFont(name: "Inter-Regular", size: 22)!) -// .foregroundColor(.appGrayTextContrast) -// .onTapGesture { -// viewArticle() -// } -// -// if let byline = itemAudioProperties.byline { -// Marquee(text: byline, font: UIFont(name: "Inter-Regular", size: 16)!) -// .foregroundColor(.appGrayText) -// } + ZStack { + TabView(selection: $tabIndex) { + ForEach(0 ..< (self.audioController.textItems?.count ?? 0), id: \.self) { id in + SpeechCard(id: id) + .frame(width: geom.size.width) + .tag(id) + } + } + .tabViewStyle(PageTabViewStyle(indexDisplayMode: .never)) + .onChange(of: tabIndex, perform: { index in + if index != audioController.currentAudioIndex, index < (audioController.textItems?.count ?? 0) { + audioController.seek(toUtterance: index) + } + }) + .onChange(of: audioController.currentAudioIndex, perform: { index in + if self.audioController.state != .reachedEnd { + tabIndex = index + } else { + tabIndex = (self.audioController.textItems?.count ?? 0) + 1 + } + }) + .frame(width: geom.size.width) - Group { - Text(audioController.readText) - .font(.textToSpeechRead.leading(.loose)) - .foregroundColor(Color.appGrayTextContrast) - + - Text(audioController.unreadText) - .font(.textToSpeechRead.leading(.loose)) - .foregroundColor(Color.appGrayText) + if audioController.state == .reachedEnd { + // If we have reached the end display a replay button + Button( + action: { + tabIndex = 0 + audioController.unpause() + audioController.seek(to: 0.0) + }, + label: { + Image(systemName: "gobackward") + .font(.appCallout) + .tint(.appGrayTextContrast) + Text("Replay") + } + ) + } } - .padding(24) - -// ScrollView { -// ScrollViewReader { _ in -// ForEach(Array(self.audioController.readText.enumerated()), id: \.1.self) { index, text in -// Text(text) -// .font(.textToSpeechRead) -// .lineSpacing(2.5) -// .padding() -// .id(index) -// } -// -// ForEach(Array(self.audioController.unreadText.enumerated()), id: \.1.self) { index, text in -// Text(text) -// .font(.textToSpeechUnread) -// .opacity(0.55) -// .lineSpacing(2.5) -// .padding() -// .id(index) -// -// }.onChange(of: self.audioController.currentAudioIndex) { _ in -// // withAnimation(.spring()) { -// // proxy.scrollTo(value, anchor: .top) -// // } -// } -// } -// - //// Text("This is where the main content would go") + - //// Text("its multiple lines of text.") + - //// Text("It would probably need to scroll as our text segments are pretty big") -// } Spacer() - ScrubberView(value: $audioController.timeElapsed, - minValue: 0, maxValue: self.audioController.duration, - onEditingChanged: { scrubStarted in - if scrubStarted { - self.audioController.scrubState = .scrubStarted - } else { - self.audioController.scrubState = .scrubEnded(self.audioController.timeElapsed) - } - }) + Group { + ScrubberView(value: $audioController.timeElapsed, + minValue: 0, maxValue: self.audioController.duration, + onEditingChanged: { scrubStarted in + if scrubStarted { + self.audioController.scrubState = .scrubStarted + } else { + self.audioController.scrubState = .scrubEnded(self.audioController.timeElapsed) + } + }) - HStack { - Text(audioController.timeElapsedString ?? "0:00") - .font(.appCaptionTwo) - .foregroundColor(.appGrayText) - Spacer() - Text(audioController.durationString ?? "0:00") - .font(.appCaptionTwo) - .foregroundColor(.appGrayText) + HStack { + Text(audioController.timeElapsedString ?? "0:00") + .font(.appCaptionTwo) + .foregroundColor(.appGrayText) + Spacer() + Text(audioController.durationString ?? "0:00") + .font(.appCaptionTwo) + .foregroundColor(.appGrayText) + } } + .padding(.leading, 16) + .padding(.trailing, 16) HStack(alignment: .center, spacing: 36) { Menu { @@ -344,7 +342,7 @@ public struct MiniPlayer: View { }.padding(.bottom, 16) } } - .padding(EdgeInsets(top: 0, leading: 6, bottom: 0, trailing: 6)) + .padding(EdgeInsets(top: 0, leading: 0, bottom: 0, trailing: 0)) .background( Color.systemBackground .shadow(color: expanded ? .clear : .gray.opacity(0.33), radius: 8, x: 0, y: 4) diff --git a/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/ScrollingStackModifier.swift b/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/ScrollingStackModifier.swift deleted file mode 100644 index 1bce64649..000000000 --- a/apple/OmnivoreKit/Sources/App/Views/AudioPlayer/ScrollingStackModifier.swift +++ /dev/null @@ -1,78 +0,0 @@ -// -// ScrollingStackModifier.swift -// ScrollView_Tests -// -// Created by Jean-Marc Boullianne on 8/7/20. -// -import SwiftUI - -struct ScrollingHStackModifier: ViewModifier { - - @State private var scrollOffset: CGFloat - @State private var dragOffset: CGFloat - - var items: Int - var itemWidth: CGFloat - var itemSpacing: CGFloat - - init(items: Int, itemWidth: CGFloat, itemSpacing: CGFloat) { - self.items = items - self.itemWidth = itemWidth - self.itemSpacing = itemSpacing - - // Calculate Total Content Width - let contentWidth: CGFloat = CGFloat(items) * itemWidth + CGFloat(items - 1) * itemSpacing - let screenWidth = UIScreen.main.bounds.width - - // Set Initial Offset to first Item - let initialOffset = (contentWidth/2.0) - (screenWidth/2.0) + ((screenWidth - itemWidth) / 2.0) - - self._scrollOffset = State(initialValue: initialOffset) - self._dragOffset = State(initialValue: 0) - } - - func body(content: Content) -> some View { - content - .offset(x: scrollOffset + dragOffset, y: 0) - .gesture(DragGesture() - .onChanged({ event in - dragOffset = event.translation.width - }) - .onEnded({ event in - // Scroll to where user dragged - scrollOffset += event.translation.width - dragOffset = 0 - - // Now calculate which item to snap to - let contentWidth: CGFloat = CGFloat(items) * itemWidth + CGFloat(items - 1) * itemSpacing - let screenWidth = UIScreen.main.bounds.width - - // Center position of current offset - let center = scrollOffset + (screenWidth / 2.0) + (contentWidth / 2.0) - - // Calculate which item we are closest to using the defined size - var index = (center - (screenWidth / 2.0)) / (itemWidth + itemSpacing) - - // Should we stay at current index or are we closer to the next item... - if index.remainder(dividingBy: 1) > 0.5 { - index += 1 - } else { - index = CGFloat(Int(index)) - } - - // Protect from scrolling out of bounds - index = min(index, CGFloat(items) - 1) - index = max(index, 0) - - // Set final offset (snapping to item) - let newOffset = index * itemWidth + (index - 1) * itemSpacing - (contentWidth / 2.0) + (screenWidth / 2.0) - ((screenWidth - itemWidth) / 2.0) + itemSpacing - - // Animate snapping - withAnimation { - scrollOffset = newOffset - } - - }) - ) - } -} diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift index 75b82a078..345fce5d6 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/AudioController.swift @@ -93,7 +93,9 @@ class SpeechPlayerItem: AVPlayerItem { var pendingRequests = Set() weak var owner: SpeechPlayerItem? - func resourceLoader(_: AVAssetResourceLoader, shouldWaitForLoadingOfRequestedResource loadingRequest: AVAssetResourceLoadingRequest) -> Bool { + func resourceLoader(_: AVAssetResourceLoader, + shouldWaitForLoadingOfRequestedResource loadingRequest: AVAssetResourceLoadingRequest) -> Bool + { if owner == nil { return true } @@ -120,7 +122,7 @@ class SpeechPlayerItem: AVPlayerItem { guard let speechItem = self.owner?.speechItem else { // This probably can't happen, but if it does, just returning should // let AVPlayer try again. - print("No speech item found: ", self.owner) + print("No speech item found: ", self.owner?.speechItem) return } @@ -180,7 +182,8 @@ class SpeechPlayerItem: AVPlayerItem { } let bytesToRespond = min(songDataUnwrapped.count - currentOffset, requestedLength) - let dataToRespond = songDataUnwrapped.subdata(in: Range(uncheckedBounds: (currentOffset, currentOffset + bytesToRespond))) + let range = Range(uncheckedBounds: (currentOffset, currentOffset + bytesToRespond)) + let dataToRespond = songDataUnwrapped.subdata(in: range) dataRequest.respond(with: dataToRespond) return songDataUnwrapped.count >= requestedLength + requestedOffset @@ -198,8 +201,6 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate @Published public var currentAudioIndex: Int = 0 @Published public var readText: String = "" @Published public var unreadText: String = "" - @Published public var numberOfSpeechItems: Int = 0 - @Published public var itemAudioProperties: LinkedItemAudioProperties? @Published public var timeElapsed: TimeInterval = 0 @@ -255,7 +256,7 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate player?.removeAllItems() document = nil - numberOfSpeechItems = 0 + textItems = nil timer = nil player = nil @@ -338,8 +339,16 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate } } + public func seek(toUtterance: Int) { + player?.pause() + + player?.removeAllItems() + synthesizeFrom(start: toUtterance, playWhenReady: state == .playing, atOffset: 0.0) + scrubState = .reset + fireTimer() + } + public func seek(to: TimeInterval) { - var hasOffset = false let position = max(0, to) // If we are in reachedEnd state, and seek back, we need to move to @@ -365,10 +374,6 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate let before = durationBefore(playerIndex: foundIdx) let remainder = position - before - if remainder > 0 { - hasOffset = true - } - // if the foundIdx happens to be the current item, we just set the position if let playerItem = player?.currentItem as? SpeechPlayerItem { if playerItem.speechItem.audioIdx == foundIdx { @@ -477,22 +482,24 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate let body: String } - public var textItems: [String]? { + public var textItems: [String]? + + func setTextItems() { if let document = self.document { - return document.utterances.map { utterance in + textItems = document.utterances.map { utterance in if let regex = try? NSRegularExpression(pattern: "<[^>]*>", options: .caseInsensitive) { let modString = regex.stringByReplacingMatches(in: utterance.text, options: [], range: NSRange(location: 0, length: utterance.text.count), withTemplate: "") return modString } return "" } + } else { + textItems = nil } - return nil } func updateReadText() { - if let textItems = textItems, let item = player?.currentItem as? SpeechPlayerItem, let speechMarks = item.speechMarks { - // up till: + if let item = player?.currentItem as? SpeechPlayerItem, let speechMarks = item.speechMarks { var currentItemOffset = 0 for i in 0 ..< speechMarks.count { if speechMarks[i].time ?? 0 < 0 { @@ -510,46 +517,22 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate } } + // Sometimes we get negatives + currentItemOffset = max(currentItemOffset, 0) + let idx = item.speechItem.audioIdx - let currentItem = textItems[idx] - let currentReadIndex = currentItem.index(currentItem.startIndex, offsetBy: max(currentItemOffset, currentItem.count)) + let currentItem = document?.utterances[idx].text ?? "" + let currentReadIndex = currentItem.index(currentItem.startIndex, offsetBy: min(currentItemOffset, currentItem.count)) let lastItem = String(currentItem[..]*>", options: .caseInsensitive) { -// let modString = regex.stringByReplacingMatches(in: text, options: [], range: NSRange(location: 0, length: text.count), withTemplate: "") -// return modString -// } -// return "" -// }) } else { readText = "" } } - func updateUnreadText() { -// if let textItems = textItems, let item = player?.currentItem as? SpeechPlayerItem { -// let idx = item.speechItem.audioIdx -// -// -// unreadText = Array(textItems[idx...].map { text in -// if let regex = try? NSRegularExpression(pattern: "<[^>]*>", options: .caseInsensitive) { -// let modString = regex.stringByReplacingMatches(in: text, options: [], range: NSRange(location: 0, length: text.count), withTemplate: "") -// return modString -// } -// return "" -// }) -// } else { -// unreadText = [] -// } - } - public func getPreferredVoice(forLanguage language: String) -> String { UserDefaults.standard.string(forKey: "\(language)-\(UserDefaultKey.textToSpeechPreferredVoice.rawValue)") ?? currentVoiceLanguage.defaultVoice } @@ -572,6 +555,8 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate DispatchQueue.main.async { if let document = document { let synthesizer = SpeechSynthesizer(appEnvironment: self.appEnvironment, networker: self.networker, document: document) + + self.setTextItems() self.durations = synthesizer.estimatedDurations(forSpeed: self.playbackRate) self.synthesizer = synthesizer @@ -673,7 +658,7 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate let document = try? await downloadSpeechFile(itemID: itemID, priority: .high) DispatchQueue.main.async { - self.numberOfSpeechItems = document?.utterances.count ?? 0 + self.setTextItems() if let document = document { self.startStreamingAudio(itemID: itemID, document: document) } else { @@ -698,7 +683,6 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate player = AVQueuePlayer(items: []) if let player = player { observer = player.observe(\.currentItem, options: [.new]) { _, _ in - print("current item did change: ", (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx) self.currentAudioIndex = (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx ?? 0 } } @@ -816,17 +800,6 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate } } } - -// if let item = self.item, let speechItem = player?.currentItem as? SpeechPlayerItem { -// NotificationCenter.default.post( -// name: NSNotification.SpeakingReaderItem, -// object: nil, -// userInfo: [ -// "pageID": item.unwrappedID, -// "anchorIdx": String(speechItem.speechItem.htmlIdx) -// ] -// ) -// } } func clearNowPlayingInfo() { diff --git a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift index 6c67f5fc0..c54355f6a 100644 --- a/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift +++ b/apple/OmnivoreKit/Sources/Services/AudioSession/SpeechSynthesizer.swift @@ -125,7 +125,7 @@ struct SpeechSynthesizer { let voiceStr = utterance.voice ?? document.defaultVoice let segmentStr = String(format: "%04d", arguments: [idx]) let localAudioURL = document.audioDirectory.appendingPathComponent("\(segmentStr)-\(voiceStr).mp3") - let localSpeechURL = document.audioDirectory.appendingPathComponent("\(segmentStr)-\(voiceStr).mp3") + let localSpeechURL = document.audioDirectory.appendingPathComponent("\(segmentStr)-\(voiceStr).speechMarks") if let request = urlRequestFor(utterance: utterance) { let item = SpeechItem(htmlIdx: utterance.idx, @@ -165,13 +165,12 @@ struct SpeechSynthesizer { { let decoder = JSONDecoder() - if !redownloadCached, FileManager.default.fileExists(atPath: speechItem.localAudioURL.path) { + if !redownloadCached { if let speechMarksData = try? Data(contentsOf: speechItem.localSpeechURL), let speechMarks = try? decoder.decode([SpeechMark].self, from: speechMarksData), let localData = try? Data(contentsOf: speechItem.localAudioURL) { - print("CACHED DATA LENGTH: ", localData.count) - // return SynthesizeData(audioData: localData, speechMarks: speechMarks) + return SynthesizeData(audioData: localData, speechMarks: speechMarks) } } @@ -195,31 +194,26 @@ struct SpeechSynthesizer { .appendingPathComponent(UUID().uuidString + ".speechMarks") do { - print("SPEECH DATA: ", String(decoding: data, as: UTF8.self)) - let jsonData = try decoder.decode(SynthesizeResult.self, from: data) as SynthesizeResult let audioData = Data(fromHexEncodedString: jsonData.audioData)! if audioData.count < 1 { throw BasicError.message(messageText: "Audio data is empty") } - print("AUDIO DATA LENGTH: ", audioData.count) - try audioData.write(to: tempPath) try? FileManager.default.removeItem(at: speechItem.localAudioURL) try FileManager.default.moveItem(at: tempPath, to: speechItem.localAudioURL) + let savedData = try? Data(contentsOf: speechItem.localAudioURL) + let encoder = JSONEncoder() let speechMarksData = try encoder.encode(jsonData.speechMarks) try speechMarksData.write(to: tempSMPath) try? FileManager.default.removeItem(at: speechItem.localSpeechURL) try FileManager.default.moveItem(at: tempSMPath, to: speechItem.localSpeechURL) - print("DOWNLOADED SPEECH MARKS: ", jsonData.speechMarks) - return SynthesizeData(audioData: audioData, speechMarks: jsonData.speechMarks) } catch { - print("ERROR WRITING DATA", error) let errorMessage = "audioFetch failed. could not write MP3 data to disk" throw BasicError.message(messageText: errorMessage) } diff --git a/apple/OmnivoreKit/Sources/Views/Fonts.swift b/apple/OmnivoreKit/Sources/Views/Fonts.swift index 8e5f70e73..03fb7d1ad 100644 --- a/apple/OmnivoreKit/Sources/Views/Fonts.swift +++ b/apple/OmnivoreKit/Sources/Views/Fonts.swift @@ -20,11 +20,7 @@ public extension Font { } static var textToSpeechRead: Font { - Font.custom(InterFont.bold.rawValue, size: 28, relativeTo: .title2) - } - - static var textToSpeechUnread: Font { - Font.custom(InterFont.regular.rawValue, size: 22, relativeTo: .title2) + Font.custom(InterFont.bold.rawValue, size: 24, relativeTo: .title2) } /// 20pt, Inter-Regular