Improve text-to-speech follow UX

This commit is contained in:
Jackson Harper
2022-10-05 15:14:00 +08:00
parent b983a4336b
commit ca725b4908
5 changed files with 135 additions and 252 deletions

View File

@ -20,6 +20,8 @@ public struct MiniPlayer: View {
@State var offset: CGFloat = 0
@State var showVoiceSheet = false
@State var showLanguageSheet = false
@State var tabIndex: Int = 0
@Namespace private var animation
let minExpandedHeight = UIScreen.main.bounds.height / 3
@ -85,26 +87,6 @@ public struct MiniPlayer: View {
)
}
// var shareButton: some View {
// Button(
// action: {
// let shareActivity = UIActivityViewController(activityItems: [self.audioSession.localAudioUrl], applicationActivities: nil)
// if let vc = UIApplication.shared.windows.first?.rootViewController {
// shareActivity.popoverPresentationController?.sourceView = vc.view
// // Setup share activity position on screen on bottom center
// shareActivity.popoverPresentationController?.sourceRect = CGRect(x: UIScreen.main.bounds.width / 2, y: UIScreen.main.bounds.height, width: 0, height: 0)
// shareActivity.popoverPresentationController?.permittedArrowDirections = UIPopoverArrowDirection.down
// vc.present(shareActivity, animated: true, completion: nil)
// }
// },
// label: {
// Image(systemName: "square.and.arrow.up")
// .font(.appCallout)
// .tint(.appGrayText)
// }
// )
// }
var closeButton: some View {
Button(
action: {
@ -114,10 +96,12 @@ public struct MiniPlayer: View {
},
label: {
Image(systemName: "chevron.down")
.font(.appCallout)
.tint(.appGrayText)
.font(.appTitleTwo)
.tint(.appGrayTextContrast)
}
)
.contentShape(Rectangle())
}
func viewArticle() {
@ -142,6 +126,36 @@ public struct MiniPlayer: View {
}
}
// One page of the text-to-speech transcript pager.
// Inactive utterances (or any utterance while audio is still loading) render
// as static text; the currently playing utterance renders as a read/unread
// pair so the highlight advances through the text as it is spoken.
struct SpeechCard: View {
    let id: Int
    @EnvironmentObject var audioController: AudioController

    init(id: Int) {
        self.id = id
    }

    var body: some View {
        Group {
            if id == self.audioController.currentAudioIndex, !self.audioController.isLoading {
                // Active card: concatenate the already-spoken prefix with the
                // not-yet-spoken remainder (Text + Text) so both render as one
                // continuous run with different colors.
                Group {
                    Text(audioController.readText)
                        .font(.textToSpeechRead.leading(.loose))
                        .foregroundColor(Color.appGrayTextContrast)
                        +
                        Text(audioController.unreadText)
                        .font(.textToSpeechRead.leading(.loose))
                        .foregroundColor(Color.appGrayText)
                }
            } else {
                // Inactive card: show the full utterance text, falling back to
                // the numeric index when the transcript is unavailable.
                Text(self.audioController.textItems?[id] ?? "\(id)")
                    .font(.textToSpeechRead.leading(.loose))
                    .foregroundColor(Color.appGrayTextContrast)
            }
        }
        .padding(16)
    }
}
// swiftlint:disable:next function_body_length
func playerContent(_ itemAudioProperties: LinkedItemAudioProperties) -> some View {
GeometryReader { geom in
@ -150,12 +164,9 @@ public struct MiniPlayer: View {
ZStack {
closeButton
.padding(.top, 24)
.padding(.leading, 16)
.frame(maxWidth: .infinity, alignment: .leading)
// shareButton
// .padding(.top, 8)
// .frame(maxWidth: .infinity, alignment: .trailing)
Capsule()
.fill(.gray)
.frame(width: 60, height: 4)
@ -164,8 +175,7 @@ public struct MiniPlayer: View {
}
} else {
HStack(alignment: .center) {
let maxSize = 2 * (min(geom.size.width, geom.size.height) / 3)
let dim = 64.0 // expanded ? maxSize : 64
let dim = 64.0
if let imageURL = itemAudioProperties.imageURL {
AsyncImage(url: imageURL) { phase in
@ -187,14 +197,12 @@ public struct MiniPlayer: View {
defaultArtwork(forDimensions: dim)
}
// if !expanded {
VStack {
Text(itemAudioProperties.title)
.font(.appCallout)
.foregroundColor(.appGrayTextContrast)
.fixedSize(horizontal: false, vertical: false)
.frame(maxWidth: .infinity, alignment: .leading)
.matchedGeometryEffect(id: "ArticleTitle", in: animation)
if let byline = itemAudioProperties.byline {
Text(byline)
@ -203,7 +211,6 @@ public struct MiniPlayer: View {
.foregroundColor(.appGrayText)
.fixedSize(horizontal: false, vertical: false)
.frame(maxWidth: .infinity, alignment: .leading)
// .matchedGeometryEffect(id: "ArticleTitle", in: animation)
}
}
@ -215,82 +222,73 @@ public struct MiniPlayer: View {
}.frame(maxHeight: .infinity)
}
// Spacer(minLength: 0)
if expanded {
// Marquee(text: itemAudioProperties.title, font: UIFont(name: "Inter-Regular", size: 22)!)
// .foregroundColor(.appGrayTextContrast)
// .onTapGesture {
// viewArticle()
// }
//
// if let byline = itemAudioProperties.byline {
// Marquee(text: byline, font: UIFont(name: "Inter-Regular", size: 16)!)
// .foregroundColor(.appGrayText)
// }
ZStack {
TabView(selection: $tabIndex) {
ForEach(0 ..< (self.audioController.textItems?.count ?? 0), id: \.self) { id in
SpeechCard(id: id)
.frame(width: geom.size.width)
.tag(id)
}
}
.tabViewStyle(PageTabViewStyle(indexDisplayMode: .never))
.onChange(of: tabIndex, perform: { index in
if index != audioController.currentAudioIndex, index < (audioController.textItems?.count ?? 0) {
audioController.seek(toUtterance: index)
}
})
.onChange(of: audioController.currentAudioIndex, perform: { index in
if self.audioController.state != .reachedEnd {
tabIndex = index
} else {
tabIndex = (self.audioController.textItems?.count ?? 0) + 1
}
})
.frame(width: geom.size.width)
Group {
Text(audioController.readText)
.font(.textToSpeechRead.leading(.loose))
.foregroundColor(Color.appGrayTextContrast)
+
Text(audioController.unreadText)
.font(.textToSpeechRead.leading(.loose))
.foregroundColor(Color.appGrayText)
if audioController.state == .reachedEnd {
// If we have reached the end display a replay button
Button(
action: {
tabIndex = 0
audioController.unpause()
audioController.seek(to: 0.0)
},
label: {
Image(systemName: "gobackward")
.font(.appCallout)
.tint(.appGrayTextContrast)
Text("Replay")
}
)
}
}
.padding(24)
// ScrollView {
// ScrollViewReader { _ in
// ForEach(Array(self.audioController.readText.enumerated()), id: \.1.self) { index, text in
// Text(text)
// .font(.textToSpeechRead)
// .lineSpacing(2.5)
// .padding()
// .id(index)
// }
//
// ForEach(Array(self.audioController.unreadText.enumerated()), id: \.1.self) { index, text in
// Text(text)
// .font(.textToSpeechUnread)
// .opacity(0.55)
// .lineSpacing(2.5)
// .padding()
// .id(index)
//
// }.onChange(of: self.audioController.currentAudioIndex) { _ in
// // withAnimation(.spring()) {
// // proxy.scrollTo(value, anchor: .top)
// // }
// }
// }
//
//// Text("This is where the main content would go") +
//// Text("its multiple lines of text.") +
//// Text("It would probably need to scroll as our text segments are pretty big")
// }
Spacer()
ScrubberView(value: $audioController.timeElapsed,
minValue: 0, maxValue: self.audioController.duration,
onEditingChanged: { scrubStarted in
if scrubStarted {
self.audioController.scrubState = .scrubStarted
} else {
self.audioController.scrubState = .scrubEnded(self.audioController.timeElapsed)
}
})
Group {
ScrubberView(value: $audioController.timeElapsed,
minValue: 0, maxValue: self.audioController.duration,
onEditingChanged: { scrubStarted in
if scrubStarted {
self.audioController.scrubState = .scrubStarted
} else {
self.audioController.scrubState = .scrubEnded(self.audioController.timeElapsed)
}
})
HStack {
Text(audioController.timeElapsedString ?? "0:00")
.font(.appCaptionTwo)
.foregroundColor(.appGrayText)
Spacer()
Text(audioController.durationString ?? "0:00")
.font(.appCaptionTwo)
.foregroundColor(.appGrayText)
HStack {
Text(audioController.timeElapsedString ?? "0:00")
.font(.appCaptionTwo)
.foregroundColor(.appGrayText)
Spacer()
Text(audioController.durationString ?? "0:00")
.font(.appCaptionTwo)
.foregroundColor(.appGrayText)
}
}
.padding(.leading, 16)
.padding(.trailing, 16)
HStack(alignment: .center, spacing: 36) {
Menu {
@ -344,7 +342,7 @@ public struct MiniPlayer: View {
}.padding(.bottom, 16)
}
}
.padding(EdgeInsets(top: 0, leading: 6, bottom: 0, trailing: 6))
.padding(EdgeInsets(top: 0, leading: 0, bottom: 0, trailing: 0))
.background(
Color.systemBackground
.shadow(color: expanded ? .clear : .gray.opacity(0.33), radius: 8, x: 0, y: 4)

View File

@ -1,78 +0,0 @@
//
// ScrollingStackModifier.swift
// ScrollView_Tests
//
// Created by Jean-Marc Boullianne on 8/7/20.
//
import SwiftUI
/// Turns a fixed-width `HStack` into a horizontally draggable carousel that
/// snaps to the nearest item when the drag gesture ends.
struct ScrollingHStackModifier: ViewModifier {
    // Accumulated offset committed after each completed drag.
    @State private var scrollOffset: CGFloat
    // Live offset while a drag gesture is in progress.
    @State private var dragOffset: CGFloat

    var items: Int          // total number of items in the stack
    var itemWidth: CGFloat  // width of each item
    var itemSpacing: CGFloat // gap between adjacent items

    init(items: Int, itemWidth: CGFloat, itemSpacing: CGFloat) {
        self.items = items
        self.itemWidth = itemWidth
        self.itemSpacing = itemSpacing

        // Total width of all items plus the gaps between them.
        let contentWidth: CGFloat = CGFloat(items) * itemWidth + CGFloat(items - 1) * itemSpacing
        let screenWidth = UIScreen.main.bounds.width

        // Set Initial Offset so the first item is centered on screen.
        let initialOffset = (contentWidth/2.0) - (screenWidth/2.0) + ((screenWidth - itemWidth) / 2.0)

        self._scrollOffset = State(initialValue: initialOffset)
        self._dragOffset = State(initialValue: 0)
    }

    func body(content: Content) -> some View {
        content
            .offset(x: scrollOffset + dragOffset, y: 0)
            .gesture(DragGesture()
                .onChanged({ event in
                    dragOffset = event.translation.width
                })
                .onEnded({ event in
                    // Fold the finished drag into the persistent offset.
                    scrollOffset += event.translation.width
                    dragOffset = 0

                    // Now calculate which item to snap to
                    let contentWidth: CGFloat = CGFloat(items) * itemWidth + CGFloat(items - 1) * itemSpacing
                    let screenWidth = UIScreen.main.bounds.width

                    // Center position of current offset
                    let center = scrollOffset + (screenWidth / 2.0) + (contentWidth / 2.0)

                    // Fractional index of the item closest to the screen center.
                    var index = (center - (screenWidth / 2.0)) / (itemWidth + itemSpacing)

                    // BUG FIX: the previous code tested
                    // `index.remainder(dividingBy: 1) > 0.5`, but `remainder`
                    // returns values in (-0.5, 0.5], so the "round up" branch
                    // could never fire — e.g. 2.7 yields remainder -0.3 and
                    // snapped to item 2 instead of item 3. `rounded()` snaps
                    // to the nearest whole index as intended.
                    index = index.rounded()

                    // Protect from scrolling out of bounds
                    index = min(index, CGFloat(items) - 1)
                    index = max(index, 0)

                    // Set final offset (snapping to item)
                    let newOffset = index * itemWidth + (index - 1) * itemSpacing - (contentWidth / 2.0) + (screenWidth / 2.0) - ((screenWidth - itemWidth) / 2.0) + itemSpacing

                    // Animate snapping
                    withAnimation {
                        scrollOffset = newOffset
                    }
                })
            )
    }
}

View File

@ -93,7 +93,9 @@ class SpeechPlayerItem: AVPlayerItem {
var pendingRequests = Set<AVAssetResourceLoadingRequest>()
weak var owner: SpeechPlayerItem?
func resourceLoader(_: AVAssetResourceLoader, shouldWaitForLoadingOfRequestedResource loadingRequest: AVAssetResourceLoadingRequest) -> Bool {
func resourceLoader(_: AVAssetResourceLoader,
shouldWaitForLoadingOfRequestedResource loadingRequest: AVAssetResourceLoadingRequest) -> Bool
{
if owner == nil {
return true
}
@ -120,7 +122,7 @@ class SpeechPlayerItem: AVPlayerItem {
guard let speechItem = self.owner?.speechItem else {
// This probably can't happen, but if it does, just returning should
// let AVPlayer try again.
print("No speech item found: ", self.owner)
print("No speech item found: ", self.owner?.speechItem)
return
}
@ -180,7 +182,8 @@ class SpeechPlayerItem: AVPlayerItem {
}
let bytesToRespond = min(songDataUnwrapped.count - currentOffset, requestedLength)
let dataToRespond = songDataUnwrapped.subdata(in: Range(uncheckedBounds: (currentOffset, currentOffset + bytesToRespond)))
let range = Range(uncheckedBounds: (currentOffset, currentOffset + bytesToRespond))
let dataToRespond = songDataUnwrapped.subdata(in: range)
dataRequest.respond(with: dataToRespond)
return songDataUnwrapped.count >= requestedLength + requestedOffset
@ -198,8 +201,6 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
@Published public var currentAudioIndex: Int = 0
@Published public var readText: String = ""
@Published public var unreadText: String = ""
@Published public var numberOfSpeechItems: Int = 0
@Published public var itemAudioProperties: LinkedItemAudioProperties?
@Published public var timeElapsed: TimeInterval = 0
@ -255,7 +256,7 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
player?.removeAllItems()
document = nil
numberOfSpeechItems = 0
textItems = nil
timer = nil
player = nil
@ -338,8 +339,16 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
}
}
// Jumps playback to the utterance at the given index: tears down the current
// player queue and re-synthesizes audio starting at `toUtterance`, resuming
// playback only if we were already playing.
public func seek(toUtterance: Int) {
// Order matters: pause before clearing the queue so no stale item keeps playing.
player?.pause()
player?.removeAllItems()
// NOTE(review): assumes synthesizeFrom rebuilds the queue from this utterance
// and that atOffset 0.0 means "start of utterance" — confirm in synthesizeFrom.
synthesizeFrom(start: toUtterance, playWhenReady: state == .playing, atOffset: 0.0)
// Reset any in-progress scrubbing and refresh elapsed-time state immediately.
scrubState = .reset
fireTimer()
}
public func seek(to: TimeInterval) {
var hasOffset = false
let position = max(0, to)
// If we are in reachedEnd state, and seek back, we need to move to
@ -365,10 +374,6 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
let before = durationBefore(playerIndex: foundIdx)
let remainder = position - before
if remainder > 0 {
hasOffset = true
}
// if the foundIdx happens to be the current item, we just set the position
if let playerItem = player?.currentItem as? SpeechPlayerItem {
if playerItem.speechItem.audioIdx == foundIdx {
@ -477,22 +482,24 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
let body: String
}
public var textItems: [String]? {
public var textItems: [String]?
func setTextItems() {
if let document = self.document {
return document.utterances.map { utterance in
textItems = document.utterances.map { utterance in
if let regex = try? NSRegularExpression(pattern: "<[^>]*>", options: .caseInsensitive) {
let modString = regex.stringByReplacingMatches(in: utterance.text, options: [], range: NSRange(location: 0, length: utterance.text.count), withTemplate: "")
return modString
}
return ""
}
} else {
textItems = nil
}
return nil
}
func updateReadText() {
if let textItems = textItems, let item = player?.currentItem as? SpeechPlayerItem, let speechMarks = item.speechMarks {
// up till:
if let item = player?.currentItem as? SpeechPlayerItem, let speechMarks = item.speechMarks {
var currentItemOffset = 0
for i in 0 ..< speechMarks.count {
if speechMarks[i].time ?? 0 < 0 {
@ -510,46 +517,22 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
}
}
// Sometimes we get negatives
currentItemOffset = max(currentItemOffset, 0)
let idx = item.speechItem.audioIdx
let currentItem = textItems[idx]
let currentReadIndex = currentItem.index(currentItem.startIndex, offsetBy: max(currentItemOffset, currentItem.count))
let currentItem = document?.utterances[idx].text ?? ""
let currentReadIndex = currentItem.index(currentItem.startIndex, offsetBy: min(currentItemOffset, currentItem.count))
let lastItem = String(currentItem[..<currentReadIndex])
let lastItemAfter = String(currentItem[currentReadIndex...])
// print("LAST ITEM: ", lastItem)
//
readText = lastItem
unreadText = lastItemAfter
// readText = Array((textItems[..<idx] + [lastItem]).map { text in
// if let regex = try? NSRegularExpression(pattern: "<[^>]*>", options: .caseInsensitive) {
// let modString = regex.stringByReplacingMatches(in: text, options: [], range: NSRange(location: 0, length: text.count), withTemplate: "")
// return modString
// }
// return ""
// })
} else {
readText = ""
}
}
// No-op: this method's body is entirely commented out, so calling it has no
// effect. The unread text appears to be maintained elsewhere (updateReadText
// sets both readText and unreadText in this file).
// TODO(review): dead code — consider deleting this method and its callers.
func updateUnreadText() {
// if let textItems = textItems, let item = player?.currentItem as? SpeechPlayerItem {
// let idx = item.speechItem.audioIdx
//
//
// unreadText = Array(textItems[idx...].map { text in
// if let regex = try? NSRegularExpression(pattern: "<[^>]*>", options: .caseInsensitive) {
// let modString = regex.stringByReplacingMatches(in: text, options: [], range: NSRange(location: 0, length: text.count), withTemplate: "")
// return modString
// }
// return ""
// })
// } else {
// unreadText = []
// }
}
// Returns the user's saved preferred voice for `language`, falling back to
// the default voice of the currently selected voice language.
public func getPreferredVoice(forLanguage language: String) -> String {
    let key = "\(language)-\(UserDefaultKey.textToSpeechPreferredVoice.rawValue)"
    if let savedVoice = UserDefaults.standard.string(forKey: key) {
        return savedVoice
    }
    return currentVoiceLanguage.defaultVoice
}
@ -572,6 +555,8 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
DispatchQueue.main.async {
if let document = document {
let synthesizer = SpeechSynthesizer(appEnvironment: self.appEnvironment, networker: self.networker, document: document)
self.setTextItems()
self.durations = synthesizer.estimatedDurations(forSpeed: self.playbackRate)
self.synthesizer = synthesizer
@ -673,7 +658,7 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
let document = try? await downloadSpeechFile(itemID: itemID, priority: .high)
DispatchQueue.main.async {
self.numberOfSpeechItems = document?.utterances.count ?? 0
self.setTextItems()
if let document = document {
self.startStreamingAudio(itemID: itemID, document: document)
} else {
@ -698,7 +683,6 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
player = AVQueuePlayer(items: [])
if let player = player {
observer = player.observe(\.currentItem, options: [.new]) { _, _ in
print("current item did change: ", (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx)
self.currentAudioIndex = (player.currentItem as? SpeechPlayerItem)?.speechItem.audioIdx ?? 0
}
}
@ -816,17 +800,6 @@ public class AudioController: NSObject, ObservableObject, AVAudioPlayerDelegate
}
}
}
// if let item = self.item, let speechItem = player?.currentItem as? SpeechPlayerItem {
// NotificationCenter.default.post(
// name: NSNotification.SpeakingReaderItem,
// object: nil,
// userInfo: [
// "pageID": item.unwrappedID,
// "anchorIdx": String(speechItem.speechItem.htmlIdx)
// ]
// )
// }
}
func clearNowPlayingInfo() {

View File

@ -125,7 +125,7 @@ struct SpeechSynthesizer {
let voiceStr = utterance.voice ?? document.defaultVoice
let segmentStr = String(format: "%04d", arguments: [idx])
let localAudioURL = document.audioDirectory.appendingPathComponent("\(segmentStr)-\(voiceStr).mp3")
let localSpeechURL = document.audioDirectory.appendingPathComponent("\(segmentStr)-\(voiceStr).mp3")
let localSpeechURL = document.audioDirectory.appendingPathComponent("\(segmentStr)-\(voiceStr).speechMarks")
if let request = urlRequestFor(utterance: utterance) {
let item = SpeechItem(htmlIdx: utterance.idx,
@ -165,13 +165,12 @@ struct SpeechSynthesizer {
{
let decoder = JSONDecoder()
if !redownloadCached, FileManager.default.fileExists(atPath: speechItem.localAudioURL.path) {
if !redownloadCached {
if let speechMarksData = try? Data(contentsOf: speechItem.localSpeechURL),
let speechMarks = try? decoder.decode([SpeechMark].self, from: speechMarksData),
let localData = try? Data(contentsOf: speechItem.localAudioURL)
{
print("CACHED DATA LENGTH: ", localData.count)
// return SynthesizeData(audioData: localData, speechMarks: speechMarks)
return SynthesizeData(audioData: localData, speechMarks: speechMarks)
}
}
@ -195,31 +194,26 @@ struct SpeechSynthesizer {
.appendingPathComponent(UUID().uuidString + ".speechMarks")
do {
print("SPEECH DATA: ", String(decoding: data, as: UTF8.self))
let jsonData = try decoder.decode(SynthesizeResult.self, from: data) as SynthesizeResult
let audioData = Data(fromHexEncodedString: jsonData.audioData)!
if audioData.count < 1 {
throw BasicError.message(messageText: "Audio data is empty")
}
print("AUDIO DATA LENGTH: ", audioData.count)
try audioData.write(to: tempPath)
try? FileManager.default.removeItem(at: speechItem.localAudioURL)
try FileManager.default.moveItem(at: tempPath, to: speechItem.localAudioURL)
let savedData = try? Data(contentsOf: speechItem.localAudioURL)
let encoder = JSONEncoder()
let speechMarksData = try encoder.encode(jsonData.speechMarks)
try speechMarksData.write(to: tempSMPath)
try? FileManager.default.removeItem(at: speechItem.localSpeechURL)
try FileManager.default.moveItem(at: tempSMPath, to: speechItem.localSpeechURL)
print("DOWNLOADED SPEECH MARKS: ", jsonData.speechMarks)
return SynthesizeData(audioData: audioData, speechMarks: jsonData.speechMarks)
} catch {
print("ERROR WRITING DATA", error)
let errorMessage = "audioFetch failed. could not write MP3 data to disk"
throw BasicError.message(messageText: errorMessage)
}

View File

@ -20,11 +20,7 @@ public extension Font {
}
static var textToSpeechRead: Font {
Font.custom(InterFont.bold.rawValue, size: 28, relativeTo: .title2)
}
static var textToSpeechUnread: Font {
Font.custom(InterFont.regular.rawValue, size: 22, relativeTo: .title2)
Font.custom(InterFont.bold.rawValue, size: 24, relativeTo: .title2)
}
/// 20pt, Inter-Regular