Merge pull request #1277 from omnivore-app/feature/mark-by-sentence
Add sentence-level speech marks
This commit is contained in:
@ -21,29 +21,6 @@ const load = (path: string): string => {
|
||||
}
|
||||
|
||||
describe('Newsletter email test', () => {
|
||||
before(() => {
  // Stub the tracking links of both providers: a HEAD on the tracking path
  // answers 302 with the newsletter URL in the Location header, and a GET on
  // the article path (same host) answers an empty 200.
  const substackTrackingPath =
    '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY'
  const beehiivTrackingPath =
    '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno'

  const substackScope = nock('https://email.mg2.substack.com')
  substackScope.head(substackTrackingPath).reply(302, undefined, {
    Location:
      'https://newsletter.slowchinese.net/p/companies-that-eat-people-217',
  })
  substackScope.get('/p/companies-that-eat-people-217').reply(200, '')

  const beehiivScope = nock('https://u23463625.ct.sendgrid.net')
  beehiivScope.head(beehiivTrackingPath).reply(302, undefined, {
    Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple',
  })
  beehiivScope.get('/p/talked-guy-spent-30m-beeple').reply(200, '')
})
|
||||
|
||||
describe('#getNewsletterUrl()', () => {
|
||||
it('returns url when email is from SubStack', async () => {
|
||||
const rawUrl = '<https://hongbo130.substack.com/p/tldr>'
|
||||
@ -162,21 +139,59 @@ describe('Newsletter email test', () => {
|
||||
})
|
||||
|
||||
describe('findNewsletterUrl', async () => {
|
||||
it('gets the URL from the header if it is a substack newsletter', async () => {
  const expectedPrefix =
    'https://newsletter.slowchinese.net/p/companies-that-eat-people-217'
  const handler = new SubstackHandler()
  const forwardedEmail = load('./test/data/substack-forwarded-newsletter.html')

  // Caveat carried over from the original author: it is unclear whether
  // Substack's redirects expire, so this test could eventually fail.
  const resolvedUrl = await handler.findNewsletterUrl(forwardedEmail)

  expect(resolvedUrl).to.startWith(expectedPrefix)
}).timeout(10000)
|
||||
it('gets the URL from the header if it is a beehiiv newsletter', async () => {
  const expectedPrefix =
    'https://www.milkroad.com/p/talked-guy-spent-30m-beeple'
  const handler = new BeehiivHandler()
  const newsletterEmail = load('./test/data/beehiiv-newsletter.html')

  // The resolved URL only has to start with the article address.
  const resolvedUrl = await handler.findNewsletterUrl(newsletterEmail)

  expect(resolvedUrl).to.startWith(expectedPrefix)
}).timeout(10000)
|
||||
context('when email is from Substack', () => {
  before(() => {
    // nock.restore() (possibly called by a suite that ran earlier) removes
    // nock's HTTP interceptor; stubs registered while nock is inactive never
    // fire, so make sure it is active before registering ours.
    if (!nock.isActive()) {
      nock.activate()
    }

    // HEAD on the tracking link answers 302 with the newsletter URL in the
    // Location header.
    nock('https://email.mg2.substack.com')
      .head(
        '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY'
      )
      .reply(302, undefined, {
        Location:
          'https://newsletter.slowchinese.net/p/companies-that-eat-people-217',
      })
      // NOTE(review): this GET stub lives on the tracking host
      // (email.mg2.substack.com), not on the host named in Location. If the
      // handler follows the redirect, that request is not intercepted here —
      // confirm against the handler implementation.
      .get('/p/companies-that-eat-people-217')
      .reply(200, '')
  })

  after(() => {
    // Remove this suite's interceptors while leaving nock itself active, so
    // suites that run afterwards can still register working stubs.
    nock.cleanAll()
  })

  it('gets the URL from the header', async () => {
    const html = load('./test/data/substack-forwarded-newsletter.html')
    const url = await new SubstackHandler().findNewsletterUrl(html)
    // The redirect is meant to be served by the stub registered in before().
    // If the request ever reaches the live Substack link instead, the
    // redirect may have expired and this test could fail.
    expect(url).to.startWith(
      'https://newsletter.slowchinese.net/p/companies-that-eat-people-217'
    )
  }).timeout(10000)
})
|
||||
|
||||
context('when email is from beehiiv', () => {
  before(() => {
    // nock.restore() (possibly called by a suite that ran earlier) removes
    // nock's HTTP interceptor; stubs registered while nock is inactive never
    // fire, so make sure it is active before registering ours.
    if (!nock.isActive()) {
      nock.activate()
    }

    // HEAD on the tracking link answers 302 with the newsletter URL in the
    // Location header.
    nock('https://u23463625.ct.sendgrid.net')
      .head(
        '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno'
      )
      .reply(302, undefined, {
        Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple',
      })
      // NOTE(review): this GET stub lives on the tracking host
      // (u23463625.ct.sendgrid.net), not on the host named in Location. If
      // the handler follows the redirect, that request is not intercepted
      // here — confirm against the handler implementation.
      .get('/p/talked-guy-spent-30m-beeple')
      .reply(200, '')
  })

  after(() => {
    // Remove this suite's interceptors while leaving nock itself active, so
    // suites that run afterwards can still register working stubs.
    nock.cleanAll()
  })

  it('gets the URL from the header', async () => {
    const html = load('./test/data/beehiiv-newsletter.html')
    const url = await new BeehiivHandler().findNewsletterUrl(html)
    // Only the start of the resolved URL is checked.
    expect(url).to.startWith(
      'https://www.milkroad.com/p/talked-guy-spent-30m-beeple'
    )
  }).timeout(10000)
})
|
||||
|
||||
it('returns undefined if it is not a newsletter', async () => {
|
||||
const html = load('./test/data/substack-forwarded-welcome-email.html')
|
||||
const url = await new SubstackHandler().findNewsletterUrl(html)
|
||||
|
||||
@ -1,8 +1,10 @@
|
||||
import {
|
||||
CancellationDetails,
|
||||
CancellationReason,
|
||||
PropertyId,
|
||||
ResultReason,
|
||||
SpeechConfig,
|
||||
SpeechSynthesisBoundaryType,
|
||||
SpeechSynthesisOutputFormat,
|
||||
SpeechSynthesisResult,
|
||||
SpeechSynthesizer,
|
||||
@ -30,7 +32,7 @@ export interface SpeechMark {
|
||||
start?: number
|
||||
length?: number
|
||||
word: string
|
||||
type: 'word' | 'bookmark'
|
||||
type: 'word' | 'bookmark' | 'punctuation' | 'sentence'
|
||||
}
|
||||
|
||||
export const synthesizeTextToSpeech = async (
|
||||
@ -47,12 +49,17 @@ export const synthesizeTextToSpeech = async (
|
||||
)
|
||||
speechConfig.speechSynthesisOutputFormat =
|
||||
SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3
|
||||
// Required for sentence-level WordBoundary events
|
||||
speechConfig.setProperty(
|
||||
PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
|
||||
'true'
|
||||
)
|
||||
|
||||
// Create the speech synthesizer.
|
||||
const synthesizer = new SpeechSynthesizer(speechConfig)
|
||||
const speechMarks: SpeechMark[] = []
|
||||
let timeOffset = 0
|
||||
let wordOffset = 0
|
||||
// let wordOffset = 0
|
||||
|
||||
synthesizer.synthesizing = function (s, e) {
|
||||
// convert arrayBuffer to stream and write to stream
|
||||
@ -87,13 +94,14 @@ export const synthesizeTextToSpeech = async (
|
||||
|
||||
// e.audioOffset is measured in ticks (1 tick = 100 nanoseconds); divide by 10,000 to convert to milliseconds.
|
||||
synthesizer.wordBoundary = (s, e) => {
|
||||
speechMarks.push({
|
||||
word: e.text,
|
||||
time: (timeOffset + e.audioOffset) / 10000,
|
||||
start: wordOffset + e.textOffset,
|
||||
length: e.wordLength,
|
||||
type: 'word',
|
||||
})
|
||||
e.boundaryType === SpeechSynthesisBoundaryType.Sentence &&
|
||||
speechMarks.push({
|
||||
word: e.text,
|
||||
time: (timeOffset + e.audioOffset) / 10000,
|
||||
start: e.textOffset,
|
||||
length: e.text.length,
|
||||
type: 'sentence',
|
||||
})
|
||||
}
|
||||
|
||||
synthesizer.bookmarkReached = (s, e) => {
|
||||
@ -143,7 +151,7 @@ export const synthesizeTextToSpeech = async (
|
||||
const text = _.escape(input.text)
|
||||
const ssml = `${startSsmlTag}${text}${endSsml()}`
|
||||
// set the text offset to be the end of SSML start tag
|
||||
wordOffset -= startSsmlTag.length
|
||||
// wordOffset -= startSsmlTag.length
|
||||
const result = await speakSsmlAsyncPromise(ssml)
|
||||
if (result.reason === ResultReason.Canceled) {
|
||||
throw new Error(result.errorDetails)
|
||||
|
||||
Reference in New Issue
Block a user