From 03a11b310ca045be589b72c414ed2febf7cd1d65 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 15 Nov 2022 13:10:20 +0800 Subject: [PATCH] Fix not tokenizing sentences correctly in li elements by adding a new line to the end of each text in the li element --- packages/text-to-speech/src/htmlToSsml.ts | 4 ++++ .../test/fixtures/blockquote.html | 1 + .../text-to-speech/test/htmlToSsml.test.ts | 21 +++++++++++++++---- 3 files changed, 22 insertions(+), 4 deletions(-) create mode 100644 packages/text-to-speech/test/fixtures/blockquote.html diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index be3f1c2e8..e6c1207d6 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -178,6 +178,10 @@ function emitElement( } if (child.nodeType == 1 /* Node.ELEMENT_NODE */) { maxVisitedIdx = emitElement(textItems, child as HTMLElement, false) + if (child.nodeName === 'LI') { + // add a new line after each list item + emit(textItems, '\n') + } } } diff --git a/packages/text-to-speech/test/fixtures/blockquote.html b/packages/text-to-speech/test/fixtures/blockquote.html new file mode 100644 index 000000000..6cc238e7b --- /dev/null +++ b/packages/text-to-speech/test/fixtures/blockquote.html @@ -0,0 +1 @@ +

Join the Slow Chinese Community

Our first live meet-up kicks-off in just under 24 hours at 7pm UK time on Tuesday 15 November.

The 20% discount on annual membership ends just before that at 6pm on Tuesday.

So click here to claim your discount and join our first monthly meet-up.

Join Now!

What resources do I get access to as a member?

As a member of the community, you’ll get access to lots of extra learning tools, helping you use the newsletter content as a resource to take your Chinese language skills to the next level:

  • 🔉Podcast

  • ✅ Downloads

  • 📚Reading practice

  • 🔎 Archive

  • 💬 Community

Join Now!

Is the membership right for me?

And as the clock ticks and this ship gets ready to set sail, here is a quick quiz to decide if membership is right for you:

  • Are you frustrated that your hard-won Chinese language skills are getting worse not better?

  • Do you find it hard to make time for improving your language skills?

  • Do you struggle with motivation to carry on learning Chinese?

  • Is it difficult to find good content to help you learn?

If you are an advanced or long-time learner of Chinese, and the answer to any of these questions is ‘yes’, then you’re in luck.

Because the 200+ members of the Slow Chinese community all faced these exact same problems.

But they now access unique learning resources, engaging content, and a community of like-minded learners who are all taking their language skills to the next level.

So….

Click here to claim your discount and join our first monthly meet-up on Tuesday at 7pm.

Join Now!

What’s on the agenda in our first meet-up?

The format will be a conversation in mostly English between me and our editor, Zoe Qian.

We will discuss your questions about your language learning challenges, any content you’ve read in the newsletter, and on the membership product.

We’ll chat for around 30 minutes. And we’ll publish the audio recording as a member-only podcast shortly after.

We’ll aim to answer 3-5 questions in the live session.

Some questions submitted so far include:

  • Just for curiosity, how do you pick the articles for Slow Chinese?

  • Any advice on finding opportunities to communicate in Chinese?

  • What are your tips to improve comprehension?  I feel like I’m working on reading, listening, and speaking all at once, sometimes I feel like I’m just getting surface understanding.

  • I often feel I use the same words/phrases over and over and my short-term memory is weak, I live in a non-Chinese environment although I have many opportunities to practice Chinese and have no plans to travel to China.

  • I'm American-born Chinese, so I grew up with Chinese-speaking parents, but I used English at home and in daily life. My listening is strong, everything else is weak. I'm in China currently studying in a Master's program. I'm probably about HSK5-6 in my vocabulary and reading comprehension. If you have any tips for picking up reading/speaking, I'd love to know.

Join Now!

As long as you sign up before the deadline of 6pm 15 November, you’ll get the discount and be able to join live and ask a question.

So, hopefully we’ll see quite a few of you tomorrow!

Andrew

+++

Ps - we’ve added a lot to the membership product over the last few months. So we plan to increase our prices in December. This discount will lock you in to the lower price of $96 per year for as long as you are subscribed.

diff --git a/packages/text-to-speech/test/htmlToSsml.test.ts b/packages/text-to-speech/test/htmlToSsml.test.ts index 4f53bdaf4..261ebf549 100644 --- a/packages/text-to-speech/test/htmlToSsml.test.ts +++ b/packages/text-to-speech/test/htmlToSsml.test.ts @@ -15,6 +15,10 @@ const TEST_OPTIONS = { rate: '1.0', } +const load = (filename: string) => { + return fs.readFileSync(path.join(__dirname, filename), 'utf8') +} + describe('stripEmojis', () => { it('strips emojis from text and removes the extra space', () => { const text = '🥛The Big Short guy is back with a new prediction' @@ -226,10 +230,8 @@ describe('htmlToSpeechFile', () => { describe('convert HTML to Speech file', () => { it('converts each
  • to an utterance', () => { - const html = fs.readFileSync( - path.resolve(__dirname, './fixtures/li.html'), - { encoding: 'utf-8' } - ) + const html = load('./fixtures/li.html') + const speechFile = htmlToSpeechFile({ content: html, title: 'Wang Yi at the UN; Fu Zhenghua sentenced; Nvidia China sales', @@ -290,4 +292,15 @@ describe('convert HTML to Speech file', () => { 'If terms of the original $12.5 billion financing package remain the same, bankers may struggle to sell the risky Twitter buyout debt just as credit markets begin to crack, with yields at multiyear highs, they’re potentially on the hook for hundreds of millions of dollars of losses on the unsecured portion alone should they try to unload it to investors.' ) }) + + it('splits sentences correctly in a blockquote element', () => { + const html = load('./fixtures/blockquote.html') + + const speechFile = htmlToSpeechFile({ + content: html, + options: TEST_OPTIONS, + }) + + expect(speechFile.utterances).to.have.lengthOf(42) + }) })