diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index 944a5f1e5..cc0506c76 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -69,8 +69,7 @@ const TOP_LEVEL_TAGS = [ 'H4', 'H5', 'H6', - 'UL', - 'OL', + 'LI', 'CODE', ] @@ -312,7 +311,7 @@ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => { const dom = parseHTML(content) const body = dom.document.querySelector('#readability-page-1') if (!body) { - console.log('No HTML body found:', content) + console.log('No HTML body found') return { wordCount: 0, language, @@ -323,7 +322,7 @@ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => { const parsedNodes = parseDomTree(body) if (parsedNodes.length < 1) { - console.log('No HTML nodes found:', body) + console.log('No HTML nodes found') return { wordCount: 0, language, diff --git a/packages/text-to-speech/test/fixtures/large.html b/packages/text-to-speech/test/fixtures/large.html new file mode 100644 index 000000000..868f1fa49 --- /dev/null +++ b/packages/text-to-speech/test/fixtures/large.html @@ -0,0 +1,181 @@ +
+
+
+
+

Summary of today’s Essential Eight:

+
    +
  1. +

    + Wang Yi at the UN + - Among Wang Yi’s meetings was one with Russian Foreign Minister + Lavrov. There was nothing in the readout from the Lavrov meeting + that would indicate a shift in the PRC position in the Russian + invasion of Ukraine. Wang will meet US Secretary of State + Blinken Friday. + 

    +
  2. +
  3. +

    + Two more sentences in “Sun Lijun clique” case + - Authorities are wrapping up the Sun Lijun "clique" case before + the 20th. Today both Fu Zhenghua and Wang Like were sentenced + to death with a two-year reprieve, and both releases said they had + no possibility of parole or reduction in sentence. Sun has yet + to be sentenced but it feels like it will happen imminently. + Given his leadership role he should at least get life in jail, + if not the actual death penalty, though he was promoted for + years by people above him in the system so perhaps he performed + “meritorious service” and ratted out other senior officials. + 

    +
  4. +
  5. +

    + Weekly State Council Executive Meeting + - This meeting did not offer any significant economic boosts, + among other things it reviewed reports of the inspection teams + sent to several provinces to check on implementation of economic + stabilization measures, promised more administrative reforms, + and cut toll fees for freight trucks by 10% and + government-designated cargo port charges by 20% in Q4. + +

    +
  6. +
  7. +

    + Why this economic downturn may be different + - Two good pieces, one from Logan Wright and another from “China Charts”. The real estate boom is over and it is not coming back any + time soon, if ever. That is the outcome the policymakers have + been targeting for years, though they may have been + overconfident in their ability to rein in real estate without + creating dangerous domino effects throughout the economy. We are + all waiting for the 20th Party Congress outcomes, but I see no + reason to think there will be outcomes from that meeting that + reverse the trajectory of the real estate sector. + +

    +
  8. +
  9. +

    + PCAOB Audit inspections in Hong Kong + - The trial audits of PRC firms are underway, so far the signs + are positive that the PRC side understands the concessions + needed to keep the PRC firms listed in the US, but as the PCAOB + chair said today “The Holding Foreign Companies Accountable Act + demands complete access. The agreement we signed with our + Chinese counterparts guarantees complete access. And the PCAOB + will accept nothing less than complete access when we make our + determinations by the end of this year. When I say no loopholes + and no exceptions, I mean none.” Having a law that allows little + room for concessions has been very helpful to US negotiators. + +

    +
  10. +
  11. +

    + Nvidia CEO does not sound too worried about China sales + - The CEO told Caixin that “There will be versions that are + going to be not restricted and serve the needs of the vast + majority of our market very comfortably” and he told Stratechery + that “The limitations and the restrictions are very specific to + a combination of computation level and multi-chip + interconnection level. That restriction gives us plenty of + envelope to go and run our business and for the vast majority of + our customers in China”. +

    +
  12. +
  13. +

    + US-PRC scientific relations + - There are two new reports of note, one on scientists who + worked at Los Alamos labs and then returned to the PRC and + contributed to PRC weapons development, and another on the + outflow of Chinese scientists from the US. + +

    +
  14. +
  15. +

    + Another scandal in the film and TV sector +

    +
  16. +
+

Thanks for reading.

+
+
+

This post is for paid subscribers

+
+
+
+
diff --git a/packages/text-to-speech/test/htmlToSsml.test.ts b/packages/text-to-speech/test/htmlToSsml.test.ts index bfc86c55c..6159bb047 100644 --- a/packages/text-to-speech/test/htmlToSsml.test.ts +++ b/packages/text-to-speech/test/htmlToSsml.test.ts @@ -1,6 +1,19 @@ import 'mocha' import { expect } from 'chai' -import { htmlToSsmlItems, stripEmojis } from '../src/htmlToSsml' +import { + htmlToSpeechFile, + htmlToSsmlItems, + stripEmojis, +} from '../src/htmlToSsml' +import * as fs from 'fs' +import path from 'path' + +const TEST_OPTIONS = { + primaryVoice: 'test-primary', + secondaryVoice: 'test-secondary', + language: 'en-US', + rate: '1.0', +} describe('stripEmojis', () => { it('strips emojis from text and removes the extra space', () => { @@ -20,14 +33,7 @@ describe('stripEmojis', () => { }) }) -describe('htmlToSsmlItems', () => { - const TEST_OPTIONS = { - primaryVoice: 'test-primary', - secondaryVoice: 'test-secondary', - language: 'en-US', - rate: '1.0', - } - +describe('htmlToSpeechFile', () => { describe('a simple html file', () => { xit('should convert Html to SSML', () => { const ssml = htmlToSsmlItems( @@ -217,3 +223,18 @@ describe('htmlToSsmlItems', () => { // }) // }) }) + +describe('convert HTML to Speech file', () => { + it('should convert HTML to many utterances', () => { + const html = fs.readFileSync( + path.resolve(__dirname, './fixtures/large.html'), + { encoding: 'utf-8' } + ) + const speechFile = htmlToSpeechFile({ + content: html, + title: 'test', + options: TEST_OPTIONS, + }) + expect(speechFile.utterances).to.have.lengthOf(12) + }) +})