Make each <li> an utterance to shorten the utterance length and add a pause after the utterance ends
This commit is contained in:
@ -69,8 +69,7 @@ const TOP_LEVEL_TAGS = [
|
||||
'H4',
|
||||
'H5',
|
||||
'H6',
|
||||
'UL',
|
||||
'OL',
|
||||
'LI',
|
||||
'CODE',
|
||||
]
|
||||
|
||||
@ -312,7 +311,7 @@ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => {
|
||||
const dom = parseHTML(content)
|
||||
const body = dom.document.querySelector('#readability-page-1')
|
||||
if (!body) {
|
||||
console.log('No HTML body found:', content)
|
||||
console.log('No HTML body found')
|
||||
return {
|
||||
wordCount: 0,
|
||||
language,
|
||||
@ -323,7 +322,7 @@ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => {
|
||||
|
||||
const parsedNodes = parseDomTree(body)
|
||||
if (parsedNodes.length < 1) {
|
||||
console.log('No HTML nodes found:', body)
|
||||
console.log('No HTML nodes found')
|
||||
return {
|
||||
wordCount: 0,
|
||||
language,
|
||||
|
||||
181
packages/text-to-speech/test/fixtures/large.html
vendored
Normal file
181
packages/text-to-speech/test/fixtures/large.html
vendored
Normal file
@ -0,0 +1,181 @@
|
||||
<div id="readability-content">
|
||||
<div class="page" id="readability-page-1">
|
||||
<div data-omnivore-anchor-idx="1">
|
||||
<div data-omnivore-anchor-idx="2" dir="auto">
|
||||
<p data-omnivore-anchor-idx="3">Summary of today’s Essential Eight:</p>
|
||||
<ol data-omnivore-anchor-idx="4">
|
||||
<li data-omnivore-anchor-idx="5">
|
||||
<p data-omnivore-anchor-idx="6">
|
||||
<strong data-omnivore-anchor-idx="7"
|
||||
><a
|
||||
data-omnivore-anchor-idx="8"
|
||||
href="https://sinocism.com/i/74410518/wang-yi-at-the-un"
|
||||
rel=""
|
||||
>Wang Yi at the UN</a
|
||||
></strong
|
||||
><span data-omnivore-anchor-idx="9">
|
||||
- Among Wang YI’s meetings was one with Russian Foreign Minister
|
||||
Lavrov. There was nothing in the readout from the Lavrov meeting
|
||||
that would indicate a shift in the PRC position in the Russian
|
||||
invasion of Ukraine. Wang will meet US Secretary of State
|
||||
Blinken Friday.
|
||||
</span>
|
||||
</p>
|
||||
</li>
|
||||
<li data-omnivore-anchor-idx="10">
|
||||
<p data-omnivore-anchor-idx="11">
|
||||
<strong data-omnivore-anchor-idx="12"
|
||||
><a
|
||||
data-omnivore-anchor-idx="13"
|
||||
href="https://sinocism.com/i/74410518/two-more-sentences-in-sun-lijun-clique-case"
|
||||
rel=""
|
||||
>Two more sentences in “Sun Lijun clique” case</a
|
||||
></strong
|
||||
><span data-omnivore-anchor-idx="14">
|
||||
- Authorities are wrapping up the Sun Lijun "clique" case before
|
||||
the 20th. Today both Fu Zhenghua and Wang Like were sentenced
|
||||
death with a two year reprieve, and both releases said they had
|
||||
no possibility of parole or reduction in sentence. Sun has yet
|
||||
to be sentenced but it feels like it will happen imminently.
|
||||
Given his leadership role he should at least get life in jail,
|
||||
if not the actual death penalty, though he was promoted for
|
||||
years by people above him in the system so perhaps he performed
|
||||
“meritorious service” and ratted out other senior officials.
|
||||
</span>
|
||||
</p>
|
||||
</li>
|
||||
<li data-omnivore-anchor-idx="15">
|
||||
<p data-omnivore-anchor-idx="16">
|
||||
<strong data-omnivore-anchor-idx="17"
|
||||
><a
|
||||
data-omnivore-anchor-idx="18"
|
||||
href="https://sinocism.com/i/74410518/weekly-state-council-executive-meeting"
|
||||
rel=""
|
||||
>Weekly State Council Executive Meeting</a
|
||||
></strong
|
||||
><span data-omnivore-anchor-idx="19">
|
||||
- This meeting did not offer any significant economic boosts,
|
||||
among other things it reviewed reports of the inspection teams
|
||||
sent to several provinces to check on implementation of economic
|
||||
stabilization measures, promised more administrative reforms,
|
||||
and cut toll fees for freight trucks by 10% and
|
||||
government-designated cargo port charges by 20% in Q4.
|
||||
</span>
|
||||
</p>
|
||||
</li>
|
||||
<li data-omnivore-anchor-idx="20">
|
||||
<p data-omnivore-anchor-idx="21">
|
||||
<strong data-omnivore-anchor-idx="22"
|
||||
><a
|
||||
data-omnivore-anchor-idx="23"
|
||||
href="https://sinocism.com/i/74410518/why-this-economic-downturn-may-be-different"
|
||||
rel=""
|
||||
>Why this economic downturn may be different</a
|
||||
></strong
|
||||
><span data-omnivore-anchor-idx="24">
|
||||
- Two good pieces, one from Logan Wright and another from </span
|
||||
><a
|
||||
data-omnivore-anchor-idx="25"
|
||||
href="https://www.realchinacharts.com/p/long-view-its-coming-pt22?isFreemail=false"
|
||||
rel=""
|
||||
>“China Charts”</a
|
||||
><span data-omnivore-anchor-idx="26"
|
||||
>. The real estate boom is over and it is not coming back any
|
||||
time soon, if ever. That is the outcome the policymakers have
|
||||
been targeting for years, though they may have been
|
||||
overconfident in their ability to rein in real estate without
|
||||
creating dangerous domino effects throughout the economy. We are
|
||||
all waiting for the 20th Party Congress outcomes, but I see no
|
||||
reason to think there will be outcomes from that meeting that
|
||||
reverse the trajectory of the real estate sector.
|
||||
</span>
|
||||
</p>
|
||||
</li>
|
||||
<li data-omnivore-anchor-idx="27">
|
||||
<p data-omnivore-anchor-idx="28">
|
||||
<strong data-omnivore-anchor-idx="29"
|
||||
><a
|
||||
data-omnivore-anchor-idx="30"
|
||||
href="https://sinocism.com/i/74410518/pcaob-audit-inspections-in-hong-kong"
|
||||
rel=""
|
||||
>PCAOB Audit inspections in Hong Kong</a
|
||||
></strong
|
||||
><span data-omnivore-anchor-idx="31">
|
||||
- The trial audits of PRC firms are underway, so far the signs
|
||||
are positive that the PRC side understands the concessions
|
||||
needed to keep the PRC firms listed in the US, but as the PCAOB
|
||||
chair said today “The Holding Foreign Companies Accountable Act
|
||||
demands complete access. The agreement we signed with our
|
||||
Chinese counterparts guarantees complete access. And the PCAOB
|
||||
will accept nothing less than complete access when we make our
|
||||
determinations by the end of this year. When I say no loopholes
|
||||
and no exceptions, I mean none.” Having a law that allows little
|
||||
room for concessions has been very helpful to US negotiators.
|
||||
</span>
|
||||
</p>
|
||||
</li>
|
||||
<li data-omnivore-anchor-idx="32">
|
||||
<p data-omnivore-anchor-idx="33">
|
||||
<strong data-omnivore-anchor-idx="34"
|
||||
><a
|
||||
data-omnivore-anchor-idx="35"
|
||||
href="https://sinocism.com/i/74410518/nvidia-ceo-does-not-sound-too-worried-about-china-sales"
|
||||
rel=""
|
||||
>Nvidia CEO does not sound too worried about China sales</a
|
||||
></strong
|
||||
><span data-omnivore-anchor-idx="36">
|
||||
- The CEO told Caixin that ““There will be versions that are
|
||||
going to be not restricted and serve the needs of the vast
|
||||
majority of our market very comfortably” and he told </span
|
||||
><a
|
||||
data-omnivore-anchor-idx="37"
|
||||
href="https://stratechery.com/2022/an-interview-with-nvidia-ceo-jensen-huang-about-building-the-omniverse-cloud/#glut"
|
||||
rel=""
|
||||
>Stratechery</a
|
||||
><span data-omnivore-anchor-idx="38">
|
||||
that “The limitations and the restrictions are very specific to
|
||||
a combination of computation level and multi-chip
|
||||
interconnection level. That restriction gives us plenty of
|
||||
envelope to go and run our business and for the vast majority of
|
||||
our customers in China”.</span
|
||||
>
|
||||
</p>
|
||||
</li>
|
||||
<li data-omnivore-anchor-idx="39">
|
||||
<p data-omnivore-anchor-idx="40">
|
||||
<strong data-omnivore-anchor-idx="41"
|
||||
><a
|
||||
data-omnivore-anchor-idx="42"
|
||||
href="https://sinocism.com/i/74410518/us-prc-scientific-relations"
|
||||
rel=""
|
||||
>US-PRC scientific relations</a
|
||||
></strong
|
||||
><span data-omnivore-anchor-idx="43">
|
||||
- There are two new reports of note, one on scientists who
|
||||
worked at Los Alamos labs and then returned to the PRC and
|
||||
contributed to PRC weapons development, and another on the
|
||||
outflow of Chinese scientists from the US.
|
||||
</span>
|
||||
</p>
|
||||
</li>
|
||||
<li data-omnivore-anchor-idx="44">
|
||||
<p data-omnivore-anchor-idx="45">
|
||||
<strong data-omnivore-anchor-idx="46"
|
||||
><a
|
||||
data-omnivore-anchor-idx="47"
|
||||
href="https://sinocism.com/i/74410518/another-scandal-in-the-film-and-tv-sector"
|
||||
rel=""
|
||||
>Another scandal in the film and TV sector</a
|
||||
></strong
|
||||
>
|
||||
</p>
|
||||
</li>
|
||||
</ol>
|
||||
<p data-omnivore-anchor-idx="48">Thanks for reading.</p>
|
||||
</div>
|
||||
<div data-omnivore-anchor-idx="49" data-testid="paywall">
|
||||
<h2 data-omnivore-anchor-idx="50">This post is for paid subscribers</h2>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@ -1,6 +1,19 @@
|
||||
import 'mocha'
|
||||
import { expect } from 'chai'
|
||||
import { htmlToSsmlItems, stripEmojis } from '../src/htmlToSsml'
|
||||
import {
|
||||
htmlToSpeechFile,
|
||||
htmlToSsmlItems,
|
||||
stripEmojis,
|
||||
} from '../src/htmlToSsml'
|
||||
import * as fs from 'fs'
|
||||
import path from 'path'
|
||||
|
||||
// Options fixture shared by the tests in this file; it is passed as `options`
// to htmlToSpeechFile in the 'convert HTML to Speech file' test.
// The voice names are placeholder strings, not real voice identifiers.
const TEST_OPTIONS = {
|
||||
primaryVoice: 'test-primary',
|
||||
secondaryVoice: 'test-secondary',
|
||||
language: 'en-US',
|
||||
rate: '1.0',
|
||||
}
|
||||
|
||||
describe('stripEmojis', () => {
|
||||
it('strips emojis from text and removes the extra space', () => {
|
||||
@ -20,14 +33,7 @@ describe('stripEmojis', () => {
|
||||
})
|
||||
})
|
||||
|
||||
describe('htmlToSsmlItems', () => {
|
||||
const TEST_OPTIONS = {
|
||||
primaryVoice: 'test-primary',
|
||||
secondaryVoice: 'test-secondary',
|
||||
language: 'en-US',
|
||||
rate: '1.0',
|
||||
}
|
||||
|
||||
describe('htmlToSpeechFile', () => {
|
||||
describe('a simple html file', () => {
|
||||
xit('should convert Html to SSML', () => {
|
||||
const ssml = htmlToSsmlItems(
|
||||
@ -217,3 +223,18 @@ describe('htmlToSsmlItems', () => {
|
||||
// })
|
||||
// })
|
||||
})
|
||||
|
||||
// Regression test: a fixture dominated by one long ordered list must be split
// into multiple utterances by htmlToSpeechFile.
describe('convert HTML to Speech file', () => {
|
||||
it('should convert HTML to many utterances', () => {
|
||||
// Load the fixture relative to this test file.
const html = fs.readFileSync(
|
||||
path.resolve(__dirname, './fixtures/large.html'),
|
||||
{ encoding: 'utf-8' }
|
||||
)
|
||||
const speechFile = htmlToSpeechFile({
|
||||
content: html,
|
||||
title: 'test',
|
||||
options: TEST_OPTIONS,
|
||||
})
|
||||
// NOTE(review): 12 presumably = the fixture's 11 text blocks (2 paragraphs,
// 8 list items, 1 heading) plus one utterance for the title — confirm
// against htmlToSpeechFile.
expect(speechFile.utterances).to.have.lengthOf(12)
|
||||
})
|
||||
})
|
||||
|
||||
Reference in New Issue
Block a user