Add function to parse HTML to SSML
This commit is contained in:
@ -327,14 +327,12 @@ export const enqueueSyncWithIntegration = async (
|
||||
|
||||
export const enqueueTextToSpeech = async (
|
||||
userId: string,
|
||||
pageId: string,
|
||||
text: string
|
||||
pageId: string
|
||||
): Promise<string> => {
|
||||
const { GOOGLE_CLOUD_PROJECT } = process.env
|
||||
const payload = {
|
||||
userId,
|
||||
pageId,
|
||||
text,
|
||||
}
|
||||
|
||||
// If there is no Google Cloud Project Id exposed, it means that we are in local environment
|
||||
|
||||
@ -276,6 +276,48 @@ export const parsePreparedContent = async (
|
||||
})
|
||||
article.content = article.dom.outerHTML
|
||||
}
|
||||
|
||||
const ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES = [
|
||||
'omnivore-highlight-id',
|
||||
'data-twitter-tweet-id',
|
||||
'data-instagram-id',
|
||||
]
|
||||
|
||||
// Get the top level element?
|
||||
const pageNode = article.dom.firstElementChild as HTMLElement
|
||||
console.log('pageNode: ', pageNode)
|
||||
const nodesToVisitStack: [HTMLElement] = [pageNode]
|
||||
const visitedNodeList = []
|
||||
|
||||
while (nodesToVisitStack.length > 0) {
|
||||
const currentNode = nodesToVisitStack.pop()
|
||||
console.log('currentNode: ', currentNode?.nodeType)
|
||||
if (
|
||||
currentNode?.nodeType !== 1 ||
|
||||
// Avoiding dynamic elements from being counted as anchor-allowed elements
|
||||
ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES.some((attrib) =>
|
||||
currentNode.hasAttribute(attrib)
|
||||
)
|
||||
) {
|
||||
continue
|
||||
}
|
||||
visitedNodeList.push(currentNode)
|
||||
;[].slice
|
||||
.call(currentNode.childNodes)
|
||||
.reverse()
|
||||
.forEach(function (node) {
|
||||
nodesToVisitStack.push(node)
|
||||
})
|
||||
}
|
||||
|
||||
visitedNodeList.shift()
|
||||
visitedNodeList.forEach((node, index) => {
|
||||
// start from index 1, index 0 reserved for anchor unknown.
|
||||
node.setAttribute('data-omnivore-anchor-idx', (index + 1).toString())
|
||||
})
|
||||
|
||||
console.log('article content:', article.dom.outerHTML)
|
||||
article.content = article.dom.outerHTML
|
||||
}
|
||||
|
||||
const newWindow = parseHTML('')
|
||||
|
||||
@ -10,13 +10,12 @@ import {
|
||||
SpeechSynthesizer,
|
||||
} from 'microsoft-cognitiveservices-speech-sdk'
|
||||
import { env } from '../env'
|
||||
import { parseHTML } from 'linkedom'
|
||||
|
||||
/**
 * Request passed to `synthesizeTextToSpeech`.
 *
 * Only `id` and `text` are required; every other field is optional.
 */
export interface TextToSpeechInput {
  /** Identifier for this synthesis request. */
  id: string
  /** Content to be spoken. */
  text: string
  /** Voice name, e.g. `'en-US-JennyNeural'`. */
  voice?: string
  /** Whether `text` is plain text or SSML markup. */
  textType?: 'text' | 'ssml'
  // NOTE(review): `engine` presumably dates from the commented-out AWS Polly
  // implementation in this module — confirm it is still read anywhere.
  engine?: 'standard' | 'neural'
  /** Language tag, e.g. `'en-US'`. */
  languageCode?: string
}
|
||||
|
||||
@ -27,16 +26,14 @@ export interface TextToSpeechOutput {
|
||||
|
||||
/**
 * A timing marker collected while audio is being synthesized.
 *
 * Two kinds of marks are produced:
 * - `'word'` marks, which carry the position of the word in the input text
 *   through `start` and `length`;
 * - `'bookmark'` marks, which carry only the bookmark name in `word` and
 *   therefore omit `start` and `length`.
 */
export interface SpeechMark {
  /** Audio offset of the mark; the synthesizer callbacks log this value as milliseconds. */
  time: number
  /** Character offset of the word in the input text (word marks only). */
  start?: number
  /** Length of the word (word marks only). */
  length?: number
  /** The spoken word, or the bookmark's name for bookmark marks. */
  word: string
  /** Discriminates word-boundary marks from bookmark marks. */
  type: 'word' | 'bookmark'
}
|
||||
|
||||
// Module-level logger used by the speech-synthesis callbacks in this file.
// NOTE(review): the label is 'app.dispatch' although this module handles
// text-to-speech — confirm the label is intentional and not copied over.
const logger = buildLogger('app.dispatch')
|
||||
|
||||
// // create a new AWS Polly client
|
||||
// const client = new AWS.Polly()
|
||||
|
||||
export const synthesizeTextToSpeech = async (
|
||||
input: TextToSpeechInput
|
||||
): Promise<TextToSpeechOutput> => {
|
||||
@ -69,10 +66,9 @@ export const synthesizeTextToSpeech = async (
|
||||
// The event synthesis completed signals that the synthesis is completed.
|
||||
synthesizer.synthesisCompleted = (s, e) => {
|
||||
logger.info(
|
||||
'(synthesized) Reason: ' +
|
||||
ResultReason[e.result.reason] +
|
||||
' Audio length: ' +
|
||||
`(synthesized) Reason: ${ResultReason[e.result.reason]} Audio length: ${
|
||||
e.result.audioData.byteLength
|
||||
}`
|
||||
)
|
||||
}
|
||||
|
||||
@ -100,6 +96,20 @@ export const synthesizeTextToSpeech = async (
|
||||
time: (timeOffset + e.audioOffset) / 10000,
|
||||
start: characterOffset + e.textOffset,
|
||||
length: e.wordLength,
|
||||
type: 'word',
|
||||
})
|
||||
}
|
||||
|
||||
synthesizer.bookmarkReached = (s, e) => {
|
||||
logger.info(
|
||||
`(Bookmark reached), Audio offset: ${
|
||||
e.audioOffset / 10000
|
||||
}ms, bookmark text: ${e.text}`
|
||||
)
|
||||
speechMarks.push({
|
||||
word: e.text,
|
||||
time: (timeOffset + e.audioOffset) / 10000,
|
||||
type: 'bookmark',
|
||||
})
|
||||
}
|
||||
|
||||
@ -138,84 +148,39 @@ export const synthesizeTextToSpeech = async (
|
||||
}
|
||||
}
|
||||
|
||||
// export const createAudio = async (
|
||||
// input: TextToSpeechInput
|
||||
// ): Promise<Buffer> => {
|
||||
// const { text, voice, textType, engine, languageCode } = input
|
||||
// const params: SynthesizeSpeechInput = {
|
||||
// OutputFormat: 'ogg_vorbis',
|
||||
// Text: text,
|
||||
// TextType: textType || 'text',
|
||||
// VoiceId: voice || 'Joanna',
|
||||
// Engine: engine || 'neural',
|
||||
// LanguageCode: languageCode || 'en-US',
|
||||
// }
|
||||
// try {
|
||||
// const data = await client.synthesizeSpeech(params).promise()
|
||||
// return data.AudioStream as Buffer
|
||||
// } catch (error) {
|
||||
// logger.error('Unable to create audio file', { error })
|
||||
// throw error
|
||||
// }
|
||||
// }
|
||||
/**
 * Converts article HTML into an SSML document for speech synthesis.
 *
 * Only `<p>` elements that carry a `data-omnivore-anchor-idx` attribute are
 * emitted. Each of them becomes an empty `<bookmark>` marker named
 * `data-omnivore-anchor-idx-<id>` immediately followed by the paragraph's
 * text, so the synthesizer can report when each paragraph starts. Everything
 * is wrapped in `<speak><voice><prosody>`.
 *
 * @param html - HTML markup to convert.
 * @param language - Value written to the `xml:lang` attribute of `<speak>`.
 * @param voice - Value written to the `name` attribute of `<voice>`.
 * @param rate - Speaking rate; written to `<prosody rate>` as `<rate>%`.
 * @param volume - Volume; written to `<prosody volume>` as a bare number.
 * @returns The serialized `<speak>` element.
 */
export const htmlToSsml = (
  html: string,
  language = 'en-US',
  voice = 'en-US-JennyNeural',
  rate = 100,
  volume = 100
): string => {
  const document = parseHTML(html).document
  const paragraphs = document.querySelectorAll('p')

  // Build the SSML tree in a separate, empty document.
  const ssml = parseHTML('').document
  const speakElement = ssml.createElement('speak')
  speakElement.setAttribute('version', '1.0')
  speakElement.setAttribute('xmlns', 'http://www.w3.org/2001/10/synthesis')
  speakElement.setAttribute('xml:lang', language)

  const voiceElement = ssml.createElement('voice')
  voiceElement.setAttribute('name', voice)
  speakElement.appendChild(voiceElement)

  const prosodyElement = ssml.createElement('prosody')
  prosodyElement.setAttribute('rate', `${rate}%`)
  prosodyElement.setAttribute('volume', volume.toString())
  voiceElement.appendChild(prosodyElement)

  // Add each anchored paragraph to the SSML document.
  paragraphs.forEach((p) => {
    const id = p.getAttribute('data-omnivore-anchor-idx')
    if (!id) {
      return
    }

    // A bookmark is an empty marker element: the text to be spoken must
    // follow it as a sibling rather than be nested inside it.
    const bookMark = ssml.createElement('bookmark')
    bookMark.setAttribute('mark', `data-omnivore-anchor-idx-${id}`)
    prosodyElement.appendChild(bookMark)

    // A text node keeps the paragraph content escaped when serialized.
    prosodyElement.appendChild(ssml.createTextNode(p.innerText))
  })

  return speakElement.outerHTML
}
|
||||
|
||||
@ -11,16 +11,9 @@ describe('textToSpeech', () => {
|
||||
it('should create an audio file with speech marks', async () => {
|
||||
const input: TextToSpeechInput = {
|
||||
id: generateFakeUuid(),
|
||||
text:
|
||||
'《太阁立志传5 DX》清洲会议触发教程\n' +
|
||||
'玩家要亲历清洲会议事件,需要位于织田家。\n' +
|
||||
'清洲会议需要完成以下条件才能触发:\n' +
|
||||
'本能寺发生之后,织田信长和织田信忠死亡。\n' +
|
||||
'羽柴秀吉、柴田胜家、织田信雄、织田信孝为大名。\n' +
|
||||
'清洲城必须为信雄的直辖城,或者清洲城主为信雄一方。\n' +
|
||||
'前两个条件都很容易达成,主要是要保证清洲城主为信雄这一条件比较难办,需要玩家控制城主封地。',
|
||||
languageCode: 'zh-CN',
|
||||
voice: 'zh-CN-XiaochenNeural',
|
||||
text: 'Marry had a little lamb',
|
||||
languageCode: 'en-US',
|
||||
voice: 'en-US-JennyNeural',
|
||||
}
|
||||
const output = await synthesizeTextToSpeech(input)
|
||||
expect(output.audioUrl).to.be.a('string')
|
||||
|
||||
Reference in New Issue
Block a user