Update API for ultra realistic voices

This commit is contained in:
Jackson Harper
2023-02-16 10:56:24 +08:00
parent c3020e4ddc
commit 9566c17db3
2 changed files with 19 additions and 98 deletions

View File

@ -69,6 +69,10 @@ public extension LinkedItem {
(labels?.count ?? 0) > 0
}
var isUnread: Bool {
readAt == nil
}
var isRead: Bool {
readingProgress >= 0.98
}

View File

@ -4,126 +4,43 @@ import {
TextToSpeechOutput,
} from './textToSpeech'
import axios from 'axios'
import ffmpegPath from '@ffmpeg-installer/ffmpeg'
import ffmpeg from 'fluent-ffmpeg'
import { PassThrough } from 'stream'
ffmpeg.setFfmpegPath(ffmpegPath.path)
interface PlayHtConvertResponse {
message: string
payload: string[]
}
const convertWavToMp3AndUpload = async (
inputStream: PassThrough,
outputStream: PassThrough
) => {
return new Promise<void>((resolve, reject) => {
ffmpeg(inputStream)
.audioCodec('libmp3lame')
.format('mp3')
.on('error', (err) => {
reject(err)
})
.on('end', () => {
console.debug('Finished processing')
resolve()
})
.pipe(outputStream, { end: true })
})
}
export class RealisticTextToSpeech implements TextToSpeech {
synthesizeTextToSpeech = async (
input: TextToSpeechInput
): Promise<TextToSpeechOutput> => {
const apiEndpoint = process.env.REALISTIC_VOICE_API_ENDPOINT
const voiceId = process.env.REALISTIC_VOICE_ID
const apiKey = process.env.REALISTIC_VOICE_API_KEY
const userId = process.env.REALISTIC_VOICE_USER_ID
if (!apiEndpoint || !apiKey || !userId) {
throw new Error('PlayHT API credentials not set')
const apiEndpoint = process.env.REALISTIC_API_ENDPOINT
if (!apiEndpoint || !apiKey || !voiceId) {
throw new Error('API credentials not set')
}
const inputStream = new PassThrough()
const HEADERS = {
Authorization: apiKey,
'X-User-ID': userId,
'xi-api-key': apiKey,
voice_id: voiceId,
'Content-Type': 'application/json',
}
const data = {
voice: input.voice,
content: [input.text],
}
// get the download url first
const response = await axios.post<PlayHtConvertResponse>(
apiEndpoint,
data,
const requestUrl = `${apiEndpoint}/${voiceId}`
const response = await axios.post<Buffer>(
requestUrl,
{
text: input.text,
},
{
headers: HEADERS,
}
)
if (response.data.payload.length === 0) {
if (response.data.length === 0) {
throw new Error('No payload returned')
}
const downloadUrl = response.data.payload[0]
// polling the download url until the file is ready
// timeout after 1 hour
const timeout = 60 * 60 * 1000
const startTime = Date.now()
let isReady = false
while (!isReady) {
if (Date.now() - startTime > timeout) {
throw new Error('Timeout when polling the download url')
}
// download the audio file
try {
const downloadResponse = await axios.get(downloadUrl, {
responseType: 'arraybuffer',
headers: {
'Content-Type': 'audio/wav',
},
})
// write the audio file to the input stream
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
inputStream.end(Buffer.from(downloadResponse.data, 'binary'))
isReady = true
} catch (e) {
// ignore error
console.debug('checking status of audio file', downloadUrl)
}
}
const outputStream = new PassThrough()
// transcode the audio file to mp3
await convertWavToMp3AndUpload(inputStream, outputStream)
// convert the buffer stream to a buffer
const audioData = await new Promise<Buffer>((resolve, reject) => {
const chunks: Buffer[] = []
outputStream.on('data', (chunk) => {
// eslint-disable-next-line @typescript-eslint/no-unsafe-argument
chunks.push(chunk)
})
outputStream.on('end', () => {
resolve(Buffer.concat(chunks))
})
outputStream.on('error', (err) => {
reject(err)
})
})
return {
audioData,
speechMarks: [],
audioData: response.data,
}
}