Enqueue text-to-speech tasks

This commit is contained in:
Hongbo Wu
2022-08-12 17:53:41 +08:00
parent 4b42d013ca
commit 94f9dd9e6e
7 changed files with 174 additions and 4 deletions

View File

@ -0,0 +1,38 @@
import {
Column,
CreateDateColumn,
Entity,
JoinColumn,
ManyToOne,
PrimaryGeneratedColumn,
UpdateDateColumn,
} from 'typeorm'
import { User } from './user'
/**
 * TypeORM entity mapped to the `speech` table.
 *
 * Each row is tied to a {@link User} through `user_id` and stores an audio URL,
 * speech marks and a voice identifier for the page identified by
 * `elasticPageId`.
 */
@Entity({ name: 'speech' })
export class Speech {
  @PrimaryGeneratedColumn('uuid')
  id!: string

  // Owning user; the relation is declared with ON DELETE CASCADE.
  @ManyToOne(() => User, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'user_id' })
  user!: User

  @Column('text')
  elasticPageId!: string

  @Column('text')
  audioUrl!: string

  @Column('text')
  speechMarks!: string

  // The speech table defines this column as `voice`, not `voice_id`, so the
  // column name is given explicitly rather than derived from the property name.
  // NOTE(review): `voice` is nullable in the table definition; confirm every
  // writer always sets it before relying on the non-null type here.
  @Column('text', { name: 'voice' })
  voiceId!: string

  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
  createdAt!: Date

  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
  updatedAt!: Date
}

View File

@ -0,0 +1,53 @@
import {
Column,
CreateDateColumn,
Entity,
JoinColumn,
OneToOne,
PrimaryGeneratedColumn,
UpdateDateColumn,
} from 'typeorm'
import { User } from './user'
/**
 * TypeORM entity mapped to the `user_personalization` table.
 *
 * Holds one row of optional display, library and speech preferences per
 * {@link User}; every preference column is nullable.
 */
@Entity({ name: 'user_personalization' })
export class UserPersonalization {
  @PrimaryGeneratedColumn('uuid')
  id!: string

  // Owning user; the relation is declared with ON DELETE CASCADE.
  @OneToOne(() => User, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'user_id' })
  user!: User

  @Column('text', { nullable: true })
  fontFamily?: string

  @Column('integer', { nullable: true })
  fontSize?: number

  // Declared as an integer column so the column type agrees with the `number`
  // property type, like the other numeric preferences in this entity.
  // NOTE(review): confirm the `margin` column is an integer in the database.
  @Column('integer', { nullable: true })
  margin?: number

  @Column('text', { nullable: true })
  theme?: string

  @Column('text', { nullable: true })
  libraryLayoutType?: string

  @Column('text', { nullable: true })
  librarySortOrder?: string

  @Column('text', { nullable: true })
  speechVoice?: string

  @Column('integer', { nullable: true })
  speechRate?: number

  @Column('integer', { nullable: true })
  speechVolume?: number

  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
  createdAt!: Date

  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
  updatedAt!: Date
}

View File

@ -63,6 +63,7 @@ interface BackendEnv {
contentFetchGCFUrl: string
reminderTaskHanderUrl: string
integrationTaskHandlerUrl: string
textToSpeechTaskHandlerUrl: string
}
fileUpload: {
gcsUploadBucket: string
@ -138,6 +139,7 @@ const nullableEnvVars = [
'SENDGRID_INSTALLATION_TEMPLATE_ID',
'READWISE_API_URL',
'INTEGRATION_TASK_HANDLER_URL',
'TEXT_TO_SPEECH_TASK_HANDLER_URL',
] // Allow some vars to be null/empty
/* If not in GAE and Prod/QA/Demo env (f.e. on localhost/dev env), allow following env vars to be null */
@ -221,6 +223,7 @@ export function getEnv(): BackendEnv {
contentFetchGCFUrl: parse('CONTENT_FETCH_GCF_URL'),
reminderTaskHanderUrl: parse('REMINDER_TASK_HANDLER_URL'),
integrationTaskHandlerUrl: parse('INTEGRATION_TASK_HANDLER_URL'),
textToSpeechTaskHandlerUrl: parse('TEXT_TO_SPEECH_TASK_HANDLER_URL'),
}
const imageProxy = {
url: parse('IMAGE_PROXY_URL'),

View File

@ -325,4 +325,35 @@ export const enqueueSyncWithIntegration = async (
return createdTasks[0].name
}
/**
 * Enqueues a text-to-speech task for a page.
 *
 * The task is created through `createHttpTaskWithToken`, targeting the
 * configured text-to-speech task handler URL with `{ userId, pageId }` as the
 * payload.
 *
 * @param userId - id of the user, forwarded in the task payload
 * @param pageId - id of the page, forwarded in the task payload
 * @returns the name of the created task; when running locally or when
 *   `GOOGLE_CLOUD_PROJECT` is not set, no task is created and a generated
 *   nanoid is returned instead
 * @throws CreateTaskError if the created task's name cannot be determined
 */
export const enqueueTextToSpeech = async (
  userId: string,
  pageId: string
): Promise<string> => {
  const { GOOGLE_CLOUD_PROJECT } = process.env
  const payload = {
    userId,
    pageId,
  }

  // If there is no Google Cloud Project Id exposed, it means that we are in local environment
  if (env.dev.isLocal || !GOOGLE_CLOUD_PROJECT) {
    return nanoid()
  }

  const createdTasks = await createHttpTaskWithToken({
    project: GOOGLE_CLOUD_PROJECT,
    payload,
    taskHandlerUrl: env.queue.textToSpeechTaskHandlerUrl,
  })

  // Optional chaining guards against an empty result or an undefined first
  // element, so that case is logged and reported as CreateTaskError instead
  // of surfacing as a raw TypeError.
  const taskName = createdTasks?.[0]?.name
  if (!taskName) {
    logger.error(`Unable to get the name of the task`, {
      payload,
      createdTasks,
    })
    throw new CreateTaskError(`Unable to get the name of the task`)
  }

  return taskName
}
export default createHttpTaskWithToken

View File

@ -1,7 +1,7 @@
import * as AWS from 'aws-sdk'
import { buildLogger } from './logger'
import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'
import { getFilePublicUrl, uploadToBucket } from './uploads'
import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'
export interface TextToSpeechInput {
id: string
@ -9,6 +9,7 @@ export interface TextToSpeechInput {
voice?: string
textType?: 'text' | 'ssml'
engine?: 'standard' | 'neural'
languageCode?: string
}
export interface TextToSpeechOutput {
@ -24,13 +25,14 @@ const client = new AWS.Polly()
export const createAudio = async (
input: TextToSpeechInput
): Promise<Buffer> => {
const { text, voice, textType, engine } = input
const { text, voice, textType, engine, languageCode } = input
const params: SynthesizeSpeechInput = {
OutputFormat: 'ogg_vorbis',
Text: text,
TextType: textType || 'text',
VoiceId: voice || 'Joanna',
Engine: engine || 'neural',
LanguageCode: languageCode || 'en-US',
}
try {
const data = await client.synthesizeSpeech(params).promise()
@ -44,14 +46,15 @@ export const createAudio = async (
export const createSpeechMarks = async (
input: TextToSpeechInput
): Promise<string> => {
const { text, voice, textType, engine } = input
const { text, voice, textType, engine, languageCode } = input
const params: SynthesizeSpeechInput = {
OutputFormat: 'json',
Text: text,
TextType: textType || 'text',
VoiceId: voice || 'Joanna',
Engine: engine || 'neural',
SpeechMarkTypes: ['sentence'],
SpeechMarkTypes: ['word'],
LanguageCode: languageCode || 'en-US',
}
try {
const data = await client.synthesizeSpeech(params).promise()

View File

@ -0,0 +1,28 @@
-- Type: DO
-- Name: speech
-- Description: Add speech table containing text to speech audio_url and speech_marks
BEGIN;

-- Table holding the audio URL and speech marks generated for a page,
-- owned by a user (rows are removed when the user is deleted).
CREATE TABLE omnivore.speech (
    id              UUID        PRIMARY KEY DEFAULT uuid_generate_v1mc(),
    user_id         UUID        NOT NULL REFERENCES omnivore.user ON DELETE CASCADE,
    elastic_page_id TEXT        NOT NULL,
    voice           TEXT,
    audio_url       TEXT        NOT NULL,
    speech_marks    TEXT        NOT NULL,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at      TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Invoke update_updated_at_column() before each row update
-- (presumably it refreshes updated_at; the function is defined elsewhere).
CREATE TRIGGER speech_modtime
    BEFORE UPDATE ON omnivore.speech
    FOR EACH ROW
    EXECUTE PROCEDURE update_updated_at_column();

-- DELETE is intentionally not granted on the speech table; only a superuser can delete rows.
GRANT SELECT, INSERT, UPDATE ON omnivore.speech TO omnivore_user;

-- Speech-related preference columns on the existing personalization table.
ALTER TABLE omnivore.user_personalization
    ADD COLUMN speech_voice  TEXT,
    ADD COLUMN speech_rate   INTEGER,
    ADD COLUMN speech_volume INTEGER;

COMMIT;

View File

@ -0,0 +1,14 @@
-- Type: UNDO
-- Name: speech
-- Description: Add speech table containing text to speech audio_url and speech_marks
BEGIN;

-- Reverse the DO migration in the opposite order: first remove the speech
-- preference columns, then drop the speech table. The two statements touch
-- different tables and do not depend on each other.
ALTER TABLE omnivore.user_personalization
    DROP COLUMN IF EXISTS speech_voice,
    DROP COLUMN IF EXISTS speech_rate,
    DROP COLUMN IF EXISTS speech_volume;

DROP TABLE IF EXISTS omnivore.speech;

COMMIT;