From 94f9dd9e6e55779ca2c744bb7f2b6534f9a37181 Mon Sep 17 00:00:00 2001
From: Hongbo Wu
Date: Fri, 12 Aug 2022 17:53:41 +0800
Subject: [PATCH] Enqueue text to speech tasks

---
 packages/api/src/entity/speech.ts             | 45 +++++++++++++++
 .../api/src/entity/user_personalization.ts    | 57 +++++++++++++++++++
 packages/api/src/util.ts                      |  3 +
 packages/api/src/utils/createTask.ts          | 41 +++++++++++++
 packages/api/src/utils/textToSpeech.ts        | 11 ++--
 packages/db/migrations/0093.do.speech.sql     | 28 +++++++++
 packages/db/migrations/0093.undo.speech.sql   | 14 +++++
 7 files changed, 195 insertions(+), 4 deletions(-)
 create mode 100644 packages/api/src/entity/speech.ts
 create mode 100644 packages/api/src/entity/user_personalization.ts
 create mode 100755 packages/db/migrations/0093.do.speech.sql
 create mode 100755 packages/db/migrations/0093.undo.speech.sql

diff --git a/packages/api/src/entity/speech.ts b/packages/api/src/entity/speech.ts
new file mode 100644
index 000000000..27e189d6b
--- /dev/null
+++ b/packages/api/src/entity/speech.ts
@@ -0,0 +1,45 @@
+import {
+  Column,
+  CreateDateColumn,
+  Entity,
+  JoinColumn,
+  ManyToOne,
+  PrimaryGeneratedColumn,
+  UpdateDateColumn,
+} from 'typeorm'
+import { User } from './user'
+
+/**
+ * Text-to-speech output for a page: the audio URL and the speech marks
+ * stored in the `speech` table.
+ */
+@Entity({ name: 'speech' })
+export class Speech {
+  @PrimaryGeneratedColumn('uuid')
+  id!: string
+
+  // Owning user; deleting the user cascades to this row.
+  @ManyToOne(() => User, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'user_id' })
+  user!: User
+
+  @Column('text')
+  elasticPageId!: string
+
+  @Column('text')
+  audioUrl!: string
+
+  @Column('text')
+  speechMarks!: string
+
+  // The migration names this column `voice` and leaves it nullable, so map
+  // the property explicitly instead of relying on the naming strategy.
+  @Column('text', { name: 'voice', nullable: true })
+  voiceId?: string
+
+  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  createdAt!: Date
+
+  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  updatedAt!: Date
+}
diff --git a/packages/api/src/entity/user_personalization.ts b/packages/api/src/entity/user_personalization.ts
new file mode 100644
index 000000000..8044ef1a7
--- /dev/null
+++ b/packages/api/src/entity/user_personalization.ts
@@ -0,0 +1,57 @@
+import {
+  Column,
+  CreateDateColumn,
+  Entity,
+  JoinColumn,
+  OneToOne,
+  PrimaryGeneratedColumn,
+  UpdateDateColumn,
+} from 'typeorm'
+import { User } from './user'
+
+/**
+ * Per-user reader, library and speech preferences stored in the
+ * `user_personalization` table (one-to-one with the user).
+ */
+@Entity({ name: 'user_personalization' })
+export class UserPersonalization {
+  @PrimaryGeneratedColumn('uuid')
+  id!: string
+
+  @OneToOne(() => User, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'user_id' })
+  user!: User
+
+  @Column('text', { nullable: true })
+  fontFamily?: string
+
+  @Column('integer', { nullable: true })
+  fontSize?: number
+
+  @Column('integer', { nullable: true })
+  margin?: number
+
+  @Column('text', { nullable: true })
+  theme?: string
+
+  @Column('text', { nullable: true })
+  libraryLayoutType?: string
+
+  @Column('text', { nullable: true })
+  librarySortOrder?: string
+
+  @Column('text', { nullable: true })
+  speechVoice?: string
+
+  @Column('integer', { nullable: true })
+  speechRate?: number
+
+  @Column('integer', { nullable: true })
+  speechVolume?: number
+
+  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  createdAt!: Date
+
+  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  updatedAt!: Date
+}
diff --git a/packages/api/src/util.ts b/packages/api/src/util.ts
index f43a9a8b2..3cd8f7edb 100755
--- a/packages/api/src/util.ts
+++ b/packages/api/src/util.ts
@@ -63,6 +63,7 @@ interface BackendEnv {
     contentFetchGCFUrl: string
     reminderTaskHanderUrl: string
     integrationTaskHandlerUrl: string
+    textToSpeechTaskHandlerUrl: string
   }
   fileUpload: {
     gcsUploadBucket: string
@@ -138,6 +139,7 @@ const nullableEnvVars = [
   'SENDGRID_INSTALLATION_TEMPLATE_ID',
   'READWISE_API_URL',
   'INTEGRATION_TASK_HANDLER_URL',
+  'TEXT_TO_SPEECH_TASK_HANDLER_URL',
 ] // Allow some vars to be null/empty
 
 /* If not in GAE and Prod/QA/Demo env (f.e. on localhost/dev env), allow following env vars to be null */
@@ -221,6 +223,7 @@ export function getEnv(): BackendEnv {
     contentFetchGCFUrl: parse('CONTENT_FETCH_GCF_URL'),
     reminderTaskHanderUrl: parse('REMINDER_TASK_HANDLER_URL'),
     integrationTaskHandlerUrl: parse('INTEGRATION_TASK_HANDLER_URL'),
+    textToSpeechTaskHandlerUrl: parse('TEXT_TO_SPEECH_TASK_HANDLER_URL'),
   }
   const imageProxy = {
     url: parse('IMAGE_PROXY_URL'),
diff --git a/packages/api/src/utils/createTask.ts b/packages/api/src/utils/createTask.ts
index 3dec00231..c69d5a986 100644
--- a/packages/api/src/utils/createTask.ts
+++ b/packages/api/src/utils/createTask.ts
@@ -325,4 +325,45 @@ export const enqueueSyncWithIntegration = async (
   return createdTasks[0].name
 }
 
+/**
+ * Enqueues a text-to-speech task for a page.
+ *
+ * @param userId - id of the user, forwarded in the task payload
+ * @param pageId - id of the page, forwarded in the task payload
+ * @returns the name of the created task, or a nanoid placeholder when running
+ *   locally or when GOOGLE_CLOUD_PROJECT is unset (no task is created then)
+ * @throws CreateTaskError when no task name can be read from the result
+ */
+export const enqueueTextToSpeech = async (
+  userId: string,
+  pageId: string
+): Promise<string> => {
+  const { GOOGLE_CLOUD_PROJECT } = process.env
+  const payload = {
+    userId,
+    pageId,
+  }
+
+  // If there is no Google Cloud Project Id exposed, it means that we are in local environment
+  if (env.dev.isLocal || !GOOGLE_CLOUD_PROJECT) {
+    return nanoid()
+  }
+
+  const createdTasks = await createHttpTaskWithToken({
+    project: GOOGLE_CLOUD_PROJECT,
+    payload,
+    taskHandlerUrl: env.queue.textToSpeechTaskHandlerUrl,
+  })
+
+  // Check the first element too, so an empty result raises CreateTaskError
+  if (!createdTasks || !createdTasks[0] || !createdTasks[0].name) {
+    logger.error(`Unable to get the name of the task`, {
+      payload,
+      createdTasks,
+    })
+    throw new CreateTaskError(`Unable to get the name of the task`)
+  }
+  return createdTasks[0].name
+}
+
 export default createHttpTaskWithToken
diff --git a/packages/api/src/utils/textToSpeech.ts b/packages/api/src/utils/textToSpeech.ts
index 5d3f40eca..7f855cab7 100644
--- a/packages/api/src/utils/textToSpeech.ts
+++ b/packages/api/src/utils/textToSpeech.ts
@@ -1,7 +1,7 @@
 import * as AWS from 'aws-sdk'
 import { buildLogger } from './logger'
-import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'
 import { getFilePublicUrl, uploadToBucket } from './uploads'
+import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'
 
 export interface TextToSpeechInput {
   id: string
@@ -9,6 +9,7 @@ export interface TextToSpeechInput {
   voice?: string
   textType?: 'text' | 'ssml'
   engine?: 'standard' | 'neural'
+  languageCode?: string
 }
 
 export interface TextToSpeechOutput {
@@ -24,13 +25,14 @@ const client = new AWS.Polly()
 export const createAudio = async (
   input: TextToSpeechInput
 ): Promise => {
-  const { text, voice, textType, engine } = input
+  const { text, voice, textType, engine, languageCode } = input
   const params: SynthesizeSpeechInput = {
     OutputFormat: 'ogg_vorbis',
     Text: text,
     TextType: textType || 'text',
     VoiceId: voice || 'Joanna',
     Engine: engine || 'neural',
+    LanguageCode: languageCode || 'en-US',
   }
   try {
     const data = await client.synthesizeSpeech(params).promise()
@@ -44,14 +46,15 @@ export const createAudio = async (
 export const createSpeechMarks = async (
   input: TextToSpeechInput
 ): Promise => {
-  const { text, voice, textType, engine } = input
+  const { text, voice, textType, engine, languageCode } = input
   const params: SynthesizeSpeechInput = {
     OutputFormat: 'json',
     Text: text,
     TextType: textType || 'text',
     VoiceId: voice || 'Joanna',
     Engine: engine || 'neural',
-    SpeechMarkTypes: ['sentence'],
+    SpeechMarkTypes: ['word'],
+    LanguageCode: languageCode || 'en-US',
   }
   try {
     const data = await client.synthesizeSpeech(params).promise()
diff --git a/packages/db/migrations/0093.do.speech.sql b/packages/db/migrations/0093.do.speech.sql
new file mode 100755
index 000000000..39a50aadf
--- /dev/null
+++ b/packages/db/migrations/0093.do.speech.sql
@@ -0,0 +1,28 @@
+-- Type: DO
+-- Name: speech
+-- Description: Add speech table containing text to speech audio_url and speech_marks
+
+BEGIN;
+
+CREATE TABLE omnivore.speech (
+    id uuid PRIMARY KEY DEFAULT uuid_generate_v1mc(),
+    user_id uuid NOT NULL REFERENCES omnivore.user ON DELETE CASCADE,
+    elastic_page_id TEXT NOT NULL,
+    voice text,
+    audio_url text NOT NULL,
+    speech_marks text NOT NULL,
+    created_at timestamptz NOT NULL DEFAULT current_timestamp,
+    updated_at timestamptz NOT NULL DEFAULT current_timestamp
+);
+
+CREATE TRIGGER speech_modtime BEFORE UPDATE ON omnivore.speech FOR EACH ROW EXECUTE PROCEDURE update_updated_at_column();
+
+-- No permission to delete on the speech table, only superuser can delete.
+GRANT SELECT, INSERT, UPDATE ON omnivore.speech TO omnivore_user;
+
+ALTER TABLE omnivore.user_personalization
+    ADD COLUMN speech_voice TEXT,
+    ADD COLUMN speech_rate INTEGER,
+    ADD COLUMN speech_volume INTEGER;
+
+COMMIT;
diff --git a/packages/db/migrations/0093.undo.speech.sql b/packages/db/migrations/0093.undo.speech.sql
new file mode 100755
index 000000000..9457f5bd8
--- /dev/null
+++ b/packages/db/migrations/0093.undo.speech.sql
@@ -0,0 +1,14 @@
+-- Type: UNDO
+-- Name: speech
+-- Description: Drop speech table and the speech columns added to user_personalization
+
+BEGIN;
+
+DROP TABLE IF EXISTS omnivore.speech;
+
+ALTER TABLE omnivore.user_personalization
+    DROP COLUMN IF EXISTS speech_voice,
+    DROP COLUMN IF EXISTS speech_rate,
+    DROP COLUMN IF EXISTS speech_volume;
+
+COMMIT;