From 94f9dd9e6e55779ca2c744bb7f2b6534f9a37181 Mon Sep 17 00:00:00 2001
From: Hongbo Wu <hongbo@omnivore.app>
Date: Fri, 12 Aug 2022 17:53:41 +0800
Subject: [PATCH] Enqueue text to speech tasks

---
 packages/api/src/entity/speech.ts             | 38 +++++++++++++
 .../api/src/entity/user_personalization.ts    | 53 +++++++++++++++++++
 packages/api/src/util.ts                      |  3 ++
 packages/api/src/utils/createTask.ts          | 31 +++++++++++
 packages/api/src/utils/textToSpeech.ts        | 11 ++--
 packages/db/migrations/0093.do.speech.sql     | 28 ++++++++++
 packages/db/migrations/0093.undo.speech.sql   | 14 +++++
 7 files changed, 174 insertions(+), 4 deletions(-)
 create mode 100644 packages/api/src/entity/speech.ts
 create mode 100644 packages/api/src/entity/user_personalization.ts
 create mode 100755 packages/db/migrations/0093.do.speech.sql
 create mode 100755 packages/db/migrations/0093.undo.speech.sql

diff --git a/packages/api/src/entity/speech.ts b/packages/api/src/entity/speech.ts
new file mode 100644
index 000000000..27e189d6b
--- /dev/null
+++ b/packages/api/src/entity/speech.ts
@@ -0,0 +1,45 @@
+import {
+  Column,
+  CreateDateColumn,
+  Entity,
+  JoinColumn,
+  ManyToOne,
+  PrimaryGeneratedColumn,
+  UpdateDateColumn,
+} from 'typeorm'
+import { User } from './user'
+
+/**
+ * Text-to-speech output (audio URL and speech marks) stored for a user's page.
+ * Maps to the `speech` table.
+ */
+@Entity({ name: 'speech' })
+export class Speech {
+  @PrimaryGeneratedColumn('uuid')
+  id!: string
+
+  // Owning user; the row is removed together with the user.
+  @ManyToOne(() => User, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'user_id' })
+  user!: User
+
+  @Column('text')
+  elasticPageId!: string
+
+  @Column('text')
+  audioUrl!: string
+
+  @Column('text')
+  speechMarks!: string
+
+  // The migration names this column `voice` and leaves it nullable, so the
+  // mapping is spelled out instead of being derived from the property name.
+  @Column('text', { name: 'voice', nullable: true })
+  voiceId?: string
+
+  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  createdAt!: Date
+
+  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  updatedAt!: Date
+}
diff --git a/packages/api/src/entity/user_personalization.ts b/packages/api/src/entity/user_personalization.ts
new file mode 100644
index 000000000..8044ef1a7
--- /dev/null
+++ b/packages/api/src/entity/user_personalization.ts
@@ -0,0 +1,60 @@
+import {
+  Column,
+  CreateDateColumn,
+  Entity,
+  JoinColumn,
+  OneToOne,
+  PrimaryGeneratedColumn,
+  UpdateDateColumn,
+} from 'typeorm'
+import { User } from './user'
+
+/**
+ * Per-user display, library and speech preferences.
+ * Maps to the `user_personalization` table; every preference is nullable.
+ */
+@Entity({ name: 'user_personalization' })
+export class UserPersonalization {
+  @PrimaryGeneratedColumn('uuid')
+  id!: string
+
+  // Owning user (one-to-one); the row is removed together with the user.
+  @OneToOne(() => User, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'user_id' })
+  user!: User
+
+  @Column('text', { nullable: true })
+  fontFamily?: string
+
+  @Column('integer', { nullable: true })
+  fontSize?: number
+
+  // Numeric like fontSize, so the column is declared as an integer.
+  @Column('integer', { nullable: true })
+  margin?: number
+
+  @Column('text', { nullable: true })
+  theme?: string
+
+  @Column('text', { nullable: true })
+  libraryLayoutType?: string
+
+  @Column('text', { nullable: true })
+  librarySortOrder?: string
+
+  // Speech settings; their columns are added by migration 0093.
+  @Column('text', { nullable: true })
+  speechVoice?: string
+
+  @Column('integer', { nullable: true })
+  speechRate?: number
+
+  @Column('integer', { nullable: true })
+  speechVolume?: number
+
+  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  createdAt!: Date
+
+  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  updatedAt!: Date
+}
diff --git a/packages/api/src/util.ts b/packages/api/src/util.ts
index f43a9a8b2..3cd8f7edb 100755
--- a/packages/api/src/util.ts
+++ b/packages/api/src/util.ts
@@ -63,6 +63,7 @@ interface BackendEnv {
     contentFetchGCFUrl: string
     reminderTaskHanderUrl: string
     integrationTaskHandlerUrl: string
+    textToSpeechTaskHandlerUrl: string
   }
   fileUpload: {
     gcsUploadBucket: string
@@ -138,6 +139,7 @@ const nullableEnvVars = [
   'SENDGRID_INSTALLATION_TEMPLATE_ID',
   'READWISE_API_URL',
   'INTEGRATION_TASK_HANDLER_URL',
+  'TEXT_TO_SPEECH_TASK_HANDLER_URL',
 ] // Allow some vars to be null/empty
 
 /* If not in GAE and Prod/QA/Demo env (f.e. on localhost/dev env), allow following env vars to be null */
@@ -221,6 +223,7 @@ export function getEnv(): BackendEnv {
     contentFetchGCFUrl: parse('CONTENT_FETCH_GCF_URL'),
     reminderTaskHanderUrl: parse('REMINDER_TASK_HANDLER_URL'),
     integrationTaskHandlerUrl: parse('INTEGRATION_TASK_HANDLER_URL'),
+    textToSpeechTaskHandlerUrl: parse('TEXT_TO_SPEECH_TASK_HANDLER_URL'),
   }
   const imageProxy = {
     url: parse('IMAGE_PROXY_URL'),
diff --git a/packages/api/src/utils/createTask.ts b/packages/api/src/utils/createTask.ts
index 3dec00231..c69d5a986 100644
--- a/packages/api/src/utils/createTask.ts
+++ b/packages/api/src/utils/createTask.ts
@@ -325,4 +325,35 @@ export const enqueueSyncWithIntegration = async (
   return createdTasks[0].name
 }
 
+/**
+ * Creates an HTTP task for the text-to-speech handler and returns its name.
+ * In a local environment, or without GOOGLE_CLOUD_PROJECT, no task is
+ * created and a freshly generated id is returned instead.
+ * @throws CreateTaskError when the created task carries no name.
+ */
+export const enqueueTextToSpeech = async (
+  userId: string,
+  pageId: string
+): Promise<string> => {
+  const project = process.env.GOOGLE_CLOUD_PROJECT
+  if (env.dev.isLocal || !project) {
+    return nanoid()
+  }
+
+  const payload = { userId, pageId }
+  const tasks = await createHttpTaskWithToken({
+    project,
+    payload,
+    taskHandlerUrl: env.queue.textToSpeechTaskHandlerUrl,
+  })
+
+  const taskName = tasks ? tasks[0].name : undefined
+  if (!taskName) {
+    const message = `Unable to get the name of the task`
+    logger.error(message, { payload, createdTasks: tasks })
+    throw new CreateTaskError(message)
+  }
+  return taskName
+}
+
 export default createHttpTaskWithToken
diff --git a/packages/api/src/utils/textToSpeech.ts b/packages/api/src/utils/textToSpeech.ts
index 5d3f40eca..7f855cab7 100644
--- a/packages/api/src/utils/textToSpeech.ts
+++ b/packages/api/src/utils/textToSpeech.ts
@@ -1,7 +1,7 @@
 import * as AWS from 'aws-sdk'
 import { buildLogger } from './logger'
-import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'
 import { getFilePublicUrl, uploadToBucket } from './uploads'
+import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'
 
 export interface TextToSpeechInput {
   id: string
@@ -9,6 +9,7 @@ export interface TextToSpeechInput {
   voice?: string
   textType?: 'text' | 'ssml'
   engine?: 'standard' | 'neural'
+  languageCode?: string
 }
 
 export interface TextToSpeechOutput {
@@ -24,13 +25,14 @@ const client = new AWS.Polly()
 export const createAudio = async (
   input: TextToSpeechInput
 ): Promise<Buffer> => {
-  const { text, voice, textType, engine } = input
+  const { text, voice, textType, engine, languageCode } = input
   const params: SynthesizeSpeechInput = {
     OutputFormat: 'ogg_vorbis',
     Text: text,
     TextType: textType || 'text',
     VoiceId: voice || 'Joanna',
     Engine: engine || 'neural',
+    LanguageCode: languageCode || 'en-US',
   }
   try {
     const data = await client.synthesizeSpeech(params).promise()
@@ -44,14 +46,15 @@ export const createAudio = async (
 export const createSpeechMarks = async (
   input: TextToSpeechInput
 ): Promise<string> => {
-  const { text, voice, textType, engine } = input
+  const { text, voice, textType, engine, languageCode } = input
   const params: SynthesizeSpeechInput = {
     OutputFormat: 'json',
     Text: text,
     TextType: textType || 'text',
     VoiceId: voice || 'Joanna',
     Engine: engine || 'neural',
-    SpeechMarkTypes: ['sentence'],
+    SpeechMarkTypes: ['word'],
+    LanguageCode: languageCode || 'en-US',
   }
   try {
     const data = await client.synthesizeSpeech(params).promise()
diff --git a/packages/db/migrations/0093.do.speech.sql b/packages/db/migrations/0093.do.speech.sql
new file mode 100755
index 000000000..39a50aadf
--- /dev/null
+++ b/packages/db/migrations/0093.do.speech.sql
@@ -0,0 +1,34 @@
+-- Type: DO
+-- Name: speech
+-- Description: Add speech table containing text to speech audio_url and speech_marks
+
+BEGIN;
+
+-- Generated audio (URL and speech marks) and the voice used, per user and page.
+CREATE TABLE omnivore.speech (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v1mc(),
+    user_id UUID NOT NULL REFERENCES omnivore.user ON DELETE CASCADE,
+    elastic_page_id TEXT NOT NULL,
+    voice TEXT,
+    audio_url TEXT NOT NULL,
+    speech_marks TEXT NOT NULL,
+    created_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Run update_updated_at_column() before every row update.
+CREATE TRIGGER speech_modtime
+    BEFORE UPDATE ON omnivore.speech
+    FOR EACH ROW
+    EXECUTE PROCEDURE update_updated_at_column();
+
+-- DELETE is deliberately not granted to omnivore_user.
+GRANT SELECT, INSERT, UPDATE ON omnivore.speech TO omnivore_user;
+
+-- Per-user speech preferences.
+ALTER TABLE omnivore.user_personalization
+    ADD COLUMN speech_voice TEXT,
+    ADD COLUMN speech_rate INTEGER,
+    ADD COLUMN speech_volume INTEGER;
+
+COMMIT;
diff --git a/packages/db/migrations/0093.undo.speech.sql b/packages/db/migrations/0093.undo.speech.sql
new file mode 100755
index 000000000..9457f5bd8
--- /dev/null
+++ b/packages/db/migrations/0093.undo.speech.sql
@@ -0,0 +1,16 @@
+-- Type: UNDO
+-- Name: speech
+-- Description: Drop the speech table and the speech columns added to user_personalization
+
+BEGIN;
+
+-- Reverts 0093.do.speech.sql; IF EXISTS lets the rollback run again without errors.
+DROP TABLE IF EXISTS omnivore.speech;
+
+-- Remove the speech preference columns.
+ALTER TABLE omnivore.user_personalization
+    DROP COLUMN IF EXISTS speech_voice,
+    DROP COLUMN IF EXISTS speech_rate,
+    DROP COLUMN IF EXISTS speech_volume;
+
+COMMIT;