Enqueue text to speech tasks

2022-08-12 17:53:41 +08:00
parent 4b42d013ca
commit 94f9dd9e6e
7 changed files with 174 additions and 4 deletions
--- a/packages/api/src/entity/speech.ts
+++ b/packages/api/src/entity/speech.ts
@ -0,0 +1,38 @@
+import {
+  Column,
+  CreateDateColumn,
+  Entity,
+  JoinColumn,
+  ManyToOne,
+  PrimaryGeneratedColumn,
+  UpdateDateColumn,
+} from 'typeorm'
+import { User } from './user'
+
+/**
+ * Text-to-speech output stored for a user and a page.
+ *
+ * Mapped to the `speech` table. Each row references its owning user and
+ * holds the audio URL, the speech marks and the voice used.
+ */
+@Entity({ name: 'speech' })
+export class Speech {
+  @PrimaryGeneratedColumn('uuid')
+  id!: string
+
+  // Owning user, joined through `user_id`; the foreign key cascades on delete.
+  @ManyToOne(() => User, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'user_id' })
+  user!: User
+
+  // presumably the id of the page document in Elasticsearch; verify against caller
+  @Column('text')
+  elasticPageId!: string
+
+  @Column('text')
+  audioUrl!: string
+
+  @Column('text')
+  speechMarks!: string
+
+  // The migration creates this column as `voice`, so the column name is given
+  // explicitly; otherwise the property name would not resolve to it.
+  // NOTE(review): the column is nullable in the migration while this property
+  // is typed as always present; confirm which side should change.
+  @Column('text', { name: 'voice' })
+  voiceId!: string
+
+  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  createdAt!: Date
+
+  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  updatedAt!: Date
+}
--- a/packages/api/src/entity/user_personalization.ts
+++ b/packages/api/src/entity/user_personalization.ts
@ -0,0 +1,53 @@
+import {
+  Column,
+  CreateDateColumn,
+  Entity,
+  JoinColumn,
+  OneToOne,
+  PrimaryGeneratedColumn,
+  UpdateDateColumn,
+} from 'typeorm'
+import { User } from './user'
+
+/**
+ * Per-user display and speech preferences.
+ *
+ * Mapped to the `user_personalization` table. Every preference column is
+ * nullable, so each preference property is optional.
+ */
+@Entity({ name: 'user_personalization' })
+export class UserPersonalization {
+  @PrimaryGeneratedColumn('uuid')
+  id!: string
+
+  // Owning user, joined through `user_id`; the foreign key cascades on delete.
+  @OneToOne(() => User, { onDelete: 'CASCADE' })
+  @JoinColumn({ name: 'user_id' })
+  user!: User
+
+  @Column('text', { nullable: true })
+  fontFamily?: string
+
+  @Column('integer', { nullable: true })
+  fontSize?: number
+
+  // Declared as an integer column so the column type agrees with the
+  // `number` property type, like the other numeric preferences.
+  // NOTE(review): confirm against the column type in the original table migration.
+  @Column('integer', { nullable: true })
+  margin?: number
+
+  @Column('text', { nullable: true })
+  theme?: string
+
+  @Column('text', { nullable: true })
+  libraryLayoutType?: string
+
+  @Column('text', { nullable: true })
+  librarySortOrder?: string
+
+  @Column('text', { nullable: true })
+  speechVoice?: string
+
+  @Column('integer', { nullable: true })
+  speechRate?: number
+
+  @Column('integer', { nullable: true })
+  speechVolume?: number
+
+  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  createdAt!: Date
+
+  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
+  updatedAt!: Date
+}
--- a/packages/api/src/util.ts
+++ b/packages/api/src/util.ts
@ -63,6 +63,7 @@ interface BackendEnv {
    contentFetchGCFUrl: string
    reminderTaskHanderUrl: string
    integrationTaskHandlerUrl: string
+    textToSpeechTaskHandlerUrl: string
  }
  fileUpload: {
    gcsUploadBucket: string
@ -138,6 +139,7 @@ const nullableEnvVars = [
  'SENDGRID_INSTALLATION_TEMPLATE_ID',
  'READWISE_API_URL',
  'INTEGRATION_TASK_HANDLER_URL',
+  'TEXT_TO_SPEECH_TASK_HANDLER_URL',
 ] // Allow some vars to be null/empty

 /* If not in GAE and Prod/QA/Demo env (e.g. on localhost/dev env), allow the following env vars to be null */
@ -221,6 +223,7 @@ export function getEnv(): BackendEnv {
    contentFetchGCFUrl: parse('CONTENT_FETCH_GCF_URL'),
    reminderTaskHanderUrl: parse('REMINDER_TASK_HANDLER_URL'),
    integrationTaskHandlerUrl: parse('INTEGRATION_TASK_HANDLER_URL'),
+    textToSpeechTaskHandlerUrl: parse('TEXT_TO_SPEECH_TASK_HANDLER_URL'),
  }
  const imageProxy = {
    url: parse('IMAGE_PROXY_URL'),
--- a/packages/api/src/utils/createTask.ts
+++ b/packages/api/src/utils/createTask.ts
@ -325,4 +325,35 @@ export const enqueueSyncWithIntegration = async (
  return createdTasks[0].name
 }

+/**
+ * Enqueues a text-to-speech task for a page on behalf of a user.
+ *
+ * @param userId - id of the user, forwarded in the task payload
+ * @param pageId - id of the page, forwarded in the task payload
+ * @returns the name of the created task, or a freshly generated nanoid when
+ *   running locally (no task is created in that case)
+ * @throws CreateTaskError when the created task has no name or no task is returned
+ */
+export const enqueueTextToSpeech = async (
+  userId: string,
+  pageId: string
+): Promise<string> => {
+  const { GOOGLE_CLOUD_PROJECT } = process.env
+  const payload = {
+    userId,
+    pageId,
+  }
+
+  // If there is no Google Cloud Project Id exposed, it means that we are in local environment
+  if (env.dev.isLocal || !GOOGLE_CLOUD_PROJECT) {
+    return nanoid()
+  }
+
+  const createdTasks = await createHttpTaskWithToken({
+    project: GOOGLE_CLOUD_PROJECT,
+    payload,
+    taskHandlerUrl: env.queue.textToSpeechTaskHandlerUrl,
+  })
+
+  // Optional chaining also covers an empty result: indexing a missing first
+  // element directly would raise a TypeError instead of the CreateTaskError
+  // that callers expect.
+  const taskName = createdTasks?.[0]?.name
+  if (!taskName) {
+    logger.error(`Unable to get the name of the task`, {
+      payload,
+      createdTasks,
+    })
+    throw new CreateTaskError(`Unable to get the name of the task`)
+  }
+  return taskName
+}
+
 export default createHttpTaskWithToken
--- a/packages/api/src/utils/textToSpeech.ts
+++ b/packages/api/src/utils/textToSpeech.ts
@ -1,7 +1,7 @@
 import * as AWS from 'aws-sdk'
 import { buildLogger } from './logger'
-import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'
 import { getFilePublicUrl, uploadToBucket } from './uploads'
+import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'

 export interface TextToSpeechInput {
  id: string
@ -9,6 +9,7 @@ export interface TextToSpeechInput {
  voice?: string
  textType?: 'text' | 'ssml'
  engine?: 'standard' | 'neural'
+  languageCode?: string
 }

 export interface TextToSpeechOutput {
@ -24,13 +25,14 @@ const client = new AWS.Polly()
 export const createAudio = async (
  input: TextToSpeechInput
 ): Promise<Buffer> => {
-  const { text, voice, textType, engine } = input
+  const { text, voice, textType, engine, languageCode } = input
  const params: SynthesizeSpeechInput = {
    OutputFormat: 'ogg_vorbis',
    Text: text,
    TextType: textType || 'text',
    VoiceId: voice || 'Joanna',
    Engine: engine || 'neural',
+    LanguageCode: languageCode || 'en-US',
  }
  try {
    const data = await client.synthesizeSpeech(params).promise()
@ -44,14 +46,15 @@ export const createAudio = async (
 export const createSpeechMarks = async (
  input: TextToSpeechInput
 ): Promise<string> => {
-  const { text, voice, textType, engine } = input
+  const { text, voice, textType, engine, languageCode } = input
  const params: SynthesizeSpeechInput = {
    OutputFormat: 'json',
    Text: text,
    TextType: textType || 'text',
    VoiceId: voice || 'Joanna',
    Engine: engine || 'neural',
-    SpeechMarkTypes: ['sentence'],
+    SpeechMarkTypes: ['word'],
+    LanguageCode: languageCode || 'en-US',
  }
  try {
    const data = await client.synthesizeSpeech(params).promise()
--- a/packages/db/migrations/0093.do.speech.sql
+++ b/packages/db/migrations/0093.do.speech.sql
@ -0,0 +1,28 @@
+-- Type: DO
+-- Name: speech
+-- Description: Add speech table containing text to speech audio_url and speech_marks
+--              and add the speech preference columns to user_personalization
+
+-- Everything runs in one transaction, so the table, trigger, grant and
+-- column additions are applied together or not at all.
+BEGIN;
+
+-- Each row belongs to a user; deleting the user deletes the user's speech rows.
+-- Note: voice is the only nullable data column in this table.
+CREATE TABLE omnivore.speech (
+    id uuid PRIMARY KEY DEFAULT uuid_generate_v1mc(),
+    user_id uuid NOT NULL REFERENCES omnivore.user ON DELETE CASCADE,
+    elastic_page_id TEXT NOT NULL,
+    voice text,
+    audio_url text NOT NULL,
+    speech_marks text NOT NULL,
+    created_at timestamptz NOT NULL DEFAULT current_timestamp,
+    updated_at timestamptz NOT NULL DEFAULT current_timestamp
+);
+
+-- Runs update_updated_at_column() before every row update
+-- (presumably refreshes updated_at; the function is defined in another migration).
+CREATE TRIGGER speech_modtime BEFORE UPDATE ON omnivore.speech FOR EACH ROW EXECUTE PROCEDURE update_updated_at_column();
+
+-- No permission to delete on the speech table, only superuser can delete.
+GRANT SELECT, INSERT, UPDATE ON omnivore.speech TO omnivore_user;
+
+-- Speech preferences stored per user; all three columns are nullable.
+ALTER TABLE omnivore.user_personalization
+    ADD COLUMN speech_voice TEXT,
+    ADD COLUMN speech_rate INTEGER,
+    ADD COLUMN speech_volume INTEGER;
+
+COMMIT;
--- a/packages/db/migrations/0093.undo.speech.sql
+++ b/packages/db/migrations/0093.undo.speech.sql
@ -0,0 +1,14 @@
+-- Type: UNDO
+-- Name: speech
+-- Description: Drop the speech table and remove the speech preference columns
+--              from user_personalization
+
+-- Runs in one transaction so the rollback is applied completely or not at all.
+BEGIN;
+
+-- IF EXISTS keeps the rollback from failing when the table is already gone.
+DROP TABLE IF EXISTS omnivore.speech;
+
+-- Remove the columns added by the matching DO migration.
+ALTER TABLE omnivore.user_personalization
+    DROP COLUMN IF EXISTS speech_voice,
+    DROP COLUMN IF EXISTS speech_rate,
+    DROP COLUMN IF EXISTS speech_volume;
+
+COMMIT;