Enqueue text to speech tasks
This commit is contained in:
38
packages/api/src/entity/speech.ts
Normal file
38
packages/api/src/entity/speech.ts
Normal file
@ -0,0 +1,38 @@
|
||||
import {
|
||||
Column,
|
||||
CreateDateColumn,
|
||||
Entity,
|
||||
JoinColumn,
|
||||
ManyToOne,
|
||||
PrimaryGeneratedColumn,
|
||||
UpdateDateColumn,
|
||||
} from 'typeorm'
|
||||
import { User } from './user'
|
||||
|
||||
/**
 * TypeORM entity mapped to the `speech` table.
 *
 * Each row holds one text-to-speech result: the audio URL and the speech
 * marks, together with the page id and voice they were stored for, owned
 * by a single user.
 */
@Entity({ name: 'speech' })
export class Speech {
  /** Primary key, generated as a UUID. */
  @PrimaryGeneratedColumn('uuid')
  id!: string

  /** Owning user, joined through `user_id`; declared with ON DELETE CASCADE. */
  @ManyToOne(() => User, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'user_id' })
  user!: User

  /** Id of the page this speech belongs to (presumably the Elasticsearch page id — confirm). */
  @Column('text')
  elasticPageId!: string

  /** URL of the audio. */
  @Column('text')
  audioUrl!: string

  /** Speech marks stored as text. */
  @Column('text')
  speechMarks!: string

  /**
   * Voice identifier.
   *
   * The speech migration creates this column as `voice` (not `voice_id`),
   * so the column name is pinned explicitly; otherwise the property name
   * would be mapped to a column that does not exist.
   * NOTE(review): the migration leaves `voice` nullable while this property
   * is typed as a required string — confirm which side is intended.
   */
  @Column('text', { name: 'voice' })
  voiceId!: string

  /** Creation timestamp; defaults to CURRENT_TIMESTAMP. */
  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
  createdAt!: Date

  /** Last-update timestamp; defaults to CURRENT_TIMESTAMP. */
  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
  updatedAt!: Date
}
|
||||
53
packages/api/src/entity/user_personalization.ts
Normal file
53
packages/api/src/entity/user_personalization.ts
Normal file
@ -0,0 +1,53 @@
|
||||
import {
|
||||
Column,
|
||||
CreateDateColumn,
|
||||
Entity,
|
||||
JoinColumn,
|
||||
OneToOne,
|
||||
PrimaryGeneratedColumn,
|
||||
UpdateDateColumn,
|
||||
} from 'typeorm'
|
||||
import { User } from './user'
|
||||
|
||||
/**
 * TypeORM entity mapped to the `user_personalization` table.
 *
 * Holds one user's optional reader, library and speech preferences. Every
 * preference column is nullable.
 */
@Entity({ name: 'user_personalization' })
export class UserPersonalization {
  /** Primary key, generated as a UUID. */
  @PrimaryGeneratedColumn('uuid')
  id!: string

  /** Owning user (one-to-one), joined through `user_id`; declared with ON DELETE CASCADE. */
  @OneToOne(() => User, { onDelete: 'CASCADE' })
  @JoinColumn({ name: 'user_id' })
  user!: User

  @Column('text', { nullable: true })
  fontFamily?: string

  @Column('integer', { nullable: true })
  fontSize?: number

  // Declared as an integer column so the column type agrees with the
  // `number` property type, like the other numeric preferences here.
  // NOTE(review): confirm the `margin` column is INTEGER in the database schema.
  @Column('integer', { nullable: true })
  margin?: number

  @Column('text', { nullable: true })
  theme?: string

  @Column('text', { nullable: true })
  libraryLayoutType?: string

  @Column('text', { nullable: true })
  librarySortOrder?: string

  /** Preferred text-to-speech voice. */
  @Column('text', { nullable: true })
  speechVoice?: string

  /** Preferred speech rate (integer; units are not visible here). */
  @Column('integer', { nullable: true })
  speechRate?: number

  /** Preferred speech volume (integer; units are not visible here). */
  @Column('integer', { nullable: true })
  speechVolume?: number

  /** Creation timestamp; defaults to CURRENT_TIMESTAMP. */
  @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
  createdAt!: Date

  /** Last-update timestamp; defaults to CURRENT_TIMESTAMP. */
  @UpdateDateColumn({ default: () => 'CURRENT_TIMESTAMP' })
  updatedAt!: Date
}
|
||||
@ -63,6 +63,7 @@ interface BackendEnv {
|
||||
contentFetchGCFUrl: string
|
||||
reminderTaskHanderUrl: string
|
||||
integrationTaskHandlerUrl: string
|
||||
textToSpeechTaskHandlerUrl: string
|
||||
}
|
||||
fileUpload: {
|
||||
gcsUploadBucket: string
|
||||
@ -138,6 +139,7 @@ const nullableEnvVars = [
|
||||
'SENDGRID_INSTALLATION_TEMPLATE_ID',
|
||||
'READWISE_API_URL',
|
||||
'INTEGRATION_TASK_HANDLER_URL',
|
||||
'TEXT_TO_SPEECH_TASK_HANDLER_URL',
|
||||
] // Allow some vars to be null/empty
|
||||
|
||||
/* If not in GAE and Prod/QA/Demo env (f.e. on localhost/dev env), allow following env vars to be null */
|
||||
@ -221,6 +223,7 @@ export function getEnv(): BackendEnv {
|
||||
contentFetchGCFUrl: parse('CONTENT_FETCH_GCF_URL'),
|
||||
reminderTaskHanderUrl: parse('REMINDER_TASK_HANDLER_URL'),
|
||||
integrationTaskHandlerUrl: parse('INTEGRATION_TASK_HANDLER_URL'),
|
||||
textToSpeechTaskHandlerUrl: parse('TEXT_TO_SPEECH_TASK_HANDLER_URL'),
|
||||
}
|
||||
const imageProxy = {
|
||||
url: parse('IMAGE_PROXY_URL'),
|
||||
|
||||
@ -325,4 +325,35 @@ export const enqueueSyncWithIntegration = async (
|
||||
return createdTasks[0].name
|
||||
}
|
||||
|
||||
/**
 * Enqueues a text-to-speech task for a page via `createHttpTaskWithToken`,
 * targeting `env.queue.textToSpeechTaskHandlerUrl`.
 *
 * When running locally (`env.dev.isLocal`) or when `GOOGLE_CLOUD_PROJECT`
 * is not set, no task is created and a random `nanoid()` is returned.
 *
 * @param userId - id of the user, forwarded in the task payload
 * @param pageId - id of the page, forwarded in the task payload
 * @returns the name of the created task, or a random id when no task is created
 * @throws CreateTaskError when no task name can be read from the result
 */
export const enqueueTextToSpeech = async (
  userId: string,
  pageId: string
): Promise<string> => {
  const { GOOGLE_CLOUD_PROJECT } = process.env
  const payload = {
    userId,
    pageId,
  }

  // If there is no Google Cloud Project Id exposed, it means that we are in local environment
  if (env.dev.isLocal || !GOOGLE_CLOUD_PROJECT) {
    return nanoid()
  }

  const createdTasks = await createHttpTaskWithToken({
    project: GOOGLE_CLOUD_PROJECT,
    payload,
    taskHandlerUrl: env.queue.textToSpeechTaskHandlerUrl,
  })

  // Optional chaining also covers an empty result, so a missing first entry
  // is reported as CreateTaskError instead of crashing with a TypeError.
  const taskName = createdTasks?.[0]?.name
  if (!taskName) {
    logger.error(`Unable to get the name of the task`, {
      payload,
      createdTasks,
    })
    throw new CreateTaskError(`Unable to get the name of the task`)
  }

  return taskName
}
|
||||
|
||||
export default createHttpTaskWithToken
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import * as AWS from 'aws-sdk'
|
||||
import { buildLogger } from './logger'
|
||||
import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'
|
||||
import { getFilePublicUrl, uploadToBucket } from './uploads'
|
||||
import { SynthesizeSpeechInput } from 'aws-sdk/clients/polly'
|
||||
|
||||
export interface TextToSpeechInput {
|
||||
id: string
|
||||
@ -9,6 +9,7 @@ export interface TextToSpeechInput {
|
||||
voice?: string
|
||||
textType?: 'text' | 'ssml'
|
||||
engine?: 'standard' | 'neural'
|
||||
languageCode?: string
|
||||
}
|
||||
|
||||
export interface TextToSpeechOutput {
|
||||
@ -24,13 +25,14 @@ const client = new AWS.Polly()
|
||||
export const createAudio = async (
|
||||
input: TextToSpeechInput
|
||||
): Promise<Buffer> => {
|
||||
const { text, voice, textType, engine } = input
|
||||
const { text, voice, textType, engine, languageCode } = input
|
||||
const params: SynthesizeSpeechInput = {
|
||||
OutputFormat: 'ogg_vorbis',
|
||||
Text: text,
|
||||
TextType: textType || 'text',
|
||||
VoiceId: voice || 'Joanna',
|
||||
Engine: engine || 'neural',
|
||||
LanguageCode: languageCode || 'en-US',
|
||||
}
|
||||
try {
|
||||
const data = await client.synthesizeSpeech(params).promise()
|
||||
@ -44,14 +46,15 @@ export const createAudio = async (
|
||||
export const createSpeechMarks = async (
|
||||
input: TextToSpeechInput
|
||||
): Promise<string> => {
|
||||
const { text, voice, textType, engine } = input
|
||||
const { text, voice, textType, engine, languageCode } = input
|
||||
const params: SynthesizeSpeechInput = {
|
||||
OutputFormat: 'json',
|
||||
Text: text,
|
||||
TextType: textType || 'text',
|
||||
VoiceId: voice || 'Joanna',
|
||||
Engine: engine || 'neural',
|
||||
SpeechMarkTypes: ['sentence'],
|
||||
SpeechMarkTypes: ['word'],
|
||||
LanguageCode: languageCode || 'en-US',
|
||||
}
|
||||
try {
|
||||
const data = await client.synthesizeSpeech(params).promise()
|
||||
|
||||
28
packages/db/migrations/0093.do.speech.sql
Executable file
28
packages/db/migrations/0093.do.speech.sql
Executable file
@ -0,0 +1,28 @@
|
||||
-- Type: DO
|
||||
-- Name: speech
|
||||
-- Description: Add speech table containing text to speech audio_url and speech_marks
|
||||
|
||||
BEGIN;

-- One row per stored text-to-speech result (audio URL plus speech marks).
CREATE TABLE omnivore.speech (
    id              UUID        PRIMARY KEY DEFAULT uuid_generate_v1mc(),
    user_id         UUID        NOT NULL REFERENCES omnivore.user ON DELETE CASCADE,
    elastic_page_id TEXT        NOT NULL,
    voice           TEXT,
    audio_url       TEXT        NOT NULL,
    speech_marks    TEXT        NOT NULL,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
    updated_at      TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
);

-- Runs update_updated_at_column() before every row update
-- (presumably refreshes updated_at; the function is defined elsewhere).
CREATE TRIGGER speech_modtime
    BEFORE UPDATE ON omnivore.speech
    FOR EACH ROW
    EXECUTE PROCEDURE update_updated_at_column();

-- DELETE is deliberately not granted to omnivore_user on the speech table.
GRANT SELECT, INSERT, UPDATE ON omnivore.speech TO omnivore_user;

-- Per-user speech preferences; all three columns are nullable.
ALTER TABLE omnivore.user_personalization ADD COLUMN speech_voice TEXT;
ALTER TABLE omnivore.user_personalization ADD COLUMN speech_rate INTEGER;
ALTER TABLE omnivore.user_personalization ADD COLUMN speech_volume INTEGER;

COMMIT;
|
||||
14
packages/db/migrations/0093.undo.speech.sql
Executable file
14
packages/db/migrations/0093.undo.speech.sql
Executable file
@ -0,0 +1,14 @@
|
||||
-- Type: UNDO
|
||||
-- Name: speech
|
||||
-- Description: Drop the speech table and remove the speech columns from user_personalization
|
||||
|
||||
BEGIN;

-- Remove the speech table if it is present.
DROP TABLE IF EXISTS omnivore.speech;

-- Remove the speech preference columns from user_personalization, if present.
ALTER TABLE omnivore.user_personalization DROP COLUMN IF EXISTS speech_voice;
ALTER TABLE omnivore.user_personalization DROP COLUMN IF EXISTS speech_rate;
ALTER TABLE omnivore.user_personalization DROP COLUMN IF EXISTS speech_volume;

COMMIT;
|
||||
Reference in New Issue
Block a user