diff --git a/packages/api/package.json b/packages/api/package.json index 089294a75..40b70a0e3 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -116,7 +116,7 @@ "voca": "^1.4.0", "winston": "^3.3.3", "yaml": "^2.4.1", - "youtubei": "1.4.0" + "youtubei": "^1.5.4" }, "devDependencies": { "@istanbuljs/nyc-config-typescript": "^1.0.2", diff --git a/packages/api/src/jobs/process-youtube-video.ts b/packages/api/src/jobs/process-youtube-video.ts index 339657203..97d4fb0d5 100644 --- a/packages/api/src/jobs/process-youtube-video.ts +++ b/packages/api/src/jobs/process-youtube-video.ts @@ -315,6 +315,20 @@ export interface ProcessYouTubeTranscriptJobData { libraryItemId: string } +const sanitizeTranscript = ( + transcript: TranscriptProperties[] +): TranscriptProperties[] => { + return transcript.map((item) => { + return { + // Youtubei library uses comma and space to separate words in the transcript + // We need to remove the comma to avoid breaking the transcript + text: item.text.replace(/,/g, ''), + start: item.start, + duration: item.duration, + } + }) +} + export const processYouTubeTranscript = async ( jobData: ProcessYouTubeTranscriptJobData ) => { @@ -350,10 +364,12 @@ export const processYouTubeTranscript = async ( let transcript: TranscriptProperties[] | undefined = undefined if ('getTranscript' in video) { - transcript = await video.getTranscript() + transcript = await video.captions?.get() } if (transcript) { + transcript = sanitizeTranscript(transcript) + if (chapters) { transcript = addTranscriptChapters(chapters, transcript) } diff --git a/yarn.lock b/yarn.lock index 50342c7cf..fd5a95c26 100644 --- a/yarn.lock +++ b/yarn.lock @@ -26012,6 +26012,24 @@ protobufjs@7.2.4: "@types/node" ">=13.7.0" long "^5.0.0" +protobufjs@7.2.6: + version "7.2.6" + resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-7.2.6.tgz#4a0ccd79eb292717aacf07530a07e0ed20278215" + integrity sha512-dgJaEDDL6x8ASUZ1YqWciTRrdOuYNzoOf27oHNfdyvKqHr5i0FV7FSLU+aIeFjyFgVxrpTOtQUi0BLLBymZaBw== + dependencies: + "@protobufjs/aspromise" "^1.1.2" + "@protobufjs/base64" "^1.1.2" + "@protobufjs/codegen" "^2.0.4" + "@protobufjs/eventemitter" "^1.1.0" + "@protobufjs/fetch" "^1.1.0" + "@protobufjs/float" "^1.0.2" + "@protobufjs/inquire" "^1.1.0" + "@protobufjs/path" "^1.1.2" + "@protobufjs/pool" "^1.1.0" + "@protobufjs/utf8" "^1.1.0" + "@types/node" ">=13.7.0" + long "^5.0.0" + protobufjs@^6.11.3: version "6.11.4" resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-6.11.4.tgz#29a412c38bf70d89e537b6d02d904a6f448173aa" @@ -32517,13 +32535,13 @@ yocto-queue@^1.0.0: resolved "https://registry.yarnpkg.com/yocto-queue/-/yocto-queue-1.0.0.tgz#7f816433fb2cbc511ec8bf7d263c3b58a1a3c251" integrity sha512-9bnSc/HEW2uRy67wc+T8UwauLuPJVn28jb+GtJY16iiKWyvmYJRXVT4UamsAEGQfPohgr2q4Tq0sQbQlxTfi1g== -youtubei@1.4.0: - version "1.4.0" - resolved "https://registry.yarnpkg.com/youtubei/-/youtubei-1.4.0.tgz#a853080a292ab1a002c2658929cb8edd9e756fda" - integrity sha512-n3/f+46Q91p/Rfso73g9IHtmHhpW7z6ML5mELdeYY0BXsh757KFDvTT91e7RCzUblrSnLiKGMyO3UM4hIUJFsw== +youtubei@^1.5.4: + version "1.5.4" + resolved "https://registry.yarnpkg.com/youtubei/-/youtubei-1.5.4.tgz#2f1cd42f5f8dd614a60ab50bd5fabb8a15b4cd0f" + integrity sha512-TT99h0W6CUwHTxj6Q5xOT1w3v6pEDPw3xXQvTQ3tZ4Ez1VtZ20CGz5WSOyHjx7iXT8hDetHMZ1OQp64etGdI8Q== dependencies: node-fetch "2.6.7" - protobufjs "7.2.4" + protobufjs "7.2.6" yup@^0.31.0: version "0.31.1"