Better matching for YouTube video URLs
This commit is contained in:
@ -78,7 +78,6 @@
|
||||
"image-size": "^1.0.2",
|
||||
"intercom-client": "^3.1.4",
|
||||
"ioredis": "^5.3.2",
|
||||
"redis": "^4.6.13",
|
||||
"jsonwebtoken": "^8.5.1",
|
||||
"jwks-rsa": "^2.0.3",
|
||||
"langchain": "^0.1.21",
|
||||
@ -95,6 +94,7 @@
|
||||
"posthog-node": "^3.6.3",
|
||||
"private-ip": "^2.3.3",
|
||||
"prom-client": "^15.1.0",
|
||||
"redis": "^4.6.13",
|
||||
"rss-parser": "^3.13.0",
|
||||
"sanitize-html": "^2.3.2",
|
||||
"sax": "^1.3.0",
|
||||
@ -106,6 +106,7 @@
|
||||
"typeorm": "^0.3.4",
|
||||
"typeorm-naming-strategies": "^4.1.0",
|
||||
"underscore": "^1.13.6",
|
||||
"url-pattern": "^1.0.3",
|
||||
"urlsafe-base64": "^1.0.0",
|
||||
"uuid": "^8.3.1",
|
||||
"voca": "^1.4.0",
|
||||
@ -167,4 +168,4 @@
|
||||
"volta": {
|
||||
"extends": "../../package.json"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -14,6 +14,7 @@ import { enqueueProcessYouTubeTranscript } from '../utils/createTask'
|
||||
import { stringToHash } from '../utils/helpers'
|
||||
import { logger } from '../utils/logger'
|
||||
import { parsePreparedContent } from '../utils/parser'
|
||||
import { videoIdFromYouTubeUrl } from '../utils/youtube'
|
||||
|
||||
export interface ProcessYouTubeVideoJobData {
|
||||
userId: string
|
||||
@ -334,7 +335,7 @@ export const processYouTubeVideo = async (
|
||||
}
|
||||
|
||||
videoURL = new URL(libraryItem.originalUrl)
|
||||
const videoId = videoURL.searchParams.get('v')
|
||||
const videoId = videoIdFromYouTubeUrl(libraryItem.originalUrl)
|
||||
|
||||
if (!videoId) {
|
||||
logger.warning('no video id for supplied youtube url', {
|
||||
|
||||
@ -10,23 +10,12 @@ import {
|
||||
enqueueWebhookJob,
|
||||
} from './utils/createTask'
|
||||
import { buildLogger } from './utils/logger'
|
||||
import { isYouTubeVideoURL } from './utils/youtube'
|
||||
|
||||
const logger = buildLogger('pubsub')
|
||||
|
||||
const client = new PubSub()
|
||||
|
||||
/**
 * Reports whether `url` points at a YouTube host and carries a `v` query
 * parameter.
 *
 * @param url - Candidate URL string; may be undefined or empty.
 * @returns `true` when the URL parses, its hostname is `youtube.com` /
 *   `youtu.be` (or a subdomain of either) and a `v` parameter is present;
 *   `false` otherwise, including for input that is not a parseable URL.
 */
const isYouTubeVideoURL = (url: string | undefined): boolean => {
  if (!url) {
    return false
  }

  let parsed: URL
  try {
    parsed = new URL(url)
  } catch {
    // `new URL` throws a TypeError on malformed input; a predicate should
    // simply answer "no" in that case.
    return false
  }

  // `hostname` excludes the port (unlike `host`), so an explicit port does not
  // defeat the check. Requiring an exact match or a dot-separated suffix keeps
  // look-alike domains such as `notyoutube.com` out.
  const hostname = parsed.hostname
  const onYouTubeHost = ['youtube.com', 'youtu.be'].some(
    (domain) => hostname === domain || hostname.endsWith(`.${domain}`)
  )
  if (!onYouTubeHost) {
    return false
  }

  return parsed.searchParams.get('v') != null
}
|
||||
|
||||
export const createPubSubClient = (): PubsubClient => {
|
||||
const publish = (topicName: string, msg: Buffer): Promise<void> => {
|
||||
if (env.dev.isLocal) {
|
||||
@ -93,11 +82,11 @@ export const createPubSubClient = (): PubsubClient => {
|
||||
// })
|
||||
// }
|
||||
|
||||
const isYoutubeVideo = (data: any): data is { originalUrl: string } => {
|
||||
const isItemWithURL = (data: any): data is { originalUrl: string } => {
|
||||
return 'originalUrl' in data
|
||||
}
|
||||
|
||||
if (isYoutubeVideo(data) && isYouTubeVideoURL(data['originalUrl'])) {
|
||||
if (isItemWithURL(data) && isYouTubeVideoURL(data['originalUrl'])) {
|
||||
await enqueueProcessYouTubeVideo({
|
||||
userId,
|
||||
libraryItemId,
|
||||
|
||||
61
packages/api/src/utils/youtube.ts
Normal file
61
packages/api/src/utils/youtube.ts
Normal file
@ -0,0 +1,61 @@
|
||||
import UrlPattern from 'url-pattern'
|
||||
|
||||
/**
 * Extracts the video id from a YouTube URL.
 *
 * Recognised shapes:
 *  - any `youtube.com` / `youtu.be` host (subdomains included) with a
 *    non-empty `v` query parameter, e.g. `https://www.youtube.com/watch?v=<id>`
 *  - `http(s)://(www.)youtu.be/<id>`
 *  - `http(s)://(www.)youtube.com/v/<id>`
 *  - `http(s)://(www.)youtube.com/embed/<id>`
 *
 * Query string, fragment, port and a single trailing slash are ignored when
 * matching the path-based shapes. Other paths (for example `/shorts/<id>`)
 * are not treated as videos.
 *
 * @param urlString - Candidate URL; may be undefined or empty.
 * @returns The video id, or `undefined` when the input is missing, is not a
 *   parseable URL, is not on a YouTube host, or has no recognisable id.
 */
export const videoIdFromYouTubeUrl = (
  urlString: string | undefined
): string | undefined => {
  if (!urlString) {
    return undefined
  }

  let url: URL
  try {
    url = new URL(urlString)
  } catch {
    // `new URL` throws a TypeError on malformed input; report "no id" instead.
    return undefined
  }

  // Exact match or dot-separated suffix, so `notyoutube.com` is rejected while
  // `www.` / `m.` / `music.` subdomains are still accepted.
  const hostname = url.hostname
  const isOnDomain = (domain: string): boolean =>
    hostname === domain || hostname.endsWith(`.${domain}`)
  if (!isOnDomain('youtube.com') && !isOnDomain('youtu.be')) {
    return undefined
  }

  const queryVideoId = url.searchParams.get('v')
  if (queryVideoId) {
    return queryVideoId
  }

  // The path-based shapes are only defined for http(s) URLs.
  if (url.protocol !== 'http:' && url.protocol !== 'https:') {
    return undefined
  }

  // Path shapes are only honoured on the bare or `www.` host. Matching against
  // `pathname` keeps the query string, fragment and port out of the picture.
  const bareHost = hostname.startsWith('www.')
    ? hostname.slice('www.'.length)
    : hostname

  // A single path segment made of URL-safe id characters.
  const idSegment = '([A-Za-z0-9\\-_~%]+)'
  let pathPattern: RegExp | undefined
  if (bareHost === 'youtu.be') {
    pathPattern = new RegExp(`^/${idSegment}/?$`)
  } else if (bareHost === 'youtube.com') {
    pathPattern = new RegExp(`^/(?:v|embed)/${idSegment}/?$`)
  }

  const match = pathPattern?.exec(url.pathname)
  return match ? match[1] : undefined
}
|
||||
|
||||
/**
 * Reports whether `url` is a YouTube video URL, i.e. whether
 * `videoIdFromYouTubeUrl` can extract a video id from it.
 *
 * @param url - Candidate URL string; may be undefined or empty.
 * @returns `true` when a video id is found; `false` for missing input, for
 *   URLs without a recognisable id, and for input that fails to parse.
 */
export const isYouTubeVideoURL = (url: string | undefined): boolean => {
  if (!url) {
    return false
  }

  try {
    return videoIdFromYouTubeUrl(url) != null
  } catch {
    // URL parsing can throw on malformed input; as a predicate this function
    // answers "no" rather than propagating the error to its caller.
    return false
  }
}
|
||||
86
packages/api/test/utils/youtube.test.ts
Normal file
86
packages/api/test/utils/youtube.test.ts
Normal file
@ -0,0 +1,86 @@
|
||||
import 'mocha'
|
||||
import { expect } from 'chai'
|
||||
import {
|
||||
isYouTubeVideoURL,
|
||||
videoIdFromYouTubeUrl,
|
||||
} from '../../src/utils/youtube'
|
||||
|
||||
// Covers every URL shape videoIdFromYouTubeUrl recognises, plus the inputs it
// must reject.
describe('videoIdFromYouTubeUrl', () => {
  it('Returns video id for video with playlist id', () => {
    const result = videoIdFromYouTubeUrl(
      'https://www.youtube.com/watch?v=kfchvCyHmsc&list=PLDyKn8uKYtRalFdBWtv_EjDtUo2UEbu-a'
    )
    expect(result).to.eq('kfchvCyHmsc')
  })

  it('Returns video id for direct url', () => {
    const result = videoIdFromYouTubeUrl(
      'https://www.youtube.com/v/vLfAtCbE_Jc'
    )
    expect(result).to.eq('vLfAtCbE_Jc')
  })

  it('Returns video id for standard url', () => {
    const result = videoIdFromYouTubeUrl(
      'https://www.youtube.com/watch?v=vLfAtCbE_Jc'
    )
    expect(result).to.eq('vLfAtCbE_Jc')
  })

  it('Returns video id for short url', () => {
    const result = videoIdFromYouTubeUrl('https://youtu.be/vLfAtCbE_Jc')
    expect(result).to.eq('vLfAtCbE_Jc')
  })

  it('Returns video id for short url with share id', () => {
    const result = videoIdFromYouTubeUrl(
      'https://youtu.be/iZxR7rPdvuQ?si=ad73DTmmXL_lbn31'
    )
    expect(result).to.eq('iZxR7rPdvuQ')
  })

  it('Returns video id for embed url', () => {
    const result = videoIdFromYouTubeUrl(
      'https://www.youtube.com/embed/vLfAtCbE_Jc'
    )
    expect(result).to.eq('vLfAtCbE_Jc')
  })

  it('Returns undefined for non-youtube url', () => {
    const result = videoIdFromYouTubeUrl(
      'https://omnivore.app/iZxR7rPdvuQ?si=ad73DTmmXL_lbn31'
    )
    expect(result).to.eq(undefined)
  })

  it('Returns undefined for non-youtube short url', () => {
    const result = videoIdFromYouTubeUrl('https://omnivore.app/?v=iZxR7rPdvuQ')
    expect(result).to.eq(undefined)
  })

  it('Returns video id when port is added', () => {
    const result = videoIdFromYouTubeUrl(
      'https://www.youtube.com:443/watch?v=kfchvCyHmsc'
    )
    expect(result).to.eq('kfchvCyHmsc')
  })

  it('Returns undefined for a shorts url', () => {
    const result = videoIdFromYouTubeUrl(
      'https://www.youtube.com/shorts/ZsQKYwXbo4s'
    )
    expect(result).to.eq(undefined)
  })

  it('Returns undefined for undefined input', () => {
    const result = videoIdFromYouTubeUrl(undefined)
    expect(result).to.eq(undefined)
  })

  it('Returns undefined for an empty string', () => {
    const result = videoIdFromYouTubeUrl('')
    expect(result).to.eq(undefined)
  })
})
|
||||
|
||||
// Checks the boolean wrapper around videoIdFromYouTubeUrl, including missing
// input and the short-link form.
describe('isYouTubeVideoURL', () => {
  it('Returns false for a shorts URL', () => {
    const result = isYouTubeVideoURL(
      'https://www.youtube.com/shorts/ZsQKYwXbo4s'
    )
    expect(result).to.eq(false)
  })

  it('Returns false for a non-youtube URL', () => {
    const result = isYouTubeVideoURL('https://omnivore.app/about')
    expect(result).to.eq(false)
  })

  it('Returns true for a video URL', () => {
    const result = isYouTubeVideoURL(
      'https://www.youtube.com/watch?v=p4YOXmm839c'
    )
    expect(result).to.eq(true)
  })

  it('Returns true for a short video URL', () => {
    const result = isYouTubeVideoURL('https://youtu.be/vLfAtCbE_Jc')
    expect(result).to.eq(true)
  })

  it('Returns false for undefined input', () => {
    const result = isYouTubeVideoURL(undefined)
    expect(result).to.eq(false)
  })

  it('Returns false for an empty string', () => {
    const result = isYouTubeVideoURL('')
    expect(result).to.eq(false)
  })
})
|
||||
Reference in New Issue
Block a user