Merge pull request #4274 from omnivore-app/fix/youtube-handler

fix: special handler for youtube shorts
This commit is contained in:
Hongbo Wu
2024-08-18 12:38:42 +08:00
committed by GitHub
3 changed files with 64 additions and 35 deletions

View File

@ -165,8 +165,8 @@ const incrementContentFetchFailure = async (
const key = failureRedisKey(domain)
try {
const result = await redisClient.incr(key)
// expire the key in 1 day
await redisClient.expire(key, 24 * 60 * 60)
// expire the key in 1 hour
await redisClient.expire(key, 60 * 60)
return result
} catch (error) {

View File

@ -1,21 +1,13 @@
import { ContentHandler, PreHandleResult } from '../content-handler'
import axios from 'axios'
import _ from 'underscore'
import { ContentHandler, PreHandleResult } from '../content-handler'
const YOUTUBE_URL_MATCH =
/^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/
/^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/|shorts\/|playlist\?list=)?)([\w-]+)(\S+)?$/
/**
 * Extracts the video id from a YouTube URL.
 *
 * Reads the `v` query parameter first; when it is absent, falls back to the
 * fifth capture group of YOUTUBE_URL_MATCH applied to the raw URL string.
 *
 * NOTE(review): this span appears to contain both the pre-change body and the
 * post-change one-line body of a diff. As written, the final
 * `return u.searchParams.get('v')` is unreachable — confirm which version is
 * intended before relying on the fallback behaviour.
 *
 * @param url - URL string; it is passed to `new URL`, which throws on input
 *   that is not a valid absolute URL
 * @returns the video id, or `undefined` when neither the query parameter nor
 *   the pattern yields one
 */
export const getYoutubeVideoId = (url: string) => {
const u = new URL(url)
const videoId = u.searchParams.get('v')
if (!videoId) {
// No `v` parameter: try to pull the id out of the path via the URL pattern.
const match = url.toString().match(YOUTUBE_URL_MATCH)
if (match === null || match.length < 6 || !match[5]) {
return undefined
}
return match[5]
}
return videoId
// NOTE(review): unreachable after the `return` above.
return u.searchParams.get('v')
}
export const getYoutubePlaylistId = (url: string) => {
@ -23,6 +15,40 @@ export const getYoutubePlaylistId = (url: string) => {
return u.searchParams.get('list')
}
/**
 * Builds the embeddable player URL and the canonical page URL for a YouTube
 * link.
 *
 * Resolution order:
 *  1. a playlist id reported by `getYoutubePlaylistId` -> playlist embed
 *     (`/embed/videoseries?list=...`) and `/playlist?list=...`;
 *  2. a `/shorts/<id>` path -> regular embed player and `/shorts/<id>`;
 *  3. otherwise a regular video, preferring the id reported by
 *     `getYoutubeVideoId` and falling back to the id captured by
 *     YOUTUBE_URL_MATCH (e.g. `youtu.be/<id>`).
 *
 * Ids are percent-encoded before being interpolated, because values read from
 * the query string arrive decoded and could otherwise inject extra parameters
 * (`&`, `#`, `?`) into the generated URLs. Ids made only of word characters
 * and dashes are unaffected.
 *
 * @param url - the YouTube URL to resolve
 * @returns `src` (embed URL) and `url` (canonical page URL)
 * @throws Error when `url` does not match YOUTUBE_URL_MATCH.
 *   NOTE(review): the id helpers parse `url` as well and may throw for input
 *   that is not an absolute URL — confirm against their implementation.
 */
export const getEmbedData = (url: string): { src: string; url: string } => {
  const BaseUrl = 'https://www.youtube.com'
  const embedBaseUrl = 'https://www.youtube.com/embed'

  const match = url.match(YOUTUBE_URL_MATCH)
  if (match === null || match.length < 6) {
    console.error('Invalid youtube url', url)
    throw new Error(`Invalid youtube url: ${url}`)
  }

  // A playlist id takes precedence over any video id present in the URL.
  const playlistId = getYoutubePlaylistId(url)
  if (playlistId) {
    const safePlaylistId = encodeURIComponent(playlistId)
    return {
      src: `${embedBaseUrl}/videoseries?list=${safePlaylistId}`,
      url: `${BaseUrl}/playlist?list=${safePlaylistId}`,
    }
  }

  // Group 4 is the path prefix in front of the id, group 5 the id itself.
  const type = match[4]
  const id = match[5]

  if (type === '/shorts/') {
    // Shorts keep their own canonical URL but use the regular embed player.
    return {
      src: `${embedBaseUrl}/${id}`,
      url: `${BaseUrl}/shorts/${id}`,
    }
  }

  // Prefer the id reported by getYoutubeVideoId; fall back to the path id.
  const videoId = encodeURIComponent(getYoutubeVideoId(url) || id)
  return {
    src: `${embedBaseUrl}/${videoId}`,
    url: `${BaseUrl}/watch?v=${videoId}`,
  }
}
/**
 * Passes a title through underscore's `_.escape` and returns the result.
 *
 * @param title - raw title text
 * @returns the value produced by `_.escape(title)`
 */
export const escapeTitle = (title: string) => _.escape(title)
@ -38,21 +64,15 @@ export class YoutubeHandler extends ContentHandler {
}
async preHandle(url: string): Promise<PreHandleResult> {
const BaseUrl = 'https://www.youtube.com'
const embedBaseUrl = 'https://www.youtube.com/embed'
let urlToEncode: string
let src: string
const playlistId = getYoutubePlaylistId(url)
if (playlistId) {
urlToEncode = `${BaseUrl}/playlist?list=${playlistId}`
src = `${embedBaseUrl}/videoseries?list=${playlistId}`
} else {
const videoId = getYoutubeVideoId(url)
if (!videoId) {
return {}
}
urlToEncode = `${BaseUrl}/watch?v=${videoId}`
src = `${embedBaseUrl}/${videoId}`
let src, urlToEncode
try {
const embedData = getEmbedData(url)
src = embedData.src
urlToEncode = embedData.url
} catch (error) {
console.error('Error getting embed data', error)
return {}
}
const oembedUrl =

View File

@ -1,6 +1,21 @@
import { expect } from "chai";
import "mocha";
import { escapeTitle, getYoutubePlaylistId, getYoutubeVideoId } from "../src/websites/youtube-handler";
import {
escapeTitle,
getEmbedData,
getYoutubePlaylistId,
getYoutubeVideoId,
} from '../src/websites/youtube-handler'
describe('getEmbedData', () => {
  // Assertions must run inside an `it` callback: code placed directly in a
  // `describe` body executes while Mocha is still building the suite, so a
  // failure would abort loading instead of being reported as a failed test,
  // and a pass would not be counted as a test at all.
  it('should build the embed src for youtu.be links', () => {
    expect('https://www.youtube.com/embed/vFD2gu007dc').to.eq(
      getEmbedData('https://youtu.be/vFD2gu007dc').src
    )
    // Extra query parameters such as a start time must not leak into the id.
    expect('https://www.youtube.com/embed/cg9b4RC87LI').to.eq(
      getEmbedData('https://youtu.be/cg9b4RC87LI?t=116').src
    )
  })
})
describe('getYoutubeVideoId', () => {
it('should parse video id out of a URL', async () => {
@ -12,15 +27,9 @@ describe('getYoutubeVideoId', () => {
'https://www.youtube.com/watch?v=vFD2gu007dc&list=RDvFD2gu007dc&start_radio=1'
)
)
expect('vFD2gu007dc').to.eq(
getYoutubeVideoId('https://youtu.be/vFD2gu007dc')
)
expect('BMFVCnbRaV4').to.eq(
getYoutubeVideoId('https://youtube.com/watch?v=BMFVCnbRaV4&feature=share')
)
expect('cg9b4RC87LI').to.eq(
getYoutubeVideoId('https://youtu.be/cg9b4RC87LI?t=116')
)
})
})