diff --git a/packages/content-handler/src/index.ts b/packages/content-handler/src/index.ts index 4e61552ca..2745ee8ba 100644 --- a/packages/content-handler/src/index.ts +++ b/packages/content-handler/src/index.ts @@ -1,39 +1,40 @@ -import { AppleNewsHandler } from './websites/apple-news-handler' -import { BloombergHandler } from './websites/bloomberg-handler' -import { DerstandardHandler } from './websites/derstandard-handler' -import { ImageHandler } from './websites/image-handler' -import { MediumHandler } from './websites/medium-handler' -import { PdfHandler } from './websites/pdf-handler' -import { ScrapingBeeHandler } from './websites/scrapingBee-handler' -import { TDotCoHandler } from './websites/t-dot-co-handler' -import { TwitterHandler } from './websites/twitter-handler' -import { YoutubeHandler } from './websites/youtube-handler' -import { WikipediaHandler } from './websites/wikipedia-handler' -import { GitHubHandler } from './websites/github-handler' +import { parseHTML } from 'linkedom' +import { Browser } from 'puppeteer-core' import { ContentHandler, NewsletterInput, NewsletterResult, PreHandleResult, } from './content-handler' -import { SubstackHandler } from './newsletters/substack-handler' import { AxiosHandler } from './newsletters/axios-handler' -import { GolangHandler } from './newsletters/golang-handler' -import { MorningBrewHandler } from './newsletters/morning-brew-handler' -import { BloombergNewsletterHandler } from './newsletters/bloomberg-newsletter-handler' import { BeehiivHandler } from './newsletters/beehiiv-handler' +import { BloombergNewsletterHandler } from './newsletters/bloomberg-newsletter-handler' import { ConvertkitHandler } from './newsletters/convertkit-handler' -import { RevueHandler } from './newsletters/revue-handler' -import { GhostHandler } from './newsletters/ghost-handler' -import { parseHTML } from 'linkedom' import { CooperPressHandler } from './newsletters/cooper-press-handler' -import { HeyWorldHandler } from './newsletters/hey-world-handler' -import { Browser } from 'puppeteer-core' -import { StackOverflowHandler } from './websites/stack-overflow-handler' -import { GenericHandler } from './newsletters/generic-handler' -import { EveryIoHandler } from './newsletters/every-io-handler' import { EnergyWorldHandler } from './newsletters/energy-world' +import { EveryIoHandler } from './newsletters/every-io-handler' +import { GenericHandler } from './newsletters/generic-handler' +import { GhostHandler } from './newsletters/ghost-handler' +import { GolangHandler } from './newsletters/golang-handler' +import { HeyWorldHandler } from './newsletters/hey-world-handler' import { IndiaTimesHandler } from './newsletters/india-times-handler' +import { MorningBrewHandler } from './newsletters/morning-brew-handler' +import { RevueHandler } from './newsletters/revue-handler' +import { SubstackHandler } from './newsletters/substack-handler' +import { AppleNewsHandler } from './websites/apple-news-handler' +import { BloombergHandler } from './websites/bloomberg-handler' +import { DerstandardHandler } from './websites/derstandard-handler' +import { GitHubHandler } from './websites/github-handler' +import { ImageHandler } from './websites/image-handler' +import { MediumHandler } from './websites/medium-handler' +import { PdfHandler } from './websites/pdf-handler' +import { PipedVideoHandler } from './websites/piped-video-handler' +import { ScrapingBeeHandler } from './websites/scrapingBee-handler' +import { StackOverflowHandler } from './websites/stack-overflow-handler' +import { TDotCoHandler } from './websites/t-dot-co-handler' +import { TwitterHandler } from './websites/twitter-handler' +import { WikipediaHandler } from './websites/wikipedia-handler' +import { YoutubeHandler } from './websites/youtube-handler' const validateUrlString = (url: string): boolean => { const u = new URL(url) @@ -73,6 +74,7 @@ const contentHandlers: ContentHandler[] = [ new SubstackHandler(), new StackOverflowHandler(), new EnergyWorldHandler(), + new PipedVideoHandler(), ] const newsletterHandlers: ContentHandler[] = [ diff --git a/packages/content-handler/src/websites/piped-video-handler.ts b/packages/content-handler/src/websites/piped-video-handler.ts new file mode 100644 index 000000000..ffa8c050b --- /dev/null +++ b/packages/content-handler/src/websites/piped-video-handler.ts @@ -0,0 +1,82 @@ +import axios from 'axios' +import _ from 'underscore' +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class PipedVideoHandler extends ContentHandler { + // https://piped.video/watch?v={videoId} + PIPED_URL_MATCH = /^((?:https?:)?\/\/)?piped\.video\/watch\?v=[^&]+/ + + constructor() { + super() + this.name = 'Piped-video' + } + + getYoutubeVideoId = (url: string) => { + const u = new URL(url) + return u.searchParams.get('v') + } + + escapeTitle = (title: string) => { + return _.escape(title) + } + + shouldPreHandle(url: string): boolean { + return this.PIPED_URL_MATCH.test(url.toString()) + } + + async preHandle(url: string): Promise { + const videoId = this.getYoutubeVideoId(url) + if (!videoId) { + return {} + } + const baseUrl = 'https://api-piped.mha.fi' + const apiUrl = `${baseUrl}/streams/${videoId}` + const metadata = (await axios.get(apiUrl)).data as { + title: string + thumbnailUrl: string + uploader: string + uploaderUrl: string + uploadDate: string + description: string + videoStreams: { + width: number + height: number + url: string + }[] + } + const videoStreams = metadata.videoStreams + if (!videoStreams || videoStreams.length == 0) { + return {} + } + const videoStream = videoStreams[0] + const src = `https://piped.mha.fi/embed/${videoId}` + // escape html entities in title + const title = metadata.title + const escapedTitle = this.escapeTitle(title) + const ratio = videoStream.width / videoStream.height + const thumbnail = metadata.thumbnailUrl + const height = 350 + const width = height * ratio + const authorName = _.escape(metadata.uploader) + const content = ` + + + ${escapedTitle} + + + + + + + + + + +

${escapedTitle}

+ + + ` + + return { content, title } + } +} diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index bb5b14a91..1f4ca841b 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -184,7 +184,7 @@ Readability.prototype = { publishedDate: /published|modified|created|updated/i, replaceFonts: /<(\/?)font[^>]*>/gi, normalize: /\s{2,}/g, - videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq|cdnapisec\.kaltura)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i, + videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq|cdnapisec\.kaltura)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv|piped\.mha\.fi)/i, shareElements: /(\b|_)(share|sharedaddy|post-tags)(\b|_)/i, nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i, prevLink: /(prev|earl|old|new|<|«)/i, diff --git a/pkg/extension/src/scripts/common.js b/pkg/extension/src/scripts/common.js index 36303f828..763163a13 100644 --- a/pkg/extension/src/scripts/common.js +++ b/pkg/extension/src/scripts/common.js @@ -80,7 +80,7 @@ function handleBackendUrl(url) { const FORCE_CONTENT_FETCH_URLS = [ // twitter status url regex /twitter\.com\/(?:#!\/)?(\w+)\/status(?:es)?\/(\d+)(?:\/.*)?/, - /^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/, + /^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu\.be|piped\.video))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/, ] return FORCE_CONTENT_FETCH_URLS.some((regex) => regex.test(url)) } catch (error) {