From deff73953acff13a8bf4111c1d9d45465944a0f0 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 6 Apr 2023 16:30:52 +0800 Subject: [PATCH 1/4] Do not delete embedded iframe of piped video --- packages/readabilityjs/Readability.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index bb5b14a91..816f93637 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -184,7 +184,7 @@ Readability.prototype = { publishedDate: /published|modified|created|updated/i, replaceFonts: /<(\/?)font[^>]*>/gi, normalize: /\s{2,}/g, - videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq|cdnapisec\.kaltura)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i, + videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq|cdnapisec\.kaltura)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv|api-piped\.mha\.fi)/i, shareElements: /(\b|_)(share|sharedaddy|post-tags)(\b|_)/i, nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i, prevLink: /(prev|earl|old|new|<|«)/i, From 70f4068d893195a1417c5db5207d805e5021b16c Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 6 Apr 2023 16:31:06 +0800 Subject: [PATCH 2/4] Add handler for piped video --- packages/content-handler/src/index.ts | 50 +++++------ .../src/websites/piped-video-handler.ts | 82 +++++++++++++++++++ 2 files changed, 108 insertions(+), 24 deletions(-) create mode 100644 packages/content-handler/src/websites/piped-video-handler.ts diff --git a/packages/content-handler/src/index.ts b/packages/content-handler/src/index.ts index 4e61552ca..2745ee8ba 100644 --- a/packages/content-handler/src/index.ts +++ b/packages/content-handler/src/index.ts @@ -1,39 +1,40 @@ -import { AppleNewsHandler } from './websites/apple-news-handler' -import { BloombergHandler } from './websites/bloomberg-handler' -import { DerstandardHandler } from 
'./websites/derstandard-handler' -import { ImageHandler } from './websites/image-handler' -import { MediumHandler } from './websites/medium-handler' -import { PdfHandler } from './websites/pdf-handler' -import { ScrapingBeeHandler } from './websites/scrapingBee-handler' -import { TDotCoHandler } from './websites/t-dot-co-handler' -import { TwitterHandler } from './websites/twitter-handler' -import { YoutubeHandler } from './websites/youtube-handler' -import { WikipediaHandler } from './websites/wikipedia-handler' -import { GitHubHandler } from './websites/github-handler' +import { parseHTML } from 'linkedom' +import { Browser } from 'puppeteer-core' import { ContentHandler, NewsletterInput, NewsletterResult, PreHandleResult, } from './content-handler' -import { SubstackHandler } from './newsletters/substack-handler' import { AxiosHandler } from './newsletters/axios-handler' -import { GolangHandler } from './newsletters/golang-handler' -import { MorningBrewHandler } from './newsletters/morning-brew-handler' -import { BloombergNewsletterHandler } from './newsletters/bloomberg-newsletter-handler' import { BeehiivHandler } from './newsletters/beehiiv-handler' +import { BloombergNewsletterHandler } from './newsletters/bloomberg-newsletter-handler' import { ConvertkitHandler } from './newsletters/convertkit-handler' -import { RevueHandler } from './newsletters/revue-handler' -import { GhostHandler } from './newsletters/ghost-handler' -import { parseHTML } from 'linkedom' import { CooperPressHandler } from './newsletters/cooper-press-handler' -import { HeyWorldHandler } from './newsletters/hey-world-handler' -import { Browser } from 'puppeteer-core' -import { StackOverflowHandler } from './websites/stack-overflow-handler' -import { GenericHandler } from './newsletters/generic-handler' -import { EveryIoHandler } from './newsletters/every-io-handler' import { EnergyWorldHandler } from './newsletters/energy-world' +import { EveryIoHandler } from 
'./newsletters/every-io-handler' +import { GenericHandler } from './newsletters/generic-handler' +import { GhostHandler } from './newsletters/ghost-handler' +import { GolangHandler } from './newsletters/golang-handler' +import { HeyWorldHandler } from './newsletters/hey-world-handler' import { IndiaTimesHandler } from './newsletters/india-times-handler' +import { MorningBrewHandler } from './newsletters/morning-brew-handler' +import { RevueHandler } from './newsletters/revue-handler' +import { SubstackHandler } from './newsletters/substack-handler' +import { AppleNewsHandler } from './websites/apple-news-handler' +import { BloombergHandler } from './websites/bloomberg-handler' +import { DerstandardHandler } from './websites/derstandard-handler' +import { GitHubHandler } from './websites/github-handler' +import { ImageHandler } from './websites/image-handler' +import { MediumHandler } from './websites/medium-handler' +import { PdfHandler } from './websites/pdf-handler' +import { PipedVideoHandler } from './websites/piped-video-handler' +import { ScrapingBeeHandler } from './websites/scrapingBee-handler' +import { StackOverflowHandler } from './websites/stack-overflow-handler' +import { TDotCoHandler } from './websites/t-dot-co-handler' +import { TwitterHandler } from './websites/twitter-handler' +import { WikipediaHandler } from './websites/wikipedia-handler' +import { YoutubeHandler } from './websites/youtube-handler' const validateUrlString = (url: string): boolean => { const u = new URL(url) @@ -73,6 +74,7 @@ const contentHandlers: ContentHandler[] = [ new SubstackHandler(), new StackOverflowHandler(), new EnergyWorldHandler(), + new PipedVideoHandler(), ] const newsletterHandlers: ContentHandler[] = [ diff --git a/packages/content-handler/src/websites/piped-video-handler.ts b/packages/content-handler/src/websites/piped-video-handler.ts new file mode 100644 index 000000000..bd19b0d6e --- /dev/null +++ b/packages/content-handler/src/websites/piped-video-handler.ts 
@@ -0,0 +1,82 @@ +import axios from 'axios' +import _ from 'underscore' +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class PipedVideoHandler extends ContentHandler { + // https://piped.video/watch?v={videoId} + PIPED_URL_MATCH = /^((?:https?:)?\/\/)?piped\.video\/watch\?v=[^&]+/ + + constructor() { + super() + this.name = 'Piped-video' + } + + getYoutubeVideoId = (url: string) => { + const u = new URL(url) + return u.searchParams.get('v') + } + + escapeTitle = (title: string) => { + return _.escape(title) + } + + shouldPreHandle(url: string): boolean { + return this.PIPED_URL_MATCH.test(url.toString()) + } + + async preHandle(url: string): Promise { + const videoId = this.getYoutubeVideoId(url) + if (!videoId) { + return {} + } + const baseUrl = 'https://api-piped.mha.fi' + const apiUrl = `${baseUrl}/streams/${videoId}` + const metadata = (await axios.get(apiUrl)).data as { + title: string + thumbnailUrl: string + uploader: string + uploaderUrl: string + uploadedDate: string + description: string + videoStreams: { + width: number + height: number + url: string + }[] + } + const videoStreams = metadata.videoStreams + if (!videoStreams || videoStreams.length == 0) { + return {} + } + const videoStream = videoStreams[0] + const src = `${baseUrl}/embed/${videoId}` + // escape html entities in title + const title = metadata.title + const escapedTitle = this.escapeTitle(title) + const ratio = videoStream.width / videoStream.height + const thumbnail = metadata.thumbnailUrl + const height = 350 + const width = height * ratio + const authorName = _.escape(metadata.uploader) + const content = ` + + + ${escapedTitle} + + + + + + + + + + +

${escapedTitle}

+ + + ` + + return { content, title } + } +} From bfbd80e8f1e5f2dd439aea237cdad970420f3600 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 6 Apr 2023 16:41:21 +0800 Subject: [PATCH 3/4] Get the published date --- packages/content-handler/src/websites/piped-video-handler.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/content-handler/src/websites/piped-video-handler.ts b/packages/content-handler/src/websites/piped-video-handler.ts index bd19b0d6e..f1fdb002f 100644 --- a/packages/content-handler/src/websites/piped-video-handler.ts +++ b/packages/content-handler/src/websites/piped-video-handler.ts @@ -36,7 +36,7 @@ export class PipedVideoHandler extends ContentHandler { thumbnailUrl: string uploader: string uploaderUrl: string - uploadedDate: string + uploadDate: string description: string videoStreams: { width: number @@ -68,7 +68,7 @@ export class PipedVideoHandler extends ContentHandler { - + From eb58bf11ba446e3924975988d0397162c24f5e38 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 10 Apr 2023 20:52:09 +0800 Subject: [PATCH 4/4] Force to use content handler of piped.video when saving from extensions --- packages/content-handler/src/websites/piped-video-handler.ts | 4 ++-- packages/readabilityjs/Readability.js | 2 +- pkg/extension/src/scripts/common.js | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/content-handler/src/websites/piped-video-handler.ts b/packages/content-handler/src/websites/piped-video-handler.ts index f1fdb002f..ffa8c050b 100644 --- a/packages/content-handler/src/websites/piped-video-handler.ts +++ b/packages/content-handler/src/websites/piped-video-handler.ts @@ -49,7 +49,7 @@ export class PipedVideoHandler extends ContentHandler { return {} } const videoStream = videoStreams[0] - const src = `${baseUrl}/embed/${videoId}` + const src = `https://piped.mha.fi/embed/${videoId}` // escape html entities in title const title = metadata.title const escapedTitle = 
this.escapeTitle(title) @@ -67,7 +67,7 @@ export class PipedVideoHandler extends ContentHandler { - + diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 816f93637..1f4ca841b 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -184,7 +184,7 @@ Readability.prototype = { publishedDate: /published|modified|created|updated/i, replaceFonts: /<(\/?)font[^>]*>/gi, normalize: /\s{2,}/g, - videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq|cdnapisec\.kaltura)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv|api-piped\.mha\.fi)/i, + videos: /\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq|cdnapisec\.kaltura)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv|piped\.mha\.fi)/i, shareElements: /(\b|_)(share|sharedaddy|post-tags)(\b|_)/i, nextLink: /(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i, prevLink: /(prev|earl|old|new|<|«)/i, diff --git a/pkg/extension/src/scripts/common.js b/pkg/extension/src/scripts/common.js index 36303f828..763163a13 100644 --- a/pkg/extension/src/scripts/common.js +++ b/pkg/extension/src/scripts/common.js @@ -80,7 +80,7 @@ function handleBackendUrl(url) { const FORCE_CONTENT_FETCH_URLS = [ // twitter status url regex /twitter\.com\/(?:#!\/)?(\w+)\/status(?:es)?\/(\d+)(?:\/.*)?/, - /^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/, + /^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu\.be|piped\.video))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/, ] return FORCE_CONTENT_FETCH_URLS.some((regex) => regex.test(url)) } catch (error) {