Import content-handler in content-fetch
This commit is contained in:
@ -1,36 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const Url = require('url');
|
||||
const axios = require('axios');
|
||||
const { promisify } = require('util');
|
||||
const { DateTime } = require('luxon');
|
||||
const os = require('os');
|
||||
const { Cipher } = require('crypto');
|
||||
const { parseHTML } = require('linkedom');
|
||||
|
||||
exports.appleNewsHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
const u = new URL(url);
|
||||
if (u.hostname === 'apple.news') {
|
||||
return true;
|
||||
}
|
||||
return false
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
const MOBILE_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'
|
||||
const response = await axios.get(url, { headers: { 'User-Agent': MOBILE_USER_AGENT } } );
|
||||
const data = response.data;
|
||||
|
||||
const dom = parseHTML(data).document;
|
||||
|
||||
// make sure its a valid URL by wrapping in new URL
|
||||
const u = new URL(dom.querySelector('span.click-here').parentNode.href);
|
||||
return { url: u.href };
|
||||
}
|
||||
}
|
||||
@ -1,39 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const os = require('os');
|
||||
const { parseHTML } = require('linkedom');
|
||||
|
||||
exports.bloombergHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
const BLOOMBERG_URL_MATCH =
|
||||
/https?:\/\/(www\.)?bloomberg.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/
|
||||
return BLOOMBERG_URL_MATCH.test(url.toString())
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
console.log('prehandling bloomberg url', url)
|
||||
|
||||
try {
|
||||
const response = await axios.get('https://app.scrapingbee.com/api/v1', {
|
||||
params: {
|
||||
'api_key': process.env.SCRAPINGBEE_API_KEY,
|
||||
'url': url,
|
||||
'return_page_source': true,
|
||||
'block_ads': true,
|
||||
'block_resources': false,
|
||||
}
|
||||
})
|
||||
const dom = parseHTML(response.data).document;
|
||||
return { title: dom.title, content: dom.querySelector('body').innerHTML, url: url }
|
||||
} catch (error) {
|
||||
console.error('error prehandling bloomberg url', error)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,35 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const { parseHTML } = require('linkedom');
|
||||
|
||||
exports.derstandardHandler = {
|
||||
shouldPrehandle: (url, env) => {
|
||||
const u = new URL(url);
|
||||
return u.hostname === 'www.derstandard.at';
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
const response = await axios.get(url, {
|
||||
// set cookie to give consent to get the article
|
||||
headers: {
|
||||
'cookie': `DSGVO_ZUSAGE_V1=true; consentUUID=2bacb9c1-1e80-4be0-9f7b-ee987cf4e7b0_6`
|
||||
},
|
||||
});
|
||||
const content = response.data;
|
||||
|
||||
var title = undefined;
|
||||
const dom = parseHTML(content).document;
|
||||
const titleElement = dom.querySelector('.article-title')
|
||||
if (!titleElement) {
|
||||
title = titleElement.textContent
|
||||
titleElement.remove()
|
||||
}
|
||||
|
||||
return { content: dom.body.outerHTML, title: title };
|
||||
}
|
||||
}
|
||||
@ -9,16 +9,10 @@ const puppeteer = require('puppeteer-core');
|
||||
const axios = require('axios');
|
||||
const jwt = require('jsonwebtoken');
|
||||
const { promisify } = require('util');
|
||||
const { parseHTML } = require('linkedom');
|
||||
const { preHandleContent } = require('@omnivore/content-handler');
|
||||
|
||||
const signToken = promisify(jwt.sign);
|
||||
const { appleNewsHandler } = require('./apple-news-handler');
|
||||
const { twitterHandler } = require('./twitter-handler');
|
||||
const { youtubeHandler } = require('./youtube-handler');
|
||||
const { tDotCoHandler } = require('./t-dot-co-handler');
|
||||
const { pdfHandler } = require('./pdf-handler');
|
||||
const { mediumHandler } = require('./medium-handler');
|
||||
const { derstandardHandler } = require('./derstandard-handler');
|
||||
const { imageHandler } = require('./image-handler');
|
||||
const { scrapingBeeHandler } = require('./scrapingBee-handler')
|
||||
|
||||
const MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.62 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
|
||||
const DESKTOP_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4372.0 Safari/537.36'
|
||||
@ -29,8 +23,6 @@ const NON_SCRIPT_HOSTS= ['medium.com', 'fastcompany.com'];
|
||||
|
||||
const ALLOWED_CONTENT_TYPES = ['text/html', 'application/octet-stream', 'text/plain', 'application/pdf'];
|
||||
|
||||
const { parseHTML } = require('linkedom');
|
||||
|
||||
// Add stealth plugin to hide puppeteer usage
|
||||
// const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
// puppeteer.use(StealthPlugin());
|
||||
@ -207,19 +199,6 @@ const saveUploadedPdf = async (userId, url, uploadFileId, articleSavingRequestId
|
||||
);
|
||||
};
|
||||
|
||||
// Registry of URL handlers. The key is the short name that appears in the
// log output when a handler is selected or fails; entries are visited in
// insertion order.
const handlers = {
  pdf: pdfHandler,
  'apple-news': appleNewsHandler,
  twitter: twitterHandler,
  youtube: youtubeHandler,
  't-dot-co': tDotCoHandler,
  medium: mediumHandler,
  derstandard: derstandardHandler,
  image: imageHandler,
  scrapingBee: scrapingBeeHandler,
};
|
||||
|
||||
|
||||
async function fetchContent(req, res) {
|
||||
functionStartTime = Date.now();
|
||||
|
||||
@ -246,61 +225,18 @@ async function fetchContent(req, res) {
|
||||
return res.sendStatus(400);
|
||||
}
|
||||
|
||||
// if (!userId || !articleSavingRequestId) {
|
||||
// Object.assign(logRecord, { invalidParams: true, body: req.body, query: req.query });
|
||||
// console.log(`Invalid parameters`, logRecord);
|
||||
// return res.sendStatus(400);
|
||||
// }
|
||||
|
||||
// Before we run the regular handlers we check to see if we need to
|
||||
// pre-resolve the URL. TODO: This should probably happen recursively,
|
||||
// so URLs can be pre-resolved, handled, pre-resolved, handled, etc.
|
||||
for (const [key, handler] of Object.entries(handlers)) {
|
||||
if (handler.shouldResolve && handler.shouldResolve(url)) {
|
||||
try {
|
||||
url = await handler.resolve(url);
|
||||
validateUrlString(url);
|
||||
} catch (err) {
|
||||
console.log('error resolving url with handler', key, err);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Before we fetch the page we check the handlers, to see if they want
|
||||
// to perform a prefetch action that can modify our requests.
|
||||
// enumerate the handlers and see if any of them want to handle the request
|
||||
const handler = Object.keys(handlers).find(key => {
|
||||
try {
|
||||
return handlers[key].shouldPrehandle(url)
|
||||
} catch (e) {
|
||||
console.log('error with handler: ', key, e);
|
||||
}
|
||||
return false;
|
||||
});
|
||||
|
||||
var title = undefined;
|
||||
var content = undefined;
|
||||
var contentType = undefined;
|
||||
|
||||
if (handler) {
|
||||
try {
|
||||
// The only handler we have now can modify the URL, but in the
|
||||
// future maybe we let it modify content. In that case
|
||||
// we might exit the request early.
|
||||
console.log('pre-handling url with handler: ', handler);
|
||||
|
||||
const result = await handlers[handler].prehandle(url);
|
||||
if (result && result.url) {
|
||||
url = result.url
|
||||
validateUrlString(url);
|
||||
}
|
||||
if (result && result.title) { title = result.title }
|
||||
if (result && result.content) { content = result.content }
|
||||
if (result && result.contentType) { contentType = result.contentType }
|
||||
} catch (e) {
|
||||
console.log('error with handler: ', handler, e);
|
||||
let title, content, contentType;
|
||||
try {
|
||||
const result = await preHandleContent(url);
|
||||
if (result && result.url) {
|
||||
url = result.url
|
||||
validateUrlString(url);
|
||||
}
|
||||
if (result && result.title) { title = result.title }
|
||||
if (result && result.content) { content = result.content }
|
||||
if (result && result.contentType) { contentType = result.contentType }
|
||||
} catch (e) {
|
||||
console.log('error with handler: ', e);
|
||||
}
|
||||
|
||||
let context, page, finalUrl;
|
||||
|
||||
@ -1,34 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
|
||||
|
||||
exports.imageHandler = {
|
||||
shouldPrehandle: (url, env) => {
|
||||
const IMAGE_URL_PATTERN =
|
||||
/(https?:\/\/.*\.(?:jpg|jpeg|png|webp))/i
|
||||
return IMAGE_URL_PATTERN.test(url.toString())
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
const title = url.toString().split('/').pop();
|
||||
const content = `
|
||||
<html>
|
||||
<head>
|
||||
<title>${title}</title>
|
||||
<meta property="og:image" content="${url}" />
|
||||
<meta property="og:title" content="${title}" />
|
||||
</head>
|
||||
<body>
|
||||
<div>
|
||||
<img src="${url}" alt="${title}">
|
||||
</div>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
return { title, content };
|
||||
}
|
||||
}
|
||||
@ -1,29 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const os = require('os');
|
||||
|
||||
exports.mediumHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
const u = new URL(url);
|
||||
return u.hostname.endsWith('medium.com')
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
console.log('prehandling medium url', url)
|
||||
|
||||
try {
|
||||
const res = new URL(url);
|
||||
res.searchParams.delete('source');
|
||||
return { url: res.toString() }
|
||||
} catch (error) {
|
||||
console.error('error prehandling medium url', error)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -11,7 +11,8 @@
|
||||
"linkedom": "^0.14.9",
|
||||
"luxon": "^2.3.1",
|
||||
"puppeteer-core": "^16.1.0",
|
||||
"underscore": "^1.13.4"
|
||||
"underscore": "^1.13.4",
|
||||
"@omnivore/content-handler": "1.0.0"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "node app.js",
|
||||
|
||||
@ -1,21 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const Url = require('url');
|
||||
|
||||
|
||||
exports.pdfHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
const u = Url.parse(url)
|
||||
const path = u.path.replace(u.search, '')
|
||||
return path.endsWith('.pdf')
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
return { contentType: 'application/pdf' };
|
||||
}
|
||||
}
|
||||
@ -1,44 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const { parseHTML } = require('linkedom');
|
||||
|
||||
const os = require('os');
|
||||
|
||||
exports.scrapingBeeHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
const u = new URL(url);
|
||||
const hostnames = [
|
||||
'nytimes.com',
|
||||
'news.google.com',
|
||||
]
|
||||
|
||||
return hostnames.some((h) => u.hostname.endsWith(h))
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
console.log('prehandling url with scrapingbee', url)
|
||||
|
||||
try {
|
||||
const response = await axios.get('https://app.scrapingbee.com/api/v1', {
|
||||
params: {
|
||||
'api_key': process.env.SCRAPINGBEE_API_KEY,
|
||||
'url': url,
|
||||
'return_page_source': true,
|
||||
'block_ads': true,
|
||||
'block_resources': false,
|
||||
}
|
||||
})
|
||||
const dom = parseHTML(response.data).document;
|
||||
return { title: dom.title, content: response.data, url: url }
|
||||
} catch (error) {
|
||||
console.error('error prehandling url w/scrapingbee', error)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1,31 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const Url = require('url');
|
||||
|
||||
|
||||
exports.tDotCoHandler = {
|
||||
|
||||
shouldResolve: function (url, env) {
|
||||
const T_DOT_CO_URL_MATCH = /^https:\/\/(?:www\.)?t\.co\/.*$/;
|
||||
return T_DOT_CO_URL_MATCH.test(url);
|
||||
},
|
||||
|
||||
resolve: async function(url, env) {
|
||||
return await axios.get(url, { maxRedirects: 0, validateStatus: null })
|
||||
.then(res => {
|
||||
return Url.parse(res.headers.location).href;
|
||||
}).catch((err) => {
|
||||
console.log('err with t.co url', err);
|
||||
return undefined;
|
||||
});
|
||||
},
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
return false
|
||||
},
|
||||
}
|
||||
@ -1,9 +0,0 @@
|
||||
const { expect } = require('chai')
|
||||
const { appleNewsHandler } = require('../apple-news-handler')
|
||||
|
||||
// Calls appleNewsHandler.prehandle on a real apple.news link (this performs a
// network request) and checks that a parseable URL comes back.
describe('open a simple web page', () => {
  it('should return a response', async () => {
    const response = await appleNewsHandler.prehandle('https://apple.news/AxjzaZaPvSn23b67LhXI5EQ')
    console.log('response', response)
    // The spec previously asserted nothing, so it could only fail by throwing.
    expect(response).to.have.property('url')
    expect(() => new URL(response.url)).to.not.throw()
  })
})
|
||||
@ -1,12 +0,0 @@
|
||||
const { expect } = require('chai')
|
||||
const { getYoutubeVideoId } = require('../youtube-handler')
|
||||
|
||||
// Table-driven check that getYoutubeVideoId extracts the id from the common
// YouTube URL shapes.
describe('getYoutubeVideoId', () => {
  it('should parse video id out of a URL', () => {
    const cases = [
      ['https://www.youtube.com/watch?v=BnSUk0je6oo&t=269s', 'BnSUk0je6oo'],
      ['https://www.youtube.com/watch?v=vFD2gu007dc&list=RDvFD2gu007dc&start_radio=1', 'vFD2gu007dc'],
      ['https://youtu.be/vFD2gu007dc', 'vFD2gu007dc'],
      ['https://youtube.com/watch?v=BMFVCnbRaV4&feature=share', 'BMFVCnbRaV4'],
      ['https://youtu.be/cg9b4RC87LI?t=116', 'cg9b4RC87LI'],
    ];

    for (const [url, expectedId] of cases) {
      // actual value goes into expect(), expected value into eq(), so failure
      // messages read the right way round
      expect(getYoutubeVideoId(url)).to.eq(expectedId);
    }
  })
})
|
||||
@ -1,172 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const { DateTime } = require('luxon');
|
||||
const _ = require('underscore');
|
||||
|
||||
const TWITTER_BEARER_TOKEN = process.env.TWITTER_BEARER_TOKEN;
|
||||
const TWITTER_URL_MATCH = /twitter\.com\/(?:#!\/)?(\w+)\/status(?:es)?\/(\d+)(?:\/.*)?/
|
||||
|
||||
/**
 * Requests the oEmbed representation of a tweet from publish.twitter.com.
 *
 * @param {string} url - status URL of the tweet
 * @returns {Promise<object>} the axios response
 */
const embeddedTweet = async (url) => {
  const BASE_ENDPOINT = 'https://publish.twitter.com/oembed'

  const apiUrl = new URL(BASE_ENDPOINT)
  apiUrl.searchParams.append('url', url);
  // searchParams values are strings; the booleans passed before were
  // stringified to the same 'true'
  apiUrl.searchParams.append('omit_script', 'true');
  apiUrl.searchParams.append('dnt', 'true');

  // `redirect: "follow"` used to be listed under `headers`, which sent it to
  // the server as an HTTP header; it is not a request header and was dropped.
  return await axios.get(apiUrl.toString(), {
    headers: {
      Authorization: `Bearer ${TWITTER_BEARER_TOKEN}`,
    },
  });
};
|
||||
|
||||
/**
 * Builds the query-string fragment that selects tweet, expansion, user and
 * media fields for a tweet lookup.
 *
 * @returns {string} fragment in which every section starts with "&"
 */
const getTweetFields = () => {
  const sections = [
    ['tweet.fields', [
      'attachments', 'author_id', 'conversation_id', 'created_at',
      'entities', 'geo', 'in_reply_to_user_id', 'lang', 'possibly_sensitive',
      'public_metrics', 'referenced_tweets', 'source', 'withheld',
    ]],
    ['expansions', ['author_id', 'attachments.media_keys']],
    ['user.fields', [
      'created_at', 'description', 'entities', 'location', 'pinned_tweet_id',
      'profile_image_url', 'protected', 'public_metrics', 'url', 'verified',
      'withheld',
    ]],
    ['media.fields', [
      'duration_ms', 'height', 'preview_image_url', 'url', 'media_key',
      'public_metrics', 'width',
    ]],
  ];

  return sections
    .map(([param, fields]) => '&' + param + '=' + fields.join(','))
    .join('');
}
|
||||
|
||||
/**
 * Looks up a single tweet on the Twitter v2 API, requesting the fields
 * produced by getTweetFields().
 *
 * @param {string} id - tweet id
 * @returns {Promise<object>} the axios response
 */
const getTweetById = async (id) => {
  const BASE_ENDPOINT = "https://api.twitter.com/2/tweets/";
  const apiUrl = new URL(BASE_ENDPOINT + id + '?' + getTweetFields())

  // `redirect: "follow"` used to be listed under `headers`, which sent it to
  // the server as an HTTP header; it is not a request header and was dropped.
  return await axios.get(apiUrl.toString(), {
    headers: {
      Authorization: `Bearer ${TWITTER_BEARER_TOKEN}`,
    },
  });
};
|
||||
|
||||
/**
 * Looks up a user by username on the Twitter v2 API, including the
 * profile image URL.
 *
 * @param {string} username - Twitter handle without the leading "@"
 * @returns {Promise<object>} the axios response
 */
const getUserByUsername = async (username) => {
  const BASE_ENDPOINT = "https://api.twitter.com/2/users/by/username/";

  // Encode the handle so characters such as "/" or "?" cannot change the
  // request path; plain word-character handles are unaffected.
  const apiUrl = new URL(BASE_ENDPOINT + encodeURIComponent(username))
  apiUrl.searchParams.append('user.fields', 'profile_image_url');

  // `redirect: "follow"` used to be listed under `headers`, which sent it to
  // the server as an HTTP header; it is not a request header and was dropped.
  return await axios.get(apiUrl.toString(), {
    headers: {
      Authorization: `Bearer ${TWITTER_BEARER_TOKEN}`,
    },
  });
};
|
||||
|
||||
/**
 * Title for a tweet response, built from `tweet.data.author_name`.
 *
 * @param {{data: {author_name: string}}} tweet
 * @returns {string} "<author name> on Twitter"
 */
const titleForTweet = (tweet) => {
  const { author_name: authorName } = tweet.data;
  return `${authorName} on Twitter`
};
|
||||
|
||||
/**
 * Title for a tweet, built from the author's `name`.
 *
 * @param {{name: string}} author
 * @returns {string} "<name> on Twitter"
 */
const titleForAuthor = (author) => {
  const displayName = author.name;
  return `${displayName} on Twitter`
};
|
||||
|
||||
/**
 * Extracts the username (first capture group of TWITTER_URL_MATCH) from a
 * status URL.
 *
 * @param {string|URL} url - tweet status URL
 * @returns {string} the username
 * @throws {TypeError} when the URL does not match TWITTER_URL_MATCH
 */
const usernameFromStatusUrl = (url) => {
  const [, username] = url.toString().match(TWITTER_URL_MATCH)
  return username
};
|
||||
|
||||
/**
 * Extracts the numeric tweet id (second capture group of TWITTER_URL_MATCH)
 * from a status URL.
 *
 * @param {string|URL} url - tweet status URL
 * @returns {string} the tweet id
 * @throws {TypeError} when the URL does not match TWITTER_URL_MATCH
 */
const tweetIdFromStatusUrl = (url) => {
  const [, , tweetId] = url.toString().match(TWITTER_URL_MATCH)
  return tweetId
};
|
||||
|
||||
/**
 * Formats a timestamp with luxon's DATETIME_FULL preset.
 *
 * @param {string|number|Date} timestamp - any value accepted by `new Date()`
 * @returns {string} the localized date-time string
 */
const formatTimestamp = (timestamp) => {
  const asDate = new Date(timestamp);
  const dateTime = DateTime.fromJSDate(asDate);
  return dateTime.toLocaleString(DateTime.DATETIME_FULL);
};
|
||||
|
||||
exports.twitterHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
return TWITTER_BEARER_TOKEN && TWITTER_URL_MATCH.test(url.toString())
|
||||
},
|
||||
|
||||
// version of the handler that uses the oembed API
|
||||
// This isn't great as it doesn't work well with our
|
||||
// readability API. But could potentially give a more consistent
|
||||
// look to the tweets
|
||||
// prehandle: async (url, env) => {
|
||||
// const oeTweet = await embeddedTweet(url)
|
||||
// const dom = new JSDOM(oeTweet.data.html);
|
||||
// const bq = dom.window.document.querySelector('blockquote')
|
||||
// console.log('blockquote:', bq);
|
||||
|
||||
// const title = titleForTweet(oeTweet)
|
||||
// return { title, content: '<div>' + bq.innerHTML + '</div>', url: oeTweet.data.url };
|
||||
// }
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
console.log('prehandling twitter url', url)
|
||||
|
||||
const tweetId = tweetIdFromStatusUrl(url)
|
||||
const tweetData = (await getTweetById(tweetId)).data;
|
||||
const authorId = tweetData.data.author_id;
|
||||
const author = tweetData.includes.users.filter(u => u.id = authorId)[0];
|
||||
// escape html entities in title
|
||||
const title = _.escape(titleForAuthor(author))
|
||||
const authorImage = author.profile_image_url.replace('_normal', '_400x400')
|
||||
|
||||
let text = tweetData.data.text;
|
||||
if (tweetData.data.entities && tweetData.data.entities.urls) {
|
||||
for (let urlObj of tweetData.data.entities.urls) {
|
||||
text = text.replace(
|
||||
urlObj.url,
|
||||
`<a href="${urlObj.expanded_url}">${urlObj.display_url}</a>`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const front = `
|
||||
<div>
|
||||
<p>${text}</p>
|
||||
`
|
||||
|
||||
var includesHtml = '';
|
||||
if (tweetData.includes.media) {
|
||||
includesHtml = tweetData.includes.media.map(m => {
|
||||
const linkUrl = m.type == 'photo' ? m.url : url;
|
||||
const previewUrl = m.type == 'photo' ? m.url : m.preview_image_url;
|
||||
const mediaOpen = `<a class="media-link" href=${linkUrl}>
|
||||
<picture>
|
||||
<img class="tweet-img" src=${previewUrl} />
|
||||
</picture>
|
||||
</a>`
|
||||
return mediaOpen
|
||||
}).join('\n');
|
||||
}
|
||||
|
||||
const back = `
|
||||
— <a href="https://twitter.com/${author.username}">${author.username}</a> ${author.name} <a href="${url}">${formatTimestamp(tweetData.data.created_at)}</a>
|
||||
</div>
|
||||
`
|
||||
const content = `
|
||||
<head>
|
||||
<meta property="og:image" content="${authorImage}" />
|
||||
<meta property="og:image:secure_url" content="${authorImage}" />
|
||||
<meta property="og:title" content="${title}" />
|
||||
<meta property="og:description" content="${_.escape(tweetData.data.text)}" />
|
||||
</head>
|
||||
<body>
|
||||
${front}
|
||||
${includesHtml}
|
||||
${back}
|
||||
</body>`
|
||||
|
||||
return { content, url, title };
|
||||
}
|
||||
}
|
||||
@ -1,68 +0,0 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const _ = require('underscore');
|
||||
|
||||
// Matches youtube.com / youtu.be URLs (optionally with scheme and a www./m.
// prefix); capture group 5 is the path segment treated as the video id.
// The dot in "youtu.be" is escaped so it only matches a literal ".".
const YOUTUBE_URL_MATCH =
  /^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu\.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/

/**
 * Extracts the video id from a YouTube URL.
 *
 * The `v` query parameter wins when present; otherwise the id is taken from
 * the path via YOUTUBE_URL_MATCH.
 *
 * @param {string|URL} url - YouTube URL
 * @returns {string|undefined} the video id, or undefined when none is found
 * @throws {TypeError} when `url` is not a parseable absolute URL
 */
function getYoutubeVideoId(url) {
  const fromQuery = new URL(url).searchParams.get('v');
  if (fromQuery) {
    return fromQuery
  }

  const match = url.toString().match(YOUTUBE_URL_MATCH)
  if (match === null || match.length < 6 || !match[5]) {
    return undefined
  }
  return match[5]
}
exports.getYoutubeVideoId = getYoutubeVideoId
|
||||
|
||||
exports.youtubeHandler = {
|
||||
shouldPrehandle: (url, env) => {
|
||||
return YOUTUBE_URL_MATCH.test(url.toString())
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
const videoId = getYoutubeVideoId(url)
|
||||
if (!videoId) {
|
||||
return {}
|
||||
}
|
||||
|
||||
const oembedUrl = `https://www.youtube.com/oembed?format=json&url=` + encodeURIComponent(`https://www.youtube.com/watch?v=${videoId}`)
|
||||
const oembed = (await axios.get(oembedUrl.toString())).data;
|
||||
// escape html entities in title
|
||||
const title = _.escape(oembed.title);
|
||||
const ratio = oembed.width / oembed.height;
|
||||
const thumbnail = oembed.thumbnail_url;
|
||||
const height = 350;
|
||||
const width = height * ratio;
|
||||
const authorName = _.escape(oembed.author_name);
|
||||
|
||||
const content = `
|
||||
<html>
|
||||
<head><title>${title}</title>
|
||||
<meta property="og:image" content="${thumbnail}" />
|
||||
<meta property="og:image:secure_url" content="${thumbnail}" />
|
||||
<meta property="og:title" content="${title}" />
|
||||
<meta property="og:description" content="" />
|
||||
<meta property="og:article:author" content="${authorName}" />
|
||||
</head>
|
||||
<body>
|
||||
<iframe width="${width}" height="${height}" src="https://www.youtube.com/embed/${videoId}" title="${title}" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
<p><a href="${url}" target="_blank">${title}</a></p>
|
||||
<p itemscope="" itemprop="author" itemtype="http://schema.org/Person">By <a href="${oembed.author_url}" target="_blank">${authorName}</a></p>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
console.log('got video id', videoId)
|
||||
|
||||
return { content, title: 'Youtube Content' };
|
||||
}
|
||||
}
|
||||
@ -1,8 +1,13 @@
|
||||
import { ContentHandler, PreHandleResult } from './index'
|
||||
import axios from 'axios'
|
||||
import { parseHTML } from 'linkedom'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
|
||||
export class AppleNewsHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 'Apple News'
|
||||
}
|
||||
|
||||
shouldPreHandle(url: string, dom?: Document): boolean {
|
||||
const u = new URL(url)
|
||||
return u.hostname === 'apple.news'
|
||||
|
||||
@ -1,8 +1,13 @@
|
||||
import { ContentHandler, PreHandleResult } from './index'
|
||||
import axios from 'axios'
|
||||
import { parseHTML } from 'linkedom'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
|
||||
export class BloombergHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 'Bloomberg'
|
||||
}
|
||||
|
||||
class BloombergHandler extends ContentHandler {
|
||||
shouldPreHandle(url: string, dom?: Document): boolean {
|
||||
const BLOOMBERG_URL_MATCH =
|
||||
/https?:\/\/(www\.)?bloomberg.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)/
|
||||
|
||||
126
packages/content-handler/src/content-handler.ts
Normal file
126
packages/content-handler/src/content-handler.ts
Normal file
@ -0,0 +1,126 @@
|
||||
import addressparser from 'addressparser'
|
||||
import rfc2047 from 'rfc2047'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
|
||||
/** Unsubscribe targets taken from a newsletter's unsubscribe header. */
interface Unsubscribe {
  /** `mailto:` unsubscribe address, when one is present. */
  mailTo?: string
  /** http(s) unsubscribe link, when one is present. */
  httpUrl?: string
}
|
||||
|
||||
/** Normalised newsletter produced from an incoming email. */
interface NewsletterMessage {
  /** Email value passed through from the incoming message. */
  email: string
  /** HTML body of the newsletter. */
  content: string
  /** URL of the newsletter, or a generated fallback URL. */
  url: string
  /** Newsletter title. */
  title: string
  /** Author name parsed from the sender. */
  author: string
  /** `mailto:` unsubscribe address, if any. */
  unsubMailTo?: string
  /** http(s) unsubscribe link, if any. */
  unsubHttpUrl?: string
}
|
||||
|
||||
/**
 * Result of a handler's pre-handle step. Every field is optional; a handler
 * sets only the parts it wants to supply.
 */
export interface PreHandleResult {
  /** URL to use in place of the one that was passed in. */
  url?: string
  /** Title of the content. */
  title?: string
  /** Content markup supplied by the handler. */
  content?: string
  /** Content type, e.g. `application/pdf`. */
  contentType?: string
  /** Parsed document, when the handler has one. */
  dom?: Document
}
|
||||
|
||||
/**
 * Base class for content handlers.
 *
 * The defaults opt out of every hook (`shouldResolve` and `shouldPreHandle`
 * return false; `resolve` and `preHandle` echo their input), so subclasses
 * override only what they need. The newsletter helpers are driven by
 * `senderRegex`, `urlRegex` and `defaultUrl`, which subclasses are expected
 * to replace with real values.
 */
export abstract class ContentHandler {
  protected senderRegex: RegExp
  protected urlRegex: RegExp
  protected defaultUrl: string
  public name: string

  protected constructor() {
    // Placeholder values.
    this.senderRegex = /NEWSLETTER_SENDER_REGEX/
    this.urlRegex = /NEWSLETTER_URL_REGEX/
    this.defaultUrl = 'NEWSLETTER_DEFAULT_URL'
    this.name = 'Handler name'
  }

  /** Whether this handler wants to resolve `url` to another URL first. */
  shouldResolve(url: string): boolean {
    return false
  }

  /**
   * Resolves `url` to the URL that should actually be fetched.
   * The default returns the input unchanged.
   */
  async resolve(url: string): Promise<string | undefined> {
    return Promise.resolve(url)
  }

  /** Whether this handler wants to pre-handle `url`. */
  shouldPreHandle(url: string, dom?: Document): boolean {
    return false
  }

  /**
   * Pre-handles `url`. The default echoes back the URL and the document it
   * was given.
   */
  async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
    return Promise.resolve({ url, dom: document })
  }

  /**
   * Returns true when `from` matches this handler's sender pattern and at
   * least one of the two headers is non-empty.
   */
  isNewsletter(postHeader: string, from: string, unSubHeader: string): boolean {
    // Axios newsletter is from <xx@axios.com>
    const re = new RegExp(this.senderRegex)
    return re.test(from) && (!!postHeader || !!unSubHeader)
  }

  /**
   * Extracts the newsletter URL from the email HTML.
   *
   * @returns the first capture group of `urlRegex`, or undefined when the
   *   pattern does not match
   */
  parseNewsletterUrl(_postHeader: string, html: string): string | undefined {
    // get newsletter url from html
    const matches = html.match(this.urlRegex)
    if (matches) {
      return matches[1]
    }
    return undefined
  }

  /**
   * Extracts the author's display name from a `From` header value.
   *
   * @returns the name of the first parsed address, or `from` itself when
   *   nothing could be parsed
   */
  parseAuthor(from: string): string {
    // get author name from email
    // e.g. 'Jackson Harper from Omnivore App <jacksonh@substack.com>'
    // or 'Mike Allen <mike@axios.com>'
    const parsed = addressparser(from)
    if (parsed.length > 0) {
      return parsed[0].name
    }
    return from
  }

  /**
   * Parses a `List-Unsubscribe` header into its mailto and http(s) targets.
   */
  parseUnsubscribe(unSubHeader: string): Unsubscribe {
    // parse list-unsubscribe header
    // e.g. List-Unsubscribe: <https://omnivore.com/unsub>, <mailto:unsub@omnivore.com>
    const decoded = rfc2047.decode(unSubHeader)
    // The two patterns were previously swapped, so mailTo received the http
    // link and httpUrl received the mail address.
    return {
      mailTo: decoded.match(/<mailto:([^>]*)>/)?.[1],
      httpUrl: decoded.match(/<(https?:\/\/[^>]*)>/)?.[1],
    }
  }

  /**
   * Builds a NewsletterMessage from the parts of an incoming email.
   *
   * @throws Error('invalid newsletter email') when `email`, `html`, `title`
   *   or `from` is empty
   */
  handleNewsletter(
    email: string,
    html: string,
    postHeader: string,
    title: string,
    from: string,
    unSubHeader: string
  ): NewsletterMessage {
    console.log('handleNewsletter', email, postHeader, title, from)

    if (!email || !html || !title || !from) {
      console.log('invalid newsletter email')
      throw new Error('invalid newsletter email')
    }

    // fallback to default url if newsletter url does not exist
    // assign a random uuid to the default url to avoid duplicate url
    const url =
      this.parseNewsletterUrl(postHeader, html) ||
      `${this.defaultUrl}?source=newsletters&id=${uuidv4()}`
    const author = this.parseAuthor(from)
    const unsubscribe = this.parseUnsubscribe(unSubHeader)

    return {
      email,
      content: html,
      url,
      title,
      author,
      unsubMailTo: unsubscribe.mailTo || '',
      unsubHttpUrl: unsubscribe.httpUrl || '',
    }
  }
}
|
||||
@ -1,8 +1,13 @@
|
||||
import { ContentHandler, PreHandleResult } from './index'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
import axios from 'axios'
|
||||
import { parseHTML } from 'linkedom'
|
||||
|
||||
class DerstandardHandler extends ContentHandler {
|
||||
export class DerstandardHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 'Derstandard'
|
||||
}
|
||||
|
||||
shouldPreHandle(url: string, dom?: Document): boolean {
|
||||
const u = new URL(url)
|
||||
return u.hostname === 'www.derstandard.at'
|
||||
|
||||
@ -1,6 +1,11 @@
|
||||
import { ContentHandler, PreHandleResult } from './index'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
|
||||
export class ImageHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 'Image'
|
||||
}
|
||||
|
||||
class ImageHandler extends ContentHandler {
|
||||
shouldPreHandle(url: string, dom?: Document): boolean {
|
||||
const IMAGE_URL_PATTERN = /(https?:\/\/.*\.(?:jpg|jpeg|png|webp))/i
|
||||
return IMAGE_URL_PATTERN.test(url.toString())
|
||||
|
||||
@ -1,111 +1,76 @@
|
||||
import addressparser from 'addressparser'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
import rfc2047 from 'rfc2047'
|
||||
import { AppleNewsHandler } from './apple-news-handler'
|
||||
import { BloombergHandler } from './bloomberg-handler'
|
||||
import { DerstandardHandler } from './derstandard-handler'
|
||||
import { ImageHandler } from './image-handler'
|
||||
import { MediumHandler } from './medium-handler'
|
||||
import { PdfHandler } from './pdf-handler'
|
||||
import { ScrapingBeeHandler } from './scrapingBee-handler'
|
||||
import { TDotCoHandler } from './t-dot-co-handler'
|
||||
import { TwitterHandler } from './twitter-handler'
|
||||
import { YoutubeHandler } from './youtube-handler'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
|
||||
interface Unsubscribe {
|
||||
mailTo?: string
|
||||
httpUrl?: string
|
||||
/**
 * Checks that a URL string is acceptable to fetch on behalf of a user.
 *
 * The URL must parse, use http or https, and must not point at localhost,
 * a loopback address, or an RFC 1918 private IPv4 range.
 *
 * @param url - absolute URL string to validate
 * @returns `true` when every check passes, so the result can be used directly
 *   in a condition
 * @throws TypeError if `url` cannot be parsed by the `URL` constructor
 * @throws Error if the protocol is not http(s), or the host is local/private
 */
const validateUrlString = (url: string): boolean => {
  const u = new URL(url)

  // Make sure the URL is http or https
  if (u.protocol !== 'http:' && u.protocol !== 'https:') {
    throw new Error('Invalid URL protocol check failed')
  }

  // Make sure the domain is not localhost
  if (u.hostname === 'localhost' || u.hostname === '0.0.0.0') {
    throw new Error('Invalid URL is localhost')
  }

  // Only inspect octets when the host really is a dotted-quad IPv4 address;
  // a plain prefix test would also reject names such as "10.example.com".
  const ipv4 = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(u.hostname)
  if (ipv4) {
    const first = Number(ipv4[1])
    const second = Number(ipv4[2])

    // 127.0.0.0/8 is loopback, i.e. another spelling of localhost
    if (first === 127) {
      throw new Error('Invalid URL is localhost')
    }

    // Make sure the address is not in a private range:
    // 10.0.0.0/8, 172.16.0.0/12 (172.16 - 172.31) and 192.168.0.0/16
    const isPrivate =
      first === 10 ||
      (first === 172 && second >= 16 && second <= 31) ||
      (first === 192 && second === 168)
    if (isPrivate) {
      throw new Error('Invalid URL is private ip')
    }
  }

  return true
}
|
||||
|
||||
interface NewsletterMessage {
|
||||
email: string
|
||||
content: string
|
||||
url: string
|
||||
title: string
|
||||
author: string
|
||||
unsubMailTo?: string
|
||||
unsubHttpUrl?: string
|
||||
}
|
||||
// One instance of every registered handler, created in the order listed.
// The order is significant: preHandleContent walks this list and uses the
// first handler that reports a match.
const contentHandlers: ContentHandler[] = [
  AppleNewsHandler,
  BloombergHandler,
  DerstandardHandler,
  ImageHandler,
  MediumHandler,
  PdfHandler,
  ScrapingBeeHandler,
  TDotCoHandler,
  TwitterHandler,
  YoutubeHandler,
].map((HandlerClass) => new HandlerClass())
|
||||
|
||||
export interface PreHandleResult {
|
||||
url?: string
|
||||
title?: string
|
||||
content?: string
|
||||
contentType?: string
|
||||
/**
 * Runs the registered content handlers against a URL before the page is
 * fetched.
 *
 * First, the first handler whose `shouldResolve` matches gets a chance to
 * resolve the URL (e.g. a shortened link) into its target. Then the first
 * handler whose `shouldPreHandle` matches the (possibly resolved) URL
 * produces the result.
 *
 * @param url - URL of the page about to be fetched
 * @param dom - optional, already-parsed document passed through to handlers
 * @returns the matching handler's pre-handle result, or `undefined` when no
 *   handler wants the URL
 */
export const preHandleContent = async (
  url: string,
  dom?: Document
): Promise<PreHandleResult | undefined> => {
  let targetUrl = url

  // Before we run the regular handlers we check to see if we need to
  // pre-resolve the URL. TODO: This should probably happen recursively,
  // so URLs can be pre-resolved, handled, pre-resolved, handled, etc.
  for (const handler of contentHandlers) {
    if (handler.shouldResolve(targetUrl)) {
      try {
        const resolvedUrl = await handler.resolve(targetUrl)
        if (resolvedUrl) {
          // validateUrlString signals a rejected URL by throwing, so its
          // return value is not tested; a throw lands in the catch below
          // and the original URL is kept.
          validateUrlString(resolvedUrl)
          targetUrl = resolvedUrl
        }
      } catch (err) {
        console.log('error resolving url with handler', handler.name, err)
      }
      // Only the first resolving handler is consulted.
      break
    }
  }

  // Before we fetch the page we check the handlers, to see if they want
  // to perform a prefetch action that can modify our requests.
  // Enumerate the handlers and see if any of them want to handle the request.
  for (const handler of contentHandlers) {
    if (handler.shouldPreHandle(targetUrl, dom)) {
      console.log('preHandleContent', handler.name, targetUrl)
      return handler.preHandle(targetUrl, dom)
    }
  }

  return undefined
}
|
||||
|
||||
/**
 * Base class for content handlers.
 *
 * It provides no-op pre-handling defaults plus shared newsletter parsing
 * helpers. The regex and URL fields hold placeholder values here; subclasses
 * are presumably expected to replace them with real patterns.
 */
export class ContentHandler {
  protected senderRegex = /NEWSLETTER_SENDER_REGEX/
  protected urlRegex = /NEWSLETTER_URL_REGEX/
  protected defaultUrl = 'NEWSLETTER_DEFAULT_URL'
  protected name = ''

  /**
   * Reports whether this handler wants to pre-handle the given URL.
   * The base implementation never does.
   */
  shouldPreHandle(url: string, dom?: Document): boolean {
    return false
  }

  /**
   * Pre-handles a URL. The base implementation passes the URL and document
   * through unchanged.
   */
  async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
    return Promise.resolve({ url, dom: document })
  }

  /**
   * Reports whether an email looks like a newsletter for this handler.
   *
   * @param postHeader - value of the post header, may be empty
   * @param from - value of the From header, tested against `senderRegex`
   * @param unSubHeader - value of the List-Unsubscribe header, may be empty
   * @returns true when the sender matches and at least one of the two
   *   headers is non-empty
   */
  isNewsletter(postHeader: string, from: string, unSubHeader: string): boolean {
    const re = new RegExp(this.senderRegex)
    return re.test(from) && (!!postHeader || !!unSubHeader)
  }

  /**
   * Extracts the newsletter URL from the email HTML.
   *
   * @param _postHeader - unused by the base implementation
   * @param html - email body to search with `urlRegex`
   * @returns the first capture group of the match, or undefined if no match
   */
  parseNewsletterUrl(_postHeader: string, html: string): string | undefined {
    // get newsletter url from html
    const matches = html.match(this.urlRegex)
    if (matches) {
      return matches[1]
    }
    return undefined
  }

  /**
   * Extracts the author's display name from a From header.
   *
   * e.g. 'Jackson Harper from Omnivore App <jacksonh@substack.com>'
   * or 'Mike Allen <mike@axios.com>'
   *
   * @returns the name of the first parsed address, or the raw header when
   *   nothing could be parsed
   */
  parseAuthor(from: string): string {
    const parsed = addressparser(from)
    if (parsed.length > 0) {
      return parsed[0].name
    }
    return from
  }

  /**
   * Parses a List-Unsubscribe header into its http and mailto targets.
   *
   * e.g. List-Unsubscribe: <https://omnivore.com/unsub>, <mailto:unsub@omnivore.com>
   *
   * @param unSubHeader - raw header value, possibly RFC 2047 encoded
   * @returns `httpUrl` holding the http(s) link and `mailTo` holding the
   *   address after `mailto:`; either is undefined when absent
   */
  parseUnsubscribe(unSubHeader: string): Unsubscribe {
    const decoded = rfc2047.decode(unSubHeader)
    return {
      // The http(s) link belongs in httpUrl and the mailto address in mailTo;
      // the two were previously assigned the wrong way round.
      httpUrl: decoded.match(/<(https?:\/\/[^>]*)>/)?.[1],
      mailTo: decoded.match(/<mailto:([^>]*)>/)?.[1],
    }
  }

  /**
   * Builds the newsletter message for an incoming email.
   *
   * @param email - address the newsletter was delivered to
   * @param html - email body, stored as the message content
   * @param postHeader - post header value, forwarded to `parseNewsletterUrl`
   * @param title - title of the newsletter
   * @param from - From header, used to derive the author
   * @param unSubHeader - List-Unsubscribe header value
   * @returns the assembled newsletter message
   * @throws Error when `email`, `html`, `title` or `from` is empty
   */
  handleNewsletter(
    email: string,
    html: string,
    postHeader: string,
    title: string,
    from: string,
    unSubHeader: string
  ): NewsletterMessage {
    console.log('handleNewsletter', email, postHeader, title, from)

    if (!email || !html || !title || !from) {
      console.log('invalid newsletter email')
      throw new Error('invalid newsletter email')
    }

    // fallback to default url if newsletter url does not exist
    // assign a random uuid to the default url to avoid duplicate url
    const url =
      this.parseNewsletterUrl(postHeader, html) ||
      `${this.defaultUrl}?source=newsletters&id=${uuidv4()}`
    const author = this.parseAuthor(from)
    const unsubscribe = this.parseUnsubscribe(unSubHeader)

    return {
      email,
      content: html,
      url,
      title,
      author,
      unsubMailTo: unsubscribe.mailTo || '',
      unsubHttpUrl: unsubscribe.httpUrl || '',
    }
  }
}
|
||||
module.exports = {
|
||||
preHandleContent,
|
||||
}
|
||||
|
||||
@ -1,6 +1,11 @@
|
||||
import { ContentHandler, PreHandleResult } from './index'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
|
||||
export class MediumHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 'Medium'
|
||||
}
|
||||
|
||||
class MediumHandler extends ContentHandler {
|
||||
shouldPreHandle(url: string, dom?: Document): boolean {
|
||||
const u = new URL(url)
|
||||
return u.hostname.endsWith('medium.com')
|
||||
|
||||
@ -1,6 +1,11 @@
|
||||
import { ContentHandler, PreHandleResult } from './index'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
|
||||
export class PdfHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 'PDF'
|
||||
}
|
||||
|
||||
class PdfHandler extends ContentHandler {
|
||||
shouldPreHandle(url: string, dom?: Document): boolean {
|
||||
const u = new URL(url)
|
||||
const path = u.pathname.replace(u.search, '')
|
||||
|
||||
@ -1,8 +1,13 @@
|
||||
import { ContentHandler, PreHandleResult } from './index'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
import axios from 'axios'
|
||||
import { parseHTML } from 'linkedom'
|
||||
|
||||
class ScrapingBeeHandler extends ContentHandler {
|
||||
export class ScrapingBeeHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 'ScrapingBee'
|
||||
}
|
||||
|
||||
shouldPreHandle(url: string, dom?: Document): boolean {
|
||||
const u = new URL(url)
|
||||
const hostnames = ['nytimes.com', 'news.google.com']
|
||||
|
||||
@ -1,7 +1,12 @@
|
||||
import { ContentHandler } from './index'
|
||||
import { ContentHandler } from './content-handler'
|
||||
import axios from 'axios'
|
||||
|
||||
class TDotCoHandler extends ContentHandler {
|
||||
export class TDotCoHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 't.co'
|
||||
}
|
||||
|
||||
shouldResolve(url: string): boolean {
|
||||
const T_DOT_CO_URL_MATCH = /^https:\/\/(?:www\.)?t\.co\/.*$/
|
||||
return T_DOT_CO_URL_MATCH.test(url)
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { ContentHandler, PreHandleResult } from './index'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
import axios from 'axios'
|
||||
import { DateTime } from 'luxon'
|
||||
import _ from 'underscore'
|
||||
@ -52,7 +52,12 @@ const formatTimestamp = (timestamp: string) => {
|
||||
)
|
||||
}
|
||||
|
||||
class TwitterHandler extends ContentHandler {
|
||||
export class TwitterHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 'Twitter'
|
||||
}
|
||||
|
||||
shouldPreHandle(url: string, dom?: Document): boolean {
|
||||
return !!TWITTER_BEARER_TOKEN && TWITTER_URL_MATCH.test(url.toString())
|
||||
}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
import { ContentHandler, PreHandleResult } from './index'
|
||||
import { ContentHandler, PreHandleResult } from './content-handler'
|
||||
import axios from 'axios'
|
||||
import _ from 'underscore'
|
||||
|
||||
@ -18,7 +18,12 @@ export const getYoutubeVideoId = (url: string) => {
|
||||
return videoId
|
||||
}
|
||||
|
||||
class YoutubeHandler extends ContentHandler {
|
||||
export class YoutubeHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
this.name = 'Youtube'
|
||||
}
|
||||
|
||||
shouldPreHandle(url: string, dom?: Document): boolean {
|
||||
return YOUTUBE_URL_MATCH.test(url.toString())
|
||||
}
|
||||
|
||||
20
yarn.lock
20
yarn.lock
@ -10579,19 +10579,6 @@ chai@^4.3.4:
|
||||
pathval "^1.1.1"
|
||||
type-detect "^4.0.5"
|
||||
|
||||
chai@^4.3.6:
|
||||
version "4.3.6"
|
||||
resolved "https://registry.yarnpkg.com/chai/-/chai-4.3.6.tgz#ffe4ba2d9fa9d6680cc0b370adae709ec9011e9c"
|
||||
integrity sha512-bbcp3YfHCUzMOvKqsztczerVgBKSsEijCySNlHHbX3VG1nskvqjz5Rfso1gGwD6w6oOV3eI60pKuMOV5MV7p3Q==
|
||||
dependencies:
|
||||
assertion-error "^1.1.0"
|
||||
check-error "^1.0.2"
|
||||
deep-eql "^3.0.1"
|
||||
get-func-name "^2.0.0"
|
||||
loupe "^2.3.1"
|
||||
pathval "^1.1.1"
|
||||
type-detect "^4.0.5"
|
||||
|
||||
chalk@^1.0.0, chalk@^1.1.3:
|
||||
version "1.1.3"
|
||||
resolved "https://registry.yarnpkg.com/chalk/-/chalk-1.1.3.tgz#a8115c55e4a702fe4d150abd3872822a7e09fc98"
|
||||
@ -18078,13 +18065,6 @@ loose-envify@^1.0.0, loose-envify@^1.1.0, loose-envify@^1.4.0:
|
||||
dependencies:
|
||||
js-tokens "^3.0.0 || ^4.0.0"
|
||||
|
||||
loupe@^2.3.1:
|
||||
version "2.3.4"
|
||||
resolved "https://registry.yarnpkg.com/loupe/-/loupe-2.3.4.tgz#7e0b9bffc76f148f9be769cb1321d3dcf3cb25f3"
|
||||
integrity sha512-OvKfgCC2Ndby6aSTREl5aCCPTNIzlDfQZvZxNUrBrihDhL3xcrYegTblhmEiCrg2kKQz4XsFIaemE5BF4ybSaQ==
|
||||
dependencies:
|
||||
get-func-name "^2.0.0"
|
||||
|
||||
lower-case-first@^1.0.0:
|
||||
version "1.0.2"
|
||||
resolved "https://registry.yarnpkg.com/lower-case-first/-/lower-case-first-1.0.2.tgz#e5da7c26f29a7073be02d52bac9980e5922adfa1"
|
||||
|
||||
Reference in New Issue
Block a user