Merge pull request #566 from omnivore-app/optimize-puppeteer
Optimize saving performance
This commit is contained in:
@ -255,22 +255,25 @@ export const parsePreparedContent = async (
|
||||
// TODO: we probably want to move this type of thing
|
||||
// to the handlers, and have some concept of postHandle
|
||||
if (article?.dom) {
|
||||
article.dom.querySelectorAll('code').forEach((e) => {
|
||||
console.log(e.textContent)
|
||||
if (e.textContent) {
|
||||
const att = hljs.highlightAuto(e.textContent)
|
||||
const code = window.document.createElement('code')
|
||||
const langClass =
|
||||
`hljs language-${att.language}` +
|
||||
(att.second_best?.language
|
||||
? ` language-${att.second_best?.language}`
|
||||
: '')
|
||||
code.setAttribute('class', langClass)
|
||||
code.innerHTML = att.value
|
||||
e.replaceWith(code)
|
||||
}
|
||||
})
|
||||
article.content = article.dom.outerHTML
|
||||
const codeBlocks = article.dom.querySelectorAll('code')
|
||||
if (codeBlocks.length > 0) {
|
||||
codeBlocks.forEach((e) => {
|
||||
console.log(e.textContent)
|
||||
if (e.textContent) {
|
||||
const att = hljs.highlightAuto(e.textContent)
|
||||
const code = window.document.createElement('code')
|
||||
const langClass =
|
||||
`hljs language-${att.language}` +
|
||||
(att.second_best?.language
|
||||
? ` language-${att.second_best?.language}`
|
||||
: '')
|
||||
code.setAttribute('class', langClass)
|
||||
code.innerHTML = att.value
|
||||
e.replaceWith(code)
|
||||
}
|
||||
})
|
||||
article.content = article.dom.outerHTML
|
||||
}
|
||||
}
|
||||
|
||||
const newWindow = new JSDOM('').window
|
||||
@ -278,16 +281,18 @@ export const parsePreparedContent = async (
|
||||
DOMPurify.addHook('uponSanitizeElement', domPurifySanitizeHook)
|
||||
const clean = DOMPurify.sanitize(article?.content || '', DOM_PURIFY_CONFIG)
|
||||
|
||||
const jsonLdLinkMetadata = await getJSONLdLinkMetadata(window.document)
|
||||
logRecord.JSONLdParsed = jsonLdLinkMetadata
|
||||
const jsonLdLinkMetadata = (async () => {
|
||||
return getJSONLdLinkMetadata(window.document)
|
||||
})()
|
||||
|
||||
Object.assign(article, {
|
||||
content: clean,
|
||||
title: article?.title || jsonLdLinkMetadata.title,
|
||||
previewImage: article?.previewImage || jsonLdLinkMetadata.previewImage,
|
||||
siteName: article?.siteName || jsonLdLinkMetadata.siteName,
|
||||
title: article?.title || (await jsonLdLinkMetadata).title,
|
||||
previewImage:
|
||||
article?.previewImage || (await jsonLdLinkMetadata).previewImage,
|
||||
siteName: article?.siteName || (await jsonLdLinkMetadata).siteName,
|
||||
siteIcon: article?.siteIcon,
|
||||
byline: article?.byline || jsonLdLinkMetadata.byline,
|
||||
byline: article?.byline || (await jsonLdLinkMetadata).byline,
|
||||
})
|
||||
logRecord.parseSuccess = true
|
||||
} catch (error) {
|
||||
|
||||
1
packages/content-fetch/.gitignore
vendored
Normal file
1
packages/content-fetch/.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
app.yaml
|
||||
41
packages/content-fetch/Dockerfile
Normal file
41
packages/content-fetch/Dockerfile
Normal file
@ -0,0 +1,41 @@
|
||||
# Image for the content-fetch service: Node 14 on Alpine, using the
# distribution's Chromium package driven through Puppeteer.
FROM node:14.18-alpine

# Installs latest Chromium (92) package.
RUN apk add --no-cache \
      chromium \
      nss \
      freetype \
      harfbuzz \
      ca-certificates \
      ttf-freefont \
      nodejs \
      yarn

# Tell Puppeteer to skip installing Chrome. We'll be using the installed package.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \
    PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium-browser

# Puppeteer v10.0.0 works with Chromium 92.
RUN yarn add puppeteer@10.0.0

# Create a dedicated user and hand it the app and download directories.
RUN addgroup -S pptruser && adduser -S -g pptruser pptruser \
    && mkdir -p /home/pptruser/Downloads /app \
    && chown -R pptruser:pptruser /home/pptruser \
    && chown -R pptruser:pptruser /app

# NOTE(review): there is no USER instruction in this file, so every step below
# (and the running service) still executes as root even though pptruser exists.
# Confirm whether switching to `USER pptruser` is intended before relying on it.
WORKDIR /app

# Runtime configuration read by fetch-content.js (CHROMIUM_PATH is used as the
# Puppeteer executablePath).
ENV CHROMIUM_PATH=/usr/bin/chromium-browser
ENV LAUNCH_HEADLESS=true

# The sources land in /app, which is already the working directory. The former
# relative `WORKDIR app` moved the working directory to an empty /app/app.
COPY . /app/

RUN yarn install --pure-lockfile

EXPOSE 8080

ENTRYPOINT ["yarn", "start"]
|
||||
26
packages/content-fetch/app.js
Normal file
26
packages/content-fetch/app.js
Normal file
@ -0,0 +1,26 @@
|
||||
|
||||
|
||||
const express = require('express');
|
||||
|
||||
const app = express();
|
||||
const fetchContent = require('./fetch-content');
|
||||
|
||||
app.use(express.json());
app.use(express.urlencoded({ extended: true }));

/**
 * Route handler shared by GET / and POST /.
 *
 * fetchContent is an async function, so any error it throws surfaces as a
 * rejected promise. Without a rejection handler the request would never be
 * answered, so failures are logged and turned into a 500 when no response
 * has been sent yet.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 */
const handleFetchRequest = (req, res) => {
  Promise.resolve()
    .then(() => fetchContent(req, res))
    .catch((err) => {
      console.error('unhandled error while fetching content', err);
      if (!res.headersSent) {
        res.sendStatus(500);
      }
    });
};

app.get('/', handleFetchRequest);
app.post('/', handleFetchRequest);

// Always parse with an explicit radix; fall back to 8080 when PORT is unset
// or not a number.
const PORT = Number.parseInt(process.env.PORT, 10) || 8080;
app.listen(PORT, () => {
  console.log(`App listening on port ${PORT}`);
  console.log('Press Ctrl+C to quit.');
});

module.exports = app;
|
||||
39
packages/content-fetch/apple-news-handler.js
Normal file
39
packages/content-fetch/apple-news-handler.js
Normal file
@ -0,0 +1,39 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const Url = require('url');
|
||||
const axios = require('axios');
|
||||
const { promisify } = require('util');
|
||||
const { DateTime } = require('luxon');
|
||||
const os = require('os');
|
||||
const jsdom = require("jsdom");
|
||||
const { Cipher } = require('crypto');
|
||||
const { JSDOM } = jsdom;
|
||||
|
||||
|
||||
|
||||
exports.appleNewsHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
const u = new URL(url);
|
||||
if (u.hostname === 'apple.news') {
|
||||
return true;
|
||||
}
|
||||
return false
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
const MOBILE_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'
|
||||
const response = await axios.get(url, { headers: { 'User-Agent': MOBILE_USER_AGENT } } );
|
||||
const data = response.data;
|
||||
|
||||
const dom = new JSDOM(data);
|
||||
|
||||
// make sure its a valid URL by wrapping in new URL
|
||||
const u = new URL(dom.window.document.querySelector('span.click-here').parentNode.href);
|
||||
return { url: u.href };
|
||||
}
|
||||
}
|
||||
40
packages/content-fetch/bloomberg-handler.js
Normal file
40
packages/content-fetch/bloomberg-handler.js
Normal file
@ -0,0 +1,40 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const os = require('os');
|
||||
const jsdom = require("jsdom");
|
||||
const { JSDOM } = jsdom;
|
||||
|
||||
exports.bloombergHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
const BLOOMBERG_URL_MATCH =
|
||||
/https?:\/\/(www\.)?bloomberg.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/
|
||||
return BLOOMBERG_URL_MATCH.test(url.toString())
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
console.log('prehandling bloomberg url', url)
|
||||
|
||||
try {
|
||||
const response = await axios.get('https://app.scrapingbee.com/api/v1', {
|
||||
params: {
|
||||
'api_key': process.env.SCRAPINGBEE_API_KEY,
|
||||
'url': url,
|
||||
'return_page_source': true,
|
||||
'block_ads': true,
|
||||
'block_resources': false,
|
||||
}
|
||||
})
|
||||
const dom = new JSDOM(response.data);
|
||||
return { title: dom.window.document.title, content: dom.window.document.querySelector('body').innerHTML, url: url }
|
||||
} catch (error) {
|
||||
console.error('error prehandling bloomberg url', error)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
36
packages/content-fetch/derstandard-handler.js
Normal file
36
packages/content-fetch/derstandard-handler.js
Normal file
@ -0,0 +1,36 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const jsdom = require("jsdom");
|
||||
const { JSDOM } = jsdom;
|
||||
|
||||
exports.derstandardHandler = {
|
||||
shouldPrehandle: (url, env) => {
|
||||
const u = new URL(url);
|
||||
return u.hostname === 'www.derstandard.at';
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
const response = await axios.get(url, {
|
||||
// set cookie to give consent to get the article
|
||||
headers: {
|
||||
'cookie': `DSGVO_ZUSAGE_V1=true; consentUUID=2bacb9c1-1e80-4be0-9f7b-ee987cf4e7b0_6`
|
||||
},
|
||||
});
|
||||
const content = response.data;
|
||||
|
||||
var title = undefined
|
||||
const dom = new JSDOM(content)
|
||||
const titleElement = dom.window.document.querySelector('.article-title')
|
||||
if (!titleElement) {
|
||||
title = titleElement.textContent
|
||||
titleElement.remove()
|
||||
}
|
||||
|
||||
return { content: dom.window.document.body.outerHTML, title: title };
|
||||
}
|
||||
}
|
||||
587
packages/content-fetch/fetch-content.js
Normal file
587
packages/content-fetch/fetch-content.js
Normal file
@ -0,0 +1,587 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const Url = require('url');
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
const chromium = require('chrome-aws-lambda');
|
||||
const axios = require('axios');
|
||||
const jwt = require('jsonwebtoken');
|
||||
const { promisify } = require('util');
|
||||
const signToken = promisify(jwt.sign);
|
||||
const { appleNewsHandler } = require('./apple-news-handler');
|
||||
const { twitterHandler } = require('./twitter-handler');
|
||||
const { youtubeHandler } = require('./youtube-handler');
|
||||
const { tDotCoHandler } = require('./t-dot-co-handler');
|
||||
const { pdfHandler } = require('./pdf-handler');
|
||||
const { mediumHandler } = require('./medium-handler');
|
||||
const { derstandardHandler } = require('./derstandard-handler');
|
||||
const { imageHandler } = require('./image-handler');
|
||||
|
||||
// User agent strings. The three desktop constants all carry the same value.
const MOBILE_USER_AGENT = 'Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.62 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
const DESKTOP_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_6_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4372.0 Safari/537.36'
const BOT_DESKTOP_USER_AGENT = DESKTOP_USER_AGENT
const NON_BOT_DESKTOP_USER_AGENT = DESKTOP_USER_AGENT

// Hosts that are requested with NON_BOT_DESKTOP_USER_AGENT (see userAgentForUrl).
const NON_BOT_HOSTS = ['bloomberg.com', 'forbes.com']

// Document content types that retrievePage lets through on 2xx responses;
// anything else is blocked.
const ALLOWED_CONTENT_TYPES = [
  'text/html',
  'application/octet-stream',
  'text/plain',
  'application/pdf',
];

// Register the stealth plugin, which hides puppeteer usage.
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
puppeteer.use(StealthPlugin());

// Register the adblocker plugin, which blocks ads and trackers.
const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker');
puppeteer.use(AdblockerPlugin({ blockTrackers: true }));
|
||||
|
||||
|
||||
/**
 * Picks the user agent string to present when loading `url`.
 *
 * @param {string} url - URL about to be loaded.
 * @returns {string} NON_BOT_DESKTOP_USER_AGENT when the host is one of
 *   NON_BOT_HOSTS or a subdomain of one; DESKTOP_USER_AGENT otherwise, and
 *   also when `url` cannot be parsed (the error is logged).
 */
const userAgentForUrl = (url) => {
  try {
    const { hostname } = new URL(url);
    // Match on a label boundary so "www.forbes.com" qualifies but
    // "notforbes.com" does not.
    const isNonBotHost = NON_BOT_HOSTS.some(
      (host) => hostname === host || hostname.endsWith(`.${host}`)
    );
    if (isNonBotHost) {
      return NON_BOT_DESKTOP_USER_AGENT;
    }
  } catch (e) {
    console.log('error getting user agent for url', url, e)
  }
  return DESKTOP_USER_AGENT
};
|
||||
|
||||
// launch Puppeteer
|
||||
// Launch options for the single shared browser.
const BROWSER_LAUNCH_OPTIONS = {
  args: chromium.args,
  defaultViewport: { height: 1080, width: 1920 },
  executablePath: process.env.CHROMIUM_PATH,
  headless: true, // process.env.LAUNCH_HEADLESS ? true : false,
  timeout: 0,
  userDataDir: '/tmp/puppeteer',
};

// Promise for the browser instance. The launch is started once, when this
// module is loaded, and retrievePage awaits this same promise on every call.
const getBrowserPromise = (async () => puppeteer.launch(BROWSER_LAUNCH_OPTIONS))();
|
||||
|
||||
let logRecord, functionStartTime;
|
||||
|
||||
/**
 * Streams the object at `contentObjUrl` into the signed upload URL via PUT.
 *
 * @param {{id: *, uploadSignedUrl: string}} upload - upload request result; only
 *   `uploadSignedUrl` is used here.
 * @param {string} contentType - value sent as the Content-Type header.
 * @param {string} contentObjUrl - URL the content is downloaded from.
 * @returns {Promise<object>} the axios response of the PUT request.
 */
const uploadToSignedUrl = async ({ id, uploadSignedUrl }, contentType, contentObjUrl) => {
  const download = await axios.get(contentObjUrl, { responseType: 'stream' });
  const putOptions = {
    headers: {
      'Content-Type': contentType,
    },
    maxBodyLength: 1000000000,
    maxContentLength: 100000000,
  };
  const uploadResponse = await axios.put(uploadSignedUrl, download.data, putOptions);
  return uploadResponse;
};
|
||||
|
||||
/**
 * Sends the `uploadFileRequest` GraphQL mutation to the backend for a PDF
 * located at `url`, authenticated as `userId` through a signed JWT cookie.
 *
 * @param {string} userId - user the token is signed for.
 * @param {string} url - URL of the PDF.
 * @returns {Promise<object>} the `uploadFileRequest` payload: `id` and
 *   `uploadSignedUrl` on success, `errorCodes` on error.
 */
const getUploadIdAndSignedUrl = async (userId, url) => {
  const authToken = await signToken({ uid: userId }, process.env.JWT_SECRET);

  const query = `mutation UploadFileRequest($input: UploadFileRequestInput!) {
      uploadFileRequest(input:$input) {
        ... on UploadFileRequestError {
          errorCodes
        }
        ... on UploadFileRequestSuccess {
          id
          uploadSignedUrl
        }
      }
    }`;
  const variables = {
    input: {
      url,
      contentType: 'application/pdf',
    }
  };
  const payload = JSON.stringify({ query, variables });

  const endpoint = `${process.env.REST_BACKEND_ENDPOINT}/graphql`;
  const requestConfig = {
    headers: {
      Cookie: `auth=${authToken};`,
      'Content-Type': 'application/json',
    },
  };

  const { data: body } = await axios.post(endpoint, payload, requestConfig);
  return body.data.uploadFileRequest;
};
|
||||
|
||||
/**
 * Validates `url`, requests an upload slot for it and copies the PDF there.
 *
 * @param {string} url - URL of the PDF.
 * @param {string} userId - user the upload is made for.
 * @returns {Promise<*>} the `id` field of the upload request result.
 * @throws when `url` fails validateUrlString or any request fails.
 */
const uploadPdf = async (url, userId) => {
  validateUrlString(url);

  const uploadTarget = await getUploadIdAndSignedUrl(userId, url);
  await uploadToSignedUrl(uploadTarget, 'application/pdf', url);

  const { id: uploadFileId } = uploadTarget;
  return uploadFileId;
};
|
||||
|
||||
/**
 * Sends the `createArticle` GraphQL mutation to the backend, authenticated as
 * `userId` through a signed JWT cookie. `source: 'puppeteer-parse'` is added
 * to the input.
 *
 * @param {string} userId - user the token is signed for.
 * @param {object} input - CreateArticleInput fields; not mutated.
 * @returns {Promise<object>} the `createArticle` payload (`createdArticle` on
 *   success, `errorCodes` on error).
 */
const sendCreateArticleMutation = async (userId, input) => {
  const data = JSON.stringify({
    query: `mutation CreateArticle ($input: CreateArticleInput!){
      createArticle(input:$input){
        ... on CreateArticleSuccess{
          createdArticle{
            id
          }
        }
        ... on CreateArticleError{
          errorCodes
        }
      }
    }`,
    variables: {
      input: Object.assign({}, input, { source: 'puppeteer-parse' }),
    },
  });

  const auth = await signToken({ uid: userId }, process.env.JWT_SECRET);
  const response = await axios.post(`${process.env.REST_BACKEND_ENDPOINT}/graphql`, data,
    {
      headers: {
        Cookie: `auth=${auth};`,
        'Content-Type': 'application/json',
      },
    });

  // Log only the status and the body. The full axios response also carries
  // the request config, i.e. the signed auth cookie and the whole submitted
  // document, which must not end up in the logs.
  console.log('response', { status: response.status, data: response.data });
  return response.data.data.createArticle;
};
|
||||
|
||||
/**
 * Creates the article record for an already uploaded PDF.
 *
 * @param {string} userId - user the article is created for.
 * @param {string} url - PDF URL; passed through encodeURI before sending.
 * @param {*} uploadFileId - id returned by the upload request.
 * @param {*} articleSavingRequestId - id of the saving request.
 * @returns {Promise<object>} the result of sendCreateArticleMutation.
 */
const saveUploadedPdf = async (userId, url, uploadFileId, articleSavingRequestId) => {
  const input = {
    url: encodeURI(url),
    articleSavingRequestId,
    uploadFileId,
  };
  return sendCreateArticleMutation(userId, input);
};
|
||||
|
||||
// Registry of site-specific handlers, keyed by a short name used in logs.
// fetchContent walks the entries in insertion order and uses the first match,
// so the order below is significant.
const handlers = {
  pdf: pdfHandler,
  'apple-news': appleNewsHandler,
  twitter: twitterHandler,
  youtube: youtubeHandler,
  't-dot-co': tDotCoHandler,
  medium: mediumHandler,
  derstandard: derstandardHandler,
  image: imageHandler,
};
|
||||
|
||||
|
||||
/**
 * Express handler that fetches the content behind a URL and submits it to the
 * backend as an article.
 *
 * Reads `url`, `userId` and `saveRequestId` from the query string or the body.
 * The URL is first offered to the registered handlers (resolve, then
 * prehandle); unless a handler supplied both title and content, or the
 * content is a PDF, the page is loaded in the browser. PDFs are uploaded and
 * saved by reference; everything else is sent as a prepared document.
 *
 * Responds 400 for a missing/invalid URL, 503 when fetching or saving fails,
 * and 200 otherwise.
 *
 * NOTE: `logRecord` and `functionStartTime` are module-level variables that
 * are reassigned on every call, so overlapping requests share them.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @returns {Promise<*>} the result of `res.sendStatus`.
 */
async function fetchContent(req, res) {
  functionStartTime = Date.now();

  // getUrl throws for a missing or invalid URL; answer 400 instead of letting
  // the rejection escape and leave the request unanswered.
  let url;
  try {
    url = getUrl(req);
  } catch (e) {
    logRecord = {
      urlIsInvalid: true,
      error: e.message,
      labels: {
        source: 'parseContent',
      },
    };
    console.log(`Valid URL to parse not specified`, logRecord);
    return res.sendStatus(400);
  }

  const userId = (req.query ? req.query.userId : undefined) || (req.body ? req.body.userId : undefined);
  const articleSavingRequestId = (req.query ? req.query.saveRequestId : undefined) || (req.body ? req.body.saveRequestId : undefined);

  console.log('user id', userId, 'url', url)

  logRecord = {
    url,
    userId,
    articleSavingRequestId,
    labels: {
      source: 'parseContent',
    },
  };

  console.log(`Article parsing request`, logRecord);

  if (!url) {
    logRecord.urlIsInvalid = true;
    console.log(`Valid URL to parse not specified`, logRecord);
    return res.sendStatus(400);
  }

  // NOTE: userId and saveRequestId are not validated here; requests without
  // them are passed on to the backend as-is.

  // Before we run the regular handlers we check to see if we need to
  // pre-resolve the URL. TODO: This should probably happen recursively,
  // so URLs can be pre-resolved, handled, pre-resolved, handled, etc.
  for (const [key, handler] of Object.entries(handlers)) {
    if (handler.shouldResolve && handler.shouldResolve(url)) {
      try {
        url = await handler.resolve(url);
        validateUrlString(url);
      } catch (err) {
        console.log('error resolving url with handler', key, err);
      }
      break;
    }
  }

  // Before we fetch the page we check the handlers, to see if they want
  // to perform a prefetch action that can modify our requests.
  // enumerate the handlers and see if any of them want to handle the request
  const handler = Object.keys(handlers).find(key => {
    try {
      return handlers[key].shouldPrehandle(url)
    } catch (e) {
      console.log('error with handler: ', key, e);
    }
    return false;
  });

  let title = undefined;
  let content = undefined;
  let contentType = undefined;

  if (handler) {
    try {
      // A handler may replace the URL and may also supply the title, the
      // content and the content type; whatever it leaves out is fetched below.
      console.log('pre-handling url with handler: ', handler);

      const result = await handlers[handler].prehandle(url);
      if (result && result.url) {
        url = result.url
        validateUrlString(url);
      }
      if (result && result.title) { title = result.title }
      if (result && result.content) { content = result.content }
      if (result && result.contentType) { contentType = result.contentType }
    } catch (e) {
      console.log('error with handler: ', handler, e);
    }
  }

  let context, page, finalUrl;

  try {
    // Loading the page happens inside the try block so that navigation errors
    // are reported as 503 like every other fetch failure.
    if ((!content || !title) && contentType !== 'application/pdf') {
      const result = await retrievePage(url)
      if (result && result.context) { context = result.context }
      if (result && result.page) { page = result.page }
      if (result && result.finalUrl) { finalUrl = result.finalUrl }
      if (result && result.contentType) { contentType = result.contentType }
    } else {
      finalUrl = url
    }

    if (contentType === 'application/pdf') {
      const uploadedFileId = await uploadPdf(finalUrl, userId);
      await saveUploadedPdf(userId, finalUrl, uploadedFileId, articleSavingRequestId);
    } else {
      if (!content || !title) {
        const result = await retrieveHtml(page);
        title = result.title;
        content = result.domContent;
      } else {
        console.log('using prefetched content and title');
      }

      logRecord.fetchContentTime = Date.now() - functionStartTime;

      const apiResponse = await sendCreateArticleMutation(userId, {
        url: finalUrl,
        articleSavingRequestId,
        preparedDocument: {
          document: content,
          pageInfo: {
            title,
            canonicalUrl: finalUrl,
          },
        },
        skipParsing: !content,
      });

      logRecord.totalTime = Date.now() - functionStartTime;
      // sendCreateArticleMutation already returns the `createArticle` payload.
      logRecord.result = apiResponse;
      console.log(`parse-page`, logRecord);
    }
  } catch (e) {
    console.log('error', e)
    logRecord.error = e.message;
    console.log(`Error while retrieving page`, logRecord);
    return res.sendStatus(503);
  } finally {
    if (context) {
      try {
        await context.close();
      } catch (closeError) {
        // A failure to close must not replace the response decided above.
        console.log('error closing browser context', closeError);
      }
    }
  }

  return res.sendStatus(200);
}
|
||||
|
||||
/**
 * Rejects URLs that must not be fetched by this service.
 *
 * The URL is parsed with the WHATWG URL parser, so the checks run against the
 * normalized hostname.
 *
 * @param {string} url - URL to validate.
 * @returns {void}
 * @throws {TypeError} when `url` cannot be parsed.
 * @throws {Error} when the protocol is not http/https, the host is a loopback
 *   address / localhost, or the host is a private or link-local IPv4 address.
 */
function validateUrlString(url) {
  const u = new URL(url);
  // Make sure the URL is http or https
  if (u.protocol !== 'http:' && u.protocol !== 'https:') {
    throw new Error('Invalid URL protocol check failed')
  }

  const hostname = u.hostname;
  // Only a complete dotted quad is treated as an IP address, so a domain such
  // as "10.example.com" is not mistaken for one.
  const quad = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(hostname);
  const first = quad ? Number(quad[1]) : -1;
  const second = quad ? Number(quad[2]) : -1;

  // Make sure the domain is not localhost (including 127.0.0.0/8 and ::1)
  if (
    hostname === 'localhost' ||
    hostname === '0.0.0.0' ||
    hostname === '[::1]' ||
    first === 127
  ) {
    throw new Error('Invalid URL is localhost')
  }

  // Make sure the domain is not a private IP: 10.0.0.0/8, 172.16.0.0/12,
  // 192.168.0.0/16, plus the link-local range 169.254.0.0/16.
  const isPrivate =
    first === 10 ||
    (first === 172 && second >= 16 && second <= 31) ||
    (first === 192 && second === 168) ||
    (first === 169 && second === 254);
  if (isPrivate) {
    throw new Error('Invalid URL is private ip')
  }
}
|
||||
|
||||
/**
 * Extracts and validates the target URL of a request.
 *
 * The `url` query parameter takes precedence over `url` in the body.
 *
 * @param {object} req - Express request.
 * @returns {string} the `href` of the URL as parsed by the legacy `url` module.
 * @throws {Error} when no URL is given or it fails validateUrlString.
 */
function getUrl(req) {
  console.log('body', req.body)

  const fromQuery = req.query ? req.query.url : undefined;
  const fromBody = req.body ? req.body.url : undefined;
  const candidate = fromQuery || fromBody;
  if (!candidate) {
    throw new Error('No URL specified');
  }

  validateUrlString(candidate);

  const { href } = Url.parse(candidate);
  return href;
}
|
||||
|
||||
/**
 * Opens `url` in a fresh incognito browser context and waits for the network
 * to become idle.
 *
 * Document responses with a 2xx status and a content type outside
 * ALLOWED_CONTENT_TYPES are blocked. Navigating to a PDF makes `page.goto`
 * fail, so the URL of the last PDF response is remembered and returned in
 * that case.
 *
 * The caller owns the returned `context` and must close it. When this
 * function throws, the context has already been closed.
 *
 * @param {string} url - URL to load; validated with validateUrlString.
 * @returns {Promise<{context: object, page: object, response: (object|undefined),
 *   finalUrl: string, contentType: string}>} `contentType` is the response's
 *   media type without parameters, lower-cased; `response` is absent on the
 *   PDF fallback path.
 * @throws when validation, page setup or navigation fails and no PDF response
 *   was seen.
 */
async function retrievePage(url) {
  validateUrlString(url);

  // "application/pdf; charset=binary" -> "application/pdf"
  const mediaTypeOf = (headerValue) => (headerValue || '').split(';')[0].trim().toLowerCase();

  const browser = await getBrowserPromise;
  logRecord.timing = { ...logRecord.timing, browserOpened: Date.now() - functionStartTime };

  const context = await browser.createIncognitoBrowserContext();

  try {
    const page = await context.newPage();
    await page.setUserAgent(userAgentForUrl(url));

    const client = await page.target().createCDPSession();

    // intercept request when response headers was received
    await client.send('Network.setRequestInterception', {
      patterns: [
        {
          urlPattern: '*',
          resourceType: 'Document',
          interceptionStage: 'HeadersReceived',
        },
      ],
    });

    const path = require('path');
    const download_path = path.resolve('./download_dir/');

    await page._client.send('Page.setDownloadBehavior', {
      behavior: 'allow',
      userDataDir: './',
      downloadPath: download_path,
    })

    client.on('Network.requestIntercepted', async e => {
      const headers = e.responseHeaders || {};

      const contentType = mediaTypeOf(headers['content-type'] || headers['Content-Type']);
      const obj = { interceptionId: e.interceptionId };

      if (e.responseStatusCode >= 200 && e.responseStatusCode < 300) {
        // We only check content-type on success responses
        // as it doesn't matter what the content type is for things
        // like redirects
        if (contentType && !ALLOWED_CONTENT_TYPES.includes(contentType)) {
          obj['errorReason'] = 'BlockedByClient';
        }
      }

      try {
        await client.send('Network.continueInterceptedRequest', obj);
        // Best effort: the interception may already be gone.
        // eslint-disable-next-line no-empty
      } catch {}
    });

    // NOTE(review): request interception is switched on here, but this
    // function registers no 'request' listener of its own. Presumably the
    // adblocker plugin registered at module load continues the requests;
    // confirm before removing this call or adding a second listener.
    await page.setRequestInterception(true);

    // Puppeteer fails during download of PDf files,
    // so record the failure and use those items
    let lastPdfUrl = undefined;
    page.on('response', response => {
      // Compare the media type only, so a header with parameters still counts.
      if (mediaTypeOf(response.headers()['content-type']) === 'application/pdf') {
        lastPdfUrl = response.url();
      }
    });

    try {
      const response = await page.goto(url, { waitUntil: ['networkidle2'] });
      const finalUrl = response.url();
      const rawContentType = response.headers()['content-type'];

      logRecord.finalUrl = finalUrl;
      logRecord.contentType = rawContentType;

      return { context, page, response, finalUrl: finalUrl, contentType: mediaTypeOf(rawContentType) };
    } catch (error) {
      if (lastPdfUrl) {
        return { context, page, finalUrl: lastPdfUrl, contentType: 'application/pdf' };
      }
      throw error;
    }
  } catch (error) {
    // Nobody else holds a reference to the context at this point; close it so
    // failed loads do not pile up open contexts in the shared browser.
    try {
      await context.close();
    } catch (closeError) {
      console.log('error closing browser context', closeError);
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Captures the title and the cleaned-up HTML of a loaded page.
 *
 * Steps: read the title, scroll to the bottom (bounded by a one second
 * timeout) so lazy content can load, collect the body HTML of instagram
 * iframes, then rewrite the DOM in the page (drop blurred images, turn
 * near-empty background-image elements into `<img>`, inline instagram embeds)
 * and serialize it.
 *
 * Errors are not thrown; they are recorded on the module-level `logRecord`
 * and whatever was captured so far is returned.
 *
 * @param {object} page - Puppeteer page that has finished navigating.
 * @returns {Promise<{domContent: string, title: (string|undefined)}>}
 *   `domContent` is the inner HTML of the document element, or '' on failure.
 */
async function retrieveHtml(page) {
  let domContent = '', title;
  try {
    title = await page.title();
    logRecord.title = title;

    const pageScrollingStart = Date.now();
    // Scroll to the bottom; a scroll failure is only flagged, never thrown.
    const scrolling = (async () => {
      try {
        await page.evaluate(`(async () => {
          /* credit: https://github.com/puppeteer/puppeteer/issues/305 */
          return new Promise((resolve, reject) => {
            let scrollHeight = document.body.scrollHeight;
            let totalHeight = 0;
            let distance = 500;
            let timer = setInterval(() => {
              window.scrollBy(0, distance);
              totalHeight += distance;
              if(totalHeight >= scrollHeight){
                clearInterval(timer);
                resolve(true);
              }
            }, 10);
          });
        })()`);
      } catch (e) {
        logRecord.scrollError = true;
      }
    })();
    /* scroll with a 1 second timeout */
    await Promise.race([
      scrolling,
      page.waitForTimeout(1000),
    ]);
    logRecord.timing = { ...logRecord.timing, pageScrolled: Date.now() - pageScrollingStart };

    const iframes = {};
    const urls = [];
    const framesPromises = [];
    // No "g" flag: a global regex keeps lastIndex between test() calls and
    // would intermittently miss frames when reused in the loop below.
    const allowedUrls = /instagram\.com/i;

    for (const frame of page.mainFrame().childFrames()) {
      if (frame.url() && allowedUrls.test(frame.url())) {
        urls.push(frame.url());
        framesPromises.push(frame.evaluate(el => el.innerHTML, await frame.$('body')));
      }
    }

    (await Promise.all(framesPromises)).forEach((frame, index) => (iframes[urls[index]] = frame));

    const domContentCapturingStart = Date.now();
    // This callback runs inside the page, so it can only use what is defined
    // in its own body plus the `iframes` argument.
    domContent = await page.evaluate(iframes => {
      // Non-global on purpose, so exec() carries no state between elements.
      const BI_SRC_REGEXP = /url\("(.+?)"\)/i;

      Array.from(document.body.getElementsByTagName('*')).forEach(el => {
        const style = window.getComputedStyle(el);

        // Removing blurred images since they are mostly the copies of lazy loaded ones
        if (['img', 'image'].includes(el.tagName.toLowerCase())) {
          const filter = style.getPropertyValue('filter');
          if (filter && filter.startsWith('blur')) {
            el.parentNode && el.parentNode.removeChild(el);
          }
        }

        // convert all nodes with background image to img nodes
        if (!['', 'none'].includes(style.getPropertyValue('background-image'))) {
          const filter = style.getPropertyValue('filter');
          // avoiding image nodes with a blur effect creation
          if (filter && filter.startsWith('blur')) {
            el && el.parentNode && el.parentNode.removeChild(el);
          } else {
            const matchedSRC = BI_SRC_REGEXP.exec(style.getPropertyValue('background-image'));

            if (matchedSRC && matchedSRC[1] && !el.src) {
              // Replacing element only of there are no content inside, b/c might remove important div with content.
              // Article example: http://www.josiahzayner.com/2017/01/genetic-designer-part-i.html
              // DIV with class "content-inner" has `url("https://resources.blogblog.com/blogblog/data/1kt/travel/bg_container.png")` background image.
              if (el.innerHTML.length < 25) {
                const img = document.createElement('img');
                img.src = matchedSRC[1];
                // Swap the element for the image. It used to be removed
                // outright, which discarded the image that was just built.
                el && el.parentNode && el.parentNode.replaceChild(img, el);
              }
            }
          }
        }

        if (el.tagName === 'IFRAME') {
          if (iframes[el.src]) {
            const newNode = document.createElement('div');
            newNode.className = 'omnivore-instagram-embed';
            newNode.innerHTML = iframes[el.src];
            el && el.parentNode && el.parentNode.replaceChild(newNode, el);
          }
        }
      });
      return document.documentElement.innerHTML;
    }, iframes);
    logRecord.puppeteerSuccess = true;
    logRecord.timing = {
      ...logRecord.timing,
      contenCaptured: Date.now() - domContentCapturingStart,
    };

    // [END puppeteer-block]
  } catch (e) {
    if (e.message.startsWith('net::ERR_BLOCKED_BY_CLIENT at ')) {
      logRecord.blockedByClient = true;
    } else {
      logRecord.puppeteerSuccess = false;
      logRecord.puppeteerError = {
        message: e.message,
        stack: e.stack,
      };
    }
  }
  return { domContent, title };
}
|
||||
|
||||
module.exports = fetchContent;
|
||||
34
packages/content-fetch/image-handler.js
Normal file
34
packages/content-fetch/image-handler.js
Normal file
@ -0,0 +1,34 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
|
||||
|
||||
exports.imageHandler = {
|
||||
shouldPrehandle: (url, env) => {
|
||||
const IMAGE_URL_PATTERN =
|
||||
/(https?:\/\/.*\.(?:jpg|jpeg|png|webp))/i
|
||||
return IMAGE_URL_PATTERN.test(url.toString())
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
const title = url.toString().split('/').pop();
|
||||
const content = `
|
||||
<html>
|
||||
<head>
|
||||
<title>${title}</title>
|
||||
<meta property="og:image" content="${url}" />
|
||||
<meta property="og:title" content="${title}" />
|
||||
</head>
|
||||
<body>
|
||||
<div>
|
||||
<img src="${url}" alt="${title}">
|
||||
</div>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
return { title, content };
|
||||
}
|
||||
}
|
||||
33
packages/content-fetch/medium-handler.js
Normal file
33
packages/content-fetch/medium-handler.js
Normal file
@ -0,0 +1,33 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const os = require('os');
|
||||
const jsdom = require("jsdom");
|
||||
const { JSDOM } = jsdom;
|
||||
|
||||
exports.mediumHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
const MEDIUM_URL_MATCH =
|
||||
/https?:\/\/(www\.)?medium.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)/
|
||||
const res = MEDIUM_URL_MATCH.test(url.toString())
|
||||
return res
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
console.log('prehandling medium url', url)
|
||||
|
||||
try {
|
||||
const res = new URL('https://example.org:81/foo');
|
||||
myURL.searchParams.delete('source');
|
||||
return { url: res }
|
||||
} catch (error) {
|
||||
console.error('error prehandling bloomberg url', error)
|
||||
throw error
|
||||
}
|
||||
}
|
||||
}
|
||||
25
packages/content-fetch/package.json
Normal file
25
packages/content-fetch/package.json
Normal file
@ -0,0 +1,25 @@
|
||||
{
|
||||
"name": "@omnivore/content-fetch",
|
||||
"version": "1.0.0",
|
||||
"description": "Service that fetches page content from a URL",
|
||||
"main": "index.js",
|
||||
"dependencies": {
|
||||
"@cliqz/adblocker-puppeteer": "^1.23.7",
|
||||
"ad-block-js": "^0.0.2",
|
||||
"axios": "^0.26.0",
|
||||
"dotenv": "^8.2.0",
|
||||
"chrome-aws-lambda": "^10.1.0",
|
||||
"express": "^4.17.1",
|
||||
"jsdom": "^19.0.0",
|
||||
"jsonwebtoken": "^8.5.1",
|
||||
"luxon": "^2.3.1",
|
||||
"puppeteer-core": "^13.7.0",
|
||||
"puppeteer-extra": "^3.2.3",
|
||||
"puppeteer-extra-plugin-adblocker": "^2.12.0",
|
||||
"puppeteer-extra-plugin-stealth": "^2.9.0"
|
||||
},
|
||||
"scripts": {
|
||||
"start": "node app.js",
|
||||
"test": "yarn mocha"
|
||||
}
|
||||
}
|
||||
21
packages/content-fetch/pdf-handler.js
Normal file
21
packages/content-fetch/pdf-handler.js
Normal file
@ -0,0 +1,21 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const Url = require('url');
|
||||
|
||||
|
||||
exports.pdfHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
const u = Url.parse(url)
|
||||
const path = u.path.replace(u.search, '')
|
||||
return path.endsWith('.pdf')
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
return { contentType: 'application/pdf' };
|
||||
}
|
||||
}
|
||||
32
packages/content-fetch/t-dot-co-handler.js
Normal file
32
packages/content-fetch/t-dot-co-handler.js
Normal file
@ -0,0 +1,32 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const Url = require('url');
|
||||
|
||||
|
||||
exports.tDotCoHandler = {
|
||||
|
||||
shouldResolve: function (url, env) {
|
||||
const T_DOT_CO_URL_MATCH = /^https:\/\/(?:www\.)?t\.co\/.*$/;
|
||||
console.log('should preresolve?', T_DOT_CO_URL_MATCH.test(url), url)
|
||||
return T_DOT_CO_URL_MATCH.test(url);
|
||||
},
|
||||
|
||||
resolve: async function(url, env) {
|
||||
return await axios.get(url, { maxRedirects: 0, validateStatus: null })
|
||||
.then(res => {
|
||||
return Url.parse(res.headers.location).href;
|
||||
}).catch((err) => {
|
||||
console.log('err with t.co url', err);
|
||||
return undefined;
|
||||
});
|
||||
},
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
return false
|
||||
},
|
||||
}
|
||||
9
packages/content-fetch/test/apple-news-handler.test.js
Normal file
9
packages/content-fetch/test/apple-news-handler.test.js
Normal file
@ -0,0 +1,9 @@
|
||||
const { expect } = require('chai')
|
||||
const { appleNewsHandler } = require('../apple-news-handler')
|
||||
|
||||
// Smoke test: pre-handles a real apple.news link and prints whatever comes
// back. It makes no assertion on the result, so it only fails when
// prehandle() rejects.
describe('open a simple web page', () => {
  it('should return a response', async () => {
    const appleNewsUrl = 'https://apple.news/AxjzaZaPvSn23b67LhXI5EQ'
    const result = await appleNewsHandler.prehandle(appleNewsUrl)
    console.log('response', result)
  })
})
|
||||
12
packages/content-fetch/test/youtube-handler.test.js
Normal file
12
packages/content-fetch/test/youtube-handler.test.js
Normal file
@ -0,0 +1,12 @@
|
||||
const { expect } = require('chai')
|
||||
const { youtubeHandler } = require('../youtube-handler')
|
||||
|
||||
// Checks that getVideoId extracts the id from the URL shapes YouTube uses:
// watch links (with extra query parameters) and youtu.be short links.
describe('getVideoId', () => {
  it('should parse video id out of a URL', async () => {
    // [expected video id, input URL] pairs, checked in order.
    const cases = [
      ['BnSUk0je6oo', 'https://www.youtube.com/watch?v=BnSUk0je6oo&t=269s'],
      ['vFD2gu007dc', 'https://www.youtube.com/watch?v=vFD2gu007dc&list=RDvFD2gu007dc&start_radio=1'],
      ['vFD2gu007dc', 'https://youtu.be/vFD2gu007dc'],
      ['BMFVCnbRaV4', 'https://youtube.com/watch?v=BMFVCnbRaV4&feature=share'],
      ['cg9b4RC87LI', 'https://youtu.be/cg9b4RC87LI?t=116'],
    ]

    for (const [videoId, videoUrl] of cases) {
      expect(videoId).to.eq(youtubeHandler.getVideoId(videoUrl));
    }
  })
})
|
||||
170
packages/content-fetch/twitter-handler.js
Normal file
170
packages/content-fetch/twitter-handler.js
Normal file
@ -0,0 +1,170 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
const { DateTime } = require('luxon');
|
||||
|
||||
// Bearer token for the Twitter API, read from the environment. When it is
// unset, twitterHandler.shouldPrehandle evaluates to a falsy value.
const TWITTER_BEARER_TOKEN = process.env.TWITTER_BEARER_TOKEN;
|
||||
// Matches tweet status URLs (".../<name>/status/<digits>" or ".../statuses/...",
// optionally behind a legacy "#!/" prefix). Capture group 1 is the word-character
// segment before "/status" and capture group 2 is the run of digits after it.
const TWITTER_URL_MATCH = /twitter\.com\/(?:#!\/)?(\w+)\/status(?:es)?\/(\d+)(?:\/.*)?/
|
||||
|
||||
/**
 * Requests the oEmbed representation of a tweet from publish.twitter.com.
 *
 * @param {string|URL} url - URL of the tweet to embed.
 * @returns {Promise<object>} the axios response for the oEmbed request.
 */
const embeddedTweet = async (url) => {
  const BASE_ENDPOINT = 'https://publish.twitter.com/oembed'

  const apiUrl = new URL(BASE_ENDPOINT)
  apiUrl.searchParams.append('url', url);
  apiUrl.searchParams.append('omit_script', true);
  apiUrl.searchParams.append('dnt', true);

  // `redirect: "follow"` is a fetch() option, not an HTTP header; sending it
  // as a header had no effect, and axios already follows redirects by default.
  return axios.get(apiUrl.toString(), {
    headers: {
      Authorization: `Bearer ${TWITTER_BEARER_TOKEN}`,
    },
  });
};
|
||||
|
||||
/**
 * Builds the query-string fragment that selects which tweet, expansion, user
 * and media fields to request.
 *
 * @returns {string} the fragment; every parameter, including the first one,
 *   is prefixed with `&`.
 */
const getTweetFields = () => {
  // Segments are concatenated in order with no separator.
  const querySegments = [
    // tweet.fields (split over three pieces)
    "&tweet.fields=attachments,author_id,conversation_id,created_at,",
    "entities,geo,in_reply_to_user_id,lang,possibly_sensitive,public_metrics,referenced_tweets,",
    "source,withheld",
    // expansions
    "&expansions=author_id,attachments.media_keys",
    // user.fields
    "&user.fields=created_at,description,entities,location,pinned_tweet_id,profile_image_url,protected,public_metrics,url,verified,withheld",
    // media.fields
    "&media.fields=duration_ms,height,preview_image_url,url,media_key,public_metrics,width",
  ];

  return querySegments.join('');
}
|
||||
|
||||
/**
 * Looks up a single tweet through the Twitter v2 API, requesting the fields
 * and expansions produced by getTweetFields().
 *
 * @param {string} id - Tweet id.
 * @returns {Promise<object>} the axios response for the lookup.
 */
const getTweetById = async (id) => {
  const BASE_ENDPOINT = "https://api.twitter.com/2/tweets/";

  // Encode the id so it can only ever occupy a single path segment.
  const apiUrl = new URL(BASE_ENDPOINT + encodeURIComponent(id))
  // getTweetFields() returns a fragment that starts with '&'; drop that
  // separator so the query does not begin with an empty parameter ("?&...").
  apiUrl.search = getTweetFields().replace(/^&/, '')

  // `redirect: "follow"` is a fetch() option, not an HTTP header; sending it
  // as a header had no effect, and axios already follows redirects by default.
  return axios.get(apiUrl.toString(), {
    headers: {
      Authorization: `Bearer ${TWITTER_BEARER_TOKEN}`,
    },
  });
};
|
||||
|
||||
/**
 * Looks up a Twitter user by username through the v2 API, asking for the
 * `profile_image_url` user field.
 *
 * @param {string} username - Account name without the leading "@".
 * @returns {Promise<object>} the axios response for the lookup.
 */
const getUserByUsername = async (username) => {
  const BASE_ENDPOINT = "https://api.twitter.com/2/users/by/username/";

  // Encode the username so it can only ever occupy a single path segment.
  const apiUrl = new URL(BASE_ENDPOINT + encodeURIComponent(username))
  apiUrl.searchParams.append('user.fields', 'profile_image_url');

  // `redirect: "follow"` is a fetch() option, not an HTTP header; sending it
  // as a header had no effect, and axios already follows redirects by default.
  return axios.get(apiUrl.toString(), {
    headers: {
      Authorization: `Bearer ${TWITTER_BEARER_TOKEN}`,
    },
  });
};
|
||||
|
||||
/**
 * Builds a page title from a response object whose `data.author_name` holds
 * the author's display name.
 *
 * @param {{data: {author_name: string}}} tweet
 * @returns {string} "<author name> on Twitter"
 */
const titleForTweet = (tweet) => {
  const { author_name: authorName } = tweet.data;
  return `${authorName} on Twitter`
};
|
||||
|
||||
/**
 * Builds a page title from an author object.
 *
 * @param {{name: string}} author
 * @returns {string} "<name> on Twitter"
 */
const titleForAuthor = (author) => {
  const { name: displayName } = author;
  return `${displayName} on Twitter`
};
|
||||
|
||||
/**
 * Returns the first capture group of TWITTER_URL_MATCH for the given URL.
 * Throws a TypeError when the URL does not match the pattern.
 */
const usernameFromStatusUrl = (url) => {
  const statusUrl = url.toString()
  const groups = TWITTER_URL_MATCH.exec(statusUrl)
  return groups[1]
};
|
||||
|
||||
/**
 * Returns the second capture group of TWITTER_URL_MATCH (the digits after
 * "/status/") for the given URL. Throws a TypeError when the URL does not
 * match the pattern.
 */
const tweetIdFromStatusUrl = (url) => {
  const statusUrl = url.toString()
  const groups = TWITTER_URL_MATCH.exec(statusUrl)
  return groups[2]
};
|
||||
|
||||
/**
 * Formats a timestamp with luxon's DATETIME_FULL preset.
 *
 * @param {string|number|Date} timestamp - Anything the Date constructor accepts.
 * @returns {string} the formatted date and time.
 */
const formatTimestamp = (timestamp) => {
  const jsDate = new Date(timestamp)
  const dateTime = DateTime.fromJSDate(jsDate)
  return dateTime.toLocaleString(DateTime.DATETIME_FULL)
};
|
||||
|
||||
exports.twitterHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
return TWITTER_BEARER_TOKEN && TWITTER_URL_MATCH.test(url.toString())
|
||||
},
|
||||
|
||||
// version of the handler that uses the oembed API
|
||||
// This isn't great as it doesn't work well with our
|
||||
// readability API. But could potentially give a more consistent
|
||||
// look to the tweets
|
||||
// prehandle: async (url, env) => {
|
||||
// const oeTweet = await embeddedTweet(url)
|
||||
// const dom = new JSDOM(oeTweet.data.html);
|
||||
// const bq = dom.window.document.querySelector('blockquote')
|
||||
// console.log('blockquote:', bq);
|
||||
|
||||
// const title = titleForTweet(oeTweet)
|
||||
// return { title, content: '<div>' + bq.innerHTML + '</div>', url: oeTweet.data.url };
|
||||
// }
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
console.log('prehandling twitter url', url)
|
||||
|
||||
const tweetId = tweetIdFromStatusUrl(url)
|
||||
const tweetData = (await getTweetById(tweetId)).data;
|
||||
const authorId = tweetData.data.author_id;
|
||||
const author = tweetData.includes.users.filter(u => u.id = authorId)[0];
|
||||
const title = titleForAuthor(author)
|
||||
const authorImage = author.profile_image_url.replace('_normal', '_400x400')
|
||||
|
||||
let text = tweetData.data.text;
|
||||
if (tweetData.data.entities && tweetData.data.entities.urls) {
|
||||
for (let urlObj of tweetData.data.entities.urls) {
|
||||
text = text.replace(
|
||||
urlObj.url,
|
||||
`<a href="${urlObj.expanded_url}">${urlObj.display_url}</a>`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const front = `
|
||||
<div>
|
||||
<p>${text}</p>
|
||||
`
|
||||
|
||||
var includesHtml = '';
|
||||
if (tweetData.includes.media) {
|
||||
includesHtml = tweetData.includes.media.map(m => {
|
||||
const linkUrl = m.type == 'photo' ? m.url : url;
|
||||
const previewUrl = m.type == 'photo' ? m.url : m.preview_image_url;
|
||||
const mediaOpen = `<a class="media-link" href=${linkUrl}>
|
||||
<picture>
|
||||
<img class="tweet-img" src=${previewUrl} />
|
||||
</picture>
|
||||
</a>`
|
||||
return mediaOpen
|
||||
}).join('\n');
|
||||
}
|
||||
|
||||
const back = `
|
||||
— <a href="https://twitter.com/${author.username}">${author.username}</a> ${author.name} <a href="${url}">${formatTimestamp(tweetData.data.created_at)}</a>
|
||||
</div>
|
||||
`
|
||||
const content = `
|
||||
<head>
|
||||
<meta property="og:image" content="${authorImage}" />
|
||||
<meta property="og:image:secure_url" content="${authorImage}" />
|
||||
<meta property="og:title" content="${title}" />
|
||||
<meta property="og:description" content="${tweetData.data.text}" />
|
||||
</head>
|
||||
<body>
|
||||
${front}
|
||||
${includesHtml}
|
||||
${back}
|
||||
</body>`
|
||||
|
||||
return { content, url, title };
|
||||
}
|
||||
}
|
||||
68
packages/content-fetch/youtube-handler.js
Normal file
68
packages/content-fetch/youtube-handler.js
Normal file
@ -0,0 +1,68 @@
|
||||
/* eslint-disable no-undef */
|
||||
/* eslint-disable no-empty */
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
require('dotenv').config();
|
||||
const axios = require('axios');
|
||||
|
||||
const YOUTUBE_URL_MATCH =
|
||||
/^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/
|
||||
|
||||
exports.youtubeHandler = {
|
||||
|
||||
shouldPrehandle: (url, env) => {
|
||||
return YOUTUBE_URL_MATCH.test(url.toString())
|
||||
},
|
||||
|
||||
getVideoId: (url) => {
|
||||
const u = new URL(url);
|
||||
const videoId = u.searchParams['v']
|
||||
if (!videoId) {
|
||||
const match = url.toString().match(YOUTUBE_URL_MATCH)
|
||||
if (match === null || match.length < 6 || !match[5]) {
|
||||
return undefined
|
||||
}
|
||||
return match[5]
|
||||
}
|
||||
return videoId
|
||||
},
|
||||
|
||||
prehandle: async (url, env) => {
|
||||
const videoId = getVideoId(url)
|
||||
if (!videoId) {
|
||||
return {}
|
||||
}
|
||||
|
||||
const oembedUrl = `https://www.youtube.com/oembed?format=json&url=` + encodeURIComponent(`https://www.youtube.com/watch?v=${videoId}`)
|
||||
const oembed = (await axios.get(oembedUrl.toString())).data;
|
||||
const title = oembed.title;
|
||||
const ratio = oembed.width / oembed.height;
|
||||
const thumbnail = oembed.thumbnail_url;
|
||||
const height = 350;
|
||||
const width = height * ratio;
|
||||
|
||||
const content = `
|
||||
<html>
|
||||
<head><title>${title}</title>
|
||||
<meta property="og:image" content="${thumbnail}" />
|
||||
<meta property="og:image:secure_url" content="${thumbnail}" />
|
||||
<meta property="og:title" content="${title}" />
|
||||
<meta property="og:description" content="" />
|
||||
<meta property="og:article:author" content="${oembed.author_name}" />
|
||||
</head>
|
||||
<body>
|
||||
<center>
|
||||
<iframe width="${width}" height="${height}" src="https://www.youtube.com/embed/${videoId}" title="${title}" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>
|
||||
</center>
|
||||
<br />
|
||||
<a href="${url}">${title}</a>
|
||||
<div itemscope="" itemprop="author" itemtype="http://schema.org/Person">By <a href="${oembed.author_url}">${oembed.author_name}</a></div>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
console.log('got video id', videoId)
|
||||
|
||||
return { content, title: 'Youtube Content' };
|
||||
}
|
||||
}
|
||||
@ -6,7 +6,6 @@
|
||||
require('dotenv').config();
|
||||
const Url = require('url');
|
||||
const chromium = require('chrome-aws-lambda');
|
||||
const puppeteer = require('puppeteer-core');
|
||||
const axios = require('axios');
|
||||
const jwt = require('jsonwebtoken');
|
||||
const { promisify } = require('util');
|
||||
@ -25,6 +24,14 @@ const { pdfHandler } = require('./pdf-handler');
|
||||
const { mediumHandler } = require('./medium-handler');
|
||||
const { derstandardHandler } = require('./derstandard-handler');
|
||||
const { imageHandler } = require('./image-handler');
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
|
||||
// Add stealth plugin to hide puppeteer usage
|
||||
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
|
||||
puppeteer.use(StealthPlugin());
|
||||
// Add adblocker plugin to block ads and trackers
|
||||
const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker');
|
||||
puppeteer.use(AdblockerPlugin({ blockTrackers: true }));
|
||||
|
||||
const storage = new Storage();
|
||||
const ALLOWED_ORIGINS = process.env.ALLOWED_ORIGINS ? process.env.ALLOWED_ORIGINS.split(',') : [];
|
||||
@ -126,6 +133,7 @@ const getBrowserPromise = (async () => {
|
||||
executablePath: process.env.CHROMIUM_PATH || (await chromium.executablePath),
|
||||
headless: process.env.LAUNCH_HEADLESS ? true : chromium.headless,
|
||||
timeout: 0,
|
||||
userDataDir: '/tmp/puppeteer',
|
||||
});
|
||||
})();
|
||||
|
||||
@ -355,6 +363,8 @@ exports.puppeteer = Sentry.GCPFunction.wrapHttpFunction(async (req, res) => {
|
||||
console.log(content);
|
||||
}
|
||||
|
||||
logRecord.timing.contentFetchTime = Date.now() - functionStartTime;
|
||||
|
||||
const apiResponse = await sendCreateArticleMutation(userId, {
|
||||
url: finalUrl,
|
||||
articleSavingRequestId,
|
||||
@ -368,6 +378,7 @@ exports.puppeteer = Sentry.GCPFunction.wrapHttpFunction(async (req, res) => {
|
||||
skipParsing: !content,
|
||||
});
|
||||
|
||||
logRecord.timing.totalTime = Date.now() - functionStartTime;
|
||||
logRecord.result = apiResponse.createArticle;
|
||||
logger.info(`parse-page`, logRecord);
|
||||
}
|
||||
@ -598,28 +609,6 @@ async function retrievePage(url) {
|
||||
} catch {}
|
||||
});
|
||||
|
||||
/*
|
||||
* Disallow MathJax from running in Puppeteer and modifying the document,
|
||||
* we shall instead run it in our frontend application to transform any
|
||||
* mathjax content when present.
|
||||
*/
|
||||
await page.setRequestInterception(true);
|
||||
let requestCount = 0;
|
||||
page.on('request', request => {
|
||||
if (requestCount++ > 100) {
|
||||
request.abort();
|
||||
return;
|
||||
}
|
||||
if (
|
||||
request.resourceType() === 'script' &&
|
||||
request.url().toLowerCase().indexOf('mathjax') > -1
|
||||
) {
|
||||
request.abort();
|
||||
} else {
|
||||
request.continue();
|
||||
}
|
||||
});
|
||||
|
||||
// Puppeteer fails during download of PDf files,
|
||||
// so record the failure and use those items
|
||||
let lastPdfUrl = undefined;
|
||||
|
||||
@ -13,7 +13,10 @@
|
||||
"jsdom": "^19.0.0",
|
||||
"jsonwebtoken": "^8.5.1",
|
||||
"luxon": "^2.3.1",
|
||||
"puppeteer-core": "^7.1.0",
|
||||
"puppeteer-core": "^13.7.0",
|
||||
"puppeteer-extra": "^3.2.3",
|
||||
"puppeteer-extra-plugin-adblocker": "^2.12.0",
|
||||
"puppeteer-extra-plugin-stealth": "^2.9.0",
|
||||
"winston": "^3.3.3"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
363
yarn.lock
363
yarn.lock
@ -2031,6 +2031,50 @@
|
||||
commander "^4.1.0"
|
||||
microtime "^3.0.0"
|
||||
|
||||
"@cliqz/adblocker-content@^1.22.6", "@cliqz/adblocker-content@^1.23.7":
|
||||
version "1.23.7"
|
||||
resolved "https://registry.yarnpkg.com/@cliqz/adblocker-content/-/adblocker-content-1.23.7.tgz#21a1035e479d8f4dc4e7ecc2500cf8a4149eaace"
|
||||
integrity sha512-tgCFcWhBty+WI3ObYBLDk56i1G7nScd5TBAOXlQf2EIbVJ+DC+G0YE91YHhQpzfy4ySI/wRGyEHrKzTRgmkuKQ==
|
||||
dependencies:
|
||||
"@cliqz/adblocker-extended-selectors" "^1.23.7"
|
||||
|
||||
"@cliqz/adblocker-extended-selectors@^1.23.7":
|
||||
version "1.23.7"
|
||||
resolved "https://registry.yarnpkg.com/@cliqz/adblocker-extended-selectors/-/adblocker-extended-selectors-1.23.7.tgz#4782829c97b7ed229789a14d4e39b2339825077d"
|
||||
integrity sha512-3y1eeHHZS3qvvv3dDZDjVVr6g03GeUBbDExCW/GOiHdo+qKYmM2K40EaLN+Upzc2ppuum9UKvzsHxG8Hs9R/kQ==
|
||||
|
||||
"@cliqz/adblocker-puppeteer@1.22.6":
|
||||
version "1.22.6"
|
||||
resolved "https://registry.yarnpkg.com/@cliqz/adblocker-puppeteer/-/adblocker-puppeteer-1.22.6.tgz#6b311c74ab8f144ab71f2c259b9e49fa76243e9b"
|
||||
integrity sha512-Yg+64gsBfG8NKIJTKRg+sgK8G32W/z4qNEoMGdGJc7mdKVCn+y93WklDMO3pCy64u9jqUVS/Rd7z/Z96dX3K8Q==
|
||||
dependencies:
|
||||
"@cliqz/adblocker" "^1.22.6"
|
||||
"@cliqz/adblocker-content" "^1.22.6"
|
||||
tldts-experimental "^5.6.21"
|
||||
|
||||
"@cliqz/adblocker-puppeteer@^1.23.7":
|
||||
version "1.23.7"
|
||||
resolved "https://registry.yarnpkg.com/@cliqz/adblocker-puppeteer/-/adblocker-puppeteer-1.23.7.tgz#18367ddefa603d9abf03a72e6e4ecb584430df1d"
|
||||
integrity sha512-n6xPyRHApAirFVFowK5ngxTOF8ilV/QTNxzHjhmEtJXxiLD55Q4UrwgH7mj22wDhSkOKWUjY6CeCaUdYYb9JVA==
|
||||
dependencies:
|
||||
"@cliqz/adblocker" "^1.23.7"
|
||||
"@cliqz/adblocker-content" "^1.23.7"
|
||||
tldts-experimental "^5.6.21"
|
||||
|
||||
"@cliqz/adblocker@^1.22.6", "@cliqz/adblocker@^1.23.7":
|
||||
version "1.23.7"
|
||||
resolved "https://registry.yarnpkg.com/@cliqz/adblocker/-/adblocker-1.23.7.tgz#d439fe8b62d2789c274b5e124cddb930de25f59a"
|
||||
integrity sha512-HUvC7CcmNbcIftcRhFeuQhHSpyNdOVdXazXDL0+avvWR0cxlI9zcC8yHMRKjxeY6nsiOXdyEFby+V5KgJvfwaQ==
|
||||
dependencies:
|
||||
"@cliqz/adblocker-content" "^1.23.7"
|
||||
"@cliqz/adblocker-extended-selectors" "^1.23.7"
|
||||
"@remusao/guess-url-type" "^1.1.2"
|
||||
"@remusao/small" "^1.1.2"
|
||||
"@remusao/smaz" "^1.7.1"
|
||||
"@types/chrome" "^0.0.180"
|
||||
"@types/firefox-webext-browser" "^94.0.0"
|
||||
tldts-experimental "^5.6.21"
|
||||
|
||||
"@cnakazawa/watch@^1.0.3":
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/@cnakazawa/watch/-/watch-1.0.4.tgz#f864ae85004d0fcab6f50be9141c4da368d1656a"
|
||||
@ -4946,6 +4990,41 @@
|
||||
dependencies:
|
||||
"@babel/runtime" "^7.13.10"
|
||||
|
||||
"@remusao/guess-url-type@^1.1.2":
|
||||
version "1.2.1"
|
||||
resolved "https://registry.yarnpkg.com/@remusao/guess-url-type/-/guess-url-type-1.2.1.tgz#b3e7c32abdf98d0fb4f93cc67cad580b5fe4ba57"
|
||||
integrity sha512-rbOqre2jW8STjheOsOaQHLgYBaBZ9Owbdt8NO7WvNZftJlaG3y/K9oOkl8ZUpuFBisIhmBuMEW6c+YrQl5inRA==
|
||||
|
||||
"@remusao/small@^1.1.2":
|
||||
version "1.2.1"
|
||||
resolved "https://registry.yarnpkg.com/@remusao/small/-/small-1.2.1.tgz#63bfe4548832289f94ac868a0c305970c9a0e5f9"
|
||||
integrity sha512-7MjoGt0TJMVw1GPKgWq6SJPws1SLsUXQRa43Umht+nkyw2jnpy3WpiLNqGdwo5rHr5Wp9B2W/Pm5RQp656UJdw==
|
||||
|
||||
"@remusao/smaz-compress@^1.9.1":
|
||||
version "1.9.1"
|
||||
resolved "https://registry.yarnpkg.com/@remusao/smaz-compress/-/smaz-compress-1.9.1.tgz#fc75eaf9bcac2d58bc4c3d518183a7cb9612d275"
|
||||
integrity sha512-E2f48TwloQu3r6BdLOGF2aczeH7bJ/32oJGqvzT9SKur0cuUnLcZ7ZXP874E2fwmdE+cXzfC7bKzp79cDnmeyw==
|
||||
dependencies:
|
||||
"@remusao/trie" "^1.4.1"
|
||||
|
||||
"@remusao/smaz-decompress@^1.9.1":
|
||||
version "1.9.1"
|
||||
resolved "https://registry.yarnpkg.com/@remusao/smaz-decompress/-/smaz-decompress-1.9.1.tgz#8094f997e8fb591a678cda9cf08c209c825eba5b"
|
||||
integrity sha512-TfjKKprYe3n47od8auhvJ/Ikj9kQTbDTe71ynKlxslrvvUhlIV3VQSuwYuMWMbdz1fIs0H/fxCN1Z8/H3km6/A==
|
||||
|
||||
"@remusao/smaz@^1.7.1":
|
||||
version "1.9.1"
|
||||
resolved "https://registry.yarnpkg.com/@remusao/smaz/-/smaz-1.9.1.tgz#a2b9b045385f81e1615a68d932b7cc8b04c9db8d"
|
||||
integrity sha512-e6BLuP8oaXCZ9+v46Is4ilAZ/Vq6YLgmBP204Ixgk1qTjXmqvFYG7+AS7v9nsZdGOy96r9DWGFbbDVgMxwu1rA==
|
||||
dependencies:
|
||||
"@remusao/smaz-compress" "^1.9.1"
|
||||
"@remusao/smaz-decompress" "^1.9.1"
|
||||
|
||||
"@remusao/trie@^1.4.1":
|
||||
version "1.4.1"
|
||||
resolved "https://registry.yarnpkg.com/@remusao/trie/-/trie-1.4.1.tgz#755d09f8a007476334e611f42719b2d581f00720"
|
||||
integrity sha512-yvwa+aCyYI/UjeD39BnpMypG8N06l86wIDW1/PAc6ihBRnodIfZDwccxQN3n1t74wduzaz74m4ZMHZnB06567Q==
|
||||
|
||||
"@rushstack/eslint-patch@^1.0.8":
|
||||
version "1.1.0"
|
||||
resolved "https://registry.yarnpkg.com/@rushstack/eslint-patch/-/eslint-patch-1.1.0.tgz#7f698254aadf921e48dda8c0a6b304026b8a9323"
|
||||
@ -7369,6 +7448,14 @@
|
||||
resolved "https://registry.yarnpkg.com/@types/chai/-/chai-4.2.21.tgz#9f35a5643129df132cf3b5c1ec64046ea1af0650"
|
||||
integrity sha512-yd+9qKmJxm496BOV9CMNaey8TWsikaZOwMRwPHQIjcOJM9oV+fi9ZMNw3JsVnbEEbo2gRTDnGEBv8pjyn67hNg==
|
||||
|
||||
"@types/chrome@^0.0.180":
|
||||
version "0.0.180"
|
||||
resolved "https://registry.yarnpkg.com/@types/chrome/-/chrome-0.0.180.tgz#b7fb831848f6b6c49abffd85410b73037c02381e"
|
||||
integrity sha512-A/CuuKAaHq2CHEpYBAtl0lp2ib7TTXK7VjJI4q+c+1U/HDvZLJ8IlsdEDzzHdvkNdh36bGONxrMnO9YZrKqbAw==
|
||||
dependencies:
|
||||
"@types/filesystem" "*"
|
||||
"@types/har-format" "*"
|
||||
|
||||
"@types/color-convert@^2.0.0":
|
||||
version "2.0.0"
|
||||
resolved "https://registry.yarnpkg.com/@types/color-convert/-/color-convert-2.0.0.tgz#8f5ee6b9e863dcbee5703f5a517ffb13d3ea4e22"
|
||||
@ -7418,6 +7505,13 @@
|
||||
resolved "https://registry.yarnpkg.com/@types/cors/-/cors-2.8.12.tgz#6b2c510a7ad7039e98e7b8d3d6598f4359e5c080"
|
||||
integrity sha512-vt+kDhq/M2ayberEtJcIN/hxXy1Pk+59g2FV/ZQceeaTyCtCucjL2Q7FXlFjtWn4n15KCr1NE2lNNFhp0lEThw==
|
||||
|
||||
"@types/debug@^4.1.0":
|
||||
version "4.1.7"
|
||||
resolved "https://registry.yarnpkg.com/@types/debug/-/debug-4.1.7.tgz#7cc0ea761509124709b8b2d1090d8f6c17aadb82"
|
||||
integrity sha512-9AonUzyTjXXhEOa0DnqpzZi6VHlqKMswga9EXjpXnnqxwLtdvPPtlO8evrI5D9S6asFRCQ6v+wpiUKbw+vKqyg==
|
||||
dependencies:
|
||||
"@types/ms" "*"
|
||||
|
||||
"@types/diff-match-patch@^1.0.32":
|
||||
version "1.0.32"
|
||||
resolved "https://registry.yarnpkg.com/@types/diff-match-patch/-/diff-match-patch-1.0.32.tgz#d9c3b8c914aa8229485351db4865328337a3d09f"
|
||||
@ -7501,11 +7595,28 @@
|
||||
"@types/qs" "*"
|
||||
"@types/serve-static" "*"
|
||||
|
||||
"@types/filesystem@*":
|
||||
version "0.0.32"
|
||||
resolved "https://registry.yarnpkg.com/@types/filesystem/-/filesystem-0.0.32.tgz#307df7cc084a2293c3c1a31151b178063e0a8edf"
|
||||
integrity sha512-Yuf4jR5YYMR2DVgwuCiP11s0xuVRyPKmz8vo6HBY3CGdeMj8af93CFZX+T82+VD1+UqHOxTq31lO7MI7lepBtQ==
|
||||
dependencies:
|
||||
"@types/filewriter" "*"
|
||||
|
||||
"@types/filewriter@*":
|
||||
version "0.0.29"
|
||||
resolved "https://registry.yarnpkg.com/@types/filewriter/-/filewriter-0.0.29.tgz#a48795ecadf957f6c0d10e0c34af86c098fa5bee"
|
||||
integrity sha512-BsPXH/irW0ht0Ji6iw/jJaK8Lj3FJemon2gvEqHKpCdDCeemHa+rI3WBGq5z7cDMZgoLjY40oninGxqk+8NzNQ==
|
||||
|
||||
"@types/fined@*":
|
||||
version "1.1.3"
|
||||
resolved "https://registry.yarnpkg.com/@types/fined/-/fined-1.1.3.tgz#83f03e8f0a8d3673dfcafb18fce3571f6250e1bc"
|
||||
integrity sha512-CWYnSRnun3CGbt6taXeVo2lCbuaj4mchVJ4UF/BdU5TSuIn3AmS13pGMwCsBUoehGbhZrBrpNJZSZI5EVilXww==
|
||||
|
||||
"@types/firefox-webext-browser@^94.0.0":
|
||||
version "94.0.1"
|
||||
resolved "https://registry.yarnpkg.com/@types/firefox-webext-browser/-/firefox-webext-browser-94.0.1.tgz#52afb975253dc0fd350d5d58c7fe9fd1a01f64a1"
|
||||
integrity sha512-I6iHRQJSTZ+gYt2IxdH2RRAMvcUyK8v5Ig7fHQR0IwUNYP7hz9+cziBVIKxLCO6XI7fiyRsNOWObfl3/4Js2Lg==
|
||||
|
||||
"@types/glob@*":
|
||||
version "7.2.0"
|
||||
resolved "https://registry.yarnpkg.com/@types/glob/-/glob-7.2.0.tgz#bc1b5bf3aa92f25bd5dd39f35c57361bdce5b2eb"
|
||||
@ -7536,6 +7647,11 @@
|
||||
dependencies:
|
||||
graphql "*"
|
||||
|
||||
"@types/har-format@*":
|
||||
version "1.2.8"
|
||||
resolved "https://registry.yarnpkg.com/@types/har-format/-/har-format-1.2.8.tgz#e6908b76d4c88be3db642846bb8b455f0bfb1c4e"
|
||||
integrity sha512-OP6L9VuZNdskgNN3zFQQ54ceYD8OLq5IbqO4VK91ORLfOm7WdT/CiT/pHEBSQEqCInJ2y3O6iCm/zGtPElpgJQ==
|
||||
|
||||
"@types/hast@^2.0.0":
|
||||
version "2.3.4"
|
||||
resolved "https://registry.yarnpkg.com/@types/hast/-/hast-2.3.4.tgz#8aa5ef92c117d20d974a82bdfb6a648b08c0bafc"
|
||||
@ -7737,6 +7853,11 @@
|
||||
resolved "https://registry.yarnpkg.com/@types/mocha/-/mocha-8.2.3.tgz#bbeb55fbc73f28ea6de601fbfa4613f58d785323"
|
||||
integrity sha512-ekGvFhFgrc2zYQoX4JeZPmVzZxw6Dtllga7iGHzfbYIYkAMUx/sAFP2GdFpLff+vdHXu5fl7WX9AT+TtqYcsyw==
|
||||
|
||||
"@types/ms@*":
|
||||
version "0.7.31"
|
||||
resolved "https://registry.yarnpkg.com/@types/ms/-/ms-0.7.31.tgz#31b7ca6407128a3d2bbc27fe2d21b345397f6197"
|
||||
integrity sha512-iiUgKzV9AuaEkZqkOLDIvlQiL6ltuZd9tGcW3gwpnX8JbuiuhFlEGmmFXEXkN50Cvq7Os88IY2v0dkDqXYWVgA==
|
||||
|
||||
"@types/nanoid@^3.0.0":
|
||||
version "3.0.0"
|
||||
resolved "https://registry.yarnpkg.com/@types/nanoid/-/nanoid-3.0.0.tgz#c757b20f343f3a1dd76e80a9a431b6290fc20f35"
|
||||
@ -7857,6 +7978,13 @@
|
||||
resolved "https://registry.yarnpkg.com/@types/prop-types/-/prop-types-15.7.4.tgz#fcf7205c25dff795ee79af1e30da2c9790808f11"
|
||||
integrity sha512-rZ5drC/jWjrArrS8BR6SIr4cWpW09RNTYt9AMZo3Jwwif+iacXAqgVjm0B0Bv/S1jhDXKHqRVNCbACkJ89RAnQ==
|
||||
|
||||
"@types/puppeteer@*":
|
||||
version "5.4.6"
|
||||
resolved "https://registry.yarnpkg.com/@types/puppeteer/-/puppeteer-5.4.6.tgz#afc438e41dcbc27ca1ba0235ea464a372db2b21c"
|
||||
integrity sha512-98Kghehs7+/GD9b56qryhqdqVCXUTbetTv3PlvDnmFRTHQH0j9DIp1f7rkAW3BAj4U3yoeSEQnKgdW8bDq0Y0Q==
|
||||
dependencies:
|
||||
"@types/node" "*"
|
||||
|
||||
"@types/qs@*", "@types/qs@^6.9.5":
|
||||
version "6.9.7"
|
||||
resolved "https://registry.yarnpkg.com/@types/qs/-/qs-6.9.7.tgz#63bb7d067db107cc1e457c303bc25d511febf6cb"
|
||||
@ -8623,6 +8751,11 @@ acorn@^8.0.4, acorn@^8.2.4, acorn@^8.4.1, acorn@^8.5.0, acorn@^8.7.0:
|
||||
resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.7.0.tgz#90951fde0f8f09df93549481e5fc141445b791cf"
|
||||
integrity sha512-V/LGr1APy+PXIwKebEWrkZPwoeoF+w1jiOBUmuxuiUIaOHtob8Qc9BTrYo7VuI5fR8tqsy+buA2WFooR5olqvQ==
|
||||
|
||||
ad-block-js@^0.0.2:
|
||||
version "0.0.2"
|
||||
resolved "https://registry.yarnpkg.com/ad-block-js/-/ad-block-js-0.0.2.tgz#294196a1fcc40881bd86abdbad288e20c516cd81"
|
||||
integrity sha512-1n71M3WihSgFoaQ6S+LL9L3YFfCRtvNfk/yvqY8MAn66njWOJo2s6f/TYBOS2gLbyXuxEUqq9n6slUHi/xQIHg==
|
||||
|
||||
add-stream@^1.0.0:
|
||||
version "1.0.0"
|
||||
resolved "https://registry.yarnpkg.com/add-stream/-/add-stream-1.0.0.tgz#6a7990437ca736d5e1288db92bd3266d5f5cb2aa"
|
||||
@ -10696,6 +10829,17 @@ cliui@^7.0.2:
|
||||
strip-ansi "^6.0.0"
|
||||
wrap-ansi "^7.0.0"
|
||||
|
||||
clone-deep@^0.2.4:
|
||||
version "0.2.4"
|
||||
resolved "https://registry.yarnpkg.com/clone-deep/-/clone-deep-0.2.4.tgz#4e73dd09e9fb971cc38670c5dced9c1896481cc6"
|
||||
integrity sha1-TnPdCen7lxzDhnDF3O2cGJZIHMY=
|
||||
dependencies:
|
||||
for-own "^0.1.3"
|
||||
is-plain-object "^2.0.1"
|
||||
kind-of "^3.0.2"
|
||||
lazy-cache "^1.0.3"
|
||||
shallow-clone "^0.1.2"
|
||||
|
||||
clone-deep@^4.0.1:
|
||||
version "4.0.1"
|
||||
resolved "https://registry.yarnpkg.com/clone-deep/-/clone-deep-4.0.1.tgz#c19fd9bdbbf85942b4fd979c84dcf7d5f07c2387"
|
||||
@ -11369,7 +11513,7 @@ create-require@^1.1.0:
|
||||
resolved "https://registry.yarnpkg.com/create-require/-/create-require-1.1.1.tgz#c1d7e8f1e5f6cfc9ff65f9cd352d37348756c333"
|
||||
integrity sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==
|
||||
|
||||
cross-fetch@^3.0.6, cross-fetch@^3.1.5:
|
||||
cross-fetch@3.1.5, cross-fetch@^3.0.6, cross-fetch@^3.1.5:
|
||||
version "3.1.5"
|
||||
resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f"
|
||||
integrity sha512-lvb1SBsI0Z7GDwmuid+mU3kWVBwTVUbe7S0H52yaaAdQOXq2YktTCZdlAcNKFzE6QtRz0snpw9bNiPeOIkkQvw==
|
||||
@ -11719,6 +11863,13 @@ debug@4.3.1:
|
||||
dependencies:
|
||||
ms "2.1.2"
|
||||
|
||||
debug@4.3.4:
|
||||
version "4.3.4"
|
||||
resolved "https://registry.yarnpkg.com/debug/-/debug-4.3.4.tgz#1319f6579357f2338d3337d2cdd4914bb5dcc865"
|
||||
integrity sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==
|
||||
dependencies:
|
||||
ms "2.1.2"
|
||||
|
||||
debug@^3.0.0, debug@^3.1.0, debug@^3.1.1, debug@^3.2.7:
|
||||
version "3.2.7"
|
||||
resolved "https://registry.yarnpkg.com/debug/-/debug-3.2.7.tgz#72580b7e9145fb39b6676f9c5e5fb100b934179a"
|
||||
@ -11995,16 +12146,16 @@ detect-port@^1.3.0:
|
||||
address "^1.0.1"
|
||||
debug "^2.6.0"
|
||||
|
||||
devtools-protocol@0.0.847576:
|
||||
version "0.0.847576"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.847576.tgz#2f201bfb68aa9ef4497199fbd7f5d5dfde3b200b"
|
||||
integrity sha512-0M8kobnSQE0Jmly7Mhbeq0W/PpZfnuK+WjN2ZRVPbGqYwCHCioAVp84H0TcLimgECcN5H976y5QiXMGBC9JKmg==
|
||||
|
||||
devtools-protocol@0.0.901419:
|
||||
version "0.0.901419"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.901419.tgz#79b5459c48fe7e1c5563c02bd72f8fec3e0cebcd"
|
||||
integrity sha512-4INMPwNm9XRpBukhNbF7OB6fNTTCaI8pzy/fXg0xQzAy5h3zL1P8xT3QazgKqBrb/hAYwIBizqDBZ7GtJE74QQ==
|
||||
|
||||
devtools-protocol@0.0.981744:
|
||||
version "0.0.981744"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.981744.tgz#9960da0370284577d46c28979a0b32651022bacf"
|
||||
integrity sha512-0cuGS8+jhR67Fy7qG3i3Pc7Aw494sb9yG9QgpG97SFVWwolgYjlhJg7n+UaHxOQT30d1TYu/EYe9k01ivLErIg==
|
||||
|
||||
dezalgo@1.0.3, dezalgo@^1.0.0:
|
||||
version "1.0.3"
|
||||
resolved "https://registry.yarnpkg.com/dezalgo/-/dezalgo-1.0.3.tgz#7f742de066fc748bc8db820569dddce49bf0d456"
|
||||
@ -13213,7 +13364,7 @@ extract-files@^9.0.0:
|
||||
resolved "https://registry.yarnpkg.com/extract-files/-/extract-files-9.0.0.tgz#8a7744f2437f81f5ed3250ed9f1550de902fe54a"
|
||||
integrity sha512-CvdFfHkC95B4bBBk36hcEmvdR2awOdhhVUYH6S/zrVj3477zven/fJMYg7121h4T1xHZC+tetUpubpAhxwI7hQ==
|
||||
|
||||
extract-zip@2.0.1, extract-zip@^2.0.0:
|
||||
extract-zip@2.0.1:
|
||||
version "2.0.1"
|
||||
resolved "https://registry.yarnpkg.com/extract-zip/-/extract-zip-2.0.1.tgz#663dca56fe46df890d5f131ef4a06d22bb8ba13a"
|
||||
integrity sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==
|
||||
@ -13615,11 +13766,23 @@ follow-redirects@^1.14.0, follow-redirects@^1.14.8:
|
||||
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.14.8.tgz#016996fb9a11a100566398b1c6839337d7bfa8fc"
|
||||
integrity sha512-1x0S9UVJHsQprFcEC/qnNzBLcIxsjAV905f/UkQxbclCsoTWlacCNOpQa/anodLl2uaEKFhfWOvM2Qg77+15zA==
|
||||
|
||||
for-in@^0.1.3:
|
||||
version "0.1.8"
|
||||
resolved "https://registry.yarnpkg.com/for-in/-/for-in-0.1.8.tgz#d8773908e31256109952b1fdb9b3fa867d2775e1"
|
||||
integrity sha1-2Hc5COMSVhCZUrH9ubP6hn0ndeE=
|
||||
|
||||
for-in@^1.0.1, for-in@^1.0.2:
|
||||
version "1.0.2"
|
||||
resolved "https://registry.yarnpkg.com/for-in/-/for-in-1.0.2.tgz#81068d295a8142ec0ac726c6e2200c30fb6d5e80"
|
||||
integrity sha1-gQaNKVqBQuwKxybG4iAMMPttXoA=
|
||||
|
||||
for-own@^0.1.3:
|
||||
version "0.1.5"
|
||||
resolved "https://registry.yarnpkg.com/for-own/-/for-own-0.1.5.tgz#5265c681a4f294dabbf17c9509b6763aa84510ce"
|
||||
integrity sha1-UmXGgaTylNq78XyVCbZ2OqhFEM4=
|
||||
dependencies:
|
||||
for-in "^1.0.1"
|
||||
|
||||
for-own@^1.0.0:
|
||||
version "1.0.0"
|
||||
resolved "https://registry.yarnpkg.com/for-own/-/for-own-1.0.0.tgz#c63332f415cedc4b04dbfe70cf836494c53cb44b"
|
||||
@ -13787,6 +13950,15 @@ fs-extra@^0.30.0:
|
||||
path-is-absolute "^1.0.0"
|
||||
rimraf "^2.2.8"
|
||||
|
||||
fs-extra@^10.0.0:
|
||||
version "10.1.0"
|
||||
resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-10.1.0.tgz#02873cfbc4084dde127eaa5f9905eef2325d1abf"
|
||||
integrity sha512-oRXApq54ETRj4eMiFzGnHWGy+zo5raudjuxN0b8H7s/RU2oW0Wvsx9O0ACRN/kRq9E8Vu/ReskGB5o3ji+FzHQ==
|
||||
dependencies:
|
||||
graceful-fs "^4.2.0"
|
||||
jsonfile "^6.0.1"
|
||||
universalify "^2.0.0"
|
||||
|
||||
fs-extra@^9.0.0, fs-extra@^9.0.1, fs-extra@^9.1.0:
|
||||
version "9.1.0"
|
||||
resolved "https://registry.yarnpkg.com/fs-extra/-/fs-extra-9.1.0.tgz#5954460c764a8da2094ba3554bf839e6b9a7c86d"
|
||||
@ -15061,6 +15233,14 @@ https-proxy-agent@5.0.0, https-proxy-agent@^5.0.0:
|
||||
agent-base "6"
|
||||
debug "4"
|
||||
|
||||
https-proxy-agent@5.0.1:
|
||||
version "5.0.1"
|
||||
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz#c59ef224a04fe8b754f3db0063a25ea30d0005d6"
|
||||
integrity sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==
|
||||
dependencies:
|
||||
agent-base "6"
|
||||
debug "4"
|
||||
|
||||
human-signals@^1.1.1:
|
||||
version "1.1.1"
|
||||
resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3"
|
||||
@ -15445,7 +15625,7 @@ is-boolean-object@^1.1.0:
|
||||
call-bind "^1.0.2"
|
||||
has-tostringtag "^1.0.0"
|
||||
|
||||
is-buffer@^1.1.5, is-buffer@~1.1.6:
|
||||
is-buffer@^1.0.2, is-buffer@^1.1.5, is-buffer@~1.1.6:
|
||||
version "1.1.6"
|
||||
resolved "https://registry.yarnpkg.com/is-buffer/-/is-buffer-1.1.6.tgz#efaa2ea9daa0d7ab2ea13a97b2b8ad51fefbe8be"
|
||||
integrity sha512-NcdALwpXkTm5Zvvbk7owOUSvVvBKDgKP5/ewfXEznmQFfs4ZRmanOeKBTjRVjka3QFoN6XJ+9F3USqfHqTaU5w==
|
||||
@ -15757,7 +15937,7 @@ is-plain-object@5.0.0, is-plain-object@^5.0.0:
|
||||
resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-5.0.0.tgz#4427f50ab3429e9025ea7d52e9043a9ef4159344"
|
||||
integrity sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==
|
||||
|
||||
is-plain-object@^2.0.3, is-plain-object@^2.0.4:
|
||||
is-plain-object@^2.0.1, is-plain-object@^2.0.3, is-plain-object@^2.0.4:
|
||||
version "2.0.4"
|
||||
resolved "https://registry.yarnpkg.com/is-plain-object/-/is-plain-object-2.0.4.tgz#2c163b3fafb1b606d9d17928f05c2a1c38e07677"
|
||||
integrity sha512-h5PpgXkWitc38BBMYawTYMWJHFZJVnBquFE57xFpjB8pJFiF6gZ+bU+WyI/yqXiFR5mdLsgYNaPe8uao6Uv9Og==
|
||||
@ -16965,6 +17145,13 @@ keyv@^3.0.0:
|
||||
dependencies:
|
||||
json-buffer "3.0.0"
|
||||
|
||||
kind-of@^2.0.1:
|
||||
version "2.0.1"
|
||||
resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-2.0.1.tgz#018ec7a4ce7e3a86cb9141be519d24c8faa981b5"
|
||||
integrity sha1-AY7HpM5+OobLkUG+UZ0kyPqpgbU=
|
||||
dependencies:
|
||||
is-buffer "^1.0.2"
|
||||
|
||||
kind-of@^3.0.2, kind-of@^3.0.3, kind-of@^3.2.0:
|
||||
version "3.2.2"
|
||||
resolved "https://registry.yarnpkg.com/kind-of/-/kind-of-3.2.2.tgz#31ea21a734bab9bbb0f32466d893aea51e4a3c64"
|
||||
@ -17072,6 +17259,16 @@ lazy-ass@^1.6.0:
|
||||
resolved "https://registry.yarnpkg.com/lazy-ass/-/lazy-ass-1.6.0.tgz#7999655e8646c17f089fdd187d150d3324d54513"
|
||||
integrity sha1-eZllXoZGwX8In90YfRUNMyTVRRM=
|
||||
|
||||
lazy-cache@^0.2.3:
|
||||
version "0.2.7"
|
||||
resolved "https://registry.yarnpkg.com/lazy-cache/-/lazy-cache-0.2.7.tgz#7feddf2dcb6edb77d11ef1d117ab5ffdf0ab1b65"
|
||||
integrity sha1-f+3fLctu23fRHvHRF6tf/fCrG2U=
|
||||
|
||||
lazy-cache@^1.0.3:
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/lazy-cache/-/lazy-cache-1.0.4.tgz#a1d78fc3a50474cb80845d3b3b6e1da49a446e8e"
|
||||
integrity sha1-odePw6UEdMuAhF07O24dpJpEbo4=
|
||||
|
||||
lazy-universal-dotenv@^3.0.1:
|
||||
version "3.0.1"
|
||||
resolved "https://registry.yarnpkg.com/lazy-universal-dotenv/-/lazy-universal-dotenv-3.0.1.tgz#a6c8938414bca426ab8c9463940da451a911db38"
|
||||
@ -17943,6 +18140,15 @@ meow@^8.0.0:
|
||||
type-fest "^0.18.0"
|
||||
yargs-parser "^20.2.3"
|
||||
|
||||
merge-deep@^3.0.1:
|
||||
version "3.0.3"
|
||||
resolved "https://registry.yarnpkg.com/merge-deep/-/merge-deep-3.0.3.tgz#1a2b2ae926da8b2ae93a0ac15d90cd1922766003"
|
||||
integrity sha512-qtmzAS6t6grwEkNrunqTBdn0qKwFgNWvlxUbAV8es9M7Ot1EbyApytCnvE0jALPa46ZpKDUo527kKiaWplmlFA==
|
||||
dependencies:
|
||||
arr-union "^3.1.0"
|
||||
clone-deep "^0.2.4"
|
||||
kind-of "^3.0.2"
|
||||
|
||||
merge-descriptors@1.0.1:
|
||||
version "1.0.1"
|
||||
resolved "https://registry.yarnpkg.com/merge-descriptors/-/merge-descriptors-1.0.1.tgz#b00aaa556dd8b44568150ec9d1b953f3f90cbb61"
|
||||
@ -18253,6 +18459,14 @@ mixin-deep@^1.2.0:
|
||||
for-in "^1.0.2"
|
||||
is-extendable "^1.0.1"
|
||||
|
||||
mixin-object@^2.0.1:
|
||||
version "2.0.1"
|
||||
resolved "https://registry.yarnpkg.com/mixin-object/-/mixin-object-2.0.1.tgz#4fb949441dab182540f1fe035ba60e1947a5e57e"
|
||||
integrity sha1-T7lJRB2rGCVA8f4DW6YOGUel5X4=
|
||||
dependencies:
|
||||
for-in "^0.1.3"
|
||||
is-extendable "^0.1.1"
|
||||
|
||||
mkdirp-classic@^0.5.2:
|
||||
version "0.5.3"
|
||||
resolved "https://registry.yarnpkg.com/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz#fa10c9115cc6d8865be221ba47ee9bed78601113"
|
||||
@ -18603,7 +18817,7 @@ node-fetch@2.6.1:
|
||||
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.1.tgz#045bd323631f76ed2e2b55573394416b639a0052"
|
||||
integrity sha512-V4aYg89jEoVRxRb2fJdAg8FHvI7cEyYdVAh94HH0UIK8oJxUfkjlDQN9RbMx+bEjP7+ggMiFRprSti032Oipxw==
|
||||
|
||||
node-fetch@2.6.7, node-fetch@^2.3.0, node-fetch@^2.6.1, node-fetch@^2.6.7:
|
||||
node-fetch@2.6.7, node-fetch@^2.3.0, node-fetch@^2.6.0, node-fetch@^2.6.1, node-fetch@^2.6.7:
|
||||
version "2.6.7"
|
||||
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.7.tgz#24de9fba827e3b4ae44dc8b20256a379160052ad"
|
||||
integrity sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ==
|
||||
@ -20327,7 +20541,7 @@ progress@2.0.1:
|
||||
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.1.tgz#c9242169342b1c29d275889c95734621b1952e31"
|
||||
integrity sha512-OE+a6vzqazc+K6LxJrX5UPyKFvGnL5CYmq2jFGNIBWHpc4QyE49/YOumcrpQFJpfejmvRtbJzgO1zPmMCqlbBg==
|
||||
|
||||
progress@^2.0.0, progress@^2.0.1, progress@^2.0.3:
|
||||
progress@2.0.3, progress@^2.0.0, progress@^2.0.3:
|
||||
version "2.0.3"
|
||||
resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8"
|
||||
integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==
|
||||
@ -20578,23 +20792,80 @@ pupa@^2.1.1:
|
||||
dependencies:
|
||||
escape-goat "^2.0.0"
|
||||
|
||||
puppeteer-core@^7.1.0:
|
||||
version "7.1.0"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-7.1.0.tgz#75a00484481e99aa3578bd93ae13a6991fdc7e97"
|
||||
integrity sha512-2wjKs3L1rYuoVNNtRR/GbAGjbt6LF8DRUxcg/UoCQZrzjfppWlrIqiHRF5uBzJk+Nc0w7ZkvVzKQCvB5PFqFdA==
|
||||
puppeteer-core@^13.7.0:
|
||||
version "13.7.0"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-13.7.0.tgz#3344bee3994163f49120a55ddcd144a40575ba5b"
|
||||
integrity sha512-rXja4vcnAzFAP1OVLq/5dWNfwBGuzcOARJ6qGV7oAZhnLmVRU8G5MsdeQEAOy332ZhkIOnn9jp15R89LKHyp2Q==
|
||||
dependencies:
|
||||
debug "^4.1.0"
|
||||
devtools-protocol "0.0.847576"
|
||||
extract-zip "^2.0.0"
|
||||
https-proxy-agent "^5.0.0"
|
||||
node-fetch "^2.6.1"
|
||||
pkg-dir "^4.2.0"
|
||||
progress "^2.0.1"
|
||||
proxy-from-env "^1.1.0"
|
||||
rimraf "^3.0.2"
|
||||
tar-fs "^2.0.0"
|
||||
unbzip2-stream "^1.3.3"
|
||||
ws "^7.2.3"
|
||||
cross-fetch "3.1.5"
|
||||
debug "4.3.4"
|
||||
devtools-protocol "0.0.981744"
|
||||
extract-zip "2.0.1"
|
||||
https-proxy-agent "5.0.1"
|
||||
pkg-dir "4.2.0"
|
||||
progress "2.0.3"
|
||||
proxy-from-env "1.1.0"
|
||||
rimraf "3.0.2"
|
||||
tar-fs "2.1.1"
|
||||
unbzip2-stream "1.4.3"
|
||||
ws "8.5.0"
|
||||
|
||||
puppeteer-extra-plugin-adblocker@^2.12.0:
|
||||
version "2.12.0"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-extra-plugin-adblocker/-/puppeteer-extra-plugin-adblocker-2.12.0.tgz#afd4e78abe37dde5158ceb190bc5e747f91420bd"
|
||||
integrity sha512-x0/G6suPa28FomEuCGAKGTvqcxt2efZWXE8mjEPRASGQaMe4smHYM1xT2nYS5Z82wIOqPZQVVJ7l3dGY9E+BuQ==
|
||||
dependencies:
|
||||
"@cliqz/adblocker-puppeteer" "1.22.6"
|
||||
debug "^4.1.1"
|
||||
node-fetch "^2.6.0"
|
||||
puppeteer-extra-plugin "^3.2.0"
|
||||
|
||||
puppeteer-extra-plugin-stealth@^2.9.0:
|
||||
version "2.9.0"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-extra-plugin-stealth/-/puppeteer-extra-plugin-stealth-2.9.0.tgz#aa39f4469bf47343af4517efe9f97846228b3b01"
|
||||
integrity sha512-erZ9lkIcOkfYmLPP2jv2AiqvNBFhQJinWJhcm40pqSjwJTsZXHsTARUyRCsBYEEBvNIs3Wz3E0zVlTRc4IJ6Hg==
|
||||
dependencies:
|
||||
debug "^4.1.1"
|
||||
puppeteer-extra-plugin "^3.2.0"
|
||||
puppeteer-extra-plugin-user-preferences "^2.3.1"
|
||||
|
||||
puppeteer-extra-plugin-user-data-dir@^2.3.1:
|
||||
version "2.3.1"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-extra-plugin-user-data-dir/-/puppeteer-extra-plugin-user-data-dir-2.3.1.tgz#1a8777b6383cf212de361497e94616ab29712a38"
|
||||
integrity sha512-yhaYMaNFdfQ1LbA94ZElW1zU8rh+MFmO+GZA0gtQ8BXc+UZ6aRrWS9flIZvlXDzk+ZsXhCbTEohEwZ8lEDLRVA==
|
||||
dependencies:
|
||||
debug "^4.1.1"
|
||||
fs-extra "^10.0.0"
|
||||
puppeteer-extra-plugin "^3.2.0"
|
||||
|
||||
puppeteer-extra-plugin-user-preferences@^2.3.1:
|
||||
version "2.3.1"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-extra-plugin-user-preferences/-/puppeteer-extra-plugin-user-preferences-2.3.1.tgz#20faacd9e4cc00a52e8261604309e897aa569fa5"
|
||||
integrity sha512-t/FyGQj2aqtHOROqL02z+k2kNQe0cjT0Hd9pG5FJ7x0JXx1722PhOuK7FeJLQMJ+BLl2YvCUgaWSC8Zohjts5A==
|
||||
dependencies:
|
||||
debug "^4.1.1"
|
||||
deepmerge "^4.2.2"
|
||||
puppeteer-extra-plugin "^3.2.0"
|
||||
puppeteer-extra-plugin-user-data-dir "^2.3.1"
|
||||
|
||||
puppeteer-extra-plugin@^3.2.0:
|
||||
version "3.2.0"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-extra-plugin/-/puppeteer-extra-plugin-3.2.0.tgz#f964e2a714d0f9c7a00b557c780ac28c6affd5e9"
|
||||
integrity sha512-wbiw12USE3b+maMk/IMaroYsz7rusVI9G+ml6pCFCnFFh91Z9BAEiVzhCpOHuquVXEiCCsDTWhDUgvdNxQHOyw==
|
||||
dependencies:
|
||||
"@types/debug" "^4.1.0"
|
||||
debug "^4.1.1"
|
||||
merge-deep "^3.0.1"
|
||||
|
||||
puppeteer-extra@^3.2.3:
|
||||
version "3.2.3"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-extra/-/puppeteer-extra-3.2.3.tgz#1b24ae12ab7c7660f81922c1065beb5887cc189e"
|
||||
integrity sha512-CnSN9yIedbAbS8WmRybaDHJLf6goRk+VYM/kbH6i/+EMadCaAeh2O+1/mFUMN2LbkbDNAp2Vd/UwrTVCHjTxyg==
|
||||
dependencies:
|
||||
"@types/debug" "^4.1.0"
|
||||
"@types/puppeteer" "*"
|
||||
debug "^4.1.1"
|
||||
deepmerge "^4.2.2"
|
||||
|
||||
puppeteer@^10.1.0:
|
||||
version "10.4.0"
|
||||
@ -21976,6 +22247,16 @@ sha.js@^2.4.0, sha.js@^2.4.11, sha.js@^2.4.8:
|
||||
inherits "^2.0.1"
|
||||
safe-buffer "^5.0.1"
|
||||
|
||||
shallow-clone@^0.1.2:
|
||||
version "0.1.2"
|
||||
resolved "https://registry.yarnpkg.com/shallow-clone/-/shallow-clone-0.1.2.tgz#5909e874ba77106d73ac414cfec1ffca87d97060"
|
||||
integrity sha1-WQnodLp3EG1zrEFM/sH/yofZcGA=
|
||||
dependencies:
|
||||
is-extendable "^0.1.1"
|
||||
kind-of "^2.0.1"
|
||||
lazy-cache "^0.2.3"
|
||||
mixin-object "^2.0.1"
|
||||
|
||||
shallow-clone@^3.0.0:
|
||||
version "3.0.1"
|
||||
resolved "https://registry.yarnpkg.com/shallow-clone/-/shallow-clone-3.0.1.tgz#8f2981ad92531f55035b01fb230769a40e02efa3"
|
||||
@ -22975,7 +23256,7 @@ tar-fs@2.0.0:
|
||||
pump "^3.0.0"
|
||||
tar-stream "^2.0.0"
|
||||
|
||||
tar-fs@^2.0.0, tar-fs@^2.1.1:
|
||||
tar-fs@2.1.1, tar-fs@^2.1.1:
|
||||
version "2.1.1"
|
||||
resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-2.1.1.tgz#489a15ab85f1f0befabb370b7de4f9eb5cbe8784"
|
||||
integrity sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==
|
||||
@ -23254,6 +23535,18 @@ title-case@^3.0.3:
|
||||
dependencies:
|
||||
tslib "^2.0.3"
|
||||
|
||||
tldts-core@^5.7.77:
|
||||
version "5.7.77"
|
||||
resolved "https://registry.yarnpkg.com/tldts-core/-/tldts-core-5.7.77.tgz#ddc7fb6c9b0c03b19cf0985f4eaf6f79c43d9096"
|
||||
integrity sha512-iEVvz9jdx8zxKPx4qT2bVoewFoU3fctREwYTjlXmBJJKX8JWk90W4pPKqSIFGdfvC0laH3XyZKe1sugHqUpgDQ==
|
||||
|
||||
tldts-experimental@^5.6.21:
|
||||
version "5.7.77"
|
||||
resolved "https://registry.yarnpkg.com/tldts-experimental/-/tldts-experimental-5.7.77.tgz#2b2271c4fc3c2956af6809fe865d181c0eaacd00"
|
||||
integrity sha512-hzZB5ctCHS6ZJJn0o2ip/gVUIKkeY/LI/X5O/jrMw/sp94ebvCDII2Ps6Fg3X+MPK97RhrC9Rboq1aheWf3+ww==
|
||||
dependencies:
|
||||
tldts-core "^5.7.77"
|
||||
|
||||
tmp@^0.0.33:
|
||||
version "0.0.33"
|
||||
resolved "https://registry.yarnpkg.com/tmp/-/tmp-0.0.33.tgz#6d34335889768d21b2bcda0aa277ced3b1bfadf9"
|
||||
@ -23734,7 +24027,7 @@ unbzip2-stream@1.3.3:
|
||||
buffer "^5.2.1"
|
||||
through "^2.3.8"
|
||||
|
||||
unbzip2-stream@^1.3.3:
|
||||
unbzip2-stream@1.4.3:
|
||||
version "1.4.3"
|
||||
resolved "https://registry.yarnpkg.com/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz#b0da04c4371311df771cdc215e87f2130991ace7"
|
||||
integrity sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==
|
||||
@ -24924,16 +25217,16 @@ ws@7.4.6:
|
||||
resolved "https://registry.yarnpkg.com/ws/-/ws-7.4.6.tgz#5654ca8ecdeee47c33a9a4bf6d28e2be2980377c"
|
||||
integrity sha512-YmhHDO4MzaDLB+M9ym/mDA5z0naX8j7SIlT8f8z+I0VtzsRbekxEutHSme7NPS2qE8StCYQNUnfWdXta/Yu85A==
|
||||
|
||||
"ws@^5.2.0 || ^6.0.0 || ^7.0.0", ws@^7.2.3, ws@^7.3.1, ws@^7.4.6:
|
||||
version "7.5.7"
|
||||
resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.7.tgz#9e0ac77ee50af70d58326ecff7e85eb3fa375e67"
|
||||
integrity sha512-KMvVuFzpKBuiIXW3E4u3mySRO2/mCHSyZDJQM5NQ9Q9KHWHWh0NHgfbRMLLrceUK5qAL4ytALJbpRMjixFZh8A==
|
||||
|
||||
ws@^8.2.3, ws@^8.3.0, ws@^8.4.2:
|
||||
ws@8.5.0, ws@^8.2.3, ws@^8.3.0, ws@^8.4.2:
|
||||
version "8.5.0"
|
||||
resolved "https://registry.yarnpkg.com/ws/-/ws-8.5.0.tgz#bfb4be96600757fe5382de12c670dab984a1ed4f"
|
||||
integrity sha512-BWX0SWVgLPzYwF8lTzEy1egjhS4S4OEAHfsO8o65WOVsrnSRGaSiUaa9e0ggGlkMTtBlmOpEXiie9RUcBO86qg==
|
||||
|
||||
"ws@^5.2.0 || ^6.0.0 || ^7.0.0", ws@^7.3.1, ws@^7.4.6:
|
||||
version "7.5.7"
|
||||
resolved "https://registry.yarnpkg.com/ws/-/ws-7.5.7.tgz#9e0ac77ee50af70d58326ecff7e85eb3fa375e67"
|
||||
integrity sha512-KMvVuFzpKBuiIXW3E4u3mySRO2/mCHSyZDJQM5NQ9Q9KHWHWh0NHgfbRMLLrceUK5qAL4ytALJbpRMjixFZh8A==
|
||||
|
||||
xdg-basedir@^4.0.0:
|
||||
version "4.0.0"
|
||||
resolved "https://registry.yarnpkg.com/xdg-basedir/-/xdg-basedir-4.0.0.tgz#4bc8d9984403696225ef83a1573cbbcb4e79db13"
|
||||
|
||||
Reference in New Issue
Block a user