Merge pull request #1577 from omnivore-app/readability-in-puppeteer
readability in puppeteer
This commit is contained in:
@ -1612,6 +1612,21 @@ export enum PageType {
|
||||
Website = 'WEBSITE'
|
||||
}
|
||||
|
||||
export type ParseResult = {
|
||||
byline?: InputMaybe<Scalars['String']>;
|
||||
content: Scalars['String'];
|
||||
dir?: InputMaybe<Scalars['String']>;
|
||||
excerpt: Scalars['String'];
|
||||
language?: InputMaybe<Scalars['String']>;
|
||||
length: Scalars['Int'];
|
||||
previewImage?: InputMaybe<Scalars['String']>;
|
||||
publishedDate?: InputMaybe<Scalars['Date']>;
|
||||
siteIcon?: InputMaybe<Scalars['String']>;
|
||||
siteName?: InputMaybe<Scalars['String']>;
|
||||
textContent: Scalars['String'];
|
||||
title: Scalars['String'];
|
||||
};
|
||||
|
||||
export type PreparedDocumentInput = {
|
||||
document: Scalars['String'];
|
||||
pageInfo: PageInfoInput;
|
||||
@ -2071,6 +2086,7 @@ export type SaveFilterSuccess = {
|
||||
export type SavePageInput = {
|
||||
clientRequestId: Scalars['ID'];
|
||||
originalContent: Scalars['String'];
|
||||
parseResult?: InputMaybe<ParseResult>;
|
||||
source: Scalars['String'];
|
||||
title?: InputMaybe<Scalars['String']>;
|
||||
url: Scalars['String'];
|
||||
@ -3291,6 +3307,7 @@ export type ResolversTypes = {
|
||||
PageInfo: ResolverTypeWrapper<PageInfo>;
|
||||
PageInfoInput: PageInfoInput;
|
||||
PageType: PageType;
|
||||
ParseResult: ParseResult;
|
||||
PreparedDocumentInput: PreparedDocumentInput;
|
||||
Profile: ResolverTypeWrapper<Profile>;
|
||||
Query: ResolverTypeWrapper<{}>;
|
||||
@ -3704,6 +3721,7 @@ export type ResolversParentTypes = {
|
||||
Page: Page;
|
||||
PageInfo: PageInfo;
|
||||
PageInfoInput: PageInfoInput;
|
||||
ParseResult: ParseResult;
|
||||
PreparedDocumentInput: PreparedDocumentInput;
|
||||
Profile: Profile;
|
||||
Query: {};
|
||||
|
||||
@ -1169,6 +1169,21 @@ enum PageType {
|
||||
WEBSITE
|
||||
}
|
||||
|
||||
input ParseResult {
|
||||
byline: String
|
||||
content: String!
|
||||
dir: String
|
||||
excerpt: String!
|
||||
language: String
|
||||
length: Int!
|
||||
previewImage: String
|
||||
publishedDate: Date
|
||||
siteIcon: String
|
||||
siteName: String
|
||||
textContent: String!
|
||||
title: String!
|
||||
}
|
||||
|
||||
input PreparedDocumentInput {
|
||||
document: String!
|
||||
pageInfo: PageInfoInput!
|
||||
@ -1501,6 +1516,7 @@ type SaveFilterSuccess {
|
||||
input SavePageInput {
|
||||
clientRequestId: ID!
|
||||
originalContent: String!
|
||||
parseResult: ParseResult
|
||||
source: String!
|
||||
title: String
|
||||
url: String!
|
||||
|
||||
15
packages/api/src/readability.d.ts
vendored
15
packages/api/src/readability.d.ts
vendored
@ -145,9 +145,9 @@ declare module '@omnivore/readability' {
|
||||
/** Article title */
|
||||
title: string
|
||||
/** Author metadata */
|
||||
byline: string
|
||||
byline?: string | null
|
||||
/** Content direction */
|
||||
dir: string
|
||||
dir?: string | null
|
||||
/** HTML string of processed article content */
|
||||
content: string
|
||||
/** non-HTML version of `content` */
|
||||
@ -157,15 +157,14 @@ declare module '@omnivore/readability' {
|
||||
/** Article description, or short excerpt from the content */
|
||||
excerpt: string
|
||||
/** Article site name */
|
||||
siteName: string
|
||||
siteName?: string | null
|
||||
/** Article site icon */
|
||||
siteIcon: string
|
||||
siteIcon?: string | null
|
||||
/** Article preview image */
|
||||
previewImage?: string
|
||||
previewImage?: string | null
|
||||
/** Article published date */
|
||||
publishedDate?: Date
|
||||
dom?: Element
|
||||
language?: string
|
||||
publishedDate?: Date | null
|
||||
language?: string | null
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -526,12 +526,28 @@ const schema = gql`
|
||||
uploadFileId: ID!
|
||||
}
|
||||
|
||||
input ParseResult {
|
||||
title: String!
|
||||
byline: String
|
||||
dir: String
|
||||
content: String!
|
||||
textContent: String!
|
||||
length: Int!
|
||||
excerpt: String!
|
||||
siteName: String
|
||||
siteIcon: String
|
||||
previewImage: String
|
||||
publishedDate: Date
|
||||
language: String
|
||||
}
|
||||
|
||||
input SavePageInput {
|
||||
url: String!
|
||||
source: String!
|
||||
clientRequestId: ID!
|
||||
title: String
|
||||
originalContent: String!
|
||||
parseResult: ParseResult
|
||||
}
|
||||
|
||||
input SaveUrlInput {
|
||||
|
||||
@ -41,6 +41,7 @@ export const saveEmail = async (
|
||||
// can leave this empty for now
|
||||
},
|
||||
},
|
||||
null,
|
||||
true
|
||||
)
|
||||
const content = parseResult.parsedContent?.content || input.originalContent
|
||||
@ -62,15 +63,21 @@ export const saveEmail = async (
|
||||
}),
|
||||
pageType: parseResult.pageType,
|
||||
hash: stringToHash(content),
|
||||
image: metadata?.previewImage || parseResult.parsedContent?.previewImage,
|
||||
publishedAt: validatedDate(parseResult.parsedContent?.publishedDate),
|
||||
image:
|
||||
metadata?.previewImage ||
|
||||
parseResult.parsedContent?.previewImage ||
|
||||
undefined,
|
||||
publishedAt: validatedDate(
|
||||
parseResult.parsedContent?.publishedDate ?? undefined
|
||||
),
|
||||
createdAt: new Date(),
|
||||
savedAt: new Date(),
|
||||
readingProgressAnchorIndex: 0,
|
||||
readingProgressPercent: 0,
|
||||
subscription: input.author,
|
||||
state: ArticleSavingRequestStatus.Succeeded,
|
||||
siteIcon: parseResult.parsedContent?.siteIcon,
|
||||
siteIcon: parseResult.parsedContent?.siteIcon ?? undefined,
|
||||
siteName: parseResult.parsedContent?.siteName ?? undefined,
|
||||
}
|
||||
|
||||
const page = await getPageByParam({
|
||||
|
||||
@ -77,13 +77,17 @@ export const savePage = async (
|
||||
input: SavePageInput
|
||||
): Promise<SaveResult> => {
|
||||
const [slug, croppedPathname] = createSlug(input.url, input.title)
|
||||
const parseResult = await parsePreparedContent(input.url, {
|
||||
document: input.originalContent,
|
||||
pageInfo: {
|
||||
title: input.title,
|
||||
canonicalUrl: input.url,
|
||||
const parseResult = await parsePreparedContent(
|
||||
input.url,
|
||||
{
|
||||
document: input.originalContent,
|
||||
pageInfo: {
|
||||
title: input.title,
|
||||
canonicalUrl: input.url,
|
||||
},
|
||||
},
|
||||
})
|
||||
input.parseResult
|
||||
)
|
||||
|
||||
const articleToSave = parsedContentToPage({
|
||||
url: input.url,
|
||||
@ -222,24 +226,24 @@ export const parsedContentToPage = ({
|
||||
croppedPathname ||
|
||||
parsedContent?.siteName ||
|
||||
url,
|
||||
author: parsedContent?.byline,
|
||||
author: parsedContent?.byline ?? undefined,
|
||||
url: normalizeUrl(canonicalUrl || url, {
|
||||
stripHash: true,
|
||||
stripWWW: false,
|
||||
}),
|
||||
pageType,
|
||||
hash: uploadFileHash || stringToHash(parsedContent?.content || url),
|
||||
image: parsedContent?.previewImage,
|
||||
publishedAt: validatedDate(parsedContent?.publishedDate),
|
||||
image: parsedContent?.previewImage ?? undefined,
|
||||
publishedAt: validatedDate(parsedContent?.publishedDate ?? undefined),
|
||||
uploadFileId: uploadFileId,
|
||||
readingProgressPercent: 0,
|
||||
readingProgressAnchorIndex: 0,
|
||||
state: ArticleSavingRequestStatus.Succeeded,
|
||||
createdAt: saveTime || new Date(),
|
||||
savedAt: saveTime || new Date(),
|
||||
siteName: parsedContent?.siteName,
|
||||
language: parsedContent?.language,
|
||||
siteIcon: parsedContent?.siteIcon,
|
||||
siteName: parsedContent?.siteName ?? undefined,
|
||||
language: parsedContent?.language ?? undefined,
|
||||
siteIcon: parsedContent?.siteIcon ?? undefined,
|
||||
wordsCount: wordsCount(parsedContent?.textContent || ''),
|
||||
}
|
||||
}
|
||||
|
||||
@ -17,8 +17,8 @@ import { v4 as uuid } from 'uuid'
|
||||
import addressparser from 'addressparser'
|
||||
import { preParseContent } from '@omnivore/content-handler'
|
||||
import {
|
||||
findEmbeddedHighlight,
|
||||
EmbeddedHighlightData,
|
||||
findEmbeddedHighlight,
|
||||
} from './highlightGenerator'
|
||||
|
||||
const logger = buildLogger('utils.parse')
|
||||
@ -174,6 +174,7 @@ const getReadabilityResult = async (
|
||||
export const parsePreparedContent = async (
|
||||
url: string,
|
||||
preparedDocument: PreparedDocumentInput,
|
||||
parseResult?: Readability.ParseResult | null,
|
||||
isNewsletter?: boolean,
|
||||
allowRetry = true
|
||||
): Promise<ParsedContentPuppeteer> => {
|
||||
@ -208,20 +209,29 @@ export const parsePreparedContent = async (
|
||||
preParsedDom && (dom = preParsedDom)
|
||||
|
||||
try {
|
||||
article = await getReadabilityResult(url, document, dom, isNewsletter)
|
||||
article =
|
||||
parseResult ||
|
||||
(await getReadabilityResult(url, document, dom, isNewsletter))
|
||||
if (!article?.textContent && allowRetry) {
|
||||
const newDocument = {
|
||||
...preparedDocument,
|
||||
document: '<html>' + preparedDocument.document + '</html>',
|
||||
}
|
||||
return parsePreparedContent(url, newDocument, isNewsletter, false)
|
||||
return parsePreparedContent(
|
||||
url,
|
||||
newDocument,
|
||||
parseResult,
|
||||
isNewsletter,
|
||||
false
|
||||
)
|
||||
}
|
||||
|
||||
// Format code blocks
|
||||
// TODO: we probably want to move this type of thing
|
||||
// to the handlers, and have some concept of postHandle
|
||||
if (article?.dom) {
|
||||
const codeBlocks = article.dom.querySelectorAll('code')
|
||||
if (article?.content) {
|
||||
const articleDom = parseHTML(article.content).document
|
||||
const codeBlocks = articleDom.querySelectorAll('code')
|
||||
if (codeBlocks.length > 0) {
|
||||
codeBlocks.forEach((e) => {
|
||||
if (e.textContent) {
|
||||
@ -237,12 +247,10 @@ export const parsePreparedContent = async (
|
||||
e.replaceWith(code)
|
||||
}
|
||||
})
|
||||
article.content = article.dom.outerHTML
|
||||
article.content = articleDom.documentElement.outerHTML
|
||||
}
|
||||
|
||||
if (article?.dom) {
|
||||
highlightData = findEmbeddedHighlight(article?.dom)
|
||||
}
|
||||
highlightData = findEmbeddedHighlight(articleDom.documentElement)
|
||||
|
||||
const ANCHOR_ELEMENTS_BLOCKED_ATTRIBUTES = [
|
||||
'omnivore-highlight-id',
|
||||
@ -251,7 +259,7 @@ export const parsePreparedContent = async (
|
||||
]
|
||||
|
||||
// Get the top level element?
|
||||
const pageNode = article.dom.firstElementChild as HTMLElement
|
||||
const pageNode = articleDom.firstElementChild as HTMLElement
|
||||
const nodesToVisitStack: [HTMLElement] = [pageNode]
|
||||
const visitedNodeList = []
|
||||
|
||||
@ -281,7 +289,7 @@ export const parsePreparedContent = async (
|
||||
node.setAttribute('data-omnivore-anchor-idx', (index + 1).toString())
|
||||
})
|
||||
|
||||
article.content = article.dom.outerHTML
|
||||
article.content = articleDom.documentElement.outerHTML
|
||||
}
|
||||
|
||||
const newWindow = parseHTML('')
|
||||
|
||||
@ -9,7 +9,10 @@ RUN apk add --no-cache \
|
||||
ca-certificates \
|
||||
ttf-freefont \
|
||||
nodejs \
|
||||
yarn
|
||||
yarn \
|
||||
g++ \
|
||||
make \
|
||||
python3
|
||||
|
||||
# Add user so we don't need --no-sandbox.
|
||||
RUN addgroup -S pptruser && adduser -S -g pptruser pptruser \
|
||||
@ -29,6 +32,7 @@ COPY tsconfig.json .
|
||||
COPY .prettierrc .
|
||||
COPY .eslintrc .
|
||||
|
||||
COPY /packages/readabilityjs/package.json ./packages/readabilityjs/package.json
|
||||
COPY /packages/content-handler/package.json ./packages/content-handler/package.json
|
||||
COPY /packages/puppeteer-parse/package.json ./packages/puppeteer-parse/package.json
|
||||
|
||||
@ -37,6 +41,7 @@ RUN yarn install --pure-lockfile
|
||||
ADD /packages/content-fetch ./packages/content-fetch
|
||||
ADD /packages/content-handler ./packages/content-handler
|
||||
ADD /packages/puppeteer-parse ./packages/puppeteer-parse
|
||||
ADD /packages/readabilityjs ./packages/readabilityjs
|
||||
RUN yarn workspace @omnivore/content-handler build
|
||||
|
||||
# After building, fetch the production dependencies
|
||||
|
||||
@ -9,7 +9,10 @@ RUN apk add --no-cache \
|
||||
ca-certificates \
|
||||
ttf-freefont \
|
||||
nodejs \
|
||||
yarn
|
||||
yarn \
|
||||
g++ \
|
||||
make \
|
||||
python3
|
||||
|
||||
# Add user so we don't need --no-sandbox.
|
||||
RUN addgroup -S pptruser && adduser -S -g pptruser pptruser \
|
||||
@ -30,6 +33,7 @@ COPY tsconfig.json .
|
||||
COPY .prettierrc .
|
||||
COPY .eslintrc .
|
||||
|
||||
COPY /packages/readabilityjs/package.json ./packages/readabilityjs/package.json
|
||||
COPY /packages/content-handler/package.json ./packages/content-handler/package.json
|
||||
COPY /packages/puppeteer-parse/package.json ./packages/puppeteer-parse/package.json
|
||||
|
||||
@ -38,6 +42,7 @@ RUN yarn install --pure-lockfile
|
||||
ADD /packages/content-handler ./packages/content-handler
|
||||
ADD /packages/puppeteer-parse ./packages/puppeteer-parse
|
||||
ADD /packages/content-fetch ./packages/content-fetch
|
||||
ADD /packages/readabilityjs ./packages/readabilityjs
|
||||
RUN yarn workspace @omnivore/content-handler build
|
||||
|
||||
# After building, fetch the production dependencies
|
||||
|
||||
@ -3,6 +3,9 @@
|
||||
/* eslint-disable @typescript-eslint/explicit-function-return-type */
|
||||
/* eslint-disable @typescript-eslint/no-var-requires */
|
||||
/* eslint-disable @typescript-eslint/no-require-imports */
|
||||
const { encode } = require("urlsafe-base64");
|
||||
const crypto = require("crypto");
|
||||
|
||||
const Url = require('url');
|
||||
// const puppeteer = require('puppeteer-extra');
|
||||
const axios = require('axios');
|
||||
@ -13,6 +16,7 @@ const os = require('os');
|
||||
const { Storage } = require('@google-cloud/storage');
|
||||
const { parseHTML } = require('linkedom');
|
||||
const { preHandleContent } = require("@omnivore/content-handler");
|
||||
const { Readability } = require("@omnivore/readability");
|
||||
|
||||
const puppeteer = require('puppeteer-extra');
|
||||
|
||||
@ -22,6 +26,7 @@ puppeteer.use(StealthPlugin());
|
||||
|
||||
// Add adblocker plugin to block all ads and trackers (saves bandwidth)
|
||||
const AdblockerPlugin = require('puppeteer-extra-plugin-adblocker');
|
||||
const createDOMPurify = require("dompurify");
|
||||
puppeteer.use(AdblockerPlugin({ blockTrackers: true }));
|
||||
|
||||
const storage = new Storage();
|
||||
@ -199,6 +204,35 @@ const sendCreateArticleMutation = async (userId, input) => {
|
||||
return response.data.data.createArticle;
|
||||
};
|
||||
|
||||
/**
 * Posts the `savePage` GraphQL mutation to the backend for the given user.
 *
 * The input is copied and tagged with `source: 'puppeteer-parse'` before it
 * is sent. The request carries a token produced by `signToken` (signed with
 * `JWT_SECRET`) in the `auth` cookie.
 *
 * @param userId id placed in the token payload as `uid`
 * @param input fields of the `SavePageInput` variable
 * @returns the `savePage` member of the GraphQL response data
 */
const sendSavePageMutation = async (userId, input) => {
  const query = `mutation SavePage ($input: SavePageInput!){
    savePage(input:$input){
      ... on SaveSuccess{
        url
        clientRequestId
      }
      ... on SaveError{
        errorCodes
      }
    }
  }`;

  // Serialize first so a non-serializable input fails before a token is signed.
  const payload = JSON.stringify({
    query,
    variables: {
      input: { ...input, source: 'puppeteer-parse' },
    },
  });

  const token = await signToken({ uid: userId }, process.env.JWT_SECRET);
  const requestConfig = {
    headers: {
      Cookie: `auth=${token};`,
      'Content-Type': 'application/json',
    },
  };

  const endpoint = `${process.env.REST_BACKEND_ENDPOINT}/graphql`;
  const { data: body } = await axios.post(endpoint, payload, requestConfig);
  return body.data.savePage;
};
|
||||
|
||||
const saveUploadedPdf = async (userId, url, uploadFileId, articleSavingRequestId) => {
|
||||
return sendCreateArticleMutation(userId, {
|
||||
url: encodeURI(url),
|
||||
@ -280,17 +314,14 @@ async function fetchContent(req, res) {
|
||||
|
||||
logRecord.fetchContentTime = Date.now() - functionStartTime;
|
||||
|
||||
const apiResponse = await sendCreateArticleMutation(userId, {
|
||||
const readabilityResult = content ? (await getReadabilityResult(url, content)) : null;
|
||||
|
||||
const apiResponse = await sendSavePageMutation(userId, {
|
||||
url: finalUrl,
|
||||
articleSavingRequestId,
|
||||
preparedDocument: {
|
||||
document: content,
|
||||
pageInfo: {
|
||||
title,
|
||||
canonicalUrl: finalUrl,
|
||||
},
|
||||
},
|
||||
skipParsing: !content,
|
||||
clientRequestId: articleSavingRequestId,
|
||||
title,
|
||||
originalContent: content,
|
||||
parseResult: readabilityResult,
|
||||
});
|
||||
|
||||
logRecord.totalTime = Date.now() - functionStartTime;
|
||||
@ -306,17 +337,14 @@ async function fetchContent(req, res) {
|
||||
const content = sbResult.domContent;
|
||||
logRecord.fetchContentTime = Date.now() - functionStartTime;
|
||||
|
||||
const apiResponse = await sendCreateArticleMutation(userId, {
|
||||
url: sbUrl,
|
||||
articleSavingRequestId,
|
||||
preparedDocument: {
|
||||
document: content,
|
||||
pageInfo: {
|
||||
title: sbResult.title,
|
||||
canonicalUrl: sbUrl,
|
||||
},
|
||||
},
|
||||
skipParsing: !content,
|
||||
const readabilityResult = content ? (await getReadabilityResult(url, content)) : null;
|
||||
|
||||
const apiResponse = await sendSavePageMutation(userId, {
|
||||
url: finalUrl,
|
||||
clientRequestId: articleSavingRequestId,
|
||||
title,
|
||||
originalContent: content,
|
||||
parseResult: readabilityResult,
|
||||
});
|
||||
|
||||
logRecord.totalTime = Date.now() - functionStartTime;
|
||||
@ -758,6 +786,99 @@ async function preview(req, res) {
|
||||
return res.redirect(`${process.env.PREVIEW_IMAGE_CDN_ORIGIN}/${destination}`);
|
||||
}
|
||||
|
||||
/**
 * Options handed to `DOMPurify.sanitize` by `getPurifiedContent`.
 *
 * - ADD_TAGS / ADD_ATTR: additionally allow `iframe` elements and the listed
 *   iframe attributes.
 * - FORBID_ATTR: attributes removed from every element.
 */
const DOM_PURIFY_CONFIG = {
  ADD_TAGS: ['iframe'],
  ADD_ATTR: [
    'allow',
    'allowfullscreen',
    'frameborder',
    'scrolling',
  ],
  FORBID_ATTR: [
    'data-ml-dynamic',
    'data-ml-dynamic-type',
    'data-orig-url',
    'data-ml-id',
    'data-ml',
    'data-xid',
    'data-feature',
  ],
}
|
||||
|
||||
function domPurifySanitizeHook(node, data) {
|
||||
if (data.tagName === 'iframe') {
|
||||
const urlRegex = /^(https?:)?\/\/www\.youtube(-nocookie)?\.com\/embed\//i
|
||||
const src = node.getAttribute('src') || ''
|
||||
const dataSrc = node.getAttribute('data-src') || ''
|
||||
|
||||
if (src && urlRegex.test(src)) {
|
||||
return
|
||||
}
|
||||
|
||||
if (dataSrc && urlRegex.test(dataSrc)) {
|
||||
node.setAttribute('src', dataSrc)
|
||||
return
|
||||
}
|
||||
|
||||
node.parentNode?.removeChild(node)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Sanitizes HTML with DOMPurify and returns the result as a parsed document.
 *
 * A fresh DOMPurify instance is created on an empty linkedom window for each
 * call, with `domPurifySanitizeHook` registered for `uponSanitizeElement`
 * and `DOM_PURIFY_CONFIG` as the sanitize options.
 *
 * @param html markup to sanitize
 * @returns the `document` obtained by parsing the sanitized markup
 */
function getPurifiedContent(html) {
  const purifier = createDOMPurify(parseHTML(''))
  purifier.addHook('uponSanitizeElement', domPurifySanitizeHook)

  const sanitizedHtml = purifier.sanitize(html, DOM_PURIFY_CONFIG)
  const { document: sanitizedDocument } = parseHTML(sanitizedHtml)
  return sanitizedDocument
}
|
||||
|
||||
/**
 * Computes the signature for an image proxy URL.
 *
 * The value is an HMAC-SHA256 of `url` keyed with the `IMAGE_PROXY_SECRET`
 * environment variable, passed through urlsafe-base64 `encode`.
 *
 * @param url string to sign
 * @returns the encoded signature
 */
function signImageProxyUrl(url) {
  const hmac = crypto.createHmac('sha256', process.env.IMAGE_PROXY_SECRET)
  hmac.update(url)
  const rawDigest = hmac.digest()
  return encode(rawDigest)
}
|
||||
|
||||
/**
 * Builds an image proxy URL for `url`, or returns `url` untouched when the
 * proxy is not configured.
 *
 * The proxy is considered configured only when both `IMAGE_PROXY_URL` and
 * `IMAGE_PROXY_SECRET` are set. The signature covers `<url>#<width>x<height>`.
 *
 * @param url image URL to proxy
 * @param width requested width, defaults to 0
 * @param height requested height, defaults to 0
 * @returns `<IMAGE_PROXY_URL>/<width>x<height>,s<signature>/<url>` or `url`
 */
function createImageProxyUrl(url, width = 0, height = 0) {
  const proxyConfigured =
    Boolean(process.env.IMAGE_PROXY_URL) && Boolean(process.env.IMAGE_PROXY_SECRET)
  if (!proxyConfigured) {
    return url
  }

  const dimensions = `${width}x${height}`
  const signature = signImageProxyUrl(`${url}#${dimensions}`)

  return `${process.env.IMAGE_PROXY_URL}/${dimensions},s${signature}/${url}`
}
|
||||
|
||||
/**
 * Runs Readability over a fetched page and returns the first successful parse.
 *
 * Two candidates are tried in order:
 *   1. the page as given;
 *   2. a DOMPurify-sanitized copy of it (see `getPurifiedContent`).
 *
 * Readability works on a DOM document, while callers in this file pass the
 * page's HTML string. A string is therefore parsed with linkedom before the
 * first attempt; previously the raw string was handed to Readability, so the
 * "as is" attempt could never succeed and only logged a parsing error.
 * An already-parsed document is still accepted and used unchanged.
 *
 * @param url page URL, forwarded to Readability as the `url` option
 * @param document page content: an HTML string or a parsed document
 * @returns the Readability article, or `null` when no candidate parses
 */
async function getReadabilityResult(url, document) {
  const candidates = [
    // Page as is. Empty or missing input falls through and is skipped below.
    () => {
      if (typeof document === 'string' && document) {
        return parseHTML(document).document
      }
      return document
    },
    // Sanitized fallback.
    () => {
      return getPurifiedContent(document)
    },
  ]

  for (const buildCandidate of candidates) {
    const candidateDocument = buildCandidate()
    if (!candidateDocument) {
      continue
    }

    try {
      const article = await new Readability(candidateDocument, {
        createImageProxyUrl,
        url,
      }).parse()

      if (article) {
        return article
      }
    } catch (error) {
      // Best effort: a failure on one candidate must not prevent the next.
      console.log('parsing error for url', url, error)
    }
  }

  return null
}
|
||||
|
||||
module.exports = {
|
||||
fetchContent,
|
||||
preview,
|
||||
|
||||
@ -6,14 +6,18 @@
|
||||
"dependencies": {
|
||||
"@google-cloud/storage": "^5.18.1",
|
||||
"@omnivore/content-handler": "1.0.0",
|
||||
"@omnivore/readability": "1.0.0",
|
||||
"axios": "^0.27.2",
|
||||
"crypto": "^1.0.1",
|
||||
"dompurify": "^2.4.1",
|
||||
"jsonwebtoken": "^8.5.1",
|
||||
"linkedom": "^0.14.9",
|
||||
"puppeteer-core": "^16.1.0",
|
||||
"puppeteer-extra": "^3.3.4",
|
||||
"puppeteer-extra-plugin-adblocker": "^2.13.5",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.1",
|
||||
"underscore": "^1.13.4"
|
||||
"underscore": "^1.13.4",
|
||||
"urlsafe-base64": "^1.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"chai": "^4.3.6",
|
||||
|
||||
@ -2998,7 +2998,6 @@ Readability.prototype = {
|
||||
siteIcon: metadata.siteIcon,
|
||||
previewImage: metadata.previewImage,
|
||||
publishedDate: metadata.publishedDate || publishedAt || this._articlePublishedDate,
|
||||
dom: articleContent,
|
||||
language: this._getLanguage(metadata.locale || this._languageCode),
|
||||
};
|
||||
}
|
||||
|
||||
12
yarn.lock
12
yarn.lock
@ -12251,6 +12251,11 @@ crypto-random-string@^2.0.0:
|
||||
resolved "https://registry.yarnpkg.com/crypto-random-string/-/crypto-random-string-2.0.0.tgz#ef2a7a966ec11083388369baa02ebead229b30d5"
|
||||
integrity sha512-v1plID3y9r/lPhviJ1wrXpLeyUIGAZ2SHNYTEapm7/8A9nLPoyvVp3RK/EPFqn5kEznyWgYZNsRtYYIWbuG8KA==
|
||||
|
||||
crypto@^1.0.1:
|
||||
version "1.0.1"
|
||||
resolved "https://registry.yarnpkg.com/crypto/-/crypto-1.0.1.tgz#2af1b7cad8175d24c8a1b0778255794a21803037"
|
||||
integrity sha512-VxBKmeNcqQdiUQUW2Tzq0t377b54N2bMtXO/qiLa+6eRRmmC4qT3D4OnTGoT/U6O9aklQ/jTwbOtRMTTY8G0Ig==
|
||||
|
||||
css-loader@^3.6.0:
|
||||
version "3.6.0"
|
||||
resolved "https://registry.yarnpkg.com/css-loader/-/css-loader-3.6.0.tgz#2e4b2c7e6e2d27f8c8f28f61bffcd2e6c91ef645"
|
||||
@ -13055,6 +13060,11 @@ dompurify@^2.0.17:
|
||||
resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.3.8.tgz#224fe9ae57d7ebd9a1ae1ac18c1c1ca3f532226f"
|
||||
integrity sha512-eVhaWoVibIzqdGYjwsBWodIQIaXFSB+cKDf4cfxLMsK0xiud6SE+/WCVx/Xw/UwQsa4cS3T2eITcdtmTg2UKcw==
|
||||
|
||||
dompurify@^2.4.1:
|
||||
version "2.4.1"
|
||||
resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.4.1.tgz#f9cb1a275fde9af6f2d0a2644ef648dd6847b631"
|
||||
integrity sha512-ewwFzHzrrneRjxzmK6oVz/rZn9VWspGFRDb4/rRtIsM1n36t9AKma/ye8syCpcw+XJ25kOK/hOG7t1j2I2yBqA==
|
||||
|
||||
domutils@^2.0.0, domutils@^2.5.2:
|
||||
version "2.7.0"
|
||||
resolved "https://registry.yarnpkg.com/domutils/-/domutils-2.7.0.tgz#8ebaf0c41ebafcf55b0b72ec31c56323712c5442"
|
||||
@ -26280,7 +26290,7 @@ url@^0.11.0:
|
||||
urlsafe-base64@^1.0.0:
|
||||
version "1.0.0"
|
||||
resolved "https://registry.yarnpkg.com/urlsafe-base64/-/urlsafe-base64-1.0.0.tgz#23f89069a6c62f46cf3a1d3b00169cefb90be0c6"
|
||||
integrity sha1-I/iQaabGL0bPOh07ABac77kL4MY=
|
||||
integrity sha512-RtuPeMy7c1UrHwproMZN9gN6kiZ0SvJwRaEzwZY0j9MypEkFqyBaKv176jvlPtg58Zh36bOkS0NFABXMHvvGCA==
|
||||
|
||||
use-callback-ref@^1.2.3:
|
||||
version "1.2.5"
|
||||
|
||||
Reference in New Issue
Block a user