remove unused dependencies
This commit is contained in:
@ -9,18 +9,12 @@
|
||||
],
|
||||
"dependencies": {
|
||||
"@omnivore/content-handler": "1.0.0",
|
||||
"@omnivore/readability": "1.0.0",
|
||||
"crypto": "^1.0.1",
|
||||
"dompurify": "^2.4.1",
|
||||
"puppeteer-core": "^22.12.1",
|
||||
"puppeteer-extra": "^3.3.6",
|
||||
"puppeteer-extra-plugin-adblocker": "^2.13.6",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
||||
"urlsafe-base64": "^1.0.0"
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/dompurify": "^3.0.5",
|
||||
"@types/urlsafe-base64": "^1.0.31",
|
||||
"chai": "^4.3.6",
|
||||
"mocha": "^10.0.0"
|
||||
},
|
||||
|
||||
@ -1,4 +1,3 @@
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
|
||||
import { preHandleContent } from '@omnivore/content-handler'
|
||||
import path from 'path'
|
||||
|
||||
173
packages/puppeteer-parse/src/readability.d.ts
vendored
173
packages/puppeteer-parse/src/readability.d.ts
vendored
@ -1,173 +0,0 @@
|
||||
// Type definitions for non-npm package mozilla-readability 0.2
|
||||
// Project: https://github.com/mozilla/readability
|
||||
// Definitions by: Charles Vandevoorde <https://github.com/charlesvdv>, Alex Wendland <https://github.com/awendland>
|
||||
// Definitions: https://github.com/DefinitelyTyped/DefinitelyTyped
|
||||
// TypeScript Version: 2.2
|
||||
|
||||
declare module '@omnivore/readability' {
|
||||
/**
|
||||
* A standalone version of the readability library used for Firefox Reader View.
|
||||
*
|
||||
* Note that isProbablyReaderable() was moved into a separate file in https://github.com/mozilla/readability/commit/2620542dd1e8380220d82afa97a2c283ae636e40
|
||||
* and therefore is no longer part of the Readability class.
|
||||
*/
|
||||
class Readability {
|
||||
/**
|
||||
* ## Usage on the web
|
||||
*
|
||||
* To parse a document, you must create a new Readability object from a
|
||||
* DOM document object, and then call parse(). Here's an example:
|
||||
*
|
||||
* ```js
|
||||
* var article = new Readability(document).parse();
|
||||
* ```
|
||||
*
|
||||
* If you're using Readability on the web, you will likely be able to
|
||||
* use a document reference from elsewhere (e.g. fetched via XMLHttpRequest,
|
||||
* in a same-origin <iframe> you have access to, etc.).
|
||||
*
|
||||
* ## Usage from node.js
|
||||
*
|
||||
* In node.js, you won't generally have a DOM document object. To obtain one, you can use external
|
||||
* libraries like [jsdom](https://github.com/tmpvar/jsdom). While this repository contains a parser of
|
||||
* its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do
|
||||
* not recommend it for general use.
|
||||
*
|
||||
* If you're using `jsdom` to create a DOM object, you should ensure that the page doesn't run (page)
|
||||
* scripts (avoid fetching remote resources etc.) as well as passing it the page's URI as the `url`
|
||||
* property of the `options` object you pass the `JSDOM` constructor.
|
||||
*
|
||||
* ```js
|
||||
* var JSDOM = require('jsdom').JSDOM;
|
||||
* var doc = new JSDOM("<body>Here's a bunch of text</body>", {
|
||||
* url: "https://www.example.com/the-page-i-got-the-source-from",
|
||||
* });
|
||||
* let reader = new Readability(doc.window.document);
|
||||
* let article = reader.parse();
|
||||
* ```
|
||||
*/
|
||||
constructor(doc: Document, options?: Readability.Options)
|
||||
|
||||
/**
|
||||
* Runs readability.
|
||||
*
|
||||
* ## Workflow:
|
||||
*
|
||||
* 1. Prep the document by removing script tags, css, etc.
|
||||
* 2. Build readability's DOM tree.
|
||||
* 3. Grab the article content from the current dom tree.
|
||||
* 4. Replace the current DOM tree with the new one.
|
||||
* 5. Read peacefully.
|
||||
*
|
||||
* ## Additional notes:
|
||||
*
|
||||
* Readability's parse() works by modifying the DOM. This removes some
|
||||
* elements in the web page. You could avoid this by passing the clone
|
||||
* of the document object while creating a Readability object.
|
||||
*
|
||||
* ```js
|
||||
* var documentClone = document.cloneNode(true);
|
||||
* var article = new Readability(documentClone).parse();
|
||||
* ```
|
||||
*
|
||||
* The response will be null if the processing failed (https://github.com/mozilla/readability/blob/52ab9b5c8916c306a47b2119270dcdabebf9d203/Readability.js#L2038)
|
||||
*/
|
||||
async parse(): Promise<Readability.ParseResult | null>
|
||||
}
|
||||
|
||||
namespace Readability {
|
||||
interface Options {
|
||||
/**
|
||||
* Control whether log messages are sent to the console
|
||||
*/
|
||||
debug?: boolean
|
||||
|
||||
/**
|
||||
* Set a maximum size on the documents that will be processed. This size is
|
||||
* checked before any parsing operations occur. If the number of elements in
|
||||
* the document exceeds this threshold then an Error will be thrown.
|
||||
*
|
||||
* See implementation details at https://github.com/mozilla/readability/blob/52ab9b5c8916c306a47b2119270dcdabebf9d203/Readability.js#L2019
|
||||
*/
|
||||
maxElemsToParse?: number
|
||||
|
||||
nbTopCandidates?: number
|
||||
|
||||
/**
|
||||
* Minimum number of characters in the extracted textContent in order to
|
||||
* consider the article correctly identified. If the threshold is not met then
|
||||
* the extraction process will automatically run again with different flags.
|
||||
*
|
||||
* See implementation details at https://github.com/mozilla/readability/blob/52ab9b5c8916c306a47b2119270dcdabebf9d203/Readability.js#L1208
|
||||
*
|
||||
* Changed from wordThreshold in https://github.com/mozilla/readability/commit/3ff9a166fb27928f222c4c0722e730eda412658a
|
||||
*/
|
||||
charThreshold?: number
|
||||
|
||||
/**
|
||||
* parse() removes the class="" attribute from every element in the given
|
||||
* subtree, except those that match CLASSES_TO_PRESERVE and
|
||||
* the classesToPreserve array from the options object.
|
||||
*/
|
||||
classesToPreserve?: string[]
|
||||
|
||||
/**
|
||||
* By default Readability will strip all classes from the HTML elements in the
|
||||
* processed article. By setting this to `true` the classes will be retained.
|
||||
*
|
||||
* This is a blanket alternative to `classesToPreserve`.
|
||||
*
|
||||
* Added in https://github.com/mozilla/readability/commit/2982216913af2c66b0690e88606b03116553ad92
|
||||
*/
|
||||
|
||||
keepClasses?: boolean
|
||||
url?: string
|
||||
|
||||
/**
|
||||
* Function that converts a regular image url into imageproxy url
|
||||
* @param url string
|
||||
*/
|
||||
createImageProxyUrl?: (
|
||||
url: string,
|
||||
width?: number,
|
||||
height?: number
|
||||
) => string
|
||||
|
||||
/**
|
||||
* By default, Readability will clean all tables from the HTML elements in the
|
||||
* processed article. But newsletters in emails use tables to display their content.
|
||||
* By setting this to `true`, these tables will be retained.
|
||||
*/
|
||||
keepTables?: boolean
|
||||
ignoreLinkDensity?: boolean
|
||||
}
|
||||
|
||||
interface ParseResult {
|
||||
/** Article title */
|
||||
title: string
|
||||
/** Author metadata */
|
||||
byline?: string | null
|
||||
/** Content direction */
|
||||
dir?: string | null
|
||||
/** HTML string of processed article content */
|
||||
content: string
|
||||
/** non-HTML version of `content` */
|
||||
textContent: string
|
||||
/** Length of an article, in characters */
|
||||
length: number
|
||||
/** Article description, or short excerpt from the content */
|
||||
excerpt: string
|
||||
/** Article site name */
|
||||
siteName?: string | null
|
||||
/** Article site icon */
|
||||
siteIcon?: string | null
|
||||
/** Article preview image */
|
||||
previewImage?: string | null
|
||||
/** Article published date */
|
||||
publishedDate?: Date | null
|
||||
language?: string | null
|
||||
}
|
||||
}
|
||||
|
||||
export { Readability }
|
||||
}
|
||||
24
yarn.lock
24
yarn.lock
@ -7910,13 +7910,6 @@
|
||||
dependencies:
|
||||
"@types/trusted-types" "*"
|
||||
|
||||
"@types/dompurify@^3.0.5":
|
||||
version "3.0.5"
|
||||
resolved "https://registry.yarnpkg.com/@types/dompurify/-/dompurify-3.0.5.tgz#02069a2fcb89a163bacf1a788f73cb415dd75cb7"
|
||||
integrity sha512-1Wg0g3BtQF7sSb27fJQAKck1HECM6zV1EB66j8JH9i3LCjYabJa0FSdiSgsD5K/RbrsR0SiraKacLB+T8ZVYAg==
|
||||
dependencies:
|
||||
"@types/trusted-types" "*"
|
||||
|
||||
"@types/duplexify@^3.6.0":
|
||||
version "3.6.0"
|
||||
resolved "https://registry.yarnpkg.com/@types/duplexify/-/duplexify-3.6.0.tgz#dfc82b64bd3a2168f5bd26444af165bf0237dcd8"
|
||||
@ -8851,13 +8844,6 @@
|
||||
dependencies:
|
||||
"@types/node" "*"
|
||||
|
||||
"@types/urlsafe-base64@^1.0.31":
|
||||
version "1.0.31"
|
||||
resolved "https://registry.yarnpkg.com/@types/urlsafe-base64/-/urlsafe-base64-1.0.31.tgz#716fde3a64a558a32f7f464bc31b43da3e5e87fd"
|
||||
integrity sha512-f+l5StyboXmVsjTXLUkNHu8FptkdJ6iGl7z7nKBQVQwvSDulzi+Ov5Rtuq6c43jHYjSHax7Mplr1ldweOdFohg==
|
||||
dependencies:
|
||||
"@types/node" "*"
|
||||
|
||||
"@types/uuid@^8.3.0", "@types/uuid@^8.3.1", "@types/uuid@^8.3.4":
|
||||
version "8.3.4"
|
||||
resolved "https://registry.yarnpkg.com/@types/uuid/-/uuid-8.3.4.tgz#bd86a43617df0594787d38b735f55c805becf1bc"
|
||||
@ -13241,11 +13227,6 @@ crypto-random-string@^2.0.0:
|
||||
resolved "https://registry.yarnpkg.com/crypto-random-string/-/crypto-random-string-2.0.0.tgz#ef2a7a966ec11083388369baa02ebead229b30d5"
|
||||
integrity sha512-v1plID3y9r/lPhviJ1wrXpLeyUIGAZ2SHNYTEapm7/8A9nLPoyvVp3RK/EPFqn5kEznyWgYZNsRtYYIWbuG8KA==
|
||||
|
||||
crypto@^1.0.1:
|
||||
version "1.0.1"
|
||||
resolved "https://registry.yarnpkg.com/crypto/-/crypto-1.0.1.tgz#2af1b7cad8175d24c8a1b0778255794a21803037"
|
||||
integrity sha512-VxBKmeNcqQdiUQUW2Tzq0t377b54N2bMtXO/qiLa+6eRRmmC4qT3D4OnTGoT/U6O9aklQ/jTwbOtRMTTY8G0Ig==
|
||||
|
||||
css-loader@^3.6.0:
|
||||
version "3.6.0"
|
||||
resolved "https://registry.yarnpkg.com/css-loader/-/css-loader-3.6.0.tgz#2e4b2c7e6e2d27f8c8f28f61bffcd2e6c91ef645"
|
||||
@ -14185,11 +14166,6 @@ dompurify@^2.0.17:
|
||||
resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.3.8.tgz#224fe9ae57d7ebd9a1ae1ac18c1c1ca3f532226f"
|
||||
integrity sha512-eVhaWoVibIzqdGYjwsBWodIQIaXFSB+cKDf4cfxLMsK0xiud6SE+/WCVx/Xw/UwQsa4cS3T2eITcdtmTg2UKcw==
|
||||
|
||||
dompurify@^2.4.1:
|
||||
version "2.4.1"
|
||||
resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.4.1.tgz#f9cb1a275fde9af6f2d0a2644ef648dd6847b631"
|
||||
integrity sha512-ewwFzHzrrneRjxzmK6oVz/rZn9VWspGFRDb4/rRtIsM1n36t9AKma/ye8syCpcw+XJ25kOK/hOG7t1j2I2yBqA==
|
||||
|
||||
dompurify@^2.4.3:
|
||||
version "2.4.3"
|
||||
resolved "https://registry.yarnpkg.com/dompurify/-/dompurify-2.4.3.tgz#f4133af0e6a50297fc8874e2eaedc13a3c308c03"
|
||||
|
||||
Reference in New Issue
Block a user