Merge branch 'main' into feat/ios-display-digest-icon
This commit is contained in:
11
Makefile
11
Makefile
@ -30,5 +30,12 @@ web:
|
||||
qp:
|
||||
yarn workspace @omnivore/api dev_qp
|
||||
|
||||
content:
|
||||
yarn workspace @omnivore/content_fetch start
|
||||
content_handler:
|
||||
yarn workspace @omnivore/content-handler build
|
||||
|
||||
puppeteer:
|
||||
yarn workspace @omnivore/puppeteer-parse build
|
||||
|
||||
content_fetch: content_handler puppeteer
|
||||
yarn workspace @omnivore/content-fetch build
|
||||
yarn workspace @omnivore/content-fetch start
|
||||
|
||||
161
apple/OmnivoreKit/Sources/App/Views/AI/DigestConfigView.swift
Normal file
161
apple/OmnivoreKit/Sources/App/Views/AI/DigestConfigView.swift
Normal file
@ -0,0 +1,161 @@
|
||||
import SwiftUI
|
||||
import Models
|
||||
import Services
|
||||
import Views
|
||||
import MarkdownUI
|
||||
import Utils
|
||||
import Transmission
|
||||
|
||||
/// View model backing `DigestConfigView`.
///
/// Holds the currently loaded digest and the loading flag, and decides whether the digest
/// is read from the cached `digest.json` file or requested from `DataService`.
@MainActor
public class DigestConfigViewModel: ObservableObject {
  @Published var isLoading = false
  @Published var digest: DigestResult?
  @Published var chapterInfo: [(DigestChapter, DigestChapterData)]?
  @Published var presentedLibraryItem: String?
  @Published var presentWebContainer = false

  @AppStorage(UserDefaultKey.lastVisitedDigestId.rawValue) var lastVisitedDigestId = ""

  /// Loads the digest into `digest`.
  ///
  /// The stored digest is used when the cache file is less than two hours old. If the cache
  /// is stale, missing, or cannot be loaded, the latest digest is requested from the service
  /// with a 10 second timeout.
  /// - Parameter dataService: Service used to read the stored digest and fetch the latest one.
  func load(dataService: DataService) async {
    isLoading = true
    // Reset the flag on every exit path, including the early return below.
    defer { isLoading = false }

    // Previously a fresh-but-unreadable cache left `digest` untouched and never hit the
    // network; now we only return early when the stored digest actually loads.
    if !digestNeedsRefresh(), let stored = dataService.loadStoredDigest() {
      digest = stored
      return
    }

    do {
      if let latest = try await dataService.getLatestDigest(timeoutInterval: 10) {
        digest = latest
      }
    } catch {
      print("ERROR WITH DIGEST: ", error)
      digest = nil
    }
  }

  /// Asks the service to refresh the digest; failures are logged and otherwise ignored.
  /// - Parameter dataService: Service that performs the refresh request.
  func refreshDigest(dataService: DataService) async {
    do {
      try await dataService.refreshDigest()
    } catch {
      print("ERROR WITH DIGEST: ", error)
    }
  }

  /// Reports whether the cached `digest.json` should be considered stale.
  /// - Returns: `true` when the cache file's modification date is more than two hours old,
  ///   or when its attributes cannot be read (for example, the file does not exist).
  func digestNeedsRefresh() -> Bool {
    let localURL = URL.om_cachesDirectory.appendingPathComponent("digest.json")
    do {
      let attributes = try FileManager.default.attributesOfItem(atPath: localURL.path)
      if let modificationDate = attributes[.modificationDate] as? Date {
        // Two hours ago
        let twoHoursAgo = Date().addingTimeInterval(-2 * 60 * 60)
        return modificationDate < twoHoursAgo
      }
    } catch {
      print("Error: \(error)")
    }
    // No usable modification date: treat the cache as stale.
    return true
  }
}
|
||||
|
||||
/// Sheet content that introduces Omnivore Digest and shows the "Hide digest" /
/// "Enable digest" buttons. Loading of the digest is kicked off when the view appears.
@available(iOS 17.0, *)
@MainActor
struct DigestConfigView: View {
  @StateObject var viewModel = DigestConfigViewModel()
  let dataService: DataService

  @Environment(\.dismiss) private var dismiss

  public init(dataService: DataService) {
    self.dataService = dataService
  }

  /// Markdown copy shown in the explanation card.
  private var overviewMarkdown: String {
    """
    Omnivore Digest is a free daily digest of your best recent library items. Omnivore
    filters and ranks all the items recently added to your library, uses AI to summarize them,
    and creates a short library item, email, or a daily podcast you can listen to in our iOS app.

    Note that if you sign up for Digest, your recent library items will be processed by an AI
    service (Anthropic, or OpenAI). Your highlights, notes, and labels will not be sent to the AI
    service.

    Digest is available to all users that have saved at least ten items and added two subscriptions.
    """
  }

  var body: some View {
    VStack {
      titleBlock
        .padding(.top, 10)
      itemBody
        .padding(15)

      Spacer()
    }
    .task {
      await viewModel.load(dataService: dataService)
    }
  }

  /// Header row: title, digest icon and the close button pushed to the trailing edge.
  var titleBlock: some View {
    HStack {
      Text("Omnivore Digest")
        .font(.system(size: 18, weight: .semibold))
      Image.tabDigestSelected
      Spacer()
      closeButton
    }
    .padding(.top, 20)
    .padding(.horizontal, 20)
  }

  /// Plain-styled "Close" button that dismisses the sheet.
  var closeButton: some View {
    Button {
      dismiss()
    } label: {
      Text("Close")
        .foregroundColor(Color.blue)
    }
    .buttonStyle(.plain)
  }

  /// Small Omnivore logo followed by the "Omnivore.app" caption.
  var logoBlock: some View {
    HStack {
      Image.coloredSmallOmnivoreLogo
        .resizable()
        .frame(width: 20, height: 20)
      Text("Omnivore.app")
        .font(.system(size: 14))
        .foregroundColor(Color.themeLibraryItemSubtle)
      Spacer()
    }
  }

  /// Card with the logo, the Digest explanation and the two action buttons.
  /// Both button actions are currently empty closures.
  @available(iOS 17.0, *)
  var itemBody: some View {
    VStack(alignment: .leading, spacing: 20) {
      logoBlock

      Markdown(overviewMarkdown)
        .lineSpacing(10)
        .accentColor(.appGraySolid)
        .font(.appSubheadline)
        .padding(5)
        .frame(maxWidth: .infinity, alignment: .leading)

      HStack {
        Spacer()

        Button {} label: {
          Text("Hide digest")
        }
        .buttonStyle(RoundedRectButtonStyle())

        Button {} label: {
          Text("Enable digest")
        }
        .buttonStyle(RoundedRectButtonStyle(color: Color.blue, textColor: Color.white))
      }
    }
    .padding(15)
    .background(Color.themeLabelBackground.opacity(0.6))
    .cornerRadius(5)
  }
}
|
||||
@ -331,15 +331,15 @@ struct AnimatingCellHeight: AnimatableModifier {
|
||||
Text("Sorry digest is only available on iOS 17 and above")
|
||||
}
|
||||
}
|
||||
// .sheet(isPresented: $showDigestConfig) {
|
||||
// if #available(iOS 17.0, *) {
|
||||
// NavigationView {
|
||||
// DigestConfigView(dataService: dataService)
|
||||
// }
|
||||
// } else {
|
||||
// Text("Sorry digest is only available on iOS 17 and above")
|
||||
// }
|
||||
// }
|
||||
.sheet(isPresented: $showDigestConfig) {
|
||||
if #available(iOS 17.0, *) {
|
||||
NavigationView {
|
||||
DigestConfigView(dataService: dataService)
|
||||
}
|
||||
} else {
|
||||
Text("Sorry digest is only available on iOS 17 and above")
|
||||
}
|
||||
}
|
||||
.toolbar {
|
||||
toolbarItems
|
||||
}
|
||||
@ -422,6 +422,15 @@ struct AnimatingCellHeight: AnimatableModifier {
|
||||
// .buttonStyle(.plain)
|
||||
// .padding(.trailing, 4)
|
||||
// }
|
||||
if #available(iOS 17.0, *), !dataService.featureFlags.digestEnabled, !viewModel.digestHidden {
|
||||
// Give the user an opportunity to enable digest
|
||||
Button(
|
||||
action: { showDigestConfig = true },
|
||||
label: { Image.tabDigestSelected }
|
||||
)
|
||||
.buttonStyle(.plain)
|
||||
.padding(.trailing, 4)
|
||||
}
|
||||
if prefersListLayout {
|
||||
Button(
|
||||
action: { isEditMode = isEditMode == .active ? .inactive : .active },
|
||||
|
||||
@ -51,6 +51,7 @@ enum LoadingBarStyle {
|
||||
@AppStorage("LibraryTabView::hideFollowingTab") var hideFollowingTab = false
|
||||
@AppStorage("LibraryTabView::digestHidden") var digestHidden = false
|
||||
@AppStorage(UserDefaultKey.lastVisitedDigestId.rawValue) var lastVisitedDigestId = ""
|
||||
@AppStorage("LibraryTabView::digestHidden") var digestHidden = false
|
||||
|
||||
@AppStorage(UserDefaultKey.lastSelectedFeaturedItemFilter.rawValue) var featureFilter = FeaturedItemFilter.continueReading.rawValue
|
||||
|
||||
|
||||
@ -96,6 +96,17 @@ services:
|
||||
- JWT_SECRET=some_secret
|
||||
- VERIFICATION_TOKEN=some_token
|
||||
- REST_BACKEND_ENDPOINT=http://api:8080/api
|
||||
- REDIS_URL=redis://redis:6379
|
||||
depends_on:
|
||||
redis:
|
||||
condition: service_healthy
|
||||
api:
|
||||
condition: service_healthy
|
||||
|
||||
redis:
|
||||
image: "redis:7.2.4"
|
||||
container_name: "omnivore-redis"
|
||||
ports:
|
||||
- "6379:6379"
|
||||
healthcheck:
|
||||
test: [ "CMD", "redis-cli", "--raw", "incr", "ping" ]
|
||||
|
||||
@ -127,7 +127,9 @@ export const _findThumbnail = (imagesSizes: (ImageSize | null)[]) => {
|
||||
export const findThumbnail = async (data: Data) => {
|
||||
const { libraryItemId, userId } = data
|
||||
|
||||
const item = await findLibraryItemById(libraryItemId, userId)
|
||||
const item = await findLibraryItemById(libraryItemId, userId, {
|
||||
select: ['thumbnail', 'readableContent'],
|
||||
})
|
||||
if (!item) {
|
||||
logger.info('page not found')
|
||||
return false
|
||||
|
||||
@ -12,6 +12,7 @@ import { saveFile } from '../services/save_file'
|
||||
import { savePage } from '../services/save_page'
|
||||
import { uploadFile } from '../services/upload_file'
|
||||
import { logError, logger } from '../utils/logger'
|
||||
import { downloadFromUrl, uploadToSignedUrl } from '../utils/uploads'
|
||||
|
||||
const signToken = promisify(jwt.sign)
|
||||
|
||||
@ -47,39 +48,6 @@ const isFetchResult = (obj: unknown): obj is FetchResult => {
|
||||
return typeof obj === 'object' && obj !== null && 'finalUrl' in obj
|
||||
}
|
||||
|
||||
const uploadToSignedUrl = async (
|
||||
uploadSignedUrl: string,
|
||||
contentType: string,
|
||||
contentObjUrl: string
|
||||
) => {
|
||||
const maxContentLength = 10 * 1024 * 1024 // 10MB
|
||||
|
||||
logger.info('downloading content', {
|
||||
contentObjUrl,
|
||||
})
|
||||
|
||||
// download the content as stream and max 10MB
|
||||
const response = await axios.get(contentObjUrl, {
|
||||
responseType: 'stream',
|
||||
maxContentLength,
|
||||
timeout: REQUEST_TIMEOUT,
|
||||
})
|
||||
|
||||
logger.info('uploading to signed url', {
|
||||
uploadSignedUrl,
|
||||
contentType,
|
||||
})
|
||||
|
||||
// upload the stream to the signed url
|
||||
await axios.put(uploadSignedUrl, response.data, {
|
||||
headers: {
|
||||
'Content-Type': contentType,
|
||||
},
|
||||
maxBodyLength: maxContentLength,
|
||||
timeout: REQUEST_TIMEOUT,
|
||||
})
|
||||
}
|
||||
|
||||
const uploadPdf = async (
|
||||
url: string,
|
||||
userId: string,
|
||||
@ -98,7 +66,19 @@ const uploadPdf = async (
|
||||
throw new Error('error while getting upload id and signed url')
|
||||
}
|
||||
|
||||
await uploadToSignedUrl(result.uploadSignedUrl, 'application/pdf', url)
|
||||
logger.info('downloading content', {
|
||||
url,
|
||||
})
|
||||
|
||||
const data = await downloadFromUrl(url, REQUEST_TIMEOUT)
|
||||
|
||||
const uploadSignedUrl = result.uploadSignedUrl
|
||||
const contentType = 'application/pdf'
|
||||
logger.info('uploading to signed url', {
|
||||
uploadSignedUrl,
|
||||
contentType,
|
||||
})
|
||||
await uploadToSignedUrl(uploadSignedUrl, data, contentType, REQUEST_TIMEOUT)
|
||||
|
||||
logger.info('pdf uploaded successfully', {
|
||||
url,
|
||||
|
||||
65
packages/api/src/jobs/upload_content.ts
Normal file
65
packages/api/src/jobs/upload_content.ts
Normal file
@ -0,0 +1,65 @@
|
||||
import { findLibraryItemById } from '../services/library_item'
|
||||
import { logger } from '../utils/logger'
|
||||
import { htmlToHighlightedMarkdown, htmlToMarkdown } from '../utils/parser'
|
||||
import { uploadToBucket } from '../utils/uploads'
|
||||
|
||||
export const UPLOAD_CONTENT_JOB = 'UPLOAD_CONTENT_JOB'
|
||||
|
||||
export type ContentFormat = 'markdown' | 'highlightedMarkdown' | 'original'
|
||||
|
||||
export interface UploadContentJobData {
|
||||
libraryItemId: string
|
||||
userId: string
|
||||
format: ContentFormat
|
||||
filePath: string
|
||||
}
|
||||
|
||||
/**
 * Converts the stored HTML of a library item into the requested output format.
 *
 * @param content HTML content to convert
 * @param format target format; 'original' returns the content unchanged
 * @returns the converted content
 * @throws Error('Unsupported format') when the format is none of the known values
 */
const convertContent = (content: string, format: ContentFormat): string => {
  if (format === 'markdown') {
    return htmlToMarkdown(content)
  }
  if (format === 'highlightedMarkdown') {
    return htmlToHighlightedMarkdown(content)
  }
  if (format === 'original') {
    return content
  }

  // Reachable only when a value outside ContentFormat arrives at runtime.
  throw new Error('Unsupported format')
}
|
||||
|
||||
// MIME type stored with the uploaded object for each content format.
// Typed as a Record so adding a ContentFormat without a MIME type fails to compile.
const CONTENT_TYPES: Record<ContentFormat, string> = {
  markdown: 'text/markdown',
  highlightedMarkdown: 'text/markdown',
  original: 'text/html',
}

/**
 * Queue job: converts a library item's original content to the requested format
 * and uploads it to the bucket at `data.filePath`.
 *
 * @param data job payload (library item id, owner id, target format, bucket path)
 * @throws Error when the library item or its original content is missing, when the
 *   format is unsupported, or when the upload fails (the error is propagated)
 */
export const uploadContentJob = async (data: UploadContentJobData) => {
  logger.info('Uploading content to bucket', data)

  const { libraryItemId, userId, format, filePath } = data
  // Only the original content is needed, so avoid loading the rest of the row.
  const libraryItem = await findLibraryItemById(libraryItemId, userId, {
    select: ['originalContent'],
  })
  if (!libraryItem) {
    logger.error('Library item not found', data)
    throw new Error('Library item not found')
  }

  if (!libraryItem.originalContent) {
    logger.error('Original content not found', data)
    throw new Error('Original content not found')
  }

  logger.info('Converting content', data)
  const content = convertContent(libraryItem.originalContent, format)

  logger.info('Uploading content', data)
  // Time each call locally instead of console.time(): a fixed console.time label is
  // shared by every job in the process, so concurrent jobs would collide on it, and
  // timeEnd would be skipped whenever the upload throws.
  const uploadStartedAt = Date.now()
  try {
    await uploadToBucket(filePath, Buffer.from(content), {
      contentType: CONTENT_TYPES[format],
      timeout: 60000, // 1 minute
    })
  } finally {
    logger.info('uploadToBucket finished', {
      ...data,
      durationMs: Date.now() - uploadStartedAt,
    })
  }

  logger.info('Content uploaded', data)
}
|
||||
@ -60,6 +60,7 @@ import {
|
||||
UPDATE_LABELS_JOB,
|
||||
} from './jobs/update_db'
|
||||
import { updatePDFContentJob } from './jobs/update_pdf_content'
|
||||
import { uploadContentJob, UPLOAD_CONTENT_JOB } from './jobs/upload_content'
|
||||
import { redisDataSource } from './redis_data_source'
|
||||
import { CACHED_READING_POSITION_PREFIX } from './services/cached_reading_position'
|
||||
import { getJobPriority } from './utils/createTask'
|
||||
@ -182,6 +183,8 @@ export const createWorker = (connection: ConnectionOptions) =>
|
||||
return forwardEmailJob(job.data)
|
||||
case CREATE_DIGEST_JOB:
|
||||
return createDigest(job.data)
|
||||
case UPLOAD_CONTENT_JOB:
|
||||
return uploadContentJob(job.data)
|
||||
default:
|
||||
logger.warning(`[queue-processor] unhandled job: ${job.name}`)
|
||||
}
|
||||
|
||||
@ -399,6 +399,10 @@ export const getArticleResolver = authorized<
|
||||
'recommendations.recommender',
|
||||
'recommendations_recommender'
|
||||
)
|
||||
.leftJoinAndSelect(
|
||||
'recommendations_recommender.profile',
|
||||
'recommendations_recommender_profile'
|
||||
)
|
||||
.where('libraryItem.user_id = :uid', { uid })
|
||||
|
||||
// We allow the backend to use the ID instead of a slug to fetch the article
|
||||
|
||||
@ -82,7 +82,22 @@ export const articleSavingRequestResolver = authorized<
|
||||
|
||||
let libraryItem: LibraryItem | null = null
|
||||
if (id) {
|
||||
libraryItem = await findLibraryItemById(id, uid)
|
||||
libraryItem = await findLibraryItemById(id, uid, {
|
||||
select: [
|
||||
'id',
|
||||
'state',
|
||||
'originalUrl',
|
||||
'slug',
|
||||
'title',
|
||||
'author',
|
||||
'createdAt',
|
||||
'updatedAt',
|
||||
'savedAt',
|
||||
],
|
||||
relations: {
|
||||
user: true,
|
||||
},
|
||||
})
|
||||
} else if (url) {
|
||||
libraryItem = await findLibraryItemByUrl(cleanUrl(url), uid)
|
||||
}
|
||||
|
||||
@ -141,7 +141,14 @@ export const recommendResolver = authorized<
|
||||
MutationRecommendArgs
|
||||
>(async (_, { input }, { uid, log, signToken }) => {
|
||||
try {
|
||||
const item = await findLibraryItemById(input.pageId, uid)
|
||||
const item = await findLibraryItemById(input.pageId, uid, {
|
||||
select: ['id'],
|
||||
relations: {
|
||||
highlights: {
|
||||
user: true,
|
||||
},
|
||||
},
|
||||
})
|
||||
if (!item) {
|
||||
return {
|
||||
errorCodes: [RecommendErrorCode.NotFound],
|
||||
@ -259,7 +266,9 @@ export const recommendHighlightsResolver = authorized<
|
||||
}
|
||||
}
|
||||
|
||||
const item = await findLibraryItemById(input.pageId, uid)
|
||||
const item = await findLibraryItemById(input.pageId, uid, {
|
||||
select: ['id'],
|
||||
})
|
||||
if (!item) {
|
||||
return {
|
||||
errorCodes: [RecommendHighlightsErrorCode.NotFound],
|
||||
|
||||
@ -94,7 +94,9 @@ export function articleRouter() {
|
||||
})
|
||||
|
||||
try {
|
||||
const item = await findLibraryItemById(articleId, uid)
|
||||
const item = await findLibraryItemById(articleId, uid, {
|
||||
select: ['title', 'readableContent', 'itemLanguage'],
|
||||
})
|
||||
if (!item) {
|
||||
return res.status(404).send('Page not found')
|
||||
}
|
||||
|
||||
125
packages/api/src/routers/content_router.ts
Normal file
125
packages/api/src/routers/content_router.ts
Normal file
@ -0,0 +1,125 @@
|
||||
import cors from 'cors'
|
||||
import express, { Router } from 'express'
|
||||
import { ContentFormat, UploadContentJobData } from '../jobs/upload_content'
|
||||
import { findLibraryItemsByIds } from '../services/library_item'
|
||||
import { getClaimsByToken, getTokenByRequest } from '../utils/auth'
|
||||
import { corsConfig } from '../utils/corsConfig'
|
||||
import { enqueueBulkUploadContentJob } from '../utils/createTask'
|
||||
import { logger } from '../utils/logger'
|
||||
import { generateDownloadSignedUrl, isFileExists } from '../utils/uploads'
|
||||
|
||||
/**
 * Builds the router mounted for content downloads.
 *
 * POST / accepts `{ libraryItemIds, format }`, authenticates the caller from the
 * request token, and returns a signed download URL per library item found for that
 * user. For items whose converted file does not exist in the bucket yet, an upload
 * job is enqueued.
 */
export function contentRouter() {
  const router = Router()

  interface GetContentRequest {
    libraryItemIds: string[]
    format: ContentFormat
  }

  // Formats a client may request. `format` is interpolated into the bucket file path
  // and handed to the upload job, so anything outside this list is rejected up front.
  const SUPPORTED_FORMATS: ContentFormat[] = [
    'markdown',
    'highlightedMarkdown',
    'original',
  ]

  // Shape check only: both keys must be present; values are validated in the handler.
  const isContentRequest = (data: any): data is GetContentRequest => {
    return (
      typeof data === 'object' &&
      data !== null &&
      'libraryItemIds' in data &&
      'format' in data
    )
  }

  router.options('/', cors<express.Request>({ ...corsConfig, maxAge: 600 }))

  // eslint-disable-next-line @typescript-eslint/no-misused-promises
  router.post('/', cors<express.Request>(corsConfig), async (req, res) => {
    if (!isContentRequest(req.body)) {
      logger.error('Bad request')
      return res.status(400).send({ errorCode: 'BAD_REQUEST' })
    }

    const { libraryItemIds, format } = req.body
    if (
      !Array.isArray(libraryItemIds) ||
      libraryItemIds.length === 0 ||
      libraryItemIds.length > 50 ||
      libraryItemIds.some((id) => typeof id !== 'string')
    ) {
      logger.error('Library item ids are invalid')
      return res.status(400).send({ errorCode: 'BAD_REQUEST' })
    }

    if (!SUPPORTED_FORMATS.includes(format)) {
      logger.error('Format is invalid')
      return res.status(400).send({ errorCode: 'BAD_REQUEST' })
    }

    // Without this try/catch a rejected promise from the async handler would never
    // produce a response; answer with a 500 instead.
    try {
      const token = getTokenByRequest(req)
      // get claims from token
      const claims = await getClaimsByToken(token)
      if (!claims) {
        logger.error('Token not found')
        return res.status(401).send({
          error: 'UNAUTHORIZED',
        })
      }

      // get user by uid from claims
      const userId = claims.uid

      const libraryItems = await findLibraryItemsByIds(libraryItemIds, userId, {
        select: ['id', 'updatedAt'],
      })
      if (libraryItems.length === 0) {
        logger.error('Library items not found')
        return res.status(404).send({ errorCode: 'NOT_FOUND' })
      }

      // generate signed url for each library item
      const data = await Promise.all(
        libraryItems.map(async (libraryItem) => {
          // updatedAt is part of the path, so a changed item maps to a new file
          const filePath = `content/${userId}/${
            libraryItem.id
          }.${libraryItem.updatedAt.getTime()}.${format}`

          try {
            const downloadUrl = await generateDownloadSignedUrl(filePath, {
              expires: Date.now() + 60 * 60 * 1000, // 1 hour
            })

            // check if file is already uploaded
            const exists = await isFileExists(filePath)
            if (exists) {
              logger.info('File already exists', filePath)
            }

            return {
              libraryItemId: libraryItem.id,
              userId,
              filePath,
              downloadUrl,
              format,
              exists,
            }
          } catch (error) {
            // a failure for one item is reported per item, not as a request failure
            logger.error('Error while generating signed url', error)
            return {
              libraryItemId: libraryItem.id,
              error: 'Failed to generate download url',
            }
          }
        })
      )
      logger.info('Signed urls generated', data)

      // skip uploading if there is an error or file already exists
      const uploadData = data.filter(
        (d) => !('error' in d) && d.downloadUrl !== undefined && !d.exists
      ) as UploadContentJobData[]

      if (uploadData.length > 0) {
        await enqueueBulkUploadContentJob(uploadData)
        logger.info('Bulk upload content job enqueued', uploadData)
      }

      res.send({
        data: data.map((d) => ({
          libraryItemId: d.libraryItemId,
          downloadUrl: d.downloadUrl,
          error: d.error,
        })),
      })
    } catch (error) {
      logger.error('Error while getting content', error)
      return res.status(500).send({ errorCode: 'INTERNAL_ERROR' })
    }
  })

  return router
}
|
||||
@ -146,7 +146,11 @@ export function pageRouter() {
|
||||
return res.status(400).send({ errorCode: 'BAD_DATA' })
|
||||
}
|
||||
|
||||
const item = await findLibraryItemById(itemId, claims.uid)
|
||||
const item = await findLibraryItemById(itemId, claims.uid, {
|
||||
relations: {
|
||||
highlights: true,
|
||||
},
|
||||
})
|
||||
if (!item) {
|
||||
return res.status(404).send({ errorCode: 'NOT_FOUND' })
|
||||
}
|
||||
|
||||
@ -20,6 +20,7 @@ import { aiSummariesRouter } from './routers/ai_summary_router'
|
||||
import { articleRouter } from './routers/article_router'
|
||||
import { authRouter } from './routers/auth/auth_router'
|
||||
import { mobileAuthRouter } from './routers/auth/mobile/mobile_auth_router'
|
||||
import { contentRouter } from './routers/content_router'
|
||||
import { digestRouter } from './routers/digest_router'
|
||||
import { explainRouter } from './routers/explain_router'
|
||||
import { integrationRouter } from './routers/integration_router'
|
||||
@ -101,6 +102,8 @@ export const createApp = (): Express => {
|
||||
app.use('/api/integration', integrationRouter())
|
||||
app.use('/api/tasks', taskRouter())
|
||||
app.use('/api/digest', digestRouter())
|
||||
app.use('/api/content', contentRouter())
|
||||
|
||||
app.use('/svc/pubsub/content', contentServiceRouter())
|
||||
app.use('/svc/pubsub/links', linkServiceRouter())
|
||||
app.use('/svc/pubsub/newsletters', newsletterServiceRouter())
|
||||
@ -165,10 +168,12 @@ const main = async (): Promise<void> => {
|
||||
await apollo.start()
|
||||
apollo.applyMiddleware({ app, path: '/api/graphql', cors: corsConfig })
|
||||
|
||||
const mwLogger = loggers.get('express', { levels: config.syslog.levels })
|
||||
const transport = buildLoggerTransport('express')
|
||||
const mw = await lw.express.makeMiddleware(mwLogger, transport)
|
||||
app.use(mw)
|
||||
if (!env.dev.isLocal) {
|
||||
const mwLogger = loggers.get('express', { levels: config.syslog.levels })
|
||||
const transport = buildLoggerTransport('express')
|
||||
const mw = await lw.express.makeMiddleware(mwLogger, transport)
|
||||
app.use(mw)
|
||||
}
|
||||
|
||||
const listener = httpServer.listen({ port: PORT }, async () => {
|
||||
const logger = buildLogger('app.dispatch')
|
||||
|
||||
@ -764,10 +764,18 @@ export const findRecentLibraryItems = async (
|
||||
)
|
||||
}
|
||||
|
||||
export const findLibraryItemsByIds = async (ids: string[], userId: string) => {
|
||||
const selectColumns = getColumns(libraryItemRepository)
|
||||
.filter((column) => column !== 'originalContent')
|
||||
.map((column) => `library_item.${column}`)
|
||||
export const findLibraryItemsByIds = async (
|
||||
ids: string[],
|
||||
userId: string,
|
||||
options?: {
|
||||
select?: (keyof LibraryItem)[]
|
||||
}
|
||||
) => {
|
||||
const selectColumns =
|
||||
options?.select?.map((column) => `library_item.${column}`) ||
|
||||
getColumns(libraryItemRepository)
|
||||
.filter((column) => column !== 'originalContent')
|
||||
.map((column) => `library_item.${column}`)
|
||||
return authTrx(
|
||||
async (tx) =>
|
||||
tx
|
||||
@ -782,17 +790,27 @@ export const findLibraryItemsByIds = async (ids: string[], userId: string) => {
|
||||
|
||||
export const findLibraryItemById = async (
|
||||
id: string,
|
||||
userId: string
|
||||
userId: string,
|
||||
options?: {
|
||||
select?: (keyof LibraryItem)[]
|
||||
relations?: {
|
||||
user?: boolean
|
||||
labels?: boolean
|
||||
highlights?:
|
||||
| {
|
||||
user?: boolean
|
||||
}
|
||||
| boolean
|
||||
}
|
||||
}
|
||||
): Promise<LibraryItem | null> => {
|
||||
return authTrx(
|
||||
async (tx) =>
|
||||
tx
|
||||
.createQueryBuilder(LibraryItem, 'library_item')
|
||||
.leftJoinAndSelect('library_item.labels', 'labels')
|
||||
.leftJoinAndSelect('library_item.highlights', 'highlights')
|
||||
.leftJoinAndSelect('highlights.user', 'user')
|
||||
.where('library_item.id = :id', { id })
|
||||
.getOne(),
|
||||
tx.withRepository(libraryItemRepository).findOne({
|
||||
select: options?.select,
|
||||
where: { id },
|
||||
relations: options?.relations,
|
||||
}),
|
||||
undefined,
|
||||
userId
|
||||
)
|
||||
|
||||
@ -11,7 +11,9 @@ export const saveContentDisplayReport = async (
|
||||
uid: string,
|
||||
input: ReportItemInput
|
||||
): Promise<boolean> => {
|
||||
const item = await findLibraryItemById(input.pageId, uid)
|
||||
const item = await findLibraryItemById(input.pageId, uid, {
|
||||
select: ['id', 'readableContent', 'originalContent', 'originalUrl'],
|
||||
})
|
||||
if (!item) {
|
||||
logger.info('unable to submit report, item not found', input)
|
||||
return false
|
||||
@ -53,7 +55,9 @@ export const saveAbuseReport = async (
|
||||
uid: string,
|
||||
input: ReportItemInput
|
||||
): Promise<boolean> => {
|
||||
const item = await findLibraryItemById(input.pageId, uid)
|
||||
const item = await findLibraryItemById(input.pageId, uid, {
|
||||
select: ['id'],
|
||||
})
|
||||
if (!item) {
|
||||
logger.info('unable to submit report, item not found', input)
|
||||
return false
|
||||
|
||||
@ -53,6 +53,10 @@ import {
|
||||
UPDATE_HIGHLIGHT_JOB,
|
||||
UPDATE_LABELS_JOB,
|
||||
} from '../jobs/update_db'
|
||||
import {
|
||||
UploadContentJobData,
|
||||
UPLOAD_CONTENT_JOB,
|
||||
} from '../jobs/upload_content'
|
||||
import { getBackendQueue, JOB_VERSION } from '../queue-processor'
|
||||
import { redisDataSource } from '../redis_data_source'
|
||||
import { writeDigest } from '../services/digest'
|
||||
@ -89,8 +93,9 @@ export const getJobPriority = (jobName: string): number => {
|
||||
return 5
|
||||
case BULK_ACTION_JOB_NAME:
|
||||
case `${REFRESH_FEED_JOB_NAME}_high`:
|
||||
return 10
|
||||
case PROCESS_YOUTUBE_TRANSCRIPT_JOB_NAME:
|
||||
case UPLOAD_CONTENT_JOB:
|
||||
return 10
|
||||
case `${REFRESH_FEED_JOB_NAME}_low`:
|
||||
case EXPORT_ITEM_JOB_NAME:
|
||||
case CREATE_DIGEST_JOB:
|
||||
@ -953,4 +958,24 @@ export const enqueueCreateDigest = async (
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Adds one UPLOAD_CONTENT_JOB per entry to the backend queue in a single bulk call.
 *
 * @param data payloads for the upload jobs
 * @returns the result of `queue.addBulk`, or an empty string when no backend queue
 *   is available
 */
export const enqueueBulkUploadContentJob = async (
  data: UploadContentJobData[]
) => {
  const backendQueue = await getBackendQueue()
  if (!backendQueue) {
    return ''
  }

  // Each job gets up to 3 attempts and the priority registered for this job name.
  const toBulkJob = (payload: UploadContentJobData) => {
    return {
      name: UPLOAD_CONTENT_JOB,
      data: payload,
      opts: {
        attempts: 3,
        priority: getJobPriority(UPLOAD_CONTENT_JOB),
      },
    }
  }

  return backendQueue.addBulk(data.map(toBulkJob))
}
|
||||
|
||||
export default createHttpTaskWithToken
|
||||
|
||||
@ -1,6 +1,7 @@
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
|
||||
import { File, GetSignedUrlConfig, Storage } from '@google-cloud/storage'
|
||||
import axios from 'axios'
|
||||
import { ContentReaderType } from '../entity/library_item'
|
||||
import { env } from '../env'
|
||||
import { PageType } from '../generated/graphql'
|
||||
@ -33,6 +34,7 @@ const storage = env.fileUpload?.gcsUploadSAKeyFilePath
|
||||
? new Storage({ keyFilename: env.fileUpload.gcsUploadSAKeyFilePath })
|
||||
: new Storage()
|
||||
const bucketName = env.fileUpload.gcsUploadBucket
|
||||
const maxContentLength = 10 * 1024 * 1024 // 10MB
|
||||
|
||||
export const countOfFilesWithPrefix = async (prefix: string) => {
|
||||
const [files] = await storage.bucket(bucketName).getFiles({ prefix })
|
||||
@ -62,12 +64,16 @@ export const generateUploadSignedUrl = async (
|
||||
}
|
||||
|
||||
export const generateDownloadSignedUrl = async (
|
||||
filePathName: string
|
||||
filePathName: string,
|
||||
config?: {
|
||||
expires?: number
|
||||
}
|
||||
): Promise<string> => {
|
||||
const options: GetSignedUrlConfig = {
|
||||
version: 'v4',
|
||||
action: 'read',
|
||||
expires: Date.now() + 240 * 60 * 1000, // four hours
|
||||
...config,
|
||||
}
|
||||
const [url] = await storage
|
||||
.bucket(bucketName)
|
||||
@ -100,15 +106,50 @@ export const generateUploadFilePathName = (
|
||||
export const uploadToBucket = async (
|
||||
filePath: string,
|
||||
data: Buffer,
|
||||
options?: { contentType?: string; public?: boolean },
|
||||
options?: { contentType?: string; public?: boolean; timeout?: number },
|
||||
selectedBucket?: string
|
||||
): Promise<void> => {
|
||||
await storage
|
||||
.bucket(selectedBucket || bucketName)
|
||||
.file(filePath)
|
||||
.save(data, { ...options, timeout: 30000 })
|
||||
.save(data, { timeout: 30000, ...options }) // default timeout 30s
|
||||
}
|
||||
|
||||
export const createGCSFile = (filename: string): File => {
|
||||
return storage.bucket(bucketName).file(filename)
|
||||
}
|
||||
|
||||
/**
 * Downloads the content at `contentObjUrl` into memory.
 *
 * The body is requested as an array buffer so that the returned value really is a
 * Buffer, matching the declared response type. The previous 'stream' response type
 * returned a readable stream typed as Buffer.
 * NOTE(review): requesting a buffered response is also intended to make the 10MB
 * `maxContentLength` cap effective — confirm against the axios version in use.
 *
 * @param contentObjUrl URL to download from
 * @param timeout optional request timeout in milliseconds, passed to axios
 * @returns the downloaded bytes
 */
export const downloadFromUrl = async (
  contentObjUrl: string,
  timeout?: number
) => {
  // download the content into a buffer, max 10MB
  const response = await axios.get<Buffer>(contentObjUrl, {
    responseType: 'arraybuffer',
    maxContentLength,
    timeout,
  })

  return response.data
}
|
||||
|
||||
/**
 * PUTs `data` to a signed upload URL.
 *
 * @param uploadSignedUrl signed URL to upload to
 * @param data request body to send
 * @param contentType value sent as the Content-Type header
 * @param timeout optional request timeout, passed to axios
 */
export const uploadToSignedUrl = async (
  uploadSignedUrl: string,
  data: Buffer,
  contentType: string,
  timeout?: number
) => {
  const requestConfig = {
    headers: { 'Content-Type': contentType },
    // the request body is capped at the shared module-level limit
    maxBodyLength: maxContentLength,
    timeout,
  }

  // upload the data to the signed url
  await axios.put(uploadSignedUrl, data, requestConfig)
}
|
||||
|
||||
/**
 * Checks whether an object exists at `filePath` in the default upload bucket.
 *
 * @param filePath object path inside the bucket
 * @returns true when the object exists
 */
export const isFileExists = async (filePath: string): Promise<boolean> => {
  const file = storage.bucket(bucketName).file(filePath)
  // exists() resolves to a tuple whose first element is the boolean result
  const result = await file.exists()
  return result[0]
}
|
||||
|
||||
@ -2345,7 +2345,11 @@ describe('Article API', () => {
|
||||
authToken
|
||||
).expect(200)
|
||||
|
||||
const item = await findLibraryItemById(articleId, user.id)
|
||||
const item = await findLibraryItemById(articleId, user.id, {
|
||||
relations: {
|
||||
labels: true,
|
||||
},
|
||||
})
|
||||
expect(item?.labels?.map((l) => l.name)).to.eql(['Favorites'])
|
||||
})
|
||||
})
|
||||
|
||||
@ -293,7 +293,11 @@ describe('Labels API', () => {
|
||||
labelId,
|
||||
}).expect(200)
|
||||
|
||||
const updatedItem = await findLibraryItemById(item.id, user.id)
|
||||
const updatedItem = await findLibraryItemById(item.id, user.id, {
|
||||
relations: {
|
||||
labels: true,
|
||||
},
|
||||
})
|
||||
expect(updatedItem?.labels).not.deep.include(toDeleteLabel)
|
||||
})
|
||||
})
|
||||
@ -545,7 +549,11 @@ describe('Labels API', () => {
|
||||
it('should update the item with the label', async () => {
|
||||
await graphqlRequest(query, authToken).expect(200)
|
||||
|
||||
const updatedItem = await findLibraryItemById(item.id, user.id)
|
||||
const updatedItem = await findLibraryItemById(item.id, user.id, {
|
||||
relations: {
|
||||
labels: true,
|
||||
},
|
||||
})
|
||||
const updatedLabel = updatedItem?.labels?.filter(
|
||||
(l) => l.id === labelId
|
||||
)?.[0]
|
||||
|
||||
@ -11,6 +11,7 @@
|
||||
"dotenv": "^8.2.0",
|
||||
"express": "^4.17.1",
|
||||
"ioredis": "^5.3.2",
|
||||
"posthog-node": "^3.6.3",
|
||||
"@google-cloud/functions-framework": "^3.0.0",
|
||||
"@omnivore/puppeteer-parse": "^1.0.0",
|
||||
"@sentry/serverless": "^7.77.0"
|
||||
|
||||
41
packages/content-fetch/src/analytics.ts
Normal file
41
packages/content-fetch/src/analytics.ts
Normal file
@ -0,0 +1,41 @@
|
||||
import { PostHog } from 'posthog-node'
|
||||
|
||||
interface AnalyticEvent {
|
||||
distinctId: string
|
||||
event: string
|
||||
properties?: Record<string | number, any>
|
||||
}
|
||||
|
||||
interface AnalyticClient {
|
||||
capture: (event: AnalyticEvent) => void
|
||||
shutdownAsync?: () => Promise<void>
|
||||
}
|
||||
|
||||
class PostHogClient implements AnalyticClient {
|
||||
private client: PostHog
|
||||
|
||||
constructor(apiKey: string) {
|
||||
this.client = new PostHog(apiKey)
|
||||
}
|
||||
|
||||
capture({ distinctId, event, properties }: AnalyticEvent) {
|
||||
// forward the event to PostHog, adding the API_ENV name (default 'demo') to its properties
|
||||
|
||||
this.client.capture({
|
||||
distinctId,
|
||||
event,
|
||||
properties: {
|
||||
...properties,
|
||||
env: process.env.API_ENV || 'demo',
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
async shutdownAsync() {
|
||||
return this.client.shutdownAsync()
|
||||
}
|
||||
}
|
||||
|
||||
export const analytics = new PostHogClient(
|
||||
process.env.POSTHOG_API_KEY || 'test'
|
||||
)
|
||||
@ -1,5 +1,6 @@
|
||||
import { fetchContent } from '@omnivore/puppeteer-parse'
|
||||
import { RequestHandler } from 'express'
|
||||
import { analytics } from './analytics'
|
||||
import { queueSavePageJob } from './job'
|
||||
import { redisDataSource } from './redis_data_source'
|
||||
|
||||
@ -147,11 +148,33 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => {
|
||||
logRecord.error = 'unknown error'
|
||||
}
|
||||
|
||||
// capture error event
|
||||
users.forEach((user) => {
|
||||
analytics.capture({
|
||||
distinctId: user.id,
|
||||
event: 'content_fetch_failure',
|
||||
properties: {
|
||||
url,
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
return res.sendStatus(500)
|
||||
} finally {
|
||||
logRecord.totalTime = Date.now() - functionStartTime
|
||||
console.log(`parse-page result`, logRecord)
|
||||
}
|
||||
|
||||
// capture success event
|
||||
users.forEach((user) => {
|
||||
analytics.capture({
|
||||
distinctId: user.id,
|
||||
event: 'content_fetch_success',
|
||||
properties: {
|
||||
url,
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
res.sendStatus(200)
|
||||
}
|
||||
|
||||
@ -36,7 +36,6 @@
|
||||
"linkedom": "^0.14.16",
|
||||
"lodash": "^4.17.21",
|
||||
"luxon": "^3.0.4",
|
||||
"puppeteer-core": "^20.9.0",
|
||||
"underscore": "^1.13.6",
|
||||
"uuid": "^9.0.0"
|
||||
},
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
import addressparser from 'addressparser'
|
||||
import axios from 'axios'
|
||||
import { parseHTML } from 'linkedom'
|
||||
import { Browser } from 'puppeteer-core'
|
||||
import { v4 as uuid } from 'uuid'
|
||||
|
||||
interface Unsubscribe {
|
||||
@ -61,7 +60,7 @@ export abstract class ContentHandler {
|
||||
return false
|
||||
}
|
||||
|
||||
async preHandle(url: string, browser?: Browser): Promise<PreHandleResult> {
|
||||
async preHandle(url: string): Promise<PreHandleResult> {
|
||||
return Promise.resolve({ url })
|
||||
}
|
||||
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
import { parseHTML } from 'linkedom'
|
||||
import { Browser } from 'puppeteer-core'
|
||||
import {
|
||||
ContentHandler,
|
||||
NewsletterInput,
|
||||
@ -104,8 +103,7 @@ const newsletterHandlers: ContentHandler[] = [
|
||||
]
|
||||
|
||||
export const preHandleContent = async (
|
||||
url: string,
|
||||
browser: Browser
|
||||
url: string
|
||||
): Promise<PreHandleResult | undefined> => {
|
||||
// Before we run the regular handlers we check to see if we need to
|
||||
// pre-resolve the URL. TODO: This should probably happen recursively,
|
||||
@ -129,7 +127,7 @@ export const preHandleContent = async (
|
||||
for (const handler of contentHandlers) {
|
||||
if (handler.shouldPreHandle(url)) {
|
||||
console.log('preHandleContent', handler.name, url)
|
||||
return handler.preHandle(url, browser)
|
||||
return handler.preHandle(url)
|
||||
}
|
||||
}
|
||||
return undefined
|
||||
|
||||
@ -1,7 +1,6 @@
|
||||
import axios from 'axios'
|
||||
import { parseHTML } from 'linkedom'
|
||||
import { DateTime } from 'luxon'
|
||||
import { Browser, BrowserContext } from 'puppeteer-core'
|
||||
import { ContentHandler, PreHandleResult } from '../content-handler'
|
||||
|
||||
interface TweetIncludes {
|
||||
@ -190,126 +189,6 @@ const getTweetsFromResponse = (response: Tweets): Tweet[] => {
|
||||
return tweets
|
||||
}
|
||||
|
||||
const getOldTweets = async (
|
||||
browser: Browser,
|
||||
conversationId: string,
|
||||
username: string
|
||||
): Promise<Tweet[]> => {
|
||||
const tweetIds = await getTweetIds(browser, conversationId, username)
|
||||
if (tweetIds.length === 0) {
|
||||
return []
|
||||
}
|
||||
const response = await getTweetsByIds(tweetIds)
|
||||
return getTweetsFromResponse(response)
|
||||
}
|
||||
|
||||
const getRecentTweets = async (conversationId: string): Promise<Tweet[]> => {
|
||||
const thread = await getTweetThread(conversationId)
|
||||
if (thread.meta.result_count === 0) {
|
||||
return []
|
||||
}
|
||||
// tweets are in reverse chronological order in the thread
|
||||
return getTweetsFromResponse(thread).reverse()
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for `ms` amount of milliseconds
|
||||
* @param {number} ms
|
||||
*/
|
||||
const waitFor = (ms: number) =>
|
||||
new Promise((resolve) => setTimeout(resolve, ms))
|
||||
|
||||
/**
|
||||
* Get tweets(even older than 7 days) using puppeteer
|
||||
* @param browser
|
||||
* @param {string} tweetId
|
||||
* @param {string} author
|
||||
*/
|
||||
const getTweetIds = async (
|
||||
browser: Browser,
|
||||
tweetId: string,
|
||||
author: string
|
||||
): Promise<string[]> => {
|
||||
const pageURL = `https://twitter.com/${author}/status/${tweetId}`
|
||||
|
||||
let context: BrowserContext | undefined
|
||||
try {
|
||||
context = await browser.createIncognitoBrowserContext()
|
||||
const page = await context.newPage()
|
||||
|
||||
// Modify this variable to control the size of viewport
|
||||
const deviceScaleFactor = 0.2
|
||||
const height = Math.floor(2000 / deviceScaleFactor)
|
||||
const width = Math.floor(1700 / deviceScaleFactor)
|
||||
await page.setViewport({ width, height, deviceScaleFactor })
|
||||
|
||||
await page.goto(pageURL, {
|
||||
waitUntil: 'networkidle0',
|
||||
timeout: 60000, // 60 seconds
|
||||
})
|
||||
|
||||
return await page.evaluate(async (author) => {
|
||||
/**
|
||||
* Wait for `ms` amount of milliseconds
|
||||
* @param {number} ms
|
||||
*/
|
||||
const waitFor = (ms: number) =>
|
||||
new Promise((resolve) => setTimeout(resolve, ms))
|
||||
|
||||
const ids = []
|
||||
|
||||
// Find the first Show thread button and click it
|
||||
const showRepliesButton = Array.from(
|
||||
document.querySelectorAll('div[dir]')
|
||||
)
|
||||
.filter(
|
||||
(node) => node.children[0] && node.children[0].tagName === 'SPAN'
|
||||
)
|
||||
.find((node) => node.children[0].innerHTML === 'Show replies')
|
||||
|
||||
if (showRepliesButton) {
|
||||
;(showRepliesButton as HTMLElement).click()
|
||||
|
||||
await waitFor(2000)
|
||||
}
|
||||
|
||||
const timeNodes = Array.from(document.querySelectorAll('time'))
|
||||
|
||||
for (const timeNode of timeNodes) {
|
||||
/** @type {HTMLAnchorElement | HTMLSpanElement} */
|
||||
const timeContainerAnchor: HTMLAnchorElement | HTMLSpanElement | null =
|
||||
timeNode.parentElement
|
||||
if (!timeContainerAnchor) continue
|
||||
|
||||
if (timeContainerAnchor.tagName === 'SPAN') continue
|
||||
|
||||
const href = timeContainerAnchor.getAttribute('href')
|
||||
if (!href) continue
|
||||
|
||||
// Get the tweet id and username from the href: https://twitter.com/username/status/1234567890
|
||||
const match = href.match(/\/([^/]+)\/status\/(\d+)/)
|
||||
if (!match) continue
|
||||
|
||||
const id = match[2]
|
||||
const username = match[1]
|
||||
|
||||
// skip non-author replies
|
||||
username === author && ids.push(id)
|
||||
}
|
||||
|
||||
return ids
|
||||
}, author)
|
||||
} catch (error) {
|
||||
console.error('Error getting tweets', error)
|
||||
|
||||
return []
|
||||
} finally {
|
||||
if (context) {
|
||||
await context.close()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
export class TwitterHandler extends ContentHandler {
|
||||
constructor() {
|
||||
super()
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
"crypto": "^1.0.1",
|
||||
"dompurify": "^2.4.1",
|
||||
"linkedom": "^0.14.9",
|
||||
"puppeteer-core": "^20.9.0",
|
||||
"puppeteer-core": "^22.8.0",
|
||||
"puppeteer-extra": "^3.3.4",
|
||||
"puppeteer-extra-plugin-adblocker": "^2.13.5",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.1",
|
||||
|
||||
@ -9,7 +9,6 @@ import { Browser, BrowserContext, Page, Protocol } from 'puppeteer-core'
|
||||
import puppeteer from 'puppeteer-extra'
|
||||
import AdblockerPlugin from 'puppeteer-extra-plugin-adblocker'
|
||||
import StealthPlugin from 'puppeteer-extra-plugin-stealth'
|
||||
import Url from 'url'
|
||||
|
||||
// Add stealth plugin to hide puppeteer usage
|
||||
puppeteer.use(StealthPlugin())
|
||||
@ -96,7 +95,8 @@ const enableJavascriptForUrl = (url: string) => {
|
||||
// launch Puppeteer
|
||||
const getBrowserPromise = (async () => {
|
||||
console.log('starting puppeteer browser')
|
||||
return (await puppeteer.launch({
|
||||
|
||||
const browser = (await puppeteer.launch({
|
||||
args: [
|
||||
'--allow-running-insecure-content',
|
||||
'--autoplay-policy=user-gesture-required',
|
||||
@ -119,7 +119,7 @@ const getBrowserPromise = (async () => {
|
||||
'--no-zygote',
|
||||
'--window-size=1920,1080',
|
||||
'--disable-extensions',
|
||||
].filter((item) => !!item),
|
||||
],
|
||||
defaultViewport: {
|
||||
deviceScaleFactor: 1,
|
||||
hasTouch: false,
|
||||
@ -131,7 +131,12 @@ const getBrowserPromise = (async () => {
|
||||
executablePath: process.env.CHROMIUM_PATH,
|
||||
headless: !!process.env.LAUNCH_HEADLESS,
|
||||
timeout: 120000, // 2 minutes
|
||||
dumpio: true, // show console logs in the terminal
|
||||
})) as Browser
|
||||
|
||||
console.log('browser started')
|
||||
|
||||
return browser
|
||||
})()
|
||||
|
||||
export const fetchContent = async (
|
||||
@ -162,8 +167,7 @@ export const fetchContent = async (
|
||||
|
||||
// pre handle url with custom handlers
|
||||
try {
|
||||
const browser = await getBrowserPromise
|
||||
const result = await preHandleContent(url, browser)
|
||||
const result = await preHandleContent(url)
|
||||
if (result && result.url) {
|
||||
validateUrlString(url)
|
||||
url = result.url
|
||||
@ -220,6 +224,8 @@ export const fetchContent = async (
|
||||
}
|
||||
} catch (e) {
|
||||
console.error(`Error while retrieving page ${url}`, e)
|
||||
const browser = await getBrowserPromise
|
||||
console.log(browser.debugInfo.pendingProtocolErrors)
|
||||
|
||||
// fallback to scrapingbee for non pdf content
|
||||
if (url && contentType !== 'application/pdf') {
|
||||
@ -239,7 +245,9 @@ export const fetchContent = async (
|
||||
} finally {
|
||||
// close browser context if it was opened
|
||||
if (context) {
|
||||
console.info('closing context...', url)
|
||||
await context.close()
|
||||
console.info('context closed', url)
|
||||
}
|
||||
|
||||
console.info(`content-fetch result`, logRecord)
|
||||
@ -289,7 +297,7 @@ function getUrl(urlStr: string) {
|
||||
|
||||
validateUrlString(url)
|
||||
|
||||
const parsed = Url.parse(url)
|
||||
const parsed = new URL(url)
|
||||
return parsed.href
|
||||
}
|
||||
|
||||
@ -308,119 +316,122 @@ async function retrievePage(
|
||||
browserOpened: Date.now() - functionStartTime,
|
||||
}
|
||||
|
||||
const context = await browser.createIncognitoBrowserContext()
|
||||
const page = await context.newPage()
|
||||
|
||||
if (!enableJavascriptForUrl(url)) {
|
||||
await page.setJavaScriptEnabled(false)
|
||||
}
|
||||
await page.setUserAgent(userAgentForUrl(url))
|
||||
|
||||
// set locale for the page
|
||||
if (locale) {
|
||||
await page.setExtraHTTPHeaders({ 'Accept-Language': locale })
|
||||
}
|
||||
|
||||
// set timezone for the page
|
||||
if (timezone) {
|
||||
await page.emulateTimezone(timezone)
|
||||
}
|
||||
|
||||
const client = await page.target().createCDPSession()
|
||||
|
||||
const downloadPath = path.resolve('./download_dir/')
|
||||
await client.send('Page.setDownloadBehavior', {
|
||||
behavior: 'allow',
|
||||
downloadPath,
|
||||
})
|
||||
|
||||
// intercept request when response headers was received
|
||||
await client.send('Network.setRequestInterception', {
|
||||
patterns: [
|
||||
{
|
||||
urlPattern: '*',
|
||||
resourceType: 'Document',
|
||||
interceptionStage: 'HeadersReceived',
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
client.on(
|
||||
'Network.requestIntercepted',
|
||||
(e: Protocol.Network.RequestInterceptedEvent) => {
|
||||
;(async () => {
|
||||
const headers = e.responseHeaders || {}
|
||||
|
||||
const [contentType] = (
|
||||
headers['content-type'] ||
|
||||
headers['Content-Type'] ||
|
||||
''
|
||||
)
|
||||
.toLowerCase()
|
||||
.split(';')
|
||||
const obj: Protocol.Network.ContinueInterceptedRequestRequest = {
|
||||
interceptionId: e.interceptionId,
|
||||
}
|
||||
|
||||
if (
|
||||
e.responseStatusCode &&
|
||||
e.responseStatusCode >= 200 &&
|
||||
e.responseStatusCode < 300
|
||||
) {
|
||||
// We only check content-type on success responses
|
||||
// as it doesn't matter what the content type is for things
|
||||
// like redirects
|
||||
if (contentType && !ALLOWED_CONTENT_TYPES.includes(contentType)) {
|
||||
obj['errorReason'] = 'BlockedByClient'
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
await client.send('Network.continueInterceptedRequest', obj)
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
})()
|
||||
}
|
||||
)
|
||||
|
||||
/*
|
||||
* Disallow MathJax from running in Puppeteer and modifying the document,
|
||||
* we shall instead run it in our frontend application to transform any
|
||||
* mathjax content when present.
|
||||
*/
|
||||
await page.setRequestInterception(true)
|
||||
let requestCount = 0
|
||||
page.on('request', (request) => {
|
||||
;(async () => {
|
||||
if (request.resourceType() === 'font') {
|
||||
// Disallow fonts from loading
|
||||
return request.abort()
|
||||
}
|
||||
if (requestCount++ > 100) {
|
||||
return request.abort()
|
||||
}
|
||||
if (
|
||||
request.resourceType() === 'script' &&
|
||||
request.url().toLowerCase().indexOf('mathjax') > -1
|
||||
) {
|
||||
return request.abort()
|
||||
}
|
||||
|
||||
await request.continue()
|
||||
})()
|
||||
})
|
||||
// create a new incognito browser context
|
||||
const context = await browser.createBrowserContext()
|
||||
|
||||
// Puppeteer fails during download of PDF files,
|
||||
// so record the failure and use those items
|
||||
let lastPdfUrl = undefined
|
||||
page.on('response', (response) => {
|
||||
if (response.headers()['content-type'] === 'application/pdf') {
|
||||
lastPdfUrl = response.url()
|
||||
}
|
||||
})
|
||||
|
||||
let lastPdfUrl
|
||||
let page
|
||||
try {
|
||||
page = await context.newPage()
|
||||
|
||||
if (!enableJavascriptForUrl(url)) {
|
||||
await page.setJavaScriptEnabled(false)
|
||||
}
|
||||
await page.setUserAgent(userAgentForUrl(url))
|
||||
|
||||
// set locale for the page
|
||||
if (locale) {
|
||||
await page.setExtraHTTPHeaders({ 'Accept-Language': locale })
|
||||
}
|
||||
|
||||
// set timezone for the page
|
||||
if (timezone) {
|
||||
await page.emulateTimezone(timezone)
|
||||
}
|
||||
|
||||
const client = await page.createCDPSession()
|
||||
|
||||
const downloadPath = path.resolve('./download_dir/')
|
||||
await client.send('Page.setDownloadBehavior', {
|
||||
behavior: 'allow',
|
||||
downloadPath,
|
||||
})
|
||||
|
||||
// intercept requests once the response headers are received
|
||||
await client.send('Network.setRequestInterception', {
|
||||
patterns: [
|
||||
{
|
||||
urlPattern: '*',
|
||||
resourceType: 'Document',
|
||||
interceptionStage: 'HeadersReceived',
|
||||
},
|
||||
],
|
||||
})
|
||||
|
||||
client.on(
|
||||
'Network.requestIntercepted',
|
||||
(e: Protocol.Network.RequestInterceptedEvent) => {
|
||||
;(async () => {
|
||||
const headers = e.responseHeaders || {}
|
||||
|
||||
const [contentType] = (
|
||||
headers['content-type'] ||
|
||||
headers['Content-Type'] ||
|
||||
''
|
||||
)
|
||||
.toLowerCase()
|
||||
.split(';')
|
||||
const obj: Protocol.Network.ContinueInterceptedRequestRequest = {
|
||||
interceptionId: e.interceptionId,
|
||||
}
|
||||
|
||||
if (
|
||||
e.responseStatusCode &&
|
||||
e.responseStatusCode >= 200 &&
|
||||
e.responseStatusCode < 300
|
||||
) {
|
||||
// We only check content-type on success responses
|
||||
// as it doesn't matter what the content type is for things
|
||||
// like redirects
|
||||
if (contentType && !ALLOWED_CONTENT_TYPES.includes(contentType)) {
|
||||
obj['errorReason'] = 'BlockedByClient'
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
await client.send('Network.continueInterceptedRequest', obj)
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
})()
|
||||
}
|
||||
)
|
||||
|
||||
/*
|
||||
* Disallow MathJax from running in Puppeteer and modifying the document,
|
||||
* we shall instead run it in our frontend application to transform any
|
||||
* mathjax content when present.
|
||||
*/
|
||||
await page.setRequestInterception(true)
|
||||
let requestCount = 0
|
||||
page.on('request', (request) => {
|
||||
;(async () => {
|
||||
if (request.resourceType() === 'font') {
|
||||
// Disallow fonts from loading
|
||||
return request.abort()
|
||||
}
|
||||
if (requestCount++ > 100) {
|
||||
return request.abort()
|
||||
}
|
||||
if (
|
||||
request.resourceType() === 'script' &&
|
||||
request.url().toLowerCase().indexOf('mathjax') > -1
|
||||
) {
|
||||
return request.abort()
|
||||
}
|
||||
|
||||
await request.continue()
|
||||
})()
|
||||
})
|
||||
|
||||
page.on('response', (response) => {
|
||||
if (response.headers()['content-type'] === 'application/pdf') {
|
||||
lastPdfUrl = response.url()
|
||||
}
|
||||
})
|
||||
|
||||
const response = await page.goto(url, {
|
||||
timeout: 30 * 1000,
|
||||
waitUntil: ['networkidle2'],
|
||||
|
||||
@ -32,7 +32,7 @@
|
||||
"linkedom": "^0.14.9",
|
||||
"mocha": "^8.2.0",
|
||||
"nock": "^13.3.1",
|
||||
"puppeteer-core": "^20.9.0",
|
||||
"puppeteer-core": "^22.8.0",
|
||||
"puppeteer-extra": "^3.3.4",
|
||||
"puppeteer-extra-plugin-adblocker": "^2.13.5",
|
||||
"puppeteer-extra-plugin-stealth": "^2.11.1",
|
||||
|
||||
239
yarn.lock
239
yarn.lock
@ -4972,18 +4972,19 @@
|
||||
resolved "https://registry.yarnpkg.com/@protobufjs/utf8/-/utf8-1.1.0.tgz#a777360b5b39a1a2e5106f8e858f2fd2d060c570"
|
||||
integrity sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw==
|
||||
|
||||
"@puppeteer/browsers@1.4.6":
|
||||
version "1.4.6"
|
||||
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-1.4.6.tgz#1f70fd23d5d2ccce9d29b038e5039d7a1049ca77"
|
||||
integrity sha512-x4BEjr2SjOPowNeiguzjozQbsc6h437ovD/wu+JpaenxVLm3jkgzHY2xOslMTp50HoTvQreMjiexiGQw1sqZlQ==
|
||||
"@puppeteer/browsers@2.2.3":
|
||||
version "2.2.3"
|
||||
resolved "https://registry.yarnpkg.com/@puppeteer/browsers/-/browsers-2.2.3.tgz#ad6b79129c50825e77ddaba082680f4dad0b674e"
|
||||
integrity sha512-bJ0UBsk0ESOs6RFcLXOt99a3yTDcOKlzfjad+rhFwdaG1Lu/Wzq58GHYCDTlZ9z6mldf4g+NTb+TXEfe0PpnsQ==
|
||||
dependencies:
|
||||
debug "4.3.4"
|
||||
extract-zip "2.0.1"
|
||||
progress "2.0.3"
|
||||
proxy-agent "6.3.0"
|
||||
tar-fs "3.0.4"
|
||||
proxy-agent "6.4.0"
|
||||
semver "7.6.0"
|
||||
tar-fs "3.0.5"
|
||||
unbzip2-stream "1.4.3"
|
||||
yargs "17.7.1"
|
||||
yargs "17.7.2"
|
||||
|
||||
"@radix-ui/number@1.0.1":
|
||||
version "1.0.1"
|
||||
@ -9503,13 +9504,20 @@ agent-base@^6.0.1, agent-base@^6.0.2:
|
||||
dependencies:
|
||||
debug "4"
|
||||
|
||||
agent-base@^7.0.1, agent-base@^7.0.2, agent-base@^7.1.0:
|
||||
agent-base@^7.0.2, agent-base@^7.1.0:
|
||||
version "7.1.0"
|
||||
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-7.1.0.tgz#536802b76bc0b34aa50195eb2442276d613e3434"
|
||||
integrity sha512-o/zjMZRhJxny7OyEF+Op8X+efiELC7k7yOjMzgfzVqOzXqkBkWI79YoTdOtsuWd5BWhAGAuOY/Xa6xpiaWXiNg==
|
||||
dependencies:
|
||||
debug "^4.3.4"
|
||||
|
||||
agent-base@^7.1.1:
|
||||
version "7.1.1"
|
||||
resolved "https://registry.yarnpkg.com/agent-base/-/agent-base-7.1.1.tgz#bdbded7dfb096b751a2a087eeeb9664725b2e317"
|
||||
integrity sha512-H0TSyFNDMomMNJQBn8wFV5YC/2eJ+VXECwOadZJT554xP6cODZHPX3H9QMQECxvrgiSOP1pHjy1sMWQVYJOUOA==
|
||||
dependencies:
|
||||
debug "^4.3.4"
|
||||
|
||||
agentkeepalive@^4.2.1:
|
||||
version "4.5.0"
|
||||
resolved "https://registry.yarnpkg.com/agentkeepalive/-/agentkeepalive-4.5.0.tgz#2673ad1389b3c418c5a20c5d7364f93ca04be923"
|
||||
@ -10869,6 +10877,39 @@ balanced-match@^1.0.0:
|
||||
resolved "https://registry.yarnpkg.com/balanced-match/-/balanced-match-1.0.0.tgz#89b4d199ab2bee49de164ea02b89ce462d71b767"
|
||||
integrity sha1-ibTRmasr7kneFk6gK4nORi1xt2c=
|
||||
|
||||
bare-events@^2.0.0, bare-events@^2.2.0:
|
||||
version "2.2.2"
|
||||
resolved "https://registry.yarnpkg.com/bare-events/-/bare-events-2.2.2.tgz#a98a41841f98b2efe7ecc5c5468814469b018078"
|
||||
integrity sha512-h7z00dWdG0PYOQEvChhOSWvOfkIKsdZGkWr083FgN/HyoQuebSew/cgirYqh9SCuy/hRvxc5Vy6Fw8xAmYHLkQ==
|
||||
|
||||
bare-fs@^2.1.1:
|
||||
version "2.3.0"
|
||||
resolved "https://registry.yarnpkg.com/bare-fs/-/bare-fs-2.3.0.tgz#0872f8e33cf291c9fd527d827154f156a298d402"
|
||||
integrity sha512-TNFqa1B4N99pds2a5NYHR15o0ZpdNKbAeKTE/+G6ED/UeOavv8RY3dr/Fu99HW3zU3pXpo2kDNO8Sjsm2esfOw==
|
||||
dependencies:
|
||||
bare-events "^2.0.0"
|
||||
bare-path "^2.0.0"
|
||||
bare-stream "^1.0.0"
|
||||
|
||||
bare-os@^2.1.0:
|
||||
version "2.3.0"
|
||||
resolved "https://registry.yarnpkg.com/bare-os/-/bare-os-2.3.0.tgz#718e680b139effff0624a7421c098e7a2c2d63da"
|
||||
integrity sha512-oPb8oMM1xZbhRQBngTgpcQ5gXw6kjOaRsSWsIeNyRxGed2w/ARyP7ScBYpWR1qfX2E5rS3gBw6OWcSQo+s+kUg==
|
||||
|
||||
bare-path@^2.0.0, bare-path@^2.1.0:
|
||||
version "2.1.2"
|
||||
resolved "https://registry.yarnpkg.com/bare-path/-/bare-path-2.1.2.tgz#7a0940d34ebe65f7e179fa61ed8d49d9dc151d67"
|
||||
integrity sha512-o7KSt4prEphWUHa3QUwCxUI00R86VdjiuxmJK0iNVDHYPGo+HsDaVCnqCmPbf/MiW1ok8F4p3m8RTHlWk8K2ig==
|
||||
dependencies:
|
||||
bare-os "^2.1.0"
|
||||
|
||||
bare-stream@^1.0.0:
|
||||
version "1.0.0"
|
||||
resolved "https://registry.yarnpkg.com/bare-stream/-/bare-stream-1.0.0.tgz#25c3e56198d922187320c3f8c52d75c4051178b4"
|
||||
integrity sha512-KhNUoDL40iP4gFaLSsoGE479t0jHijfYdIcxRn/XtezA2BaUD0NRf/JGRpsMq6dMNM+SrCrB0YSSo/5wBY4rOQ==
|
||||
dependencies:
|
||||
streamx "^2.16.1"
|
||||
|
||||
base-64@^0.1.0:
|
||||
version "0.1.0"
|
||||
resolved "https://registry.yarnpkg.com/base-64/-/base-64-0.1.0.tgz#780a99c84e7d600260361511c4877613bf24f6bb"
|
||||
@ -12006,12 +12047,14 @@ chrome-trace-event@^1.0.2:
|
||||
resolved "https://registry.yarnpkg.com/chrome-trace-event/-/chrome-trace-event-1.0.3.tgz#1015eced4741e15d06664a957dbbf50d041e26ac"
|
||||
integrity sha512-p3KULyQg4S7NIHixdwbGX+nFHkoBiA4YQmyWtjb8XngSKV124nJmRysgAeujbUVb15vh+RvFUfCPqU7rXk+hZg==
|
||||
|
||||
chromium-bidi@0.4.16:
|
||||
version "0.4.16"
|
||||
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.4.16.tgz#8a67bfdf6bb8804efc22765a82859d20724b46ab"
|
||||
integrity sha512-7ZbXdWERxRxSwo3txsBjjmc/NLxqb1Bk30mRb0BMS4YIaiV6zvKZqL/UAH+DdqcDYayDWk2n/y8klkBDODrPvA==
|
||||
chromium-bidi@0.5.19:
|
||||
version "0.5.19"
|
||||
resolved "https://registry.yarnpkg.com/chromium-bidi/-/chromium-bidi-0.5.19.tgz#e4f4951b7d9b20d668d6b387839f7b7bf2d69ef4"
|
||||
integrity sha512-UA6zL77b7RYCjJkZBsZ0wlvCTD+jTjllZ8f6wdO4buevXgTZYjV+XLB9CiEa2OuuTGGTLnI7eN9I60YxuALGQg==
|
||||
dependencies:
|
||||
mitt "3.0.0"
|
||||
mitt "3.0.1"
|
||||
urlpattern-polyfill "10.0.0"
|
||||
zod "3.22.4"
|
||||
|
||||
chunkd@^2.0.1:
|
||||
version "2.0.1"
|
||||
@ -13056,13 +13099,6 @@ cron-parser@^4.6.0:
|
||||
dependencies:
|
||||
luxon "^3.2.1"
|
||||
|
||||
cross-fetch@4.0.0:
|
||||
version "4.0.0"
|
||||
resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-4.0.0.tgz#f037aef1580bb3a1a35164ea2a848ba81b445983"
|
||||
integrity sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g==
|
||||
dependencies:
|
||||
node-fetch "^2.6.12"
|
||||
|
||||
cross-fetch@^3.0.6, cross-fetch@^3.1.5:
|
||||
version "3.1.5"
|
||||
resolved "https://registry.yarnpkg.com/cross-fetch/-/cross-fetch-3.1.5.tgz#e1389f44d9e7ba767907f7af8454787952ab534f"
|
||||
@ -13821,10 +13857,10 @@ detect-port@^1.3.0:
|
||||
address "^1.0.1"
|
||||
debug "^2.6.0"
|
||||
|
||||
devtools-protocol@0.0.1147663:
|
||||
version "0.0.1147663"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1147663.tgz#4ec5610b39a6250d1f87e6b9c7e16688ed0ac78e"
|
||||
integrity sha512-hyWmRrexdhbZ1tcJUGpO95ivbRhWXz++F4Ko+n21AY5PNln2ovoJw+8ZMNDTtip+CNFQfrtLVh/w4009dXO/eQ==
|
||||
devtools-protocol@0.0.1273771:
|
||||
version "0.0.1273771"
|
||||
resolved "https://registry.yarnpkg.com/devtools-protocol/-/devtools-protocol-0.0.1273771.tgz#46aeb5db41417e2c2ad3d8367c598c975290b1a5"
|
||||
integrity sha512-QDbb27xcTVReQQW/GHJsdQqGKwYBE7re7gxehj467kKP2DKuYBUj6i2k5LRiAC66J1yZG/9gsxooz/s9pcm0Og==
|
||||
|
||||
dezalgo@1.0.3:
|
||||
version "1.0.3"
|
||||
@ -17922,6 +17958,14 @@ http-proxy-agent@^7.0.0:
|
||||
agent-base "^7.1.0"
|
||||
debug "^4.3.4"
|
||||
|
||||
http-proxy-agent@^7.0.1:
|
||||
version "7.0.2"
|
||||
resolved "https://registry.yarnpkg.com/http-proxy-agent/-/http-proxy-agent-7.0.2.tgz#9a8b1f246866c028509486585f62b8f2c18c270e"
|
||||
integrity sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==
|
||||
dependencies:
|
||||
agent-base "^7.1.0"
|
||||
debug "^4.3.4"
|
||||
|
||||
http-proxy-middleware@^2.0.0:
|
||||
version "2.0.4"
|
||||
resolved "https://registry.yarnpkg.com/http-proxy-middleware/-/http-proxy-middleware-2.0.4.tgz#03af0f4676d172ae775cb5c33f592f40e1a4e07a"
|
||||
@ -17989,6 +18033,14 @@ https-proxy-agent@^7.0.0, https-proxy-agent@^7.0.1:
|
||||
agent-base "^7.0.2"
|
||||
debug "4"
|
||||
|
||||
https-proxy-agent@^7.0.2, https-proxy-agent@^7.0.3:
|
||||
version "7.0.4"
|
||||
resolved "https://registry.yarnpkg.com/https-proxy-agent/-/https-proxy-agent-7.0.4.tgz#8e97b841a029ad8ddc8731f26595bad868cb4168"
|
||||
integrity sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==
|
||||
dependencies:
|
||||
agent-base "^7.0.2"
|
||||
debug "4"
|
||||
|
||||
human-signals@^1.1.1:
|
||||
version "1.1.1"
|
||||
resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-1.1.1.tgz#c5b1cd14f50aeae09ab6c59fe63ba3395fe4dfa3"
|
||||
@ -22563,10 +22615,10 @@ mississippi@^3.0.0:
|
||||
stream-each "^1.1.0"
|
||||
through2 "^2.0.0"
|
||||
|
||||
mitt@3.0.0:
|
||||
version "3.0.0"
|
||||
resolved "https://registry.yarnpkg.com/mitt/-/mitt-3.0.0.tgz#69ef9bd5c80ff6f57473e8d89326d01c414be0bd"
|
||||
integrity sha512-7dX2/10ITVyqh4aOSVI9gdape+t9l2/8QxHrFmUXu4EEUpdlxl6RudZUPZoc+zuY2hk1j7XxVroIVIan/pD/SQ==
|
||||
mitt@3.0.1:
|
||||
version "3.0.1"
|
||||
resolved "https://registry.yarnpkg.com/mitt/-/mitt-3.0.1.tgz#ea36cf0cc30403601ae074c8f77b7092cdab36d1"
|
||||
integrity sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==
|
||||
|
||||
mixin-deep@^1.2.0:
|
||||
version "1.3.2"
|
||||
@ -23170,7 +23222,7 @@ node-fetch@2.6.7, node-fetch@^2.3.0, node-fetch@^2.6.0, node-fetch@^2.6.1, node-
|
||||
dependencies:
|
||||
whatwg-url "^5.0.0"
|
||||
|
||||
node-fetch@^2.6.12, node-fetch@^2.6.9:
|
||||
node-fetch@^2.6.9:
|
||||
version "2.6.12"
|
||||
resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-2.6.12.tgz#02eb8e22074018e3d5a83016649d04df0e348fba"
|
||||
integrity sha512-C/fGU2E8ToujUivIO0H+tpQ6HWo4eEmchoPIoXtxCrVghxdKq+QOHqEZW7tuP3KlV3bC8FRMO5nMCC7Zm1VP6g==
|
||||
@ -24534,19 +24586,19 @@ p-waterfall@2.1.1:
|
||||
dependencies:
|
||||
p-reduce "^2.0.0"
|
||||
|
||||
pac-proxy-agent@^7.0.0:
|
||||
version "7.0.0"
|
||||
resolved "https://registry.yarnpkg.com/pac-proxy-agent/-/pac-proxy-agent-7.0.0.tgz#db42120c64292685dafaf2bd921e223c56bfb13b"
|
||||
integrity sha512-t4tRAMx0uphnZrio0S0Jw9zg3oDbz1zVhQ/Vy18FjLfP1XOLNUEjaVxYCYRI6NS+BsMBXKIzV6cTLOkO9AtywA==
|
||||
pac-proxy-agent@^7.0.1:
|
||||
version "7.0.1"
|
||||
resolved "https://registry.yarnpkg.com/pac-proxy-agent/-/pac-proxy-agent-7.0.1.tgz#6b9ddc002ec3ff0ba5fdf4a8a21d363bcc612d75"
|
||||
integrity sha512-ASV8yU4LLKBAjqIPMbrgtaKIvxQri/yh2OpI+S6hVa9JRkUI3Y3NPFbfngDtY7oFtSMD3w31Xns89mDa3Feo5A==
|
||||
dependencies:
|
||||
"@tootallnate/quickjs-emscripten" "^0.23.0"
|
||||
agent-base "^7.0.2"
|
||||
debug "^4.3.4"
|
||||
get-uri "^6.0.1"
|
||||
http-proxy-agent "^7.0.0"
|
||||
https-proxy-agent "^7.0.0"
|
||||
https-proxy-agent "^7.0.2"
|
||||
pac-resolver "^7.0.0"
|
||||
socks-proxy-agent "^8.0.1"
|
||||
socks-proxy-agent "^8.0.2"
|
||||
|
||||
pac-resolver@^7.0.0:
|
||||
version "7.0.0"
|
||||
@ -25864,19 +25916,19 @@ proxy-addr@~2.0.7:
|
||||
forwarded "0.2.0"
|
||||
ipaddr.js "1.9.1"
|
||||
|
||||
proxy-agent@6.3.0:
|
||||
version "6.3.0"
|
||||
resolved "https://registry.yarnpkg.com/proxy-agent/-/proxy-agent-6.3.0.tgz#72f7bb20eb06049db79f7f86c49342c34f9ba08d"
|
||||
integrity sha512-0LdR757eTj/JfuU7TL2YCuAZnxWXu3tkJbg4Oq3geW/qFNT/32T0sp2HnZ9O0lMR4q3vwAt0+xCA8SR0WAD0og==
|
||||
proxy-agent@6.4.0:
|
||||
version "6.4.0"
|
||||
resolved "https://registry.yarnpkg.com/proxy-agent/-/proxy-agent-6.4.0.tgz#b4e2dd51dee2b377748aef8d45604c2d7608652d"
|
||||
integrity sha512-u0piLU+nCOHMgGjRbimiXmA9kM/L9EHh3zL81xCdp7m+Y2pHIsnmbdDoEDoAz5geaonNR6q6+yOPQs6n4T6sBQ==
|
||||
dependencies:
|
||||
agent-base "^7.0.2"
|
||||
debug "^4.3.4"
|
||||
http-proxy-agent "^7.0.0"
|
||||
https-proxy-agent "^7.0.0"
|
||||
http-proxy-agent "^7.0.1"
|
||||
https-proxy-agent "^7.0.3"
|
||||
lru-cache "^7.14.1"
|
||||
pac-proxy-agent "^7.0.0"
|
||||
pac-proxy-agent "^7.0.1"
|
||||
proxy-from-env "^1.1.0"
|
||||
socks-proxy-agent "^8.0.1"
|
||||
socks-proxy-agent "^8.0.2"
|
||||
|
||||
proxy-from-env@1.0.0:
|
||||
version "1.0.0"
|
||||
@ -25981,17 +26033,16 @@ pupa@^2.1.1:
|
||||
dependencies:
|
||||
escape-goat "^2.0.0"
|
||||
|
||||
puppeteer-core@^20.9.0:
|
||||
version "20.9.0"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-20.9.0.tgz#6f4b420001b64419deab38d398a4d9cd071040e6"
|
||||
integrity sha512-H9fYZQzMTRrkboEfPmf7m3CLDN6JvbxXA3qTtS+dFt27tR+CsFHzPsT6pzp6lYL6bJbAPaR0HaPO6uSi+F94Pg==
|
||||
puppeteer-core@^22.8.0:
|
||||
version "22.8.0"
|
||||
resolved "https://registry.yarnpkg.com/puppeteer-core/-/puppeteer-core-22.8.0.tgz#82c0e7ebf62ba5f34404394034e313b82014de5f"
|
||||
integrity sha512-S5bWx3g/fNuyFxjZX9TkZMN07CEH47+9Zm6IiTl1QfqI9pnVaShbwrD9kRe5vmz/XPp/jLGhhxRUj1sY4wObnA==
|
||||
dependencies:
|
||||
"@puppeteer/browsers" "1.4.6"
|
||||
chromium-bidi "0.4.16"
|
||||
cross-fetch "4.0.0"
|
||||
"@puppeteer/browsers" "2.2.3"
|
||||
chromium-bidi "0.5.19"
|
||||
debug "4.3.4"
|
||||
devtools-protocol "0.0.1147663"
|
||||
ws "8.13.0"
|
||||
devtools-protocol "0.0.1273771"
|
||||
ws "8.17.0"
|
||||
|
||||
puppeteer-extra-plugin-adblocker@^2.13.5:
|
||||
version "2.13.5"
|
||||
@ -28084,6 +28135,13 @@ semver@7.5.3:
|
||||
dependencies:
|
||||
lru-cache "^6.0.0"
|
||||
|
||||
semver@7.6.0:
|
||||
version "7.6.0"
|
||||
resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.0.tgz#1a46a4db4bffcccd97b743b5005c8325f23d4e2d"
|
||||
integrity sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg==
|
||||
dependencies:
|
||||
lru-cache "^6.0.0"
|
||||
|
||||
semver@^6.0.0, semver@^6.1.1, semver@^6.1.2, semver@^6.2.0, semver@^6.3.0, semver@^6.3.1:
|
||||
version "6.3.1"
|
||||
resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4"
|
||||
@ -28570,12 +28628,12 @@ socks-proxy-agent@^7.0.0:
|
||||
debug "^4.3.3"
|
||||
socks "^2.6.2"
|
||||
|
||||
socks-proxy-agent@^8.0.1:
|
||||
version "8.0.1"
|
||||
resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-8.0.1.tgz#ffc5859a66dac89b0c4dab90253b96705f3e7120"
|
||||
integrity sha512-59EjPbbgg8U3x62hhKOFVAmySQUcfRQ4C7Q/D5sEHnZTQRrQlNKINks44DMR1gwXp0p4LaVIeccX2KHTTcHVqQ==
|
||||
socks-proxy-agent@^8.0.2:
|
||||
version "8.0.3"
|
||||
resolved "https://registry.yarnpkg.com/socks-proxy-agent/-/socks-proxy-agent-8.0.3.tgz#6b2da3d77364fde6292e810b496cb70440b9b89d"
|
||||
integrity sha512-VNegTZKhuGq5vSD6XNKlbqWhyt/40CgoEw8XxD6dhnm8Jq9IEa3nIa4HwnM8XOqU0CdB0BwWVXusqiFXfHB3+A==
|
||||
dependencies:
|
||||
agent-base "^7.0.1"
|
||||
agent-base "^7.1.1"
|
||||
debug "^4.3.4"
|
||||
socks "^2.7.1"
|
||||
|
||||
@ -28976,6 +29034,16 @@ streamx@^2.15.0:
|
||||
fast-fifo "^1.1.0"
|
||||
queue-tick "^1.0.1"
|
||||
|
||||
streamx@^2.16.1:
|
||||
version "2.16.1"
|
||||
resolved "https://registry.yarnpkg.com/streamx/-/streamx-2.16.1.tgz#2b311bd34832f08aa6bb4d6a80297c9caef89614"
|
||||
integrity sha512-m9QYj6WygWyWa3H1YY69amr4nVgy61xfjys7xO7kviL5rfIEc2naf+ewFiOA+aEJD7y0JO3h2GoiUv4TDwEGzQ==
|
||||
dependencies:
|
||||
fast-fifo "^1.1.0"
|
||||
queue-tick "^1.0.1"
|
||||
optionalDependencies:
|
||||
bare-events "^2.2.0"
|
||||
|
||||
string-convert@^0.2.0:
|
||||
version "0.2.1"
|
||||
resolved "https://registry.yarnpkg.com/string-convert/-/string-convert-0.2.1.tgz#6982cc3049fbb4cd85f8b24568b9d9bf39eeff97"
|
||||
@ -29517,14 +29585,16 @@ tapable@^2.0.0, tapable@^2.1.1, tapable@^2.2.0:
|
||||
resolved "https://registry.yarnpkg.com/tapable/-/tapable-2.2.1.tgz#1967a73ef4060a82f12ab96af86d52fdb76eeca0"
|
||||
integrity sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==
|
||||
|
||||
tar-fs@3.0.4, tar-fs@^3.0.4:
|
||||
version "3.0.4"
|
||||
resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-3.0.4.tgz#a21dc60a2d5d9f55e0089ccd78124f1d3771dbbf"
|
||||
integrity sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w==
|
||||
tar-fs@3.0.5:
|
||||
version "3.0.5"
|
||||
resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-3.0.5.tgz#f954d77767e4e6edf973384e1eb95f8f81d64ed9"
|
||||
integrity sha512-JOgGAmZyMgbqpLwct7ZV8VzkEB6pxXFBVErLtb+XCOqzc6w1xiWKI9GVd6bwk68EX7eJ4DWmfXVmq8K2ziZTGg==
|
||||
dependencies:
|
||||
mkdirp-classic "^0.5.2"
|
||||
pump "^3.0.0"
|
||||
tar-stream "^3.1.5"
|
||||
optionalDependencies:
|
||||
bare-fs "^2.1.1"
|
||||
bare-path "^2.1.0"
|
||||
|
||||
tar-fs@^2.0.0:
|
||||
version "2.1.1"
|
||||
@ -29536,6 +29606,15 @@ tar-fs@^2.0.0:
|
||||
pump "^3.0.0"
|
||||
tar-stream "^2.1.4"
|
||||
|
||||
tar-fs@^3.0.4:
|
||||
version "3.0.4"
|
||||
resolved "https://registry.yarnpkg.com/tar-fs/-/tar-fs-3.0.4.tgz#a21dc60a2d5d9f55e0089ccd78124f1d3771dbbf"
|
||||
integrity sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w==
|
||||
dependencies:
|
||||
mkdirp-classic "^0.5.2"
|
||||
pump "^3.0.0"
|
||||
tar-stream "^3.1.5"
|
||||
|
||||
tar-stream@^2.1.4, tar-stream@~2.2.0:
|
||||
version "2.2.0"
|
||||
resolved "https://registry.yarnpkg.com/tar-stream/-/tar-stream-2.2.0.tgz#acad84c284136b060dc3faa64474aa9aebd77287"
|
||||
@ -30954,6 +31033,11 @@ url@^0.11.0:
|
||||
punycode "1.3.2"
|
||||
querystring "0.2.0"
|
||||
|
||||
urlpattern-polyfill@10.0.0:
|
||||
version "10.0.0"
|
||||
resolved "https://registry.yarnpkg.com/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz#f0a03a97bfb03cdf33553e5e79a2aadd22cac8ec"
|
||||
integrity sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==
|
||||
|
||||
urlsafe-base64@^1.0.0:
|
||||
version "1.0.0"
|
||||
resolved "https://registry.yarnpkg.com/urlsafe-base64/-/urlsafe-base64-1.0.0.tgz#23f89069a6c62f46cf3a1d3b00169cefb90be0c6"
|
||||
@ -31970,10 +32054,10 @@ write-pkg@4.0.0:
|
||||
type-fest "^0.4.1"
|
||||
write-json-file "^3.2.0"
|
||||
|
||||
ws@8.13.0:
|
||||
version "8.13.0"
|
||||
resolved "https://registry.yarnpkg.com/ws/-/ws-8.13.0.tgz#9a9fb92f93cf41512a0735c8f4dd09b8a1211cd0"
|
||||
integrity sha512-x9vcZYTrFPC7aSIbj7sRCYo7L/Xb8Iy+pW0ng0wt2vCJv7M9HOMy0UoN3rr+IFC7hb7vXoqS+P9ktyLLLhO+LA==
|
||||
ws@8.17.0:
|
||||
version "8.17.0"
|
||||
resolved "https://registry.yarnpkg.com/ws/-/ws-8.17.0.tgz#d145d18eca2ed25aaf791a183903f7be5e295fea"
|
||||
integrity sha512-uJq6108EgZMAl20KagGkzCKfMEjxmKvZHG7Tlq0Z6nOky7YF7aq4mOx6xK8TJ/i1LeK4Qus7INktacctDgY8Ow==
|
||||
|
||||
"ws@^5.2.0 || ^6.0.0 || ^7.0.0", ws@^7.3.1, ws@^7.4.6:
|
||||
version "7.5.7"
|
||||
@ -32150,10 +32234,10 @@ yargs@16.2.0, yargs@^16.0.0, yargs@^16.2.0:
|
||||
y18n "^5.0.5"
|
||||
yargs-parser "^20.2.2"
|
||||
|
||||
yargs@17.7.1:
|
||||
version "17.7.1"
|
||||
resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.1.tgz#34a77645201d1a8fc5213ace787c220eabbd0967"
|
||||
integrity sha512-cwiTb08Xuv5fqF4AovYacTFNxk62th7LKJ6BL9IGUpTJrWoU7/7WdQGTP2SjKf1dUNBGzDd28p/Yfs/GI6JrLw==
|
||||
yargs@17.7.2, yargs@^17.5.1, yargs@^17.6.2:
|
||||
version "17.7.2"
|
||||
resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.2.tgz#991df39aca675a192b816e1e0363f9d75d2aa269"
|
||||
integrity sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==
|
||||
dependencies:
|
||||
cliui "^8.0.1"
|
||||
escalade "^3.1.1"
|
||||
@ -32193,19 +32277,6 @@ yargs@^17.0.0, yargs@^17.3.1:
|
||||
y18n "^5.0.5"
|
||||
yargs-parser "^21.0.0"
|
||||
|
||||
yargs@^17.5.1, yargs@^17.6.2:
|
||||
version "17.7.2"
|
||||
resolved "https://registry.yarnpkg.com/yargs/-/yargs-17.7.2.tgz#991df39aca675a192b816e1e0363f9d75d2aa269"
|
||||
integrity sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==
|
||||
dependencies:
|
||||
cliui "^8.0.1"
|
||||
escalade "^3.1.1"
|
||||
get-caller-file "^2.0.5"
|
||||
require-directory "^2.1.1"
|
||||
string-width "^4.2.3"
|
||||
y18n "^5.0.5"
|
||||
yargs-parser "^21.1.1"
|
||||
|
||||
yauzl@^2.10.0:
|
||||
version "2.10.0"
|
||||
resolved "https://registry.yarnpkg.com/yauzl/-/yauzl-2.10.0.tgz#c7eb17c93e112cb1086fa6d8e51fb0667b79a5f9"
|
||||
@ -32263,7 +32334,7 @@ zod-to-json-schema@^3.22.4:
|
||||
resolved "https://registry.yarnpkg.com/zod-to-json-schema/-/zod-to-json-schema-3.23.0.tgz#4fc60e88d3c709eedbfaae3f92f8a7bf786469f2"
|
||||
integrity sha512-az0uJ243PxsRIa2x1WmNE/pnuA05gUq/JB8Lwe1EDCCL/Fz9MgjYQ0fPlyc2Tcv6aF2ZA7WM5TWaRZVEFaAIag==
|
||||
|
||||
zod@^3.22.3, zod@^3.22.4:
|
||||
zod@3.22.4, zod@^3.22.3, zod@^3.22.4:
|
||||
version "3.22.4"
|
||||
resolved "https://registry.yarnpkg.com/zod/-/zod-3.22.4.tgz#f31c3a9386f61b1f228af56faa9255e845cf3fff"
|
||||
integrity sha512-iC+8Io04lddc+mVqQ9AZ7OQ2MrUKGN+oIQyq1vemgt46jwCwLfhq7/pwnBnNXXXZb8VTVLKwp9EDkx+ryxIWmg==
|
||||
|
||||
Reference in New Issue
Block a user