From 7634ed667f40411cfaafbb6abaec8d393c493098 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 13 May 2024 17:01:52 +0800 Subject: [PATCH 1/4] capture total time of fetching a page --- packages/content-fetch/src/analytics.ts | 31 ++++++++++------- packages/content-fetch/src/request_handler.ts | 34 +++++++------------ 2 files changed, 30 insertions(+), 35 deletions(-) diff --git a/packages/content-fetch/src/analytics.ts b/packages/content-fetch/src/analytics.ts index c964579c8..ec1121a8c 100644 --- a/packages/content-fetch/src/analytics.ts +++ b/packages/content-fetch/src/analytics.ts @@ -1,13 +1,12 @@ import { PostHog } from 'posthog-node' interface AnalyticEvent { - distinctId: string - event: string + result: 'success' | 'failure' properties?: Record } interface AnalyticClient { - capture: (event: AnalyticEvent) => void + capture: (userIds: string[], event: AnalyticEvent) => void shutdownAsync?: () => Promise } @@ -18,17 +17,23 @@ class PostHogClient implements AnalyticClient { this.client = new PostHog(apiKey) } - capture({ distinctId, event, properties }: AnalyticEvent) { - // get client from request context + capture(userIds: string[], { properties, result }: AnalyticEvent) { + if (process.env.SEND_ANALYTICS) { + userIds.forEach((userId) => { + this.client.capture({ + distinctId: userId, + event: `content_fetch_${result}`, + properties: { + ...properties, + env: process.env.API_ENV, + }, + }) + }) - this.client.capture({ - distinctId, - event, - properties: { - ...properties, - env: process.env.API_ENV || 'demo', - }, - }) + return + } + + console.log('analytics', { userIds, result, properties }) } async shutdownAsync() { diff --git a/packages/content-fetch/src/request_handler.ts b/packages/content-fetch/src/request_handler.ts index 6b1721e5e..51db8532d 100644 --- a/packages/content-fetch/src/request_handler.ts +++ b/packages/content-fetch/src/request_handler.ts @@ -148,33 +148,23 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => { logRecord.error = 'unknown error' } - // capture error event - users.forEach((user) => { - analytics.capture({ - distinctId: user.id, - event: 'content_fetch_failure', - properties: { - url, - }, - }) - }) - return res.sendStatus(500) } finally { logRecord.totalTime = Date.now() - functionStartTime console.log(`parse-page result`, logRecord) - } - // capture success event - users.forEach((user) => { - analytics.capture({ - distinctId: user.id, - event: 'content_fetch_success', - properties: { - url, - }, - }) - }) + // capture events + analytics.capture( + users.map((user) => user.id), + { + result: logRecord.error ? 'failure' : 'success', + properties: { + url, + totalTime: logRecord.totalTime, + }, + } + ) + } res.sendStatus(200) } From 0a96057bda778e1d6591f008565a1e3500f1c990 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 13 May 2024 18:35:24 +0800 Subject: [PATCH 2/4] fix: text not highlighted in highlightedMarkdown content --- packages/api/src/jobs/upload_content.ts | 54 +++++++++++++++++++---- packages/api/src/services/library_item.ts | 2 +- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/packages/api/src/jobs/upload_content.ts b/packages/api/src/jobs/upload_content.ts index 78d9339b9..2a00d55a1 100644 --- a/packages/api/src/jobs/upload_content.ts +++ b/packages/api/src/jobs/upload_content.ts @@ -1,3 +1,4 @@ +import { Highlight } from '../entity/highlight' import { findLibraryItemById } from '../services/library_item' import { logger } from '../utils/logger' import { htmlToHighlightedMarkdown, htmlToMarkdown } from '../utils/parser' @@ -14,12 +15,16 @@ export interface UploadContentJobData { filePath: string } -const convertContent = (content: string, format: ContentFormat): string => { +const convertContent = ( + content: string, + format: ContentFormat, + highlights?: Highlight[] +): string => { switch (format) { case 'markdown': return htmlToMarkdown(content) case 'highlightedMarkdown': - return htmlToHighlightedMarkdown(content) + return htmlToHighlightedMarkdown(content, highlights) case 'original': return content default: @@ -33,29 +38,62 @@ const CONTENT_TYPES = { original: 'text/html', } +const getSelectOptions = ( + format: ContentFormat +): { column: 'readableContent' | 'originalContent'; highlights?: boolean } => { + switch (format) { + case 'markdown': + return { + column: 'readableContent', + } + case 'highlightedMarkdown': + return { + column: 'readableContent', + highlights: true, + } + case 'original': + return { + column: 'originalContent', + } + default: + throw new Error('Unsupported format') + } +} + export const uploadContentJob = async (data: UploadContentJobData) => { logger.info('Uploading content to bucket', data) const { libraryItemId, userId, format, filePath } = data + + const { column, highlights } = getSelectOptions(format) const libraryItem = await findLibraryItemById(libraryItemId, userId, { - select: ['originalContent'], + select: [column], + relations: { + highlights, + }, }) if (!libraryItem) { logger.error('Library item not found', data) throw new Error('Library item not found') } - if (!libraryItem.originalContent) { - logger.error('Original content not found', data) - throw new Error('Original content not found') + const content = libraryItem[column] + + if (!content) { + logger.error(`${column} not found`, data) + throw new Error('Content not found') } logger.info('Converting content', data) - const content = convertContent(libraryItem.originalContent, format) + const convertedContent = convertContent( + content, + format, + libraryItem.highlights + ) console.time('uploadToBucket') logger.info('Uploading content', data) - await uploadToBucket(filePath, Buffer.from(content), { + await uploadToBucket(filePath, Buffer.from(convertedContent), { contentType: CONTENT_TYPES[format], timeout: 60000, // 1 minute }) diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index a7376da30..0e40ea621 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -807,7 +807,7 @@ export const findLibraryItemById = async ( return authTrx( async (tx) => tx.withRepository(libraryItemRepository).findOne({ - select: options?.select, + select: ['id' as keyof LibraryItem].concat(options?.select || []), // always select id where: { id }, relations: options?.relations, }), From 33e1c4dd00ff5d2b40e57d55f36d666f8dc68f70 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 13 May 2024 19:10:14 +0800 Subject: [PATCH 3/4] remove flush method from analytics class --- packages/content-fetch/src/analytics.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/packages/content-fetch/src/analytics.ts b/packages/content-fetch/src/analytics.ts index ec1121a8c..fb36a970f 100644 --- a/packages/content-fetch/src/analytics.ts +++ b/packages/content-fetch/src/analytics.ts @@ -7,7 +7,6 @@ interface AnalyticEvent { interface AnalyticClient { capture: (userIds: string[], event: AnalyticEvent) => void - shutdownAsync?: () => Promise } class PostHogClient implements AnalyticClient { @@ -35,10 +34,6 @@ class PostHogClient implements AnalyticClient { console.log('analytics', { userIds, result, properties }) } - - async shutdownAsync() { - return this.client.shutdownAsync() - } } export const analytics = new PostHogClient( From 302efc119e6c9a6d7b048e8f7aa007716175a5c8 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 13 May 2024 19:13:55 +0800 Subject: [PATCH 4/4] fix tests --- packages/api/src/jobs/upload_content.ts | 2 +- packages/api/src/services/library_item.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/api/src/jobs/upload_content.ts b/packages/api/src/jobs/upload_content.ts index 2a00d55a1..7159510b2 100644 --- a/packages/api/src/jobs/upload_content.ts +++ b/packages/api/src/jobs/upload_content.ts @@ -67,7 +67,7 @@ export const uploadContentJob = async (data: UploadContentJobData) => { const { column, highlights } = getSelectOptions(format) const libraryItem = await findLibraryItemById(libraryItemId, userId, { - select: [column], + select: ['id', column], // id is required for relations relations: { highlights, }, diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index 0e40ea621..a7376da30 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -807,7 +807,7 @@ export const findLibraryItemById = async ( return authTrx( async (tx) => tx.withRepository(libraryItemRepository).findOne({ - select: ['id' as keyof LibraryItem].concat(options?.select || []), // always select id + select: options?.select, where: { id }, relations: options?.relations, }),