diff --git a/packages/api/src/jobs/save_page.ts b/packages/api/src/jobs/save_page.ts index 6047e5a52..3f2839bbe 100644 --- a/packages/api/src/jobs/save_page.ts +++ b/packages/api/src/jobs/save_page.ts @@ -38,18 +38,7 @@ interface Data { savedAt?: string publishedAt?: string taskId?: string - contentHash?: string -} - -interface FetchResult { - finalUrl: string - title?: string - content?: string - contentType?: string -} - -const isFetchResult = (obj: unknown): obj is FetchResult => { - return typeof obj === 'object' && obj !== null && 'finalUrl' in obj + urlHash?: string } const uploadPdf = async ( @@ -139,7 +128,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { finalUrl, title, contentType, - contentHash, + urlHash, } = data let isImported, isSaved, @@ -195,7 +184,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { } let originalContent - if (!contentHash) { + if (!urlHash) { logger.info(`content is not uploaded: ${finalUrl}`) // set the state to failed if we don't have content originalContent = 'Failed to fetch content' @@ -203,7 +192,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => { } else { // download content from the bucket const downloaded = await downloadStringFromBucket( - `originalContent/${contentHash}` + `originalContent/${urlHash}` ) if (!downloaded) { logger.error('error while downloading content from bucket') diff --git a/packages/content-fetch/src/request_handler.ts b/packages/content-fetch/src/request_handler.ts index 706430063..2d010ade4 100644 --- a/packages/content-fetch/src/request_handler.ts +++ b/packages/content-fetch/src/request_handler.ts @@ -122,14 +122,14 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => { try { const fetchResult = await fetchContent(url, locale, timezone) const finalUrl = fetchResult.finalUrl - let contentHash: string | undefined + let urlHash: string | undefined const content = fetchResult.content if 
(content) { - // hash content to use as key - contentHash = hash(content) - await uploadToBucket(contentHash, content) - console.log('content uploaded to bucket', contentHash) + // hash the final URL to use as the bucket key + urlHash = hash(finalUrl) + await uploadToBucket(urlHash, content) + console.log('content uploaded to bucket', urlHash) } const savePageJobs = users.map((user) => ({ @@ -149,7 +149,7 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => { taskId, title: fetchResult.title, contentType: fetchResult.contentType, - contentHash, + urlHash, }, isRss: !!rssFeedUrl, isImport: !!taskId,