Use a hash of the final URL (instead of a hash of the content) as the bucket key

This commit is contained in:
Hongbo Wu
2024-04-19 18:00:54 +08:00
parent 7a0b2f3d33
commit 5bd157ca25
2 changed files with 9 additions and 20 deletions

View File

@ -38,18 +38,7 @@ interface Data {
savedAt?: string
publishedAt?: string
taskId?: string
contentHash?: string
}
interface FetchResult {
finalUrl: string
title?: string
content?: string
contentType?: string
}
const isFetchResult = (obj: unknown): obj is FetchResult => {
return typeof obj === 'object' && obj !== null && 'finalUrl' in obj
urlHash?: string
}
const uploadPdf = async (
@ -139,7 +128,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
finalUrl,
title,
contentType,
contentHash,
urlHash,
} = data
let isImported,
isSaved,
@ -195,7 +184,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
}
let originalContent
if (!contentHash) {
if (!urlHash) {
logger.info(`content is not uploaded: ${finalUrl}`)
// set the state to failed if we don't have content
originalContent = 'Failed to fetch content'
@ -203,7 +192,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
} else {
// download content from the bucket
const downloaded = await downloadStringFromBucket(
`originalContent/${contentHash}`
`originalContent/${urlHash}`
)
if (!downloaded) {
logger.error('error while downloading content from bucket')

View File

@ -122,14 +122,14 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => {
try {
const fetchResult = await fetchContent(url, locale, timezone)
const finalUrl = fetchResult.finalUrl
let contentHash: string | undefined
let urlHash: string | undefined
const content = fetchResult.content
if (content) {
// hash the final URL (previously the content itself) to use as the bucket key
contentHash = hash(content)
await uploadToBucket(contentHash, content)
console.log('content uploaded to bucket', contentHash)
urlHash = hash(finalUrl)
await uploadToBucket(urlHash, content)
console.log('content uploaded to bucket', urlHash)
}
const savePageJobs = users.map((user) => ({
@ -149,7 +149,7 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => {
taskId,
title: fetchResult.title,
contentType: fetchResult.contentType,
contentHash,
urlHash,
},
isRss: !!rssFeedUrl,
isImport: !!taskId,