get content from cache first when saving url
This commit is contained in:
@ -6,6 +6,7 @@ import {
|
||||
ArticleSavingRequestStatus,
|
||||
CreateLabelInput,
|
||||
} from '../generated/graphql'
|
||||
import { redisDataSource } from '../redis_data_source'
|
||||
import { userRepository } from '../repository/user'
|
||||
import { saveFile } from '../services/save_file'
|
||||
import { savePage } from '../services/save_page'
|
||||
@ -43,6 +44,19 @@ interface Data {
|
||||
rssFeedUrl?: string
|
||||
publishedAt?: string
|
||||
taskId?: string
|
||||
cacheKey?: string
|
||||
}
|
||||
|
||||
const getCachedContent = async (key: string): Promise<string | null> => {
|
||||
const result = await redisDataSource.redisClient?.get(key)
|
||||
if (!result) {
|
||||
logger.info('fetch result is not cached', { key })
|
||||
return null
|
||||
}
|
||||
|
||||
logger.info('content is cached', { key })
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
const uploadPdf = async (
|
||||
@ -133,6 +147,7 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
|
||||
title,
|
||||
contentType,
|
||||
state,
|
||||
cacheKey,
|
||||
} = data
|
||||
let isImported, isSaved
|
||||
|
||||
@ -185,25 +200,42 @@ export const savePageJob = async (data: Data, attemptsMade: number) => {
|
||||
return true
|
||||
}
|
||||
|
||||
// download the original content
|
||||
const filePath = contentFilePath({
|
||||
userId,
|
||||
libraryItemId: articleSavingRequestId,
|
||||
format: 'original',
|
||||
savedAt: new Date(savedAt),
|
||||
})
|
||||
const exists = await isFileExists(filePath)
|
||||
if (!exists) {
|
||||
logger.error('Original content file does not exist', {
|
||||
finalUrl,
|
||||
filePath,
|
||||
})
|
||||
let content
|
||||
|
||||
throw new Error('Original content file does not exist')
|
||||
if (cacheKey) {
|
||||
logger.info('fetching content from cache', {
|
||||
cacheKey,
|
||||
})
|
||||
content = await getCachedContent(cacheKey)
|
||||
|
||||
logger.info('fetched content from cache')
|
||||
}
|
||||
|
||||
const content = (await downloadFromBucket(filePath)).toString()
|
||||
console.log('Downloaded original content from:', filePath)
|
||||
if (!content) {
|
||||
logger.info('downloading content from GCS', {
|
||||
url,
|
||||
})
|
||||
|
||||
// download the original content
|
||||
const filePath = contentFilePath({
|
||||
userId,
|
||||
libraryItemId: articleSavingRequestId,
|
||||
format: 'original',
|
||||
savedAt: new Date(savedAt),
|
||||
})
|
||||
const exists = await isFileExists(filePath)
|
||||
if (!exists) {
|
||||
logger.error('Original content file does not exist', {
|
||||
finalUrl,
|
||||
filePath,
|
||||
})
|
||||
|
||||
throw new Error('Original content file does not exist')
|
||||
}
|
||||
|
||||
content = (await downloadFromBucket(filePath)).toString()
|
||||
logger.info('Downloaded original content from:', { filePath })
|
||||
}
|
||||
|
||||
// for non-pdf content, we need to save the content
|
||||
const result = await savePage(
|
||||
|
||||
@ -20,6 +20,7 @@ interface SavePageJobData {
|
||||
taskId?: string
|
||||
title?: string
|
||||
contentType?: string
|
||||
cacheKey?: string
|
||||
}
|
||||
|
||||
interface SavePageJob {
|
||||
|
||||
@ -49,13 +49,6 @@ interface LogRecord {
|
||||
totalTime?: number
|
||||
}
|
||||
|
||||
interface FetchResult {
|
||||
finalUrl: string
|
||||
title?: string
|
||||
content?: string
|
||||
contentType?: string
|
||||
}
|
||||
|
||||
const storage = process.env.GCS_UPLOAD_SA_KEY_FILE_PATH
|
||||
? new Storage({ keyFilename: process.env.GCS_UPLOAD_SA_KEY_FILE_PATH })
|
||||
: new Storage()
|
||||
@ -87,37 +80,22 @@ const uploadOriginalContent = async (
|
||||
const cacheKey = (url: string, locale = '', timezone = '') =>
|
||||
`fetch-result:${url}:${locale}:${timezone}`
|
||||
|
||||
const isFetchResult = (obj: unknown): obj is FetchResult => {
|
||||
return typeof obj === 'object' && obj !== null && 'finalUrl' in obj
|
||||
}
|
||||
|
||||
export const cacheFetchResult = async (
|
||||
key: string,
|
||||
fetchResult: FetchResult
|
||||
) => {
|
||||
export const cacheContent = async (key: string, content: string) => {
|
||||
// cache the fetch result for 24 hours
|
||||
const ttl = 24 * 60 * 60
|
||||
const value = JSON.stringify(fetchResult)
|
||||
return redisDataSource.cacheClient.set(key, value, 'EX', ttl, 'NX')
|
||||
return redisDataSource.cacheClient.set(key, content, 'EX', ttl, 'NX')
|
||||
}
|
||||
|
||||
const getCachedFetchResult = async (
|
||||
key: string
|
||||
): Promise<FetchResult | null> => {
|
||||
const result = await redisDataSource.cacheClient.get(key)
|
||||
if (!result) {
|
||||
const getCachedContent = async (key: string): Promise<string | null> => {
|
||||
const content = await redisDataSource.cacheClient.get(key)
|
||||
if (!content) {
|
||||
console.info('fetch result is not cached', key)
|
||||
return null
|
||||
}
|
||||
|
||||
const fetchResult = JSON.parse(result) as unknown
|
||||
if (!isFetchResult(fetchResult)) {
|
||||
throw new Error('fetch result is not valid')
|
||||
}
|
||||
console.info('content is cached', key)
|
||||
|
||||
console.info('fetch result is cached', key)
|
||||
|
||||
return fetchResult
|
||||
return content
|
||||
}
|
||||
|
||||
export const contentFetchRequestHandler: RequestHandler = async (req, res) => {
|
||||
@ -171,25 +149,31 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => {
|
||||
console.log(`Article parsing request`, logRecord)
|
||||
|
||||
try {
|
||||
const key = cacheKey(url, locale, timezone)
|
||||
let fetchResult = await getCachedFetchResult(key)
|
||||
if (!fetchResult) {
|
||||
console.log(
|
||||
'fetch result not found in cache, fetching content now...',
|
||||
url
|
||||
)
|
||||
let finalUrl: string,
|
||||
title: string | undefined,
|
||||
contentType: string | undefined
|
||||
|
||||
fetchResult = await fetchContent(url, locale, timezone)
|
||||
const key = cacheKey(url, locale, timezone)
|
||||
let content = await getCachedContent(key)
|
||||
if (!content) {
|
||||
console.log('content not found in cache, fetching content now...', url)
|
||||
|
||||
const fetchResult = await fetchContent(url, locale, timezone)
|
||||
console.log('content has been fetched')
|
||||
|
||||
if (fetchResult.content) {
|
||||
const cacheResult = await cacheFetchResult(url, fetchResult)
|
||||
content = fetchResult.content
|
||||
const cacheResult = await cacheContent(key, content)
|
||||
|
||||
console.log('cache result', cacheResult)
|
||||
}
|
||||
|
||||
finalUrl = fetchResult.finalUrl
|
||||
title = fetchResult.title
|
||||
contentType = fetchResult.contentType
|
||||
}
|
||||
|
||||
const savedDate = savedAt ? new Date(savedAt) : new Date()
|
||||
const { finalUrl, title, content, contentType } = fetchResult
|
||||
if (content) {
|
||||
await uploadOriginalContent(users, content, savedDate.getTime())
|
||||
}
|
||||
@ -211,6 +195,7 @@ export const contentFetchRequestHandler: RequestHandler = async (req, res) => {
|
||||
taskId,
|
||||
title,
|
||||
contentType,
|
||||
cacheKey: key,
|
||||
},
|
||||
isRss: !!rssFeedUrl,
|
||||
isImport: !!taskId,
|
||||
|
||||
Reference in New Issue
Block a user