/*
 * Reconstructed from a whitespace-collapsed git patch:
 *   commit:  0e0c4bddacf3742c3572dcdc0f70541a5bfc5b79
 *   author:  Hongbo Wu
 *   date:    Wed, 24 Jul 2024 16:55:50 +0800
 *   subject: [PATCH] block failed domains
 *   file:    packages/content-fetch/src/request_handler.ts
 *            (index 81fc04de2..fafe53ead; 1 file changed, 62 insertions, 2 deletions)
 *
 * First hunk (@@ -129,6 +129,52 @@): the helpers below are inserted between the end of
 * `getCachedFetchResult` (`return fetchResult }`) and the declaration of
 * `contentFetchRequestHandler`.
 */

/** Failure threshold used when MAX_FEED_FETCH_FAILURES is unset or not a number. */
const DEFAULT_MAX_FETCH_FAILURES = 10

/** Lifetime of a failure counter, refreshed on every recorded failure: one day, in seconds. */
const FETCH_FAILURE_TTL_SECONDS = 24 * 60 * 60

/** Hostnames that are always reported as blocked, without consulting Redis. */
const STATICALLY_BLOCKED_DOMAINS: ReadonlySet<string> = new Set([
  'localhost',
  'weibo.com',
])

/**
 * Canonicalises a hostname so that equivalent spellings share one block-list entry and one
 * failure counter: surrounding whitespace is removed, the name is lowercased, and a single
 * trailing dot (the fully-qualified form, e.g. `localhost.`) is dropped.
 */
const normalizeFetchDomain = (domain: string): string =>
  domain.trim().toLowerCase().replace(/\.$/, '')

/**
 * Reads the failure threshold from the MAX_FEED_FETCH_FAILURES environment variable.
 * Falls back to the default when the variable is unset, empty, or not a base-10 integer,
 * so that a misconfigured value cannot silently disable blocking (a NaN threshold makes
 * every `count > threshold` comparison false).
 */
const resolveMaxFetchFailures = (): number => {
  const parsed = Number.parseInt(process.env.MAX_FEED_FETCH_FAILURES ?? '', 10)
  return Number.isNaN(parsed) ? DEFAULT_MAX_FETCH_FAILURES : parsed
}

/** Builds the Redis key under which the failure counter for `domain` is stored. */
const failureRedisKey = (domain: string) => `fetch-failure:${domain}`

/**
 * Decides whether content from `domain` should be skipped.
 *
 * A domain is blocked when it is on the static block list, or when its failure counter in
 * Redis is strictly greater than the configured threshold.
 *
 * @param redisDataSource - data source whose `cacheClient` holds the failure counters
 * @param domain - hostname of the URL about to be fetched
 * @returns `true` when the domain is blocked; `false` otherwise, including when the Redis
 *   lookup fails (the error is logged and the fetch is allowed to proceed)
 */
const isDomainBlocked = async (
  redisDataSource: RedisDataSource,
  domain: string
): Promise<boolean> => {
  const hostname = normalizeFetchDomain(domain)
  if (STATICALLY_BLOCKED_DOMAINS.has(hostname)) {
    return true
  }

  const key = failureRedisKey(hostname)
  const redisClient = redisDataSource.cacheClient
  try {
    const result = await redisClient.get(key)
    // A missing or non-numeric counter parses to NaN, which never exceeds the threshold.
    const failures = Number.parseInt(result ?? '', 10)
    // if the domain has failed to fetch more than certain times, block it
    if (failures > resolveMaxFetchFailures()) {
      console.info(`domain is blocked: ${domain}`)
      return true
    }
  } catch (error) {
    // Best effort: a Redis outage must not stop content from being fetched.
    console.error('Failed to check domain block status', { domain, error })
  }

  return false
}

/**
 * Records one failed fetch for `domain` and refreshes the counter's one-day expiry.
 *
 * @param redisDataSource - data source whose `cacheClient` holds the failure counters
 * @param domain - hostname whose fetch just failed
 * @returns the value returned by the client's `incr` call, or `null` when either Redis
 *   call throws (the error is logged, not rethrown)
 */
const incrementContentFetchFailure = async (
  redisDataSource: RedisDataSource,
  domain: string
) => {
  const redisClient = redisDataSource.cacheClient
  const key = failureRedisKey(normalizeFetchDomain(domain))
  try {
    const result = await redisClient.incr(key)
    // expire the key in 1 day
    // NOTE(review): incr and expire are separate round trips; if expire fails after incr
    // succeeded the counter keeps whatever TTL it already had (none on a first failure).
    // Consider a transaction if the client supports one.
    await redisClient.expire(key, FETCH_FAILURE_TTL_SECONDS)

    return result
  } catch (error) {
    console.error('Failed to increment failure in redis', { domain, error })
    return null
  }
}

/*
 * Second hunk of the same patch (@@ -200,8 +246,22 @@), inside `contentFetchRequestHandler`
 * (`export const contentFetchRequestHandler: RequestHandler = async (req, res) => {`,
 * first statement `const functionStartTime = Date.now()`). The rest of the handler is not
 * visible here, so the call-site change is kept as reference text rather than code.
 *
 * Removed:
 *     fetchResult = await fetchContent(url, locale, timezone)
 *     console.log('content has been fetched')
 *
 * Added (after the preceding call that ends with `url )`):
 *     const domain = new URL(url).hostname
 *     const isBlocked = await isDomainBlocked(redisDataSource, domain)
 *     if (isBlocked) {
 *       console.log('domain is blocked', domain)
 *
 *       return res.sendStatus(200)
 *     }
 *
 *     try {
 *       fetchResult = await fetchContent(url, locale, timezone)
 *       console.log('content has been fetched')
 *     } catch (error) {
 *       await incrementContentFetchFailure(redisDataSource, domain)
 *
 *       throw error
 *     }
 *
 * Trailing context:
 *     if (fetchResult.content && !NO_CACHE_URLS.includes(url)) {
 *       const cacheResult = await cacheFetchResult(
 *
 * NOTE(review): `new URL(url)` throws on a malformed URL; whether that is caught depends on
 * handler code outside this view. TODO confirm.
 * NOTE(review): every error thrown by `fetchContent` increments the counter, and nothing in
 * this patch resets it after a successful fetch. Confirm that is intended.
 */