From c9b93a118011784ccd5ac433f4dfedc3793f0774 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Fri, 29 Dec 2023 12:26:43 +0800 Subject: [PATCH 1/2] Don't try to fetch content for some feed URLs like arXiv which blocks fetches --- packages/rss-handler/src/index.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 067a55d54..fcbd3b9d8 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -53,6 +53,13 @@ export const isOldItem = (item: RssFeedItem, lastFetchedAt: number) => { ) } +export const isContentFetchBlocked = (feedUrl: string) => { + if (feedUrl.startsWith('https://arxiv.org/')) { + return true + } + return false +} + const getThumbnail = (item: RssFeedItem) => { if (item['media:thumbnail']) { return item['media:thumbnail'].$.url @@ -455,6 +462,10 @@ const processSubscription = async ( continue } + if (isContentFetchBlocked(feedUrl)) { + fetchContent = false + } + const created = await createTask( userId, feedUrl, From 816416934ffb7525b3037cff6fa35a3e64ee67f3 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Fri, 29 Dec 2023 12:42:43 +0800 Subject: [PATCH 2/2] Move the rss content block check to the rss handler --- packages/rss-handler/src/index.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index fcbd3b9d8..a615fa675 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -462,10 +462,6 @@ const processSubscription = async ( continue } - if (isContentFetchBlocked(feedUrl)) { - fetchContent = false - } - const created = await createTask( userId, feedUrl, @@ -559,6 +555,12 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( return res.status(500).send('INVALID_RSS_FEED') } + let allowFetchContent = true + if (isContentFetchBlocked(feedUrl)) { + console.log('fetching content blocked 
for feed: ', feedUrl) + allowFetchContent = false + } + console.log('Fetched feed', feed.title, new Date()) await Promise.all( @@ -571,7 +573,7 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( lastFetchedTimestamps[i], scheduledTimestamps[i], lastFetchedChecksums[i], - fetchContents[i], + fetchContents[i] && allowFetchContent, folders[i], feed )