From 834c4ec2bd714bbbdd0705b0fb38b134870423bc Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 19 Jul 2023 13:44:29 +0800 Subject: [PATCH 1/3] if the feed has never been fetched, save the last valid item --- packages/rss-handler/src/index.ts | 100 ++++++++++++++++++++++++------ 1 file changed, 81 insertions(+), 19 deletions(-) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 66d3d9f49..bc1b859ec 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -12,6 +12,11 @@ interface RssFeedRequest { lastFetchedAt: string } +interface ValidRssFeedItem { + link: string + isoDate: string +} + function isRssFeedRequest(body: any): body is RssFeedRequest { return ( 'subscriptionId' in body && 'feedUrl' in body && 'lastFetchedAt' in body @@ -78,6 +83,35 @@ const sendUpdateSubscriptionMutation = async ( } } +const createSavingItemTask = async ( + userId: string, + feedUrl: string, + item: ValidRssFeedItem +) => { + const input = { + userId, + source: 'rss-feeder', + url: item.link, + saveRequestId: '', + labels: [{ name: 'RSS', color: '#f26522' }], + rssFeedUrl: feedUrl, + savedAt: item.isoDate, + publishedAt: item.isoDate, + } + + try { + console.log('Creating task', input.url) + // save page + const task = await createCloudTask(CONTENT_FETCH_URL, input) + console.log('Created task', task) + + return !!task + } catch (error) { + console.error('Error while creating task', error) + return false + } +} + dotenv.config() Sentry.GCPFunction.init({ dsn: process.env.SENTRY_DSN, @@ -121,13 +155,15 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( const { feedUrl, subscriptionId, lastFetchedAt } = req.body console.log('Processing feed', feedUrl, lastFetchedAt) + let lastItemFetchedAt: Date | null = null + let lastValidItem: ValidRssFeedItem | null = null + // fetch feed const feed = await parser.parseURL(feedUrl) - const newFetchedAt = new Date() - console.log('Fetched feed', feed.title, newFetchedAt) + console.log('Fetched feed', feed.title, new Date()) // save each item in the feed - for await (const item of feed.items) { + for (const item of feed.items) { console.log('Processing feed item', item.link, item.isoDate) if (!item.link || !item.isoDate) { @@ -135,6 +171,12 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( continue } + // remember the last valid item + lastValidItem = { + link: item.link, + isoDate: item.isoDate, + } + // skip old items and items that were published before 24h const publishedAt = new Date(item.isoDate) if ( @@ -145,32 +187,52 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( continue } - const input = { + const created = await createSavingItemTask( userId, - source: 'rss-feeder', - url: item.link, - saveRequestId: '', - labels: [{ name: 'RSS', color: '#f26522' }], - rssFeedUrl: feedUrl, - savedAt: publishedAt, - publishedAt, + feedUrl, + lastValidItem + ) + if (!created) { + console.error('Failed to create task for feed item', item.link) + continue } - try { - console.log('Creating task', input.url) - // save page - const task = await createCloudTask(CONTENT_FETCH_URL, input) - console.log('Created task', task) - } catch (error) { - console.error('Error while creating task', error) + // remember the last item fetched at + if (!lastItemFetchedAt || publishedAt > lastItemFetchedAt) { + lastItemFetchedAt = publishedAt } } + // no items saved + if (!lastItemFetchedAt) { + // the feed has been fetched before, no new valid items found + if (lastFetchedAt || !lastValidItem) { + console.log('No new valid items found') + return res.send('ok') + } + + // the feed has never been fetched, save the last valid item + const created = await createSavingItemTask( + userId, + feedUrl, + lastValidItem + ) + if (!created) { + console.error( + 'Failed to create task for feed item', + lastValidItem.link + ) + return res.status(500).send('INTERNAL_SERVER_ERROR') + } + + lastItemFetchedAt = new Date(lastValidItem.isoDate) + } + // update subscription lastFetchedAt const updatedSubscription = await sendUpdateSubscriptionMutation( userId, subscriptionId, - newFetchedAt + lastItemFetchedAt ) console.log('Updated subscription', updatedSubscription) From 234b51af28543a4a4f32d76968254a6669ab2624 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 19 Jul 2023 13:45:44 +0800 Subject: [PATCH 2/3] reword comment --- packages/rss-handler/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index bc1b859ec..df1d7150a 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -211,7 +211,7 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( return res.send('ok') } - // the feed has never been fetched, save the last valid item + // the feed has never been fetched, save at least the last valid item const created = await createSavingItemTask( userId, feedUrl, From cd440a77c3de333a17fb75d7ee6f633678e00477 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 19 Jul 2023 14:09:38 +0800 Subject: [PATCH 3/3] send last fetched at as a unix timestamp in ms --- packages/api/src/utils/createTask.ts | 2 +- packages/rss-handler/src/index.ts | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/api/src/utils/createTask.ts b/packages/api/src/utils/createTask.ts index ea3745594..26ec75c41 100644 --- a/packages/api/src/utils/createTask.ts +++ b/packages/api/src/utils/createTask.ts @@ -573,7 +573,7 @@ export const enqueueRssFeedFetch = async ( const payload = { subscriptionId: rssFeedSubscription.id, feedUrl: rssFeedSubscription.url, - lastFetchedAt: rssFeedSubscription.lastFetchedAt, + lastFetchedAt: rssFeedSubscription.lastFetchedAt?.getTime() || 0, // unix timestamp in milliseconds } const headers = { diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index df1d7150a..bbbd537f2 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -9,7 +9,7 @@ import { CONTENT_FETCH_URL, createCloudTask } from './task' interface RssFeedRequest { subscriptionId: string feedUrl: string - lastFetchedAt: string + lastFetchedAt: number // unix timestamp in milliseconds } interface ValidRssFeedItem { @@ -180,10 +180,10 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( // skip old items and items that were published before 24h const publishedAt = new Date(item.isoDate) if ( - publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000) || - publishedAt < new Date(lastFetchedAt) + publishedAt < new Date(lastFetchedAt) || + publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000) ) { - console.log('Skipping old feed item', item.link) + console.log('Skipping old feed item', lastValidItem.link) continue }