diff --git a/packages/api/src/utils/createTask.ts b/packages/api/src/utils/createTask.ts index ea3745594..26ec75c41 100644 --- a/packages/api/src/utils/createTask.ts +++ b/packages/api/src/utils/createTask.ts @@ -573,7 +573,7 @@ export const enqueueRssFeedFetch = async ( const payload = { subscriptionId: rssFeedSubscription.id, feedUrl: rssFeedSubscription.url, - lastFetchedAt: rssFeedSubscription.lastFetchedAt, + lastFetchedAt: rssFeedSubscription.lastFetchedAt?.getTime() || 0, // unix timestamp in milliseconds } const headers = { diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 66d3d9f49..bbbd537f2 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -9,7 +9,12 @@ import { CONTENT_FETCH_URL, createCloudTask } from './task' interface RssFeedRequest { subscriptionId: string feedUrl: string - lastFetchedAt: string + lastFetchedAt: number // unix timestamp in milliseconds +} + +interface ValidRssFeedItem { + link: string + isoDate: string } function isRssFeedRequest(body: any): body is RssFeedRequest { @@ -78,6 +83,35 @@ const sendUpdateSubscriptionMutation = async ( } } +const createSavingItemTask = async ( + userId: string, + feedUrl: string, + item: ValidRssFeedItem +) => { + const input = { + userId, + source: 'rss-feeder', + url: item.link, + saveRequestId: '', + labels: [{ name: 'RSS', color: '#f26522' }], + rssFeedUrl: feedUrl, + savedAt: item.isoDate, + publishedAt: item.isoDate, + } + + try { + console.log('Creating task', input.url) + // save page + const task = await createCloudTask(CONTENT_FETCH_URL, input) + console.log('Created task', task) + + return !!task + } catch (error) { + console.error('Error while creating task', error) + return false + } +} + dotenv.config() Sentry.GCPFunction.init({ dsn: process.env.SENTRY_DSN, @@ -121,13 +155,15 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( const { feedUrl, subscriptionId, lastFetchedAt } = req.body console.log('Processing feed', feedUrl, lastFetchedAt) + let lastItemFetchedAt: Date | null = null + let lastValidItem: ValidRssFeedItem | null = null + // fetch feed const feed = await parser.parseURL(feedUrl) - const newFetchedAt = new Date() - console.log('Fetched feed', feed.title, newFetchedAt) + console.log('Fetched feed', feed.title, new Date()) // save each item in the feed - for await (const item of feed.items) { + for (const item of feed.items) { console.log('Processing feed item', item.link, item.isoDate) if (!item.link || !item.isoDate) { @@ -135,42 +171,68 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( continue } + // remember the last valid item + lastValidItem = { + link: item.link, + isoDate: item.isoDate, + } + // skip old items and items that were published before 24h const publishedAt = new Date(item.isoDate) if ( - publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000) || - publishedAt < new Date(lastFetchedAt) + publishedAt < new Date(lastFetchedAt) || + publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000) ) { - console.log('Skipping old feed item', item.link) + console.log('Skipping old feed item', lastValidItem.link) continue } - const input = { + const created = await createSavingItemTask( userId, - source: 'rss-feeder', - url: item.link, - saveRequestId: '', - labels: [{ name: 'RSS', color: '#f26522' }], - rssFeedUrl: feedUrl, - savedAt: publishedAt, - publishedAt, + feedUrl, + lastValidItem + ) + if (!created) { + console.error('Failed to create task for feed item', item.link) + continue } - try { - console.log('Creating task', input.url) - // save page - const task = await createCloudTask(CONTENT_FETCH_URL, input) - console.log('Created task', task) - } catch (error) { - console.error('Error while creating task', error) + // remember the last item fetched at + if (!lastItemFetchedAt || publishedAt > lastItemFetchedAt) { + lastItemFetchedAt = publishedAt } } + // no items saved + if (!lastItemFetchedAt) { + // the feed has been fetched before, no new valid items found + if (lastFetchedAt || !lastValidItem) { + console.log('No new valid items found') + return res.send('ok') + } + + // the feed has never been fetched, save at least the last valid item + const created = await createSavingItemTask( + userId, + feedUrl, + lastValidItem + ) + if (!created) { + console.error( + 'Failed to create task for feed item', + lastValidItem.link + ) + return res.status(500).send('INTERNAL_SERVER_ERROR') + } + + lastItemFetchedAt = new Date(lastValidItem.isoDate) + } + // update subscription lastFetchedAt const updatedSubscription = await sendUpdateSubscriptionMutation( userId, subscriptionId, - newFetchedAt + lastItemFetchedAt ) console.log('Updated subscription', updatedSubscription)