Merge pull request #2506 from omnivore-app/more-rss-improvement
if the feed has never been fetched, save at least the last valid item
This commit is contained in:
@ -573,7 +573,7 @@ export const enqueueRssFeedFetch = async (
|
||||
const payload = {
|
||||
subscriptionId: rssFeedSubscription.id,
|
||||
feedUrl: rssFeedSubscription.url,
|
||||
lastFetchedAt: rssFeedSubscription.lastFetchedAt,
|
||||
lastFetchedAt: rssFeedSubscription.lastFetchedAt?.getTime() || 0, // unix timestamp in milliseconds
|
||||
}
|
||||
|
||||
const headers = {
|
||||
|
||||
@ -9,7 +9,12 @@ import { CONTENT_FETCH_URL, createCloudTask } from './task'
|
||||
/**
 * Body of a request asking the handler to process one RSS feed subscription.
 */
interface RssFeedRequest {
  /** Id of the subscription being processed. */
  subscriptionId: string
  /** URL of the feed to fetch and parse. */
  feedUrl: string
  /**
   * Unix timestamp in milliseconds of the previous fetch.
   * The enqueuing side sends 0 when the subscription has no previous fetch.
   */
  lastFetchedAt: number
}
|
||||
|
||||
/**
 * A feed item that has both fields the handler requires before saving it.
 */
interface ValidRssFeedItem {
  /** URL of the item; used as the `url` of the save task. */
  link: string
  /** Date string of the item; the handler parses it with `new Date(...)`. */
  isoDate: string
}
|
||||
|
||||
function isRssFeedRequest(body: any): body is RssFeedRequest {
|
||||
@ -78,6 +83,35 @@ const sendUpdateSubscriptionMutation = async (
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Enqueues a content-fetch task that saves a single RSS feed item for a user.
 *
 * @param userId - id of the user the item is saved for
 * @param feedUrl - URL of the feed the item came from; sent as `rssFeedUrl`
 * @param item - feed item providing the link and date of the page to save
 * @returns `true` when `createCloudTask` resolved to a truthy task, `false`
 *   when it resolved to a falsy value or threw
 */
const createSavingItemTask = async (
  userId: string,
  feedUrl: string,
  item: ValidRssFeedItem
): Promise<boolean> => {
  const input = {
    userId,
    source: 'rss-feeder',
    url: item.link,
    saveRequestId: '',
    labels: [{ name: 'RSS', color: '#f26522' }],
    rssFeedUrl: feedUrl,
    // the item's own date is used for both the saved and published timestamps
    savedAt: item.isoDate,
    publishedAt: item.isoDate,
  }

  try {
    console.log('Creating task', input.url)
    // save page
    const task = await createCloudTask(CONTENT_FETCH_URL, input)
    console.log('Created task', task)

    return !!task
  } catch (error) {
    // failures are reported through the return value so the caller decides
    // whether to skip the item or fail the request
    console.error('Error while creating task', error)
    return false
  }
}
|
||||
|
||||
dotenv.config()
|
||||
Sentry.GCPFunction.init({
|
||||
dsn: process.env.SENTRY_DSN,
|
||||
@ -121,13 +155,15 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction(
|
||||
const { feedUrl, subscriptionId, lastFetchedAt } = req.body
|
||||
console.log('Processing feed', feedUrl, lastFetchedAt)
|
||||
|
||||
let lastItemFetchedAt: Date | null = null
|
||||
let lastValidItem: ValidRssFeedItem | null = null
|
||||
|
||||
// fetch feed
|
||||
const feed = await parser.parseURL(feedUrl)
|
||||
const newFetchedAt = new Date()
|
||||
console.log('Fetched feed', feed.title, newFetchedAt)
|
||||
console.log('Fetched feed', feed.title, new Date())
|
||||
|
||||
// save each item in the feed
|
||||
for await (const item of feed.items) {
|
||||
for (const item of feed.items) {
|
||||
console.log('Processing feed item', item.link, item.isoDate)
|
||||
|
||||
if (!item.link || !item.isoDate) {
|
||||
@ -135,42 +171,68 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction(
|
||||
continue
|
||||
}
|
||||
|
||||
// remember the last valid item
|
||||
lastValidItem = {
|
||||
link: item.link,
|
||||
isoDate: item.isoDate,
|
||||
}
|
||||
|
||||
// skip old items and items that were published before 24h
|
||||
const publishedAt = new Date(item.isoDate)
|
||||
if (
|
||||
publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000) ||
|
||||
publishedAt < new Date(lastFetchedAt)
|
||||
publishedAt < new Date(lastFetchedAt) ||
|
||||
publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000)
|
||||
) {
|
||||
console.log('Skipping old feed item', item.link)
|
||||
console.log('Skipping old feed item', lastValidItem.link)
|
||||
continue
|
||||
}
|
||||
|
||||
const input = {
|
||||
const created = await createSavingItemTask(
|
||||
userId,
|
||||
source: 'rss-feeder',
|
||||
url: item.link,
|
||||
saveRequestId: '',
|
||||
labels: [{ name: 'RSS', color: '#f26522' }],
|
||||
rssFeedUrl: feedUrl,
|
||||
savedAt: publishedAt,
|
||||
publishedAt,
|
||||
feedUrl,
|
||||
lastValidItem
|
||||
)
|
||||
if (!created) {
|
||||
console.error('Failed to create task for feed item', item.link)
|
||||
continue
|
||||
}
|
||||
|
||||
try {
|
||||
console.log('Creating task', input.url)
|
||||
// save page
|
||||
const task = await createCloudTask(CONTENT_FETCH_URL, input)
|
||||
console.log('Created task', task)
|
||||
} catch (error) {
|
||||
console.error('Error while creating task', error)
|
||||
// remember the last item fetched at
|
||||
if (!lastItemFetchedAt || publishedAt > lastItemFetchedAt) {
|
||||
lastItemFetchedAt = publishedAt
|
||||
}
|
||||
}
|
||||
|
||||
// no items saved
|
||||
if (!lastItemFetchedAt) {
|
||||
// the feed has been fetched before, no new valid items found
|
||||
if (lastFetchedAt || !lastValidItem) {
|
||||
console.log('No new valid items found')
|
||||
return res.send('ok')
|
||||
}
|
||||
|
||||
// the feed has never been fetched, save at least the last valid item
|
||||
const created = await createSavingItemTask(
|
||||
userId,
|
||||
feedUrl,
|
||||
lastValidItem
|
||||
)
|
||||
if (!created) {
|
||||
console.error(
|
||||
'Failed to create task for feed item',
|
||||
lastValidItem.link
|
||||
)
|
||||
return res.status(500).send('INTERNAL_SERVER_ERROR')
|
||||
}
|
||||
|
||||
lastItemFetchedAt = new Date(lastValidItem.isoDate)
|
||||
}
|
||||
|
||||
// update subscription lastFetchedAt
|
||||
const updatedSubscription = await sendUpdateSubscriptionMutation(
|
||||
userId,
|
||||
subscriptionId,
|
||||
newFetchedAt
|
||||
lastItemFetchedAt
|
||||
)
|
||||
console.log('Updated subscription', updatedSubscription)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user