Merge pull request #2506 from omnivore-app/more-rss-improvement

if the feed has never been fetched, save at least the last valid item
This commit is contained in:
Hongbo Wu
2023-07-19 14:10:45 +08:00
committed by GitHub
2 changed files with 86 additions and 24 deletions

View File

@ -573,7 +573,7 @@ export const enqueueRssFeedFetch = async (
const payload = {
subscriptionId: rssFeedSubscription.id,
feedUrl: rssFeedSubscription.url,
lastFetchedAt: rssFeedSubscription.lastFetchedAt,
lastFetchedAt: rssFeedSubscription.lastFetchedAt?.getTime() || 0, // unix timestamp in milliseconds
}
const headers = {

View File

@ -9,7 +9,12 @@ import { CONTENT_FETCH_URL, createCloudTask } from './task'
interface RssFeedRequest {
subscriptionId: string
feedUrl: string
lastFetchedAt: string
lastFetchedAt: number // unix timestamp in milliseconds
}
/**
 * Minimal shape of a feed item that can be turned into a save task:
 * both fields are required (non-optional) strings.
 */
interface ValidRssFeedItem {
// URL of the feed item; sent as the `url` field of the save task payload
link: string
// date string of the feed item; sent as both `savedAt` and `publishedAt`
// and parsed with `new Date(...)` when compared against other timestamps
isoDate: string
}
function isRssFeedRequest(body: any): body is RssFeedRequest {
@ -78,6 +83,35 @@ const sendUpdateSubscriptionMutation = async (
}
}
/**
 * Enqueues a cloud task that saves one RSS feed item.
 *
 * The payload is handed to `createCloudTask` targeting `CONTENT_FETCH_URL`.
 * An error thrown while creating the task is logged and turned into a
 * `false` result instead of being rethrown.
 *
 * @param userId - value sent as the payload's `userId`
 * @param feedUrl - value sent as the payload's `rssFeedUrl`
 * @param item - feed item; its `link` becomes `url`, and its `isoDate` is
 *   used for both `savedAt` and `publishedAt`
 * @returns `true` when `createCloudTask` resolved with a truthy value,
 *   `false` when it resolved with a falsy value or threw
 */
const createSavingItemTask = async (
  userId: string,
  feedUrl: string,
  item: ValidRssFeedItem
) => {
  const { link, isoDate } = item
  // key order is kept as-is so the serialized payload does not change
  const payload = {
    userId,
    source: 'rss-feeder',
    url: link,
    saveRequestId: '',
    labels: [{ name: 'RSS', color: '#f26522' }],
    rssFeedUrl: feedUrl,
    savedAt: isoDate,
    publishedAt: isoDate,
  }

  let created = false
  try {
    console.log('Creating task', payload.url)
    // save page
    const createdTask = await createCloudTask(CONTENT_FETCH_URL, payload)
    console.log('Created task', createdTask)
    created = Boolean(createdTask)
  } catch (err) {
    // failure is reported through the return value, not by throwing
    console.error('Error while creating task', err)
  }
  return created
}
dotenv.config()
Sentry.GCPFunction.init({
dsn: process.env.SENTRY_DSN,
@ -121,13 +155,15 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction(
const { feedUrl, subscriptionId, lastFetchedAt } = req.body
console.log('Processing feed', feedUrl, lastFetchedAt)
let lastItemFetchedAt: Date | null = null
let lastValidItem: ValidRssFeedItem | null = null
// fetch feed
const feed = await parser.parseURL(feedUrl)
const newFetchedAt = new Date()
console.log('Fetched feed', feed.title, newFetchedAt)
console.log('Fetched feed', feed.title, new Date())
// save each item in the feed
for await (const item of feed.items) {
for (const item of feed.items) {
console.log('Processing feed item', item.link, item.isoDate)
if (!item.link || !item.isoDate) {
@ -135,42 +171,68 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction(
continue
}
// remember the last valid item
lastValidItem = {
link: item.link,
isoDate: item.isoDate,
}
// skip items published before the last fetch or more than 24 hours ago
const publishedAt = new Date(item.isoDate)
if (
publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000) ||
publishedAt < new Date(lastFetchedAt)
publishedAt < new Date(lastFetchedAt) ||
publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000)
) {
console.log('Skipping old feed item', item.link)
console.log('Skipping old feed item', lastValidItem.link)
continue
}
const input = {
const created = await createSavingItemTask(
userId,
source: 'rss-feeder',
url: item.link,
saveRequestId: '',
labels: [{ name: 'RSS', color: '#f26522' }],
rssFeedUrl: feedUrl,
savedAt: publishedAt,
publishedAt,
feedUrl,
lastValidItem
)
if (!created) {
console.error('Failed to create task for feed item', item.link)
continue
}
try {
console.log('Creating task', input.url)
// save page
const task = await createCloudTask(CONTENT_FETCH_URL, input)
console.log('Created task', task)
} catch (error) {
console.error('Error while creating task', error)
// remember the last item fetched at
if (!lastItemFetchedAt || publishedAt > lastItemFetchedAt) {
lastItemFetchedAt = publishedAt
}
}
// no items saved
if (!lastItemFetchedAt) {
// the feed has been fetched before, no new valid items found
if (lastFetchedAt || !lastValidItem) {
console.log('No new valid items found')
return res.send('ok')
}
// the feed has never been fetched, save at least the last valid item
const created = await createSavingItemTask(
userId,
feedUrl,
lastValidItem
)
if (!created) {
console.error(
'Failed to create task for feed item',
lastValidItem.link
)
return res.status(500).send('INTERNAL_SERVER_ERROR')
}
lastItemFetchedAt = new Date(lastValidItem.isoDate)
}
// update subscription lastFetchedAt
const updatedSubscription = await sendUpdateSubscriptionMutation(
userId,
subscriptionId,
newFetchedAt
lastItemFetchedAt
)
console.log('Updated subscription', updatedSubscription)