Merge pull request #3250 from omnivore-app/fix/duplicate-rss-item

Skip fetching items that were re-published in the feed, so they are not saved as duplicates.
This commit is contained in:
Hongbo Wu
2023-12-15 17:42:38 +08:00
committed by GitHub
2 changed files with 36 additions and 6 deletions

View File

@ -44,6 +44,15 @@ type RssFeedItem = Item & {
'media:content'?: RssFeedItemMedia[]
}
/**
 * Tells whether a feed item should be skipped as "old".
 *
 * An item is old when its publish time is at or before the last fetch time,
 * or when it was published more than 24 hours before the current time.
 * Items without an `isoDate` are treated as published at the moment of the
 * call.
 *
 * @param item - the parsed feed item; only `isoDate` is read
 * @param lastFetchedAt - time of the previous fetch, in epoch milliseconds
 * @returns `true` if the item should be skipped, `false` otherwise
 */
export const isOldItem = (item: RssFeedItem, lastFetchedAt: number) => {
  const oneDayMs = 24 * 60 * 60 * 1000

  // fall back to the current time when the item carries no ISO date
  const publishedMs = (
    item.isoDate ? new Date(item.isoDate) : new Date()
  ).getTime()

  // published at or before the previous fetch
  if (publishedMs <= new Date(lastFetchedAt).getTime()) {
    return true
  }

  // published more than 24 hours ago
  return publishedMs < new Date(Date.now() - oneDayMs).getTime()
}
const getThumbnail = (item: RssFeedItem) => {
if (item['media:thumbnail']) {
return item['media:thumbnail'].$.url
@ -400,7 +409,7 @@ const processSubscription = async (
console.log('Feed last build date', feedLastBuildDate)
if (
feedLastBuildDate &&
new Date(feedLastBuildDate) < new Date(lastFetchedAt)
new Date(feedLastBuildDate) <= new Date(lastFetchedAt)
) {
console.log('Skipping old feed', feedLastBuildDate)
return
@ -440,11 +449,8 @@ const processSubscription = async (
continue
}
// skip old items and items that were published before 24h
if (
publishedAt < new Date(lastFetchedAt) ||
publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000)
) {
// skip old items
if (isOldItem(item, lastFetchedAt)) {
console.log('Skipping old feed item', item.link)
continue
}

View File

@ -0,0 +1,24 @@
import { expect } from 'chai'
import 'mocha'
import { Item } from 'rss-parser'
import { isOldItem } from '../src'
// Unit tests for isOldItem. The fixtures set `isoDate` because that is the
// field isOldItem reads; an item carrying only `pubDate` is treated as
// published "now", which would make these assertions depend on clock timing.
describe('isOldItem', () => {
  it('returns true if item is older than 1 day', () => {
    const item = {
      isoDate: '2020-01-01T00:00:00.000Z',
    } as Item
    // the last fetch predates the item, so only the 24h cutoff can mark it old
    const lastFetchedAt = new Date('2019-01-01T00:00:00.000Z').getTime()

    expect(isOldItem(item, lastFetchedAt)).to.be.true
  })

  it('returns true if item was published at the last fetched time', () => {
    const lastFetchedAt = Date.now()
    const item = {
      isoDate: new Date(lastFetchedAt).toISOString(),
    } as Item

    expect(isOldItem(item, lastFetchedAt)).to.be.true
  })

  it('returns false if item was published after the last fetch and within 1 day', () => {
    const now = Date.now()
    // last fetch happened one hour before the item was published
    const lastFetchedAt = now - 60 * 60 * 1000
    const item = {
      isoDate: new Date(now).toISOString(),
    } as Item

    expect(isOldItem(item, lastFetchedAt)).to.be.false
  })
})