Merge pull request #3250 from omnivore-app/fix/duplicate-rss-item
Skip fetching re-published items in the feed to avoid duplicates
This commit is contained in:
@ -44,6 +44,15 @@ type RssFeedItem = Item & {
|
||||
'media:content'?: RssFeedItemMedia[]
|
||||
}
|
||||
|
||||
export const isOldItem = (item: RssFeedItem, lastFetchedAt: number) => {
|
||||
// existing items and items that were published before 24h
|
||||
const publishedAt = item.isoDate ? new Date(item.isoDate) : new Date()
|
||||
return (
|
||||
publishedAt <= new Date(lastFetchedAt) ||
|
||||
publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000)
|
||||
)
|
||||
}
|
||||
|
||||
const getThumbnail = (item: RssFeedItem) => {
|
||||
if (item['media:thumbnail']) {
|
||||
return item['media:thumbnail'].$.url
|
||||
@ -400,7 +409,7 @@ const processSubscription = async (
|
||||
console.log('Feed last build date', feedLastBuildDate)
|
||||
if (
|
||||
feedLastBuildDate &&
|
||||
new Date(feedLastBuildDate) < new Date(lastFetchedAt)
|
||||
new Date(feedLastBuildDate) <= new Date(lastFetchedAt)
|
||||
) {
|
||||
console.log('Skipping old feed', feedLastBuildDate)
|
||||
return
|
||||
@ -440,11 +449,8 @@ const processSubscription = async (
|
||||
continue
|
||||
}
|
||||
|
||||
// skip old items and items that were published before 24h
|
||||
if (
|
||||
publishedAt < new Date(lastFetchedAt) ||
|
||||
publishedAt < new Date(Date.now() - 24 * 60 * 60 * 1000)
|
||||
) {
|
||||
// skip old items
|
||||
if (isOldItem(item, lastFetchedAt)) {
|
||||
console.log('Skipping old feed item', item.link)
|
||||
continue
|
||||
}
|
||||
|
||||
24
packages/rss-handler/test/index.test.ts
Normal file
24
packages/rss-handler/test/index.test.ts
Normal file
@ -0,0 +1,24 @@
|
||||
import { expect } from 'chai'
|
||||
import 'mocha'
|
||||
import { Item } from 'rss-parser'
|
||||
import { isOldItem } from '../src'
|
||||
|
||||
describe('isOldItem', () => {
|
||||
it('returns true if item is older than 1 day', () => {
|
||||
const item = {
|
||||
pubDate: '2020-01-01',
|
||||
} as Item
|
||||
const lastFetchedAt = Date.now()
|
||||
|
||||
expect(isOldItem(item, lastFetchedAt)).to.be.true
|
||||
})
|
||||
|
||||
it('returns true if item was published at the last fetched time', () => {
|
||||
const lastFetchedAt = Date.now()
|
||||
const item = {
|
||||
pubDate: new Date(lastFetchedAt).toISOString(),
|
||||
} as Item
|
||||
|
||||
expect(isOldItem(item, lastFetchedAt)).to.be.true
|
||||
})
|
||||
})
|
||||
Reference in New Issue
Block a user