From 0f774b6ae42ee945d11e444580ce7ad9d4c742be Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 4 Jan 2024 17:36:12 +0800 Subject: [PATCH 01/12] skip recently saved rss item --- packages/rss-handler/package.json | 1 + packages/rss-handler/src/index.ts | 104 +++++++++++++++++++++--------- packages/rss-handler/src/redis.ts | 26 ++++++++ 3 files changed, 100 insertions(+), 31 deletions(-) create mode 100644 packages/rss-handler/src/redis.ts diff --git a/packages/rss-handler/package.json b/packages/rss-handler/package.json index 49452e22e..728cd9b82 100644 --- a/packages/rss-handler/package.json +++ b/packages/rss-handler/package.json @@ -29,6 +29,7 @@ "dotenv": "^16.0.1", "jsonwebtoken": "^8.5.1", "linkedom": "^0.16.4", + "redis": "^4.3.1", "rss-parser": "^3.13.0" }, "volta": { diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index a615fa675..2da93f3f7 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -4,11 +4,15 @@ import crypto from 'crypto' import * as dotenv from 'dotenv' // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import import * as jwt from 'jsonwebtoken' import { parseHTML } from 'linkedom' +import { createClient } from 'redis' import Parser, { Item } from 'rss-parser' import { promisify } from 'util' +import { createRedisClient } from './redis' import { CONTENT_FETCH_URL, createCloudTask } from './task' type FolderType = 'following' | 'inbox' +// explicitly create the return type of RedisClient +type RedisClient = ReturnType interface RssFeedRequest { subscriptionIds: string[] @@ -42,6 +46,7 @@ type RssFeedItemMedia = { type RssFeedItem = Item & { 'media:thumbnail'?: RssFeedItemMedia 'media:content'?: RssFeedItemMedia[] + link: string } export const isOldItem = (item: RssFeedItem, lastFetchedAt: number) => { @@ -214,13 +219,34 @@ const sendUpdateSubscriptionMutation = async ( } } +const isItemRecentlySaved = async ( + redisClient: RedisClient, + userId: string, + url: string +) => { + const key = `rss-recent-save:${userId}:${url}` + const result = await redisClient.get(key) + return !!result +} + const createTask = async ( userId: string, feedUrl: string, item: RssFeedItem, fetchContent: boolean, - folder: FolderType + folder: FolderType, + redisClient: RedisClient ) => { + const isRecentlySaved = await isItemRecentlySaved( + redisClient, + userId, + item.link + ) + if (isRecentlySaved) { + console.log('Item recently saved', item.link) + return true + } + if (folder === 'following' && !fetchContent) { return createItemWithPreviewContent(userId, feedUrl, item) } @@ -363,7 +389,7 @@ const getUpdatePeriodInHours = (feed: RssFeed) => { } // get link following the order of preference: via, alternate, self -const getLink = (links: RssFeedItemLink[]) => { +const getLink = (links: RssFeedItemLink[]): string | undefined => { // sort links by preference const sortedLinks: string[] = [] @@ -398,10 +424,11 @@ const processSubscription = async ( lastFetchedChecksum: string, fetchContent: boolean, folder: FolderType, - feed: RssFeed + feed: RssFeed, + redisClient: RedisClient ) => { let lastItemFetchedAt: Date | null = null - let lastValidItem: Item | null = null + let lastValidItem: RssFeedItem | null = null if (fetchResult.checksum === lastFetchedChecksum) { console.log('feed has not been updated', feedUrl, lastFetchedChecksum) @@ -425,7 +452,7 @@ const processSubscription = async ( // save each item in the feed for (const item of feed.items) { // use published or updated if isoDate is not available for atom feeds - item.isoDate = + const isoDate = item.isoDate || item.published || item.updated || item.created console.log('Processing feed item', item.links, item.isoDate, feed.feedUrl) @@ -434,21 +461,28 @@ const processSubscription = async ( continue } - item.link = getLink(item.links) - if (!item.link) { + const link = getLink(item.links) + if (!link) { console.log('Invalid feed item links', item.links) continue } - console.log('Fetching feed item', item.link) + console.log('Fetching feed item', link) + const feedItem = { + ...item, + isoDate, + link, + } - const publishedAt = item.isoDate ? new Date(item.isoDate) : new Date() + const publishedAt = feedItem.isoDate + ? new Date(feedItem.isoDate) + : new Date() // remember the last valid item if ( !lastValidItem || (lastValidItem.isoDate && publishedAt > new Date(lastValidItem.isoDate)) ) { - lastValidItem = item + lastValidItem = feedItem } // Max limit per-feed update @@ -457,20 +491,21 @@ const processSubscription = async ( } // skip old items - if (isOldItem(item, lastFetchedAt)) { - console.log('Skipping old feed item', item.link) + if (isOldItem(feedItem, lastFetchedAt)) { + console.log('Skipping old feed item', feedItem.link) continue } const created = await createTask( userId, feedUrl, - item, + feedItem, fetchContent, - folder + folder, + redisClient ) if (!created) { - console.error('Failed to create task for feed item', item.link) + console.error('Failed to create task for feed item', feedItem.link) continue } @@ -496,7 +531,8 @@ const processSubscription = async ( feedUrl, lastValidItem, fetchContent, - folder + folder, + redisClient ) if (!created) { console.error('Failed to create task for feed item', lastValidItem.link) @@ -536,6 +572,12 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( return res.status(400).send('INVALID_REQUEST_BODY') } + // create redis client + const redisClient = await createRedisClient( + process.env.REDIS_URL, + process.env.REDIS_CERT + ) + const { feedUrl, subscriptionIds, @@ -563,22 +605,22 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( console.log('Fetched feed', feed.title, new Date()) - await Promise.all( - subscriptionIds.map((_, i) => - processSubscription( - subscriptionIds[i], - userIds[i], - feedUrl, - fetchResult, - lastFetchedTimestamps[i], - scheduledTimestamps[i], - lastFetchedChecksums[i], - fetchContents[i] && allowFetchContent, - folders[i], - feed - ) + // process each subscription sequentially + for (let i = 0; i < subscriptionIds.length; i++) { + await processSubscription( + subscriptionIds[i], + userIds[i], + feedUrl, + fetchResult, + lastFetchedTimestamps[i], + scheduledTimestamps[i], + lastFetchedChecksums[i], + fetchContents[i] && allowFetchContent, + folders[i], + feed, + redisClient ) - ) + } res.send('ok') } catch (e) { diff --git a/packages/rss-handler/src/redis.ts b/packages/rss-handler/src/redis.ts new file mode 100644 index 000000000..350cb09f6 --- /dev/null +++ b/packages/rss-handler/src/redis.ts @@ -0,0 +1,26 @@ +import { createClient } from 'redis' + +export const createRedisClient = async (url?: string, cert?: string) => { + const redisClient = createClient({ + url, + socket: { + tls: url?.startsWith('rediss://'), // rediss:// is the protocol for TLS + cert: cert?.replace(/\\n/g, '\n'), // replace \n with new line + rejectUnauthorized: false, // for self-signed certs + connectTimeout: 10000, // 10 seconds + reconnectStrategy(retries: number): number | Error { + if (retries > 10) { + return new Error('Retries exhausted') + } + return 1000 + }, + }, + }) + + redisClient.on('error', (err) => console.error('Redis Client Error', err)) + + await redisClient.connect() + console.log('Redis Client Connected:', url) + + return redisClient +} From 08c4624fceb856cfa12c2836f38f2eb39384b75f Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 4 Jan 2024 18:03:34 +0800 Subject: [PATCH 02/12] save the url in redis for 8 hours so rss-feeder will not try to re-save it --- packages/api/package.json | 1 + packages/api/src/redis.ts | 18 ++++++++++++++++++ packages/api/src/server.ts | 3 +++ packages/api/src/services/save_page.ts | 9 +++++++++ packages/api/src/util.ts | 11 +++++++++++ packages/rss-handler/src/index.ts | 2 +- 6 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 packages/api/src/redis.ts diff --git a/packages/api/package.json b/packages/api/package.json index 34d9b24fb..75cecca83 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -82,6 +82,7 @@ "pg": "^8.3.3", "postgrator": "^4.2.0", "private-ip": "^2.3.3", + "redis": "^4.3.1", "rss-parser": "^3.13.0", "sanitize-html": "^2.3.2", "sax": "^1.3.0", diff --git a/packages/api/src/redis.ts b/packages/api/src/redis.ts new file mode 100644 index 000000000..14559c0be --- /dev/null +++ b/packages/api/src/redis.ts @@ -0,0 +1,18 @@ +import { createClient } from 'redis' +import { env } from './env' + +export const redisClient = createClient({ + url: env.redis.url, + socket: { + tls: env.redis.url?.startsWith('rediss://'), // rediss:// is the protocol for TLS + cert: env.redis.cert?.replace(/\\n/g, '\n'), // replace \n with new line + rejectUnauthorized: false, // for self-signed certs + connectTimeout: 10000, // 10 seconds + reconnectStrategy(retries: number): number | Error { + if (retries > 10) { + return new Error('Retries exhausted') + } + return 1000 + }, + }, +}) diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index 1bb75a085..2380e3c37 100755 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -15,6 +15,7 @@ import { config, loggers } from 'winston' import { makeApolloServer } from './apollo' import { appDataSource } from './data_source' import { env } from './env' +import { redisClient } from './redis' import { articleRouter } from './routers/article_router' import { authRouter } from './routers/auth/auth_router' import { mobileAuthRouter } from './routers/auth/mobile/mobile_auth_router' @@ -157,6 +158,8 @@ const main = async (): Promise => { // as healthy. await appDataSource.initialize() + await redisClient.connect() + const { app, apollo, httpServer } = createApp() await apollo.start() diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index 442dcea93..a49f9ac86 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -13,6 +13,7 @@ import { SavePageInput, SaveResult, } from '../generated/graphql' +import { redisClient } from '../redis' import { authTrx } from '../repository' import { enqueueThumbnailTask } from '../utils/createTask' import { @@ -194,6 +195,14 @@ export const savePage = async ( } } + // save the url in redis for 8 hours so rss-feeder won't try to re-save it + const redisKey = `recent-saved-item:${user.id}:${itemToSave.originalUrl}` + const expireInSeconds = 60 * 60 * 8 + await redisClient.set(redisKey, 1, { + EX: expireInSeconds, + NX: true, + }) + return { clientRequestId, url: `${homePageURL()}/${user.profile.username}/${slug}`, diff --git a/packages/api/src/util.ts b/packages/api/src/util.ts index c056d17cd..f37b51587 100755 --- a/packages/api/src/util.ts +++ b/packages/api/src/util.ts @@ -109,6 +109,10 @@ interface BackendEnv { max: number } } + redis: { + url?: string + cert?: string + } } /*** @@ -173,6 +177,8 @@ const nullableEnvVars = [ 'INTEGRATION_EXPORTER_URL', 'INTEGRATION_IMPORTER_URL', 'SUBSCRIPTION_FEED_MAX', + 'REDIS_URL', + 'REDIS_CERT', ] // Allow some vars to be null/empty /* If not in GAE and Prod/QA/Demo env (f.e. on localhost/dev env), allow following env vars to be null */ @@ -316,6 +322,10 @@ export function getEnv(): BackendEnv { : 256, // default to 256 }, } + const redis = { + url: parse('REDIS_URL'), + cert: parse('REDIS_CERT'), + } return { pg, @@ -338,6 +348,7 @@ export function getEnv(): BackendEnv { gcp, pocket, subscription, + redis, } } diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 2da93f3f7..4604690ef 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -224,7 +224,7 @@ const isItemRecentlySaved = async ( userId: string, url: string ) => { - const key = `rss-recent-save:${userId}:${url}` + const key = `recent-saved-item:${userId}:${url}` const result = await redisClient.get(key) return !!result } From bcd05cc7fc8e93bd9cc1e7ca9445b54bd983f1fa Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 4 Jan 2024 18:10:14 +0800 Subject: [PATCH 03/12] set the url in redis when an item is inserted into db --- packages/api/src/services/library_item.ts | 4 +++- packages/api/src/services/save_page.ts | 8 -------- packages/api/src/utils/helpers.ts | 14 ++++++++++++++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index d6944af12..c68e6526d 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -15,7 +15,7 @@ import { valuesToRawSql, } from '../repository' import { libraryItemRepository } from '../repository/library_item' -import { wordsCount } from '../utils/helpers' +import { setRecentlySavedItemInRedis, wordsCount } from '../utils/helpers' import { parseSearchQuery } from '../utils/search' enum ReadFilter { @@ -818,6 +818,8 @@ export const createLibraryItem = async ( userId ) + await setRecentlySavedItemInRedis(userId, newLibraryItem.originalUrl) + if (skipPubSub) { return newLibraryItem } diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index a49f9ac86..0cded9a52 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -195,14 +195,6 @@ export const savePage = async ( } } - // save the url in redis for 8 hours so rss-feeder won't try to re-save it - const redisKey = `recent-saved-item:${user.id}:${itemToSave.originalUrl}` - const expireInSeconds = 60 * 60 * 8 - await redisClient.set(redisKey, 1, { - EX: expireInSeconds, - NX: true, - }) - return { clientRequestId, url: `${homePageURL()}/${user.profile.username}/${slug}`, diff --git a/packages/api/src/utils/helpers.ts b/packages/api/src/utils/helpers.ts index df6537996..48437b29e 100644 --- a/packages/api/src/utils/helpers.ts +++ b/packages/api/src/utils/helpers.ts @@ -25,6 +25,7 @@ import { SearchItem, } from '../generated/graphql' import { createPubSubClient } from '../pubsub' +import { redisClient } from '../redis' import { Claims, WithDataSourcesContext } from '../resolvers/types' import { validateUrl } from '../services/create_page_save_request' import { updateLibraryItem } from '../services/library_item' @@ -407,3 +408,16 @@ export const isRelativeUrl = (url: string): boolean => { export const getAbsoluteUrl = (url: string, baseUrl: string): string => { return new URL(url, baseUrl).href } + +export const setRecentlySavedItemInRedis = async ( + userId: string, + url: string +) => { + // save the url in redis for 8 hours so rss-feeder won't try to re-save it + const redisKey = `recent-saved-item:${userId}:${url}` + const ttlInSeconds = 60 * 60 * 8 + return redisClient.set(redisKey, 1, { + EX: ttlInSeconds, + NX: true, + }) +} From 8137a74f5139faf830f39bb0ba920f4fade6ee73 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 4 Jan 2024 18:14:50 +0800 Subject: [PATCH 04/12] add redis back to docker compose --- docker-compose.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index a66ee9f61..a42a6931c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -31,6 +31,14 @@ services: depends_on: postgres: condition: service_healthy + + redis: + image: "redis:6.0.9-alpine" + container_name: "omnivore-redis" + ports: + - "6379:6379" + expose: + - 6379 api: build: @@ -61,6 +69,8 @@ services: depends_on: migrate: condition: service_completed_successfully + redis: + condition: service_healthy web: build: From 0cf7054ce67c655229466cf579ece280283106e6 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 4 Jan 2024 18:17:16 +0800 Subject: [PATCH 05/12] add redis url to env var --- docker-compose.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index a42a6931c..75aff218e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -37,6 +37,10 @@ services: container_name: "omnivore-redis" ports: - "6379:6379" + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 15s + timeout: 90s expose: - 6379 @@ -66,6 +70,7 @@ services: - CLIENT_URL=http://localhost:3000 - GATEWAY_URL=http://localhost:8080/api - CONTENT_FETCH_URL=http://content-fetch:8080/?token=some_token + - REDIS_URL=redis://redis:6379 depends_on: migrate: condition: service_completed_successfully From 32a58e49f9d43fbaf042e2e7d1fe63ab30b1f47a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 4 Jan 2024 21:55:41 +0800 Subject: [PATCH 06/12] make redis optional for backend --- docker-compose.yml | 12 ------------ packages/api/src/server.ts | 5 ++++- packages/api/src/services/library_item.ts | 6 +++++- 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 75aff218e..fb807fa66 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -31,18 +31,6 @@ services: depends_on: postgres: condition: service_healthy - - redis: - image: "redis:6.0.9-alpine" - container_name: "omnivore-redis" - ports: - - "6379:6379" - healthcheck: - test: ["CMD", "redis-cli", "ping"] - interval: 15s - timeout: 90s - expose: - - 6379 api: build: diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index 2380e3c37..0cf7cad88 100755 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -158,7 +158,10 @@ const main = async (): Promise => { // as healthy. await appDataSource.initialize() - await redisClient.connect() + // redis is optional + if (env.redis.url) { + await redisClient.connect() + } const { app, apollo, httpServer } = createApp() diff --git a/packages/api/src/services/library_item.ts b/packages/api/src/services/library_item.ts index c68e6526d..8478f33f3 100644 --- a/packages/api/src/services/library_item.ts +++ b/packages/api/src/services/library_item.ts @@ -6,6 +6,7 @@ import { EntityLabel } from '../entity/entity_label' import { Highlight } from '../entity/highlight' import { Label } from '../entity/label' import { LibraryItem, LibraryItemState } from '../entity/library_item' +import { env } from '../env' import { BulkActionType, InputMaybe, SortParams } from '../generated/graphql' import { createPubSubClient, EntityType } from '../pubsub' import { @@ -818,7 +819,10 @@ export const createLibraryItem = async ( userId ) - await setRecentlySavedItemInRedis(userId, newLibraryItem.originalUrl) + // set recently saved item in redis if redis is enabled + if (env.redis.url) { + await setRecentlySavedItemInRedis(userId, newLibraryItem.originalUrl) + } if (skipPubSub) { return newLibraryItem From f4ff4b9fcda8891996798e5425390bebedb5ce43 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 4 Jan 2024 22:27:31 +0800 Subject: [PATCH 07/12] fix tests --- packages/api/test/global-setup.ts | 7 +++++++ packages/api/test/global-teardown.ts | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/packages/api/test/global-setup.ts b/packages/api/test/global-setup.ts index d4a56e120..b36cfa15b 100644 --- a/packages/api/test/global-setup.ts +++ b/packages/api/test/global-setup.ts @@ -1,3 +1,5 @@ +import { env } from '../src/env' +import { redisClient } from '../src/redis' import { createTestConnection } from './db' import { startApolloServer } from './util' @@ -5,6 +7,11 @@ export const mochaGlobalSetup = async () => { await createTestConnection() console.log('db connection created') + if (env.redis.url) { + await redisClient.connect() + console.log('redis connection created') + } + await startApolloServer() console.log('apollo server started') } diff --git a/packages/api/test/global-teardown.ts b/packages/api/test/global-teardown.ts index ec201d2db..2d11b2a62 100644 --- a/packages/api/test/global-teardown.ts +++ b/packages/api/test/global-teardown.ts @@ -1,4 +1,6 @@ import { appDataSource } from '../src/data_source' +import { env } from '../src/env' +import { redisClient } from '../src/redis' import { stopApolloServer } from './util' export const mochaGlobalTeardown = async () => { @@ -7,4 +9,9 @@ export const mochaGlobalTeardown = async () => { await appDataSource.destroy() console.log('db connection closed') + + if (env.redis.url) { + await redisClient.disconnect() + console.log('redis connection closed') + } } From 6d48816c22e7e164c66bc419fb1efbc166618baf Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 4 Jan 2024 22:42:31 +0800 Subject: [PATCH 08/12] fix tests --- packages/rss-handler/src/index.ts | 2 +- packages/rss-handler/test/index.test.ts | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 4604690ef..248c6b4a9 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -43,7 +43,7 @@ type RssFeed = Parser.Output<{ type RssFeedItemMedia = { $: { url: string; width?: string; height?: string; medium?: string } } -type RssFeedItem = Item & { +export type RssFeedItem = Item & { 'media:thumbnail'?: RssFeedItemMedia 'media:content'?: RssFeedItemMedia[] link: string diff --git a/packages/rss-handler/test/index.test.ts b/packages/rss-handler/test/index.test.ts index 5abf0b77c..b43cf7973 100644 --- a/packages/rss-handler/test/index.test.ts +++ b/packages/rss-handler/test/index.test.ts @@ -1,13 +1,12 @@ import { expect } from 'chai' import 'mocha' -import { Item } from 'rss-parser' -import { isOldItem } from '../src' +import { isOldItem, RssFeedItem } from '../src' describe('isOldItem', () => { it('returns true if item is older than 1 day', () => { const item = { pubDate: '2020-01-01', - } as Item + } as RssFeedItem const lastFetchedAt = Date.now() expect(isOldItem(item, lastFetchedAt)).to.be.true @@ -17,7 +16,7 @@ describe('isOldItem', () => { const lastFetchedAt = Date.now() const item = { pubDate: new Date(lastFetchedAt).toISOString(), - } as Item + } as RssFeedItem expect(isOldItem(item, lastFetchedAt)).to.be.true }) From b54b696fab9bab0920d2407c80379cfba005c447 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 4 Jan 2024 23:06:37 +0800 Subject: [PATCH 09/12] skip parsing blocked feeds stored in redis --- packages/rss-handler/src/index.ts | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 248c6b4a9..df8e53761 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -58,6 +58,30 @@ export const isOldItem = (item: RssFeedItem, lastFetchedAt: number) => { ) } +const feedFetchFailedRedisKey = (feedUrl: string) => + `feed-fetch-failure:${feedUrl}` + +const isFeedBlocked = async (feedUrl: string, redisClient: RedisClient) => { + const key = feedFetchFailedRedisKey(feedUrl) + const result = await redisClient.get(key) + // if the feed has failed to fetch more than certain times, block it + const maxFailures = parseInt(process.env.MAX_FEED_FETCH_FAILURES ?? '10') + if (result && parseInt(result) > maxFailures) { + return true + } + + return false +} + +const blockFeed = async (feedUrl: string, redisClient: RedisClient) => { + const key = feedFetchFailedRedisKey(feedUrl) + const result = await redisClient.incr(key) + // expire the key in 1 day + await redisClient.expire(key, 24 * 60 * 60, 'NX') + + return result +} + export const isContentFetchBlocked = (feedUrl: string) => { if (feedUrl.startsWith('https://arxiv.org/')) { return true @@ -590,10 +614,17 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( } = req.body console.log('Processing feed', feedUrl) + const isBlocked = await isFeedBlocked(feedUrl, redisClient) + if (isBlocked) { + console.log('feed is blocked: ', feedUrl) + return res.sendStatus(200) + } + const fetchResult = await fetchAndChecksum(feedUrl) const feed = await parseFeed(feedUrl, fetchResult.content) if (!feed) { console.error('Failed to parse RSS feed', feedUrl) + await blockFeed(feedUrl, redisClient) return res.status(500).send('INVALID_RSS_FEED') } From 75d9206262cecf34018969f71e2d70ee751cf55f Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Fri, 5 Jan 2024 11:01:15 +0800 Subject: [PATCH 10/12] remove redis from docker-compose --- docker-compose.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index fb807fa66..a66ee9f61 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -58,12 +58,9 @@ services: - CLIENT_URL=http://localhost:3000 - GATEWAY_URL=http://localhost:8080/api - CONTENT_FETCH_URL=http://content-fetch:8080/?token=some_token - - REDIS_URL=redis://redis:6379 depends_on: migrate: condition: service_completed_successfully - redis: - condition: service_healthy web: build: From ccd4f2d3f0208d344c6942d4358f01866fba0838 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Fri, 5 Jan 2024 15:20:42 +0800 Subject: [PATCH 11/12] catch redis client error and continue the process --- packages/api/src/server.ts | 5 +++++ packages/api/src/utils/helpers.ts | 15 +++++++++++---- packages/rss-handler/src/index.ts | 27 ++++++++++++++++++--------- 3 files changed, 34 insertions(+), 13 deletions(-) diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index 0cf7cad88..a6458730f 100755 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -160,7 +160,12 @@ const main = async (): Promise => { // redis is optional if (env.redis.url) { + redisClient.on('error', (err) => { + console.error('Redis Client Error', err) + }) + await redisClient.connect() + console.log('Redis Client Connected:', env.redis.url) } const { app, apollo, httpServer } = createApp() diff --git a/packages/api/src/utils/helpers.ts b/packages/api/src/utils/helpers.ts index 48437b29e..cf179fd35 100644 --- a/packages/api/src/utils/helpers.ts +++ b/packages/api/src/utils/helpers.ts @@ -416,8 +416,15 @@ export const setRecentlySavedItemInRedis = async ( // save the url in redis for 8 hours so rss-feeder won't try to re-save it const redisKey = `recent-saved-item:${userId}:${url}` const ttlInSeconds = 60 * 60 * 8 - return redisClient.set(redisKey, 1, { - EX: ttlInSeconds, - NX: true, - }) + try { + return redisClient.set(redisKey, 1, { + EX: ttlInSeconds, + NX: true, + }) + } catch (error) { + logger.error('error setting recently saved item in redis', { + redisKey, + error, + }) + } } diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index df8e53761..1e36a7d02 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -63,11 +63,15 @@ const feedFetchFailedRedisKey = (feedUrl: string) => const isFeedBlocked = async (feedUrl: string, redisClient: RedisClient) => { const key = feedFetchFailedRedisKey(feedUrl) - const result = await redisClient.get(key) - // if the feed has failed to fetch more than certain times, block it - const maxFailures = parseInt(process.env.MAX_FEED_FETCH_FAILURES ?? '10') - if (result && parseInt(result) > maxFailures) { - return true + try { + const result = await redisClient.get(key) + // if the feed has failed to fetch more than certain times, block it + const maxFailures = parseInt(process.env.MAX_FEED_FETCH_FAILURES ?? '10') + if (result && parseInt(result) > maxFailures) { + return true + } + } catch (error) { + console.error('Failed to check feed block status', feedUrl, error) } return false @@ -75,11 +79,16 @@ const isFeedBlocked = async (feedUrl: string, redisClient: RedisClient) => { const blockFeed = async (feedUrl: string, redisClient: RedisClient) => { const key = feedFetchFailedRedisKey(feedUrl) - const result = await redisClient.incr(key) - // expire the key in 1 day - await redisClient.expire(key, 24 * 60 * 60, 'NX') + try { + const result = await redisClient.incr(key) + // expire the key in 1 day + await redisClient.expire(key, 24 * 60 * 60, 'NX') - return result + return result + } catch (error) { + console.error('Failed to block feed', feedUrl, error) + return null + } } export const isContentFetchBlocked = (feedUrl: string) => { From 44bed67e3554d9e700b93c0e405e6feb8ae51367 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Fri, 5 Jan 2024 15:41:01 +0800 Subject: [PATCH 12/12] add comment --- packages/rss-handler/src/index.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 1e36a7d02..b3636e41c 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -68,6 +68,7 @@ const isFeedBlocked = async (feedUrl: string, redisClient: RedisClient) => { // if the feed has failed to fetch more than certain times, block it const maxFailures = parseInt(process.env.MAX_FEED_FETCH_FAILURES ?? '10') if (result && parseInt(result) > maxFailures) { + console.log('feed is blocked: ', feedUrl) return true } } catch (error) {