From cf101c6d18e39f252d7b2b0355803201ca0b96be Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 16:26:03 +0800 Subject: [PATCH 01/13] Cache and check feed checksums to reduce fetching --- packages/api/src/entity/subscription.ts | 3 + packages/api/src/generated/graphql.ts | 1 + packages/api/src/generated/schema.graphql | 1 + .../api/src/resolvers/subscriptions/index.ts | 1 + packages/api/src/routers/svc/rss_feed.ts | 2 +- packages/api/src/schema.ts | 1 + packages/api/src/utils/createTask.ts | 1 + ...do.add_checksum_to_subscriptions_table.sql | 7 ++ ...do.add_checksum_to_subscriptions_table.sql | 9 ++ packages/rss-handler/src/index.ts | 84 +++++++++++++------ 10 files changed, 85 insertions(+), 25 deletions(-) create mode 100755 packages/db/migrations/0138.do.add_checksum_to_subscriptions_table.sql create mode 100755 packages/db/migrations/0138.undo.add_checksum_to_subscriptions_table.sql diff --git a/packages/api/src/entity/subscription.ts b/packages/api/src/entity/subscription.ts index 90f7d593b..b63400d56 100644 --- a/packages/api/src/entity/subscription.ts +++ b/packages/api/src/entity/subscription.ts @@ -59,6 +59,9 @@ export class Subscription { @Column('timestamp', { nullable: true }) lastFetchedAt?: Date | null + @Column('text', { nullable: true }) + lastFetchedChecksum?: string | null + @CreateDateColumn({ default: () => 'CURRENT_TIMESTAMP' }) createdAt!: Date diff --git a/packages/api/src/generated/graphql.ts b/packages/api/src/generated/graphql.ts index f0f41ff68..8ceb89481 100644 --- a/packages/api/src/generated/graphql.ts +++ b/packages/api/src/generated/graphql.ts @@ -2978,6 +2978,7 @@ export type UpdateSubscriptionInput = { description?: InputMaybe; id: Scalars['ID']; lastFetchedAt?: InputMaybe; + lastfetchedChecksum?: InputMaybe; name?: InputMaybe; status?: InputMaybe; }; diff --git a/packages/api/src/generated/schema.graphql b/packages/api/src/generated/schema.graphql index 3ef33f886..39a6c00e7 100644 --- a/packages/api/src/generated/schema.graphql +++ b/packages/api/src/generated/schema.graphql @@ -2391,6 +2391,7 @@ input UpdateSubscriptionInput { description: String id: ID! lastFetchedAt: Date + lastfetchedChecksum: String name: String status: SubscriptionStatus } diff --git a/packages/api/src/resolvers/subscriptions/index.ts b/packages/api/src/resolvers/subscriptions/index.ts index 74375441d..a1069423b 100644 --- a/packages/api/src/resolvers/subscriptions/index.ts +++ b/packages/api/src/resolvers/subscriptions/index.ts @@ -290,6 +290,7 @@ export const updateSubscriptionResolver = authorized< lastFetchedAt: input.lastFetchedAt ? new Date(input.lastFetchedAt) : undefined, + lastFetchedChecksum: input.lastfetchedChecksum, status: input.status || undefined, }) diff --git a/packages/api/src/routers/svc/rss_feed.ts b/packages/api/src/routers/svc/rss_feed.ts index 0834e5025..c06ec9f71 100644 --- a/packages/api/src/routers/svc/rss_feed.ts +++ b/packages/api/src/routers/svc/rss_feed.ts @@ -24,7 +24,7 @@ export function rssFeedRouter() { // get all active rss feed subscriptions const subscriptions = await getRepository(Subscription).find({ - select: ['id', 'url', 'user', 'lastFetchedAt'], + select: ['id', 'url', 'user', 'lastFetchedAt', 'lastFetchedChecksum'], where: { type: SubscriptionType.Rss, status: SubscriptionStatus.Active, diff --git a/packages/api/src/schema.ts b/packages/api/src/schema.ts index e3b2b3442..6d78471cb 100755 --- a/packages/api/src/schema.ts +++ b/packages/api/src/schema.ts @@ -2548,6 +2548,7 @@ const schema = gql` name: String description: String lastFetchedAt: Date + lastfetchedChecksum: String status: SubscriptionStatus } diff --git a/packages/api/src/utils/createTask.ts b/packages/api/src/utils/createTask.ts index bb3457e00..483921bc9 100644 --- a/packages/api/src/utils/createTask.ts +++ b/packages/api/src/utils/createTask.ts @@ -601,6 +601,7 @@ export const enqueueRssFeedFetch = async ( subscriptionId: rssFeedSubscription.id, feedUrl: rssFeedSubscription.url, lastFetchedAt: rssFeedSubscription.lastFetchedAt?.getTime() || 0, // unix timestamp in milliseconds + lastFetchedChecksum: rssFeedSubscription.lastFetchedChecksum || null, } const headers = { diff --git a/packages/db/migrations/0138.do.add_checksum_to_subscriptions_table.sql b/packages/db/migrations/0138.do.add_checksum_to_subscriptions_table.sql new file mode 100755 index 000000000..5f704a666 --- /dev/null +++ b/packages/db/migrations/0138.do.add_checksum_to_subscriptions_table.sql @@ -0,0 +1,7 @@ +-- Type: DO +-- Name: add_checksum_to_subscriptions_table +-- Description: Add a last fetched checksum field to the subscriptions table + +BEGIN; + +COMMIT; diff --git a/packages/db/migrations/0138.undo.add_checksum_to_subscriptions_table.sql b/packages/db/migrations/0138.undo.add_checksum_to_subscriptions_table.sql new file mode 100755 index 000000000..e138d501a --- /dev/null +++ b/packages/db/migrations/0138.undo.add_checksum_to_subscriptions_table.sql @@ -0,0 +1,9 @@ +-- Type: UNDO +-- Name: add_checksum_to_subscriptions_table +-- Description: Add a last fetched checksum field to the subscriptions table + +BEGIN; + +ALTER TABLE omnivore.subscriptions DROP COLUMN last_fetched_checksum ; + +COMMIT; diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 60e789e73..4edd58709 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -1,5 +1,6 @@ import * as Sentry from '@sentry/serverless' import axios from 'axios' +import crypto from 'crypto' import * as dotenv from 'dotenv' // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import import * as jwt from 'jsonwebtoken' import Parser, { Item } from 'rss-parser' @@ -10,6 +11,7 @@ interface RssFeedRequest { subscriptionId: string feedUrl: string lastFetchedAt: number // unix timestamp in milliseconds + lastFetchedChecksum: string | undefined } // link can be a string or an object @@ -21,10 +23,42 @@ function isRssFeedRequest(body: any): body is RssFeedRequest { ) } +type FeedFetchResult = { + url: string + content: string + checksum: string +} + +async function fetchAndChecksum(url: string): Promise { + try { + // Fetch the content from the URL + const response = await axios.get(url, { + responseType: 'arraybuffer', + headers: { + 'User-Agent': + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', + Accept: + 'application/rss+xml, application/rdf+xml;q=0.8, application/atom+xml;q=0.6, application/xml;q=0.4, text/xml;q=0.4', + }, + }) + + // Create a sha256 hash of the content + const hash = crypto.createHash('sha256') + hash.update(response.data) + + return { url, content: response.data, checksum: hash.digest('hex') } + } catch (error) { + throw new Error( + `Failed to fetch or hash content from ${url}. Error: ${error}` + ) + } +} + const sendUpdateSubscriptionMutation = async ( userId: string, subscriptionId: string, - lastFetchedAt: Date + lastFetchedAt: Date, + lastFetchedChecksum: string ) => { const JWT_SECRET = process.env.JWT_SECRET const REST_BACKEND_ENDPOINT = process.env.REST_BACKEND_ENDPOINT @@ -51,6 +85,7 @@ const sendUpdateSubscriptionMutation = async ( input: { id: subscriptionId, lastFetchedAt, + lastFetchedChecksum, }, }, }) @@ -121,15 +156,12 @@ const parser = new Parser({ timeout: 60000, // 60 seconds maxRedirects: 10, customFields: { - item: [['link', 'links', { keepArray: true }], 'published', 'updated'], - feed: ['dc:date', 'lastBuildDate', 'pubDate'], - }, - headers: { - // some rss feeds require user agent - 'User-Agent': - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', - Accept: - 'application/rss+xml, application/rdf+xml;q=0.8, application/atom+xml;q=0.6, application/xml;q=0.4, text/xml;q=0.4', + item: [ + ['link', 'links', { keepArray: true }], + 'published', + 'updated', + 'created', + ], }, }) @@ -190,31 +222,34 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( return res.status(400).send('INVALID_REQUEST_BODY') } - const { feedUrl, subscriptionId, lastFetchedAt } = req.body + const { feedUrl, subscriptionId, lastFetchedAt, lastFetchedChecksum } = + req.body console.log('Processing feed', feedUrl, lastFetchedAt) let lastItemFetchedAt: Date | null = null let lastValidItem: Item | null = null + let updatedLastFetchedChecksum: string | null + + let fetchResult = await fetchAndChecksum(feedUrl) + if (fetchResult.checksum === lastFetchedChecksum) { + console.log('feed has not been updated', feedUrl, lastFetchedChecksum) + return res.status(200) + } + updatedLastFetchedChecksum = fetchResult.checksum // fetch feed let itemCount = 0 - const feed = await parser.parseURL(feedUrl) - console.log('Fetched feed', feed, new Date()) - - const feedPubDate = (feed['dc:date'] || - feed.pubDate || - feed.lastBuildDate) as string | undefined - console.log('Feed pub date', feedPubDate) - if (feedPubDate && new Date(feedPubDate) < new Date(lastFetchedAt)) { - console.log('Skipping old feed', feedPubDate) - return res.send('ok') - } + const feed = await parser.parseString(fetchResult.content) + console.log('Fetched feed', feed.title, new Date()) // save each item in the feed for (const item of feed.items) { // use published or updated if isoDate is not available for atom feeds item.isoDate = - item.isoDate || (item.published as string) || (item.updated as string) + item.isoDate || + (item.published as string) || + (item.updated as string) || + (item.created as string) console.log('Processing feed item', item.links, item.isoDate) if (!item.links || item.links.length === 0) { @@ -299,7 +334,8 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( const updatedSubscription = await sendUpdateSubscriptionMutation( userId, subscriptionId, - lastItemFetchedAt + lastItemFetchedAt, + updatedLastFetchedChecksum ) console.log('Updated subscription', updatedSubscription) From f14fc034fe95ed20631c98b8e2c56691d759d8b7 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 16:28:22 +0800 Subject: [PATCH 02/13] Add last fetched column to the database --- .../migrations/0138.do.add_checksum_to_subscriptions_table.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/db/migrations/0138.do.add_checksum_to_subscriptions_table.sql b/packages/db/migrations/0138.do.add_checksum_to_subscriptions_table.sql index 5f704a666..dfc37edfe 100755 --- a/packages/db/migrations/0138.do.add_checksum_to_subscriptions_table.sql +++ b/packages/db/migrations/0138.do.add_checksum_to_subscriptions_table.sql @@ -4,4 +4,6 @@ BEGIN; +ALTER TABLE omnivore.subscriptions ADD COLUMN last_fetched_checksum TEXT ; + COMMIT; From 78484136454b64f3f3ace2b21e48e26532920e78 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 16:34:20 +0800 Subject: [PATCH 03/13] Update schema --- packages/api/src/schema.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/schema.ts b/packages/api/src/schema.ts index 6d78471cb..2cc4e8b58 100755 --- a/packages/api/src/schema.ts +++ b/packages/api/src/schema.ts @@ -2548,7 +2548,7 @@ const schema = gql` name: String description: String lastFetchedAt: Date - lastfetchedChecksum: String + lastFetchedChecksum: String status: SubscriptionStatus } From d592dc44c6a6b931e5757dfddde1ae998aa55b83 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 16:34:40 +0800 Subject: [PATCH 04/13] Add back missing pubdate check --- packages/rss-handler/src/index.ts | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 4edd58709..048592c27 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -242,6 +242,15 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( const feed = await parser.parseString(fetchResult.content) console.log('Fetched feed', feed.title, new Date()) + const feedPubDate = (feed['dc:date'] || + feed.pubDate || + feed.lastBuildDate) as string | undefined + console.log('Feed pub date', feedPubDate) + if (feedPubDate && new Date(feedPubDate) < new Date(lastFetchedAt)) { + console.log('Skipping old feed', feedPubDate) + return res.send('ok') + } + // save each item in the feed for (const item of feed.items) { // use published or updated if isoDate is not available for atom feeds From 921a46a13ae7828823f7513093dfae1d025681f2 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 16:51:23 +0800 Subject: [PATCH 05/13] Linting fixes --- packages/rss-handler/src/index.ts | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 048592c27..5d1a3c654 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -31,7 +31,6 @@ type FeedFetchResult = { async function fetchAndChecksum(url: string): Promise { try { - // Fetch the content from the URL const response = await axios.get(url, { responseType: 'arraybuffer', headers: { @@ -42,15 +41,15 @@ async function fetchAndChecksum(url: string): Promise { }, }) - // Create a sha256 hash of the content const hash = crypto.createHash('sha256') - hash.update(response.data) + hash.update(response.data as Buffer) - return { url, content: response.data, checksum: hash.digest('hex') } + const dataStr = (response.data as Buffer).toString() + + return { url, content: dataStr, checksum: hash.digest('hex') } } catch (error) { - throw new Error( - `Failed to fetch or hash content from ${url}. Error: ${error}` - ) + console.log(error) + throw new Error(`Failed to fetch or hash content from ${url}.`) } } @@ -228,14 +227,13 @@ export const rssHandler = Sentry.GCPFunction.wrapHttpFunction( let lastItemFetchedAt: Date | null = null let lastValidItem: Item | null = null - let updatedLastFetchedChecksum: string | null - let fetchResult = await fetchAndChecksum(feedUrl) + const fetchResult = await fetchAndChecksum(feedUrl) if (fetchResult.checksum === lastFetchedChecksum) { console.log('feed has not been updated', feedUrl, lastFetchedChecksum) return res.status(200) } - updatedLastFetchedChecksum = fetchResult.checksum + const updatedLastFetchedChecksum = fetchResult.checksum // fetch feed let itemCount = 0 From 479fc8fb7d96bdec37505dcea683593e6f051de2 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 17:11:02 +0800 Subject: [PATCH 06/13] Add a test for checksumming --- packages/rss-handler/package.json | 3 ++- packages/rss-handler/src/index.ts | 8 +------- packages/rss-handler/test/checksum.test.ts | 14 ++++++++++++++ packages/rss-handler/test/stub.test.ts | 8 -------- 4 files changed, 17 insertions(+), 16 deletions(-) create mode 100644 packages/rss-handler/test/checksum.test.ts delete mode 100644 packages/rss-handler/test/stub.test.ts diff --git a/packages/rss-handler/package.json b/packages/rss-handler/package.json index 2a6d40851..0db5d93fd 100644 --- a/packages/rss-handler/package.json +++ b/packages/rss-handler/package.json @@ -17,7 +17,8 @@ "devDependencies": { "chai": "^4.3.6", "eslint-plugin-prettier": "^4.0.0", - "mocha": "^10.0.0" + "mocha": "^10.0.0", + "nock": "^13.3.4" }, "dependencies": { "@google-cloud/functions-framework": "3.1.2", diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 5d1a3c654..2b1bdf1fc 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -23,13 +23,7 @@ function isRssFeedRequest(body: any): body is RssFeedRequest { ) } -type FeedFetchResult = { - url: string - content: string - checksum: string -} - -async function fetchAndChecksum(url: string): Promise { +export const fetchAndChecksum = async (url: string) => { try { const response = await axios.get(url, { responseType: 'arraybuffer', diff --git a/packages/rss-handler/test/checksum.test.ts b/packages/rss-handler/test/checksum.test.ts new file mode 100644 index 000000000..24eee7a0b --- /dev/null +++ b/packages/rss-handler/test/checksum.test.ts @@ -0,0 +1,14 @@ +import 'mocha' +import nock from 'nock' +import { expect } from 'chai' +import { fetchAndChecksum } from '../src/index' + +describe('fetchAndChecksum', () => { + it('should hash the content available', async () => { + nock('https://fake.com', {}).get('/rss.xml').reply(200, 'i am some content') + const result = await fetchAndChecksum('https://fake.com/rss.xml') + expect(result.checksum).to.eq( + 'd6bc10faec048d999d0cf4b2f7103d84557fb9cd94c3bccd17884b1288949375' + ) + }) +}) diff --git a/packages/rss-handler/test/stub.test.ts b/packages/rss-handler/test/stub.test.ts deleted file mode 100644 index 24ad25c8f..000000000 --- a/packages/rss-handler/test/stub.test.ts +++ /dev/null @@ -1,8 +0,0 @@ -import 'mocha' -import { expect } from 'chai' - -describe('stub test', () => { - it('should pass', () => { - expect(true).to.be.true - }) -}) From d2c0efc540142d46c4df639b946bddfd03331d61 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 18:22:22 +0800 Subject: [PATCH 07/13] add some debug --- packages/rss-handler/src/index.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 2b1bdf1fc..a61bdfbec 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -83,6 +83,8 @@ const sendUpdateSubscriptionMutation = async ( }, }) + console.log('sending', data) + const auth = (await signToken({ uid: userId }, JWT_SECRET)) as string try { const response = await axios.post( From fbaaaeca7c67714d9ce7189994ad8fd6d7ae8e7b Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 18:48:18 +0800 Subject: [PATCH 08/13] Add some debugging --- packages/api/src/resolvers/subscriptions/index.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/api/src/resolvers/subscriptions/index.ts b/packages/api/src/resolvers/subscriptions/index.ts index a1069423b..8cc6c4d01 100644 --- a/packages/api/src/resolvers/subscriptions/index.ts +++ b/packages/api/src/resolvers/subscriptions/index.ts @@ -290,7 +290,7 @@ export const updateSubscriptionResolver = authorized< lastFetchedAt: input.lastFetchedAt ? new Date(input.lastFetchedAt) : undefined, - lastFetchedChecksum: input.lastfetchedChecksum, + lastFetchedChecksum: input.lastfetchedChecksum || undefined, status: input.status || undefined, }) @@ -300,6 +300,8 @@ export const updateSubscriptionResolver = authorized< }) }) + console.log('updatedSubscription', updatedSubscription) + return { subscription: updatedSubscription, } From 5c576347a8878701c7debe114419ca6173e2f724 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 19:26:45 +0800 Subject: [PATCH 09/13] MOre debug --- packages/api/src/resolvers/subscriptions/index.ts | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/api/src/resolvers/subscriptions/index.ts b/packages/api/src/resolvers/subscriptions/index.ts index 8cc6c4d01..6b415e4d4 100644 --- a/packages/api/src/resolvers/subscriptions/index.ts +++ b/packages/api/src/resolvers/subscriptions/index.ts @@ -282,8 +282,7 @@ export const updateSubscriptionResolver = authorized< const updatedSubscription = await authTrx(async (t) => { const repo = t.getRepository(Subscription) - // update subscription - await t.getRepository(Subscription).save({ + const dict = { id: input.id, name: input.name || undefined, description: input.description || undefined, @@ -292,7 +291,10 @@ export const updateSubscriptionResolver = authorized< : undefined, lastFetchedChecksum: input.lastfetchedChecksum || undefined, status: input.status || undefined, - }) + } + console.log('saving dict:', JSON.stringify(dict)) + // update subscription + await t.getRepository(Subscription).save(dict) return repo.findOneByOrFail({ id: input.id, From 78a642db6d0142628e387376b2ed08ce17d32ee1 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 19:53:26 +0800 Subject: [PATCH 10/13] Update type signature --- packages/api/src/generated/graphql.ts | 2 +- packages/api/src/generated/schema.graphql | 2 +- packages/api/src/resolvers/subscriptions/index.ts | 10 ++++------ 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/packages/api/src/generated/graphql.ts b/packages/api/src/generated/graphql.ts index 8ceb89481..0ad412b4f 100644 --- a/packages/api/src/generated/graphql.ts +++ b/packages/api/src/generated/graphql.ts @@ -2978,7 +2978,7 @@ export type UpdateSubscriptionInput = { description?: InputMaybe; id: Scalars['ID']; lastFetchedAt?: InputMaybe; - lastfetchedChecksum?: InputMaybe; + lastFetchedChecksum?: InputMaybe; name?: InputMaybe; status?: InputMaybe; }; diff --git a/packages/api/src/generated/schema.graphql b/packages/api/src/generated/schema.graphql index 39a6c00e7..af9064bee 100644 --- a/packages/api/src/generated/schema.graphql +++ b/packages/api/src/generated/schema.graphql @@ -2391,7 +2391,7 @@ input UpdateSubscriptionInput { description: String id: ID! lastFetchedAt: Date - lastfetchedChecksum: String + lastFetchedChecksum: String name: String status: SubscriptionStatus } diff --git a/packages/api/src/resolvers/subscriptions/index.ts b/packages/api/src/resolvers/subscriptions/index.ts index 6b415e4d4..50fdcb839 100644 --- a/packages/api/src/resolvers/subscriptions/index.ts +++ b/packages/api/src/resolvers/subscriptions/index.ts @@ -282,19 +282,17 @@ export const updateSubscriptionResolver = authorized< const updatedSubscription = await authTrx(async (t) => { const repo = t.getRepository(Subscription) - const dict = { + // update subscription + await t.getRepository(Subscription).save({ id: input.id, name: input.name || undefined, description: input.description || undefined, lastFetchedAt: input.lastFetchedAt ? new Date(input.lastFetchedAt) : undefined, - lastFetchedChecksum: input.lastfetchedChecksum || undefined, + lastFetchedChecksum: input.lastFetchedChecksum || undefined, status: input.status || undefined, - } - console.log('saving dict:', JSON.stringify(dict)) - // update subscription - await t.getRepository(Subscription).save(dict) + }) return repo.findOneByOrFail({ id: input.id, From 3a4547a6afbf391e6ae00dca92271623819a9d4a Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 21:01:01 +0800 Subject: [PATCH 11/13] Remove debug --- packages/rss-handler/src/index.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index a61bdfbec..2b1bdf1fc 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -83,8 +83,6 @@ const sendUpdateSubscriptionMutation = async ( }, }) - console.log('sending', data) - const auth = (await signToken({ uid: userId }, JWT_SECRET)) as string try { const response = await axios.post( From c93e61ec247745ac44fb0356e2db878de3afcada Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Wed, 18 Oct 2023 21:30:23 +0800 Subject: [PATCH 12/13] Remove debug --- packages/api/src/resolvers/subscriptions/index.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/api/src/resolvers/subscriptions/index.ts b/packages/api/src/resolvers/subscriptions/index.ts index 50fdcb839..430347654 100644 --- a/packages/api/src/resolvers/subscriptions/index.ts +++ b/packages/api/src/resolvers/subscriptions/index.ts @@ -300,8 +300,6 @@ export const updateSubscriptionResolver = authorized< }) }) - console.log('updatedSubscription', updatedSubscription) - return { subscription: updatedSubscription, } From 699e8077dcfd738f0fa5c252b1165c68eb259053 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Thu, 19 Oct 2023 09:57:28 +0800 Subject: [PATCH 13/13] Add feed custom fields, set timeout/redirects --- packages/rss-handler/src/index.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/rss-handler/src/index.ts b/packages/rss-handler/src/index.ts index 2b1bdf1fc..ca6afb04b 100644 --- a/packages/rss-handler/src/index.ts +++ b/packages/rss-handler/src/index.ts @@ -27,6 +27,8 @@ export const fetchAndChecksum = async (url: string) => { try { const response = await axios.get(url, { responseType: 'arraybuffer', + timeout: 60_000, + maxRedirects: 10, headers: { 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36', @@ -146,8 +148,6 @@ Sentry.GCPFunction.init({ const signToken = promisify(jwt.sign) const parser = new Parser({ - timeout: 60000, // 60 seconds - maxRedirects: 10, customFields: { item: [ ['link', 'links', { keepArray: true }], @@ -155,6 +155,7 @@ const parser = new Parser({ 'updated', 'created', ], + feed: ['dc:date', 'lastBuildDate', 'pubDate'], }, })