From f4b6e470f2e42364a3b4714cd102f3dfee1427ce Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Fri, 23 Feb 2024 14:19:22 +0800 Subject: [PATCH] replace fetch_content with fetch_content_type as an enum: ALWAYS, NEVER, WHEN_EMPTY --- packages/api/src/entity/subscription.ts | 12 ++ packages/api/src/generated/graphql.ts | 11 ++ packages/api/src/generated/schema.graphql | 9 ++ packages/api/src/jobs/rss/refreshAllFeeds.ts | 4 +- packages/api/src/jobs/rss/refreshFeed.ts | 109 +++++++++++------- .../api/src/resolvers/subscriptions/index.ts | 13 ++- packages/api/src/schema.ts | 9 ++ packages/api/src/services/save_page.ts | 16 ++- .../api/src/services/update_subscription.ts | 12 +- packages/api/src/utils/createTask.ts | 5 +- ...ename_preview_content_in_library_item.sql} | 4 + ...ename_preview_content_in_library_item.sql} | 4 + 12 files changed, 152 insertions(+), 56 deletions(-) rename packages/db/migrations/{0162.do.rename_preview_content_in_library_item.sql => 0165.do.rename_preview_content_in_library_item.sql} (54%) rename packages/db/migrations/{0162.undo.rename_preview_content_in_library_item.sql => 0165.undo.rename_preview_content_in_library_item.sql} (71%) diff --git a/packages/api/src/entity/subscription.ts b/packages/api/src/entity/subscription.ts index fd17d8d73..bb9802f7b 100644 --- a/packages/api/src/entity/subscription.ts +++ b/packages/api/src/entity/subscription.ts @@ -23,6 +23,12 @@ export enum SubscriptionType { Rss = 'RSS', } +export enum FetchContentType { + Always = 'ALWAYS', + Never = 'NEVER', + WhenEmpty = 'WHEN_EMPTY', +} + @Entity({ name: 'subscriptions' }) export class Subscription { @PrimaryGeneratedColumn('uuid') @@ -98,6 +104,12 @@ export class Subscription { @Column('boolean') fetchContent!: boolean + @Column('enum', { + enum: FetchContentType, + default: FetchContentType.Always, + }) + fetchContentType!: FetchContentType + @Column('text') folder?: string | null } diff --git a/packages/api/src/generated/graphql.ts 
b/packages/api/src/generated/graphql.ts index e50946158..de12eefd7 100644 --- a/packages/api/src/generated/graphql.ts +++ b/packages/api/src/generated/graphql.ts @@ -854,6 +854,12 @@ export type FetchContentSuccess = { success: Scalars['Boolean']; }; +export enum FetchContentType { + Always = 'ALWAYS', + Never = 'NEVER', + WhenEmpty = 'WHEN_EMPTY' +} + export type Filter = { __typename?: 'Filter'; category?: Maybe; @@ -2818,6 +2824,7 @@ export enum SubscribeErrorCode { export type SubscribeInput = { autoAddToLibrary?: InputMaybe; fetchContent?: InputMaybe; + fetchContentType?: InputMaybe; folder?: InputMaybe; isPrivate?: InputMaybe; subscriptionType?: InputMaybe; @@ -2839,6 +2846,7 @@ export type Subscription = { description?: Maybe; failedAt?: Maybe; fetchContent: Scalars['Boolean']; + fetchContentType: FetchContentType; folder: Scalars['String']; icon?: Maybe; id: Scalars['ID']; @@ -3214,6 +3222,7 @@ export type UpdateSubscriptionInput = { description?: InputMaybe; failedAt?: InputMaybe; fetchContent?: InputMaybe; + fetchContentType?: InputMaybe; folder?: InputMaybe; id: Scalars['ID']; isPrivate?: InputMaybe; @@ -3711,6 +3720,7 @@ export type ResolversTypes = { FetchContentErrorCode: FetchContentErrorCode; FetchContentResult: ResolversTypes['FetchContentError'] | ResolversTypes['FetchContentSuccess']; FetchContentSuccess: ResolverTypeWrapper; + FetchContentType: FetchContentType; Filter: ResolverTypeWrapper; FiltersError: ResolverTypeWrapper; FiltersErrorCode: FiltersErrorCode; @@ -6198,6 +6208,7 @@ export type SubscriptionResolvers, "description", ParentType, ContextType>; failedAt?: SubscriptionResolver, "failedAt", ParentType, ContextType>; fetchContent?: SubscriptionResolver; + fetchContentType?: SubscriptionResolver; folder?: SubscriptionResolver; icon?: SubscriptionResolver, "icon", ParentType, ContextType>; id?: SubscriptionResolver; diff --git a/packages/api/src/generated/schema.graphql b/packages/api/src/generated/schema.graphql index 
633b66562..e7f5e748a 100644 --- a/packages/api/src/generated/schema.graphql +++ b/packages/api/src/generated/schema.graphql @@ -758,6 +758,12 @@ type FetchContentSuccess { success: Boolean! } +enum FetchContentType { + ALWAYS + NEVER + WHEN_EMPTY +} + type Filter { category: String createdAt: Date! @@ -2216,6 +2222,7 @@ enum SubscribeErrorCode { input SubscribeInput { autoAddToLibrary: Boolean fetchContent: Boolean + fetchContentType: FetchContentType folder: String isPrivate: Boolean subscriptionType: SubscriptionType @@ -2235,6 +2242,7 @@ type Subscription { description: String failedAt: Date fetchContent: Boolean! + fetchContentType: FetchContentType! folder: String! icon: String id: ID! @@ -2581,6 +2589,7 @@ input UpdateSubscriptionInput { description: String failedAt: Date fetchContent: Boolean + fetchContentType: FetchContentType folder: String id: ID! isPrivate: Boolean diff --git a/packages/api/src/jobs/rss/refreshAllFeeds.ts b/packages/api/src/jobs/rss/refreshAllFeeds.ts index 26f0882e4..b4b020c96 100644 --- a/packages/api/src/jobs/rss/refreshAllFeeds.ts +++ b/packages/api/src/jobs/rss/refreshAllFeeds.ts @@ -31,7 +31,7 @@ export const refreshAllFeeds = async (db: DataSource): Promise => { ARRAY_AGG(s.most_recent_item_date) AS "mostRecentItemDates", ARRAY_AGG(coalesce(s.scheduled_at, NOW())) AS "scheduledDates", ARRAY_AGG(s.last_fetched_checksum) AS checksums, - ARRAY_AGG(s.fetch_content) AS "fetchContents", + ARRAY_AGG(s.fetch_content_type) AS "fetchContentTypes", ARRAY_AGG(coalesce(s.folder, $3)) AS folders FROM omnivore.subscriptions s @@ -106,7 +106,7 @@ const updateSubscriptionGroup = async ( timestamp.getTime() ), // unix timestamp in milliseconds userIds: group.userIds, - fetchContents: group.fetchContents, + fetchContents: group.fetchContentTypes, folders: group.folders, } diff --git a/packages/api/src/jobs/rss/refreshFeed.ts b/packages/api/src/jobs/rss/refreshFeed.ts index 8cf66ad28..fc7d2f203 100644 --- a/packages/api/src/jobs/rss/refreshFeed.ts 
+++ b/packages/api/src/jobs/rss/refreshFeed.ts @@ -2,14 +2,20 @@ import axios from 'axios' import crypto from 'crypto' import { parseHTML } from 'linkedom' import Parser, { Item } from 'rss-parser' +import { FetchContentType } from '../../entity/subscription' import { env } from '../../env' +import { ArticleSavingRequestStatus } from '../../generated/graphql' import { redisDataSource } from '../../redis_data_source' import { validateUrl } from '../../services/create_page_save_request' +import { savePage } from '../../services/save_page' import { updateSubscription, updateSubscriptions, } from '../../services/update_subscription' +import { findActiveUser } from '../../services/user' import createHttpTaskWithToken from '../../utils/createTask' +import { cleanUrl } from '../../utils/helpers' +import { createThumbnailUrl } from '../../utils/imageproxy' import { logger } from '../../utils/logger' import { RSSRefreshContext } from './refreshAllFeeds' @@ -22,7 +28,7 @@ interface RefreshFeedRequest { scheduledTimestamps: number[] // unix timestamp in milliseconds lastFetchedChecksums: string[] userIds: string[] - fetchContents: boolean[] + fetchContentTypes: FetchContentType[] folders: FolderType[] refreshContext?: RSSRefreshContext } @@ -35,7 +41,7 @@ export const isRefreshFeedRequest = (data: any): data is RefreshFeedRequest => { 'scheduledTimestamps' in data && 'userIds' in data && 'lastFetchedChecksums' in data && - 'fetchContents' in data && + 'fetchContentTypes' in data && 'folders' in data ) } @@ -263,7 +269,7 @@ const createTask = async ( userId: string, feedUrl: string, item: RssFeedItem, - fetchContent: boolean, + fetchContentType: FetchContentType, folder: FolderType ) => { const isRecentlySaved = await isItemRecentlySaved(userId, item.link) @@ -272,8 +278,12 @@ const createTask = async ( return true } - if (folder === 'following' && !fetchContent) { - return createItemWithFeedContent(userId, feedUrl, item) + const feedContent = item.content || 
item.contentSnippet || item.summary + if ( + fetchContentType === FetchContentType.Never || + (fetchContentType === FetchContentType.WhenEmpty && !feedContent) + ) { + return createItemWithFeedContent(userId, feedUrl, item, folder, feedContent) } logger.info(`adding fetch content task ${userId} ${item.link.trim()}`) @@ -312,41 +322,57 @@ const fetchContentAndCreateItem = async ( const createItemWithFeedContent = async ( userId: string, feedUrl: string, - item: RssFeedItem + item: RssFeedItem, + folder: FolderType, + feedContent?: string ) => { - const input = { - userIds: [userId], - url: item.link, - title: item.title, - author: item.creator, - description: item.summary, - addedToFollowingFrom: 'feed', - feedContent: item.content || item.contentSnippet || item.summary, - addedToFollowingBy: feedUrl, - savedAt: item.isoDate, - publishedAt: item.isoDate, - previewContentType: 'text/html', // TODO: get content type from feed - thumbnail: getThumbnail(item), - } - try { - const serviceBaseUrl = process.env.INTERNAL_API_URL - const token = process.env.PUBSUB_VERIFICATION_TOKEN - if (!serviceBaseUrl || !token) { - throw 'Environment not configured correctly' + logger.info('saving feed item with feed content', { + userId, + feedUrl, + item, + folder, + }) + + const thumbnail = getThumbnail(item) + const previewImage = thumbnail && createThumbnailUrl(thumbnail) + const url = cleanUrl(item.link) + + const user = await findActiveUser(userId) + if (!user) { + logger.error('User not found', { userId }) + return false } - // save page - const taskHandlerUrl = `${serviceBaseUrl}/svc/following/save?token=${token}` - const task = await createHttpTaskWithToken({ - queue: env.queue.name, - priority: 'low', - taskHandlerUrl: taskHandlerUrl, - payload: input, - }) - return !!task + const result = await savePage( + { + url, + feedContent, + title: item.title, + folder, + rssFeedUrl: feedUrl, + savedAt: item.isoDate, + publishedAt: item.isoDate, + originalContent: feedContent || '', + 
source: 'rss-feeder', + state: ArticleSavingRequestStatus.ContentNotFetched, + clientRequestId: '', + author: item.creator, + previewImage, + }, + user + ) + + if (result.__typename === 'SaveError') { + logger.error( + `Error while saving feed item with feed content: ${result.errorCodes[0]}` + ) + return false + } + + return true } catch (error) { - logger.error('Error while creating task', error) + logger.error('Error while saving feed item with feed content', error) return false } } @@ -456,7 +482,7 @@ const processSubscription = async ( mostRecentItemDate: number, scheduledAt: number, lastFetchedChecksum: string, - fetchContent: boolean, + fetchContentType: FetchContentType, folder: FolderType, feed: RssFeed ) => { @@ -547,7 +573,7 @@ const processSubscription = async ( userId, feedUrl, feedItem, - fetchContent, + fetchContentType, folder ) if (!created) { @@ -580,7 +606,7 @@ const processSubscription = async ( userId, feedUrl, lastValidItem, - fetchContent, + fetchContentType, folder ) if (!created) { @@ -626,7 +652,7 @@ export const _refreshFeed = async (request: RefreshFeedRequest) => { scheduledTimestamps, userIds, lastFetchedChecksums, - fetchContents, + fetchContentTypes, folders, refreshContext, } = request @@ -666,6 +692,9 @@ export const _refreshFeed = async (request: RefreshFeedRequest) => { // process each subscription sequentially for (let i = 0; i < subscriptionIds.length; i++) { const subscriptionId = subscriptionIds[i] + const fetchContentType = allowFetchContent + ? 
fetchContentTypes[i] + : FetchContentType.Never try { await processSubscription( @@ -677,7 +706,7 @@ export const _refreshFeed = async (request: RefreshFeedRequest) => { mostRecentItemDates[i], scheduledTimestamps[i], lastFetchedChecksums[i], - fetchContents[i] && allowFetchContent, + fetchContentType, folders[i], feed ) diff --git a/packages/api/src/resolvers/subscriptions/index.ts b/packages/api/src/resolvers/subscriptions/index.ts index a13529325..bc10867fb 100644 --- a/packages/api/src/resolvers/subscriptions/index.ts +++ b/packages/api/src/resolvers/subscriptions/index.ts @@ -3,6 +3,7 @@ import { parseHTML } from 'linkedom' import { Brackets, In } from 'typeorm' import { DEFAULT_SUBSCRIPTION_FOLDER, + FetchContentType, Subscription, SubscriptionStatus, SubscriptionType, @@ -226,7 +227,9 @@ export const subscribeResolver = authorized< // re-subscribe const updatedSubscription = await getRepository(Subscription).save({ ...existingSubscription, - fetchContent: input.fetchContent ?? undefined, + fetchContentType: input.fetchContentType + ? (input.fetchContentType as FetchContentType) + : undefined, folder: input.folder ?? 
undefined, isPrivate: input.isPrivate, status: SubscriptionStatus.Active, @@ -240,7 +243,7 @@ export const subscribeResolver = authorized< scheduledDates: [new Date()], // fetch immediately mostRecentItemDates: [updatedSubscription.mostRecentItemDate || null], checksums: [updatedSubscription.lastFetchedChecksum || null], - fetchContents: [updatedSubscription.fetchContent], + fetchContentTypes: [updatedSubscription.fetchContentType], folders: [updatedSubscription.folder || DEFAULT_SUBSCRIPTION_FOLDER], }) @@ -254,7 +257,7 @@ export const subscribeResolver = authorized< // limit number of rss subscriptions to max const results = (await getRepository(Subscription).query( - `insert into omnivore.subscriptions (name, url, description, type, user_id, icon, is_private, fetch_content, folder) + `insert into omnivore.subscriptions (name, url, description, type, user_id, icon, is_private, fetch_content_type, folder) select $1, $2, $3, $4, $5, $6, $7, $8, $9 from omnivore.subscriptions where user_id = $5 and type = 'RSS' and status = 'ACTIVE' having count(*) < $10 @@ -267,7 +270,7 @@ export const subscribeResolver = authorized< uid, feed.thumbnail, input.isPrivate, - input.fetchContent ?? true, + input.fetchContentType ?? FetchContentType.Always, input.folder ?? 'following', MAX_RSS_SUBSCRIPTIONS, ] @@ -290,7 +293,7 @@ export const subscribeResolver = authorized< scheduledDates: [new Date()], // fetch immediately mostRecentItemDates: [null], checksums: [null], - fetchContents: [newSubscription.fetchContent], + fetchContentTypes: [newSubscription.fetchContentType], folders: [newSubscription.folder || DEFAULT_SUBSCRIPTION_FOLDER], }) diff --git a/packages/api/src/schema.ts b/packages/api/src/schema.ts index 2d142dc20..e3421f20a 100755 --- a/packages/api/src/schema.ts +++ b/packages/api/src/schema.ts @@ -1671,6 +1671,12 @@ const schema = gql` NEWSLETTER } + enum FetchContentType { + ALWAYS + NEVER + WHEN_EMPTY + } + type Subscription { id: ID! name: String! 
@@ -1689,6 +1695,7 @@ const schema = gql` isPrivate: Boolean autoAddToLibrary: Boolean fetchContent: Boolean! + fetchContentType: FetchContentType! folder: String! mostRecentItemDate: Date refreshedAt: Date @@ -2597,6 +2604,7 @@ const schema = gql` isPrivate: Boolean autoAddToLibrary: Boolean fetchContent: Boolean + fetchContentType: FetchContentType folder: String } @@ -2610,6 +2618,7 @@ const schema = gql` isPrivate: Boolean autoAddToLibrary: Boolean fetchContent: Boolean + fetchContentType: FetchContentType folder: String refreshedAt: Date mostRecentItemDate: Date diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index f79b28b52..f8d68bc30 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -11,6 +11,7 @@ import { SavePageInput, SaveResult, } from '../generated/graphql' +import { Merge } from '../util' import { enqueueThumbnailJob } from '../utils/createTask' import { cleanUrl, @@ -61,10 +62,13 @@ const shouldParseInBackend = (input: SavePageInput): boolean => { ) } +export type SavePageArgs = Merge< + SavePageInput, + { feedContent?: string; previewImage?: string; author?: string } +> + export const savePage = async ( - input: SavePageInput & { - finalUrl?: string - }, + input: SavePageArgs, user: User ): Promise => { const [slug, croppedPathname] = createSlug(input.url, input.title) @@ -100,6 +104,8 @@ export const savePage = async ( pageInfo: { title: input.title, canonicalUrl: input.url, + previewImage: input.previewImage, + author: input.author, }, }) @@ -119,6 +125,7 @@ export const savePage = async ( state: input.state || undefined, rssFeedUrl: input.rssFeedUrl, folder: input.folder, + feedContent: input.feedContent, }) const isImported = input.source === 'csv-importer' || input.source === 'pocket' @@ -196,6 +203,7 @@ export const parsedContentToLibraryItem = ({ state, rssFeedUrl, folder, + feedContent, }: { url: string userId: string @@ -215,6 +223,7 @@ export 
const parsedContentToLibraryItem = ({ state?: ArticleSavingRequestStatus | null rssFeedUrl?: string | null folder?: string | null + feedContent?: string | null }): DeepPartial & { originalUrl: string } => { logger.info('save_page', { url, state, itemId }) return { @@ -257,5 +266,6 @@ export const parsedContentToLibraryItem = ({ archivedAt: state === ArticleSavingRequestStatus.Archived ? new Date() : null, deletedAt: state === ArticleSavingRequestStatus.Deleted ? new Date() : null, + feedContent, } } diff --git a/packages/api/src/services/update_subscription.ts b/packages/api/src/services/update_subscription.ts index bcc077291..a956163ef 100644 --- a/packages/api/src/services/update_subscription.ts +++ b/packages/api/src/services/update_subscription.ts @@ -1,4 +1,8 @@ -import { Subscription, SubscriptionStatus } from '../entity/subscription' +import { + FetchContentType, + Subscription, + SubscriptionStatus, +} from '../entity/subscription' import { getRepository } from '../repository' const ensureOwns = async (userId: string, subscriptionId: string) => { @@ -16,7 +20,7 @@ const ensureOwns = async (userId: string, subscriptionId: string) => { type UpdateSubscriptionData = { autoAddToLibrary?: boolean | null description?: string | null - fetchContent?: boolean | null + fetchContentType?: FetchContentType | null folder?: string | null isPrivate?: boolean | null mostRecentItemDate?: Date | null @@ -48,7 +52,7 @@ export const updateSubscription = async ( failedAt: newData.failedAt || undefined, autoAddToLibrary: newData.autoAddToLibrary ?? undefined, isPrivate: newData.isPrivate ?? undefined, - fetchContent: newData.fetchContent ?? undefined, + fetchContentType: newData.fetchContentType ?? undefined, folder: newData.folder ?? undefined, }) @@ -75,7 +79,7 @@ export const updateSubscriptions = async ( failedAt: newData.failedAt || undefined, autoAddToLibrary: newData.autoAddToLibrary ?? undefined, isPrivate: newData.isPrivate ?? 
undefined, - fetchContent: newData.fetchContent ?? undefined, + fetchContentType: newData.fetchContentType ?? undefined, folder: newData.folder ?? undefined, })) ) diff --git a/packages/api/src/utils/createTask.ts b/packages/api/src/utils/createTask.ts index 50ef58f45..95777a240 100644 --- a/packages/api/src/utils/createTask.ts +++ b/packages/api/src/utils/createTask.ts @@ -9,6 +9,7 @@ import { DeepPartial } from 'typeorm' import { v4 as uuid } from 'uuid' import { ImportItemState } from '../entity/integration' import { Recommendation } from '../entity/recommendation' +import { FetchContentType } from '../entity/subscription' import { env } from '../env' import { ArticleSavingRequestStatus, @@ -625,7 +626,7 @@ export interface RssSubscriptionGroup { mostRecentItemDates: (Date | null)[] scheduledDates: Date[] checksums: (string | null)[] - fetchContents: boolean[] + fetchContentTypes: FetchContentType[] folders: string[] } @@ -648,7 +649,7 @@ export const enqueueRssFeedFetch = async ( timestamp.getTime() ), // unix timestamp in milliseconds userIds: subscriptionGroup.userIds, - fetchContents: subscriptionGroup.fetchContents, + fetchContentTypes: subscriptionGroup.fetchContentTypes, folders: subscriptionGroup.folders, } diff --git a/packages/db/migrations/0162.do.rename_preview_content_in_library_item.sql b/packages/db/migrations/0165.do.rename_preview_content_in_library_item.sql similarity index 54% rename from packages/db/migrations/0162.do.rename_preview_content_in_library_item.sql rename to packages/db/migrations/0165.do.rename_preview_content_in_library_item.sql index 932baaed3..098e76e49 100755 --- a/packages/db/migrations/0162.do.rename_preview_content_in_library_item.sql +++ b/packages/db/migrations/0165.do.rename_preview_content_in_library_item.sql @@ -6,4 +6,8 @@ BEGIN; ALTER TABLE omnivore.library_item RENAME COLUMN preview_content TO feed_content; +CREATE TYPE fetch_content_enum AS ENUM ('ALWAYS', 'NEVER', 'WHEN_EMPTY'); + +ALTER TABLE 
omnivore.subscriptions ADD COLUMN fetch_content_type fetch_content_enum NOT NULL DEFAULT 'ALWAYS'::fetch_content_enum; + COMMIT; diff --git a/packages/db/migrations/0162.undo.rename_preview_content_in_library_item.sql b/packages/db/migrations/0165.undo.rename_preview_content_in_library_item.sql similarity index 71% rename from packages/db/migrations/0162.undo.rename_preview_content_in_library_item.sql rename to packages/db/migrations/0165.undo.rename_preview_content_in_library_item.sql index 77a78d2da..b2aa4a628 100755 --- a/packages/db/migrations/0162.undo.rename_preview_content_in_library_item.sql +++ b/packages/db/migrations/0165.undo.rename_preview_content_in_library_item.sql @@ -4,6 +4,10 @@ BEGIN; +ALTER TABLE omnivore.subscriptions DROP COLUMN fetch_content_type; + +DROP TYPE fetch_content_enum; + ALTER TABLE omnivore.library_item RENAME COLUMN feed_content TO preview_content; COMMIT;