replace fetch_content with fetch_content_type as an enum: ALWAYS, NEVER, WHEN_EMPTY

This commit is contained in:
Hongbo Wu
2024-02-23 14:19:22 +08:00
parent 08e4bed146
commit f4b6e470f2
12 changed files with 152 additions and 56 deletions

View File

@ -23,6 +23,12 @@ export enum SubscriptionType {
Rss = 'RSS',
}
/**
 * Content-fetch policy of a subscription, persisted as a string enum.
 * The `fetchContentType` column on `Subscription` uses this enum and
 * defaults to `Always`.
 */
export enum FetchContentType {
// Default policy for the column.
Always = 'ALWAYS',
// Feed refresh saves the item from the feed's own content instead of
// scheduling a content-fetch task.
Never = 'NEVER',
// NOTE(review): presumably "fetch only when the feed item carries no
// content" — confirm against the condition in the feed refresh job.
WhenEmpty = 'WHEN_EMPTY',
}
@Entity({ name: 'subscriptions' })
export class Subscription {
@PrimaryGeneratedColumn('uuid')
@ -98,6 +104,12 @@ export class Subscription {
@Column('boolean')
fetchContent!: boolean
@Column('enum', {
enum: FetchContentType,
default: FetchContentType.Always,
})
fetchContentType!: FetchContentType
@Column('text')
folder?: string | null
}

View File

@ -854,6 +854,12 @@ export type FetchContentSuccess = {
success: Scalars['Boolean'];
};
/**
 * GraphQL-facing content-fetch policy. Referenced by the `fetchContentType`
 * field of `SubscribeInput`, `Subscription` and `UpdateSubscriptionInput`.
 * NOTE(review): this looks like code-generator output — confirm before
 * hand-editing, since regeneration would drop manual changes.
 */
export enum FetchContentType {
Always = 'ALWAYS',
Never = 'NEVER',
WhenEmpty = 'WHEN_EMPTY'
}
export type Filter = {
__typename?: 'Filter';
category?: Maybe<Scalars['String']>;
@ -2818,6 +2824,7 @@ export enum SubscribeErrorCode {
export type SubscribeInput = {
autoAddToLibrary?: InputMaybe<Scalars['Boolean']>;
fetchContent?: InputMaybe<Scalars['Boolean']>;
fetchContentType?: InputMaybe<FetchContentType>;
folder?: InputMaybe<Scalars['String']>;
isPrivate?: InputMaybe<Scalars['Boolean']>;
subscriptionType?: InputMaybe<SubscriptionType>;
@ -2839,6 +2846,7 @@ export type Subscription = {
description?: Maybe<Scalars['String']>;
failedAt?: Maybe<Scalars['Date']>;
fetchContent: Scalars['Boolean'];
fetchContentType: FetchContentType;
folder: Scalars['String'];
icon?: Maybe<Scalars['String']>;
id: Scalars['ID'];
@ -3214,6 +3222,7 @@ export type UpdateSubscriptionInput = {
description?: InputMaybe<Scalars['String']>;
failedAt?: InputMaybe<Scalars['Date']>;
fetchContent?: InputMaybe<Scalars['Boolean']>;
fetchContentType?: InputMaybe<FetchContentType>;
folder?: InputMaybe<Scalars['String']>;
id: Scalars['ID'];
isPrivate?: InputMaybe<Scalars['Boolean']>;
@ -3711,6 +3720,7 @@ export type ResolversTypes = {
FetchContentErrorCode: FetchContentErrorCode;
FetchContentResult: ResolversTypes['FetchContentError'] | ResolversTypes['FetchContentSuccess'];
FetchContentSuccess: ResolverTypeWrapper<FetchContentSuccess>;
FetchContentType: FetchContentType;
Filter: ResolverTypeWrapper<Filter>;
FiltersError: ResolverTypeWrapper<FiltersError>;
FiltersErrorCode: FiltersErrorCode;
@ -6198,6 +6208,7 @@ export type SubscriptionResolvers<ContextType = ResolverContext, ParentType exte
description?: SubscriptionResolver<Maybe<ResolversTypes['String']>, "description", ParentType, ContextType>;
failedAt?: SubscriptionResolver<Maybe<ResolversTypes['Date']>, "failedAt", ParentType, ContextType>;
fetchContent?: SubscriptionResolver<ResolversTypes['Boolean'], "fetchContent", ParentType, ContextType>;
fetchContentType?: SubscriptionResolver<ResolversTypes['FetchContentType'], "fetchContentType", ParentType, ContextType>;
folder?: SubscriptionResolver<ResolversTypes['String'], "folder", ParentType, ContextType>;
icon?: SubscriptionResolver<Maybe<ResolversTypes['String']>, "icon", ParentType, ContextType>;
id?: SubscriptionResolver<ResolversTypes['ID'], "id", ParentType, ContextType>;

View File

@ -758,6 +758,12 @@ type FetchContentSuccess {
success: Boolean!
}
# Content-fetch policy of a subscription. Used by the fetchContentType field
# on SubscribeInput, Subscription and UpdateSubscriptionInput.
enum FetchContentType {
ALWAYS
NEVER
WHEN_EMPTY
}
type Filter {
category: String
createdAt: Date!
@ -2216,6 +2222,7 @@ enum SubscribeErrorCode {
input SubscribeInput {
autoAddToLibrary: Boolean
fetchContent: Boolean
fetchContentType: FetchContentType
folder: String
isPrivate: Boolean
subscriptionType: SubscriptionType
@ -2235,6 +2242,7 @@ type Subscription {
description: String
failedAt: Date
fetchContent: Boolean!
fetchContentType: FetchContentType!
folder: String!
icon: String
id: ID!
@ -2581,6 +2589,7 @@ input UpdateSubscriptionInput {
description: String
failedAt: Date
fetchContent: Boolean
fetchContentType: FetchContentType
folder: String
id: ID!
isPrivate: Boolean

View File

@ -31,7 +31,7 @@ export const refreshAllFeeds = async (db: DataSource): Promise<boolean> => {
ARRAY_AGG(s.most_recent_item_date) AS "mostRecentItemDates",
ARRAY_AGG(coalesce(s.scheduled_at, NOW())) AS "scheduledDates",
ARRAY_AGG(s.last_fetched_checksum) AS checksums,
ARRAY_AGG(s.fetch_content) AS "fetchContents",
ARRAY_AGG(s.fetch_content_type) AS "fetchContentTypes",
ARRAY_AGG(coalesce(s.folder, $3)) AS folders
FROM
omnivore.subscriptions s
@ -106,7 +106,7 @@ const updateSubscriptionGroup = async (
timestamp.getTime()
), // unix timestamp in milliseconds
userIds: group.userIds,
fetchContents: group.fetchContents,
// The key must be `fetchContentTypes`: RefreshFeedRequest and its type guard
// look for that property name, so a payload keyed `fetchContents` would not
// be recognised as a refresh-feed request.
fetchContentTypes: group.fetchContentTypes,
folders: group.folders,
}

View File

@ -2,14 +2,20 @@ import axios from 'axios'
import crypto from 'crypto'
import { parseHTML } from 'linkedom'
import Parser, { Item } from 'rss-parser'
import { FetchContentType } from '../../entity/subscription'
import { env } from '../../env'
import { ArticleSavingRequestStatus } from '../../generated/graphql'
import { redisDataSource } from '../../redis_data_source'
import { validateUrl } from '../../services/create_page_save_request'
import { savePage } from '../../services/save_page'
import {
updateSubscription,
updateSubscriptions,
} from '../../services/update_subscription'
import { findActiveUser } from '../../services/user'
import createHttpTaskWithToken from '../../utils/createTask'
import { cleanUrl } from '../../utils/helpers'
import { createThumbnailUrl } from '../../utils/imageproxy'
import { logger } from '../../utils/logger'
import { RSSRefreshContext } from './refreshAllFeeds'
@ -22,7 +28,7 @@ interface RefreshFeedRequest {
scheduledTimestamps: number[] // unix timestamp in milliseconds
lastFetchedChecksums: string[]
userIds: string[]
fetchContents: boolean[]
fetchContentTypes: FetchContentType[]
folders: FolderType[]
refreshContext?: RSSRefreshContext
}
@ -35,7 +41,7 @@ export const isRefreshFeedRequest = (data: any): data is RefreshFeedRequest => {
'scheduledTimestamps' in data &&
'userIds' in data &&
'lastFetchedChecksums' in data &&
'fetchContents' in data &&
'fetchContentTypes' in data &&
'folders' in data
)
}
@ -263,7 +269,7 @@ const createTask = async (
userId: string,
feedUrl: string,
item: RssFeedItem,
fetchContent: boolean,
fetchContentType: FetchContentType,
folder: FolderType
) => {
const isRecentlySaved = await isItemRecentlySaved(userId, item.link)
@ -272,8 +278,12 @@ const createTask = async (
return true
}
if (folder === 'following' && !fetchContent) {
return createItemWithFeedContent(userId, feedUrl, item)
// Take the first non-empty body the feed supplies for this item.
const feedContent = item.content || item.contentSnippet || item.summary
// Save directly from the feed (no content-fetch task) when fetching is
// disabled, or when the policy is WHEN_EMPTY and the feed already provided
// content. Under WHEN_EMPTY only items that arrived without content fall
// through to the fetch task below.
if (
  fetchContentType === FetchContentType.Never ||
  (fetchContentType === FetchContentType.WhenEmpty && !!feedContent)
) {
  return createItemWithFeedContent(userId, feedUrl, item, folder, feedContent)
}
logger.info(`adding fetch content task ${userId} ${item.link.trim()}`)
@ -312,41 +322,57 @@ const fetchContentAndCreateItem = async (
const createItemWithFeedContent = async (
userId: string,
feedUrl: string,
item: RssFeedItem
item: RssFeedItem,
folder: FolderType,
feedContent?: string
) => {
const input = {
userIds: [userId],
url: item.link,
title: item.title,
author: item.creator,
description: item.summary,
addedToFollowingFrom: 'feed',
feedContent: item.content || item.contentSnippet || item.summary,
addedToFollowingBy: feedUrl,
savedAt: item.isoDate,
publishedAt: item.isoDate,
previewContentType: 'text/html', // TODO: get content type from feed
thumbnail: getThumbnail(item),
}
try {
const serviceBaseUrl = process.env.INTERNAL_API_URL
const token = process.env.PUBSUB_VERIFICATION_TOKEN
if (!serviceBaseUrl || !token) {
throw 'Environment not configured correctly'
logger.info('saving feed item with feed content', {
userId,
feedUrl,
item,
folder,
})
const thumbnail = getThumbnail(item)
const previewImage = thumbnail && createThumbnailUrl(thumbnail)
const url = cleanUrl(item.link)
const user = await findActiveUser(userId)
if (!user) {
logger.error('User not found', { userId })
return false
}
// save page
const taskHandlerUrl = `${serviceBaseUrl}/svc/following/save?token=${token}`
const task = await createHttpTaskWithToken({
queue: env.queue.name,
priority: 'low',
taskHandlerUrl: taskHandlerUrl,
payload: input,
})
return !!task
const result = await savePage(
{
url,
feedContent,
title: item.title,
folder,
rssFeedUrl: feedUrl,
savedAt: item.isoDate,
publishedAt: item.isoDate,
originalContent: feedContent || '',
source: 'rss-feeder',
state: ArticleSavingRequestStatus.ContentNotFetched,
clientRequestId: '',
author: item.creator,
previewImage,
},
user
)
if (result.__typename === 'SaveError') {
logger.error(
`Error while saving feed item with feed content: ${result.errorCodes[0]}`
)
return false
}
return true
} catch (error) {
logger.error('Error while creating task', error)
logger.error('Error while saving feed item with feed content', error)
return false
}
}
@ -456,7 +482,7 @@ const processSubscription = async (
mostRecentItemDate: number,
scheduledAt: number,
lastFetchedChecksum: string,
fetchContent: boolean,
fetchContentType: FetchContentType,
folder: FolderType,
feed: RssFeed
) => {
@ -547,7 +573,7 @@ const processSubscription = async (
userId,
feedUrl,
feedItem,
fetchContent,
fetchContentType,
folder
)
if (!created) {
@ -580,7 +606,7 @@ const processSubscription = async (
userId,
feedUrl,
lastValidItem,
fetchContent,
fetchContentType,
folder
)
if (!created) {
@ -626,7 +652,7 @@ export const _refreshFeed = async (request: RefreshFeedRequest) => {
scheduledTimestamps,
userIds,
lastFetchedChecksums,
fetchContents,
fetchContentTypes,
folders,
refreshContext,
} = request
@ -666,6 +692,9 @@ export const _refreshFeed = async (request: RefreshFeedRequest) => {
// process each subscription sequentially
for (let i = 0; i < subscriptionIds.length; i++) {
const subscriptionId = subscriptionIds[i]
const fetchContentType = allowFetchContent
? fetchContentTypes[i]
: FetchContentType.Never
try {
await processSubscription(
@ -677,7 +706,7 @@ export const _refreshFeed = async (request: RefreshFeedRequest) => {
mostRecentItemDates[i],
scheduledTimestamps[i],
lastFetchedChecksums[i],
fetchContents[i] && allowFetchContent,
fetchContentType,
folders[i],
feed
)

View File

@ -3,6 +3,7 @@ import { parseHTML } from 'linkedom'
import { Brackets, In } from 'typeorm'
import {
DEFAULT_SUBSCRIPTION_FOLDER,
FetchContentType,
Subscription,
SubscriptionStatus,
SubscriptionType,
@ -226,7 +227,9 @@ export const subscribeResolver = authorized<
// re-subscribe
const updatedSubscription = await getRepository(Subscription).save({
...existingSubscription,
fetchContent: input.fetchContent ?? undefined,
fetchContentType: input.fetchContentType
? (input.fetchContentType as FetchContentType)
: undefined,
folder: input.folder ?? undefined,
isPrivate: input.isPrivate,
status: SubscriptionStatus.Active,
@ -240,7 +243,7 @@ export const subscribeResolver = authorized<
scheduledDates: [new Date()], // fetch immediately
mostRecentItemDates: [updatedSubscription.mostRecentItemDate || null],
checksums: [updatedSubscription.lastFetchedChecksum || null],
fetchContents: [updatedSubscription.fetchContent],
fetchContentTypes: [updatedSubscription.fetchContentType],
folders: [updatedSubscription.folder || DEFAULT_SUBSCRIPTION_FOLDER],
})
@ -254,7 +257,7 @@ export const subscribeResolver = authorized<
// limit number of rss subscriptions to max
const results = (await getRepository(Subscription).query(
`insert into omnivore.subscriptions (name, url, description, type, user_id, icon, is_private, fetch_content, folder)
`insert into omnivore.subscriptions (name, url, description, type, user_id, icon, is_private, fetch_content_type, folder)
select $1, $2, $3, $4, $5, $6, $7, $8, $9 from omnivore.subscriptions
where user_id = $5 and type = 'RSS' and status = 'ACTIVE'
having count(*) < $10
@ -267,7 +270,7 @@ export const subscribeResolver = authorized<
uid,
feed.thumbnail,
input.isPrivate,
input.fetchContent ?? true,
input.fetchContentType ?? FetchContentType.Always,
input.folder ?? 'following',
MAX_RSS_SUBSCRIPTIONS,
]
@ -290,7 +293,7 @@ export const subscribeResolver = authorized<
scheduledDates: [new Date()], // fetch immediately
mostRecentItemDates: [null],
checksums: [null],
fetchContents: [newSubscription.fetchContent],
fetchContentTypes: [newSubscription.fetchContentType],
folders: [newSubscription.folder || DEFAULT_SUBSCRIPTION_FOLDER],
})

View File

@ -1671,6 +1671,12 @@ const schema = gql`
NEWSLETTER
}
enum FetchContentType {
ALWAYS
NEVER
WHEN_EMPTY
}
type Subscription {
id: ID!
name: String!
@ -1689,6 +1695,7 @@ const schema = gql`
isPrivate: Boolean
autoAddToLibrary: Boolean
fetchContent: Boolean!
fetchContentType: FetchContentType!
folder: String!
mostRecentItemDate: Date
refreshedAt: Date
@ -2597,6 +2604,7 @@ const schema = gql`
isPrivate: Boolean
autoAddToLibrary: Boolean
fetchContent: Boolean
fetchContentType: FetchContentType
folder: String
}
@ -2610,6 +2618,7 @@ const schema = gql`
isPrivate: Boolean
autoAddToLibrary: Boolean
fetchContent: Boolean
fetchContentType: FetchContentType
folder: String
refreshedAt: Date
mostRecentItemDate: Date

View File

@ -11,6 +11,7 @@ import {
SavePageInput,
SaveResult,
} from '../generated/graphql'
import { Merge } from '../util'
import { enqueueThumbnailJob } from '../utils/createTask'
import {
cleanUrl,
@ -61,10 +62,13 @@ const shouldParseInBackend = (input: SavePageInput): boolean => {
)
}
/**
 * Argument type accepted by `savePage`: the GraphQL `SavePageInput` combined,
 * through the project's `Merge` helper, with extra optional fields that
 * callers may supply.
 *
 * - `feedContent` is forwarded to `parsedContentToLibraryItem`.
 * - `previewImage` and `author` are passed along in `pageInfo`.
 */
export type SavePageArgs = Merge<
  SavePageInput,
  {
    feedContent?: string
    previewImage?: string
    author?: string
  }
>
export const savePage = async (
input: SavePageInput & {
finalUrl?: string
},
input: SavePageArgs,
user: User
): Promise<SaveResult> => {
const [slug, croppedPathname] = createSlug(input.url, input.title)
@ -100,6 +104,8 @@ export const savePage = async (
pageInfo: {
title: input.title,
canonicalUrl: input.url,
previewImage: input.previewImage,
author: input.author,
},
})
@ -119,6 +125,7 @@ export const savePage = async (
state: input.state || undefined,
rssFeedUrl: input.rssFeedUrl,
folder: input.folder,
feedContent: input.feedContent,
})
const isImported =
input.source === 'csv-importer' || input.source === 'pocket'
@ -196,6 +203,7 @@ export const parsedContentToLibraryItem = ({
state,
rssFeedUrl,
folder,
feedContent,
}: {
url: string
userId: string
@ -215,6 +223,7 @@ export const parsedContentToLibraryItem = ({
state?: ArticleSavingRequestStatus | null
rssFeedUrl?: string | null
folder?: string | null
feedContent?: string | null
}): DeepPartial<LibraryItem> & { originalUrl: string } => {
logger.info('save_page', { url, state, itemId })
return {
@ -257,5 +266,6 @@ export const parsedContentToLibraryItem = ({
archivedAt:
state === ArticleSavingRequestStatus.Archived ? new Date() : null,
deletedAt: state === ArticleSavingRequestStatus.Deleted ? new Date() : null,
feedContent,
}
}

View File

@ -1,4 +1,8 @@
import { Subscription, SubscriptionStatus } from '../entity/subscription'
import {
FetchContentType,
Subscription,
SubscriptionStatus,
} from '../entity/subscription'
import { getRepository } from '../repository'
const ensureOwns = async (userId: string, subscriptionId: string) => {
@ -16,7 +20,7 @@ const ensureOwns = async (userId: string, subscriptionId: string) => {
type UpdateSubscriptionData = {
autoAddToLibrary?: boolean | null
description?: string | null
fetchContent?: boolean | null
fetchContentType?: FetchContentType | null
folder?: string | null
isPrivate?: boolean | null
mostRecentItemDate?: Date | null
@ -48,7 +52,7 @@ export const updateSubscription = async (
failedAt: newData.failedAt || undefined,
autoAddToLibrary: newData.autoAddToLibrary ?? undefined,
isPrivate: newData.isPrivate ?? undefined,
fetchContent: newData.fetchContent ?? undefined,
fetchContentType: newData.fetchContentType ?? undefined,
folder: newData.folder ?? undefined,
})
@ -75,7 +79,7 @@ export const updateSubscriptions = async (
failedAt: newData.failedAt || undefined,
autoAddToLibrary: newData.autoAddToLibrary ?? undefined,
isPrivate: newData.isPrivate ?? undefined,
fetchContent: newData.fetchContent ?? undefined,
fetchContentType: newData.fetchContentType ?? undefined,
folder: newData.folder ?? undefined,
}))
)

View File

@ -9,6 +9,7 @@ import { DeepPartial } from 'typeorm'
import { v4 as uuid } from 'uuid'
import { ImportItemState } from '../entity/integration'
import { Recommendation } from '../entity/recommendation'
import { FetchContentType } from '../entity/subscription'
import { env } from '../env'
import {
ArticleSavingRequestStatus,
@ -625,7 +626,7 @@ export interface RssSubscriptionGroup {
mostRecentItemDates: (Date | null)[]
scheduledDates: Date[]
checksums: (string | null)[]
fetchContents: boolean[]
fetchContentTypes: FetchContentType[]
folders: string[]
}
@ -648,7 +649,7 @@ export const enqueueRssFeedFetch = async (
timestamp.getTime()
), // unix timestamp in milliseconds
userIds: subscriptionGroup.userIds,
fetchContents: subscriptionGroup.fetchContents,
fetchContentTypes: subscriptionGroup.fetchContentTypes,
folders: subscriptionGroup.folders,
}

View File

@ -6,4 +6,8 @@ BEGIN;
-- Rename library_item.preview_content to feed_content.
ALTER TABLE omnivore.library_item RENAME COLUMN preview_content TO feed_content;
-- Enum type backing subscriptions.fetch_content_type (declared without a schema qualifier).
CREATE TYPE fetch_content_enum AS ENUM ('ALWAYS', 'NEVER', 'WHEN_EMPTY');
-- Existing rows receive 'ALWAYS' through the column default.
-- NOTE(review): no backfill from the boolean fetch_content column is visible in this hunk;
-- confirm whether rows with fetch_content = false should be set to 'NEVER'.
ALTER TABLE omnivore.subscriptions ADD COLUMN fetch_content_type fetch_content_enum NOT NULL DEFAULT 'ALWAYS'::fetch_content_enum;
COMMIT;

View File

@ -4,6 +4,10 @@
-- Drops subscriptions.fetch_content_type and its enum type, then renames
-- library_item.feed_content back to preview_content, all in one transaction.
BEGIN;
-- Drop the column before the type: the enum cannot be dropped while a column still depends on it.
ALTER TABLE omnivore.subscriptions DROP COLUMN fetch_content_type;
DROP TYPE fetch_content_enum;
-- Restore the previous column name.
ALTER TABLE omnivore.library_item RENAME COLUMN feed_content TO preview_content;
COMMIT;