diff --git a/packages/api/package.json b/packages/api/package.json index d4801f5ed..3418c2962 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -83,6 +83,7 @@ "private-ip": "^2.3.3", "rss-parser": "^3.13.0", "sanitize-html": "^2.3.2", + "sax": "^1.3.0", "search-query-parser": "^1.6.0", "snake-case": "^3.0.3", "supertest": "^6.2.2", @@ -122,6 +123,7 @@ "@types/oauth": "^0.9.1", "@types/private-ip": "^1.0.0", "@types/sanitize-html": "^1.27.1", + "@types/sax": "^1.2.7", "@types/sinon": "^10.0.13", "@types/sinon-chai": "^3.2.8", "@types/supertest": "^2.0.11", diff --git a/packages/api/src/generated/graphql.ts b/packages/api/src/generated/graphql.ts index fe6a395b8..49eacf09a 100644 --- a/packages/api/src/generated/graphql.ts +++ b/packages/api/src/generated/graphql.ts @@ -729,13 +729,14 @@ export type Feature = { export type Feed = { __typename?: 'Feed'; author?: Maybe; - createdAt: Scalars['Date']; + createdAt?: Maybe; description?: Maybe; - id: Scalars['ID']; + id?: Maybe; image?: Maybe; publishedAt?: Maybe; title: Scalars['String']; - updatedAt: Scalars['Date']; + type?: Maybe; + updatedAt?: Maybe; url: Scalars['String']; }; @@ -1807,6 +1808,7 @@ export type Query = { recentEmails: RecentEmailsResult; recentSearches: RecentSearchesResult; rules: RulesResult; + scanFeeds: ScanFeedsResult; search: SearchResult; sendInstallInstructions: SendInstallInstructionsResult; subscriptions: SubscriptionsResult; @@ -1843,6 +1845,11 @@ export type QueryRulesArgs = { }; +export type QueryScanFeedsArgs = { + input: ScanFeedsInput; +}; + + export type QuerySearchArgs = { after?: InputMaybe; first?: InputMaybe; @@ -2272,6 +2279,33 @@ export type SaveUrlInput = { url: Scalars['String']; }; +export type ScanFeedsError = { + __typename?: 'ScanFeedsError'; + errorCodes: Array; +}; + +export enum ScanFeedsErrorCode { + BadRequest = 'BAD_REQUEST' +} + +export type ScanFeedsInput = { + opml?: InputMaybe; + type: ScanFeedsType; + url?: InputMaybe; +}; + +export type 
ScanFeedsResult = ScanFeedsError | ScanFeedsSuccess; + +export type ScanFeedsSuccess = { + __typename?: 'ScanFeedsSuccess'; + feeds: Array; +}; + +export enum ScanFeedsType { + Html = 'HTML', + Opml = 'OPML' +} + export type SearchError = { __typename?: 'SearchError'; errorCodes: Array; @@ -3739,6 +3773,12 @@ export type ResolversTypes = { SaveResult: ResolversTypes['SaveError'] | ResolversTypes['SaveSuccess']; SaveSuccess: ResolverTypeWrapper; SaveUrlInput: SaveUrlInput; + ScanFeedsError: ResolverTypeWrapper; + ScanFeedsErrorCode: ScanFeedsErrorCode; + ScanFeedsInput: ScanFeedsInput; + ScanFeedsResult: ResolversTypes['ScanFeedsError'] | ResolversTypes['ScanFeedsSuccess']; + ScanFeedsSuccess: ResolverTypeWrapper; + ScanFeedsType: ScanFeedsType; SearchError: ResolverTypeWrapper; SearchErrorCode: SearchErrorCode; SearchItem: ResolverTypeWrapper; @@ -4190,6 +4230,10 @@ export type ResolversParentTypes = { SaveResult: ResolversParentTypes['SaveError'] | ResolversParentTypes['SaveSuccess']; SaveSuccess: SaveSuccess; SaveUrlInput: SaveUrlInput; + ScanFeedsError: ScanFeedsError; + ScanFeedsInput: ScanFeedsInput; + ScanFeedsResult: ResolversParentTypes['ScanFeedsError'] | ResolversParentTypes['ScanFeedsSuccess']; + ScanFeedsSuccess: ScanFeedsSuccess; SearchError: SearchError; SearchItem: SearchItem; SearchItemEdge: SearchItemEdge; @@ -4845,13 +4889,14 @@ export type FeatureResolvers = { author?: Resolver, ParentType, ContextType>; - createdAt?: Resolver; + createdAt?: Resolver, ParentType, ContextType>; description?: Resolver, ParentType, ContextType>; - id?: Resolver; + id?: Resolver, ParentType, ContextType>; image?: Resolver, ParentType, ContextType>; publishedAt?: Resolver, ParentType, ContextType>; title?: Resolver; - updatedAt?: Resolver; + type?: Resolver, ParentType, ContextType>; + updatedAt?: Resolver, ParentType, ContextType>; url?: Resolver; __isTypeOf?: IsTypeOfResolverFn; }; @@ -5436,6 +5481,7 @@ export type QueryResolvers; recentSearches?: Resolver; rules?: 
Resolver>; + scanFeeds?: Resolver>; search?: Resolver>; sendInstallInstructions?: Resolver; subscriptions?: Resolver>; @@ -5689,6 +5735,20 @@ export type SaveSuccessResolvers; }; +export type ScanFeedsErrorResolvers = { + errorCodes?: Resolver, ParentType, ContextType>; + __isTypeOf?: IsTypeOfResolverFn; +}; + +export type ScanFeedsResultResolvers = { + __resolveType: TypeResolveFn<'ScanFeedsError' | 'ScanFeedsSuccess', ParentType, ContextType>; +}; + +export type ScanFeedsSuccessResolvers = { + feeds?: Resolver, ParentType, ContextType>; + __isTypeOf?: IsTypeOfResolverFn; +}; + export type SearchErrorResolvers = { errorCodes?: Resolver, ParentType, ContextType>; __isTypeOf?: IsTypeOfResolverFn; @@ -6581,6 +6641,9 @@ export type Resolvers = { SaveFilterSuccess?: SaveFilterSuccessResolvers; SaveResult?: SaveResultResolvers; SaveSuccess?: SaveSuccessResolvers; + ScanFeedsError?: ScanFeedsErrorResolvers; + ScanFeedsResult?: ScanFeedsResultResolvers; + ScanFeedsSuccess?: ScanFeedsSuccessResolvers; SearchError?: SearchErrorResolvers; SearchItem?: SearchItemResolvers; SearchItemEdge?: SearchItemEdgeResolvers; diff --git a/packages/api/src/generated/schema.graphql b/packages/api/src/generated/schema.graphql index 2903d85fb..537a7e696 100644 --- a/packages/api/src/generated/schema.graphql +++ b/packages/api/src/generated/schema.graphql @@ -644,13 +644,14 @@ type Feature { type Feed { author: String - createdAt: Date! + createdAt: Date description: String - id: ID! + id: ID image: String publishedAt: Date title: String! - updatedAt: Date! + type: String + updatedAt: Date url: String! } @@ -1364,6 +1365,7 @@ type Query { recentEmails: RecentEmailsResult! recentSearches: RecentSearchesResult! rules(enabled: Boolean): RulesResult! + scanFeeds(input: ScanFeedsInput!): ScanFeedsResult! search(after: String, first: Int, format: String, includeContent: Boolean, query: String): SearchResult! sendInstallInstructions: SendInstallInstructionsResult! 
subscriptions(sort: SortParams, type: SubscriptionType): SubscriptionsResult! @@ -1729,6 +1731,31 @@ input SaveUrlInput { url: String! } +type ScanFeedsError { + errorCodes: [ScanFeedsErrorCode!]! +} + +enum ScanFeedsErrorCode { + BAD_REQUEST +} + +input ScanFeedsInput { + opml: String + type: ScanFeedsType! + url: String +} + +union ScanFeedsResult = ScanFeedsError | ScanFeedsSuccess + +type ScanFeedsSuccess { + feeds: [Feed!]! +} + +enum ScanFeedsType { + HTML + OPML +} + type SearchError { errorCodes: [SearchErrorCode!]! } diff --git a/packages/api/src/resolvers/following/index.ts b/packages/api/src/resolvers/following/index.ts index 6c8f1deb7..2d20e3d24 100644 --- a/packages/api/src/resolvers/following/index.ts +++ b/packages/api/src/resolvers/following/index.ts @@ -1,3 +1,5 @@ +import axios from 'axios' +import { parseHTML } from 'linkedom' import { LibraryItem } from '../../entity/library_item' import { FeedEdge, @@ -9,6 +11,11 @@ import { MoveToFolderSuccess, MutationMoveToFolderArgs, QueryFeedsArgs, + QueryScanFeedsArgs, + ScanFeedsError, + ScanFeedsErrorCode, + ScanFeedsSuccess, + ScanFeedsType, } from '../../generated/graphql' import { feedRepository } from '../../repository/feed' import { createPageSaveRequest } from '../../services/create_page_save_request' @@ -18,6 +25,7 @@ import { authorized, libraryItemToArticleSavingRequest, } from '../../utils/helpers' +import { parseOpml } from '../../utils/parser' export const feedsResolver = authorized< FeedsSuccess, @@ -144,3 +152,80 @@ export const moveToFolderResolver = authorized< ), } }) + +export const scanFeedsResolver = authorized< + ScanFeedsSuccess, + ScanFeedsError, + QueryScanFeedsArgs +>(async (_, { input: { type, opml, url } }, { log, uid }) => { + analytics.track({ + userId: uid, + event: 'scan_feeds', + properties: { + type, + }, + }) + + if (type === ScanFeedsType.Opml) { + if (!opml) { + return { + errorCodes: [ScanFeedsErrorCode.BadRequest], + } + } + + // parse opml + const feeds = 
parseOpml(opml) + if (!feeds) { + return { + errorCodes: [ScanFeedsErrorCode.BadRequest], + } + } + + return { + __typename: 'ScanFeedsSuccess', + feeds: feeds.map((feed) => ({ + url: feed.feedUrl, + title: feed.title, + type: feed.feedType || 'rss', + })), + } + } + + if (!url) { + return { + errorCodes: [ScanFeedsErrorCode.BadRequest], + } + } + + try { + // fetch HTML and parse feeds + const response = await axios.get(url, { + timeout: 5000, + headers: { + 'User-Agent': 'Mozilla/5.0', + Accept: 'text/html', + }, + }) + const html = response.data as string + const dom = parseHTML(html).document + const links = dom.querySelectorAll('link[type="application/rss+xml"]') + const feeds = Array.from(links) + .map((link) => ({ + url: link.getAttribute('href') || '', + title: link.getAttribute('title') || '', + type: 'rss', + })) + .filter((feed) => feed.url) + + return { + __typename: 'ScanFeedsSuccess', + feeds, + } + } catch (error) { + log.error('Error scanning HTML', error) + + return { + errorCodes: [ScanFeedsErrorCode.BadRequest], + } + } +}) diff --git a/packages/api/src/schema.ts b/packages/api/src/schema.ts index 1e774fc0b..fca5ff9c9 100755 --- a/packages/api/src/schema.ts +++ b/packages/api/src/schema.ts @@ -2646,15 +2646,16 @@ const schema = gql` } type Feed { - id: ID! + id: ID title: String! url: String! description: String image: String - createdAt: Date! - updatedAt: Date! + createdAt: Date + updatedAt: Date publishedAt: Date author: String + type: String } union MoveToFolderResult = MoveToFolderSuccess | MoveToFolderError @@ -2673,6 +2674,31 @@ const schema = gql` ALREADY_EXISTS } + input ScanFeedsInput { + type: ScanFeedsType! + url: String + opml: String + } + + enum ScanFeedsType { + OPML + HTML + } + + union ScanFeedsResult = ScanFeedsSuccess | ScanFeedsError + + type ScanFeedsSuccess { + feeds: [Feed!]! + } + + type ScanFeedsError { + errorCodes: [ScanFeedsErrorCode!]! 
+  }
+
+  enum ScanFeedsErrorCode {
+    BAD_REQUEST
+  }
+
   # Mutations
   type Mutation {
     googleLogin(input: GoogleLoginInput!): LoginResult!
@@ -2837,6 +2863,7 @@ const schema = gql`
     groups: GroupsResult!
     recentEmails: RecentEmailsResult!
     feeds(input: FeedsInput!): FeedsResult!
+    scanFeeds(input: ScanFeedsInput!): ScanFeedsResult!
   }
 `
diff --git a/packages/api/src/utils/helpers.ts b/packages/api/src/utils/helpers.ts
index fedfe34f0..b0ff7cfcf 100644
--- a/packages/api/src/utils/helpers.ts
+++ b/packages/api/src/utils/helpers.ts
@@ -30,7 +30,6 @@ import { validateUrl } from '../services/create_page_save_request'
 import { updateLibraryItem } from '../services/library_item'
 import { Merge } from '../util'
 import { logger } from './logger'
-import { InFilter } from './search'
 interface InputObject {
   // eslint-disable-next-line @typescript-eslint/no-explicit-any
   [key: string]: any
diff --git a/packages/api/src/utils/parser.ts b/packages/api/src/utils/parser.ts
index eb8ece9c6..d1516a9c9 100644
--- a/packages/api/src/utils/parser.ts
+++ b/packages/api/src/utils/parser.ts
@@ -12,6 +12,7 @@ import * as jwt from 'jsonwebtoken'
 import { parseHTML } from 'linkedom'
 import { NodeHtmlMarkdown, TranslatorConfigObject } from 'node-html-markdown'
 import { ElementNode } from 'node-html-markdown/dist/nodes'
+import { parser } from 'sax'
 import { ILike } from 'typeorm'
 import { promisify } from 'util'
 import { v4 as uuid } from 'uuid'
@@ -31,6 +32,13 @@ import {
 import { createImageProxyUrl } from './imageproxy'
 import { buildLogger, LogRecord } from './logger'
 
+interface Feed {
+  title: string
+  url: string
+  feedUrl: string
+  feedType: string
+}
+
 const logger = buildLogger('utils.parse')
 
 const signToken = promisify(jwt.sign)
@@ -703,3 +711,39 @@ export const getDistillerResult = async (
     return undefined
   }
 }
+
+// Parses an OPML document and returns its unique feed subscriptions,
+// or undefined if the document is not well-formed XML.
+export const parseOpml = (opml: string): Feed[] | undefined => {
+  const xmlParser = parser(true, { lowercase: true })
+  const feeds: Feed[] = []
+  const existingFeeds = new Set<string>()
+
+  xmlParser.onopentag = function (node) {
+    if (node.name === 'outline') {
+      // folders also are outlines, make sure an xmlUrl is available
+      const feedUrl = node.attributes.xmlUrl?.toString()
+      if (feedUrl && !existingFeeds.has(feedUrl)) {
+        feeds.push({
+          title:
+            node.attributes.title?.toString() ||
+            node.attributes.text?.toString() ||
+            node.attributes.description?.toString() ||
+            '',
+          url: node.attributes.htmlUrl?.toString() || '',
+          feedUrl,
+          feedType: node.attributes.type?.toString() || '',
+        })
+        existingFeeds.add(feedUrl)
+      }
+    }
+  }
+
+  try {
+    xmlParser.write(opml).close()
+  } catch (error) {
+    logger.error('Error parsing opml', error)
+    return undefined
+  }
+
+  // sax write/close is synchronous, so feeds is fully populated here
+  return feeds
+}
diff --git a/yarn.lock b/yarn.lock
index 1091ebb97..96f75c2b7 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -7663,6 +7663,13 @@
     dependencies:
       htmlparser2 "^4.1.0"
 
+"@types/sax@^1.2.7":
+  version "1.2.7"
+  resolved "https://registry.yarnpkg.com/@types/sax/-/sax-1.2.7.tgz#ba5fe7df9aa9c89b6dff7688a19023dd2963091d"
+  integrity sha512-rO73L89PJxeYM3s3pPPjiPgVVcymqU490g0YO5n5By0k2Erzj6tay/4lr1CHAAU4JyOWd1rpQ8bCf6cZfHU96A==
+  dependencies:
+    "@types/node" "*"
+
 "@types/scheduler@*":
   version "0.16.2"
   resolved "https://registry.yarnpkg.com/@types/scheduler/-/scheduler-0.16.2.tgz#1a62f89525723dde24ba1b01b092bf5df8ad4d39"
@@ -24939,6 +24946,11 @@ sax@>=0.6.0:
   resolved "https://registry.yarnpkg.com/sax/-/sax-1.2.4.tgz#2816234e2378bddc4e5354fab5caa895df7100d9"
   integrity sha512-NqVDv9TpANUjFm0N8uM5GxL36UgKi9/atZw+x7YFnQ8ckwFGKrl4xX4yWtrey3UJm5nP1kUbnYgLopqWNSRhWw==
 
+sax@^1.3.0:
+  version "1.3.0"
+  resolved "https://registry.yarnpkg.com/sax/-/sax-1.3.0.tgz#a5dbe77db3be05c9d1ee7785dbd3ea9de51593d0"
+  integrity sha512-0s+oAmw9zLl1V1cS9BtZN7JAd0cW5e0QH4W3LWEK6a4LaLEA2OTpGYWDY+6XasBLtz6wkm3u1xRw95mRuJ59WA==
+
 saxes@^5.0.1:
   version "5.0.1"
   resolved "https://registry.yarnpkg.com/saxes/-/saxes-5.0.1.tgz#eebab953fa3b7608dbe94e5dadb15c888fa6696d"