return absolute feed url

This commit is contained in:
Hongbo Wu
2023-12-11 14:14:54 +08:00
parent 9f59d09c20
commit b3d0bb9ed7
4 changed files with 43 additions and 18 deletions

View File

@ -517,4 +517,5 @@ export const functionResolvers = {
...resultResolveTypeResolver('SetFavoriteArticle'),
...resultResolveTypeResolver('UpdateSubscription'),
...resultResolveTypeResolver('UpdateEmail'),
...resultResolveTypeResolver('ScanFeeds'),
}

View File

@ -40,7 +40,7 @@ import { unsubscribe } from '../../services/subscriptions'
import { Merge } from '../../util'
import { analytics } from '../../utils/analytics'
import { enqueueRssFeedFetch } from '../../utils/createTask'
import { authorized } from '../../utils/helpers'
import { authorized, getAbsoluteUrl } from '../../utils/helpers'
import { parseFeed, parseOpml, RSS_PARSER_CONFIG } from '../../utils/parser'
type PartialSubscription = Omit<Subscription, 'newsletterEmail'>
@ -418,7 +418,6 @@ export const scanFeedsResolver = authorized<
}
return {
__typename: 'ScanFeedsSuccess',
feeds: feeds.map((feed) => ({
url: feed.url,
title: feed.title,
@ -445,31 +444,39 @@ export const scanFeedsResolver = authorized<
if (isHtml) {
// this is an html page, parse rss feed links
const dom = parseHTML(content).document
const links = dom.querySelectorAll('link[type="application/rss+xml"]')
// type is application/rss+xml or application/atom+xml
const links = dom.querySelectorAll(
'link[type="application/rss+xml"], link[type="application/atom+xml"]'
)
const feeds = Array.from(links)
.map((link) => ({
url: link.getAttribute('href') || '',
title: link.getAttribute('title') || '',
type: 'rss',
}))
.map((link) => {
const href = link.getAttribute('href') || ''
const feedUrl = getAbsoluteUrl(href, url)
return {
url: feedUrl,
title: link.getAttribute('title') || '',
type: 'rss',
}
})
.filter((feed) => feed.url)
return {
__typename: 'ScanFeedsSuccess',
feeds,
}
}
// this is the url to an RSS feed
const feed = await parseFeed(url)
const feed = await parseFeed(url, content)
if (!feed) {
log.error('Failed to parse RSS feed')
return {
errorCodes: [ScanFeedsErrorCode.BadRequest],
feeds: [],
}
}
return {
__typename: 'ScanFeedsSuccess',
feeds: [feed],
}
} catch (error) {

View File

@ -399,3 +399,11 @@ export const deepDelete = <T, K extends keyof T>(obj: T, keys: K[]) => {
return copy as Omit<T, K>
}
/**
 * Reports whether `url` begins with a forward slash.
 *
 * This is a purely textual check on the first character: path-style
 * references such as `/feed.xml` (and protocol-relative references such as
 * `//host/feed`) yield `true`, while an empty string, a bare file name like
 * `feed.xml`, or a URL with a scheme yield `false`.
 *
 * @param url - The URL string to inspect.
 * @returns `true` when the first character of `url` is `/`, otherwise `false`.
 */
export const isRelativeUrl = (url: string): boolean => {
  // charAt(0) is '' for an empty string, so the comparison is simply false
  const firstChar = url.charAt(0)
  return firstChar === '/'
}
/**
 * Resolves `url` against `baseUrl` and returns the resulting absolute URL.
 *
 * Absolute inputs are returned normalized; relative inputs (`/feed.xml`,
 * `feed.xml`, `../rss`) are resolved against `baseUrl` using the standard
 * `URL` constructor.
 *
 * An empty or whitespace-only `url` returns `''` instead of being resolved.
 * The `URL` constructor would otherwise resolve it to `baseUrl` itself, which
 * makes a missing link look like a valid one and defeats callers that filter
 * out empty results.
 *
 * @param url - The absolute or relative URL to resolve.
 * @param baseUrl - The absolute URL that relative references are resolved against.
 * @returns The absolute URL string, or `''` when `url` is empty or blank.
 * @throws TypeError when the input cannot be parsed as a URL (for example
 *   when `baseUrl` is not an absolute URL).
 */
export const getAbsoluteUrl = (url: string, baseUrl: string): string => {
  // nothing to resolve: do not let an empty reference collapse to the base url
  if (url.trim() === '') {
    return ''
  }

  return new URL(url, baseUrl).href
}

View File

@ -798,17 +798,23 @@ export const parseHtml = async (url: string): Promise<Feed[] | undefined> => {
}
}
export const parseFeed = async (url: string): Promise<Feed | null> => {
export const parseFeed = async (
url: string,
content?: string | null
): Promise<Feed | null> => {
try {
// check if url is a telegram channel
const telegramRegex = /https:\/\/t\.me\/([a-zA-Z0-9_]+)/
const telegramMatch = url.match(telegramRegex)
if (telegramMatch) {
// fetch HTML and parse feeds
const html = await fetchHtml(url)
if (!html) return null
if (!content) {
// fetch HTML and parse feeds
content = await fetchHtml(url)
}
const dom = parseHTML(html).document
if (!content) return null
const dom = parseHTML(content).document
const title = dom.querySelector('meta[property="og:title"]')
const thumbnail = dom.querySelector('meta[property="og:image"]')
const description = dom.querySelector('meta[property="og:description"]')
@ -824,7 +830,10 @@ export const parseFeed = async (url: string): Promise<Feed | null> => {
const parser = new Parser(RSS_PARSER_CONFIG)
const feed = await parser.parseURL(url)
const feed = content
? await parser.parseString(content)
: await parser.parseURL(url)
const feedUrl = feed.feedUrl || url
return {