Merge pull request #1486 from omnivore-app/fix/site-name-extraction

Fix site_name not saved when a page is saved by extensions
This commit is contained in:
Hongbo Wu
2022-12-01 11:28:51 +08:00
committed by GitHub
3 changed files with 102 additions and 59 deletions

View File

@ -22,7 +22,6 @@ import {
import { client, INDEX_ALIAS } from './index'
import { EntityType } from '../datalayer/pubsub'
import { ResponseError } from '@elastic/elasticsearch/lib/errors'
import { wordsCount } from '../utils/helpers'
const appendQuery = (body: SearchBody, query: string): void => {
body.query.bool.should.push({
@ -199,7 +198,6 @@ export const createPage = async (
...page,
updatedAt: new Date(),
savedAt: new Date(),
wordsCount: wordsCount(page.content),
},
refresh: ctx.refresh,
})

View File

@ -59,11 +59,9 @@ import {
isBase64Image,
isParsingTimeout,
pageError,
stringToHash,
titleForFilePath,
userDataToUser,
validatedDate,
wordsCount,
} from '../../utils/helpers'
import {
ParsedContentPuppeteer,
@ -99,6 +97,7 @@ import {
} from '../../elastic/pages'
import { searchHighlights } from '../../elastic/highlights'
import { saveSearchHistory } from '../../services/search_history'
import { parsedContentToPage } from '../../services/save_page'
export type PartialArticle = Omit<
Article,
@ -262,40 +261,22 @@ export const createArticleResolver = authorized<
const saveTime = new Date()
const slug = generateSlug(parsedContent?.title || croppedPathname)
const articleToSave: Page = {
id: pageId || '',
const articleToSave = parsedContentToPage({
url,
title,
parsedContent,
userId: uid,
originalHtml: domContent,
content: parsedContent?.content || '',
description: parsedContent?.excerpt || '',
title:
title ||
parsedContent?.title ||
preparedDocument?.pageInfo.title ||
croppedPathname ||
parsedContent?.siteName ||
url,
author: parsedContent?.byline,
url: normalizeUrl(canonicalUrl || url, {
stripHash: true,
stripWWW: false,
}),
pageType: pageType,
hash: uploadFileHash || stringToHash(parsedContent?.content || url),
image: parsedContent?.previewImage,
publishedAt: validatedDate(parsedContent?.publishedDate),
uploadFileId: uploadFileId,
pageId,
slug,
createdAt: saveTime,
savedAt: saveTime,
siteName: parsedContent?.siteName,
siteIcon: parsedContent?.siteIcon,
readingProgressPercent: 0,
readingProgressAnchorIndex: 0,
state: ArticleSavingRequestStatus.Succeeded,
language: parsedContent?.language,
wordsCount: wordsCount(parsedContent?.textContent || ''),
}
croppedPathname,
originalHtml: domContent,
pageType,
preparedDocument,
uploadFileHash,
canonicalUrl,
uploadFileId,
saveTime,
})
let archive = false
if (pageId) {

View File

@ -2,19 +2,26 @@ import { PubsubClient } from '../datalayer/pubsub'
import { homePageURL } from '../env'
import {
Maybe,
PreparedDocumentInput,
SaveErrorCode,
SavePageInput,
SaveResult,
} from '../generated/graphql'
import { DataModels } from '../resolvers/types'
import { generateSlug, stringToHash, validatedDate } from '../utils/helpers'
import {
generateSlug,
stringToHash,
validatedDate,
wordsCount,
} from '../utils/helpers'
import { parsePreparedContent } from '../utils/parser'
import normalizeUrl from 'normalize-url'
import { createPageSaveRequest } from './create_page_save_request'
import { ArticleSavingRequestStatus, Page } from '../elastic/types'
import { ArticleSavingRequestStatus, Page, PageType } from '../elastic/types'
import { createPage, getPageByParam, updatePage } from '../elastic/pages'
import { addHighlightToPage } from '../elastic/highlights'
import { Readability } from '@omnivore/readability'
type SaveContext = {
pubsub: PubsubClient
@ -78,29 +85,18 @@ export const savePage = async (
},
})
const articleToSave: Page = {
id: input.clientRequestId,
slug,
const articleToSave = parsedContentToPage({
url: input.url,
title: input.title,
userId: saver.userId,
originalHtml: parseResult.domContent,
content: parseResult.parsedContent?.content || '',
description: parseResult.parsedContent?.excerpt,
title: parseResult.parsedContent?.title || input.title || croppedPathname,
author: parseResult.parsedContent?.byline,
url: normalizeUrl(parseResult.canonicalUrl || input.url, {
stripHash: true,
stripWWW: false,
}),
pageId: input.clientRequestId,
slug,
croppedPathname,
parsedContent: parseResult.parsedContent,
pageType: parseResult.pageType,
hash: stringToHash(parseResult.parsedContent?.content || input.url),
image: parseResult.parsedContent?.previewImage,
publishedAt: validatedDate(parseResult.parsedContent?.publishedDate),
readingProgressPercent: 0,
readingProgressAnchorIndex: 0,
state: ArticleSavingRequestStatus.Succeeded,
createdAt: new Date(),
savedAt: new Date(),
}
originalHtml: parseResult.domContent,
canonicalUrl: parseResult.canonicalUrl,
})
let pageId: string | undefined = undefined
const existingPage = await getPageByParam({
@ -179,3 +175,71 @@ export const savePage = async (
url: `${homePageURL()}/${saver.username}/${slug}`,
}
}
/**
 * Converts parsed content into an elastic `Page` object.
 *
 * Field resolution performed here:
 * - `id`: `pageId`, or an empty string when it is missing.
 * - `title`: the first non-empty value among the explicit `title`,
 *   `parsedContent.title`, `preparedDocument.pageInfo.title`,
 *   `croppedPathname`, `parsedContent.siteName` and finally `url`.
 * - `url`: `canonicalUrl` when provided, otherwise `url`, passed through
 *   `normalizeUrl` with `stripHash: true` and `stripWWW: false`.
 * - `hash`: `uploadFileHash` when provided, otherwise `stringToHash` of the
 *   parsed content, or of `url` when there is no parsed content.
 * - `createdAt` / `savedAt`: `saveTime` when provided, otherwise the current
 *   time; both fields always receive the same instant.
 * - `siteName`, `siteIcon`, `language`, `author`, `image`, `description` and
 *   `publishedAt` are taken from `parsedContent` and stay undefined when it
 *   is null.
 *
 * Reading progress fields start at 0 and `state` is always
 * `ArticleSavingRequestStatus.Succeeded`.
 *
 * @returns the page built from the given input; nothing is persisted here.
 */
export const parsedContentToPage = ({
  url,
  userId,
  originalHtml,
  pageId,
  parsedContent,
  slug,
  croppedPathname,
  title,
  preparedDocument,
  canonicalUrl,
  pageType,
  uploadFileHash,
  uploadFileId,
  saveTime,
}: {
  url: string
  userId: string
  slug: string
  croppedPathname: string
  pageType: PageType
  parsedContent: Readability.ParseResult | null
  originalHtml?: string | null
  pageId?: string | null
  title?: string | null
  preparedDocument?: PreparedDocumentInput | null
  canonicalUrl?: string | null
  uploadFileHash?: string | null
  uploadFileId?: string | null
  saveTime?: Date
}): Page => {
  // Resolve the timestamp once so createdAt and savedAt are identical even
  // when the caller does not supply saveTime (two separate `new Date()` calls
  // could otherwise land on different milliseconds).
  const timestamp = saveTime ?? new Date()

  // NOTE: the `||` chains below are deliberate (not `??`): empty strings must
  // fall through to the next candidate.
  return {
    id: pageId || '',
    slug,
    userId,
    originalHtml,
    content: parsedContent?.content || '',
    description: parsedContent?.excerpt,
    title:
      title ||
      parsedContent?.title ||
      preparedDocument?.pageInfo.title ||
      croppedPathname ||
      parsedContent?.siteName ||
      url,
    author: parsedContent?.byline,
    url: normalizeUrl(canonicalUrl || url, {
      stripHash: true,
      stripWWW: false,
    }),
    pageType,
    hash: uploadFileHash || stringToHash(parsedContent?.content || url),
    image: parsedContent?.previewImage,
    publishedAt: validatedDate(parsedContent?.publishedDate),
    uploadFileId,
    readingProgressPercent: 0,
    readingProgressAnchorIndex: 0,
    state: ArticleSavingRequestStatus.Succeeded,
    createdAt: timestamp,
    savedAt: timestamp,
    siteName: parsedContent?.siteName,
    language: parsedContent?.language,
    siteIcon: parsedContent?.siteIcon,
    wordsCount: wordsCount(parsedContent?.textContent || ''),
  }
}