From bb839960f153e3176d82bbd107adb7635283846d Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 30 Nov 2022 17:46:59 +0800 Subject: [PATCH] Fix site_name not saved when a page is saved by extensions --- packages/api/src/elastic/pages.ts | 2 - packages/api/src/resolvers/article/index.ts | 49 +++------ packages/api/src/services/save_page.ts | 110 ++++++++++++++++---- 3 files changed, 102 insertions(+), 59 deletions(-) diff --git a/packages/api/src/elastic/pages.ts b/packages/api/src/elastic/pages.ts index 1c4a9d482..c7222741a 100644 --- a/packages/api/src/elastic/pages.ts +++ b/packages/api/src/elastic/pages.ts @@ -22,7 +22,6 @@ import { import { client, INDEX_ALIAS } from './index' import { EntityType } from '../datalayer/pubsub' import { ResponseError } from '@elastic/elasticsearch/lib/errors' -import { wordsCount } from '../utils/helpers' const appendQuery = (body: SearchBody, query: string): void => { body.query.bool.should.push({ @@ -199,7 +198,6 @@ export const createPage = async ( ...page, updatedAt: new Date(), savedAt: new Date(), - wordsCount: wordsCount(page.content), }, refresh: ctx.refresh, }) diff --git a/packages/api/src/resolvers/article/index.ts b/packages/api/src/resolvers/article/index.ts index 3c5b22b6f..695e47664 100644 --- a/packages/api/src/resolvers/article/index.ts +++ b/packages/api/src/resolvers/article/index.ts @@ -59,11 +59,9 @@ import { isBase64Image, isParsingTimeout, pageError, - stringToHash, titleForFilePath, userDataToUser, validatedDate, - wordsCount, } from '../../utils/helpers' import { ParsedContentPuppeteer, @@ -99,6 +97,7 @@ import { } from '../../elastic/pages' import { searchHighlights } from '../../elastic/highlights' import { saveSearchHistory } from '../../services/search_history' +import { parsedContentToPage } from '../../services/save_page' export type PartialArticle = Omit< Article, @@ -262,40 +261,22 @@ export const createArticleResolver = authorized< const saveTime = new Date() const slug = generateSlug(parsedContent?.title || croppedPathname) - const articleToSave: Page = { - id: pageId || '', + const articleToSave = parsedContentToPage({ + url, + title, + parsedContent, userId: uid, - originalHtml: domContent, - content: parsedContent?.content || '', - description: parsedContent?.excerpt || '', - title: - title || - parsedContent?.title || - preparedDocument?.pageInfo.title || - croppedPathname || - parsedContent?.siteName || - url, - author: parsedContent?.byline, - url: normalizeUrl(canonicalUrl || url, { - stripHash: true, - stripWWW: false, - }), - pageType: pageType, - hash: uploadFileHash || stringToHash(parsedContent?.content || url), - image: parsedContent?.previewImage, - publishedAt: validatedDate(parsedContent?.publishedDate), - uploadFileId: uploadFileId, + pageId, slug, - createdAt: saveTime, - savedAt: saveTime, - siteName: parsedContent?.siteName, - siteIcon: parsedContent?.siteIcon, - readingProgressPercent: 0, - readingProgressAnchorIndex: 0, - state: ArticleSavingRequestStatus.Succeeded, - language: parsedContent?.language, - wordsCount: wordsCount(parsedContent?.textContent || ''), - } + croppedPathname, + originalHtml: domContent, + pageType, + preparedDocument, + uploadFileHash, + canonicalUrl, + uploadFileId, + saveTime, + }) let archive = false if (pageId) { diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index 6647e7a42..a7997969a 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -2,19 +2,26 @@ import { PubsubClient } from '../datalayer/pubsub' import { homePageURL } from '../env' import { Maybe, + PreparedDocumentInput, SaveErrorCode, SavePageInput, SaveResult, } from '../generated/graphql' import { DataModels } from '../resolvers/types' -import { generateSlug, stringToHash, validatedDate } from '../utils/helpers' +import { + generateSlug, + stringToHash, + validatedDate, + wordsCount, +} from '../utils/helpers' import { parsePreparedContent } from '../utils/parser' import normalizeUrl from 'normalize-url' import { createPageSaveRequest } from './create_page_save_request' -import { ArticleSavingRequestStatus, Page } from '../elastic/types' +import { ArticleSavingRequestStatus, Page, PageType } from '../elastic/types' import { createPage, getPageByParam, updatePage } from '../elastic/pages' import { addHighlightToPage } from '../elastic/highlights' +import { Readability } from '@omnivore/readability' type SaveContext = { pubsub: PubsubClient @@ -78,29 +85,18 @@ export const savePage = async ( }, }) - const articleToSave: Page = { - id: input.clientRequestId, - slug, + const articleToSave = parsedContentToPage({ + url: input.url, + title: input.title, userId: saver.userId, - originalHtml: parseResult.domContent, - content: parseResult.parsedContent?.content || '', - description: parseResult.parsedContent?.excerpt, - title: parseResult.parsedContent?.title || input.title || croppedPathname, - author: parseResult.parsedContent?.byline, - url: normalizeUrl(parseResult.canonicalUrl || input.url, { - stripHash: true, - stripWWW: false, - }), + pageId: input.clientRequestId, + slug, + croppedPathname, + parsedContent: parseResult.parsedContent, pageType: parseResult.pageType, - hash: stringToHash(parseResult.parsedContent?.content || input.url), - image: parseResult.parsedContent?.previewImage, - publishedAt: validatedDate(parseResult.parsedContent?.publishedDate), - readingProgressPercent: 0, - readingProgressAnchorIndex: 0, - state: ArticleSavingRequestStatus.Succeeded, - createdAt: new Date(), - savedAt: new Date(), - } + originalHtml: parseResult.domContent, + canonicalUrl: parseResult.canonicalUrl, + }) let pageId: string | undefined = undefined const existingPage = await getPageByParam({ @@ -179,3 +175,71 @@ export const savePage = async ( url: `${homePageURL()}/${saver.username}/${slug}`, } } + +// convert parsed content to an elastic page +export const parsedContentToPage = ({ + url, + userId, + originalHtml, + pageId, + parsedContent, + slug, + croppedPathname, + title, + preparedDocument, + canonicalUrl, + pageType, + uploadFileHash, + uploadFileId, + saveTime, +}: { + url: string + userId: string + slug: string + croppedPathname: string + pageType: PageType + parsedContent: Readability.ParseResult | null + originalHtml?: string | null + pageId?: string | null + title?: string | null + preparedDocument?: PreparedDocumentInput | null + canonicalUrl?: string | null + uploadFileHash?: string | null + uploadFileId?: string | null + saveTime?: Date +}): Page => { + return { + id: pageId || '', + slug, + userId, + originalHtml, + content: parsedContent?.content || '', + description: parsedContent?.excerpt, + title: + title || + parsedContent?.title || + preparedDocument?.pageInfo.title || + croppedPathname || + parsedContent?.siteName || + url, + author: parsedContent?.byline, + url: normalizeUrl(canonicalUrl || url, { + stripHash: true, + stripWWW: false, + }), + pageType, + hash: uploadFileHash || stringToHash(parsedContent?.content || url), + image: parsedContent?.previewImage, + publishedAt: validatedDate(parsedContent?.publishedDate), + uploadFileId: uploadFileId, + readingProgressPercent: 0, + readingProgressAnchorIndex: 0, + state: ArticleSavingRequestStatus.Succeeded, + createdAt: saveTime || new Date(), + savedAt: saveTime || new Date(), + siteName: parsedContent?.siteName, + language: parsedContent?.language, + siteIcon: parsedContent?.siteIcon, + wordsCount: wordsCount(parsedContent?.textContent || ''), + } +}