From 1f283e61221c766aa1c924bcf4e1c75e34831366 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 11 Jul 2023 16:15:32 +0800 Subject: [PATCH] add optional locale and timezone to saveUrl request for puppeteer to use --- packages/api/src/generated/graphql.ts | 2 ++ packages/api/src/generated/schema.graphql | 2 ++ packages/api/src/schema.ts | 2 ++ .../src/services/create_page_save_request.ts | 6 ++++++ packages/api/src/services/save_url.ts | 2 ++ packages/api/src/utils/createTask.ts | 6 ++++++ packages/puppeteer-parse/index.js | 18 ++++++++++++++++-- 7 files changed, 36 insertions(+), 2 deletions(-) diff --git a/packages/api/src/generated/graphql.ts b/packages/api/src/generated/graphql.ts index 82e0206d4..74cba4734 100644 --- a/packages/api/src/generated/graphql.ts +++ b/packages/api/src/generated/graphql.ts @@ -2266,8 +2266,10 @@ export type SaveSuccess = { export type SaveUrlInput = { clientRequestId: Scalars['ID']; labels?: InputMaybe<Array<CreateLabelInput>>; + locale?: InputMaybe<Scalars['String']>; source: Scalars['String']; state?: InputMaybe<ArticleSavingRequestStatus>; + timezone?: InputMaybe<Scalars['String']>; url: Scalars['String']; }; diff --git a/packages/api/src/generated/schema.graphql b/packages/api/src/generated/schema.graphql index 010eca42a..0c01844a6 100644 --- a/packages/api/src/generated/schema.graphql +++ b/packages/api/src/generated/schema.graphql @@ -1652,8 +1652,10 @@ type SaveSuccess { input SaveUrlInput { clientRequestId: ID! labels: [CreateLabelInput!] + locale: String source: String! state: ArticleSavingRequestStatus + timezone: String url: String! } diff --git a/packages/api/src/schema.ts b/packages/api/src/schema.ts index 4800d8da4..8bcce92df 100755 --- a/packages/api/src/schema.ts +++ b/packages/api/src/schema.ts @@ -569,6 +569,8 @@ const schema = gql` clientRequestId: ID! state: ArticleSavingRequestStatus labels: [CreateLabelInput!] 
+ locale: String + timezone: String } union SaveResult = SaveSuccess | SaveError diff --git a/packages/api/src/services/create_page_save_request.ts b/packages/api/src/services/create_page_save_request.ts index 724389618..8e0678d74 100644 --- a/packages/api/src/services/create_page_save_request.ts +++ b/packages/api/src/services/create_page_save_request.ts @@ -30,6 +30,8 @@ interface PageSaveRequest { labels?: Label[] priority?: 'low' | 'high' user?: User | null + locale?: string + timezone?: string } const SAVING_CONTENT = 'Your link is being saved...' @@ -81,6 +83,8 @@ export const createPageSaveRequest = async ({ priority, labels, user, + locale, + timezone, }: PageSaveRequest): Promise => { try { validateUrl(url) @@ -170,6 +174,8 @@ export const createPageSaveRequest = async ({ priority, state: archivedAt ? ArticleSavingRequestStatus.Archived : undefined, labels: labelsInput, + locale, + timezone, }) return pageToArticleSavingRequest(user, page) diff --git a/packages/api/src/services/save_url.ts b/packages/api/src/services/save_url.ts index 93e981d8c..cba500588 100644 --- a/packages/api/src/services/save_url.ts +++ b/packages/api/src/services/save_url.ts @@ -34,6 +34,8 @@ export const saveUrl = async ( archivedAt, labels, user, + locale: input.locale || undefined, + timezone: input.timezone || undefined, }) return { diff --git a/packages/api/src/utils/createTask.ts b/packages/api/src/utils/createTask.ts index 03752913e..6c79dab5e 100644 --- a/packages/api/src/utils/createTask.ts +++ b/packages/api/src/utils/createTask.ts @@ -215,6 +215,8 @@ export const enqueueParseRequest = async ({ queue = env.queue.name, state, labels, + locale, + timezone, }: { url: string userId: string @@ -223,6 +225,8 @@ export const enqueueParseRequest = async ({ queue?: string state?: ArticleSavingRequestStatus labels?: CreateLabelInput[] + locale?: string + timezone?: string }): Promise => { const { GOOGLE_CLOUD_PROJECT } = process.env const payload = { @@ -231,6 +235,8 @@ export const 
enqueueParseRequest = async ({ saveRequestId, state, labels, + locale, + timezone, } // If there is no Google Cloud Project Id exposed, it means that we are in local environment diff --git a/packages/puppeteer-parse/index.js b/packages/puppeteer-parse/index.js index c172b546a..3ed1a4c46 100644 --- a/packages/puppeteer-parse/index.js +++ b/packages/puppeteer-parse/index.js @@ -333,6 +333,8 @@ async function fetchContent(req, res) { const source = req.body.source || 'parseContent'; const taskId = req.body.taskId; // taskId is used to update import status const urlStr = (req.query ? req.query.url : undefined) || (req.body ? req.body.url : undefined); + const locale = (req.query ? req.query.locale : undefined) || (req.body ? req.body.locale : undefined); + const timezone = (req.query ? req.query.timezone : undefined) || (req.body ? req.body.timezone : undefined); let logRecord = { url: urlStr, @@ -344,6 +346,8 @@ async function fetchContent(req, res) { state, labelsToAdd: labels, taskId: taskId, + locale, + timezone, }; console.info(`Article parsing request`, logRecord); @@ -374,7 +378,7 @@ async function fetchContent(req, res) { } if ((!content || !title) && contentType !== 'application/pdf') { - const result = await retrievePage(url, logRecord, functionStartTime); + const result = await retrievePage(url, logRecord, functionStartTime, locale, timezone); if (result && result.context) { context = result.context } if (result && result.page) { page = result.page } if (result && result.finalUrl) { finalUrl = result.finalUrl } @@ -522,7 +526,7 @@ function getUrl(urlStr) { return parsed.href; } -async function retrievePage(url, logRecord, functionStartTime) { +async function retrievePage(url, logRecord, functionStartTime, locale, timezone) { validateUrlString(url); const browser = await getBrowserPromise; @@ -536,6 +540,16 @@ async function retrievePage(url, logRecord, functionStartTime) { } await page.setUserAgent(userAgentForUrl(url)); + // set locale for the page + if 
(locale) { + await page.setExtraHTTPHeaders({ 'Accept-Language': locale }); + } + + // set timezone for the page + if (timezone) { + await page.emulateTimezone(timezone); + } + const client = await page.target().createCDPSession(); // intercept request when response headers was received