Add optional locale and timezone to the saveUrl request for Puppeteer to use

This commit is contained in:
Hongbo Wu
2023-07-11 16:15:32 +08:00
parent 4cb05acfe0
commit 1f283e6122
7 changed files with 36 additions and 2 deletions

View File

@ -2266,8 +2266,10 @@ export type SaveSuccess = {
/**
 * Input object for a save-URL request.
 *
 * `clientRequestId`, `source` and `url` are required. All other fields are
 * optional (declared with `?` and wrapped in `InputMaybe`).
 */
export type SaveUrlInput = {
/** Required ID supplied by the client for this request. */
clientRequestId: Scalars['ID'];
/** Optional list of label inputs to attach. */
labels?: InputMaybe<Array<CreateLabelInput>>;
/**
 * Optional locale string.
 * NOTE(review): presumably a language tag (e.g. `en-US`) used when the page
 * is fetched — confirm against the content-fetch handler.
 */
locale?: InputMaybe<Scalars['String']>;
/** Required string identifying the source of the request. */
source: Scalars['String'];
/** Optional saving-request status. */
state?: InputMaybe<ArticleSavingRequestStatus>;
/**
 * Optional timezone string.
 * NOTE(review): presumably an IANA timezone id (e.g. `Europe/Berlin`) used
 * when the page is fetched — confirm against the content-fetch handler.
 */
timezone?: InputMaybe<Scalars['String']>;
/** Required URL to save. */
url: Scalars['String'];
};

View File

@ -1652,8 +1652,10 @@ type SaveSuccess {
# Input object for a save-URL request.
# Fields marked with `!` are required; all others are optional.
input SaveUrlInput {
# Required ID supplied by the client for this request.
clientRequestId: ID!
# Optional list of label inputs to attach.
labels: [CreateLabelInput!]
# Optional locale string.
# NOTE(review): presumably a language tag (e.g. en-US) used when the page is
# fetched - confirm against the content-fetch handler.
locale: String
# Required string identifying the source of the request.
source: String!
# Optional saving-request status.
state: ArticleSavingRequestStatus
# Optional timezone string.
# NOTE(review): presumably an IANA timezone id (e.g. Europe/Berlin) used when
# the page is fetched - confirm against the content-fetch handler.
timezone: String
# Required URL to save.
url: String!
}

View File

@ -569,6 +569,8 @@ const schema = gql`
clientRequestId: ID!
state: ArticleSavingRequestStatus
labels: [CreateLabelInput!]
locale: String
timezone: String
}
union SaveResult = SaveSuccess | SaveError

View File

@ -30,6 +30,8 @@ interface PageSaveRequest {
labels?: Label[]
priority?: 'low' | 'high'
user?: User | null
locale?: string
timezone?: string
}
const SAVING_CONTENT = 'Your link is being saved...'
@ -81,6 +83,8 @@ export const createPageSaveRequest = async ({
priority,
labels,
user,
locale,
timezone,
}: PageSaveRequest): Promise<ArticleSavingRequest> => {
try {
validateUrl(url)
@ -170,6 +174,8 @@ export const createPageSaveRequest = async ({
priority,
state: archivedAt ? ArticleSavingRequestStatus.Archived : undefined,
labels: labelsInput,
locale,
timezone,
})
return pageToArticleSavingRequest(user, page)

View File

@ -34,6 +34,8 @@ export const saveUrl = async (
archivedAt,
labels,
user,
locale: input.locale || undefined,
timezone: input.timezone || undefined,
})
return {

View File

@ -215,6 +215,8 @@ export const enqueueParseRequest = async ({
queue = env.queue.name,
state,
labels,
locale,
timezone,
}: {
url: string
userId: string
@ -223,6 +225,8 @@ export const enqueueParseRequest = async ({
queue?: string
state?: ArticleSavingRequestStatus
labels?: CreateLabelInput[]
locale?: string
timezone?: string
}): Promise<string> => {
const { GOOGLE_CLOUD_PROJECT } = process.env
const payload = {
@ -231,6 +235,8 @@ export const enqueueParseRequest = async ({
saveRequestId,
state,
labels,
locale,
timezone,
}
// If there is no Google Cloud Project Id exposed, it means that we are in local environment

View File

@ -333,6 +333,8 @@ async function fetchContent(req, res) {
const source = req.body.source || 'parseContent';
const taskId = req.body.taskId; // taskId is used to update import status
const urlStr = (req.query ? req.query.url : undefined) || (req.body ? req.body.url : undefined);
const locale = (req.query ? req.query.locale : undefined) || (req.body ? req.body.locale : undefined);
const timezone = (req.query ? req.query.timezone : undefined) || (req.body ? req.body.timezone : undefined);
let logRecord = {
url: urlStr,
@ -344,6 +346,8 @@ async function fetchContent(req, res) {
state,
labelsToAdd: labels,
taskId: taskId,
locale,
timezone,
};
console.info(`Article parsing request`, logRecord);
@ -374,7 +378,7 @@ async function fetchContent(req, res) {
}
if ((!content || !title) && contentType !== 'application/pdf') {
const result = await retrievePage(url, logRecord, functionStartTime);
const result = await retrievePage(url, logRecord, functionStartTime, locale, timezone);
if (result && result.context) { context = result.context }
if (result && result.page) { page = result.page }
if (result && result.finalUrl) { finalUrl = result.finalUrl }
@ -522,7 +526,7 @@ function getUrl(urlStr) {
return parsed.href;
}
async function retrievePage(url, logRecord, functionStartTime) {
async function retrievePage(url, logRecord, functionStartTime, locale, timezone) {
validateUrlString(url);
const browser = await getBrowserPromise;
@ -536,6 +540,16 @@ async function retrievePage(url, logRecord, functionStartTime) {
}
await page.setUserAgent(userAgentForUrl(url));
// set locale for the page
if (locale) {
await page.setExtraHTTPHeaders({ 'Accept-Language': locale });
}
// set timezone for the page
if (timezone) {
await page.emulateTimezone(timezone);
}
const client = await page.target().createCDPSession();
// intercept request when response headers were received