Merge pull request #2478 from omnivore-app/fix/puppeteer-timezone

Add optional locale and timezone fields to the saveUrl request so that Puppeteer can use them when fetching the page.
This commit is contained in:
Hongbo Wu
2023-07-11 17:32:10 +08:00
committed by GitHub
8 changed files with 38 additions and 24 deletions

View File

@ -2275,8 +2275,10 @@ export type SaveSuccess = {
/**
 * Input object accepted by the `saveUrl` operation.
 *
 * Required fields are listed first, followed by the optional ones. Optional
 * fields are wrapped in `InputMaybe`, so callers may omit them.
 */
export type SaveUrlInput = {
  /** Identifier supplied by the client for this save request. */
  clientRequestId: Scalars['ID'];
  /** Origin of the request. NOTE(review): allowed values are not visible here. */
  source: Scalars['String'];
  /** The URL to save. */
  url: Scalars['String'];

  /** Labels to attach to the saved item. */
  labels?: InputMaybe<CreateLabelInput[]>;
  /** Initial state to apply to the saved item. */
  state?: InputMaybe<ArticleSavingRequestStatus>;
  /**
   * Locale hint for the page fetcher.
   * NOTE(review): presumably a value suitable for an `Accept-Language`
   * header (e.g. `en-US`) — confirm against the fetcher.
   */
  locale?: InputMaybe<Scalars['String']>;
  /**
   * Timezone hint for the page fetcher.
   * NOTE(review): presumably an IANA timezone ID (e.g. `Asia/Shanghai`) —
   * confirm against the fetcher.
   */
  timezone?: InputMaybe<Scalars['String']>;
};

View File

@ -1654,8 +1654,10 @@ type SaveSuccess {
# Input object for saving a URL. `clientRequestId`, `source` and `url` are
# required (non-null); every other field is optional.
input SaveUrlInput {
# Identifier supplied by the client for this save request.
clientRequestId: ID!
# Optional labels to attach; when the list is present its items must be non-null.
labels: [CreateLabelInput!]
# Optional locale hint for the page fetcher.
# NOTE(review): presumably used as the Accept-Language value — confirm.
locale: String
# Origin of the request. NOTE(review): allowed values are not visible here.
source: String!
# Optional initial state for the saved item.
state: ArticleSavingRequestStatus
# Optional timezone hint for the page fetcher.
# NOTE(review): presumably an IANA timezone ID (e.g. Asia/Shanghai) — confirm.
timezone: String
# The URL to save.
url: String!
}

View File

@ -570,6 +570,8 @@ const schema = gql`
clientRequestId: ID!
state: ArticleSavingRequestStatus
labels: [CreateLabelInput!]
locale: String
timezone: String
}
union SaveResult = SaveSuccess | SaveError

View File

@ -30,6 +30,8 @@ interface PageSaveRequest {
labels?: Label[]
priority?: 'low' | 'high'
user?: User | null
locale?: string
timezone?: string
}
const SAVING_CONTENT = 'Your link is being saved...'
@ -81,6 +83,8 @@ export const createPageSaveRequest = async ({
priority,
labels,
user,
locale,
timezone,
}: PageSaveRequest): Promise<ArticleSavingRequest> => {
try {
validateUrl(url)
@ -170,6 +174,8 @@ export const createPageSaveRequest = async ({
priority,
state: archivedAt ? ArticleSavingRequestStatus.Archived : undefined,
labels: labelsInput,
locale,
timezone,
})
return pageToArticleSavingRequest(user, page)

View File

@ -34,6 +34,8 @@ export const saveUrl = async (
archivedAt,
labels,
user,
locale: input.locale || undefined,
timezone: input.timezone || undefined,
})
return {

View File

@ -216,6 +216,8 @@ export const enqueueParseRequest = async ({
queue = env.queue.name,
state,
labels,
locale,
timezone,
}: {
url: string
userId: string
@ -224,6 +226,8 @@ export const enqueueParseRequest = async ({
queue?: string
state?: ArticleSavingRequestStatus
labels?: CreateLabelInput[]
locale?: string
timezone?: string
}): Promise<string> => {
const { GOOGLE_CLOUD_PROJECT } = process.env
const payload = {
@ -232,6 +236,8 @@ export const enqueueParseRequest = async ({
saveRequestId,
state,
labels,
locale,
timezone,
}
// If there is no Google Cloud Project Id exposed, it means that we are in local environment

View File

@ -333,6 +333,8 @@ async function fetchContent(req, res) {
const source = req.body.source || 'parseContent';
const taskId = req.body.taskId; // taskId is used to update import status
const urlStr = (req.query ? req.query.url : undefined) || (req.body ? req.body.url : undefined);
const locale = (req.query ? req.query.locale : undefined) || (req.body ? req.body.locale : undefined);
const timezone = (req.query ? req.query.timezone : undefined) || (req.body ? req.body.timezone : undefined);
let logRecord = {
url: urlStr,
@ -344,6 +346,8 @@ async function fetchContent(req, res) {
state,
labelsToAdd: labels,
taskId: taskId,
locale,
timezone,
};
console.info(`Article parsing request`, logRecord);
@ -374,7 +378,7 @@ async function fetchContent(req, res) {
}
if ((!content || !title) && contentType !== 'application/pdf') {
const result = await retrievePage(url, logRecord, functionStartTime);
const result = await retrievePage(url, logRecord, functionStartTime, locale, timezone);
if (result && result.context) { context = result.context }
if (result && result.page) { page = result.page }
if (result && result.finalUrl) { finalUrl = result.finalUrl }
@ -522,7 +526,7 @@ function getUrl(urlStr) {
return parsed.href;
}
async function retrievePage(url, logRecord, functionStartTime) {
async function retrievePage(url, logRecord, functionStartTime, locale, timezone) {
validateUrlString(url);
const browser = await getBrowserPromise;
@ -536,6 +540,16 @@ async function retrievePage(url, logRecord, functionStartTime) {
}
await page.setUserAgent(userAgentForUrl(url));
// set locale for the page
if (locale) {
await page.setExtraHTTPHeaders({ 'Accept-Language': locale });
}
// set timezone for the page
if (timezone) {
await page.emulateTimezone(timezone);
}
const client = await page.target().createCDPSession();
// intercept request when response headers was received
@ -747,15 +761,7 @@ async function retrieveHtml(page, logRecord) {
document.getElementById('px-block-form-wrapper')) {
return 'IS_BLOCKED'
}
// check if create_time is defined
if (typeof create_time !== 'undefined' && create_time) {
// create_time is a global variable set by WeChat when rendering the page
const date = new Date(create_time * 1000);
const dateNode = document.createElement('div');
dateNode.className = 'omnivore-published-date';
dateNode.innerHTML = date.toLocaleString();
document.body.appendChild(dateNode);
}
return document.documentElement.outerHTML;
}, iframes);
logRecord.puppeteerSuccess = true;

View File

@ -136,19 +136,7 @@
document.documentElement.appendChild(contentCopyEl);
Array.from(contentCopyEl.getElementsByTagName('*')).forEach(prepareContentPostItem);
try {
// check if create_time is defined
if (typeof create_time !== 'undefined' && create_time) {
// create_time is a global variable set by WeChat when rendering the page
const date = new Date(create_time * 1000);
const dateNode = document.createElement('div');
dateNode.className = 'omnivore-published-date';
dateNode.innerHTML = date.toLocaleString();
contentCopyEl.appendChild(dateNode);
}
} catch (e) {
console.log('Error while trying to add published date to WeChat post', e);
}
/*
* Grab head and body separately as using clone on entire document into a div
* removes the head and body tags while grabbing html in them. Instead we