feat: add thumbnail handler

This commit is contained in:
Hongbo Wu
2023-06-06 11:02:46 +08:00
parent a439aac7e7
commit e2315b5662
13 changed files with 412 additions and 0 deletions

View File

@ -164,6 +164,8 @@ export const savePage = async (
pageId = newPageId
}
// TODO: update thumbnail and pre-cache images
if (parseResult.highlightData) {
const highlight = {
updatedAt: new Date(),

View File

@ -0,0 +1,5 @@
node_modules
build
.env*
Dockerfile
.dockerignore

View File

@ -0,0 +1,2 @@
node_modules/
build/

View File

@ -0,0 +1,6 @@
{
"extends": "../../.eslintrc",
"parserOptions": {
"project": "tsconfig.json"
}
}

View File

@ -0,0 +1,16 @@
# This file specifies files that are *not* uploaded to Google Cloud Platform
# using gcloud. It follows the same syntax as .gitignore, with the addition of
# "#!include" directives (which insert the entries of the given .gitignore-style
# file at that point).
#
# For more information, run:
# $ gcloud topic gcloudignore
#
.gcloudignore
# If you would like to upload your .git directory, .gitignore file or files
# from your .gitignore file, remove the corresponding line
# below:
.git
.gitignore
node_modules

View File

@ -0,0 +1,26 @@
# Image for the @omnivore/thumbnail-handler workspace package.
# The COPY/ADD paths below are relative to the repository root, so the build
# context is expected to be the monorepo root.
FROM node:14.18-alpine
# NOTE(review): no USER instruction follows, so every step below (and the
# running container) uses the base image's default user — presumably root.
WORKDIR /app
# Copy the workspace-level manifests and this package's package.json before
# installing dependencies.
COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .eslintrc .
COPY /packages/thumbnail-handler/package.json ./packages/thumbnail-handler/package.json
# Install all dependencies (including dev dependencies) exactly as locked.
RUN yarn install --pure-lockfile
# Add the package sources and compile them via the package's "build" script.
ADD /packages/thumbnail-handler ./packages/thumbnail-handler
RUN yarn workspace @omnivore/thumbnail-handler build
# After building, fetch the production dependencies
RUN rm -rf /app/packages/thumbnail-handler/node_modules
RUN rm -rf /app/node_modules
RUN yarn install --pure-lockfile --production
EXPOSE 8080
# Launch the package's "start" script.
CMD ["yarn", "workspace", "@omnivore/thumbnail-handler", "start"]

View File

@ -0,0 +1,5 @@
{
"extension": ["ts"],
"spec": "test/**/*.test.ts",
"require": "test/babel-register.js"
}

View File

@ -0,0 +1,30 @@
{
"name": "@omnivore/thumbnail-handler",
"version": "1.0.0",
"main": "build/src/index.js",
"files": [
"build/src"
],
"license": "Apache-2.0",
"scripts": {
"test": "yarn mocha -r ts-node/register --config mocha-config.json",
"lint": "eslint src --ext ts,js,tsx,jsx",
"compile": "tsc",
"build": "tsc",
"start": "functions-framework --target=thumbnailHandler",
"dev": "concurrently \"tsc -w\" \"nodemon --watch ./build/ --exec npm run start\""
},
"devDependencies": {
"chai": "^4.3.6",
"eslint-plugin-prettier": "^4.0.0",
"mocha": "^10.0.0"
},
"dependencies": {
"@google-cloud/functions-framework": "3.1.2",
"@sentry/serverless": "^6.16.1",
"axios": "^1.4.0",
"image-size": "^1.0.2",
"jsonwebtoken": "^9.0.0",
"linkedom": "^0.14.26"
}
}

View File

@ -0,0 +1,262 @@
import * as Sentry from '@sentry/serverless'
import axios from 'axios'
import sizeOf from 'image-size'
import * as jwt from 'jsonwebtoken'
import { parseHTML } from 'linkedom'
import { promisify } from 'util'
/** Body of the GraphQL response that `articleQuery` reads the page from. */
interface ArticleResponse {
data: {
article: {
article: Page
}
}
}
/** The subset of a saved page's fields that this handler works with. */
interface Page {
id: string
// HTML of the page; `findThumbnail` parses it to look for <img> elements
content: string
// existing image; when set, the handler does not overwrite it
image?: string
}
/**
 * Body of the GraphQL response returned for the `updatePage` mutation.
 * NOTE(review): the mutation in `updatePageMutation` only selects `id` and
 * `previewImage`, so `content` will not be present on `updatedPage` here.
 */
interface UpdatePageResponse {
data: {
updatePage: {
updatedPage: Page
}
}
}
/** JSON body expected by `thumbnailHandler`. */
interface ThumbnailRequest {
// slug of the page to look up via `articleQuery`
slug: string
}
// Initialise the Sentry GCP Functions integration at module load. The DSN is
// read from the SENTRY_DSN environment variable; tracesSampleRate is 0
// (presumably to turn off performance tracing — confirm against Sentry docs).
Sentry.GCPFunction.init({
dsn: process.env.SENTRY_DSN,
tracesSampleRate: 0,
})
// Promise-returning wrapper around the callback form of jwt.sign.
const signToken = promisify(jwt.sign)
/**
 * Fetches a page (id, content, image) by slug from the backend GraphQL API,
 * authenticating as the given user.
 *
 * A JWT carrying `{ uid: userId }` is signed with JWT_SECRET and sent to
 * `${REST_BACKEND_ENDPOINT}/graphql` in the `auth` cookie.
 *
 * @param userId id of the user on whose behalf the query is made
 * @param slug slug of the page to fetch
 * @returns the page returned by the backend
 * @throws Error when JWT_SECRET or REST_BACKEND_ENDPOINT is unset, or when
 *   the response does not contain an article (e.g. the error branch of the
 *   union was returned); request failures from axios propagate unchanged
 */
const articleQuery = async (userId: string, slug: string): Promise<Page> => {
  const JWT_SECRET = process.env.JWT_SECRET
  const REST_BACKEND_ENDPOINT = process.env.REST_BACKEND_ENDPOINT
  if (!JWT_SECRET || !REST_BACKEND_ENDPOINT) {
    throw new Error('Environment not configured correctly')
  }

  // The selected field must be spelled `article`: GraphQL names are
  // case-sensitive and the response key mirrors the queried field name, which
  // is what `ArticleResponse` (data.article.article) expects.
  // NOTE(review): confirm the `errorCode` field name against the schema.
  const data = JSON.stringify({
    query: `query article ($username: String!, $slug: String!){
      article(username: $username, slug: $slug){
        ... on ArticleSuccess {
          article {
            id
            content
            image
          }
        }
        ... on ArticleError {
          errorCode
        }
      }
    }`,
    variables: {
      username: 'me',
      slug,
    },
  })

  const auth = (await signToken({ uid: userId }, JWT_SECRET)) as string
  const response = await axios.post<ArticleResponse>(
    `${REST_BACKEND_ENDPOINT}/graphql`,
    data,
    {
      headers: {
        Cookie: `auth=${auth};`,
        'Content-Type': 'application/json',
      },
    }
  )

  // Fail loudly instead of handing `undefined` to the caller typed as `Page`.
  const page = response.data.data?.article?.article
  if (!page) {
    throw new Error(`Article not found for slug: ${slug}`)
  }
  return page
}
/**
 * Sets the preview image of a page through the backend `updatePage` GraphQL
 * mutation, authenticating as the given user.
 *
 * A JWT carrying `{ uid: userId }` is signed with JWT_SECRET and sent to
 * `${REST_BACKEND_ENDPOINT}/graphql` in the `auth` cookie.
 *
 * @param userId id of the user who owns the page
 * @param pageId id of the page to update
 * @param image URL to store as the page's `previewImage`
 * @returns true when the response contains an `updatedPage`, false otherwise
 *   (for example when the error branch of the union was returned)
 * @throws Error when JWT_SECRET or REST_BACKEND_ENDPOINT is unset; request
 *   failures from axios propagate unchanged
 */
const updatePageMutation = async (
  userId: string,
  pageId: string,
  image: string
): Promise<boolean> => {
  const JWT_SECRET = process.env.JWT_SECRET
  const REST_BACKEND_ENDPOINT = process.env.REST_BACKEND_ENDPOINT
  if (!JWT_SECRET || !REST_BACKEND_ENDPOINT) {
    throw new Error('Environment not configured correctly')
  }

  // NOTE(review): confirm the error type name `UpdateError` against the
  // schema; it is reproduced here unchanged.
  const data = JSON.stringify({
    query: `mutation UpdatePage ($input: UpdatePageInput!) {
      updatePage(input: $input) {
        ... on UpdatePageSuccess {
          updatedPage {
            id
            previewImage
          }
        }
        ... on UpdateError{
          errorCodes
        }
      }
    }`,
    variables: {
      input: {
        pageId,
        previewImage: image,
      },
    },
  })

  const auth = (await signToken({ uid: userId }, JWT_SECRET)) as string
  const response = await axios.post<UpdatePageResponse>(
    `${REST_BACKEND_ENDPOINT}/graphql`,
    data,
    {
      headers: {
        Cookie: `auth=${auth};`,
        'Content-Type': 'application/json',
      },
    }
  )

  // `updatePage` is also a (truthy) object on the error branch, so success
  // has to be judged by the presence of `updatedPage`.
  return !!response.data.data?.updatePage?.updatedPage
}
/**
 * Type guard for the request body accepted by the handler.
 *
 * Returns false (rather than throwing, as the `in` operator does on
 * non-objects) for `undefined`, `null` and primitive bodies, and requires
 * `slug` to actually be a string as the `ThumbnailRequest` type promises.
 *
 * @param body parsed request body of unknown shape
 * @returns true when `body` is an object with a string `slug` property
 */
const isThumbnailRequest = (body: unknown): body is ThumbnailRequest => {
  return (
    typeof body === 'object' &&
    body !== null &&
    typeof (body as { slug?: unknown }).slug === 'string'
  )
}
/**
 * Downloads the image at `url` and reads its pixel dimensions.
 *
 * The URL comes from arbitrary page HTML, so the download is bounded by a
 * timeout and a maximum response size to keep one slow or huge resource from
 * stalling the whole function.
 *
 * @param url address of the image to measure
 * @returns `[width, height]`, or null when the image cannot be fetched, its
 *   format is not recognised, or either dimension is missing/zero
 */
const getImageSize = async (url: string): Promise<[number, number] | null> => {
  const requestTimeoutMs = 10_000
  const maxImageBytes = 20 * 1024 * 1024

  try {
    // get image file by url
    const response = await axios.get<ArrayBuffer>(url, {
      responseType: 'arraybuffer',
      timeout: requestTimeoutMs,
      maxContentLength: maxImageBytes,
    })
    // An encoding argument only applies to string input, so none is passed.
    const buffer = Buffer.from(response.data)

    // get image size
    const { width, height } = sizeOf(buffer)
    if (!width || !height) {
      return null
    }
    return [width, height]
  } catch (e) {
    // Best effort: an unreadable image is skipped, not fatal. Log only the
    // message; the full axios error object carries the request and response.
    console.log(
      'failed to get image size',
      url,
      e instanceof Error ? e.message : e
    )
    return null
  }
}
// credit to https://github.com/reddit-archive/reddit/blob/753b17407e9a9dca09558526805922de24133d53/r2/r2/lib/media.py#L706
/**
 * Picks the best thumbnail candidate among the `<img src>` elements of an
 * HTML document.
 *
 * Every distinct image URL is downloaded once (via `getImageSize`) and scored
 * by pixel area. Images smaller than 5000 px² or with an aspect ratio above
 * 1.5 are skipped, and images whose URL contains "sprite" have their area
 * divided by 10. The first image with the largest resulting score wins.
 *
 * @param content HTML of the page
 * @returns the `src` of the chosen image, or null when no image qualifies
 */
const findThumbnail = async (content: string): Promise<string | null> => {
  const dom = parseHTML(content).document
  // find the largest and squarest image as the thumbnail
  // and pre-cache all images
  const images = dom.querySelectorAll('img[src]')
  if (!images || images.length === 0) {
    return null
  }

  let thumbnail: string | null = null
  let largestArea = 0
  // A repeated URL can never beat its first occurrence (the comparison below
  // is strict), so it is skipped instead of being downloaded again.
  const seen = new Set<string>()

  for (const image of Array.from(images)) {
    const src = image.getAttribute('src')
    if (!src || seen.has(src)) {
      continue
    }
    seen.add(src)

    const size = await getImageSize(src)
    if (!size) {
      continue
    }

    let area = size[0] * size[1]
    // ignore little images
    if (area < 5000) {
      console.debug('ignore little', src)
      continue
    }
    // ignore excessively long/wide images
    if (Math.max(...size) / Math.min(...size) > 1.5) {
      console.debug('ignore dimensions', src)
      continue
    }
    // penalize images with "sprite" in their name
    if (src.toLowerCase().includes('sprite')) {
      console.debug('penalizing sprite', src)
      area /= 10
    }
    if (area > largestArea) {
      largestArea = area
      thumbnail = src
    }
  }

  return thumbnail
}
/**
 * HTTP function that finds a thumbnail for a saved page and stores it as the
 * page's preview image when the page has no image yet.
 *
 * Request:
 *   header `Authorization`: JWT signed with JWT_SECRET whose payload carries
 *     the user id as `uid`
 *   body: { slug: string }
 *
 * Responses:
 *   401 UNAUTHORIZED          - token missing, invalid, or without a `uid`
 *   400 BAD_REQUEST           - body is not a thumbnail request
 *   200 NOT_FOUND             - no suitable image in the page content
 *   200 OK / ok               - page already had an image / update was sent
 *   500 INTERNAL_SERVER_ERROR - misconfiguration or any failure on the way
 */
export const thumbnailHandler = Sentry.GCPFunction.wrapHttpFunction(
  async (req, res) => {
    const token = req.headers?.authorization
    if (!token) {
      return res.status(401).send('UNAUTHORIZED')
    }

    const JWT_SECRET = process.env.JWT_SECRET
    if (!JWT_SECRET) {
      console.error('Environment not configured correctly')
      return res.status(500).send('INTERNAL_SERVER_ERROR')
    }

    // The signature must be verified: jwt.decode() only parses the payload,
    // which would let any caller claim an arbitrary `uid`.
    // NOTE(review): assumes callers sign this token with the same JWT_SECRET
    // used for the backend calls below — confirm against the caller.
    let uid: string | undefined
    try {
      const claims = jwt.verify(token, JWT_SECRET)
      if (
        typeof claims === 'object' &&
        claims !== null &&
        typeof claims.uid === 'string'
      ) {
        uid = claims.uid
      }
    } catch (e) {
      console.error('token verification failed', e)
      return res.status(401).send('UNAUTHORIZED')
    }
    if (!uid) {
      return res.status(401).send('UNAUTHORIZED')
    }

    if (!isThumbnailRequest(req.body)) {
      return res.status(400).send('BAD_REQUEST')
    }
    const { slug } = req.body

    try {
      const page = await articleQuery(uid, slug)
      // find thumbnail from all images & pre-cache
      const thumbnail = await findThumbnail(page.content)
      if (!thumbnail) {
        return res.status(200).send('NOT_FOUND')
      }
      // update page with thumbnail if not already set
      if (page.image) {
        return res.status(200).send('OK')
      }
      await updatePageMutation(uid, page.id, thumbnail)
      res.send('ok')
    } catch (e) {
      console.error(e)
      return res.status(500).send('INTERNAL_SERVER_ERROR')
    }
  }
)

View File

@ -0,0 +1,3 @@
// Test bootstrap: invoke @babel/register's default export with the file
// extensions used by this package.
const babelRegister = require('@babel/register')
const handledExtensions = ['.ts', '.tsx', '.js', '.jsx']
babelRegister.default({ extensions: handledExtensions })

View File

@ -0,0 +1,8 @@
import 'mocha'
import { expect } from 'chai'
// Placeholder suite: a single trivially-true assertion.
describe('stub test', function () {
  it('should pass', function () {
    const value = true
    expect(value).to.equal(true)
  })
})

View File

@ -0,0 +1,8 @@
{
"extends": "./../../tsconfig.json",
"compilerOptions": {
"outDir": "build",
"rootDir": "."
},
"include": ["src"]
}

View File

@ -10426,6 +10426,15 @@ axios@^1.2.0, axios@^1.2.2:
form-data "^4.0.0"
proxy-from-env "^1.1.0"
axios@^1.4.0:
version "1.4.0"
resolved "https://registry.yarnpkg.com/axios/-/axios-1.4.0.tgz#38a7bf1224cd308de271146038b551d725f0be1f"
integrity sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA==
dependencies:
follow-redirects "^1.15.0"
form-data "^4.0.0"
proxy-from-env "^1.1.0"
axobject-query@^2.2.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/axobject-query/-/axobject-query-2.2.0.tgz#943d47e10c0b704aa42275e20edf3722648989be"
@ -16662,6 +16671,13 @@ ignore@^5.2.0:
resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.2.0.tgz#6d3bac8fa7fe0d45d9f9be7bac2fc279577e345a"
integrity sha512-CmxgYGiEPCLhfLnpPp1MoRmifwEIOgjcHXxOBjv7mY96c+eWScsOP9c112ZyLdWHi0FxHjI+4uVhKYp/gcdRmQ==
image-size@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/image-size/-/image-size-1.0.2.tgz#d778b6d0ab75b2737c1556dd631652eb963bc486"
integrity sha512-xfOoWjceHntRb3qFCrh5ZFORYH8XCdYpASltMhZ/Q0KZiOwjdE/Yl2QCiWdwD+lygV5bMCvauzgu5PxBX/Yerg==
dependencies:
queue "6.0.2"
immediate@~3.0.5:
version "3.0.6"
resolved "https://registry.yarnpkg.com/immediate/-/immediate-3.0.6.tgz#9db1dbd0faf8de6fbe0f5dd5e56bb606280de69b"
@ -18823,6 +18839,17 @@ linkedom@^0.14.21:
htmlparser2 "^8.0.1"
uhyphen "^0.1.0"
linkedom@^0.14.26:
version "0.14.26"
resolved "https://registry.yarnpkg.com/linkedom/-/linkedom-0.14.26.tgz#fd8ddaef1a052e1191fb2e881605a1a001409f3b"
integrity sha512-mK6TrydfFA7phrnp+1j57ycBwFI5bGSW6YXlw9acHoqF+mP/y+FooEYYyniOt5Ot57FSKB3iwmnuQ1UUyNLm5A==
dependencies:
css-select "^5.1.0"
cssom "^0.5.0"
html-escaper "^3.0.3"
htmlparser2 "^8.0.1"
uhyphen "^0.2.0"
linkedom@^0.14.9:
version "0.14.9"
resolved "https://registry.yarnpkg.com/linkedom/-/linkedom-0.14.9.tgz#34c6f15eddc809406f42d8ee48cd30b0222eccb0"
@ -23161,6 +23188,13 @@ querystring@^0.2.0:
resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.1.tgz#40d77615bb09d16902a85c3e38aa8b5ed761c2dd"
integrity sha512-wkvS7mL/JMugcup3/rMitHmd9ecIGd2lhFhK9N3UUQ450h66d1r3Y9nvXzQAW1Lq+wyx61k/1pfKS5KuKiyEbg==
queue@6.0.2:
version "6.0.2"
resolved "https://registry.yarnpkg.com/queue/-/queue-6.0.2.tgz#b91525283e2315c7553d2efa18d83e76432fed65"
integrity sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==
dependencies:
inherits "~2.0.3"
quick-lru@^4.0.1:
version "4.0.1"
resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-4.0.1.tgz#5b8878f113a58217848c6482026c73e1ba57727f"
@ -26919,6 +26953,11 @@ uhyphen@^0.1.0:
resolved "https://registry.yarnpkg.com/uhyphen/-/uhyphen-0.1.0.tgz#3cc22afa790daa802b9f6789f3583108d5b4a08c"
integrity sha512-o0QVGuFg24FK765Qdd5kk0zU/U4dEsCtN/GSiwNI9i8xsSVtjIAOdTaVhLwZ1nrbWxFVMxNDDl+9fednsOMsBw==
uhyphen@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/uhyphen/-/uhyphen-0.2.0.tgz#8fdf0623314486e020a3c00ee5cc7a12fe722b81"
integrity sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==
uid-number@0.0.6:
version "0.0.6"
resolved "https://registry.yarnpkg.com/uid-number/-/uid-number-0.0.6.tgz#0ea10e8035e8eb5b8e4449f06da1c730663baa81"