diff --git a/packages/api/src/services/save_page.ts b/packages/api/src/services/save_page.ts index 4a43f6e3f..0d92b63c0 100644 --- a/packages/api/src/services/save_page.ts +++ b/packages/api/src/services/save_page.ts @@ -164,6 +164,8 @@ export const savePage = async ( pageId = newPageId } + // TODO: update thumbnail and pre-cache images + if (parseResult.highlightData) { const highlight = { updatedAt: new Date(), diff --git a/packages/thumbnail-handler/.dockerignore b/packages/thumbnail-handler/.dockerignore new file mode 100644 index 000000000..d8aea4ee6 --- /dev/null +++ b/packages/thumbnail-handler/.dockerignore @@ -0,0 +1,5 @@ +node_modules +build +.env* +Dockerfile +.dockerignore diff --git a/packages/thumbnail-handler/.eslintignore b/packages/thumbnail-handler/.eslintignore new file mode 100644 index 000000000..b38db2f29 --- /dev/null +++ b/packages/thumbnail-handler/.eslintignore @@ -0,0 +1,2 @@ +node_modules/ +build/ diff --git a/packages/thumbnail-handler/.eslintrc b/packages/thumbnail-handler/.eslintrc new file mode 100644 index 000000000..e006282a6 --- /dev/null +++ b/packages/thumbnail-handler/.eslintrc @@ -0,0 +1,6 @@ +{ + "extends": "../../.eslintrc", + "parserOptions": { + "project": "tsconfig.json" + } +} \ No newline at end of file diff --git a/packages/thumbnail-handler/.gcloudignore b/packages/thumbnail-handler/.gcloudignore new file mode 100644 index 000000000..ccc4eb240 --- /dev/null +++ b/packages/thumbnail-handler/.gcloudignore @@ -0,0 +1,16 @@ +# This file specifies files that are *not* uploaded to Google Cloud Platform +# using gcloud. It follows the same syntax as .gitignore, with the addition of +# "#!include" directives (which insert the entries of the given .gitignore-style +# file at that point). 
+#
+# For more information, run:
+#   $ gcloud topic gcloudignore
+#
+.gcloudignore
+# If you would like to upload your .git directory, .gitignore file or files
+# from your .gitignore file, remove the corresponding line
+# below:
+.git
+.gitignore
+
+node_modules
diff --git a/packages/thumbnail-handler/Dockerfile b/packages/thumbnail-handler/Dockerfile
new file mode 100644
index 000000000..1ba49d688
--- /dev/null
+++ b/packages/thumbnail-handler/Dockerfile
@@ -0,0 +1,26 @@
+FROM node:14.18-alpine
+
+# Run everything after as non-privileged user.
+WORKDIR /app
+
+COPY package.json .
+COPY yarn.lock .
+COPY tsconfig.json .
+COPY .eslintrc .
+
+COPY /packages/thumbnail-handler/package.json ./packages/thumbnail-handler/package.json
+
+RUN yarn install --pure-lockfile
+
+ADD /packages/thumbnail-handler ./packages/thumbnail-handler
+RUN yarn workspace @omnivore/thumbnail-handler build
+
+# After building, fetch the production dependencies
+RUN rm -rf /app/packages/thumbnail-handler/node_modules
+RUN rm -rf /app/node_modules
+RUN yarn install --pure-lockfile --production
+
+EXPOSE 8080
+
+CMD ["yarn", "workspace", "@omnivore/thumbnail-handler", "start"]
+
diff --git a/packages/thumbnail-handler/mocha-config.json b/packages/thumbnail-handler/mocha-config.json
new file mode 100644
index 000000000..44d1d24c1
--- /dev/null
+++ b/packages/thumbnail-handler/mocha-config.json
@@ -0,0 +1,5 @@
+{
+    "extension": ["ts"],
+    "spec": "test/**/*.test.ts",
+    "require": "test/babel-register.js"
+  }
\ No newline at end of file
diff --git a/packages/thumbnail-handler/package.json b/packages/thumbnail-handler/package.json
new file mode 100644
index 000000000..9dbcb53bb
--- /dev/null
+++ b/packages/thumbnail-handler/package.json
@@ -0,0 +1,30 @@
+{
+  "name": "@omnivore/thumbnail-handler",
+  "version": "1.0.0",
+  "main": "build/src/index.js",
+  "files": [
+    "build/src"
+  ],
+  "license": "Apache-2.0",
+  "scripts": {
+    "test": "yarn mocha -r ts-node/register --config mocha-config.json",
+    "lint": "eslint src --ext ts,js,tsx,jsx",
+    "compile": "tsc",
+    "build": "tsc",
+    "start": "functions-framework --target=thumbnailHandler",
+    "dev": "concurrently \"tsc -w\" \"nodemon --watch ./build/ --exec npm run start\""
+  },
+  "devDependencies": {
+    "chai": "^4.3.6",
+    "eslint-plugin-prettier": "^4.0.0",
+    "mocha": "^10.0.0"
+  },
+  "dependencies": {
+    "@google-cloud/functions-framework": "3.1.2",
+    "@sentry/serverless": "^6.16.1",
+    "axios": "^1.4.0",
+    "image-size": "^1.0.2",
+    "jsonwebtoken": "^9.0.0",
+    "linkedom": "^0.14.26"
+  }
+}
diff --git a/packages/thumbnail-handler/src/index.ts b/packages/thumbnail-handler/src/index.ts
new file mode 100644
index 000000000..abfabb66d
--- /dev/null
+++ b/packages/thumbnail-handler/src/index.ts
@@ -0,0 +1,337 @@
+import * as Sentry from '@sentry/serverless'
+import axios from 'axios'
+import sizeOf from 'image-size'
+import * as jwt from 'jsonwebtoken'
+import { parseHTML } from 'linkedom'
+import { promisify } from 'util'
+
+interface ArticleResponse {
+  data: {
+    article: {
+      article: Page
+    }
+  }
+}
+
+interface Page {
+  id: string
+  content: string
+  image?: string
+}
+
+interface UpdatePageResponse {
+  data: {
+    updatePage: {
+      // absent when the mutation resolves to the error variant
+      updatedPage?: Page
+    }
+  }
+}
+
+interface ThumbnailRequest {
+  slug: string
+}
+
+Sentry.GCPFunction.init({
+  dsn: process.env.SENTRY_DSN,
+  tracesSampleRate: 0,
+})
+
+const signToken = promisify(jwt.sign)
+
+/**
+ * Loads the page with the given slug through the backend GraphQL API.
+ *
+ * The request carries an `auth` cookie holding a token signed for `userId`
+ * with JWT_SECRET.
+ *
+ * @throws Error when JWT_SECRET or REST_BACKEND_ENDPOINT is not set
+ */
+const articleQuery = async (userId: string, slug: string): Promise<Page> => {
+  const JWT_SECRET = process.env.JWT_SECRET
+  const REST_BACKEND_ENDPOINT = process.env.REST_BACKEND_ENDPOINT
+
+  if (!JWT_SECRET || !REST_BACKEND_ENDPOINT) {
+    throw new Error('Environment not configured correctly')
+  }
+
+  // the success branch selects `article` because the result is read from
+  // `data.article.article` below
+  const data = JSON.stringify({
+    query: `query article ($username: String!, $slug: String!){
+      article(username: $username, slug: $slug){
+        ... on ArticleSuccess {
+          article {
+            id
+            content
+            image
+          }
+        }
+        ... on ArticleError {
+          errorCode
+        }
+      }
+    }`,
+    variables: {
+      username: 'me',
+      slug,
+    },
+  })
+  const auth = (await signToken({ uid: userId }, JWT_SECRET)) as string
+
+  const response = await axios.post<ArticleResponse>(
+    `${REST_BACKEND_ENDPOINT}/graphql`,
+    data,
+    {
+      headers: {
+        Cookie: `auth=${auth};`,
+        'Content-Type': 'application/json',
+      },
+    }
+  )
+
+  return response.data.data.article.article
+}
+
+/**
+ * Stores `image` as the preview image of a page through the backend GraphQL
+ * API.
+ *
+ * @returns true when the response contains the updated page, false otherwise
+ * @throws Error when JWT_SECRET or REST_BACKEND_ENDPOINT is not set
+ */
+const updatePageMutation = async (
+  userId: string,
+  pageId: string,
+  image: string
+): Promise<boolean> => {
+  const JWT_SECRET = process.env.JWT_SECRET
+  const REST_BACKEND_ENDPOINT = process.env.REST_BACKEND_ENDPOINT
+
+  if (!JWT_SECRET || !REST_BACKEND_ENDPOINT) {
+    throw new Error('Environment not configured correctly')
+  }
+
+  // NOTE(review): the error fragment is named `UpdateError`; confirm that it
+  // matches the error type name in the backend schema
+  const data = JSON.stringify({
+    query: `mutation UpdatePage ($input: UpdatePageInput!) {
+      updatePage(input: $input) {
+        ... on UpdatePageSuccess {
+          updatedPage {
+            id
+            previewImage
+          }
+        }
+        ... on UpdateError{
+          errorCodes
+        }
+      }
+    }`,
+    variables: {
+      input: {
+        pageId,
+        previewImage: image,
+      },
+    },
+  })
+
+  const auth = (await signToken({ uid: userId }, JWT_SECRET)) as string
+  const response = await axios.post<UpdatePageResponse>(
+    `${REST_BACKEND_ENDPOINT}/graphql`,
+    data,
+    {
+      headers: {
+        Cookie: `auth=${auth};`,
+        'Content-Type': 'application/json',
+      },
+    }
+  )
+
+  // only the success variant carries `updatedPage`
+  return !!response.data.data?.updatePage?.updatedPage
+}
+
+/**
+ * Type guard for the request body: a non-null object with a string `slug`.
+ */
+const isThumbnailRequest = (body: unknown): body is ThumbnailRequest => {
+  return (
+    typeof body === 'object' &&
+    body !== null &&
+    typeof (body as Record<string, unknown>).slug === 'string'
+  )
+}
+
+/**
+ * Reads the `uid` claim of a token after checking its signature.
+ *
+ * @returns the uid, or null when the token is invalid or has no string uid
+ */
+const getUserId = (token: string, secret: string): string | null => {
+  try {
+    // verify instead of decode: decode does not check the signature
+    const claims = jwt.verify(token, secret)
+    if (typeof claims === 'string') {
+      return null
+    }
+
+    const uid: unknown = claims.uid
+    return typeof uid === 'string' && uid ? uid : null
+  } catch (e) {
+    console.log(e)
+    return null
+  }
+}
+
+/**
+ * Downloads the image at `url` and measures it.
+ *
+ * @returns [width, height], or null when the download or measurement fails
+ */
+const getImageSize = async (url: string): Promise<[number, number] | null> => {
+  try {
+    // get image file by url
+    const response = await axios.get(url, {
+      responseType: 'arraybuffer',
+    })
+
+    // eslint-disable-next-line @typescript-eslint/no-unsafe-argument
+    const buffer = Buffer.from(response.data, 'binary')
+
+    // get image size
+    const { width, height } = sizeOf(buffer)
+
+    if (!width || !height) {
+      return null
+    }
+
+    return [width, height]
+  } catch (e) {
+    console.log(e)
+    return null
+  }
+}
+
+// credit to https://github.com/reddit-archive/reddit/blob/753b17407e9a9dca09558526805922de24133d53/r2/r2/lib/media.py#L706
+/**
+ * Picks a thumbnail among the `img[src]` elements of an HTML document.
+ *
+ * Every image is downloaded to measure it. Images under 5000 square pixels or
+ * with a side ratio above 1.5 are skipped; the largest remaining one wins.
+ *
+ * @returns the src of the chosen image, or null when none qualifies
+ */
+const findThumbnail = async (content: string): Promise<string | null> => {
+  const dom = parseHTML(content).document
+
+  // find the largest and squarest image as the thumbnail
+  // and pre-cache all images
+  const images = dom.querySelectorAll('img[src]')
+  if (!images || images.length === 0) {
+    return null
+  }
+
+  let thumbnail: string | null = null
+  let largestArea = 0
+  // the list is synchronous, so a plain for...of is enough
+  for (const image of Array.from(images)) {
+    const src = image.getAttribute('src')
+    if (!src) {
+      continue
+    }
+
+    const size = await getImageSize(src)
+    if (!size) {
+      continue
+    }
+
+    let area = size[0] * size[1]
+
+    // ignore little images
+    if (area < 5000) {
+      console.debug('ignore little', src)
+      continue
+    }
+
+    // ignore excessively long/wide images
+    if (Math.max(...size) / Math.min(...size) > 1.5) {
+      console.debug('ignore dimensions', src)
+      continue
+    }
+
+    // penalize images with "sprite" in their name
+    if (src.toLowerCase().includes('sprite')) {
+      console.debug('penalizing sprite', src)
+      area /= 10
+    }
+
+    if (area > largestArea) {
+      largestArea = area
+      thumbnail = src
+    }
+  }
+
+  return thumbnail
+}
+
+/**
+ * HTTP entry point.
+ *
+ * The caller's token is taken from the `authorization` header; the body is
+ * { slug: string }.
+ *
+ * Responds 401 for a missing or invalid token, 400 for a malformed body,
+ * 500 for a missing JWT_SECRET or any failure while processing, and 200
+ * otherwise.
+ */
+export const thumbnailHandler = Sentry.GCPFunction.wrapHttpFunction(
+  async (req, res) => {
+    const token = req.headers?.authorization
+    if (!token) {
+      return res.status(401).send('UNAUTHORIZED')
+    }
+
+    const JWT_SECRET = process.env.JWT_SECRET
+    if (!JWT_SECRET) {
+      console.error('Environment not configured correctly')
+      return res.status(500).send('INTERNAL_SERVER_ERROR')
+    }
+
+    const uid = getUserId(token, JWT_SECRET)
+    if (!uid) {
+      return res.status(401).send('UNAUTHORIZED')
+    }
+
+    if (!isThumbnailRequest(req.body)) {
+      return res.status(400).send('BAD_REQUEST')
+    }
+
+    const { slug } = req.body
+
+    try {
+      const page = await articleQuery(uid, slug)
+
+      // find thumbnail from all images & pre-cache
+      const thumbnail = await findThumbnail(page.content)
+      if (!thumbnail) {
+        return res.status(200).send('NOT_FOUND')
+      }
+
+      // update page with thumbnail if not already set
+      if (page.image) {
+        return res.status(200).send('OK')
+      }
+
+      const updated = await updatePageMutation(uid, page.id, thumbnail)
+      if (!updated) {
+        console.error('failed to update page', page.id)
+      }
+
+      res.send('ok')
+    } catch (e) {
+      console.error(e)
+      return res.status(500).send('INTERNAL_SERVER_ERROR')
+    }
+  }
+)
diff --git a/packages/thumbnail-handler/test/babel-register.js b/packages/thumbnail-handler/test/babel-register.js
new file mode 100644
index 000000000..a6f65f60a
--- /dev/null
+++ b/packages/thumbnail-handler/test/babel-register.js
@@ -0,0 +1,3 @@
+const register = require('@babel/register').default
+
+register({ extensions: ['.ts', '.tsx', '.js', '.jsx'] })
diff --git a/packages/thumbnail-handler/test/stub.test.ts b/packages/thumbnail-handler/test/stub.test.ts
new file mode 100644
index 000000000..24ad25c8f
--- /dev/null
+++ b/packages/thumbnail-handler/test/stub.test.ts
@@ -0,0 +1,8 @@
+import 'mocha'
+import { expect } from 'chai'
+
+describe('stub test', () => {
+  it('should pass', () => {
+    expect(true).to.be.true
+  })
+})
diff --git a/packages/thumbnail-handler/tsconfig.json b/packages/thumbnail-handler/tsconfig.json
new file mode 100644
index 000000000..7ebe093f6
--- /dev/null
+++ b/packages/thumbnail-handler/tsconfig.json
@@ -0,0 +1,8 @@
+{
+  "extends": "./../../tsconfig.json",
+  "compilerOptions": {
+    "outDir": "build",
+    "rootDir": "."
+ }, + "include": ["src"] +} diff --git a/yarn.lock b/yarn.lock index 339090cfe..0a239f537 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10426,6 +10426,15 @@ axios@^1.2.0, axios@^1.2.2: form-data "^4.0.0" proxy-from-env "^1.1.0" +axios@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/axios/-/axios-1.4.0.tgz#38a7bf1224cd308de271146038b551d725f0be1f" + integrity sha512-S4XCWMEmzvo64T9GfvQDOXgYRDJ/wsSZc7Jvdgx5u1sd0JwsuPLqb3SYmusag+edF6ziyMensPVqLTSc1PiSEA== + dependencies: + follow-redirects "^1.15.0" + form-data "^4.0.0" + proxy-from-env "^1.1.0" + axobject-query@^2.2.0: version "2.2.0" resolved "https://registry.yarnpkg.com/axobject-query/-/axobject-query-2.2.0.tgz#943d47e10c0b704aa42275e20edf3722648989be" @@ -16662,6 +16671,13 @@ ignore@^5.2.0: resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.2.0.tgz#6d3bac8fa7fe0d45d9f9be7bac2fc279577e345a" integrity sha512-CmxgYGiEPCLhfLnpPp1MoRmifwEIOgjcHXxOBjv7mY96c+eWScsOP9c112ZyLdWHi0FxHjI+4uVhKYp/gcdRmQ== +image-size@^1.0.2: + version "1.0.2" + resolved "https://registry.yarnpkg.com/image-size/-/image-size-1.0.2.tgz#d778b6d0ab75b2737c1556dd631652eb963bc486" + integrity sha512-xfOoWjceHntRb3qFCrh5ZFORYH8XCdYpASltMhZ/Q0KZiOwjdE/Yl2QCiWdwD+lygV5bMCvauzgu5PxBX/Yerg== + dependencies: + queue "6.0.2" + immediate@~3.0.5: version "3.0.6" resolved "https://registry.yarnpkg.com/immediate/-/immediate-3.0.6.tgz#9db1dbd0faf8de6fbe0f5dd5e56bb606280de69b" @@ -18823,6 +18839,17 @@ linkedom@^0.14.21: htmlparser2 "^8.0.1" uhyphen "^0.1.0" +linkedom@^0.14.26: + version "0.14.26" + resolved "https://registry.yarnpkg.com/linkedom/-/linkedom-0.14.26.tgz#fd8ddaef1a052e1191fb2e881605a1a001409f3b" + integrity sha512-mK6TrydfFA7phrnp+1j57ycBwFI5bGSW6YXlw9acHoqF+mP/y+FooEYYyniOt5Ot57FSKB3iwmnuQ1UUyNLm5A== + dependencies: + css-select "^5.1.0" + cssom "^0.5.0" + html-escaper "^3.0.3" + htmlparser2 "^8.0.1" + uhyphen "^0.2.0" + linkedom@^0.14.9: version "0.14.9" resolved 
"https://registry.yarnpkg.com/linkedom/-/linkedom-0.14.9.tgz#34c6f15eddc809406f42d8ee48cd30b0222eccb0" @@ -23161,6 +23188,13 @@ querystring@^0.2.0: resolved "https://registry.yarnpkg.com/querystring/-/querystring-0.2.1.tgz#40d77615bb09d16902a85c3e38aa8b5ed761c2dd" integrity sha512-wkvS7mL/JMugcup3/rMitHmd9ecIGd2lhFhK9N3UUQ450h66d1r3Y9nvXzQAW1Lq+wyx61k/1pfKS5KuKiyEbg== +queue@6.0.2: + version "6.0.2" + resolved "https://registry.yarnpkg.com/queue/-/queue-6.0.2.tgz#b91525283e2315c7553d2efa18d83e76432fed65" + integrity sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA== + dependencies: + inherits "~2.0.3" + quick-lru@^4.0.1: version "4.0.1" resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-4.0.1.tgz#5b8878f113a58217848c6482026c73e1ba57727f" @@ -26919,6 +26953,11 @@ uhyphen@^0.1.0: resolved "https://registry.yarnpkg.com/uhyphen/-/uhyphen-0.1.0.tgz#3cc22afa790daa802b9f6789f3583108d5b4a08c" integrity sha512-o0QVGuFg24FK765Qdd5kk0zU/U4dEsCtN/GSiwNI9i8xsSVtjIAOdTaVhLwZ1nrbWxFVMxNDDl+9fednsOMsBw== +uhyphen@^0.2.0: + version "0.2.0" + resolved "https://registry.yarnpkg.com/uhyphen/-/uhyphen-0.2.0.tgz#8fdf0623314486e020a3c00ee5cc7a12fe722b81" + integrity sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA== + uid-number@0.0.6: version "0.0.6" resolved "https://registry.yarnpkg.com/uid-number/-/uid-number-0.0.6.tgz#0ea10e8035e8eb5b8e4449f06da1c730663baa81"