From 0dc8724fcb67c3d731fd7d993154c9db46c901e8 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 20 Apr 2023 16:31:06 +0800 Subject: [PATCH] Make pdf-handler subscribe to a pubsub notification for cloud storage --- packages/pdf-handler/package.json | 1 + packages/pdf-handler/src/index.ts | 123 ++++++++++++++++++------------ 2 files changed, 77 insertions(+), 47 deletions(-) diff --git a/packages/pdf-handler/package.json b/packages/pdf-handler/package.json index 7fa9b3c91..4e083bff5 100644 --- a/packages/pdf-handler/package.json +++ b/packages/pdf-handler/package.json @@ -29,6 +29,7 @@ "@google-cloud/functions-framework": "3.1.2", "@google-cloud/pubsub": "^3.4.1", "@google-cloud/storage": "^5.18.1", + "@sentry/serverless": "^7.30.0", "axios": "^0.27.2", "concurrently": "^7.0.0", "pdfjs-dist": "^2.9.359" diff --git a/packages/pdf-handler/src/index.ts b/packages/pdf-handler/src/index.ts index 70783a1f3..efc2d11eb 100644 --- a/packages/pdf-handler/src/index.ts +++ b/packages/pdf-handler/src/index.ts @@ -1,11 +1,13 @@ -import { - EventFunction, - CloudFunctionsContext, -} from '@google-cloud/functions-framework/build/src/functions' -import { GetSignedUrlConfig, Storage } from '@google-cloud/storage' import { PubSub } from '@google-cloud/pubsub' +import { GetSignedUrlConfig, Storage } from '@google-cloud/storage' +import * as Sentry from '@sentry/serverless' import { parsePdf } from './pdf' +Sentry.GCPFunction.init({ + dsn: process.env.SENTRY_DSN, + tracesSampleRate: 0, +}) + const pubsub = new PubSub() const storage = new Storage() const CONTENT_UPDATE_TOPIC = 'updatePageContent' @@ -16,18 +18,16 @@ interface StorageEventData { contentType: string } +function isStorageEventData(event: any): event is StorageEventData { + return 'name' in event && 'bucket' in event && 'contentType' in event +} + // Ensure this is a finalize event and that it is stored in the `u/` directory and is a PDF -const shouldHandle = (data: StorageEventData, ctx: CloudFunctionsContext) => { - if (ctx.eventType !== 'google.storage.object.finalize') { - return false - } - if ( - !data.name.startsWith('u/') || - data.contentType.toLowerCase() != 'application/pdf' - ) { - return false - } - return true +const shouldHandle = (data: StorageEventData) => { + return ( + data.name.startsWith('u/') && + data.contentType.toLowerCase() === 'application/pdf' + ) } const getDocumentUrl = async ( @@ -69,37 +69,66 @@ export const updatePageContent = ( }) } -export const pdfHandler: EventFunction = async (event, context) => { - const data = event as StorageEventData - const ctx = context as CloudFunctionsContext - - if (shouldHandle(data, ctx)) { - console.log('handling pdf data', data) - - const url = await getDocumentUrl(data) - console.log('PDF url: ', url) - if (!url) { - console.log('Could not fetch PDF', data.bucket, data.name) - return +const getStorageEventData = ( + pubSubMessage: string +): StorageEventData | undefined => { + try { + const str = Buffer.from(pubSubMessage, 'base64').toString().trim() + const obj = JSON.parse(str) as unknown + if (isStorageEventData(obj)) { + return obj } - - const parsed = await parsePdf(url) - const res = await updatePageContent( - data.name, - parsed.content, - parsed.title, - parsed.author, - parsed.description - ) - console.log( - 'publish result', - res, - 'title', - parsed.title, - 'author', - parsed.author - ) - } else { - console.log('not handling pdf data', data) + } catch (err) { + console.log('error deserializing event: ', { pubSubMessage, err }) } + return undefined } + +export const pdfHandler = Sentry.GCPFunction.wrapHttpFunction( + async (req, res) => { + /* eslint-disable @typescript-eslint/no-unsafe-member-access */ + if ('message' in req.body && 'data' in req.body.message) { + const pubSubMessage = req.body.message.data as string + const data = getStorageEventData(pubSubMessage) + if (data) { + try { + if (shouldHandle(data)) { + console.log('handling pdf data', data) + + const url = await getDocumentUrl(data) + console.log('PDF url: ', url) + if (!url) { + console.log('Could not fetch PDF', data.bucket, data.name) + return res.status(404).send('Could not fetch PDF') + } + + const parsed = await parsePdf(url) + const result = await updatePageContent( + data.name, + parsed.content, + parsed.title, + parsed.author, + parsed.description + ) + console.log( + 'publish result', + result, + 'title', + parsed.title, + 'author', + parsed.author + ) + } else { + console.log('not handling pdf data', data) + } + } catch (err) { + console.log('error handling event', { err, data }) + return res.status(500).send('Error handling event') + } + } + } else { + console.log('no pubsub message') + } + res.send('ok') + } +)