Make pdf-handler subscribe to a pubsub notification for cloud storage
This commit is contained in:
@ -29,6 +29,7 @@
|
||||
"@google-cloud/functions-framework": "3.1.2",
|
||||
"@google-cloud/pubsub": "^3.4.1",
|
||||
"@google-cloud/storage": "^5.18.1",
|
||||
"@sentry/serverless": "^7.30.0",
|
||||
"axios": "^0.27.2",
|
||||
"concurrently": "^7.0.0",
|
||||
"pdfjs-dist": "^2.9.359"
|
||||
|
||||
@ -1,11 +1,13 @@
|
||||
import {
|
||||
EventFunction,
|
||||
CloudFunctionsContext,
|
||||
} from '@google-cloud/functions-framework/build/src/functions'
|
||||
import { GetSignedUrlConfig, Storage } from '@google-cloud/storage'
|
||||
import { PubSub } from '@google-cloud/pubsub'
|
||||
import { GetSignedUrlConfig, Storage } from '@google-cloud/storage'
|
||||
import * as Sentry from '@sentry/serverless'
|
||||
import { parsePdf } from './pdf'
|
||||
|
||||
Sentry.GCPFunction.init({
|
||||
dsn: process.env.SENTRY_DSN,
|
||||
tracesSampleRate: 0,
|
||||
})
|
||||
|
||||
const pubsub = new PubSub()
|
||||
const storage = new Storage()
|
||||
const CONTENT_UPDATE_TOPIC = 'updatePageContent'
|
||||
@ -16,18 +18,16 @@ interface StorageEventData {
|
||||
contentType: string
|
||||
}
|
||||
|
||||
function isStorageEventData(event: any): event is StorageEventData {
|
||||
return 'name' in event && 'bucket' in event && 'contentType' in event
|
||||
}
|
||||
|
||||
// Ensure this is a finalize event and that it is stored in the `u/` directory and is a PDF
|
||||
const shouldHandle = (data: StorageEventData, ctx: CloudFunctionsContext) => {
|
||||
if (ctx.eventType !== 'google.storage.object.finalize') {
|
||||
return false
|
||||
}
|
||||
if (
|
||||
!data.name.startsWith('u/') ||
|
||||
data.contentType.toLowerCase() != 'application/pdf'
|
||||
) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
const shouldHandle = (data: StorageEventData) => {
|
||||
return (
|
||||
data.name.startsWith('u/') &&
|
||||
data.contentType.toLowerCase() === 'application/pdf'
|
||||
)
|
||||
}
|
||||
|
||||
const getDocumentUrl = async (
|
||||
@ -69,37 +69,66 @@ export const updatePageContent = (
|
||||
})
|
||||
}
|
||||
|
||||
export const pdfHandler: EventFunction = async (event, context) => {
|
||||
const data = event as StorageEventData
|
||||
const ctx = context as CloudFunctionsContext
|
||||
|
||||
if (shouldHandle(data, ctx)) {
|
||||
console.log('handling pdf data', data)
|
||||
|
||||
const url = await getDocumentUrl(data)
|
||||
console.log('PDF url: ', url)
|
||||
if (!url) {
|
||||
console.log('Could not fetch PDF', data.bucket, data.name)
|
||||
return
|
||||
const getStorageEventData = (
|
||||
pubSubMessage: string
|
||||
): StorageEventData | undefined => {
|
||||
try {
|
||||
const str = Buffer.from(pubSubMessage, 'base64').toString().trim()
|
||||
const obj = JSON.parse(str) as unknown
|
||||
if (isStorageEventData(obj)) {
|
||||
return obj
|
||||
}
|
||||
|
||||
const parsed = await parsePdf(url)
|
||||
const res = await updatePageContent(
|
||||
data.name,
|
||||
parsed.content,
|
||||
parsed.title,
|
||||
parsed.author,
|
||||
parsed.description
|
||||
)
|
||||
console.log(
|
||||
'publish result',
|
||||
res,
|
||||
'title',
|
||||
parsed.title,
|
||||
'author',
|
||||
parsed.author
|
||||
)
|
||||
} else {
|
||||
console.log('not handling pdf data', data)
|
||||
} catch (err) {
|
||||
console.log('error deserializing event: ', { pubSubMessage, err })
|
||||
}
|
||||
return undefined
|
||||
}
|
||||
|
||||
export const pdfHandler = Sentry.GCPFunction.wrapHttpFunction(
|
||||
async (req, res) => {
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
|
||||
if ('message' in req.body && 'data' in req.body.message) {
|
||||
const pubSubMessage = req.body.message.data as string
|
||||
const data = getStorageEventData(pubSubMessage)
|
||||
if (data) {
|
||||
try {
|
||||
if (shouldHandle(data)) {
|
||||
console.log('handling pdf data', data)
|
||||
|
||||
const url = await getDocumentUrl(data)
|
||||
console.log('PDF url: ', url)
|
||||
if (!url) {
|
||||
console.log('Could not fetch PDF', data.bucket, data.name)
|
||||
return res.status(404).send('Could not fetch PDF')
|
||||
}
|
||||
|
||||
const parsed = await parsePdf(url)
|
||||
const result = await updatePageContent(
|
||||
data.name,
|
||||
parsed.content,
|
||||
parsed.title,
|
||||
parsed.author,
|
||||
parsed.description
|
||||
)
|
||||
console.log(
|
||||
'publish result',
|
||||
result,
|
||||
'title',
|
||||
parsed.title,
|
||||
'author',
|
||||
parsed.author
|
||||
)
|
||||
} else {
|
||||
console.log('not handling pdf data', data)
|
||||
}
|
||||
} catch (err) {
|
||||
console.log('error handling event', { err, data })
|
||||
return res.status(500).send('Error handling event')
|
||||
}
|
||||
}
|
||||
} else {
|
||||
console.log('no pubsub message')
|
||||
}
|
||||
res.send('ok')
|
||||
}
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user