diff --git a/packages/api/src/routers/svc/integrations.ts b/packages/api/src/routers/svc/integrations.ts index ec6d20bef..981028576 100644 --- a/packages/api/src/routers/svc/integrations.ts +++ b/packages/api/src/routers/svc/integrations.ts @@ -238,7 +238,7 @@ export function integrationsServiceRouter() { // write the list of urls, state and labels to the stream const csvData = retrievedData.map((page) => { const { url, state, labels } = page - return [url, state, `[${labels?.join(',') || ''}]`].join(',') + return [url, state, `"[${labels?.join(',') || ''}]"`].join(',') }) writeStream.write(csvData.join('\n')) diff --git a/packages/import-handler/src/csv.ts b/packages/import-handler/src/csv.ts index c505cc39a..66835127c 100644 --- a/packages/import-handler/src/csv.ts +++ b/packages/import-handler/src/csv.ts @@ -13,7 +13,10 @@ export const importCsv = async (ctx: ImportContext, stream: Stream) => { for await (const row of parser) { try { const url = new URL(row[0]) - await ctx.urlHandler(ctx, url) + const state = row.length > 1 ? row[1] : undefined + // labels follow the format: "[label1,label2]" + const labels = row.length > 2 ? 
row[2].slice(1, -1).split(',') : undefined + await ctx.urlHandler(ctx, url, state, labels) ctx.countImported += 1 } catch (error) { console.log('invalid url', row, error) diff --git a/packages/import-handler/src/index.ts b/packages/import-handler/src/index.ts index ded094b5d..209edd852 100644 --- a/packages/import-handler/src/index.ts +++ b/packages/import-handler/src/index.ts @@ -1,7 +1,7 @@ import { Storage } from '@google-cloud/storage' import { importCsv } from './csv' import * as path from 'path' -import { importMatterArchive, importMatterHistoryCsv } from './matterHistory' +import { importMatterArchive } from './matterHistory' import { Stream } from 'node:stream' import { v4 as uuid } from 'uuid' import { CONTENT_FETCH_URL, createCloudTask, emailUserUrl } from './task' @@ -13,6 +13,8 @@ import { Readability } from '@omnivore/readability' import * as Sentry from '@sentry/serverless' +export type RetrievedDataState = 'archived' | 'saved' | 'deleted' + Sentry.GCPFunction.init({ dsn: process.env.SENTRY_DSN, tracesSampleRate: 0, @@ -24,7 +26,12 @@ const storage = new Storage() const CONTENT_TYPES = ['text/csv', 'application/zip'] -export type UrlHandler = (ctx: ImportContext, url: URL) => Promise +export type UrlHandler = ( + ctx: ImportContext, + url: URL, + state?: RetrievedDataState, + labels?: string[] +) => Promise export type ContentHandler = ( ctx: ImportContext, url: URL, diff --git a/packages/import-handler/test/csv/csv.test.ts b/packages/import-handler/test/csv/csv.test.ts index 0f695d69e..8526d25d5 100644 --- a/packages/import-handler/test/csv/csv.test.ts +++ b/packages/import-handler/test/csv/csv.test.ts @@ -4,7 +4,7 @@ import { expect } from 'chai' import chaiString from 'chai-string' import * as fs from 'fs' import { importCsv } from '../../src/csv' -import { ImportContext } from '../../src' +import { ImportContext, RetrievedDataState } from '../../src' import { stubImportCtx } from '../util' chai.use(chaiString) @@ -28,3 +28,44 @@ describe('Load a 
simple CSV file', () => { ]) }) }) + +describe('Load a complex CSV file', () => { + it('should call the handler for each URL, state and labels', async () => { + const results: { + url: URL + state?: RetrievedDataState + labels?: string[] + }[] = [] + const stream = fs.createReadStream('./test/csv/data/complex.csv') + const stub = stubImportCtx() + stub.urlHandler = ( + ctx: ImportContext, + url, + state, + labels + ): Promise => { + results.push({ + url, + state, + labels, + }) + return Promise.resolve() + } + + await importCsv(stub, stream) + expect(stub.countFailed).to.equal(0) + expect(stub.countImported).to.equal(2) + expect(results).to.eql([ + { + url: new URL('https://omnivore.app'), + state: 'archived', + labels: ['test'], + }, + { + url: new URL('https://google.com'), + state: 'saved', + labels: ['test', 'development'], + }, + ]) + }) +}) diff --git a/packages/import-handler/test/csv/data/complex.csv b/packages/import-handler/test/csv/data/complex.csv new file mode 100644 index 000000000..b2aa9cd64 --- /dev/null +++ b/packages/import-handler/test/csv/data/complex.csv @@ -0,0 +1,2 @@ +"https://omnivore.app",archived,"[test]" +"https://google.com",saved,"[test,development]" diff --git a/packages/import-handler/test/util.ts b/packages/import-handler/test/util.ts index 58cd9dd10..68027ad33 100644 --- a/packages/import-handler/test/util.ts +++ b/packages/import-handler/test/util.ts @@ -1,12 +1,17 @@ import { Readability } from '@omnivore/readability' -import { ImportContext } from '../src' +import { ImportContext, RetrievedDataState } from '../src' export const stubImportCtx = () => { return { userId: '', countImported: 0, countFailed: 0, - urlHandler: (ctx: ImportContext, url: URL): Promise => { + urlHandler: ( + ctx: ImportContext, + url: URL, + state?: RetrievedDataState, + labels?: string[] + ): Promise => { return Promise.resolve() }, contentHandler: (