Process labels and state in the csv file
This commit is contained in:
@ -238,7 +238,7 @@ export function integrationsServiceRouter() {
|
||||
// write the list of urls, state and labels to the stream
|
||||
const csvData = retrievedData.map((page) => {
|
||||
const { url, state, labels } = page
|
||||
return [url, state, `[${labels?.join(',') || ''}]`].join(',')
|
||||
return [url, state, `"[${labels?.join(',') || ''}]"`].join(',')
|
||||
})
|
||||
writeStream.write(csvData.join('\n'))
|
||||
|
||||
|
||||
@ -13,7 +13,10 @@ export const importCsv = async (ctx: ImportContext, stream: Stream) => {
|
||||
for await (const row of parser) {
|
||||
try {
|
||||
const url = new URL(row[0])
|
||||
await ctx.urlHandler(ctx, url)
|
||||
const state = row.length > 1 ? row[1] : undefined
|
||||
// labels follow the format "[label1,label2]" (comma-separated, no spaces; entries are not trimmed)
|
||||
const labels = row.length > 2 ? row[2].slice(1, -1).split(',') : undefined
|
||||
await ctx.urlHandler(ctx, url, state, labels)
|
||||
ctx.countImported += 1
|
||||
} catch (error) {
|
||||
console.log('invalid url', row, error)
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import { Storage } from '@google-cloud/storage'
|
||||
import { importCsv } from './csv'
|
||||
import * as path from 'path'
|
||||
import { importMatterArchive, importMatterHistoryCsv } from './matterHistory'
|
||||
import { importMatterArchive } from './matterHistory'
|
||||
import { Stream } from 'node:stream'
|
||||
import { v4 as uuid } from 'uuid'
|
||||
import { CONTENT_FETCH_URL, createCloudTask, emailUserUrl } from './task'
|
||||
@ -13,6 +13,8 @@ import { Readability } from '@omnivore/readability'
|
||||
|
||||
import * as Sentry from '@sentry/serverless'
|
||||
|
||||
/** State that may accompany an imported URL; used as the optional `state` argument of a `UrlHandler`. */
export type RetrievedDataState = 'archived' | 'saved' | 'deleted'
|
||||
|
||||
Sentry.GCPFunction.init({
|
||||
dsn: process.env.SENTRY_DSN,
|
||||
tracesSampleRate: 0,
|
||||
@ -24,7 +26,12 @@ const storage = new Storage()
|
||||
|
||||
const CONTENT_TYPES = ['text/csv', 'application/zip']
|
||||
|
||||
export type UrlHandler = (ctx: ImportContext, url: URL) => Promise<void>
|
||||
/**
 * Callback an importer invokes for each URL it reads.
 *
 * `state` and `labels` are optional: the CSV importer passes `undefined`
 * for either one when the row has no corresponding column.
 */
export type UrlHandler = (
|
||||
ctx: ImportContext,
|
||||
url: URL,
|
||||
state?: RetrievedDataState,
|
||||
labels?: string[]
|
||||
) => Promise<void>
|
||||
export type ContentHandler = (
|
||||
ctx: ImportContext,
|
||||
url: URL,
|
||||
|
||||
@ -4,7 +4,7 @@ import { expect } from 'chai'
|
||||
import chaiString from 'chai-string'
|
||||
import * as fs from 'fs'
|
||||
import { importCsv } from '../../src/csv'
|
||||
import { ImportContext } from '../../src'
|
||||
import { ImportContext, RetrievedDataState } from '../../src'
|
||||
import { stubImportCtx } from '../util'
|
||||
|
||||
chai.use(chaiString)
|
||||
@ -28,3 +28,44 @@ describe('Load a simple CSV file', () => {
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
// Checks that importCsv forwards the URL, state and labels of every CSV row to ctx.urlHandler.
describe('Load a complex CSV file', () => {
|
||||
it('should call the handler for each URL, state and labels', async () => {
|
||||
// Collects the arguments of every urlHandler call, in call order.
const results: {
|
||||
url: URL
|
||||
state?: RetrievedDataState
|
||||
labels?: string[]
|
||||
}[] = []
|
||||
const stream = fs.createReadStream('./test/csv/data/complex.csv')
|
||||
const stub = stubImportCtx()
|
||||
// Replace the stub's handler with one that records what it was called with.
stub.urlHandler = (
|
||||
ctx: ImportContext,
|
||||
url,
|
||||
state,
|
||||
labels
|
||||
): Promise<void> => {
|
||||
results.push({
|
||||
url,
|
||||
state,
|
||||
labels,
|
||||
})
|
||||
return Promise.resolve()
|
||||
}
|
||||
|
||||
await importCsv(stub, stream)
|
||||
// The fixture has two rows; both are expected to import with no failures.
expect(stub.countFailed).to.equal(0)
|
||||
expect(stub.countImported).to.equal(2)
|
||||
// The bracketed label cell is expected to arrive as an array of label names.
expect(results).to.eql([
|
||||
{
|
||||
url: new URL('https://omnivore.app'),
|
||||
state: 'archived',
|
||||
labels: ['test'],
|
||||
},
|
||||
{
|
||||
url: new URL('https://google.com'),
|
||||
state: 'saved',
|
||||
labels: ['test', 'development'],
|
||||
},
|
||||
])
|
||||
})
|
||||
})
|
||||
|
||||
2
packages/import-handler/test/csv/data/complex.csv
Normal file
2
packages/import-handler/test/csv/data/complex.csv
Normal file
@ -0,0 +1,2 @@
|
||||
"https://omnivore.app",archived,"[test]"
|
||||
"https://google.com",saved,"[test,development]"
|
||||
|
@ -1,12 +1,17 @@
|
||||
import { Readability } from '@omnivore/readability'
|
||||
import { ImportContext } from '../src'
|
||||
import { ImportContext, RetrievedDataState } from '../src'
|
||||
|
||||
export const stubImportCtx = () => {
|
||||
return {
|
||||
userId: '',
|
||||
countImported: 0,
|
||||
countFailed: 0,
|
||||
urlHandler: (ctx: ImportContext, url: URL): Promise<void> => {
|
||||
urlHandler: (
|
||||
ctx: ImportContext,
|
||||
url: URL,
|
||||
state?: RetrievedDataState,
|
||||
labels?: string[]
|
||||
): Promise<void> => {
|
||||
return Promise.resolve()
|
||||
},
|
||||
contentHandler: (
|
||||
|
||||
Reference in New Issue
Block a user