diff --git a/packages/import-handler/src/matterHistory.ts b/packages/import-handler/src/matterHistory.ts
new file mode 100644
index 000000000..8342735b5
--- /dev/null
+++ b/packages/import-handler/src/matterHistory.ts
@@ -0,0 +1,56 @@
+/* eslint-disable @typescript-eslint/no-unsafe-member-access */
+/* eslint-disable @typescript-eslint/no-unsafe-assignment */
+/* eslint-disable @typescript-eslint/no-unsafe-call */
+/* eslint-disable @typescript-eslint/no-unsafe-argument */
+
+import { parse } from '@fast-csv/parse'
+import { Stream } from 'stream'
+
+/**
+ * Callback invoked with the parsed `URL` column of a row. The importer awaits
+ * the returned promise before it moves on to the next row.
+ */
+export type UrlHandler = (url: URL) => Promise<void>
+
+/**
+ * Parses CSV data from `stream` (first row is the header) and passes the
+ * `URL` column of every row to `handler`, one row at a time.
+ *
+ * Rows are handled on a best-effort basis: if the `URL` value cannot be
+ * parsed, or if `handler` rejects, the failure is logged and the import
+ * continues with the next row.
+ *
+ * @param stream - source of the raw CSV data
+ * @param handler - called with the URL of each row
+ * @returns the number of rows read, including rows that failed
+ * @throws rejects if `stream` or the CSV parser emits an error
+ */
+export const importMatterHistory = async (
+  stream: Stream,
+  handler: UrlHandler
+): Promise<number> => {
+  const parser = parse({
+    headers: true,
+    strictColumnHandling: false,
+  })
+  // `pipe` does not forward errors from the source to the destination, so a
+  // failing source would leave the parser open and the loop below waiting
+  // forever. Destroying the parser makes the iteration reject instead.
+  stream.on('error', (error: Error) => {
+    parser.destroy(error)
+  })
+  stream.pipe(parser)
+
+  let count = 0
+  for await (const row of parser) {
+    try {
+      const url = new URL(row['URL'])
+      await handler(url)
+    } catch (error) {
+      // Reached for unparsable URLs and for errors thrown by `handler`.
+      console.log('invalid url', row, error)
+    }
+    count++
+  }
+  return count
+}
diff --git a/packages/import-handler/test/matter/data/_matter_history.csv b/packages/import-handler/test/matter/data/_matter_history.csv
new file mode 100644
index 000000000..d403bc393
--- /dev/null
+++ b/packages/import-handler/test/matter/data/_matter_history.csv
@@ -0,0 +1,2 @@
+Title,Author,Publisher,URL,Word Count,Saved,Read,Highlight Count,Last Interaction Date,File Id
+"The Only Crypto Story You Need, by Matt Levine",Matt Levine,Bloomberg,https://www.bloomberg.com/features/2022-the-crypto-story/,39138,TRUE,TRUE,2,2022-12-18 14:49:11,content_15530945.html
\ No newline at end of file
diff --git a/packages/import-handler/test/matter/matter_importer.test.ts b/packages/import-handler/test/matter/matter_importer.test.ts
new file mode 100644
index 000000000..6a3b24e98
--- /dev/null
+++ b/packages/import-handler/test/matter/matter_importer.test.ts
@@ -0,0 +1,25 @@
+import 'mocha'
+import * as chai from 'chai'
+import { expect } from 'chai'
+import chaiString from 'chai-string'
+import * as fs from 'fs'
+import { importMatterHistory } from '../../src/matterHistory'
+
+chai.use(chaiString)
+
+describe('Load a simple _matter_history file', () => {
+  it('should find the URL of each row', async () => {
+    const urls: URL[] = []
+    // The fixture path is relative to the working directory of the test run.
+    const stream = fs.createReadStream('./test/matter/data/_matter_history.csv')
+    const count = await importMatterHistory(stream, (url): Promise<void> => {
+      urls.push(url)
+      return Promise.resolve()
+    })
+    // The fixture has a header row followed by a single data row.
+    expect(count).to.equal(1)
+    expect(urls).to.eql([
+      new URL('https://www.bloomberg.com/features/2022-the-crypto-story/'),
+    ])
+  })
+})