Add the _matter_history importer
This commit is contained in:
32
packages/import-handler/src/matterHistory.ts
Normal file
32
packages/import-handler/src/matterHistory.ts
Normal file
@ -0,0 +1,32 @@
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-call */
|
||||
/* eslint-disable @typescript-eslint/no-unsafe-argument */
|
||||
|
||||
import { parse } from '@fast-csv/parse'
|
||||
import { Stream } from 'stream'
|
||||
|
||||
/**
 * Async callback that receives one parsed URL at a time. The returned promise
 * is awaited by the caller before the next URL is handed over.
 */
export type UrlHandler = (url: URL) => Promise<void>
|
||||
|
||||
/**
 * Reads a `_matter_history` CSV from `stream` and calls `handler` once for
 * every row whose `URL` column parses as a valid URL.
 *
 * Rows are handled one at a time: the next row is not processed until the
 * promise returned by `handler` for the current row has settled. A row with
 * an unparsable URL, or a row whose handler rejects, is logged and skipped so
 * that a single bad row does not abort the whole import.
 *
 * @param stream - Source of the CSV data. The first row is used as the header
 *   row, and rows with a mismatched column count are tolerated.
 * @param handler - Callback invoked with each successfully parsed URL.
 * @returns The number of data rows read, including rows that were logged and
 *   skipped.
 * @throws Rejects when the source stream or the CSV parser reports an error.
 */
export const importMatterHistory = async (
  stream: Stream,
  handler: UrlHandler
): Promise<number> => {
  const parser = parse({
    headers: true,
    strictColumnHandling: false,
  })

  // pipe() does not forward errors from the source to the destination. Without
  // this listener a failing source would emit an unhandled 'error' event and
  // the loop below would wait forever on a parser that never ends. Destroying
  // the parser with the error makes the async iteration reject instead.
  stream.on('error', (error: Error) => {
    parser.destroy(error)
  })
  stream.pipe(parser)

  let count = 0
  for await (const row of parser) {
    // Every row read is counted, whether or not it is imported successfully.
    count++

    let url: URL
    try {
      url = new URL(row['URL'])
    } catch (error) {
      console.log('invalid url', row, error)
      continue
    }

    // Handler failures are reported separately so they are not mistaken for
    // malformed URLs in the logs.
    try {
      await handler(url)
    } catch (error) {
      console.log('failed to import url', row, error)
    }
  }
  return count
}
|
||||
@ -0,0 +1,2 @@
|
||||
Title,Author,Publisher,URL,Word Count,Saved,Read,Highlight Count,Last Interaction Date,File Id
|
||||
"The Only Crypto Story You Need, by Matt Levine",Matt Levine,Bloomberg,https://www.bloomberg.com/features/2022-the-crypto-story/,39138,TRUE,TRUE,2,2022-12-18 14:49:11,content_15530945.html
|
||||
|
23
packages/import-handler/test/matter/matter_importer.test.ts
Normal file
23
packages/import-handler/test/matter/matter_importer.test.ts
Normal file
@ -0,0 +1,23 @@
|
||||
import 'mocha'
|
||||
import * as chai from 'chai'
|
||||
import { expect } from 'chai'
|
||||
import chaiString from 'chai-string'
|
||||
import * as fs from 'fs'
|
||||
import { importMatterHistory } from '../../src/matterHistory'
|
||||
|
||||
chai.use(chaiString)
|
||||
|
||||
// Runs the importer over the fixture CSV and checks that exactly one row is
// counted and that its URL is the one handed to the handler.
describe('Load a simple _matter_history file', () => {
  it('should find the URL of each row', async () => {
    const collected: URL[] = []
    // Handler that only records each URL it is given.
    const recordUrl = (url: URL): Promise<void> => {
      collected.push(url)
      return Promise.resolve()
    }
    const csvStream = fs.createReadStream(
      './test/matter/data/_matter_history.csv'
    )

    const rowCount = await importMatterHistory(csvStream, recordUrl)

    expect(rowCount).to.equal(1)
    expect(collected).to.eql([
      new URL('https://www.bloomberg.com/features/2022-the-crypto-story/'),
    ])
  })
})
|
||||
Reference in New Issue
Block a user