Add the _matter_history importer

This commit is contained in:
Jackson Harper
2023-01-03 18:13:59 +08:00
parent 0edd91057e
commit 9213d222c4
3 changed files with 57 additions and 0 deletions

View File

@ -0,0 +1,32 @@
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
/* eslint-disable @typescript-eslint/no-unsafe-call */
/* eslint-disable @typescript-eslint/no-unsafe-argument */
import { parse } from '@fast-csv/parse'
import { Stream } from 'stream'
/**
 * Callback invoked with the URL parsed from each imported row.
 * The importer awaits the returned promise before reading the next row.
 */
export type UrlHandler = (url: URL) => Promise<void>
/**
 * Reads a `_matter_history` CSV export from `stream` and passes the `URL`
 * column of every row to `handler`, one row at a time.
 *
 * Rows whose URL cannot be parsed, or for which `handler` rejects, are logged
 * and skipped; they do not abort the import.
 *
 * @param stream - Source of the CSV text; it is parsed with the first row
 *   treated as the column headers.
 * @param handler - Awaited for each parsed URL before the next row is read.
 * @returns The number of rows read, including rows that were logged and
 *   skipped.
 * @throws Rejects if the source stream or the CSV parser emits an error.
 */
export const importMatterHistory = async (
  stream: Stream,
  handler: UrlHandler
): Promise<number> => {
  const parser = parse({
    headers: true,
    strictColumnHandling: false,
  })

  // pipe() does not forward errors from the source stream. Without this
  // listener a failing source (e.g. an unreadable file) raises an unhandled
  // 'error' event and the loop below never settles; destroying the parser
  // with the same error makes the for-await reject instead.
  stream.on('error', (error: Error) => {
    parser.destroy(error)
  })
  stream.pipe(parser)

  let count = 0
  for await (const row of parser) {
    try {
      const url = new URL(row['URL'])
      await handler(url)
    } catch (error) {
      // Best effort: a bad row (or a failing handler) must not stop the import.
      console.log('invalid url', row, error)
    }
    count++
  }
  return count
}

View File

@ -0,0 +1,2 @@
Title,Author,Publisher,URL,Word Count,Saved,Read,Highlight Count,Last Interaction Date,File Id
"The Only Crypto Story You Need, by Matt Levine",Matt Levine,Bloomberg,https://www.bloomberg.com/features/2022-the-crypto-story/,39138,TRUE,TRUE,2,2022-12-18 14:49:11,content_15530945.html
1 Title Author Publisher URL Word Count Saved Read Highlight Count Last Interaction Date File Id
2 The Only Crypto Story You Need, by Matt Levine Matt Levine Bloomberg https://www.bloomberg.com/features/2022-the-crypto-story/ 39138 TRUE TRUE 2 2022-12-18 14:49:11 content_15530945.html

View File

@ -0,0 +1,23 @@
import 'mocha'
import * as chai from 'chai'
import { expect } from 'chai'
import chaiString from 'chai-string'
import * as fs from 'fs'
import { importMatterHistory } from '../../src/matterHistory'
// Register the chai-string plugin before the suite runs.
chai.use(chaiString)

describe('Load a simple _matter_history file', () => {
  it('should find the URL of each row', async () => {
    const fixture = fs.createReadStream('./test/matter/data/_matter_history.csv')

    // Collect every URL the importer hands back so it can be compared below.
    const seen: URL[] = []
    const collect = (url: URL): Promise<void> => {
      seen.push(url)
      return Promise.resolve()
    }

    const imported = await importMatterHistory(fixture, collect)

    // The test expects exactly one row and its URL.
    expect(imported).to.equal(1)
    expect(seen).to.eql([
      new URL('https://www.bloomberg.com/features/2022-the-crypto-story/'),
    ])
  })
})