Add tests

This commit is contained in:
Hongbo Wu
2022-09-29 16:22:51 +08:00
parent 5fdb8b337d
commit 8fb398eae4
15 changed files with 86 additions and 44 deletions

View File

@ -1,14 +1,14 @@
import { ContentHandler, PreHandleResult } from '../index'
import { ContentHandler, PreHandleResult } from './index'
import axios from 'axios'
import { parseHTML } from 'linkedom'
export class AppleNewsHandler extends ContentHandler {
shouldPreHandle(url: string, _dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
const u = new URL(url)
return u.hostname === 'apple.news'
}
async preHandle(url: string, _document: Document): Promise<PreHandleResult> {
async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
const MOBILE_USER_AGENT =
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36'
const response = await axios.get(url, {

View File

@ -1,15 +1,15 @@
import { ContentHandler, PreHandleResult } from '../index'
import { ContentHandler, PreHandleResult } from './index'
import axios from 'axios'
import { parseHTML } from 'linkedom'
class BloombergHandler extends ContentHandler {
shouldPreHandle(url: string, _dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
const BLOOMBERG_URL_MATCH =
/https?:\/\/(www\.)?bloomberg.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)/
return BLOOMBERG_URL_MATCH.test(url.toString())
}
async preHandle(url: string, _document: Document): Promise<PreHandleResult> {
async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
console.log('prehandling bloomberg url', url)
try {

View File

@ -1,14 +1,14 @@
import { ContentHandler, PreHandleResult } from '../index'
import { ContentHandler, PreHandleResult } from './index'
import axios from 'axios'
import { parseHTML } from 'linkedom'
class DerstandardHandler extends ContentHandler {
shouldPreHandle(url: string, _dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
const u = new URL(url)
return u.hostname === 'www.derstandard.at'
}
async preHandle(url: string, _document: Document): Promise<PreHandleResult> {
async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
const response = await axios.get(url, {
// set cookie to give consent to get the article
headers: {

View File

@ -1,12 +1,12 @@
import { ContentHandler, PreHandleResult } from '../index'
import { ContentHandler, PreHandleResult } from './index'
class ImageHandler extends ContentHandler {
shouldPreHandle(url: string, _dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
const IMAGE_URL_PATTERN = /(https?:\/\/.*\.(?:jpg|jpeg|png|webp))/i
return IMAGE_URL_PATTERN.test(url.toString())
}
async preHandle(url: string, _document: Document): Promise<PreHandleResult> {
async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
const title = url.toString().split('/').pop() || 'Image'
const content = `
<html>

View File

@ -31,11 +31,11 @@ export class ContentHandler {
protected defaultUrl = 'NEWSLETTER_DEFAULT_URL'
protected name = ''
shouldPreHandle(url: string, dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
return false
}
preHandle(url: string, document: Document): Promise<PreHandleResult> {
async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
return Promise.resolve({ url, dom: document })
}

View File

@ -1,12 +1,12 @@
import { ContentHandler, PreHandleResult } from '../index'
import { ContentHandler, PreHandleResult } from './index'
class MediumHandler extends ContentHandler {
shouldPreHandle(url: string, _dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
const u = new URL(url)
return u.hostname.endsWith('medium.com')
}
async preHandle(url: string, _document: Document): Promise<PreHandleResult> {
async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
console.log('prehandling medium url', url)
try {

View File

@ -1,13 +1,13 @@
import { ContentHandler, PreHandleResult } from '../index'
import { ContentHandler, PreHandleResult } from './index'
class PdfHandler extends ContentHandler {
shouldPreHandle(url: string, _dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
const u = new URL(url)
const path = u.pathname.replace(u.search, '')
return path.endsWith('.pdf')
}
async preHandle(_url: string, _document: Document): Promise<PreHandleResult> {
async preHandle(_url: string, document?: Document): Promise<PreHandleResult> {
return Promise.resolve({ contentType: 'application/pdf' })
}
}

View File

@ -1,16 +1,16 @@
import { ContentHandler, PreHandleResult } from '../index'
import { ContentHandler, PreHandleResult } from './index'
import axios from 'axios'
import { parseHTML } from 'linkedom'
class ScrapingBeeHandler extends ContentHandler {
shouldPreHandle(url: string, _dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
const u = new URL(url)
const hostnames = ['nytimes.com', 'news.google.com']
return hostnames.some((h) => u.hostname.endsWith(h))
}
async preHandle(url: string, _document: Document): Promise<PreHandleResult> {
async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
console.log('prehandling url with scrapingbee', url)
try {

View File

@ -1,4 +1,4 @@
import { ContentHandler } from '../index'
import { ContentHandler } from './index'
import axios from 'axios'
class TDotCoHandler extends ContentHandler {

View File

@ -1,4 +1,4 @@
import { ContentHandler, PreHandleResult } from '../index'
import { ContentHandler, PreHandleResult } from './index'
import axios from 'axios'
import { DateTime } from 'luxon'
import _ from 'underscore'
@ -53,11 +53,11 @@ const formatTimestamp = (timestamp: string) => {
}
class TwitterHandler extends ContentHandler {
shouldPreHandle(url: string, _dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
return !!TWITTER_BEARER_TOKEN && TWITTER_URL_MATCH.test(url.toString())
}
async preHandle(url: string, _document: Document): Promise<PreHandleResult> {
async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
console.log('prehandling twitter url', url)
const tweetId = tweetIdFromStatusUrl(url)

View File

@ -1,11 +1,11 @@
import { ContentHandler, PreHandleResult } from '../index'
import { ContentHandler, PreHandleResult } from './index'
import axios from 'axios'
import _ from 'underscore'
const YOUTUBE_URL_MATCH =
/^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/
function getYoutubeVideoId(url: string) {
export const getYoutubeVideoId = (url: string) => {
const u = new URL(url)
const videoId = u.searchParams.get('v')
if (!videoId) {
@ -19,11 +19,11 @@ function getYoutubeVideoId(url: string) {
}
class YoutubeHandler extends ContentHandler {
shouldPreHandle(url: string, _dom: Document): boolean {
shouldPreHandle(url: string, dom?: Document): boolean {
return YOUTUBE_URL_MATCH.test(url.toString())
}
async preHandle(url: string, _document: Document): Promise<PreHandleResult> {
async preHandle(url: string, document?: Document): Promise<PreHandleResult> {
const videoId = getYoutubeVideoId(url)
if (!videoId) {
return {}

View File

@ -0,0 +1,10 @@
import { AppleNewsHandler } from '../src/apple-news-handler'
// Smoke test for AppleNewsHandler.preHandle.
// The handler issues an HTTP GET for the given URL, so this test needs
// network access and exercises a live apple.news link.
// NOTE(review): a live request may exceed the test runner's default
// per-test timeout — confirm and raise the timeout if it proves flaky.
describe('open a simple web page', () => {
  it('should return a response', async () => {
    const response = await new AppleNewsHandler().preHandle(
      'https://apple.news/AxjzaZaPvSn23b67LhXI5EQ'
    )
    console.log('response', response)
    // Fail explicitly when nothing came back; previously the test could only
    // fail if preHandle rejected, and otherwise passed on a bare log line.
    if (response == null) {
      throw new Error('AppleNewsHandler.preHandle resolved without a result')
    }
  })
})

View File

@ -1,13 +0,0 @@
import 'mocha'
import * as chai from 'chai'
import { expect } from 'chai'
import 'chai/register-should'
import chaiString from 'chai-string'
// Register the chai-string plugin with chai before any suite runs.
chai.use(chaiString)

// Trivial suite containing a single always-true assertion.
describe('Stub test', function () {
  it('should pass', function () {
    expect(true).to.be.true
  })
})

View File

@ -0,0 +1,25 @@
import { expect } from 'chai'
import 'mocha'
import { getYoutubeVideoId } from '../src/youtube-handler'
// Unit tests for getYoutubeVideoId: watch URLs carrying extra query
// parameters, and youtu.be short links with and without a query string.
describe('getYoutubeVideoId', () => {
  // The callback is synchronous: nothing here awaits, so no `async` needed.
  it('should parse video id out of a URL', () => {
    // Each entry is [input URL, expected video id].
    const cases: Array<[string, string]> = [
      ['https://www.youtube.com/watch?v=BnSUk0je6oo&t=269s', 'BnSUk0je6oo'],
      [
        'https://www.youtube.com/watch?v=vFD2gu007dc&list=RDvFD2gu007dc&start_radio=1',
        'vFD2gu007dc',
      ],
      ['https://youtu.be/vFD2gu007dc', 'vFD2gu007dc'],
      ['https://youtube.com/watch?v=BMFVCnbRaV4&feature=share', 'BMFVCnbRaV4'],
      ['https://youtu.be/cg9b4RC87LI?t=116', 'cg9b4RC87LI'],
    ]

    cases.forEach(([url, videoId]) => {
      // The actual value goes inside expect() and the expected value in eq(),
      // so a failure reads "expected <actual> to equal <expected>"; the URL is
      // passed as the message to identify which case broke.
      expect(getYoutubeVideoId(url), url).to.eq(videoId)
    })
  })
})

View File

@ -10579,6 +10579,19 @@ chai@^4.3.4:
pathval "^1.1.1"
type-detect "^4.0.5"
chai@^4.3.6:
version "4.3.6"
resolved "https://registry.yarnpkg.com/chai/-/chai-4.3.6.tgz#ffe4ba2d9fa9d6680cc0b370adae709ec9011e9c"
integrity sha512-bbcp3YfHCUzMOvKqsztczerVgBKSsEijCySNlHHbX3VG1nskvqjz5Rfso1gGwD6w6oOV3eI60pKuMOV5MV7p3Q==
dependencies:
assertion-error "^1.1.0"
check-error "^1.0.2"
deep-eql "^3.0.1"
get-func-name "^2.0.0"
loupe "^2.3.1"
pathval "^1.1.1"
type-detect "^4.0.5"
chalk@^1.0.0, chalk@^1.1.3:
version "1.1.3"
resolved "https://registry.yarnpkg.com/chalk/-/chalk-1.1.3.tgz#a8115c55e4a702fe4d150abd3872822a7e09fc98"
@ -18065,6 +18078,13 @@ loose-envify@^1.0.0, loose-envify@^1.1.0, loose-envify@^1.4.0:
dependencies:
js-tokens "^3.0.0 || ^4.0.0"
loupe@^2.3.1:
version "2.3.4"
resolved "https://registry.yarnpkg.com/loupe/-/loupe-2.3.4.tgz#7e0b9bffc76f148f9be769cb1321d3dcf3cb25f3"
integrity sha512-OvKfgCC2Ndby6aSTREl5aCCPTNIzlDfQZvZxNUrBrihDhL3xcrYegTblhmEiCrg2kKQz4XsFIaemE5BF4ybSaQ==
dependencies:
get-func-name "^2.0.0"
lower-case-first@^1.0.0:
version "1.0.2"
resolved "https://registry.yarnpkg.com/lower-case-first/-/lower-case-first-1.0.2.tgz#e5da7c26f29a7073be02d52bac9980e5922adfa1"