diff --git a/packages/content-handler/src/handlers/apple-news-handler.ts b/packages/content-handler/src/apple-news-handler.ts similarity index 81% rename from packages/content-handler/src/handlers/apple-news-handler.ts rename to packages/content-handler/src/apple-news-handler.ts index ab11bdc49..1958b3b8c 100644 --- a/packages/content-handler/src/handlers/apple-news-handler.ts +++ b/packages/content-handler/src/apple-news-handler.ts @@ -1,14 +1,14 @@ -import { ContentHandler, PreHandleResult } from '../index' +import { ContentHandler, PreHandleResult } from './index' import axios from 'axios' import { parseHTML } from 'linkedom' export class AppleNewsHandler extends ContentHandler { - shouldPreHandle(url: string, _dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { const u = new URL(url) return u.hostname === 'apple.news' } - async preHandle(url: string, _document: Document): Promise { + async preHandle(url: string, document?: Document): Promise { const MOBILE_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36' const response = await axios.get(url, { diff --git a/packages/content-handler/src/handlers/bloomberg-handler.ts b/packages/content-handler/src/bloomberg-handler.ts similarity index 83% rename from packages/content-handler/src/handlers/bloomberg-handler.ts rename to packages/content-handler/src/bloomberg-handler.ts index 8052e36e0..ac4fb23ed 100644 --- a/packages/content-handler/src/handlers/bloomberg-handler.ts +++ b/packages/content-handler/src/bloomberg-handler.ts @@ -1,15 +1,15 @@ -import { ContentHandler, PreHandleResult } from '../index' +import { ContentHandler, PreHandleResult } from './index' import axios from 'axios' import { parseHTML } from 'linkedom' class BloombergHandler extends ContentHandler { - shouldPreHandle(url: string, _dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { const BLOOMBERG_URL_MATCH = /https?:\/\/(www\.)?bloomberg.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)/ return BLOOMBERG_URL_MATCH.test(url.toString()) } - async preHandle(url: string, _document: Document): Promise { + async preHandle(url: string, document?: Document): Promise { console.log('prehandling bloomberg url', url) try { diff --git a/packages/content-handler/src/handlers/derstandard-handler.ts b/packages/content-handler/src/derstandard-handler.ts similarity index 80% rename from packages/content-handler/src/handlers/derstandard-handler.ts rename to packages/content-handler/src/derstandard-handler.ts index 1aae94213..1a1f50778 100644 --- a/packages/content-handler/src/handlers/derstandard-handler.ts +++ b/packages/content-handler/src/derstandard-handler.ts @@ -1,14 +1,14 @@ -import { ContentHandler, PreHandleResult } from '../index' +import { ContentHandler, PreHandleResult } from './index' import axios from 'axios' import { parseHTML } from 'linkedom' class DerstandardHandler extends ContentHandler { - shouldPreHandle(url: string, _dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { const u = new URL(url) return u.hostname === 'www.derstandard.at' } - async preHandle(url: string, _document: Document): Promise { + async preHandle(url: string, document?: Document): Promise { const response = await axios.get(url, { // set cookie to give consent to get the article headers: { diff --git a/packages/content-handler/src/handlers/image-handler.ts b/packages/content-handler/src/image-handler.ts similarity index 78% rename from packages/content-handler/src/handlers/image-handler.ts rename to packages/content-handler/src/image-handler.ts index 78447fe33..cb5e462c7 100644 --- a/packages/content-handler/src/handlers/image-handler.ts +++ b/packages/content-handler/src/image-handler.ts @@ -1,12 +1,12 @@ -import { ContentHandler, PreHandleResult } from '../index' +import { ContentHandler, PreHandleResult } from './index' class ImageHandler extends ContentHandler { - shouldPreHandle(url: string, _dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { const IMAGE_URL_PATTERN = /(https?:\/\/.*\.(?:jpg|jpeg|png|webp))/i return IMAGE_URL_PATTERN.test(url.toString()) } - async preHandle(url: string, _document: Document): Promise { + async preHandle(url: string, document?: Document): Promise { const title = url.toString().split('/').pop() || 'Image' const content = ` diff --git a/packages/content-handler/src/index.ts b/packages/content-handler/src/index.ts index 8f3cfbd81..bfb0214e4 100644 --- a/packages/content-handler/src/index.ts +++ b/packages/content-handler/src/index.ts @@ -31,11 +31,11 @@ export class ContentHandler { protected defaultUrl = 'NEWSLETTER_DEFAULT_URL' protected name = '' - shouldPreHandle(url: string, dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { return false } - preHandle(url: string, document: Document): Promise { + async preHandle(url: string, document?: Document): Promise { return Promise.resolve({ url, dom: document }) } diff --git a/packages/content-handler/src/handlers/medium-handler.ts b/packages/content-handler/src/medium-handler.ts similarity index 70% rename from packages/content-handler/src/handlers/medium-handler.ts rename to packages/content-handler/src/medium-handler.ts index 03cc1be8f..0b9d2fcb5 100644 --- a/packages/content-handler/src/handlers/medium-handler.ts +++ b/packages/content-handler/src/medium-handler.ts @@ -1,12 +1,12 @@ -import { ContentHandler, PreHandleResult } from '../index' +import { ContentHandler, PreHandleResult } from './index' class MediumHandler extends ContentHandler { - shouldPreHandle(url: string, _dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { const u = new URL(url) return u.hostname.endsWith('medium.com') } - async preHandle(url: string, _document: Document): Promise { + async preHandle(url: string, document?: Document): Promise { console.log('prehandling medium url', url) try { diff --git a/packages/content-handler/src/handlers/pdf-handler.ts b/packages/content-handler/src/pdf-handler.ts similarity index 57% rename from packages/content-handler/src/handlers/pdf-handler.ts rename to packages/content-handler/src/pdf-handler.ts index fb9b7e045..54df72bc2 100644 --- a/packages/content-handler/src/handlers/pdf-handler.ts +++ b/packages/content-handler/src/pdf-handler.ts @@ -1,13 +1,13 @@ -import { ContentHandler, PreHandleResult } from '../index' +import { ContentHandler, PreHandleResult } from './index' class PdfHandler extends ContentHandler { - shouldPreHandle(url: string, _dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { const u = new URL(url) const path = u.pathname.replace(u.search, '') return path.endsWith('.pdf') } - async preHandle(_url: string, _document: Document): Promise { + async preHandle(_url: string, document?: Document): Promise { return Promise.resolve({ contentType: 'application/pdf' }) } } diff --git a/packages/content-handler/src/handlers/scrapingBee-handler.ts b/packages/content-handler/src/scrapingBee-handler.ts similarity index 82% rename from packages/content-handler/src/handlers/scrapingBee-handler.ts rename to packages/content-handler/src/scrapingBee-handler.ts index c9cbc8afa..0b1b5984b 100644 --- a/packages/content-handler/src/handlers/scrapingBee-handler.ts +++ b/packages/content-handler/src/scrapingBee-handler.ts @@ -1,16 +1,16 @@ -import { ContentHandler, PreHandleResult } from '../index' +import { ContentHandler, PreHandleResult } from './index' import axios from 'axios' import { parseHTML } from 'linkedom' class ScrapingBeeHandler extends ContentHandler { - shouldPreHandle(url: string, _dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { const u = new URL(url) const hostnames = ['nytimes.com', 'news.google.com'] return hostnames.some((h) => u.hostname.endsWith(h)) } - async preHandle(url: string, _document: Document): Promise { + async preHandle(url: string, document?: Document): Promise { console.log('prehandling url with scrapingbee', url) try { diff --git a/packages/content-handler/src/handlers/t-dot-co-handler.ts b/packages/content-handler/src/t-dot-co-handler.ts similarity index 92% rename from packages/content-handler/src/handlers/t-dot-co-handler.ts rename to packages/content-handler/src/t-dot-co-handler.ts index 3a97849f3..41c90e4a5 100644 --- a/packages/content-handler/src/handlers/t-dot-co-handler.ts +++ b/packages/content-handler/src/t-dot-co-handler.ts @@ -1,4 +1,4 @@ -import { ContentHandler } from '../index' +import { ContentHandler } from './index' import axios from 'axios' class TDotCoHandler extends ContentHandler { diff --git a/packages/content-handler/src/handlers/twitter-handler.ts b/packages/content-handler/src/twitter-handler.ts similarity index 96% rename from packages/content-handler/src/handlers/twitter-handler.ts rename to packages/content-handler/src/twitter-handler.ts index e4234dcbc..10d9ead03 100644 --- a/packages/content-handler/src/handlers/twitter-handler.ts +++ b/packages/content-handler/src/twitter-handler.ts @@ -1,4 +1,4 @@ -import { ContentHandler, PreHandleResult } from '../index' +import { ContentHandler, PreHandleResult } from './index' import axios from 'axios' import { DateTime } from 'luxon' import _ from 'underscore' @@ -53,11 +53,11 @@ const formatTimestamp = (timestamp: string) => { } class TwitterHandler extends ContentHandler { - shouldPreHandle(url: string, _dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { return !!TWITTER_BEARER_TOKEN && TWITTER_URL_MATCH.test(url.toString()) } - async preHandle(url: string, _document: Document): Promise { + async preHandle(url: string, document?: Document): Promise { console.log('prehandling twitter url', url) const tweetId = tweetIdFromStatusUrl(url) diff --git a/packages/content-handler/src/handlers/youtube-handler.ts b/packages/content-handler/src/youtube-handler.ts similarity index 90% rename from packages/content-handler/src/handlers/youtube-handler.ts rename to packages/content-handler/src/youtube-handler.ts index 288f42aa0..0cbe0df57 100644 --- a/packages/content-handler/src/handlers/youtube-handler.ts +++ b/packages/content-handler/src/youtube-handler.ts @@ -1,11 +1,11 @@ -import { ContentHandler, PreHandleResult } from '../index' +import { ContentHandler, PreHandleResult } from './index' import axios from 'axios' import _ from 'underscore' const YOUTUBE_URL_MATCH = /^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/ -function getYoutubeVideoId(url: string) { +export const getYoutubeVideoId = (url: string) => { const u = new URL(url) const videoId = u.searchParams.get('v') if (!videoId) { @@ -19,11 +19,11 @@ function getYoutubeVideoId(url: string) { } class YoutubeHandler extends ContentHandler { - shouldPreHandle(url: string, _dom: Document): boolean { + shouldPreHandle(url: string, dom?: Document): boolean { return YOUTUBE_URL_MATCH.test(url.toString()) } - async preHandle(url: string, _document: Document): Promise { + async preHandle(url: string, document?: Document): Promise { const videoId = getYoutubeVideoId(url) if (!videoId) { return {} diff --git a/packages/content-handler/test/apple-news-handler.test.ts b/packages/content-handler/test/apple-news-handler.test.ts new file mode 100644 index 000000000..9af3be580 --- /dev/null +++ b/packages/content-handler/test/apple-news-handler.test.ts @@ -0,0 +1,10 @@ +import { AppleNewsHandler } from '../src/apple-news-handler' + +describe('open a simple web page', () => { + it('should return a response', async () => { + const response = await new AppleNewsHandler().preHandle( + 'https://apple.news/AxjzaZaPvSn23b67LhXI5EQ' + ) + console.log('response', response) + }) +}) diff --git a/packages/content-handler/test/stub.test.ts b/packages/content-handler/test/stub.test.ts deleted file mode 100644 index 173ca4917..000000000 --- a/packages/content-handler/test/stub.test.ts +++ /dev/null @@ -1,13 +0,0 @@ -import 'mocha' -import * as chai from 'chai' -import { expect } from 'chai' -import 'chai/register-should' -import chaiString from 'chai-string' - -chai.use(chaiString) - -describe('Stub test', () => { - it('should pass', () => { - expect(true).to.be.true - }) -}) diff --git a/packages/content-handler/test/youtube-handler.test.ts b/packages/content-handler/test/youtube-handler.test.ts new file mode 100644 index 000000000..f8c3a51ff --- /dev/null +++ b/packages/content-handler/test/youtube-handler.test.ts @@ -0,0 +1,25 @@ +import { expect } from 'chai' +import 'mocha' +import { getYoutubeVideoId } from '../src/youtube-handler' + +describe('getYoutubeVideoId', () => { + it('should parse video id out of a URL', async () => { + expect('BnSUk0je6oo').to.eq( + getYoutubeVideoId('https://www.youtube.com/watch?v=BnSUk0je6oo&t=269s') + ) + expect('vFD2gu007dc').to.eq( + getYoutubeVideoId( + 'https://www.youtube.com/watch?v=vFD2gu007dc&list=RDvFD2gu007dc&start_radio=1' + ) + ) + expect('vFD2gu007dc').to.eq( + getYoutubeVideoId('https://youtu.be/vFD2gu007dc') + ) + expect('BMFVCnbRaV4').to.eq( + getYoutubeVideoId('https://youtube.com/watch?v=BMFVCnbRaV4&feature=share') + ) + expect('cg9b4RC87LI').to.eq( + getYoutubeVideoId('https://youtu.be/cg9b4RC87LI?t=116') + ) + }) +}) diff --git a/yarn.lock b/yarn.lock index 3da81da9b..d1c9a65dd 100644 --- a/yarn.lock +++ b/yarn.lock @@ -10579,6 +10579,19 @@ chai@^4.3.4: pathval "^1.1.1" type-detect "^4.0.5" +chai@^4.3.6: + version "4.3.6" + resolved "https://registry.yarnpkg.com/chai/-/chai-4.3.6.tgz#ffe4ba2d9fa9d6680cc0b370adae709ec9011e9c" + integrity sha512-bbcp3YfHCUzMOvKqsztczerVgBKSsEijCySNlHHbX3VG1nskvqjz5Rfso1gGwD6w6oOV3eI60pKuMOV5MV7p3Q== + dependencies: + assertion-error "^1.1.0" + check-error "^1.0.2" + deep-eql "^3.0.1" + get-func-name "^2.0.0" + loupe "^2.3.1" + pathval "^1.1.1" + type-detect "^4.0.5" + chalk@^1.0.0, chalk@^1.1.3: version "1.1.3" resolved "https://registry.yarnpkg.com/chalk/-/chalk-1.1.3.tgz#a8115c55e4a702fe4d150abd3872822a7e09fc98" @@ -18065,6 +18078,13 @@ loose-envify@^1.0.0, loose-envify@^1.1.0, loose-envify@^1.4.0: dependencies: js-tokens "^3.0.0 || ^4.0.0" +loupe@^2.3.1: + version "2.3.4" + resolved "https://registry.yarnpkg.com/loupe/-/loupe-2.3.4.tgz#7e0b9bffc76f148f9be769cb1321d3dcf3cb25f3" + integrity sha512-OvKfgCC2Ndby6aSTREl5aCCPTNIzlDfQZvZxNUrBrihDhL3xcrYegTblhmEiCrg2kKQz4XsFIaemE5BF4ybSaQ== + dependencies: + get-func-name "^2.0.0" + lower-case-first@^1.0.0: version "1.0.2" resolved "https://registry.yarnpkg.com/lower-case-first/-/lower-case-first-1.0.2.tgz#e5da7c26f29a7073be02d52bac9980e5922adfa1"