From a9607adfd30a0ffec61d97d1bc06b0cbb186b79a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 3 Oct 2022 11:11:24 +0800 Subject: [PATCH 01/54] Import content-handler as local dependency --- packages/content-fetch/Dockerfile | 12 +- packages/content-fetch/Dockerfile-local | 1 + packages/content-fetch/package.json | 4 +- packages/inbound-email-handler/package.json | 2 +- .../content-handler/.eslintignore | 1 + .../puppeteer-parse/content-handler/.eslintrc | 6 + .../content-handler/.gitignore | 2 + .../content-handler/.npmignore | 7 + .../content-handler/mocha-config.json | 5 + .../content-handler/package.json | 34 ++++ .../content-handler/src/content-handler.ts | 175 ++++++++++++++++ .../content-handler/src/index.ts | 116 +++++++++++ .../src/newsletters/axios-handler.ts | 46 +++++ .../src/newsletters/beehiiv-handler.ts | 43 ++++ .../bloomberg-newsletter-handler.ts | 37 ++++ .../src/newsletters/convertkit-handler.ts | 41 ++++ .../src/newsletters/golang-handler.ts | 27 +++ .../src/newsletters/morning-brew-handler.ts | 35 ++++ .../src/newsletters/revue-handler.ts | 46 +++++ .../src/newsletters/substack-handler.ts | 90 +++++++++ .../src/websites/apple-news-handler.ts | 31 +++ .../src/websites/bloomberg-handler.ts | 41 ++++ .../src/websites/derstandard-handler.ts | 34 ++++ .../src/websites/image-handler.ts | 32 +++ .../src/websites/medium-handler.ts | 26 +++ .../src/websites/pdf-handler.ts | 18 ++ .../src/websites/scrapingBee-handler.ts | 38 ++++ .../src/websites/t-dot-co-handler.ts | 26 +++ .../src/websites/twitter-handler.ts | 167 +++++++++++++++ .../src/websites/wikipedia-handler.ts | 20 ++ .../src/websites/youtube-handler.ts | 76 +++++++ .../test/apple-news-handler.test.ts | 10 + .../content-handler/test/babel-register.js | 3 + .../test/data/beehiiv-newsletter.html | 15 ++ .../data/substack-forwarded-newsletter.html | 1 + .../substack-forwarded-welcome-email.html | 1 + ...substack-private-forwarded-newsletter.html | 2 + .../content-handler/test/newsletter.test.ts 
| 191 ++++++++++++++++++ .../test/youtube-handler.test.ts | 25 +++ .../content-handler/tsconfig.json | 10 + packages/puppeteer-parse/package.json | 1 + yarn.lock | 11 + 42 files changed, 1503 insertions(+), 6 deletions(-) create mode 100644 packages/puppeteer-parse/content-handler/.eslintignore create mode 100644 packages/puppeteer-parse/content-handler/.eslintrc create mode 100644 packages/puppeteer-parse/content-handler/.gitignore create mode 100644 packages/puppeteer-parse/content-handler/.npmignore create mode 100644 packages/puppeteer-parse/content-handler/mocha-config.json create mode 100644 packages/puppeteer-parse/content-handler/package.json create mode 100644 packages/puppeteer-parse/content-handler/src/content-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/index.ts create mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/axios-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/beehiiv-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/bloomberg-newsletter-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/convertkit-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/golang-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/morning-brew-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/revue-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/substack-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/apple-news-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/bloomberg-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/derstandard-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/image-handler.ts create mode 100644 
packages/puppeteer-parse/content-handler/src/websites/medium-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/pdf-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/scrapingBee-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/t-dot-co-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/twitter-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/wikipedia-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/src/websites/youtube-handler.ts create mode 100644 packages/puppeteer-parse/content-handler/test/apple-news-handler.test.ts create mode 100644 packages/puppeteer-parse/content-handler/test/babel-register.js create mode 100644 packages/puppeteer-parse/content-handler/test/data/beehiiv-newsletter.html create mode 100644 packages/puppeteer-parse/content-handler/test/data/substack-forwarded-newsletter.html create mode 100644 packages/puppeteer-parse/content-handler/test/data/substack-forwarded-welcome-email.html create mode 100644 packages/puppeteer-parse/content-handler/test/data/substack-private-forwarded-newsletter.html create mode 100644 packages/puppeteer-parse/content-handler/test/newsletter.test.ts create mode 100644 packages/puppeteer-parse/content-handler/test/youtube-handler.test.ts create mode 100644 packages/puppeteer-parse/content-handler/tsconfig.json diff --git a/packages/content-fetch/Dockerfile b/packages/content-fetch/Dockerfile index b05320b08..fd025629c 100644 --- a/packages/content-fetch/Dockerfile +++ b/packages/content-fetch/Dockerfile @@ -23,12 +23,18 @@ WORKDIR /app ENV CHROMIUM_PATH /usr/bin/chromium-browser ENV LAUNCH_HEADLESS=true -COPY . /app/ -WORKDIR app +COPY package.json . +COPY yarn.lock . +COPY tsconfig.json . +COPY .prettierrc . +COPY .eslintrc . 
+ +COPY /packages/content-fetch ./packages/content-fetch +COPY /packages/content-handler ./packages/content-handler RUN yarn install --pure-lockfile EXPOSE 8080 -ENTRYPOINT ["yarn", "start"] +CMD ["yarn", "workspace", "@omnivore/content-fetch", "start"] diff --git a/packages/content-fetch/Dockerfile-local b/packages/content-fetch/Dockerfile-local index 383011f10..505e4c2da 100644 --- a/packages/content-fetch/Dockerfile-local +++ b/packages/content-fetch/Dockerfile-local @@ -34,6 +34,7 @@ COPY .prettierrc . COPY .eslintrc . COPY /packages/content-fetch ./packages/content-fetch +COPY /packages/content-handler ./packages/content-handler RUN yarn install --pure-lockfile diff --git a/packages/content-fetch/package.json b/packages/content-fetch/package.json index 3df85a237..70915c84c 100644 --- a/packages/content-fetch/package.json +++ b/packages/content-fetch/package.json @@ -4,6 +4,7 @@ "description": "Service that fetches page content from a URL", "main": "index.js", "dependencies": { + "@omnivore/content-handler": "file:./../content-handler", "axios": "^0.27.2", "dotenv": "^8.2.0", "express": "^4.17.1", @@ -11,8 +12,7 @@ "linkedom": "^0.14.9", "luxon": "^2.3.1", "puppeteer-core": "^16.1.0", - "underscore": "^1.13.4", - "@omnivore/content-handler": "1.0.0" + "underscore": "^1.13.4" }, "scripts": { "start": "node app.js", diff --git a/packages/inbound-email-handler/package.json b/packages/inbound-email-handler/package.json index ed22a170c..b11c89e31 100644 --- a/packages/inbound-email-handler/package.json +++ b/packages/inbound-email-handler/package.json @@ -29,9 +29,9 @@ "dependencies": { "@google-cloud/functions-framework": "3.1.2", "@google-cloud/pubsub": "^2.18.4", + "@omnivore/content-handler": "file:./../content-handler", "@sendgrid/client": "^7.6.0", "@sentry/serverless": "^6.16.1", - "@omnivore/content-handler": "1.0.0", "addressparser": "^1.0.1", "axios": "^0.27.2", "jsonwebtoken": "^8.5.1", diff --git a/packages/puppeteer-parse/content-handler/.eslintignore 
b/packages/puppeteer-parse/content-handler/.eslintignore new file mode 100644 index 000000000..c2658d7d1 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/.eslintignore @@ -0,0 +1 @@ +node_modules/ diff --git a/packages/puppeteer-parse/content-handler/.eslintrc b/packages/puppeteer-parse/content-handler/.eslintrc new file mode 100644 index 000000000..e006282a6 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/.eslintrc @@ -0,0 +1,6 @@ +{ + "extends": "../../.eslintrc", + "parserOptions": { + "project": "tsconfig.json" + } +} \ No newline at end of file diff --git a/packages/puppeteer-parse/content-handler/.gitignore b/packages/puppeteer-parse/content-handler/.gitignore new file mode 100644 index 000000000..0ae7e5c9e --- /dev/null +++ b/packages/puppeteer-parse/content-handler/.gitignore @@ -0,0 +1,2 @@ +node_modules +/lib diff --git a/packages/puppeteer-parse/content-handler/.npmignore b/packages/puppeteer-parse/content-handler/.npmignore new file mode 100644 index 000000000..b5e2b8569 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/.npmignore @@ -0,0 +1,7 @@ +/test/ +src +tsconfig.json +.eslintrc +.eslintignore +.gitignore +mocha-config.json diff --git a/packages/puppeteer-parse/content-handler/mocha-config.json b/packages/puppeteer-parse/content-handler/mocha-config.json new file mode 100644 index 000000000..44d1d24c1 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/mocha-config.json @@ -0,0 +1,5 @@ +{ + "extension": ["ts"], + "spec": "test/**/*.test.ts", + "require": "test/babel-register.js" + } \ No newline at end of file diff --git a/packages/puppeteer-parse/content-handler/package.json b/packages/puppeteer-parse/content-handler/package.json new file mode 100644 index 000000000..e4021b3e4 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/package.json @@ -0,0 +1,34 @@ +{ + "name": "@omnivore/content-handler", + "version": "1.0.0", + "description": "A standalone version of content handler to parse and 
format each type of content", + "main": "build/src/index.js", + "types": "build/src/index.d.ts", + "files": [ + "build/src" + ], + "license": "Apache-2.0", + "scripts": { + "test": "yarn mocha -r ts-node/register --config mocha-config.json", + "lint": "eslint src --ext ts,js,tsx,jsx", + "compile": "tsc", + "build": "tsc" + }, + "devDependencies": { + "chai": "^4.3.6", + "chai-as-promised": "^7.1.1", + "chai-string": "^1.5.0", + "eslint-plugin-prettier": "^4.0.0", + "mocha": "^10.0.0", + "nock": "^13.2.9" + }, + "dependencies": { + "addressparser": "^1.0.1", + "axios": "^0.27.2", + "linkedom": "^0.14.16", + "luxon": "^3.0.4", + "rfc2047": "^4.0.1", + "underscore": "^1.13.6", + "uuid": "^9.0.0" + } +} diff --git a/packages/puppeteer-parse/content-handler/src/content-handler.ts b/packages/puppeteer-parse/content-handler/src/content-handler.ts new file mode 100644 index 000000000..22216fabe --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/content-handler.ts @@ -0,0 +1,175 @@ +import addressparser from 'addressparser' +import rfc2047 from 'rfc2047' +import { v4 as uuid } from 'uuid' +import { parseHTML } from 'linkedom' +import axios from 'axios' + +interface Unsubscribe { + mailTo?: string + httpUrl?: string +} + +export interface NewsletterInput { + postHeader: string + from: string + unSubHeader: string + email: string + html: string + title: string +} + +export interface NewsletterResult { + email: string + content: string + url: string + title: string + author: string + unsubMailTo?: string + unsubHttpUrl?: string +} + +export interface PreHandleResult { + url?: string + title?: string + content?: string + contentType?: string + dom?: Document +} + +export const FAKE_URL_PREFIX = 'https://omnivore.app/no_url?q=' +export const generateUniqueUrl = () => FAKE_URL_PREFIX + uuid() + +export abstract class ContentHandler { + protected senderRegex: RegExp + protected urlRegex: RegExp + name: string + + protected constructor() { + this.senderRegex = new 
RegExp(/NEWSLETTER_SENDER_REGEX/) + this.urlRegex = new RegExp(/NEWSLETTER_URL_REGEX/) + this.name = 'Handler name' + } + + shouldResolve(url: string): boolean { + return false + } + + async resolve(url: string): Promise { + return Promise.resolve(url) + } + + shouldPreHandle(url: string, dom?: Document): boolean { + return false + } + + async preHandle(url: string, dom?: Document): Promise { + return Promise.resolve({ url, dom }) + } + + async isNewsletter(input: { + postHeader: string + from: string + unSubHeader: string + html?: string + }): Promise { + const re = new RegExp(this.senderRegex) + return Promise.resolve( + re.test(input.from) && (!!input.postHeader || !!input.unSubHeader) + ) + } + + findNewsletterHeaderHref(dom: Document): string | undefined { + return undefined + } + + // Given an HTML blob tries to find a URL to use for + // a canonical URL. + async findNewsletterUrl(html: string): Promise { + const dom = parseHTML(html).document + + // Check if this is a substack newsletter + const href = this.findNewsletterHeaderHref(dom) + if (href) { + // Try to make a HEAD request, so we get the redirected URL, since these + // will usually be behind tracking url redirects + try { + const response = await axios.head(href, { timeout: 5000 }) + return Promise.resolve( + // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access + response.request.res.responseUrl as string | undefined + ) + } catch (e) { + console.log('error making HEAD request', e) + return Promise.resolve(href) + } + } + + return Promise.resolve(undefined) + } + + async parseNewsletterUrl( + _postHeader: string, + html: string + ): Promise { + // get newsletter url from html + const matches = html.match(this.urlRegex) + if (matches) { + return Promise.resolve(matches[1]) + } + return Promise.resolve(undefined) + } + + parseAuthor(from: string): string { + // get author name from email + // e.g. 
'Jackson Harper from Omnivore App ' + // or 'Mike Allen ' + const parsed = addressparser(from) + if (parsed.length > 0) { + return parsed[0].name + } + return from + } + + parseUnsubscribe(unSubHeader: string): Unsubscribe { + // parse list-unsubscribe header + // e.g. List-Unsubscribe: , + const decoded = rfc2047.decode(unSubHeader) + return { + mailTo: decoded.match(/<(https?:\/\/[^>]*)>/)?.[1], + httpUrl: decoded.match(/]*)>/)?.[1], + } + } + + async handleNewsletter({ + email, + html, + postHeader, + title, + from, + unSubHeader, + }: NewsletterInput): Promise { + console.log('handleNewsletter', email, postHeader, title, from) + + if (!email || !html || !title || !from) { + console.log('invalid newsletter email') + throw new Error('invalid newsletter email') + } + + // fallback to default url if newsletter url does not exist + // assign a random uuid to the default url to avoid duplicate url + const url = + (await this.parseNewsletterUrl(postHeader, html)) || generateUniqueUrl() + const author = this.parseAuthor(from) + const unsubscribe = this.parseUnsubscribe(unSubHeader) + + return { + email, + content: html, + url, + title, + author, + unsubMailTo: unsubscribe.mailTo || '', + unsubHttpUrl: unsubscribe.httpUrl || '', + } + } +} diff --git a/packages/puppeteer-parse/content-handler/src/index.ts b/packages/puppeteer-parse/content-handler/src/index.ts new file mode 100644 index 000000000..e41c811c4 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/index.ts @@ -0,0 +1,116 @@ +import { AppleNewsHandler } from './websites/apple-news-handler' +import { BloombergHandler } from './websites/bloomberg-handler' +import { DerstandardHandler } from './websites/derstandard-handler' +import { ImageHandler } from './websites/image-handler' +import { MediumHandler } from './websites/medium-handler' +import { PdfHandler } from './websites/pdf-handler' +import { ScrapingBeeHandler } from './websites/scrapingBee-handler' +import { TDotCoHandler } from 
'./websites/t-dot-co-handler' +import { TwitterHandler } from './websites/twitter-handler' +import { YoutubeHandler } from './websites/youtube-handler' +import { WikipediaHandler } from './websites/wikipedia-handler' +import { + ContentHandler, + NewsletterInput, + NewsletterResult, + PreHandleResult, +} from './content-handler' +import { SubstackHandler } from './newsletters/substack-handler' +import { AxiosHandler } from './newsletters/axios-handler' +import { GolangHandler } from './newsletters/golang-handler' +import { MorningBrewHandler } from './newsletters/morning-brew-handler' +import { BloombergNewsletterHandler } from './newsletters/bloomberg-newsletter-handler' +import { BeehiivHandler } from './newsletters/beehiiv-handler' +import { ConvertkitHandler } from './newsletters/convertkit-handler' +import { RevueHandler } from './newsletters/revue-handler' + +const validateUrlString = (url: string) => { + const u = new URL(url) + // Make sure the URL is http or https + if (u.protocol !== 'http:' && u.protocol !== 'https:') { + throw new Error('Invalid URL protocol check failed') + } + // Make sure the domain is not localhost + if (u.hostname === 'localhost' || u.hostname === '0.0.0.0') { + throw new Error('Invalid URL is localhost') + } + // Make sure the domain is not a private IP + if (/^(10|172\.16|192\.168)\..*/.test(u.hostname)) { + throw new Error('Invalid URL is private ip') + } +} + +const contentHandlers: ContentHandler[] = [ + new AppleNewsHandler(), + new BloombergHandler(), + new DerstandardHandler(), + new ImageHandler(), + new MediumHandler(), + new PdfHandler(), + new ScrapingBeeHandler(), + new TDotCoHandler(), + new TwitterHandler(), + new YoutubeHandler(), + new WikipediaHandler(), +] + +const newsletterHandlers: ContentHandler[] = [ + new AxiosHandler(), + new BloombergNewsletterHandler(), + new GolangHandler(), + new SubstackHandler(), + new MorningBrewHandler(), + new SubstackHandler(), + new BeehiivHandler(), + new ConvertkitHandler(), + 
new RevueHandler(), +] + +export const preHandleContent = async ( + url: string, + dom?: Document +): Promise => { + // Before we run the regular handlers we check to see if we need tp + // pre-resolve the URL. TODO: This should probably happen recursively, + // so URLs can be pre-resolved, handled, pre-resolved, handled, etc. + for (const handler of contentHandlers) { + if (handler.shouldResolve(url)) { + try { + const resolvedUrl = await handler.resolve(url) + if (resolvedUrl && validateUrlString(resolvedUrl)) { + url = resolvedUrl + } + } catch (err) { + console.log('error resolving url with handler', handler.name, err) + } + break + } + } + // Before we fetch the page we check the handlers, to see if they want + // to perform a prefetch action that can modify our requests. + // enumerate the handlers and see if any of them want to handle the request + for (const handler of contentHandlers) { + if (handler.shouldPreHandle(url, dom)) { + console.log('preHandleContent', handler.name, url) + return handler.preHandle(url, dom) + } + } + return undefined +} + +export const handleNewsletter = async ( + input: NewsletterInput +): Promise => { + for (const handler of newsletterHandlers) { + if (await handler.isNewsletter(input)) { + return handler.handleNewsletter(input) + } + } + + return undefined +} + +module.exports = { + preHandleContent, + handleNewsletter, +} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/axios-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/axios-handler.ts new file mode 100644 index 000000000..cd783c30e --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/newsletters/axios-handler.ts @@ -0,0 +1,46 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class AxiosHandler extends ContentHandler { + constructor() { + super() + this.senderRegex = /<.+@axios.com>/ + this.urlRegex = /View in browser at (.*)<\/a>/ + this.name = 'axios' + } + + shouldPreHandle(url: 
string, dom?: Document): boolean { + const host = this.name + '.com' + // check if url ends with axios.com + return new URL(url).hostname.endsWith(host) + } + + async preHandle(url: string, dom: Document): Promise { + const body = dom.querySelector('table') + + let isFooter = false + // this removes ads and replaces table with a div + body?.querySelectorAll('table').forEach((el) => { + // remove the footer and the ads + if (!el.textContent || el.textContent.length < 20 || isFooter) { + el.remove() + } else { + // removes the first few rows of the table (the header) + // remove the last two rows of the table (they are ads) + el.querySelectorAll('tr').forEach((tr, i) => { + if (i <= 7 || i >= el.querySelectorAll('tr').length - 2) { + console.log('removing', tr) + tr.remove() + } + }) + // replace the table with a div + const div = dom.createElement('div') + div.innerHTML = el.innerHTML + el.parentNode?.replaceChild(div, el) + // set the isFooter flag to true because the next table is the footer + isFooter = true + } + }) + + return Promise.resolve({ dom }) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/beehiiv-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/beehiiv-handler.ts new file mode 100644 index 000000000..0a50c1920 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/newsletters/beehiiv-handler.ts @@ -0,0 +1,43 @@ +import { ContentHandler } from '../content-handler' +import { parseHTML } from 'linkedom' + +export class BeehiivHandler extends ContentHandler { + constructor() { + super() + this.name = 'beehiiv' + } + + findNewsletterHeaderHref(dom: Document): string | undefined { + const readOnline = dom.querySelectorAll('table tr td div a[class*="link"]') + let res: string | undefined = undefined + readOnline.forEach((e) => { + if (e.textContent === 'Read Online') { + res = e.getAttribute('href') || undefined + } + }) + return res + } + + async isNewsletter(input: { + postHeader: string + from: 
string + unSubHeader: string + html: string + }): Promise { + const dom = parseHTML(input.html).document + if (dom.querySelectorAll('img[src*="beehiiv.net"]').length > 0) { + const beehiivUrl = this.findNewsletterHeaderHref(dom) + if (beehiivUrl) { + return Promise.resolve(true) + } + } + return false + } + + async parseNewsletterUrl( + postHeader: string, + html: string + ): Promise { + return this.findNewsletterUrl(html) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/bloomberg-newsletter-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/bloomberg-newsletter-handler.ts new file mode 100644 index 000000000..a5f84f076 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/newsletters/bloomberg-newsletter-handler.ts @@ -0,0 +1,37 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class BloombergNewsletterHandler extends ContentHandler { + constructor() { + super() + this.senderRegex = /<.+@mail.bloomberg.*.com>/ + this.urlRegex = / { + const body = dom.querySelector('.wrapper') + + // this removes header + body?.querySelector('.sailthru-variables')?.remove() + body?.querySelector('.preview-text')?.remove() + body?.querySelector('.logo-wrapper')?.remove() + body?.querySelector('.by-the-number-wrapper')?.remove() + // this removes footer + body?.querySelector('.quote-box-wrapper')?.remove() + body?.querySelector('.header-wrapper')?.remove() + body?.querySelector('.component-wrapper')?.remove() + body?.querySelector('.footer')?.remove() + + return Promise.resolve({ dom }) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/convertkit-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/convertkit-handler.ts new file mode 100644 index 000000000..72e65f5da --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/newsletters/convertkit-handler.ts @@ -0,0 +1,41 @@ +import { ContentHandler } from '../content-handler' +import { 
parseHTML } from 'linkedom' + +export class ConvertkitHandler extends ContentHandler { + constructor() { + super() + this.name = 'convertkit' + } + + findNewsletterHeaderHref(dom: Document): string | undefined { + const readOnline = dom.querySelectorAll('table tr td a') + let res: string | undefined = undefined + readOnline.forEach((e) => { + if (e.textContent === 'View this email in your browser') { + res = e.getAttribute('href') || undefined + } + }) + return res + } + + async isNewsletter(input: { + postHeader: string + from: string + unSubHeader: string + html: string + }): Promise { + const dom = parseHTML(input.html).document + return Promise.resolve( + dom.querySelectorAll( + 'img[src*="convertkit.com"], img[src*="convertkit-mail.com"]' + ).length > 0 + ) + } + + async parseNewsletterUrl( + postHeader: string, + html: string + ): Promise { + return this.findNewsletterUrl(html) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/golang-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/golang-handler.ts new file mode 100644 index 000000000..7d4724004 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/newsletters/golang-handler.ts @@ -0,0 +1,27 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class GolangHandler extends ContentHandler { + constructor() { + super() + this.senderRegex = /<.+@golangweekly.com>/ + this.urlRegex = /Read on the Web<\/a>/ + this.name = 'golangweekly' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + const host = this.name + '.com' + // check if url ends with golangweekly.com + return new URL(url).hostname.endsWith(host) + } + + async preHandle(url: string, dom: Document): Promise { + const body = dom.querySelector('body') + + // this removes the "Subscribe" button + body?.querySelector('.el-splitbar')?.remove() + // this removes the title + body?.querySelector('.el-masthead')?.remove() + + return Promise.resolve({ dom }) + } +} 
diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/morning-brew-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/morning-brew-handler.ts new file mode 100644 index 000000000..f187ac0dc --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/newsletters/morning-brew-handler.ts @@ -0,0 +1,35 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class MorningBrewHandler extends ContentHandler { + constructor() { + super() + this.senderRegex = /Morning Brew / + this.urlRegex = /View Online<\/a>/ + this.name = 'morningbrew' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + const host = this.name + '.com' + // check if url ends with morningbrew.com + return new URL(url).hostname.endsWith(host) + } + + async preHandle(url: string, dom: Document): Promise { + // retain the width of the cells in the table of market info + dom.querySelectorAll('.markets-arrow-cell').forEach((td) => { + const table = td.closest('table') + if (table) { + const bubbleTable = table.querySelector('.markets-bubble') + if (bubbleTable) { + // replace the nested table with the text + const e = bubbleTable.querySelector('.markets-table-text') + e && bubbleTable.parentNode?.replaceChild(e, bubbleTable) + } + // set custom class for the table + table.className = 'morning-brew-markets' + } + }) + + return Promise.resolve({ dom }) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/revue-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/revue-handler.ts new file mode 100644 index 000000000..d8c8f911c --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/newsletters/revue-handler.ts @@ -0,0 +1,46 @@ +import { ContentHandler } from '../content-handler' +import { parseHTML } from 'linkedom' + +export class RevueHandler extends ContentHandler { + constructor() { + super() + this.name = 'revue' + } + + findNewsletterHeaderHref(dom: Document): string | undefined 
{ + const viewOnline = dom.querySelectorAll('table tr td a[target="_blank"]') + let res: string | undefined = undefined + viewOnline.forEach((e) => { + if (e.textContent === 'View online') { + res = e.getAttribute('href') || undefined + } + }) + return res + } + + async isNewsletter(input: { + postHeader: string + from: string + unSubHeader: string + html: string + }): Promise { + const dom = parseHTML(input.html).document + if ( + dom.querySelectorAll('img[src*="getrevue.co"], img[src*="revue.email"]') + .length > 0 + ) { + const getrevueUrl = this.findNewsletterHeaderHref(dom) + if (getrevueUrl) { + return Promise.resolve(true) + } + } + return false + } + + async parseNewsletterUrl( + postHeader: string, + html: string + ): Promise { + return this.findNewsletterUrl(html) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/substack-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/substack-handler.ts new file mode 100644 index 000000000..164068623 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/newsletters/substack-handler.ts @@ -0,0 +1,90 @@ +import addressparser from 'addressparser' +import { ContentHandler, PreHandleResult } from '../content-handler' +import { parseHTML } from 'linkedom' + +export class SubstackHandler extends ContentHandler { + constructor() { + super() + this.name = 'substack' + } + + shouldPreHandle(url: string, dom: Document): boolean { + const host = this.name + '.com' + // check if url ends with substack.com + // or has a profile image hosted at substack.com + return ( + new URL(url).hostname.endsWith(host) || + !!dom + .querySelector('.email-body img') + ?.getAttribute('src') + ?.includes(host) + ) + } + + async preHandle(url: string, dom: Document): Promise { + const body = dom.querySelector('.email-body-container') + + // this removes header and profile avatar + body?.querySelector('.header')?.remove() + body?.querySelector('.preamble')?.remove() + 
body?.querySelector('.meta-author-wrap')?.remove() + // this removes meta button + body?.querySelector('.post-meta')?.remove() + // this removes footer + body?.querySelector('.post-cta')?.remove() + body?.querySelector('.container-border')?.remove() + body?.querySelector('.footer')?.remove() + + return Promise.resolve(dom) + } + + findNewsletterHeaderHref(dom: Document): string | undefined { + // Substack header links + const postLink = dom.querySelector('h1 a ') + if (postLink) { + return postLink.getAttribute('href') || undefined + } + + return undefined + } + + async isNewsletter({ + postHeader, + html, + }: { + postHeader: string + from: string + unSubHeader: string + html: string + }): Promise { + if (postHeader) { + return Promise.resolve(true) + } + const dom = parseHTML(html).document + // substack newsletter emails have tables with a *post-meta class + if (dom.querySelector('table[class$="post-meta"]')) { + return true + } + // If the article has a header link, and substack icons its probably a newsletter + const href = this.findNewsletterHeaderHref(dom) + const heartIcon = dom.querySelector( + 'table tbody td span a img[src*="HeartIcon"]' + ) + const recommendIcon = dom.querySelector( + 'table tbody td span a img[src*="RecommendIconRounded"]' + ) + return Promise.resolve(!!(href && (heartIcon || recommendIcon))) + } + + async parseNewsletterUrl( + postHeader: string, + html: string + ): Promise { + // raw SubStack newsletter url is like + // we need to get the real url from the raw url + if (postHeader && addressparser(postHeader).length > 0) { + return Promise.resolve(addressparser(postHeader)[0].name) + } + return this.findNewsletterUrl(html) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/apple-news-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/apple-news-handler.ts new file mode 100644 index 000000000..0b4026fb6 --- /dev/null +++ 
b/packages/puppeteer-parse/content-handler/src/websites/apple-news-handler.ts @@ -0,0 +1,31 @@ +import axios from 'axios' +import { parseHTML } from 'linkedom' +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class AppleNewsHandler extends ContentHandler { + constructor() { + super() + this.name = 'Apple News' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + const u = new URL(url) + return u.hostname === 'apple.news' + } + + async preHandle(url: string, document?: Document): Promise { + const MOBILE_USER_AGENT = + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36' + const response = await axios.get(url, { + headers: { 'User-Agent': MOBILE_USER_AGENT }, + }) + const data = response.data as string + const dom = parseHTML(data).document + // make sure it's a valid URL by wrapping in new URL + const href = dom + .querySelector('span.click-here') + ?.parentElement?.getAttribute('href') + const u = href ? 
new URL(href) : undefined + return { url: u?.href } + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/bloomberg-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/bloomberg-handler.ts new file mode 100644 index 000000000..a867a3503 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/bloomberg-handler.ts @@ -0,0 +1,41 @@ +import axios from 'axios' +import { parseHTML } from 'linkedom' +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class BloombergHandler extends ContentHandler { + constructor() { + super() + this.name = 'Bloomberg' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + const BLOOMBERG_URL_MATCH = + /https?:\/\/(www\.)?bloomberg\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)/ // literal dot: only bloomberg.<tld> hosts should be routed through ScrapingBee + return BLOOMBERG_URL_MATCH.test(url.toString()) + } + + async preHandle(url: string, document?: Document): Promise { + console.log('prehandling bloomberg url', url) + + try { + const response = await axios.get('https://app.scrapingbee.com/api/v1', { + params: { + api_key: process.env.SCRAPINGBEE_API_KEY, + url: url, + return_page_source: true, + block_ads: true, + block_resources: false, + }, + }) + const dom = parseHTML(response.data).document + return { + title: dom.title, + content: dom.querySelector('body')?.innerHTML, + url: url, + } + } catch (error) { + console.error('error prehandling bloomberg url', error) + throw error + } + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/derstandard-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/derstandard-handler.ts new file mode 100644 index 000000000..28742a3e5 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/derstandard-handler.ts @@ -0,0 +1,34 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' +import axios from 'axios' +import { parseHTML } from 'linkedom' + +export class DerstandardHandler extends ContentHandler { + constructor() 
{ + super() + this.name = 'Derstandard' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + const u = new URL(url) + return u.hostname === 'www.derstandard.at' + } + + async preHandle(url: string, document?: Document): Promise { + const response = await axios.get(url, { + // set cookie to give consent to get the article + headers: { + cookie: `DSGVO_ZUSAGE_V1=true; consentUUID=2bacb9c1-1e80-4be0-9f7b-ee987cf4e7b0_6`, + }, + }) + const content = response.data as string + + const dom = parseHTML(content).document + const titleElement = dom.querySelector('.article-title') + titleElement && titleElement.remove() + + return { + content: dom.body.outerHTML, + title: titleElement?.textContent || undefined, + } + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/image-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/image-handler.ts new file mode 100644 index 000000000..068a1cc66 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/image-handler.ts @@ -0,0 +1,32 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class ImageHandler extends ContentHandler { + constructor() { + super() + this.name = 'Image' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + const IMAGE_URL_PATTERN = /(https?:\/\/.*\.(?:jpg|jpeg|png|webp))/i + return IMAGE_URL_PATTERN.test(url.toString()) + } + + async preHandle(url: string, document?: Document): Promise { + const title = url.toString().split('/').pop() || 'Image' + const content = ` + + + ${title} + + + + +
+ ${title} +
+ + ` + + return Promise.resolve({ title, content }) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/medium-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/medium-handler.ts new file mode 100644 index 000000000..211a30c37 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/medium-handler.ts @@ -0,0 +1,26 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class MediumHandler extends ContentHandler { + constructor() { + super() + this.name = 'Medium' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + const u = new URL(url) + return u.hostname.endsWith('medium.com') + } + + async preHandle(url: string, document?: Document): Promise { + console.log('prehandling medium url', url) + + try { + const res = new URL(url) + res.searchParams.delete('source') + return Promise.resolve({ url: res.toString() }) + } catch (error) { + console.error('error prehandling medium url', error) + throw error + } + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/pdf-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/pdf-handler.ts new file mode 100644 index 000000000..4c4ef748d --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/pdf-handler.ts @@ -0,0 +1,18 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class PdfHandler extends ContentHandler { + constructor() { + super() + this.name = 'PDF' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + const u = new URL(url) + const path = u.pathname.replace(u.search, '') + return path.endsWith('.pdf') + } + + async preHandle(_url: string, document?: Document): Promise { + return Promise.resolve({ contentType: 'application/pdf' }) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/scrapingBee-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/scrapingBee-handler.ts new file mode 100644 index 
000000000..4c04d00e8 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/scrapingBee-handler.ts @@ -0,0 +1,38 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' +import axios from 'axios' +import { parseHTML } from 'linkedom' + +export class ScrapingBeeHandler extends ContentHandler { + constructor() { + super() + this.name = 'ScrapingBee' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + const u = new URL(url) + const hostnames = ['nytimes.com', 'news.google.com'] + + return hostnames.some((h) => u.hostname.endsWith(h)) + } + + async preHandle(url: string, document?: Document): Promise { + console.log('prehandling url with scrapingbee', url) + + try { + const response = await axios.get('https://app.scrapingbee.com/api/v1', { + params: { + api_key: process.env.SCRAPINGBEE_API_KEY, + url: url, + return_page_source: true, + block_ads: true, + block_resources: false, + }, + }) + const dom = parseHTML(response.data).document + return { title: dom.title, content: response.data as string, url: url } + } catch (error) { + console.error('error prehandling url w/scrapingbee', error) + throw error + } + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/t-dot-co-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/t-dot-co-handler.ts new file mode 100644 index 000000000..277a8c087 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/t-dot-co-handler.ts @@ -0,0 +1,26 @@ +import { ContentHandler } from '../content-handler' +import axios from 'axios' + +export class TDotCoHandler extends ContentHandler { + constructor() { + super() + this.name = 't.co' + } + + shouldResolve(url: string): boolean { + const T_DOT_CO_URL_MATCH = /^https:\/\/(?:www\.)?t\.co\/.*$/ + return T_DOT_CO_URL_MATCH.test(url) + } + + async resolve(url: string) { + return axios + .get(url, { maxRedirects: 0, validateStatus: null }) + .then((res) => { + return new URL(res.headers.location).href 
+ }) + .catch((err) => { + console.log('err with t.co url', err) + return undefined + }) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/twitter-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/twitter-handler.ts new file mode 100644 index 000000000..ddd37e45c --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/twitter-handler.ts @@ -0,0 +1,167 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' +import axios from 'axios' +import { DateTime } from 'luxon' +import _ from 'underscore' + +const TWITTER_BEARER_TOKEN = process.env.TWITTER_BEARER_TOKEN +const TWITTER_URL_MATCH = + /twitter\.com\/(?:#!\/)?(\w+)\/status(?:es)?\/(\d+)(?:\/.*)?/ + +const getTweetFields = () => { + const TWEET_FIELDS = + '&tweet.fields=attachments,author_id,conversation_id,created_at,' + + 'entities,geo,in_reply_to_user_id,lang,possibly_sensitive,public_metrics,referenced_tweets,' + + 'source,withheld' + const EXPANSIONS = '&expansions=author_id,attachments.media_keys' + const USER_FIELDS = + '&user.fields=created_at,description,entities,location,pinned_tweet_id,profile_image_url,protected,public_metrics,url,verified,withheld' + const MEDIA_FIELDS = + '&media.fields=duration_ms,height,preview_image_url,url,media_key,public_metrics,width' + + return `${TWEET_FIELDS}${EXPANSIONS}${USER_FIELDS}${MEDIA_FIELDS}` +} + +const getTweetById = async (id: string) => { + const BASE_ENDPOINT = 'https://api.twitter.com/2/tweets/' + const apiUrl = new URL(BASE_ENDPOINT + id + '?' 
+ getTweetFields()) + + if (!TWITTER_BEARER_TOKEN) { + throw new Error('No Twitter bearer token found') + } + + return axios.get(apiUrl.toString(), { + headers: { + Authorization: `Bearer ${TWITTER_BEARER_TOKEN}`, + redirect: 'follow', + }, + }) +} + +const titleForAuthor = (author: { name: string }) => { + return `${author.name} on Twitter` +} + +const tweetIdFromStatusUrl = (url: string): string | undefined => { + const match = url.toString().match(TWITTER_URL_MATCH) + return match?.[2] +} + +const formatTimestamp = (timestamp: string) => { + return DateTime.fromJSDate(new Date(timestamp)).toLocaleString( + DateTime.DATETIME_FULL + ) +} + +export class TwitterHandler extends ContentHandler { + constructor() { + super() + this.name = 'Twitter' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + return !!TWITTER_BEARER_TOKEN && TWITTER_URL_MATCH.test(url.toString()) + } + + async preHandle(url: string, document?: Document): Promise { + console.log('prehandling twitter url', url) + + const tweetId = tweetIdFromStatusUrl(url) + if (!tweetId) { + throw new Error('could not find tweet id in url') + } + const tweetData = (await getTweetById(tweetId)).data as { + data: { + author_id: string + text: string + entities: { + urls: [ + { + url: string + expanded_url: string + display_url: string + } + ] + } + created_at: string + } + includes: { + users: [ + { + id: string + name: string + profile_image_url: string + username: string + } + ] + media: [ + { + preview_image_url: string + type: string + url: string + } + ] + } + } + const authorId = tweetData.data.author_id + const author = tweetData.includes.users.filter((u) => u.id === authorId)[0] // strict comparison: pick the user whose id equals the tweet's author_id without mutating the response + // escape html entities in title + const title = _.escape(titleForAuthor(author)) + const authorImage = author.profile_image_url.replace('_normal', '_400x400') + + let text = tweetData.data.text + if (tweetData.data.entities && tweetData.data.entities.urls) { + for (const urlObj of tweetData.data.entities.urls) { + 
text = text.replace( + urlObj.url, + `
${urlObj.display_url}` + ) + } + } + + const front = ` +
+

${text}

+ ` + + let includesHtml = '' + if (tweetData.includes.media) { + includesHtml = tweetData.includes.media + .map((m) => { + const linkUrl = m.type == 'photo' ? m.url : url + const previewUrl = m.type == 'photo' ? m.url : m.preview_image_url + const mediaOpen = ` + + + + ` + return mediaOpen + }) + .join('\n') + } + + const back = ` + — ${ + author.username + } ${author.name} ${formatTimestamp( + tweetData.data.created_at + )} +
+ ` + const content = ` + + + + + + + + ${front} + ${includesHtml} + ${back} + ` + + return { content, url, title } + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/wikipedia-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/wikipedia-handler.ts new file mode 100644 index 000000000..8c3a176fd --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/wikipedia-handler.ts @@ -0,0 +1,20 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' + +export class WikipediaHandler extends ContentHandler { + constructor() { + super() + this.name = 'wikipedia' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + return new URL(url).hostname.endsWith('wikipedia.org') + } + + async preHandle(url: string, dom: Document): Promise { + // This removes the [edit] anchors from wikipedia pages + dom.querySelectorAll('.mw-editsection').forEach((e) => e.remove()) + // this removes the sidebar + dom.querySelector('.infobox')?.remove() + return Promise.resolve({ dom }) + } +} diff --git a/packages/puppeteer-parse/content-handler/src/websites/youtube-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/youtube-handler.ts new file mode 100644 index 000000000..4cdb7ee98 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/src/websites/youtube-handler.ts @@ -0,0 +1,76 @@ +import { ContentHandler, PreHandleResult } from '../content-handler' +import axios from 'axios' +import _ from 'underscore' + +const YOUTUBE_URL_MATCH = + /^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu\.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/ // both host dots are literal so only youtube.com and youtu.be match + +export const getYoutubeVideoId = (url: string) => { + const u = new URL(url) + const videoId = u.searchParams.get('v') + if (!videoId) { + const match = url.toString().match(YOUTUBE_URL_MATCH) + if (match === null || match.length < 6 || !match[5]) { + return undefined + } + return match[5] + } + return videoId +} + +export class YoutubeHandler extends 
ContentHandler { + constructor() { + super() + this.name = 'Youtube' + } + + shouldPreHandle(url: string, dom?: Document): boolean { + return YOUTUBE_URL_MATCH.test(url.toString()) + } + + async preHandle(url: string, document?: Document): Promise { + const videoId = getYoutubeVideoId(url) + if (!videoId) { + return {} + } + + const oembedUrl = + `https://www.youtube.com/oembed?format=json&url=` + + encodeURIComponent(`https://www.youtube.com/watch?v=${videoId}`) + const oembed = (await axios.get(oembedUrl.toString())).data as { + title: string + width: number + height: number + thumbnail_url: string + author_name: string + author_url: string + } + // escape html entities in title + const title = _.escape(oembed.title) + const ratio = oembed.width / oembed.height + const thumbnail = oembed.thumbnail_url + const height = 350 + const width = height * ratio + const authorName = _.escape(oembed.author_name) + + const content = ` + + ${title} + + + + + + + + +

${title}

+ + + ` + + console.log('got video id', videoId) + + return { content, title: 'Youtube Content' } + } +} diff --git a/packages/puppeteer-parse/content-handler/test/apple-news-handler.test.ts b/packages/puppeteer-parse/content-handler/test/apple-news-handler.test.ts new file mode 100644 index 000000000..1584f9e28 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/test/apple-news-handler.test.ts @@ -0,0 +1,10 @@ +import { AppleNewsHandler } from '../src/websites/apple-news-handler' + +describe('open a simple web page', () => { + it('should return a response', async () => { + const response = await new AppleNewsHandler().preHandle( + 'https://apple.news/AxjzaZaPvSn23b67LhXI5EQ' + ) + console.log('response', response) + }) +}) diff --git a/packages/puppeteer-parse/content-handler/test/babel-register.js b/packages/puppeteer-parse/content-handler/test/babel-register.js new file mode 100644 index 000000000..a6f65f60a --- /dev/null +++ b/packages/puppeteer-parse/content-handler/test/babel-register.js @@ -0,0 +1,3 @@ +const register = require('@babel/register').default + +register({ extensions: ['.ts', '.tsx', '.js', '.jsx'] }) diff --git a/packages/puppeteer-parse/content-handler/test/data/beehiiv-newsletter.html b/packages/puppeteer-parse/content-handler/test/data/beehiiv-newsletter.html new file mode 100644 index 000000000..369d42af0 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/test/data/beehiiv-newsletter.html @@ -0,0 +1,15 @@ +I talked to a guy that spent $30M on a Beeple + +
+
+ + + diff --git a/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-newsletter.html b/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-newsletter.html new file mode 100644 index 000000000..0f67fd04d --- /dev/null +++ b/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-newsletter.html @@ -0,0 +1 @@ +


---------- Forwarded message ---------
From: Andrew Methven <slowchinese@substack.com>
Date: Fri, Feb 18, 2022 at 11:57 PM
Subject: Companies that eat people
To: <XXXXXXXXXX@gmail.com>


Slow Chinese 每周漫闻 ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌

Companies that eat people

Slow Chinese 每周漫闻

The phrase, ‘eating people’ (吃人 chī rén), is used to criticise companies in China that exploit their employees.

It’s originally from Lǔ Xùn’s (鲁迅), A Madman's Diary (狂人日记 kuángrén rìjì), published in 1918:

我翻开历史一查,这历史没有年代。歪歪斜斜的每页上都写着“仁义道德”几个字,我横竖睡不着,仔细看了半夜,才从字缝里看出来,满本上都写着两个字“吃人"!

As I look through the pages of history, I see there are no dates. On each page, written messily, are the characters, ‘benevolence and morality’. I can’t sleep. I read into the night. Finally, I find hidden between the characters across the page, the words, ‘eating people’.

The times have changed since Lu Xun made that observation more than 100 years ago, but the culture of ‘eating people’ has not, according to social media comments this week, such as this one:

吃人的事实,从来没有变过,历朝历代都是如此 - The reality of [companies] exploiting their employees is nothing new. It’s been the same throughout history.

Two of China’s biggest tech companies, Tencent and Bilibili, have recently been accused of ‘eating people’, abusing and exploiting their staff.

So that’s what we discuss this week.

  • Conversations worth consuming: interview with Zhāng Yìfēi 张义飞 a former employee of Tencent

  • Words of the week: coverage and social media commentary of a recent death of a Bilibili employee allegedly due to overwork.

The audio version of this newsletter is already live - become a member to access it in your podcast app!

Use this link to claim a one-month free trial of the membership to give the full experience a go:

One-month free trial


1. CONVERSATIONS WITH CONSUMING

腾讯带头“反内卷”:光子工作室拒绝996,保障双休_游戏

Interview with Zhang Yifei

Two weeks ago a 25-year-old programmer at Tencent, Zhāng Yìfēi 张义飞, became an Internet sensation after standing up to his bosses at the company. He announced in an internal group chat that he was quitting his job, which then went viral on social media.

If 20-hour days is what the company wants, he wrote, ‘I’ll resign tomorrow’

36Kr interviewed Zhang this week (in Chinese). He talks more about the overtime culture at Tencent, and why he dared to take on his company in such a public way - he already had another job lined up.

There are some excellent words in his description of life as a working person at Tencent.

Useful words

  • 卡 kǎ - stop, block

    什么时候离职的?有人卡你吗 - When did you leave your job? Did they try to stop you?

  • 剥削 bō xuē - exploit

    加班严重、996工作制、互联网巨头压榨剥削员工等话题再次被拿来讨论

    - Topics such as serious overtime, the 996 work system, and the exploitation of employees by Internet giants are being discussed again.

  • 底气 dǐ qì - confidence, back up

    自己已经提前拿到其他公司的offer,比较有底气 - I already had an offer from another company, so I was relatively confident about doing it.

  • 忌惮 jì dàn - fear, be afraid of

    如果一些互联网大厂因此忌惮、不录用我,我正好也不想去这种加班严重的地方 - If some big Internet companies are afraid to hire me, that’s fine by me. I also don't want to work in a company with such heavy overtime.

  • 手软 shǒu ruǎn - ‘soft hand’, forgiving

    不要特立独行,搞小团体,否则他不会手软 - Do not march to a different beat or form small cliques. He will come down hard on this kind of behaviour.

  • 打硬仗 dǎ yìng zhàng - fight a hard war

    张小龙管理下的企业微信,经常会强调用小而精的团队打硬仗 - The company Wechat, under Zhang Xiaolong’s management, would often emphasise using a small and efficient team to work on tight deadlines.

    • Note: a common phrase used in Chinese companies when a team is working intensely on a project or against a ridiculous deadline.

    • Related: 打胜仗 dǎ shèngzhàng - win a war

  • 喊口号 hǎn kǒuhào - shouting slogans

    但大家普遍的看法是,不想看到空洞地喊口号,只想看到具体行为 - The general view is they don’t want to see people shouting empty slogans. They want to see action.

Idioms

  • 初出茅庐 chūchū máolú - ‘just come out of the thatched cottage’; inexperienced, wet behind the ears

    但对于大众而言,互联网巨头和初出茅庐的应届生,相比较下毕竟力量悬殊 - There’s no comparison between the power of the big internet companies and graduate employees with no experience.

  • 昏昏沉沉 hūnhūn chénchén - feeling sleepy

    来这里入职两个月,感到昏昏沉沉,记忆力下降很多

    - I’ve been here for two months. I feel tired and my memory has declined a lot.

  • 热火朝天 rèhuǒ cháotiān - ‘hot fire face sky’; vigorously, with energy

    到点的时候,差不多一半人还没走,都在热火朝天地讨论工作

    - When it was time to finish at the end of the day, around half of the team stayed behind to talk energetically about their work.


2. WORDS OF THE WEEK

上海之旅-前往Bilibili总部! - 哔哩哔哩

Bilibili eats people

A man who headed a content moderation department at Chinese video-streaming site Bilibili died last week after suffering a cerebral hemorrhage while working a Chinese New Year holiday shift.

The company was heavily criticised (Sohu - in Chinese) of having a toxic work culture.

One of the top comments on social media adapted the line from Lu Xun’s A Madman’s Diary. But instead of looking through the pages of history, overworked netizens find the same message hidden in their payslips:

我翻开工资单一查,这工资单没有工资,歪歪斜斜的每条都写着“迟到扣款”四个字。我横竖睡不着,仔细看了半夜,才从字缝里看出来,满本上都写着两个字“吃人"!

I glance at my payslip. I don’t see any pay on it. All I see are the four characters scrawled across the page: ‘fined for being late’. I can’t sleep. I look at it deep into the night. I finally find hidden between the characters across the page, written the words: ‘eating people’.

The words shared below are from the Sohu article and also from social media comments.

Useful words

  • 猝死 cù sǐ - sudden death, die suddenly

    他2月5日凌晨脑出血猝死 - He died suddenly in the early hours of the morning on 5 February.

  • 嗝屁 gé pì - hiccup, to die

    对大企业嗝屁几个不算啥,对于各个家庭你就是唯一呀 - A few people dying means nothing to a big company, but for a family it’s their only child!

    • Related: 翘辫子 qiào biàn zi - make braids - kick the bucket (Qing Dynasty reference relating to when men had to remove their braids).

      他因为加班严重而翘辫子了 - He died because of too much overtime.

  • 企图 qǐ tú - try to, seek to do something (negative)

    通过各种企图将这件事压下来,我决定发声 - Bilibili attempted to suppress the situation through different means, so I decided to speak up.

    • Note: similar to 试图 shì tú, but more negative connotations

  • 腐朽 fǔ xiǔ - degenerate, rotten

    道出了资本家的腐朽和恶臭 - Reeks of the stench and rot of capitalism.

  • 压垮 yā kuǎ - crush

    就是无情的压榨现有劳动力,能压垮一个是一个,多招一个人算我输 - It’s the callous exploitation of the current employees. The company tries to squeeze as much as possible from each and every one of them rather than hiring one more employee.

Idioms

  • 血汗工厂 xuèhàn gōngchǎng - ‘blood sweat factory’ - sweat shop

    B站因员工猝死一事,被推进了“血汗工厂”的舆论漩涡 - Bilibili has been dragged into a public debate about the company being a ‘sweatshop’ due to the sudden death of an employee.

    • Note: The pronunciation of 血 is normally xiě in colloquial phrases, and xuè in technical terms. But the rule is vague and not very helpful. In this phrase it's always xuè. But in 血汗钱 xiěhàn qián, ‘hard earned money’, xiě is more common. So confusing!

  • 混淆视听 hùnxiáo shìtīng - to muddle or confuse an issue

    晚9到早9确实不属于加班,因为是大夜班的正常时间,大厂就这样混淆视听? - 9pm to 9am does not count as overtime. But that’s because the night shift is a normal working shift for these big tech companies. They are muddling up the matter.

  • 枯燥乏味 kūzào fáwèi - boring

    做审核的确工作强度很大,而且枯燥乏味 - Being a content moderator is a very intense job. It’s also extremely boring.

    • More: 枯燥无味 kūzào wúwèi - boring (same meaning)

  • 恬不知耻 tián bù zhī chǐ - shameless

    觉得正常吗?居然还能如此恬不知耻的说“没有让他加班” - Is this normal? How can they be so shameless in saying the company did not ‘ask him to work overtime’?

  • 难上加难 nán shàng jiā nán - very difficult

    只要企业做大了,普通职工想维权难上加难 - When the company gets big it’s almost impossible for employees to protect their rights.

    • Related: 雪上加霜 xuěshàng jiāshuāng - make matters worse

Colloquial phrases

  • 万变不离其宗 wàn biàn bùlí qízōng - make ten thousand changes but remain the same in essence

    好像这些大公司公关都是万变不离其宗,核心就是推卸责任!- It seems the PR of these big companies tells a nice story, but in essence they don’t change. They are merely avoiding their responsibility.

  • 不见棺材不落泪 bùjiàn guāncai bù luò lèi - won’t cry until they see the coffin

    这也说得出口啊!真是不见血不掉泪啊 - They actually say this? Do they really have to let somebody die before they accept they are in the wrong?

    • More: I wrote more about this colloquialism in SupChina’s phrase of the week.

    • Related: 不到黄河心不死 bù dào huánghé xīn bù sǐ - not to stop until one reaches the Yellow River; refuse to give up until all hope is gone


3. RECOMMENDATIONS

Become a member of the community

As a member of the community you get access to unique resources to help you master modern Mandarin, learn, use, and understand Chinese language the way people speak it today.

  • 📚 Resources: Pleco downloads, word lists, and example sentences print-outs and audio download for each issue.

  • 🔉 Audio: audio version of the newsletter delivered as a member-only podcast every Saturday morning (before the free newsletter is published)

  • 🤓 Archive: full database of all words and phrases in the archive (nearly 1,300!) searchable according to word-type, sector and topic with audio and example sentences for each entry, updated weekly.

Use this link to claim a one-month free trial of the membership to give the full experience a go.

One-month free trial

That’s it for this week.

I look forward to seeing you in your inbox same time next weekend.

Andrew

+++

ps - please do share this newsletter on your social channels and with your networks

Share Slow Chinese 每周漫闻

Like

© 2022 Andrew Methven Unsubscribe
548 Market Street PMB 72296, San Francisco, CA 94104

Publish on Substack

diff --git a/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-welcome-email.html b/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-welcome-email.html new file mode 100644 index 000000000..feb14eeb3 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-welcome-email.html @@ -0,0 +1 @@ +


---------- Forwarded message ---------
From: Andrew Methven <slowchinese@substack.com>
Date: Thu, Dec 9, 2021 at 11:27 PM
Subject: How can Slow Chinese 每周漫闻 help you?
To: <XXXXXXXXXX@gmail.com>


Thank you for subscribing to for Slow Chinese 每周漫闻 ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌

Thanks so much for subscribing to Slow Chinese 每周漫闻 and welcome aboard!

I’m excited to help you improve and practice your Chinese language skills.

Here’s a quick way I can help:

Reply to this email and tell me about your story of learning Chinese and what challenges you currently have with the language.

I’ll reply with a specific suggestion to help you.

Also, to make sure the next issue of the newsletter doesn’t land in your spam folder, add my email address to your contacts.

Thanks!

Andrew

© 2021 Andrew Methven Unsubscribe
548 Market Street PMB 72296, San Francisco, CA 94104

Publish on Substack

diff --git a/packages/puppeteer-parse/content-handler/test/data/substack-private-forwarded-newsletter.html b/packages/puppeteer-parse/content-handler/test/data/substack-private-forwarded-newsletter.html new file mode 100644 index 000000000..bb240dbe8 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/test/data/substack-private-forwarded-newsletter.html @@ -0,0 +1,2 @@ +


---------- Forwarded message ---------
From: giggs <darkgiggsxx@gmail.com>
Date: Wed, Mar 2, 2022 at 5:29 PM
Subject: Fwd: The German Retreat From Nuclear Power
To: Radek <radoslaw.jurga@gmail.com>



---------- Forwarded message ---------
De : Bismarck Analysis <bismarck@substack.com>
Date: mer. 2 mars 2022 à 15:02
Subject: The German Retreat From Nuclear Power
To: <darkgiggsxx@gmail.com>


Germany's economic priorities are torn between industrial growth and energy degrowth. As its last nuclear plants are shut down, the choice is between relying on fossil fuels or closing factories. ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌

The German Retreat From Nuclear Power

Germany's economic priorities are torn between industrial growth and energy degrowth. As its last nuclear plants are shut down, the choice is between relying on fossil fuels or closing factories.

Isar Nuclear Power Plant near Landshut, Germany in 2016. The Isar station is scheduled to be shut down by the end of 2022. Photo by Dennis Hansch. Source.
+
diff --git a/packages/puppeteer-parse/content-handler/test/newsletter.test.ts b/packages/puppeteer-parse/content-handler/test/newsletter.test.ts new file mode 100644 index 000000000..dd3b7941c --- /dev/null +++ b/packages/puppeteer-parse/content-handler/test/newsletter.test.ts @@ -0,0 +1,191 @@ +import 'mocha' +import * as chai from 'chai' +import { expect } from 'chai' +import chaiAsPromised from 'chai-as-promised' +import chaiString from 'chai-string' +import { SubstackHandler } from '../src/newsletters/substack-handler' +import { AxiosHandler } from '../src/newsletters/axios-handler' +import { BloombergNewsletterHandler } from '../src/newsletters/bloomberg-newsletter-handler' +import { GolangHandler } from '../src/newsletters/golang-handler' +import { MorningBrewHandler } from '../src/newsletters/morning-brew-handler' +import nock from 'nock' +import { generateUniqueUrl } from '../src/content-handler' +import fs from 'fs' +import { BeehiivHandler } from '../src/newsletters/beehiiv-handler' + +chai.use(chaiAsPromised) +chai.use(chaiString) + +const load = (path: string): string => { + return fs.readFileSync(path, 'utf8') +} + +describe('Newsletter email test', () => { + describe('#getNewsletterUrl()', () => { + it('returns url when email is from SubStack', async () => { + const rawUrl = '' + + await expect( + new SubstackHandler().parseNewsletterUrl(rawUrl, '') + ).to.eventually.equal('https://hongbo130.substack.com/p/tldr') + }) + + it('returns url when email is from Axios', async () => { + const url = 'https://axios.com/blog/the-best-way-to-build-a-web-app' + const html = `View in browser at ${url}` + + await expect( + new AxiosHandler().parseNewsletterUrl('', html) + ).to.eventually.equal(url) + }) + + it('returns url when email is from Bloomberg', async () => { + const url = 'https://www.bloomberg.com/news/google-is-now-a-partner' + const html = ` + + View in browser + + ` + + await expect( + new BloombergNewsletterHandler().parseNewsletterUrl('', html) + 
).to.eventually.equal(url) + }) + + it('returns url when email is from Golang Weekly', async () => { + const url = 'https://www.golangweekly.com/first' + const html = ` + Read on the Web + ` + + await expect( + new GolangHandler().parseNewsletterUrl('', html) + ).to.eventually.equal(url) + }) + + it('returns url when email is from Morning Brew', async () => { + const url = 'https://www.morningbrew.com/daily/issues/first' + const html = ` + View Online + ` + + await expect( + new MorningBrewHandler().parseNewsletterUrl('', html) + ).to.eventually.equal(url) + }) + }) + + describe('get author from email address', () => { + it('returns author when email is from Substack', () => { + const from = 'Jackson Harper from Omnivore App ' + expect(new AxiosHandler().parseAuthor(from)).to.equal( + 'Jackson Harper from Omnivore App' + ) + }) + + it('returns author when email is from Axios', () => { + const from = 'Mike Allen ' + expect(new AxiosHandler().parseAuthor(from)).to.equal('Mike Allen') + }) + }) + + describe('isProbablyNewsletter', () => { + it('returns true for substack newsletter', async () => { + const html = load('./test/data/substack-forwarded-newsletter.html') + await expect( + new SubstackHandler().isNewsletter({ + html, + postHeader: '', + from: '', + unSubHeader: '', + }) + ).to.eventually.be.true + }) + it('returns true for private forwarded substack newsletter', async () => { + const html = load( + './test/data/substack-private-forwarded-newsletter.html' + ) + await expect( + new SubstackHandler().isNewsletter({ + html, + postHeader: '', + from: '', + unSubHeader: '', + }) + ).to.eventually.be.true + }) + it('returns false for substack welcome email', async () => { + const html = load('./test/data/substack-forwarded-welcome-email.html') + await expect( + new SubstackHandler().isNewsletter({ + html, + postHeader: '', + from: '', + unSubHeader: '', + }) + ).to.eventually.be.false + }) + it('returns true for beehiiv.com newsletter', async () => { + const html = 
load('./test/data/beehiiv-newsletter.html') + await expect( + new BeehiivHandler().isNewsletter({ + html, + postHeader: '', + from: '', + unSubHeader: '', + }) + ).to.eventually.be.true + }) + }) + + describe('findNewsletterUrl', async () => { + it('gets the URL from the header if it is a substack newsletter', async () => { + nock('https://email.mg2.substack.com') + .head( + '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY' + ) + .reply(302, undefined, { + Location: + 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217', + }) + .get('/p/companies-that-eat-people-217') + .reply(200, '') + const html = load('./test/data/substack-forwarded-newsletter.html') + const url = await new SubstackHandler().findNewsletterUrl(html) + // Not sure if the redirects from substack expire, this test could eventually fail + expect(url).to.startWith( + 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217' + ) + }).timeout(10000) + it('gets the URL from the header if it is a beehiiv newsletter', async () => { + nock('https://u23463625.ct.sendgrid.net') + .head( + '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno' + ) + .reply(302, undefined, { + Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple', + }) + .get('/p/talked-guy-spent-30m-beeple') + .reply(200, '') 
+ const html = load('./test/data/beehiiv-newsletter.html') + const url = await new BeehiivHandler().findNewsletterUrl(html) + expect(url).to.startWith( + 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' + ) + }) + it('returns undefined if it is not a newsletter', async () => { + const html = load('./test/data/substack-forwarded-welcome-email.html') + const url = await new SubstackHandler().findNewsletterUrl(html) + expect(url).to.be.undefined + }) + }) + + describe('generateUniqueUrl', () => { + it('generates a unique URL', () => { + const url1 = generateUniqueUrl() + const url2 = generateUniqueUrl() + + expect(url1).to.not.eql(url2) + }) + }) +}) diff --git a/packages/puppeteer-parse/content-handler/test/youtube-handler.test.ts b/packages/puppeteer-parse/content-handler/test/youtube-handler.test.ts new file mode 100644 index 000000000..beb4d3a66 --- /dev/null +++ b/packages/puppeteer-parse/content-handler/test/youtube-handler.test.ts @@ -0,0 +1,25 @@ +import { expect } from 'chai' +import 'mocha' +import { getYoutubeVideoId } from '../src/websites/youtube-handler' + +describe('getYoutubeVideoId', () => { + it('should parse video id out of a URL', async () => { + expect('BnSUk0je6oo').to.eq( + getYoutubeVideoId('https://www.youtube.com/watch?v=BnSUk0je6oo&t=269s') + ) + expect('vFD2gu007dc').to.eq( + getYoutubeVideoId( + 'https://www.youtube.com/watch?v=vFD2gu007dc&list=RDvFD2gu007dc&start_radio=1' + ) + ) + expect('vFD2gu007dc').to.eq( + getYoutubeVideoId('https://youtu.be/vFD2gu007dc') + ) + expect('BMFVCnbRaV4').to.eq( + getYoutubeVideoId('https://youtube.com/watch?v=BMFVCnbRaV4&feature=share') + ) + expect('cg9b4RC87LI').to.eq( + getYoutubeVideoId('https://youtu.be/cg9b4RC87LI?t=116') + ) + }) +}) diff --git a/packages/puppeteer-parse/content-handler/tsconfig.json b/packages/puppeteer-parse/content-handler/tsconfig.json new file mode 100644 index 000000000..aeb8d2c3a --- /dev/null +++ b/packages/puppeteer-parse/content-handler/tsconfig.json @@ -0,0 
+1,10 @@ +{ + "extends": "@tsconfig/node14/tsconfig.json", + "compilerOptions": { + "rootDir": ".", + "declaration": true, + "outDir": "build", + "lib": ["dom"] + }, + "include": ["src"] +} diff --git a/packages/puppeteer-parse/package.json b/packages/puppeteer-parse/package.json index 11e60b00d..0d1ab90c3 100644 --- a/packages/puppeteer-parse/package.json +++ b/packages/puppeteer-parse/package.json @@ -6,6 +6,7 @@ "dependencies": { "@google-cloud/logging-winston": "^5.1.1", "@google-cloud/storage": "^5.18.1", + "@omnivore/content-handler": "file:./../content-handler", "@sentry/serverless": "^6.13.3", "axios": "^0.27.2", "chrome-aws-lambda": "^10.1.0", diff --git a/yarn.lock b/yarn.lock index b76eae9d8..1edbd4c7a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4312,6 +4312,17 @@ dependencies: "@octokit/openapi-types" "^9.5.0" +"@omnivore/content-handler@file:./packages/content-handler": + version "1.0.0" + dependencies: + addressparser "^1.0.1" + axios "^0.27.2" + linkedom "^0.14.16" + luxon "^3.0.4" + rfc2047 "^4.0.1" + underscore "^1.13.6" + uuid "^9.0.0" + "@opentelemetry/api-metrics@0.27.0": version "0.27.0" resolved "https://registry.yarnpkg.com/@opentelemetry/api-metrics/-/api-metrics-0.27.0.tgz#d8eca344ed1155f3ea8a8133ade827b4bb90efbf" From cae2715a52c6127c8e22daff9c732d4965c1e38f Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 3 Oct 2022 11:15:09 +0800 Subject: [PATCH 02/54] Remove content-handler --- .../content-handler/.eslintignore | 1 - .../puppeteer-parse/content-handler/.eslintrc | 6 - .../content-handler/.gitignore | 2 - .../content-handler/.npmignore | 7 - .../content-handler/mocha-config.json | 5 - .../content-handler/package.json | 34 ---- .../content-handler/src/content-handler.ts | 175 ---------------- .../content-handler/src/index.ts | 116 ----------- .../src/newsletters/axios-handler.ts | 46 ----- .../src/newsletters/beehiiv-handler.ts | 43 ---- .../bloomberg-newsletter-handler.ts | 37 ---- .../src/newsletters/convertkit-handler.ts | 41 ---- 
.../src/newsletters/golang-handler.ts | 27 --- .../src/newsletters/morning-brew-handler.ts | 35 ---- .../src/newsletters/revue-handler.ts | 46 ----- .../src/newsletters/substack-handler.ts | 90 --------- .../src/websites/apple-news-handler.ts | 31 --- .../src/websites/bloomberg-handler.ts | 41 ---- .../src/websites/derstandard-handler.ts | 34 ---- .../src/websites/image-handler.ts | 32 --- .../src/websites/medium-handler.ts | 26 --- .../src/websites/pdf-handler.ts | 18 -- .../src/websites/scrapingBee-handler.ts | 38 ---- .../src/websites/t-dot-co-handler.ts | 26 --- .../src/websites/twitter-handler.ts | 167 --------------- .../src/websites/wikipedia-handler.ts | 20 -- .../src/websites/youtube-handler.ts | 76 ------- .../test/apple-news-handler.test.ts | 10 - .../content-handler/test/babel-register.js | 3 - .../test/data/beehiiv-newsletter.html | 15 -- .../data/substack-forwarded-newsletter.html | 1 - .../substack-forwarded-welcome-email.html | 1 - ...substack-private-forwarded-newsletter.html | 2 - .../content-handler/test/newsletter.test.ts | 191 ------------------ .../test/youtube-handler.test.ts | 25 --- .../content-handler/tsconfig.json | 10 - 36 files changed, 1478 deletions(-) delete mode 100644 packages/puppeteer-parse/content-handler/.eslintignore delete mode 100644 packages/puppeteer-parse/content-handler/.eslintrc delete mode 100644 packages/puppeteer-parse/content-handler/.gitignore delete mode 100644 packages/puppeteer-parse/content-handler/.npmignore delete mode 100644 packages/puppeteer-parse/content-handler/mocha-config.json delete mode 100644 packages/puppeteer-parse/content-handler/package.json delete mode 100644 packages/puppeteer-parse/content-handler/src/content-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/index.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/axios-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/beehiiv-handler.ts delete mode 
100644 packages/puppeteer-parse/content-handler/src/newsletters/bloomberg-newsletter-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/convertkit-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/golang-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/morning-brew-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/revue-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/newsletters/substack-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/apple-news-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/bloomberg-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/derstandard-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/image-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/medium-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/pdf-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/scrapingBee-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/t-dot-co-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/twitter-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/wikipedia-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/src/websites/youtube-handler.ts delete mode 100644 packages/puppeteer-parse/content-handler/test/apple-news-handler.test.ts delete mode 100644 packages/puppeteer-parse/content-handler/test/babel-register.js delete mode 100644 packages/puppeteer-parse/content-handler/test/data/beehiiv-newsletter.html delete mode 100644 packages/puppeteer-parse/content-handler/test/data/substack-forwarded-newsletter.html delete mode 100644 
packages/puppeteer-parse/content-handler/test/data/substack-forwarded-welcome-email.html delete mode 100644 packages/puppeteer-parse/content-handler/test/data/substack-private-forwarded-newsletter.html delete mode 100644 packages/puppeteer-parse/content-handler/test/newsletter.test.ts delete mode 100644 packages/puppeteer-parse/content-handler/test/youtube-handler.test.ts delete mode 100644 packages/puppeteer-parse/content-handler/tsconfig.json diff --git a/packages/puppeteer-parse/content-handler/.eslintignore b/packages/puppeteer-parse/content-handler/.eslintignore deleted file mode 100644 index c2658d7d1..000000000 --- a/packages/puppeteer-parse/content-handler/.eslintignore +++ /dev/null @@ -1 +0,0 @@ -node_modules/ diff --git a/packages/puppeteer-parse/content-handler/.eslintrc b/packages/puppeteer-parse/content-handler/.eslintrc deleted file mode 100644 index e006282a6..000000000 --- a/packages/puppeteer-parse/content-handler/.eslintrc +++ /dev/null @@ -1,6 +0,0 @@ -{ - "extends": "../../.eslintrc", - "parserOptions": { - "project": "tsconfig.json" - } -} \ No newline at end of file diff --git a/packages/puppeteer-parse/content-handler/.gitignore b/packages/puppeteer-parse/content-handler/.gitignore deleted file mode 100644 index 0ae7e5c9e..000000000 --- a/packages/puppeteer-parse/content-handler/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -node_modules -/lib diff --git a/packages/puppeteer-parse/content-handler/.npmignore b/packages/puppeteer-parse/content-handler/.npmignore deleted file mode 100644 index b5e2b8569..000000000 --- a/packages/puppeteer-parse/content-handler/.npmignore +++ /dev/null @@ -1,7 +0,0 @@ -/test/ -src -tsconfig.json -.eslintrc -.eslintignore -.gitignore -mocha-config.json diff --git a/packages/puppeteer-parse/content-handler/mocha-config.json b/packages/puppeteer-parse/content-handler/mocha-config.json deleted file mode 100644 index 44d1d24c1..000000000 --- a/packages/puppeteer-parse/content-handler/mocha-config.json +++ /dev/null @@ 
-1,5 +0,0 @@ -{ - "extension": ["ts"], - "spec": "test/**/*.test.ts", - "require": "test/babel-register.js" - } \ No newline at end of file diff --git a/packages/puppeteer-parse/content-handler/package.json b/packages/puppeteer-parse/content-handler/package.json deleted file mode 100644 index e4021b3e4..000000000 --- a/packages/puppeteer-parse/content-handler/package.json +++ /dev/null @@ -1,34 +0,0 @@ -{ - "name": "@omnivore/content-handler", - "version": "1.0.0", - "description": "A standalone version of content handler to parse and format each type of content", - "main": "build/src/index.js", - "types": "build/src/index.d.ts", - "files": [ - "build/src" - ], - "license": "Apache-2.0", - "scripts": { - "test": "yarn mocha -r ts-node/register --config mocha-config.json", - "lint": "eslint src --ext ts,js,tsx,jsx", - "compile": "tsc", - "build": "tsc" - }, - "devDependencies": { - "chai": "^4.3.6", - "chai-as-promised": "^7.1.1", - "chai-string": "^1.5.0", - "eslint-plugin-prettier": "^4.0.0", - "mocha": "^10.0.0", - "nock": "^13.2.9" - }, - "dependencies": { - "addressparser": "^1.0.1", - "axios": "^0.27.2", - "linkedom": "^0.14.16", - "luxon": "^3.0.4", - "rfc2047": "^4.0.1", - "underscore": "^1.13.6", - "uuid": "^9.0.0" - } -} diff --git a/packages/puppeteer-parse/content-handler/src/content-handler.ts b/packages/puppeteer-parse/content-handler/src/content-handler.ts deleted file mode 100644 index 22216fabe..000000000 --- a/packages/puppeteer-parse/content-handler/src/content-handler.ts +++ /dev/null @@ -1,175 +0,0 @@ -import addressparser from 'addressparser' -import rfc2047 from 'rfc2047' -import { v4 as uuid } from 'uuid' -import { parseHTML } from 'linkedom' -import axios from 'axios' - -interface Unsubscribe { - mailTo?: string - httpUrl?: string -} - -export interface NewsletterInput { - postHeader: string - from: string - unSubHeader: string - email: string - html: string - title: string -} - -export interface NewsletterResult { - email: string - content: 
string - url: string - title: string - author: string - unsubMailTo?: string - unsubHttpUrl?: string -} - -export interface PreHandleResult { - url?: string - title?: string - content?: string - contentType?: string - dom?: Document -} - -export const FAKE_URL_PREFIX = 'https://omnivore.app/no_url?q=' -export const generateUniqueUrl = () => FAKE_URL_PREFIX + uuid() - -export abstract class ContentHandler { - protected senderRegex: RegExp - protected urlRegex: RegExp - name: string - - protected constructor() { - this.senderRegex = new RegExp(/NEWSLETTER_SENDER_REGEX/) - this.urlRegex = new RegExp(/NEWSLETTER_URL_REGEX/) - this.name = 'Handler name' - } - - shouldResolve(url: string): boolean { - return false - } - - async resolve(url: string): Promise { - return Promise.resolve(url) - } - - shouldPreHandle(url: string, dom?: Document): boolean { - return false - } - - async preHandle(url: string, dom?: Document): Promise { - return Promise.resolve({ url, dom }) - } - - async isNewsletter(input: { - postHeader: string - from: string - unSubHeader: string - html?: string - }): Promise { - const re = new RegExp(this.senderRegex) - return Promise.resolve( - re.test(input.from) && (!!input.postHeader || !!input.unSubHeader) - ) - } - - findNewsletterHeaderHref(dom: Document): string | undefined { - return undefined - } - - // Given an HTML blob tries to find a URL to use for - // a canonical URL. 
- async findNewsletterUrl(html: string): Promise { - const dom = parseHTML(html).document - - // Check if this is a substack newsletter - const href = this.findNewsletterHeaderHref(dom) - if (href) { - // Try to make a HEAD request, so we get the redirected URL, since these - // will usually be behind tracking url redirects - try { - const response = await axios.head(href, { timeout: 5000 }) - return Promise.resolve( - // eslint-disable-next-line @typescript-eslint/no-unsafe-member-access - response.request.res.responseUrl as string | undefined - ) - } catch (e) { - console.log('error making HEAD request', e) - return Promise.resolve(href) - } - } - - return Promise.resolve(undefined) - } - - async parseNewsletterUrl( - _postHeader: string, - html: string - ): Promise { - // get newsletter url from html - const matches = html.match(this.urlRegex) - if (matches) { - return Promise.resolve(matches[1]) - } - return Promise.resolve(undefined) - } - - parseAuthor(from: string): string { - // get author name from email - // e.g. 'Jackson Harper from Omnivore App ' - // or 'Mike Allen ' - const parsed = addressparser(from) - if (parsed.length > 0) { - return parsed[0].name - } - return from - } - - parseUnsubscribe(unSubHeader: string): Unsubscribe { - // parse list-unsubscribe header - // e.g. 
List-Unsubscribe: , - const decoded = rfc2047.decode(unSubHeader) - return { - mailTo: decoded.match(/<(https?:\/\/[^>]*)>/)?.[1], - httpUrl: decoded.match(/]*)>/)?.[1], - } - } - - async handleNewsletter({ - email, - html, - postHeader, - title, - from, - unSubHeader, - }: NewsletterInput): Promise { - console.log('handleNewsletter', email, postHeader, title, from) - - if (!email || !html || !title || !from) { - console.log('invalid newsletter email') - throw new Error('invalid newsletter email') - } - - // fallback to default url if newsletter url does not exist - // assign a random uuid to the default url to avoid duplicate url - const url = - (await this.parseNewsletterUrl(postHeader, html)) || generateUniqueUrl() - const author = this.parseAuthor(from) - const unsubscribe = this.parseUnsubscribe(unSubHeader) - - return { - email, - content: html, - url, - title, - author, - unsubMailTo: unsubscribe.mailTo || '', - unsubHttpUrl: unsubscribe.httpUrl || '', - } - } -} diff --git a/packages/puppeteer-parse/content-handler/src/index.ts b/packages/puppeteer-parse/content-handler/src/index.ts deleted file mode 100644 index e41c811c4..000000000 --- a/packages/puppeteer-parse/content-handler/src/index.ts +++ /dev/null @@ -1,116 +0,0 @@ -import { AppleNewsHandler } from './websites/apple-news-handler' -import { BloombergHandler } from './websites/bloomberg-handler' -import { DerstandardHandler } from './websites/derstandard-handler' -import { ImageHandler } from './websites/image-handler' -import { MediumHandler } from './websites/medium-handler' -import { PdfHandler } from './websites/pdf-handler' -import { ScrapingBeeHandler } from './websites/scrapingBee-handler' -import { TDotCoHandler } from './websites/t-dot-co-handler' -import { TwitterHandler } from './websites/twitter-handler' -import { YoutubeHandler } from './websites/youtube-handler' -import { WikipediaHandler } from './websites/wikipedia-handler' -import { - ContentHandler, - NewsletterInput, - 
NewsletterResult, - PreHandleResult, -} from './content-handler' -import { SubstackHandler } from './newsletters/substack-handler' -import { AxiosHandler } from './newsletters/axios-handler' -import { GolangHandler } from './newsletters/golang-handler' -import { MorningBrewHandler } from './newsletters/morning-brew-handler' -import { BloombergNewsletterHandler } from './newsletters/bloomberg-newsletter-handler' -import { BeehiivHandler } from './newsletters/beehiiv-handler' -import { ConvertkitHandler } from './newsletters/convertkit-handler' -import { RevueHandler } from './newsletters/revue-handler' - -const validateUrlString = (url: string) => { - const u = new URL(url) - // Make sure the URL is http or https - if (u.protocol !== 'http:' && u.protocol !== 'https:') { - throw new Error('Invalid URL protocol check failed') - } - // Make sure the domain is not localhost - if (u.hostname === 'localhost' || u.hostname === '0.0.0.0') { - throw new Error('Invalid URL is localhost') - } - // Make sure the domain is not a private IP - if (/^(10|172\.16|192\.168)\..*/.test(u.hostname)) { - throw new Error('Invalid URL is private ip') - } -} - -const contentHandlers: ContentHandler[] = [ - new AppleNewsHandler(), - new BloombergHandler(), - new DerstandardHandler(), - new ImageHandler(), - new MediumHandler(), - new PdfHandler(), - new ScrapingBeeHandler(), - new TDotCoHandler(), - new TwitterHandler(), - new YoutubeHandler(), - new WikipediaHandler(), -] - -const newsletterHandlers: ContentHandler[] = [ - new AxiosHandler(), - new BloombergNewsletterHandler(), - new GolangHandler(), - new SubstackHandler(), - new MorningBrewHandler(), - new SubstackHandler(), - new BeehiivHandler(), - new ConvertkitHandler(), - new RevueHandler(), -] - -export const preHandleContent = async ( - url: string, - dom?: Document -): Promise => { - // Before we run the regular handlers we check to see if we need tp - // pre-resolve the URL. 
TODO: This should probably happen recursively, - // so URLs can be pre-resolved, handled, pre-resolved, handled, etc. - for (const handler of contentHandlers) { - if (handler.shouldResolve(url)) { - try { - const resolvedUrl = await handler.resolve(url) - if (resolvedUrl && validateUrlString(resolvedUrl)) { - url = resolvedUrl - } - } catch (err) { - console.log('error resolving url with handler', handler.name, err) - } - break - } - } - // Before we fetch the page we check the handlers, to see if they want - // to perform a prefetch action that can modify our requests. - // enumerate the handlers and see if any of them want to handle the request - for (const handler of contentHandlers) { - if (handler.shouldPreHandle(url, dom)) { - console.log('preHandleContent', handler.name, url) - return handler.preHandle(url, dom) - } - } - return undefined -} - -export const handleNewsletter = async ( - input: NewsletterInput -): Promise => { - for (const handler of newsletterHandlers) { - if (await handler.isNewsletter(input)) { - return handler.handleNewsletter(input) - } - } - - return undefined -} - -module.exports = { - preHandleContent, - handleNewsletter, -} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/axios-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/axios-handler.ts deleted file mode 100644 index cd783c30e..000000000 --- a/packages/puppeteer-parse/content-handler/src/newsletters/axios-handler.ts +++ /dev/null @@ -1,46 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' - -export class AxiosHandler extends ContentHandler { - constructor() { - super() - this.senderRegex = /<.+@axios.com>/ - this.urlRegex = /View in browser at (.*)<\/a>/ - this.name = 'axios' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const host = this.name + '.com' - // check if url ends with axios.com - return new URL(url).hostname.endsWith(host) - } - - async preHandle(url: string, dom: Document): 
Promise { - const body = dom.querySelector('table') - - let isFooter = false - // this removes ads and replaces table with a div - body?.querySelectorAll('table').forEach((el) => { - // remove the footer and the ads - if (!el.textContent || el.textContent.length < 20 || isFooter) { - el.remove() - } else { - // removes the first few rows of the table (the header) - // remove the last two rows of the table (they are ads) - el.querySelectorAll('tr').forEach((tr, i) => { - if (i <= 7 || i >= el.querySelectorAll('tr').length - 2) { - console.log('removing', tr) - tr.remove() - } - }) - // replace the table with a div - const div = dom.createElement('div') - div.innerHTML = el.innerHTML - el.parentNode?.replaceChild(div, el) - // set the isFooter flag to true because the next table is the footer - isFooter = true - } - }) - - return Promise.resolve({ dom }) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/beehiiv-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/beehiiv-handler.ts deleted file mode 100644 index 0a50c1920..000000000 --- a/packages/puppeteer-parse/content-handler/src/newsletters/beehiiv-handler.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { ContentHandler } from '../content-handler' -import { parseHTML } from 'linkedom' - -export class BeehiivHandler extends ContentHandler { - constructor() { - super() - this.name = 'beehiiv' - } - - findNewsletterHeaderHref(dom: Document): string | undefined { - const readOnline = dom.querySelectorAll('table tr td div a[class*="link"]') - let res: string | undefined = undefined - readOnline.forEach((e) => { - if (e.textContent === 'Read Online') { - res = e.getAttribute('href') || undefined - } - }) - return res - } - - async isNewsletter(input: { - postHeader: string - from: string - unSubHeader: string - html: string - }): Promise { - const dom = parseHTML(input.html).document - if (dom.querySelectorAll('img[src*="beehiiv.net"]').length > 0) { - const beehiivUrl = 
this.findNewsletterHeaderHref(dom) - if (beehiivUrl) { - return Promise.resolve(true) - } - } - return false - } - - async parseNewsletterUrl( - postHeader: string, - html: string - ): Promise { - return this.findNewsletterUrl(html) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/bloomberg-newsletter-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/bloomberg-newsletter-handler.ts deleted file mode 100644 index a5f84f076..000000000 --- a/packages/puppeteer-parse/content-handler/src/newsletters/bloomberg-newsletter-handler.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' - -export class BloombergNewsletterHandler extends ContentHandler { - constructor() { - super() - this.senderRegex = /<.+@mail.bloomberg.*.com>/ - this.urlRegex = / { - const body = dom.querySelector('.wrapper') - - // this removes header - body?.querySelector('.sailthru-variables')?.remove() - body?.querySelector('.preview-text')?.remove() - body?.querySelector('.logo-wrapper')?.remove() - body?.querySelector('.by-the-number-wrapper')?.remove() - // this removes footer - body?.querySelector('.quote-box-wrapper')?.remove() - body?.querySelector('.header-wrapper')?.remove() - body?.querySelector('.component-wrapper')?.remove() - body?.querySelector('.footer')?.remove() - - return Promise.resolve({ dom }) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/convertkit-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/convertkit-handler.ts deleted file mode 100644 index 72e65f5da..000000000 --- a/packages/puppeteer-parse/content-handler/src/newsletters/convertkit-handler.ts +++ /dev/null @@ -1,41 +0,0 @@ -import { ContentHandler } from '../content-handler' -import { parseHTML } from 'linkedom' - -export class ConvertkitHandler extends ContentHandler { - constructor() { - super() - this.name = 'convertkit' - } - - findNewsletterHeaderHref(dom: Document): 
string | undefined { - const readOnline = dom.querySelectorAll('table tr td a') - let res: string | undefined = undefined - readOnline.forEach((e) => { - if (e.textContent === 'View this email in your browser') { - res = e.getAttribute('href') || undefined - } - }) - return res - } - - async isNewsletter(input: { - postHeader: string - from: string - unSubHeader: string - html: string - }): Promise { - const dom = parseHTML(input.html).document - return Promise.resolve( - dom.querySelectorAll( - 'img[src*="convertkit.com"], img[src*="convertkit-mail.com"]' - ).length > 0 - ) - } - - async parseNewsletterUrl( - postHeader: string, - html: string - ): Promise { - return this.findNewsletterUrl(html) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/golang-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/golang-handler.ts deleted file mode 100644 index 7d4724004..000000000 --- a/packages/puppeteer-parse/content-handler/src/newsletters/golang-handler.ts +++ /dev/null @@ -1,27 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' - -export class GolangHandler extends ContentHandler { - constructor() { - super() - this.senderRegex = /<.+@golangweekly.com>/ - this.urlRegex = /Read on the Web<\/a>/ - this.name = 'golangweekly' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const host = this.name + '.com' - // check if url ends with golangweekly.com - return new URL(url).hostname.endsWith(host) - } - - async preHandle(url: string, dom: Document): Promise { - const body = dom.querySelector('body') - - // this removes the "Subscribe" button - body?.querySelector('.el-splitbar')?.remove() - // this removes the title - body?.querySelector('.el-masthead')?.remove() - - return Promise.resolve({ dom }) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/morning-brew-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/morning-brew-handler.ts deleted 
file mode 100644 index f187ac0dc..000000000 --- a/packages/puppeteer-parse/content-handler/src/newsletters/morning-brew-handler.ts +++ /dev/null @@ -1,35 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' - -export class MorningBrewHandler extends ContentHandler { - constructor() { - super() - this.senderRegex = /Morning Brew / - this.urlRegex = /View Online<\/a>/ - this.name = 'morningbrew' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const host = this.name + '.com' - // check if url ends with morningbrew.com - return new URL(url).hostname.endsWith(host) - } - - async preHandle(url: string, dom: Document): Promise { - // retain the width of the cells in the table of market info - dom.querySelectorAll('.markets-arrow-cell').forEach((td) => { - const table = td.closest('table') - if (table) { - const bubbleTable = table.querySelector('.markets-bubble') - if (bubbleTable) { - // replace the nested table with the text - const e = bubbleTable.querySelector('.markets-table-text') - e && bubbleTable.parentNode?.replaceChild(e, bubbleTable) - } - // set custom class for the table - table.className = 'morning-brew-markets' - } - }) - - return Promise.resolve({ dom }) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/revue-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/revue-handler.ts deleted file mode 100644 index d8c8f911c..000000000 --- a/packages/puppeteer-parse/content-handler/src/newsletters/revue-handler.ts +++ /dev/null @@ -1,46 +0,0 @@ -import { ContentHandler } from '../content-handler' -import { parseHTML } from 'linkedom' - -export class RevueHandler extends ContentHandler { - constructor() { - super() - this.name = 'revue' - } - - findNewsletterHeaderHref(dom: Document): string | undefined { - const viewOnline = dom.querySelectorAll('table tr td a[target="_blank"]') - let res: string | undefined = undefined - viewOnline.forEach((e) => { - if (e.textContent === 
'View online') { - res = e.getAttribute('href') || undefined - } - }) - return res - } - - async isNewsletter(input: { - postHeader: string - from: string - unSubHeader: string - html: string - }): Promise { - const dom = parseHTML(input.html).document - if ( - dom.querySelectorAll('img[src*="getrevue.co"], img[src*="revue.email"]') - .length > 0 - ) { - const getrevueUrl = this.findNewsletterHeaderHref(dom) - if (getrevueUrl) { - return Promise.resolve(true) - } - } - return false - } - - async parseNewsletterUrl( - postHeader: string, - html: string - ): Promise { - return this.findNewsletterUrl(html) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/newsletters/substack-handler.ts b/packages/puppeteer-parse/content-handler/src/newsletters/substack-handler.ts deleted file mode 100644 index 164068623..000000000 --- a/packages/puppeteer-parse/content-handler/src/newsletters/substack-handler.ts +++ /dev/null @@ -1,90 +0,0 @@ -import addressparser from 'addressparser' -import { ContentHandler, PreHandleResult } from '../content-handler' -import { parseHTML } from 'linkedom' - -export class SubstackHandler extends ContentHandler { - constructor() { - super() - this.name = 'substack' - } - - shouldPreHandle(url: string, dom: Document): boolean { - const host = this.name + '.com' - // check if url ends with substack.com - // or has a profile image hosted at substack.com - return ( - new URL(url).hostname.endsWith(host) || - !!dom - .querySelector('.email-body img') - ?.getAttribute('src') - ?.includes(host) - ) - } - - async preHandle(url: string, dom: Document): Promise { - const body = dom.querySelector('.email-body-container') - - // this removes header and profile avatar - body?.querySelector('.header')?.remove() - body?.querySelector('.preamble')?.remove() - body?.querySelector('.meta-author-wrap')?.remove() - // this removes meta button - body?.querySelector('.post-meta')?.remove() - // this removes footer - 
body?.querySelector('.post-cta')?.remove() - body?.querySelector('.container-border')?.remove() - body?.querySelector('.footer')?.remove() - - return Promise.resolve(dom) - } - - findNewsletterHeaderHref(dom: Document): string | undefined { - // Substack header links - const postLink = dom.querySelector('h1 a ') - if (postLink) { - return postLink.getAttribute('href') || undefined - } - - return undefined - } - - async isNewsletter({ - postHeader, - html, - }: { - postHeader: string - from: string - unSubHeader: string - html: string - }): Promise { - if (postHeader) { - return Promise.resolve(true) - } - const dom = parseHTML(html).document - // substack newsletter emails have tables with a *post-meta class - if (dom.querySelector('table[class$="post-meta"]')) { - return true - } - // If the article has a header link, and substack icons its probably a newsletter - const href = this.findNewsletterHeaderHref(dom) - const heartIcon = dom.querySelector( - 'table tbody td span a img[src*="HeartIcon"]' - ) - const recommendIcon = dom.querySelector( - 'table tbody td span a img[src*="RecommendIconRounded"]' - ) - return Promise.resolve(!!(href && (heartIcon || recommendIcon))) - } - - async parseNewsletterUrl( - postHeader: string, - html: string - ): Promise { - // raw SubStack newsletter url is like - // we need to get the real url from the raw url - if (postHeader && addressparser(postHeader).length > 0) { - return Promise.resolve(addressparser(postHeader)[0].name) - } - return this.findNewsletterUrl(html) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/apple-news-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/apple-news-handler.ts deleted file mode 100644 index 0b4026fb6..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/apple-news-handler.ts +++ /dev/null @@ -1,31 +0,0 @@ -import axios from 'axios' -import { parseHTML } from 'linkedom' -import { ContentHandler, PreHandleResult } from 
'../content-handler' - -export class AppleNewsHandler extends ContentHandler { - constructor() { - super() - this.name = 'Apple News' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const u = new URL(url) - return u.hostname === 'apple.news' - } - - async preHandle(url: string, document?: Document): Promise { - const MOBILE_USER_AGENT = - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36' - const response = await axios.get(url, { - headers: { 'User-Agent': MOBILE_USER_AGENT }, - }) - const data = response.data as string - const dom = parseHTML(data).document - // make sure it's a valid URL by wrapping in new URL - const href = dom - .querySelector('span.click-here') - ?.parentElement?.getAttribute('href') - const u = href ? new URL(href) : undefined - return { url: u?.href } - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/bloomberg-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/bloomberg-handler.ts deleted file mode 100644 index a867a3503..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/bloomberg-handler.ts +++ /dev/null @@ -1,41 +0,0 @@ -import axios from 'axios' -import { parseHTML } from 'linkedom' -import { ContentHandler, PreHandleResult } from '../content-handler' - -export class BloombergHandler extends ContentHandler { - constructor() { - super() - this.name = 'Bloomberg' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const BLOOMBERG_URL_MATCH = - /https?:\/\/(www\.)?bloomberg.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_+.~#?&/=]*)/ - return BLOOMBERG_URL_MATCH.test(url.toString()) - } - - async preHandle(url: string, document?: Document): Promise { - console.log('prehandling bloomberg url', url) - - try { - const response = await axios.get('https://app.scrapingbee.com/api/v1', { - params: { - api_key: process.env.SCRAPINGBEE_API_KEY, - url: url, - return_page_source: true, - block_ads: 
true, - block_resources: false, - }, - }) - const dom = parseHTML(response.data).document - return { - title: dom.title, - content: dom.querySelector('body')?.innerHTML, - url: url, - } - } catch (error) { - console.error('error prehandling bloomberg url', error) - throw error - } - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/derstandard-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/derstandard-handler.ts deleted file mode 100644 index 28742a3e5..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/derstandard-handler.ts +++ /dev/null @@ -1,34 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' -import axios from 'axios' -import { parseHTML } from 'linkedom' - -export class DerstandardHandler extends ContentHandler { - constructor() { - super() - this.name = 'Derstandard' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const u = new URL(url) - return u.hostname === 'www.derstandard.at' - } - - async preHandle(url: string, document?: Document): Promise { - const response = await axios.get(url, { - // set cookie to give consent to get the article - headers: { - cookie: `DSGVO_ZUSAGE_V1=true; consentUUID=2bacb9c1-1e80-4be0-9f7b-ee987cf4e7b0_6`, - }, - }) - const content = response.data as string - - const dom = parseHTML(content).document - const titleElement = dom.querySelector('.article-title') - titleElement && titleElement.remove() - - return { - content: dom.body.outerHTML, - title: titleElement?.textContent || undefined, - } - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/image-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/image-handler.ts deleted file mode 100644 index 068a1cc66..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/image-handler.ts +++ /dev/null @@ -1,32 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' - -export class ImageHandler extends 
ContentHandler { - constructor() { - super() - this.name = 'Image' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const IMAGE_URL_PATTERN = /(https?:\/\/.*\.(?:jpg|jpeg|png|webp))/i - return IMAGE_URL_PATTERN.test(url.toString()) - } - - async preHandle(url: string, document?: Document): Promise { - const title = url.toString().split('/').pop() || 'Image' - const content = ` - - - ${title} - - - - -
- ${title} -
- - ` - - return Promise.resolve({ title, content }) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/medium-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/medium-handler.ts deleted file mode 100644 index 211a30c37..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/medium-handler.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' - -export class MediumHandler extends ContentHandler { - constructor() { - super() - this.name = 'Medium' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const u = new URL(url) - return u.hostname.endsWith('medium.com') - } - - async preHandle(url: string, document?: Document): Promise { - console.log('prehandling medium url', url) - - try { - const res = new URL(url) - res.searchParams.delete('source') - return Promise.resolve({ url: res.toString() }) - } catch (error) { - console.error('error prehandling medium url', error) - throw error - } - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/pdf-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/pdf-handler.ts deleted file mode 100644 index 4c4ef748d..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/pdf-handler.ts +++ /dev/null @@ -1,18 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' - -export class PdfHandler extends ContentHandler { - constructor() { - super() - this.name = 'PDF' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const u = new URL(url) - const path = u.pathname.replace(u.search, '') - return path.endsWith('.pdf') - } - - async preHandle(_url: string, document?: Document): Promise { - return Promise.resolve({ contentType: 'application/pdf' }) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/scrapingBee-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/scrapingBee-handler.ts deleted file mode 
100644 index 4c04d00e8..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/scrapingBee-handler.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' -import axios from 'axios' -import { parseHTML } from 'linkedom' - -export class ScrapingBeeHandler extends ContentHandler { - constructor() { - super() - this.name = 'ScrapingBee' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - const u = new URL(url) - const hostnames = ['nytimes.com', 'news.google.com'] - - return hostnames.some((h) => u.hostname.endsWith(h)) - } - - async preHandle(url: string, document?: Document): Promise { - console.log('prehandling url with scrapingbee', url) - - try { - const response = await axios.get('https://app.scrapingbee.com/api/v1', { - params: { - api_key: process.env.SCRAPINGBEE_API_KEY, - url: url, - return_page_source: true, - block_ads: true, - block_resources: false, - }, - }) - const dom = parseHTML(response.data).document - return { title: dom.title, content: response.data as string, url: url } - } catch (error) { - console.error('error prehandling url w/scrapingbee', error) - throw error - } - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/t-dot-co-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/t-dot-co-handler.ts deleted file mode 100644 index 277a8c087..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/t-dot-co-handler.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { ContentHandler } from '../content-handler' -import axios from 'axios' - -export class TDotCoHandler extends ContentHandler { - constructor() { - super() - this.name = 't.co' - } - - shouldResolve(url: string): boolean { - const T_DOT_CO_URL_MATCH = /^https:\/\/(?:www\.)?t\.co\/.*$/ - return T_DOT_CO_URL_MATCH.test(url) - } - - async resolve(url: string) { - return axios - .get(url, { maxRedirects: 0, validateStatus: null }) - .then((res) => { - return new 
URL(res.headers.location).href - }) - .catch((err) => { - console.log('err with t.co url', err) - return undefined - }) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/twitter-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/twitter-handler.ts deleted file mode 100644 index ddd37e45c..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/twitter-handler.ts +++ /dev/null @@ -1,167 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' -import axios from 'axios' -import { DateTime } from 'luxon' -import _ from 'underscore' - -const TWITTER_BEARER_TOKEN = process.env.TWITTER_BEARER_TOKEN -const TWITTER_URL_MATCH = - /twitter\.com\/(?:#!\/)?(\w+)\/status(?:es)?\/(\d+)(?:\/.*)?/ - -const getTweetFields = () => { - const TWEET_FIELDS = - '&tweet.fields=attachments,author_id,conversation_id,created_at,' + - 'entities,geo,in_reply_to_user_id,lang,possibly_sensitive,public_metrics,referenced_tweets,' + - 'source,withheld' - const EXPANSIONS = '&expansions=author_id,attachments.media_keys' - const USER_FIELDS = - '&user.fields=created_at,description,entities,location,pinned_tweet_id,profile_image_url,protected,public_metrics,url,verified,withheld' - const MEDIA_FIELDS = - '&media.fields=duration_ms,height,preview_image_url,url,media_key,public_metrics,width' - - return `${TWEET_FIELDS}${EXPANSIONS}${USER_FIELDS}${MEDIA_FIELDS}` -} - -const getTweetById = async (id: string) => { - const BASE_ENDPOINT = 'https://api.twitter.com/2/tweets/' - const apiUrl = new URL(BASE_ENDPOINT + id + '?' 
+ getTweetFields()) - - if (!TWITTER_BEARER_TOKEN) { - throw new Error('No Twitter bearer token found') - } - - return axios.get(apiUrl.toString(), { - headers: { - Authorization: `Bearer ${TWITTER_BEARER_TOKEN}`, - redirect: 'follow', - }, - }) -} - -const titleForAuthor = (author: { name: string }) => { - return `${author.name} on Twitter` -} - -const tweetIdFromStatusUrl = (url: string): string | undefined => { - const match = url.toString().match(TWITTER_URL_MATCH) - return match?.[2] -} - -const formatTimestamp = (timestamp: string) => { - return DateTime.fromJSDate(new Date(timestamp)).toLocaleString( - DateTime.DATETIME_FULL - ) -} - -export class TwitterHandler extends ContentHandler { - constructor() { - super() - this.name = 'Twitter' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - return !!TWITTER_BEARER_TOKEN && TWITTER_URL_MATCH.test(url.toString()) - } - - async preHandle(url: string, document?: Document): Promise { - console.log('prehandling twitter url', url) - - const tweetId = tweetIdFromStatusUrl(url) - if (!tweetId) { - throw new Error('could not find tweet id in url') - } - const tweetData = (await getTweetById(tweetId)).data as { - data: { - author_id: string - text: string - entities: { - urls: [ - { - url: string - expanded_url: string - display_url: string - } - ] - } - created_at: string - } - includes: { - users: [ - { - id: string - name: string - profile_image_url: string - username: string - } - ] - media: [ - { - preview_image_url: string - type: string - url: string - } - ] - } - } - const authorId = tweetData.data.author_id - const author = tweetData.includes.users.filter((u) => (u.id = authorId))[0] - // escape html entities in title - const title = _.escape(titleForAuthor(author)) - const authorImage = author.profile_image_url.replace('_normal', '_400x400') - - let text = tweetData.data.text - if (tweetData.data.entities && tweetData.data.entities.urls) { - for (const urlObj of tweetData.data.entities.urls) { - 
text = text.replace( - urlObj.url, - `
${urlObj.display_url}` - ) - } - } - - const front = ` -
-

${text}

- ` - - let includesHtml = '' - if (tweetData.includes.media) { - includesHtml = tweetData.includes.media - .map((m) => { - const linkUrl = m.type == 'photo' ? m.url : url - const previewUrl = m.type == 'photo' ? m.url : m.preview_image_url - const mediaOpen = ` - - - - ` - return mediaOpen - }) - .join('\n') - } - - const back = ` - — ${ - author.username - } ${author.name} ${formatTimestamp( - tweetData.data.created_at - )} -
- ` - const content = ` - - - - - - - - ${front} - ${includesHtml} - ${back} - ` - - return { content, url, title } - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/wikipedia-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/wikipedia-handler.ts deleted file mode 100644 index 8c3a176fd..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/wikipedia-handler.ts +++ /dev/null @@ -1,20 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' - -export class WikipediaHandler extends ContentHandler { - constructor() { - super() - this.name = 'wikipedia' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - return new URL(url).hostname.endsWith('wikipedia.org') - } - - async preHandle(url: string, dom: Document): Promise { - // This removes the [edit] anchors from wikipedia pages - dom.querySelectorAll('.mw-editsection').forEach((e) => e.remove()) - // this removes the sidebar - dom.querySelector('.infobox')?.remove() - return Promise.resolve({ dom }) - } -} diff --git a/packages/puppeteer-parse/content-handler/src/websites/youtube-handler.ts b/packages/puppeteer-parse/content-handler/src/websites/youtube-handler.ts deleted file mode 100644 index 4cdb7ee98..000000000 --- a/packages/puppeteer-parse/content-handler/src/websites/youtube-handler.ts +++ /dev/null @@ -1,76 +0,0 @@ -import { ContentHandler, PreHandleResult } from '../content-handler' -import axios from 'axios' -import _ from 'underscore' - -const YOUTUBE_URL_MATCH = - /^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/ - -export const getYoutubeVideoId = (url: string) => { - const u = new URL(url) - const videoId = u.searchParams.get('v') - if (!videoId) { - const match = url.toString().match(YOUTUBE_URL_MATCH) - if (match === null || match.length < 6 || !match[5]) { - return undefined - } - return match[5] - } - return videoId -} - -export class YoutubeHandler 
extends ContentHandler { - constructor() { - super() - this.name = 'Youtube' - } - - shouldPreHandle(url: string, dom?: Document): boolean { - return YOUTUBE_URL_MATCH.test(url.toString()) - } - - async preHandle(url: string, document?: Document): Promise { - const videoId = getYoutubeVideoId(url) - if (!videoId) { - return {} - } - - const oembedUrl = - `https://www.youtube.com/oembed?format=json&url=` + - encodeURIComponent(`https://www.youtube.com/watch?v=${videoId}`) - const oembed = (await axios.get(oembedUrl.toString())).data as { - title: string - width: number - height: number - thumbnail_url: string - author_name: string - author_url: string - } - // escape html entities in title - const title = _.escape(oembed.title) - const ratio = oembed.width / oembed.height - const thumbnail = oembed.thumbnail_url - const height = 350 - const width = height * ratio - const authorName = _.escape(oembed.author_name) - - const content = ` - - ${title} - - - - - - - - -

${title}

- - - ` - - console.log('got video id', videoId) - - return { content, title: 'Youtube Content' } - } -} diff --git a/packages/puppeteer-parse/content-handler/test/apple-news-handler.test.ts b/packages/puppeteer-parse/content-handler/test/apple-news-handler.test.ts deleted file mode 100644 index 1584f9e28..000000000 --- a/packages/puppeteer-parse/content-handler/test/apple-news-handler.test.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { AppleNewsHandler } from '../src/websites/apple-news-handler' - -describe('open a simple web page', () => { - it('should return a response', async () => { - const response = await new AppleNewsHandler().preHandle( - 'https://apple.news/AxjzaZaPvSn23b67LhXI5EQ' - ) - console.log('response', response) - }) -}) diff --git a/packages/puppeteer-parse/content-handler/test/babel-register.js b/packages/puppeteer-parse/content-handler/test/babel-register.js deleted file mode 100644 index a6f65f60a..000000000 --- a/packages/puppeteer-parse/content-handler/test/babel-register.js +++ /dev/null @@ -1,3 +0,0 @@ -const register = require('@babel/register').default - -register({ extensions: ['.ts', '.tsx', '.js', '.jsx'] }) diff --git a/packages/puppeteer-parse/content-handler/test/data/beehiiv-newsletter.html b/packages/puppeteer-parse/content-handler/test/data/beehiiv-newsletter.html deleted file mode 100644 index 369d42af0..000000000 --- a/packages/puppeteer-parse/content-handler/test/data/beehiiv-newsletter.html +++ /dev/null @@ -1,15 +0,0 @@ -I talked to a guy that spent $30M on a Beeple - -
-
- - - diff --git a/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-newsletter.html b/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-newsletter.html deleted file mode 100644 index 0f67fd04d..000000000 --- a/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-newsletter.html +++ /dev/null @@ -1 +0,0 @@ -


---------- Forwarded message ---------
From: Andrew Methven <slowchinese@substack.com>
Date: Fri, Feb 18, 2022 at 11:57 PM
Subject: Companies that eat people
To: <XXXXXXXXXX@gmail.com>


Slow Chinese 每周漫闻 ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌

Companies that eat people

Slow Chinese 每周漫闻

The phrase, ‘eating people’ (吃人 chī rén), is used to criticise companies in China that exploit their employees.

It’s originally from Lǔ Xùn’s (鲁迅), A Madman's Diary (狂人日记 kuángrén rìjì), published in 1918:

我翻开历史一查,这历史没有年代。歪歪斜斜的每页上都写着“仁义道德”几个字,我横竖睡不着,仔细看了半夜,才从字缝里看出来,满本上都写着两个字“吃人"!

As I look through the pages of history, I see there are no dates. On each page, written messily, are the characters, ‘benevolence and morality’. I can’t sleep. I read into the night. Finally, I find hidden between the characters across the page, the words, ‘eating people’.

The times have changed since Lu Xun made that observation more than 100 years ago, but the culture of ‘eating people’ has not, according to social media comments this week, such as this one:

吃人的事实,从来没有变过,历朝历代都是如此 - The reality of [companies] exploiting their employees is nothing new. It’s been the same throughout history.

Two of China’s biggest tech companies, Tencent and Bilibili, have recently been accused of ‘eating people’, abusing and exploiting their staff.

So that’s what we discuss this week.

  • Conversations worth consuming: interview with Zhāng Yìfēi 张义飞 a former employee of Tencent

  • Words of the week: coverage and social media commentary of a recent death of a Bilibili employee allegedly due to overwork.

The audio version of this newsletter is already live - become a member to access it in your podcast app!

Use this link to claim a one-month free trial of the membership to give the full experience a go:

One-month free trial


1. CONVERSATIONS WITH CONSUMING

腾讯带头“反内卷”:光子工作室拒绝996,保障双休_游戏

Interview with Zhang Yifei

Two weeks ago a 25-year-old programmer at Tencent, Zhāng Yìfēi 张义飞, became an Internet sensation after standing up to his bosses at the company. He announced in an internal group chat that he was quitting his job, which then went viral on social media.

If 20-hour days is what the company wants, he wrote, ‘I’ll resign tomorrow’

36Kr interviewed Zhang this week (in Chinese). He talks more about the overtime culture at Tencent, and why he dared to take on his company in such a public way - he already had another job lined up.

There are some excellent words in his description of life as a working person at Tencent.

Useful words

  • 卡 kǎ - stop, block

    什么时候离职的?有人卡你吗 - When did you leave your job? Did they try to stop you?

  • 剥削 bō xuē - exploit

    加班严重、996工作制、互联网巨头压榨剥削员工等话题再次被拿来讨论

    - Topics such as serious overtime, the 996 work system, and the exploitation of employees by Internet giants are being discussed again.

  • 底气 dǐ qì - confidence, back up

    自己已经提前拿到其他公司的offer,比较有底气 - I already had an offer from another company, so I was relatively confident about doing it.

  • 忌惮 jì dàn - fear, be afraid of

    如果一些互联网大厂因此忌惮、不录用我,我正好也不想去这种加班严重的地方 - If some big Internet companies are afraid to hire me, that’s fine by me. I also don't want to work in a company with such heavy overtime.

  • 手软 shǒu ruǎn - ‘soft hand’, forgiving

    不要特立独行,搞小团体,否则他不会手软 - Do not march to a different beat or form small cliques. He will come down hard on this kind of behaviour.

  • 打硬仗 dǎ yìng zhàng - fight a hard war

    张小龙管理下的企业微信,经常会强调用小而精的团队打硬仗 - The company Wechat, under Zhang Xiaolong’s management, would often emphasise using a small and efficient team to work on tight deadlines.

    • Note: a common phrase used in Chinese companies when a team is working intensely on a project or against a ridiculous deadline.

    • Related: 打胜仗 dǎ shèngzhàng - win a war

  • 喊口号 hǎn kǒuhào - shouting slogans

    但大家普遍的看法是,不想看到空洞地喊口号,只想看到具体行为 - The general view is they don’t want to see people shouting empty slogans. They want to see action.

Idioms

  • 初出茅庐 chūchū máolú - ‘just come out of the thatched cottage’; inexperienced, wet behind the ears

    但对于大众而言,互联网巨头和初出茅庐的应届生,相比较下毕竟力量悬殊 - There’s no comparison between the power of the big internet companies and graduate employees with no experience.

  • 昏昏沉沉 hūnhūn chénchén - feeling sleepy

    来这里入职两个月,感到昏昏沉沉,记忆力下降很多

    - I’ve been here for two months. I feel tired and my memory has declined a lot.

  • 热火朝天 rèhuǒ cháotiān - ‘hot fire face sky’; vigorously, with energy

    到点的时候,差不多一半人还没走,都在热火朝天地讨论工作

    - When it was time to finish at the end of the day, around half of the team stayed behind to talk energetically about their work.


2. WORDS OF THE WEEK

上海之旅-前往Bilibili总部! - 哔哩哔哩

Bilibili eats people

A man who headed a content moderation department at Chinese video-streaming site Bilibili died last week after suffering a cerebral hemorrhage while working a Chinese New Year holiday shift.

The company was heavily criticised (Sohu - in Chinese) of having a toxic work culture.

One of the top comments on social media adapted the line from Lu Xun’s A Madman’s Diary. But instead of looking through the pages of history, overworked netizens find the same message hidden in their payslips:

我翻开工资单一查,这工资单没有工资,歪歪斜斜的每条都写着“迟到扣款”四个字。我横竖睡不着,仔细看了半夜,才从字缝里看出来,满本上都写着两个字“吃人"!

I glance at my payslip. I don’t see any pay on it. All I see are the four characters scrawled across the page: ‘fined for being late’. I can’t sleep. I look at it deep into the night. I finally find hidden between the characters across the page, written the words: ‘eating people’.

The words shared below are from the Sohu article and also from social media comments.

Useful words

  • 猝死 cù sǐ - sudden death, die suddenly

    他2月5日凌晨脑出血猝死 - He died suddenly in the early hours of the morning on 5 February.

  • 嗝屁 gé pì - hiccup, to die

    对大企业嗝屁几个不算啥,对于各个家庭你就是唯一呀 - A few people dying means nothing to a big company, but for a family it’s their only child!

    • Related: 翘辫子 qiào biàn zi - make braids - kick the bucket (Qing Dynasty reference relating to when men had to remove their braids).

      他因为加班严重而翘辫子了 - He died because of too much overtime.

  • 企图 qǐ tú - try to, seek to do something (negative)

    通过各种企图将这件事压下来,我决定发声 - Bilibili attempted to suppress the situation through different means, so I decided to speak up.

    • Note: similar to 试图 shì tú, but more negative connotations

  • 腐朽 fǔ xiǔ - degenerate, rotten

    道出了资本家的腐朽和恶臭 - Reeks of the stench and rot of capitalism.

  • 压垮 yā kuǎ - crush

    就是无情的压榨现有劳动力,能压垮一个是一个,多招一个人算我输 - It’s the callous exploitation of the current employees. The company tries to squeeze as much as possible from each and every one of them rather than hiring one more employee.

Idioms

  • 血汗工厂 xuèhàn gōngchǎng - ‘blood sweat factory’ - sweat shop

    B站因员工猝死一事,被推进了“血汗工厂”的舆论漩涡 - Bilibili has been dragged into a public debate about the company being a ‘sweatshop’ due to the sudden death of an employee.

    • Note: The pronunciation of 血 is normally xiě in colloquial phrases, and xuè in technical terms. But the rule is vague and not very helpful. In this phrase it's always xuè. But in 血汗钱 xiěhàn qián, ‘hard earned money’, xiě is more common. So confusing!

  • 混淆视听 hùnxiáo shìtīng - to muddle or confuse an issue

    晚9到早9确实不属于加班,因为是大夜班的正常时间,大厂就这样混淆视听? - 9pm to 9am does not count as overtime. But that’s because the night shift is a normal working shift for these big tech companies. They are muddling up the matter.

  • 枯燥乏味 kūzào fáwèi - boring

    做审核的确工作强度很大,而且枯燥乏味 - Being a content moderator is a very intense job. It’s also extremely boring.

    • More: 枯燥无味 kūzào wúwèi - boring (same meaning)

  • 恬不知耻 tián bù zhī chǐ - shameless

    觉得正常吗?居然还能如此恬不知耻的说“没有让他加班” - Is this normal? How can they be so shameless in saying the company did not ‘ask him to work overtime’?

  • 难上加难 nán shàng jiā nán - very difficult

    只要企业做大了,普通职工想维权难上加难 - When the company gets big it’s almost impossible for employees to protect their rights.

    • Related: 雪上加霜 xuěshàng jiāshuāng - make matters worse

Colloquial phrases

  • 万变不离其宗 wàn biàn bùlí qízōng - make ten thousand changes but remain the same in essence

    好像这些大公司公关都是万变不离其宗,核心就是推卸责任!- It seems the PR of these big companies tells a nice story, but in essence they don’t change. They are merely avoiding their responsibility.

  • 不见棺材不落泪 bùjiàn guāncai bù luò lèi - won’t cry until they see the coffin

    这也说得出口啊!真是不见血不掉泪啊 - They actually say this? Do they really have to let somebody die before they accept they are in the wrong?

    • More: I wrote more about this colloquialism in SupChina’s phrase of the week.

    • Related: 不到黄河心不死 bù dào huánghé xīn bù sǐ - not to stop until one reaches the Yellow River; refuse to give up until all hope is gone


3. RECOMMENDATIONS

Become a member of the community

As a member of the community you get access to unique resources to help you master modern Mandarin, learn, use, and understand Chinese language the way people speak it today.

  • 📚 Resources: Pleco downloads, word lists, and example sentences print-outs and audio download for each issue.

  • 🔉 Audio: audio version of the newsletter delivered as a member-only podcast every Saturday morning (before the free newsletter is published)

  • 🤓 Archive: full database of all words and phrases in the archive (nearly 1,300!) searchable according to word-type, sector and topic with audio and example sentences for each entry, updated weekly.

Use this link to claim a one-month free trial of the membership to give the full experience a go.

One-month free trial

That’s it for this week.

I look forward to seeing you in your inbox same time next weekend.

Andrew

+++

ps - please do share this newsletter on your social channels and with your networks

Share Slow Chinese 每周漫闻

Like

© 2022 Andrew Methven Unsubscribe
548 Market Street PMB 72296, San Francisco, CA 94104

Publish on Substack

diff --git a/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-welcome-email.html b/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-welcome-email.html deleted file mode 100644 index feb14eeb3..000000000 --- a/packages/puppeteer-parse/content-handler/test/data/substack-forwarded-welcome-email.html +++ /dev/null @@ -1 +0,0 @@ -


---------- Forwarded message ---------
From: Andrew Methven <slowchinese@substack.com>
Date: Thu, Dec 9, 2021 at 11:27 PM
Subject: How can Slow Chinese 每周漫闻 help you?
To: <XXXXXXXXXX@gmail.com>


Thank you for subscribing to for Slow Chinese 每周漫闻 ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌

Thanks so much for subscribing to Slow Chinese 每周漫闻 and welcome aboard!

I’m excited to help you improve and practice your Chinese language skills.

Here’s a quick way I can help:

Reply to this email and tell me about your story of learning Chinese and what challenges you currently have with the language.

I’ll reply with a specific suggestion to help you.

Also, to make sure the next issue of the newsletter doesn’t land in your spam folder, add my email address to your contacts.

Thanks!

Andrew

© 2021 Andrew Methven Unsubscribe
548 Market Street PMB 72296, San Francisco, CA 94104

Publish on Substack

diff --git a/packages/puppeteer-parse/content-handler/test/data/substack-private-forwarded-newsletter.html b/packages/puppeteer-parse/content-handler/test/data/substack-private-forwarded-newsletter.html deleted file mode 100644 index bb240dbe8..000000000 --- a/packages/puppeteer-parse/content-handler/test/data/substack-private-forwarded-newsletter.html +++ /dev/null @@ -1,2 +0,0 @@ -


---------- Forwarded message ---------
From: giggs <darkgiggsxx@gmail.com>
Date: Wed, Mar 2, 2022 at 5:29 PM
Subject: Fwd: The German Retreat From Nuclear Power
To: Radek <radoslaw.jurga@gmail.com>



---------- Forwarded message ---------
De : Bismarck Analysis <bismarck@substack.com>
Date: mer. 2 mars 2022 à 15:02
Subject: The German Retreat From Nuclear Power
To: <darkgiggsxx@gmail.com>


Germany's economic priorities are torn between industrial growth and energy degrowth. As its last nuclear plants are shut down, the choice is between relying on fossil fuels or closing factories. ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌ ‌

The German Retreat From Nuclear Power

Germany's economic priorities are torn between industrial growth and energy degrowth. As its last nuclear plants are shut down, the choice is between relying on fossil fuels or closing factories.

Isar Nuclear Power Plant near Landshut, Germany in 2016. The Isar station is scheduled to be shut down by the end of 2022. Photo by Dennis Hansch. Source.
-
diff --git a/packages/puppeteer-parse/content-handler/test/newsletter.test.ts b/packages/puppeteer-parse/content-handler/test/newsletter.test.ts deleted file mode 100644 index dd3b7941c..000000000 --- a/packages/puppeteer-parse/content-handler/test/newsletter.test.ts +++ /dev/null @@ -1,191 +0,0 @@ -import 'mocha' -import * as chai from 'chai' -import { expect } from 'chai' -import chaiAsPromised from 'chai-as-promised' -import chaiString from 'chai-string' -import { SubstackHandler } from '../src/newsletters/substack-handler' -import { AxiosHandler } from '../src/newsletters/axios-handler' -import { BloombergNewsletterHandler } from '../src/newsletters/bloomberg-newsletter-handler' -import { GolangHandler } from '../src/newsletters/golang-handler' -import { MorningBrewHandler } from '../src/newsletters/morning-brew-handler' -import nock from 'nock' -import { generateUniqueUrl } from '../src/content-handler' -import fs from 'fs' -import { BeehiivHandler } from '../src/newsletters/beehiiv-handler' - -chai.use(chaiAsPromised) -chai.use(chaiString) - -const load = (path: string): string => { - return fs.readFileSync(path, 'utf8') -} - -describe('Newsletter email test', () => { - describe('#getNewsletterUrl()', () => { - it('returns url when email is from SubStack', async () => { - const rawUrl = '' - - await expect( - new SubstackHandler().parseNewsletterUrl(rawUrl, '') - ).to.eventually.equal('https://hongbo130.substack.com/p/tldr') - }) - - it('returns url when email is from Axios', async () => { - const url = 'https://axios.com/blog/the-best-way-to-build-a-web-app' - const html = `View in browser at ${url}` - - await expect( - new AxiosHandler().parseNewsletterUrl('', html) - ).to.eventually.equal(url) - }) - - it('returns url when email is from Bloomberg', async () => { - const url = 'https://www.bloomberg.com/news/google-is-now-a-partner' - const html = ` - - View in browser - - ` - - await expect( - new BloombergNewsletterHandler().parseNewsletterUrl('', html) - 
).to.eventually.equal(url) - }) - - it('returns url when email is from Golang Weekly', async () => { - const url = 'https://www.golangweekly.com/first' - const html = ` - Read on the Web - ` - - await expect( - new GolangHandler().parseNewsletterUrl('', html) - ).to.eventually.equal(url) - }) - - it('returns url when email is from Morning Brew', async () => { - const url = 'https://www.morningbrew.com/daily/issues/first' - const html = ` - View Online - ` - - await expect( - new MorningBrewHandler().parseNewsletterUrl('', html) - ).to.eventually.equal(url) - }) - }) - - describe('get author from email address', () => { - it('returns author when email is from Substack', () => { - const from = 'Jackson Harper from Omnivore App ' - expect(new AxiosHandler().parseAuthor(from)).to.equal( - 'Jackson Harper from Omnivore App' - ) - }) - - it('returns author when email is from Axios', () => { - const from = 'Mike Allen ' - expect(new AxiosHandler().parseAuthor(from)).to.equal('Mike Allen') - }) - }) - - describe('isProbablyNewsletter', () => { - it('returns true for substack newsletter', async () => { - const html = load('./test/data/substack-forwarded-newsletter.html') - await expect( - new SubstackHandler().isNewsletter({ - html, - postHeader: '', - from: '', - unSubHeader: '', - }) - ).to.eventually.be.true - }) - it('returns true for private forwarded substack newsletter', async () => { - const html = load( - './test/data/substack-private-forwarded-newsletter.html' - ) - await expect( - new SubstackHandler().isNewsletter({ - html, - postHeader: '', - from: '', - unSubHeader: '', - }) - ).to.eventually.be.true - }) - it('returns false for substack welcome email', async () => { - const html = load('./test/data/substack-forwarded-welcome-email.html') - await expect( - new SubstackHandler().isNewsletter({ - html, - postHeader: '', - from: '', - unSubHeader: '', - }) - ).to.eventually.be.false - }) - it('returns true for beehiiv.com newsletter', async () => { - const html = 
load('./test/data/beehiiv-newsletter.html') - await expect( - new BeehiivHandler().isNewsletter({ - html, - postHeader: '', - from: '', - unSubHeader: '', - }) - ).to.eventually.be.true - }) - }) - - describe('findNewsletterUrl', async () => { - it('gets the URL from the header if it is a substack newsletter', async () => { - nock('https://email.mg2.substack.com') - .head( - '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY' - ) - .reply(302, undefined, { - Location: - 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217', - }) - .get('/p/companies-that-eat-people-217') - .reply(200, '') - const html = load('./test/data/substack-forwarded-newsletter.html') - const url = await new SubstackHandler().findNewsletterUrl(html) - // Not sure if the redirects from substack expire, this test could eventually fail - expect(url).to.startWith( - 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217' - ) - }).timeout(10000) - it('gets the URL from the header if it is a beehiiv newsletter', async () => { - nock('https://u23463625.ct.sendgrid.net') - .head( - '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno' - ) - .reply(302, undefined, { - Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple', - }) - .get('/p/talked-guy-spent-30m-beeple') - .reply(200, '') 
- const html = load('./test/data/beehiiv-newsletter.html') - const url = await new BeehiivHandler().findNewsletterUrl(html) - expect(url).to.startWith( - 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' - ) - }) - it('returns undefined if it is not a newsletter', async () => { - const html = load('./test/data/substack-forwarded-welcome-email.html') - const url = await new SubstackHandler().findNewsletterUrl(html) - expect(url).to.be.undefined - }) - }) - - describe('generateUniqueUrl', () => { - it('generates a unique URL', () => { - const url1 = generateUniqueUrl() - const url2 = generateUniqueUrl() - - expect(url1).to.not.eql(url2) - }) - }) -}) diff --git a/packages/puppeteer-parse/content-handler/test/youtube-handler.test.ts b/packages/puppeteer-parse/content-handler/test/youtube-handler.test.ts deleted file mode 100644 index beb4d3a66..000000000 --- a/packages/puppeteer-parse/content-handler/test/youtube-handler.test.ts +++ /dev/null @@ -1,25 +0,0 @@ -import { expect } from 'chai' -import 'mocha' -import { getYoutubeVideoId } from '../src/websites/youtube-handler' - -describe('getYoutubeVideoId', () => { - it('should parse video id out of a URL', async () => { - expect('BnSUk0je6oo').to.eq( - getYoutubeVideoId('https://www.youtube.com/watch?v=BnSUk0je6oo&t=269s') - ) - expect('vFD2gu007dc').to.eq( - getYoutubeVideoId( - 'https://www.youtube.com/watch?v=vFD2gu007dc&list=RDvFD2gu007dc&start_radio=1' - ) - ) - expect('vFD2gu007dc').to.eq( - getYoutubeVideoId('https://youtu.be/vFD2gu007dc') - ) - expect('BMFVCnbRaV4').to.eq( - getYoutubeVideoId('https://youtube.com/watch?v=BMFVCnbRaV4&feature=share') - ) - expect('cg9b4RC87LI').to.eq( - getYoutubeVideoId('https://youtu.be/cg9b4RC87LI?t=116') - ) - }) -}) diff --git a/packages/puppeteer-parse/content-handler/tsconfig.json b/packages/puppeteer-parse/content-handler/tsconfig.json deleted file mode 100644 index aeb8d2c3a..000000000 --- a/packages/puppeteer-parse/content-handler/tsconfig.json +++ /dev/null @@ 
-1,10 +0,0 @@ -{ - "extends": "@tsconfig/node14/tsconfig.json", - "compilerOptions": { - "rootDir": ".", - "declaration": true, - "outDir": "build", - "lib": ["dom"] - }, - "include": ["src"] -} From 4b01fccad8169138757d6eef03f846960c5b6e8a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 3 Oct 2022 14:21:31 +0800 Subject: [PATCH 03/54] Fix content-fetch dockerfile --- packages/content-fetch/Dockerfile | 12 ++++++++++-- packages/content-fetch/Dockerfile-local | 12 ++++++++++-- packages/content-handler/package.json | 5 +++++ packages/content-handler/test/newsletter.test.ts | 2 +- yarn.lock | 7 ++++++- 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/packages/content-fetch/Dockerfile b/packages/content-fetch/Dockerfile index fd025629c..1201c497d 100644 --- a/packages/content-fetch/Dockerfile +++ b/packages/content-fetch/Dockerfile @@ -29,11 +29,19 @@ COPY tsconfig.json . COPY .prettierrc . COPY .eslintrc . -COPY /packages/content-fetch ./packages/content-fetch -COPY /packages/content-handler ./packages/content-handler +COPY /packages/content-handler/package.json ./packages/content-handler/package.json RUN yarn install --pure-lockfile +ADD /packages/content-fetch ./packages/content-fetch +ADD /packages/content-handler ./packages/content-handler +RUN yarn workspace @omnivore/content-handler build + +# After building, fetch the production dependencies +RUN rm -rf /app/packages/content-fetch/node_modules +RUN rm -rf /app/node_modules +RUN yarn install --pure-lockfile --production + EXPOSE 8080 CMD ["yarn", "workspace", "@omnivore/content-fetch", "start"] diff --git a/packages/content-fetch/Dockerfile-local b/packages/content-fetch/Dockerfile-local index 505e4c2da..694ef1a08 100644 --- a/packages/content-fetch/Dockerfile-local +++ b/packages/content-fetch/Dockerfile-local @@ -33,11 +33,19 @@ COPY tsconfig.json . COPY .prettierrc . COPY .eslintrc . 
-COPY /packages/content-fetch ./packages/content-fetch -COPY /packages/content-handler ./packages/content-handler +COPY /packages/content-handler/package.json ./packages/content-handler/package.json RUN yarn install --pure-lockfile +ADD /packages/content-fetch ./packages/content-fetch +ADD /packages/content-handler ./packages/content-handler +RUN yarn workspace @omnivore/content-handler build + +# After building, fetch the production dependencies +RUN rm -rf /app/packages/content-fetch/node_modules +RUN rm -rf /app/node_modules +RUN yarn install --pure-lockfile --production + EXPOSE 8080 CMD ["yarn", "workspace", "@omnivore/content-fetch", "start"] diff --git a/packages/content-handler/package.json b/packages/content-handler/package.json index e4021b3e4..dc4edb4b1 100644 --- a/packages/content-handler/package.json +++ b/packages/content-handler/package.json @@ -15,6 +15,11 @@ "build": "tsc" }, "devDependencies": { + "@types/addressparser": "^1.0.1", + "@types/luxon": "^3.0.1", + "@types/rfc2047": "^2.0.1", + "@types/underscore": "^1.11.4", + "@types/uuid": "^8.3.4", "chai": "^4.3.6", "chai-as-promised": "^7.1.1", "chai-string": "^1.5.0", diff --git a/packages/content-handler/test/newsletter.test.ts b/packages/content-handler/test/newsletter.test.ts index dd3b7941c..b8748b832 100644 --- a/packages/content-handler/test/newsletter.test.ts +++ b/packages/content-handler/test/newsletter.test.ts @@ -172,7 +172,7 @@ describe('Newsletter email test', () => { expect(url).to.startWith( 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' ) - }) + }).timeout(10000) it('returns undefined if it is not a newsletter', async () => { const html = load('./test/data/substack-forwarded-welcome-email.html') const url = await new SubstackHandler().findNewsletterUrl(html) diff --git a/yarn.lock b/yarn.lock index 1edbd4c7a..25ddf6de0 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7896,6 +7896,11 @@ resolved 
"https://registry.yarnpkg.com/@types/luxon/-/luxon-1.27.1.tgz#aceeb2d5be8fccf541237e184e37ecff5faa9096" integrity sha512-cPiXpOvPFDr2edMnOXlz3UBDApwUfR+cpizvxCy0n3vp9bz/qe8BWzHPIEFcy+ogUOyjKuCISgyq77ELZPmkkg== +"@types/luxon@^3.0.1": + version "3.0.1" + resolved "https://registry.yarnpkg.com/@types/luxon/-/luxon-3.0.1.tgz#2b1657096473e24b049bdedf3710f99645f3a17f" + integrity sha512-/LAvk1cMOJt0ghzMFrZEvByUhsiEfeeT2IF53Le+Ki3A538yEL9pRZ7a6MuCxdrYK+YNqNIDmrKU/r2nnw04zQ== + "@types/mdast@^3.0.0": version "3.0.10" resolved "https://registry.yarnpkg.com/@types/mdast/-/mdast-3.0.10.tgz#4724244a82a4598884cbbe9bcfd73dff927ee8af" @@ -8260,7 +8265,7 @@ dependencies: "@types/node" "*" -"@types/uuid@^8.3.0", "@types/uuid@^8.3.1": +"@types/uuid@^8.3.0", "@types/uuid@^8.3.1", "@types/uuid@^8.3.4": version "8.3.4" resolved "https://registry.yarnpkg.com/@types/uuid/-/uuid-8.3.4.tgz#bd86a43617df0594787d38b735f55c805becf1bc" integrity sha512-c/I8ZRb51j+pYGAu5CrFMRxqZ2ke4y2grEBO5AUjgSkSk+qT2Ea+OdWElz/OiMf5MNpn2b17kuVBwZLQJXzihw== From 726df26c150d529601d55bd7157b98745fcd8709 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 3 Oct 2022 15:39:05 +0800 Subject: [PATCH 04/54] Temporarily copy content-handler to cloud functions root dir when doing cloud build --- packages/inbound-email-handler/package.json | 2 +- packages/puppeteer-parse/package.json | 2 +- yarn.lock | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/packages/inbound-email-handler/package.json b/packages/inbound-email-handler/package.json index b11c89e31..61173efcb 100644 --- a/packages/inbound-email-handler/package.json +++ b/packages/inbound-email-handler/package.json @@ -29,7 +29,7 @@ "dependencies": { "@google-cloud/functions-framework": "3.1.2", "@google-cloud/pubsub": "^2.18.4", - "@omnivore/content-handler": "file:./../content-handler", + "@omnivore/content-handler": "file:./content-handler", "@sendgrid/client": "^7.6.0", "@sentry/serverless": "^6.16.1", "addressparser": "^1.0.1", 
diff --git a/packages/puppeteer-parse/package.json b/packages/puppeteer-parse/package.json index 0d1ab90c3..8db80fc53 100644 --- a/packages/puppeteer-parse/package.json +++ b/packages/puppeteer-parse/package.json @@ -6,7 +6,7 @@ "dependencies": { "@google-cloud/logging-winston": "^5.1.1", "@google-cloud/storage": "^5.18.1", - "@omnivore/content-handler": "file:./../content-handler", + "@omnivore/content-handler": "file:./content-handler", "@sentry/serverless": "^6.13.3", "axios": "^0.27.2", "chrome-aws-lambda": "^10.1.0", diff --git a/yarn.lock b/yarn.lock index 25ddf6de0..f60474b2a 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4323,6 +4323,17 @@ underscore "^1.13.6" uuid "^9.0.0" +"@omnivore/content-handler@file:./packages/inbound-email-handler/content-handler": + version "1.0.0" + dependencies: + addressparser "^1.0.1" + axios "^0.27.2" + linkedom "^0.14.16" + luxon "^3.0.4" + rfc2047 "^4.0.1" + underscore "^1.13.6" + uuid "^9.0.0" + "@opentelemetry/api-metrics@0.27.0": version "0.27.0" resolved "https://registry.yarnpkg.com/@opentelemetry/api-metrics/-/api-metrics-0.27.0.tgz#d8eca344ed1155f3ea8a8133ade827b4bb90efbf" From 89a657eec84bde51cb8ed938e03f759c403298c0 Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Mon, 3 Oct 2022 08:09:06 -0700 Subject: [PATCH 05/54] generate swift gql --- .../Services/DataService/GQLSchema.swift | 1267 +++++++++++++++++ 1 file changed, 1267 insertions(+) diff --git a/apple/OmnivoreKit/Sources/Services/DataService/GQLSchema.swift b/apple/OmnivoreKit/Sources/Services/DataService/GQLSchema.swift index f34630594..1876683b6 100644 --- a/apple/OmnivoreKit/Sources/Services/DataService/GQLSchema.swift +++ b/apple/OmnivoreKit/Sources/Services/DataService/GQLSchema.swift @@ -3774,6 +3774,137 @@ extension Selection where TypeLock == Never, Type == Never { typealias DeleteHighlightSuccess = Selection } +extension Objects { + struct DeleteIntegrationError { + let __typename: TypeName = .deleteIntegrationError + let errorCodes: [String: 
[Enums.DeleteIntegrationErrorCode]] + + enum TypeName: String, Codable { + case deleteIntegrationError = "DeleteIntegrationError" + } + } +} + +extension Objects.DeleteIntegrationError: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "errorCodes": + if let value = try container.decode([Enums.DeleteIntegrationErrorCode]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + errorCodes = map["errorCodes"] + } +} + +extension Fields where TypeLock == Objects.DeleteIntegrationError { + func errorCodes() throws -> [Enums.DeleteIntegrationErrorCode] { + let field = GraphQLField.leaf( + name: "errorCodes", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.errorCodes[field.alias!] 
{ + return data + } + throw HttpError.badpayload + case .mocking: + return [] + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias DeleteIntegrationError = Selection +} + +extension Objects { + struct DeleteIntegrationSuccess { + let __typename: TypeName = .deleteIntegrationSuccess + let integration: [String: Objects.Integration] + + enum TypeName: String, Codable { + case deleteIntegrationSuccess = "DeleteIntegrationSuccess" + } + } +} + +extension Objects.DeleteIntegrationSuccess: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "integration": + if let value = try container.decode(Objects.Integration?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + integration = map["integration"] + } +} + +extension Fields where TypeLock == Objects.DeleteIntegrationSuccess { + func integration(selection: Selection) throws -> Type { + let field = GraphQLField.composite( + name: "integration", + arguments: [], + selection: selection.selection + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.integration[field.alias!] 
{ + return try selection.decode(data: data) + } + throw HttpError.badpayload + case .mocking: + return selection.mock() + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias DeleteIntegrationSuccess = Selection +} + extension Objects { struct DeleteLabelError { let __typename: TypeName = .deleteLabelError @@ -6330,6 +6461,322 @@ extension Selection where TypeLock == Never, Type == Never { typealias HighlightStats = Selection } +extension Objects { + struct Integration { + let __typename: TypeName = .integration + let createdAt: [String: DateTime] + let enabled: [String: Bool] + let id: [String: String] + let token: [String: String] + let type: [String: Enums.IntegrationType] + let updatedAt: [String: DateTime] + + enum TypeName: String, Codable { + case integration = "Integration" + } + } +} + +extension Objects.Integration: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "createdAt": + if let value = try container.decode(DateTime?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + case "enabled": + if let value = try container.decode(Bool?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + case "id": + if let value = try container.decode(String?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + case "token": + if let value = try container.decode(String?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + case "type": + if let value = try container.decode(Enums.IntegrationType?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + case "updatedAt": + if 
let value = try container.decode(DateTime?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + createdAt = map["createdAt"] + enabled = map["enabled"] + id = map["id"] + token = map["token"] + type = map["type"] + updatedAt = map["updatedAt"] + } +} + +extension Fields where TypeLock == Objects.Integration { + func createdAt() throws -> DateTime { + let field = GraphQLField.leaf( + name: "createdAt", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.createdAt[field.alias!] { + return data + } + throw HttpError.badpayload + case .mocking: + return DateTime.mockValue + } + } + + func enabled() throws -> Bool { + let field = GraphQLField.leaf( + name: "enabled", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.enabled[field.alias!] { + return data + } + throw HttpError.badpayload + case .mocking: + return Bool.mockValue + } + } + + func id() throws -> String { + let field = GraphQLField.leaf( + name: "id", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.id[field.alias!] { + return data + } + throw HttpError.badpayload + case .mocking: + return String.mockValue + } + } + + func token() throws -> String { + let field = GraphQLField.leaf( + name: "token", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.token[field.alias!] { + return data + } + throw HttpError.badpayload + case .mocking: + return String.mockValue + } + } + + func type() throws -> Enums.IntegrationType { + let field = GraphQLField.leaf( + name: "type", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.type[field.alias!] 
{ + return data + } + throw HttpError.badpayload + case .mocking: + return Enums.IntegrationType.allCases.first! + } + } + + func updatedAt() throws -> DateTime { + let field = GraphQLField.leaf( + name: "updatedAt", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.updatedAt[field.alias!] { + return data + } + throw HttpError.badpayload + case .mocking: + return DateTime.mockValue + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias Integration = Selection +} + +extension Objects { + struct IntegrationsError { + let __typename: TypeName = .integrationsError + let errorCodes: [String: [Enums.IntegrationsErrorCode]] + + enum TypeName: String, Codable { + case integrationsError = "IntegrationsError" + } + } +} + +extension Objects.IntegrationsError: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "errorCodes": + if let value = try container.decode([Enums.IntegrationsErrorCode]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + errorCodes = map["errorCodes"] + } +} + +extension Fields where TypeLock == Objects.IntegrationsError { + func errorCodes() throws -> [Enums.IntegrationsErrorCode] { + let field = GraphQLField.leaf( + name: "errorCodes", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.errorCodes[field.alias!] 
{ + return data + } + throw HttpError.badpayload + case .mocking: + return [] + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias IntegrationsError = Selection +} + +extension Objects { + struct IntegrationsSuccess { + let __typename: TypeName = .integrationsSuccess + let integrations: [String: [Objects.Integration]] + + enum TypeName: String, Codable { + case integrationsSuccess = "IntegrationsSuccess" + } + } +} + +extension Objects.IntegrationsSuccess: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "integrations": + if let value = try container.decode([Objects.Integration]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + integrations = map["integrations"] + } +} + +extension Fields where TypeLock == Objects.IntegrationsSuccess { + func integrations(selection: Selection) throws -> Type { + let field = GraphQLField.composite( + name: "integrations", + arguments: [], + selection: selection.selection + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.integrations[field.alias!] 
{ + return try selection.decode(data: data) + } + throw HttpError.badpayload + case .mocking: + return selection.mock() + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias IntegrationsSuccess = Selection +} + extension Objects { struct Label { let __typename: TypeName = .label @@ -6338,6 +6785,7 @@ extension Objects { let description: [String: String] let id: [String: String] let name: [String: String] + let position: [String: Int] enum TypeName: String, Codable { case label = "Label" @@ -6377,6 +6825,10 @@ extension Objects.Label: Decodable { if let value = try container.decode(String?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) } + case "position": + if let value = try container.decode(Int?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } default: throw DecodingError.dataCorrupted( DecodingError.Context( @@ -6392,6 +6844,7 @@ extension Objects.Label: Decodable { description = map["description"] id = map["id"] name = map["name"] + position = map["position"] } } @@ -6479,6 +6932,21 @@ extension Fields where TypeLock == Objects.Label { return String.mockValue } } + + func position() throws -> Int? { + let field = GraphQLField.leaf( + name: "position", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + return data.position[field.alias!] 
+ case .mocking: + return nil + } + } } extension Selection where TypeLock == Never, Type == Never { @@ -7501,6 +7969,137 @@ extension Selection where TypeLock == Never, Type == Never { typealias MergeHighlightSuccess = Selection } +extension Objects { + struct MoveLabelError { + let __typename: TypeName = .moveLabelError + let errorCodes: [String: [Enums.MoveLabelErrorCode]] + + enum TypeName: String, Codable { + case moveLabelError = "MoveLabelError" + } + } +} + +extension Objects.MoveLabelError: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "errorCodes": + if let value = try container.decode([Enums.MoveLabelErrorCode]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + errorCodes = map["errorCodes"] + } +} + +extension Fields where TypeLock == Objects.MoveLabelError { + func errorCodes() throws -> [Enums.MoveLabelErrorCode] { + let field = GraphQLField.leaf( + name: "errorCodes", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.errorCodes[field.alias!] 
{ + return data + } + throw HttpError.badpayload + case .mocking: + return [] + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias MoveLabelError = Selection +} + +extension Objects { + struct MoveLabelSuccess { + let __typename: TypeName = .moveLabelSuccess + let label: [String: Objects.Label] + + enum TypeName: String, Codable { + case moveLabelSuccess = "MoveLabelSuccess" + } + } +} + +extension Objects.MoveLabelSuccess: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "label": + if let value = try container.decode(Objects.Label?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + label = map["label"] + } +} + +extension Fields where TypeLock == Objects.MoveLabelSuccess { + func label(selection: Selection) throws -> Type { + let field = GraphQLField.composite( + name: "label", + arguments: [], + selection: selection.selection + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.label[field.alias!] 
{ + return try selection.decode(data: data) + } + throw HttpError.badpayload + case .mocking: + return selection.mock() + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias MoveLabelSuccess = Selection +} + extension Objects { struct Mutation { let __typename: TypeName = .mutation @@ -7516,6 +8115,7 @@ extension Objects { let deleteAccount: [String: Unions.DeleteAccountResult] let deleteHighlight: [String: Unions.DeleteHighlightResult] let deleteHighlightReply: [String: Unions.DeleteHighlightReplyResult] + let deleteIntegration: [String: Unions.DeleteIntegrationResult] let deleteLabel: [String: Unions.DeleteLabelResult] let deleteNewsletterEmail: [String: Unions.DeleteNewsletterEmailResult] let deleteReaction: [String: Unions.DeleteReactionResult] @@ -7526,6 +8126,7 @@ extension Objects { let googleSignup: [String: Unions.GoogleSignupResult] let logOut: [String: Unions.LogOutResult] let mergeHighlight: [String: Unions.MergeHighlightResult] + let moveLabel: [String: Unions.MoveLabelResult] let reportItem: [String: Objects.ReportItemResult] let revokeApiKey: [String: Unions.RevokeApiKeyResult] let saveArticleReadingProgress: [String: Unions.SaveArticleReadingProgressResult] @@ -7535,6 +8136,7 @@ extension Objects { let setBookmarkArticle: [String: Unions.SetBookmarkArticleResult] let setDeviceToken: [String: Unions.SetDeviceTokenResult] let setFollow: [String: Unions.SetFollowResult] + let setIntegration: [String: Unions.SetIntegrationResult] let setLabels: [String: Unions.SetLabelsResult] let setLabelsForHighlight: [String: Unions.SetLabelsResult] let setLinkArchived: [String: Unions.ArchiveLinkResult] @@ -7621,6 +8223,10 @@ extension Objects.Mutation: Decodable { if let value = try container.decode(Unions.DeleteHighlightReplyResult?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) } + case "deleteIntegration": + if let value = try container.decode(Unions.DeleteIntegrationResult?.self, forKey: 
codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } case "deleteLabel": if let value = try container.decode(Unions.DeleteLabelResult?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) @@ -7661,6 +8267,10 @@ extension Objects.Mutation: Decodable { if let value = try container.decode(Unions.MergeHighlightResult?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) } + case "moveLabel": + if let value = try container.decode(Unions.MoveLabelResult?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } case "reportItem": if let value = try container.decode(Objects.ReportItemResult?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) @@ -7697,6 +8307,10 @@ extension Objects.Mutation: Decodable { if let value = try container.decode(Unions.SetFollowResult?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) } + case "setIntegration": + if let value = try container.decode(Unions.SetIntegrationResult?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } case "setLabels": if let value = try container.decode(Unions.SetLabelsResult?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) @@ -7795,6 +8409,7 @@ extension Objects.Mutation: Decodable { deleteAccount = map["deleteAccount"] deleteHighlight = map["deleteHighlight"] deleteHighlightReply = map["deleteHighlightReply"] + deleteIntegration = map["deleteIntegration"] deleteLabel = map["deleteLabel"] deleteNewsletterEmail = map["deleteNewsletterEmail"] deleteReaction = map["deleteReaction"] @@ -7805,6 +8420,7 @@ extension Objects.Mutation: Decodable { googleSignup = map["googleSignup"] logOut = map["logOut"] mergeHighlight = map["mergeHighlight"] + moveLabel = map["moveLabel"] reportItem = map["reportItem"] revokeApiKey = map["revokeApiKey"] saveArticleReadingProgress = 
map["saveArticleReadingProgress"] @@ -7814,6 +8430,7 @@ extension Objects.Mutation: Decodable { setBookmarkArticle = map["setBookmarkArticle"] setDeviceToken = map["setDeviceToken"] setFollow = map["setFollow"] + setIntegration = map["setIntegration"] setLabels = map["setLabels"] setLabelsForHighlight = map["setLabelsForHighlight"] setLinkArchived = map["setLinkArchived"] @@ -8065,6 +8682,25 @@ extension Fields where TypeLock == Objects.Mutation { } } + func deleteIntegration(id: String, selection: Selection) throws -> Type { + let field = GraphQLField.composite( + name: "deleteIntegration", + arguments: [Argument(name: "id", type: "ID!", value: id)], + selection: selection.selection + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.deleteIntegration[field.alias!] { + return try selection.decode(data: data) + } + throw HttpError.badpayload + case .mocking: + return selection.mock() + } + } + func deleteLabel(id: String, selection: Selection) throws -> Type { let field = GraphQLField.composite( name: "deleteLabel", @@ -8255,6 +8891,25 @@ extension Fields where TypeLock == Objects.Mutation { } } + func moveLabel(input: InputObjects.MoveLabelInput, selection: Selection) throws -> Type { + let field = GraphQLField.composite( + name: "moveLabel", + arguments: [Argument(name: "input", type: "MoveLabelInput!", value: input)], + selection: selection.selection + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.moveLabel[field.alias!] 
{ + return try selection.decode(data: data) + } + throw HttpError.badpayload + case .mocking: + return selection.mock() + } + } + func reportItem(input: InputObjects.ReportItemInput, selection: Selection) throws -> Type { let field = GraphQLField.composite( name: "reportItem", @@ -8426,6 +9081,25 @@ extension Fields where TypeLock == Objects.Mutation { } } + func setIntegration(input: InputObjects.SetIntegrationInput, selection: Selection) throws -> Type { + let field = GraphQLField.composite( + name: "setIntegration", + arguments: [Argument(name: "input", type: "SetIntegrationInput!", value: input)], + selection: selection.selection + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.setIntegration[field.alias!] { + return try selection.decode(data: data) + } + throw HttpError.badpayload + case .mocking: + return selection.mock() + } + } + func setLabels(input: InputObjects.SetLabelsInput, selection: Selection) throws -> Type { let field = GraphQLField.composite( name: "setLabels", @@ -9720,6 +10394,7 @@ extension Objects { let getFollowing: [String: Unions.GetFollowingResult] let getUserPersonalization: [String: Unions.GetUserPersonalizationResult] let hello: [String: String] + let integrations: [String: Unions.IntegrationsResult] let labels: [String: Unions.LabelsResult] let me: [String: Objects.User] let newsletterEmails: [String: Unions.NewsletterEmailsResult] @@ -9790,6 +10465,10 @@ extension Objects.Query: Decodable { if let value = try container.decode(String?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) } + case "integrations": + if let value = try container.decode(Unions.IntegrationsResult?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } case "labels": if let value = try container.decode(Unions.LabelsResult?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) @@ -9869,6 +10548,7 @@ extension Objects.Query: 
Decodable { getFollowing = map["getFollowing"] getUserPersonalization = map["getUserPersonalization"] hello = map["hello"] + integrations = map["integrations"] labels = map["labels"] me = map["me"] newsletterEmails = map["newsletterEmails"] @@ -10055,6 +10735,25 @@ extension Fields where TypeLock == Objects.Query { } } + func integrations(selection: Selection) throws -> Type { + let field = GraphQLField.composite( + name: "integrations", + arguments: [], + selection: selection.selection + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.integrations[field.alias!] { + return try selection.decode(data: data) + } + throw HttpError.badpayload + case .mocking: + return selection.mock() + } + } + func labels(selection: Selection) throws -> Type { let field = GraphQLField.composite( name: "labels", @@ -11491,6 +12190,7 @@ extension Objects { let readingProgressPercent: [String: Double] let savedAt: [String: DateTime] let shortId: [String: String] + let siteIcon: [String: String] let siteName: [String: String] let slug: [String: String] let state: [String: Enums.ArticleSavingRequestStatus] @@ -11608,6 +12308,10 @@ extension Objects.SearchItem: Decodable { if let value = try container.decode(String?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) } + case "siteIcon": + if let value = try container.decode(String?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } case "siteName": if let value = try container.decode(String?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) @@ -11680,6 +12384,7 @@ extension Objects.SearchItem: Decodable { readingProgressPercent = map["readingProgressPercent"] savedAt = map["savedAt"] shortId = map["shortId"] + siteIcon = map["siteIcon"] siteName = map["siteName"] slug = map["slug"] state = map["state"] @@ -12050,6 +12755,21 @@ extension Fields where TypeLock == Objects.SearchItem { } } + func siteIcon() 
throws -> String? { + let field = GraphQLField.leaf( + name: "siteIcon", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + return data.siteIcon[field.alias!] + case .mocking: + return nil + } + } + func siteName() throws -> String? { let field = GraphQLField.leaf( name: "siteName", @@ -12918,6 +13638,137 @@ extension Selection where TypeLock == Never, Type == Never { typealias SetFollowSuccess = Selection } +extension Objects { + struct SetIntegrationError { + let __typename: TypeName = .setIntegrationError + let errorCodes: [String: [Enums.SetIntegrationErrorCode]] + + enum TypeName: String, Codable { + case setIntegrationError = "SetIntegrationError" + } + } +} + +extension Objects.SetIntegrationError: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "errorCodes": + if let value = try container.decode([Enums.SetIntegrationErrorCode]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + errorCodes = map["errorCodes"] + } +} + +extension Fields where TypeLock == Objects.SetIntegrationError { + func errorCodes() throws -> [Enums.SetIntegrationErrorCode] { + let field = GraphQLField.leaf( + name: "errorCodes", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.errorCodes[field.alias!] 
{ + return data + } + throw HttpError.badpayload + case .mocking: + return [] + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias SetIntegrationError = Selection +} + +extension Objects { + struct SetIntegrationSuccess { + let __typename: TypeName = .setIntegrationSuccess + let integration: [String: Objects.Integration] + + enum TypeName: String, Codable { + case setIntegrationSuccess = "SetIntegrationSuccess" + } + } +} + +extension Objects.SetIntegrationSuccess: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "integration": + if let value = try container.decode(Objects.Integration?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + integration = map["integration"] + } +} + +extension Fields where TypeLock == Objects.SetIntegrationSuccess { + func integration(selection: Selection) throws -> Type { + let field = GraphQLField.composite( + name: "integration", + arguments: [], + selection: selection.selection + ) + select(field) + + switch response { + case let .decoding(data): + if let data = data.integration[field.alias!] 
{ + return try selection.decode(data: data) + } + throw HttpError.badpayload + case .mocking: + return selection.mock() + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias SetIntegrationSuccess = Selection +} + extension Objects { struct SetLabelsError { let __typename: TypeName = .setLabelsError @@ -13996,6 +14847,7 @@ extension Objects { let __typename: TypeName = .subscription let createdAt: [String: DateTime] let description: [String: String] + let icon: [String: String] let id: [String: String] let name: [String: String] let newsletterEmail: [String: String] @@ -14031,6 +14883,10 @@ extension Objects.Subscription: Decodable { if let value = try container.decode(String?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) } + case "icon": + if let value = try container.decode(String?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } case "id": if let value = try container.decode(String?.self, forKey: codingKey) { map.set(key: field, hash: alias, value: value as Any) @@ -14075,6 +14931,7 @@ extension Objects.Subscription: Decodable { createdAt = map["createdAt"] description = map["description"] + icon = map["icon"] id = map["id"] name = map["name"] newsletterEmail = map["newsletterEmail"] @@ -14120,6 +14977,21 @@ extension Fields where TypeLock == Objects.Subscription { } } + func icon() throws -> String? { + let field = GraphQLField.leaf( + name: "icon", + arguments: [] + ) + select(field) + + switch response { + case let .decoding(data): + return data.icon[field.alias!] 
+ case .mocking: + return nil + } + } + func id() throws -> String { let field = GraphQLField.leaf( name: "id", @@ -19035,6 +19907,80 @@ extension Selection where TypeLock == Never, Type == Never { typealias DeleteHighlightResult = Selection } +extension Unions { + struct DeleteIntegrationResult { + let __typename: TypeName + let errorCodes: [String: [Enums.DeleteIntegrationErrorCode]] + let integration: [String: Objects.Integration] + + enum TypeName: String, Codable { + case deleteIntegrationError = "DeleteIntegrationError" + case deleteIntegrationSuccess = "DeleteIntegrationSuccess" + } + } +} + +extension Unions.DeleteIntegrationResult: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "errorCodes": + if let value = try container.decode([Enums.DeleteIntegrationErrorCode]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + case "integration": + if let value = try container.decode(Objects.Integration?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + __typename = try container.decode(TypeName.self, forKey: DynamicCodingKeys(stringValue: "__typename")!) 
+ + errorCodes = map["errorCodes"] + integration = map["integration"] + } +} + +extension Fields where TypeLock == Unions.DeleteIntegrationResult { + func on(deleteIntegrationError: Selection, deleteIntegrationSuccess: Selection) throws -> Type { + select([GraphQLField.fragment(type: "DeleteIntegrationError", selection: deleteIntegrationError.selection), GraphQLField.fragment(type: "DeleteIntegrationSuccess", selection: deleteIntegrationSuccess.selection)]) + + switch response { + case let .decoding(data): + switch data.__typename { + case .deleteIntegrationError: + let data = Objects.DeleteIntegrationError(errorCodes: data.errorCodes) + return try deleteIntegrationError.decode(data: data) + case .deleteIntegrationSuccess: + let data = Objects.DeleteIntegrationSuccess(integration: data.integration) + return try deleteIntegrationSuccess.decode(data: data) + } + case .mocking: + return deleteIntegrationError.mock() + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias DeleteIntegrationResult = Selection +} + extension Unions { struct DeleteLabelResult { let __typename: TypeName @@ -19855,6 +20801,80 @@ extension Selection where TypeLock == Never, Type == Never { typealias GoogleSignupResult = Selection } +extension Unions { + struct IntegrationsResult { + let __typename: TypeName + let errorCodes: [String: [Enums.IntegrationsErrorCode]] + let integrations: [String: [Objects.Integration]] + + enum TypeName: String, Codable { + case integrationsError = "IntegrationsError" + case integrationsSuccess = "IntegrationsSuccess" + } + } +} + +extension Unions.IntegrationsResult: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "errorCodes": + if let 
value = try container.decode([Enums.IntegrationsErrorCode]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + case "integrations": + if let value = try container.decode([Objects.Integration]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + __typename = try container.decode(TypeName.self, forKey: DynamicCodingKeys(stringValue: "__typename")!) + + errorCodes = map["errorCodes"] + integrations = map["integrations"] + } +} + +extension Fields where TypeLock == Unions.IntegrationsResult { + func on(integrationsError: Selection, integrationsSuccess: Selection) throws -> Type { + select([GraphQLField.fragment(type: "IntegrationsError", selection: integrationsError.selection), GraphQLField.fragment(type: "IntegrationsSuccess", selection: integrationsSuccess.selection)]) + + switch response { + case let .decoding(data): + switch data.__typename { + case .integrationsError: + let data = Objects.IntegrationsError(errorCodes: data.errorCodes) + return try integrationsError.decode(data: data) + case .integrationsSuccess: + let data = Objects.IntegrationsSuccess(integrations: data.integrations) + return try integrationsSuccess.decode(data: data) + } + case .mocking: + return integrationsError.mock() + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias IntegrationsResult = Selection +} + extension Unions { struct LabelsResult { let __typename: TypeName @@ -20157,6 +21177,80 @@ extension Selection where TypeLock == Never, Type == Never { typealias MergeHighlightResult = Selection } +extension Unions { + struct MoveLabelResult { + let __typename: TypeName + let errorCodes: [String: [Enums.MoveLabelErrorCode]] + let label: [String: Objects.Label] + + enum TypeName: String, Codable { + case moveLabelError 
= "MoveLabelError" + case moveLabelSuccess = "MoveLabelSuccess" + } + } +} + +extension Unions.MoveLabelResult: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "errorCodes": + if let value = try container.decode([Enums.MoveLabelErrorCode]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + case "label": + if let value = try container.decode(Objects.Label?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + __typename = try container.decode(TypeName.self, forKey: DynamicCodingKeys(stringValue: "__typename")!) 
+ + errorCodes = map["errorCodes"] + label = map["label"] + } +} + +extension Fields where TypeLock == Unions.MoveLabelResult { + func on(moveLabelError: Selection, moveLabelSuccess: Selection) throws -> Type { + select([GraphQLField.fragment(type: "MoveLabelError", selection: moveLabelError.selection), GraphQLField.fragment(type: "MoveLabelSuccess", selection: moveLabelSuccess.selection)]) + + switch response { + case let .decoding(data): + switch data.__typename { + case .moveLabelError: + let data = Objects.MoveLabelError(errorCodes: data.errorCodes) + return try moveLabelError.decode(data: data) + case .moveLabelSuccess: + let data = Objects.MoveLabelSuccess(label: data.label) + return try moveLabelSuccess.decode(data: data) + } + case .mocking: + return moveLabelError.mock() + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias MoveLabelResult = Selection +} + extension Unions { struct NewsletterEmailsResult { let __typename: TypeName @@ -20915,6 +22009,80 @@ extension Selection where TypeLock == Never, Type == Never { typealias SetFollowResult = Selection } +extension Unions { + struct SetIntegrationResult { + let __typename: TypeName + let errorCodes: [String: [Enums.SetIntegrationErrorCode]] + let integration: [String: Objects.Integration] + + enum TypeName: String, Codable { + case setIntegrationError = "SetIntegrationError" + case setIntegrationSuccess = "SetIntegrationSuccess" + } + } +} + +extension Unions.SetIntegrationResult: Decodable { + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: DynamicCodingKeys.self) + + var map = HashMap() + for codingKey in container.allKeys { + if codingKey.isTypenameKey { continue } + + let alias = codingKey.stringValue + let field = GraphQLField.getFieldNameFromAlias(alias) + + switch field { + case "errorCodes": + if let value = try container.decode([Enums.SetIntegrationErrorCode]?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: 
value as Any) + } + case "integration": + if let value = try container.decode(Objects.Integration?.self, forKey: codingKey) { + map.set(key: field, hash: alias, value: value as Any) + } + default: + throw DecodingError.dataCorrupted( + DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Unknown key \(field)." + ) + ) + } + } + + __typename = try container.decode(TypeName.self, forKey: DynamicCodingKeys(stringValue: "__typename")!) + + errorCodes = map["errorCodes"] + integration = map["integration"] + } +} + +extension Fields where TypeLock == Unions.SetIntegrationResult { + func on(setIntegrationError: Selection, setIntegrationSuccess: Selection) throws -> Type { + select([GraphQLField.fragment(type: "SetIntegrationError", selection: setIntegrationError.selection), GraphQLField.fragment(type: "SetIntegrationSuccess", selection: setIntegrationSuccess.selection)]) + + switch response { + case let .decoding(data): + switch data.__typename { + case .setIntegrationError: + let data = Objects.SetIntegrationError(errorCodes: data.errorCodes) + return try setIntegrationError.decode(data: data) + case .setIntegrationSuccess: + let data = Objects.SetIntegrationSuccess(integration: data.integration) + return try setIntegrationSuccess.decode(data: data) + } + case .mocking: + return setIntegrationError.mock() + } + } +} + +extension Selection where TypeLock == Never, Type == Never { + typealias SetIntegrationResult = Selection +} + extension Unions { struct SetLabelsResult { let __typename: TypeName @@ -23025,6 +24193,17 @@ extension Enums { } } +extension Enums { + /// DeleteIntegrationErrorCode + enum DeleteIntegrationErrorCode: String, CaseIterable, Codable { + case badRequest = "BAD_REQUEST" + + case notFound = "NOT_FOUND" + + case unauthorized = "UNAUTHORIZED" + } +} + extension Enums { /// DeleteLabelErrorCode enum DeleteLabelErrorCode: String, CaseIterable, Codable { @@ -23119,6 +24298,22 @@ extension Enums { } } +extension Enums { + /// 
IntegrationType + enum IntegrationType: String, CaseIterable, Codable { + case readwise = "READWISE" + } +} + +extension Enums { + /// IntegrationsErrorCode + enum IntegrationsErrorCode: String, CaseIterable, Codable { + case badRequest = "BAD_REQUEST" + + case unauthorized = "UNAUTHORIZED" + } +} + extension Enums { /// LabelsErrorCode enum LabelsErrorCode: String, CaseIterable, Codable { @@ -23169,6 +24364,17 @@ extension Enums { } } +extension Enums { + /// MoveLabelErrorCode + enum MoveLabelErrorCode: String, CaseIterable, Codable { + case badRequest = "BAD_REQUEST" + + case notFound = "NOT_FOUND" + + case unauthorized = "UNAUTHORIZED" + } +} + extension Enums { /// NewsletterEmailsErrorCode enum NewsletterEmailsErrorCode: String, CaseIterable, Codable { @@ -23318,6 +24524,21 @@ extension Enums { } } +extension Enums { + /// SetIntegrationErrorCode + enum SetIntegrationErrorCode: String, CaseIterable, Codable { + case alreadyExists = "ALREADY_EXISTS" + + case badRequest = "BAD_REQUEST" + + case invalidToken = "INVALID_TOKEN" + + case notFound = "NOT_FOUND" + + case unauthorized = "UNAUTHORIZED" + } +} + extension Enums { /// SetLabelsErrorCode enum SetLabelsErrorCode: String, CaseIterable, Codable { @@ -24040,6 +25261,25 @@ extension InputObjects { } } +extension InputObjects { + struct MoveLabelInput: Encodable, Hashable { + var afterLabelId: OptionalArgument = .absent() + + var labelId: String + + func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + if afterLabelId.hasValue { try container.encode(afterLabelId, forKey: .afterLabelId) } + try container.encode(labelId, forKey: .labelId) + } + + enum CodingKeys: String, CodingKey { + case afterLabelId + case labelId + } + } +} + extension InputObjects { struct PageInfoInput: Encodable, Hashable { var author: OptionalArgument = .absent() @@ -24290,6 +25530,33 @@ extension InputObjects { } } +extension InputObjects { + struct SetIntegrationInput: Encodable, 
Hashable { + var enabled: Bool + + var id: OptionalArgument = .absent() + + var token: String + + var type: Enums.IntegrationType + + func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(enabled, forKey: .enabled) + if id.hasValue { try container.encode(id, forKey: .id) } + try container.encode(token, forKey: .token) + try container.encode(type, forKey: .type) + } + + enum CodingKeys: String, CodingKey { + case enabled + case id + case token + case type + } + } +} + extension InputObjects { struct SetLabelsForHighlightInput: Encodable, Hashable { var highlightId: String From 29b35d6363b08dba5f4c6a196a19f0cef91b5d2c Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Mon, 3 Oct 2022 08:31:50 -0700 Subject: [PATCH 06/54] declare recentSearches as unknown array to avoid type error --- packages/web/components/templates/library/LibrarySearchBar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/web/components/templates/library/LibrarySearchBar.tsx b/packages/web/components/templates/library/LibrarySearchBar.tsx index a0f243dd2..fb3a499dc 100644 --- a/packages/web/components/templates/library/LibrarySearchBar.tsx +++ b/packages/web/components/templates/library/LibrarySearchBar.tsx @@ -36,7 +36,7 @@ export type LibrarySearchBarProps = { } export function LibrarySearchBar(props: LibrarySearchBarProps): JSX.Element { - const [recentSearches, setRecentSearches] = useState(Array<[]>()) + const [recentSearches, setRecentSearches] = useState(Array()) useEffect(() => { setRecentSearches(Object.values(localStorage)) From 78bce2d1e7984645df0d1f5f05307c2f688a841a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 09:51:42 +0800 Subject: [PATCH 07/54] Add Dockerfile for cloud run --- packages/inbound-email-handler/Dockerfile | 28 +++++ packages/inbound-email-handler/package.json | 2 +- packages/puppeteer-parse/Dockerfile | 11 +- 
packages/puppeteer-parse/Dockerfile-preview | 111 ++++++++++++++++++++ packages/puppeteer-parse/package.json | 6 +- yarn.lock | 13 +-- 6 files changed, 152 insertions(+), 19 deletions(-) create mode 100644 packages/inbound-email-handler/Dockerfile create mode 100644 packages/puppeteer-parse/Dockerfile-preview diff --git a/packages/inbound-email-handler/Dockerfile b/packages/inbound-email-handler/Dockerfile new file mode 100644 index 000000000..e6cf56c11 --- /dev/null +++ b/packages/inbound-email-handler/Dockerfile @@ -0,0 +1,28 @@ +FROM node:14.18-alpine + +# Run everything after as non-privileged user. +WORKDIR /app + +COPY package.json . +COPY yarn.lock . +COPY tsconfig.json . +COPY .prettierrc . +COPY .eslintrc . + +COPY /packages/content-handler/package.json ./packages/content-handler/package.json + +RUN yarn install --pure-lockfile + +ADD /packages/inbound-email-handler ./packages/inbound-email-handler +ADD /packages/content-handler ./packages/content-handler +RUN yarn workspace @omnivore/content-handler build + +# After building, fetch the production dependencies +RUN rm -rf /app/packages/inbound-email-handler/node_modules +RUN rm -rf /app/node_modules +RUN yarn install --pure-lockfile --production + +EXPOSE 8080 + +CMD ["yarn", "workspace", "@omnivore/inbound-email-handler", "start"] + diff --git a/packages/inbound-email-handler/package.json b/packages/inbound-email-handler/package.json index 61173efcb..b11c89e31 100644 --- a/packages/inbound-email-handler/package.json +++ b/packages/inbound-email-handler/package.json @@ -29,7 +29,7 @@ "dependencies": { "@google-cloud/functions-framework": "3.1.2", "@google-cloud/pubsub": "^2.18.4", - "@omnivore/content-handler": "file:./content-handler", + "@omnivore/content-handler": "file:./../content-handler", "@sendgrid/client": "^7.6.0", "@sentry/serverless": "^6.16.1", "addressparser": "^1.0.1", diff --git a/packages/puppeteer-parse/Dockerfile b/packages/puppeteer-parse/Dockerfile index d3ce96e20..771aa0f5b 100644 --- 
a/packages/puppeteer-parse/Dockerfile +++ b/packages/puppeteer-parse/Dockerfile @@ -92,13 +92,20 @@ COPY tsconfig.json . COPY .prettierrc . COPY .eslintrc . -COPY /packages/puppeteer-parse/package.json ./packages/puppeteer-parse/package.json +COPY /packages/content-handler/package.json ./packages/content-handler/package.json RUN yarn install --pure-lockfile ADD /packages/puppeteer-parse ./packages/puppeteer-parse +ADD /packages/content-handler ./packages/content-handler +RUN yarn workspace @omnivore/content-handler build -EXPOSE 8080 +# After building, fetch the production dependencies +RUN rm -rf /app/packages/puppeteer-parse/node_modules +RUN rm -rf /app/node_modules +RUN yarn install --pure-lockfile --production + +EXPOSE 9090 # USER pptruser ENTRYPOINT ["yarn", "workspace", "@omnivore/puppeteer-parse", "start"] diff --git a/packages/puppeteer-parse/Dockerfile-preview b/packages/puppeteer-parse/Dockerfile-preview new file mode 100644 index 000000000..7379c0c31 --- /dev/null +++ b/packages/puppeteer-parse/Dockerfile-preview @@ -0,0 +1,111 @@ +# FROM node:14-slim + +# # Taken from pu + +# # Install latest chrome dev package and fonts to support major charsets (Chinese, Japanese, Arabic, Hebrew, Thai and a few others) +# # Note: this installs the necessary libs to make the bundled version of Chromium that Puppeteer +# # installs, work. 
+# RUN apt-get update \ +# && apt-get install -y wget gnupg \ +# && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ +# && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \ +# && apt-get update \ +# && apt-get install -y google-chrome-stable fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 \ +# --no-install-recommends \ +# && rm -rf /var/lib/apt/lists/* + +# ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true +# ENV CHROMIUM_PATH "/usr/bin/google-chrome-stable" + +# ------------------------ + +# FROM --platform=linux/arm64 node:14.18 + +# RUN apt-get update \ +# && apt-get install -y chromium \ +# && apt-get install -y ca-certificates \ +# fonts-liberation \ +# libappindicator3-1 \ +# libasound2 \ +# libatk-bridge2.0-0 \ +# libatk1.0-0 \ +# libc6 \ +# libcairo2 \ +# libcups2 \ +# libdbus-1-3 \ +# libexpat1 \ +# libfontconfig1 \ +# libgbm1 \ +# libgcc1 \ +# libglib2.0-0 \ +# libgtk-3-0 \ +# libnspr4 \ +# libnss3 \ +# libpango-1.0-0 \ +# libpangocairo-1.0-0 \ +# libstdc++6 \ +# libx11-6 \ +# libx11-xcb1 \ +# libxcb1 \ +# libxcomposite1 \ +# libxcursor1 \ +# libxdamage1 \ +# libxext6 \ +# libxfixes3 \ +# libxi6 \ +# libxrandr2 \ +# libxrender1 \ +# libxss1 \ +# libxtst6 \ +# lsb-release \ +# wget \ +# xdg-utils + +FROM node:14.18-alpine + +# Installs latest Chromium (92) package. +RUN apk add --no-cache \ + chromium \ + nss \ + freetype \ + harfbuzz \ + ca-certificates \ + ttf-freefont \ + nodejs \ + yarn + +# Add user so we don't need --no-sandbox. +RUN addgroup -S pptruser && adduser -S -g pptruser pptruser \ + && mkdir -p /home/pptruser/Downloads /app \ + && chown -R pptruser:pptruser /home/pptruser \ + && chown -R pptruser:pptruser /app + +# Run everything after as non-privileged user. +WORKDIR /app + +ENV CHROMIUM_PATH /usr/bin/chromium-browser +ENV LAUNCH_HEADLESS=true + +COPY package.json . +COPY yarn.lock . 
+COPY tsconfig.json . +COPY .prettierrc . +COPY .eslintrc . + +COPY /packages/content-handler/package.json ./packages/content-handler/package.json + +RUN yarn install --pure-lockfile + +ADD /packages/puppeteer-parse ./packages/puppeteer-parse +ADD /packages/content-handler ./packages/content-handler +RUN yarn workspace @omnivore/content-handler build + +# After building, fetch the production dependencies +RUN rm -rf /app/packages/puppeteer-parse/node_modules +RUN rm -rf /app/node_modules +RUN yarn install --pure-lockfile --production + +EXPOSE 8080 + +# USER pptruser +ENTRYPOINT ["yarn", "workspace", "@omnivore/puppeteer-parse", "start_preview"] diff --git a/packages/puppeteer-parse/package.json b/packages/puppeteer-parse/package.json index 8db80fc53..96109b9d1 100644 --- a/packages/puppeteer-parse/package.json +++ b/packages/puppeteer-parse/package.json @@ -4,9 +4,10 @@ "description": "Google Cloud Function that accepts URL of the article and parses its content", "main": "index.js", "dependencies": { + "@google-cloud/functions-framework": "^3.1.2", "@google-cloud/logging-winston": "^5.1.1", "@google-cloud/storage": "^5.18.1", - "@omnivore/content-handler": "file:./content-handler", + "@omnivore/content-handler": "file:./../content-handler", "@sentry/serverless": "^6.13.3", "axios": "^0.27.2", "chrome-aws-lambda": "^10.1.0", @@ -18,9 +19,6 @@ "underscore": "^1.13.4", "winston": "^3.3.3" }, - "devDependencies": { - "@google-cloud/functions-framework": "^3.0.0" - }, "scripts": { "start": "npx functions-framework --port=9090 --target=puppeteer", "start_preview": "npx functions-framework --target=preview", diff --git a/yarn.lock b/yarn.lock index f60474b2a..ed80ba88e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2390,7 +2390,7 @@ google-gax "^2.24.1" protobufjs "^6.8.6" -"@google-cloud/functions-framework@3.1.2", "@google-cloud/functions-framework@^3.0.0": +"@google-cloud/functions-framework@3.1.2", "@google-cloud/functions-framework@^3.1.2": version "3.1.2" resolved 
"https://registry.yarnpkg.com/@google-cloud/functions-framework/-/functions-framework-3.1.2.tgz#2cd92ce4307bf7f32555d028dca22e398473b410" integrity sha512-pYvEH65/Rqh1JNPdcBmorcV7Xoom2/iOSmbtYza8msro7Inl+qOYxbyMiQfySD2gwAyn38WyWPRqsDRcf/BFLg== @@ -4323,17 +4323,6 @@ underscore "^1.13.6" uuid "^9.0.0" -"@omnivore/content-handler@file:./packages/inbound-email-handler/content-handler": - version "1.0.0" - dependencies: - addressparser "^1.0.1" - axios "^0.27.2" - linkedom "^0.14.16" - luxon "^3.0.4" - rfc2047 "^4.0.1" - underscore "^1.13.6" - uuid "^9.0.0" - "@opentelemetry/api-metrics@0.27.0": version "0.27.0" resolved "https://registry.yarnpkg.com/@opentelemetry/api-metrics/-/api-metrics-0.27.0.tgz#d8eca344ed1155f3ea8a8133ade827b4bb90efbf" From 9cae703666258ac27e2ff3d43e995d4e35b9d14e Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 10:20:13 +0800 Subject: [PATCH 08/54] Fix Dockerfile --- packages/content-fetch/fetch-content.js | 2 +- packages/inbound-email-handler/Dockerfile | 2 + packages/inbound-email-handler/package.json | 1 + packages/inbound-email-handler/tsconfig.json | 2 +- packages/puppeteer-parse/Dockerfile | 1 + packages/puppeteer-parse/Dockerfile-preview | 111 ------------------- packages/puppeteer-parse/index.js | 68 ++++++------ 7 files changed, 40 insertions(+), 147 deletions(-) delete mode 100644 packages/puppeteer-parse/Dockerfile-preview diff --git a/packages/content-fetch/fetch-content.js b/packages/content-fetch/fetch-content.js index 0c3118544..869de8aa2 100644 --- a/packages/content-fetch/fetch-content.js +++ b/packages/content-fetch/fetch-content.js @@ -100,7 +100,7 @@ const getBrowserPromise = (async () => { '--window-size=1920,1080', ].filter((item) => !!item), defaultViewport: { height: 1080, width: 1920 }, - executablePath: process.env.CHROMIUM_PATH , + executablePath: process.env.CHROMIUM_PATH, headless: !!process.env.LAUNCH_HEADLESS, timeout: 120000, // 2 minutes }); diff --git a/packages/inbound-email-handler/Dockerfile 
b/packages/inbound-email-handler/Dockerfile index e6cf56c11..364c786bc 100644 --- a/packages/inbound-email-handler/Dockerfile +++ b/packages/inbound-email-handler/Dockerfile @@ -9,6 +9,7 @@ COPY tsconfig.json . COPY .prettierrc . COPY .eslintrc . +COPY /packages/inbound-email-handler/package.json ./packages/inbound-email-handler/package.json COPY /packages/content-handler/package.json ./packages/content-handler/package.json RUN yarn install --pure-lockfile @@ -16,6 +17,7 @@ RUN yarn install --pure-lockfile ADD /packages/inbound-email-handler ./packages/inbound-email-handler ADD /packages/content-handler ./packages/content-handler RUN yarn workspace @omnivore/content-handler build +RUN yarn workspace @omnivore/inbound-email-handler build # After building, fetch the production dependencies RUN rm -rf /app/packages/inbound-email-handler/node_modules diff --git a/packages/inbound-email-handler/package.json b/packages/inbound-email-handler/package.json index b11c89e31..0fef8043a 100644 --- a/packages/inbound-email-handler/package.json +++ b/packages/inbound-email-handler/package.json @@ -24,6 +24,7 @@ "@types/json-bigint": "^1.0.1", "@types/node": "^14.11.2", "@types/rfc2047": "^2.0.1", + "chai": "^4.3.6", "eslint-plugin-prettier": "^4.0.0" }, "dependencies": { diff --git a/packages/inbound-email-handler/tsconfig.json b/packages/inbound-email-handler/tsconfig.json index f450acf38..5220d6b3f 100644 --- a/packages/inbound-email-handler/tsconfig.json +++ b/packages/inbound-email-handler/tsconfig.json @@ -5,5 +5,5 @@ "rootDir": ".", "lib": ["dom"] }, - "include": ["src", "test"] + "include": ["src"] } diff --git a/packages/puppeteer-parse/Dockerfile b/packages/puppeteer-parse/Dockerfile index 771aa0f5b..7faae17bf 100644 --- a/packages/puppeteer-parse/Dockerfile +++ b/packages/puppeteer-parse/Dockerfile @@ -92,6 +92,7 @@ COPY tsconfig.json . COPY .prettierrc . COPY .eslintrc . 
+COPY /packages/puppeteer-parse/package.json ./packages/puppeteer-parse/package.json COPY /packages/content-handler/package.json ./packages/content-handler/package.json RUN yarn install --pure-lockfile diff --git a/packages/puppeteer-parse/Dockerfile-preview b/packages/puppeteer-parse/Dockerfile-preview deleted file mode 100644 index 7379c0c31..000000000 --- a/packages/puppeteer-parse/Dockerfile-preview +++ /dev/null @@ -1,111 +0,0 @@ -# FROM node:14-slim - -# # Taken from pu - -# # Install latest chrome dev package and fonts to support major charsets (Chinese, Japanese, Arabic, Hebrew, Thai and a few others) -# # Note: this installs the necessary libs to make the bundled version of Chromium that Puppeteer -# # installs, work. -# RUN apt-get update \ -# && apt-get install -y wget gnupg \ -# && wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ -# && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \ -# && apt-get update \ -# && apt-get install -y google-chrome-stable fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 \ -# --no-install-recommends \ -# && rm -rf /var/lib/apt/lists/* - -# ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true -# ENV CHROMIUM_PATH "/usr/bin/google-chrome-stable" - -# ------------------------ - -# FROM --platform=linux/arm64 node:14.18 - -# RUN apt-get update \ -# && apt-get install -y chromium \ -# && apt-get install -y ca-certificates \ -# fonts-liberation \ -# libappindicator3-1 \ -# libasound2 \ -# libatk-bridge2.0-0 \ -# libatk1.0-0 \ -# libc6 \ -# libcairo2 \ -# libcups2 \ -# libdbus-1-3 \ -# libexpat1 \ -# libfontconfig1 \ -# libgbm1 \ -# libgcc1 \ -# libglib2.0-0 \ -# libgtk-3-0 \ -# libnspr4 \ -# libnss3 \ -# libpango-1.0-0 \ -# libpangocairo-1.0-0 \ -# libstdc++6 \ -# libx11-6 \ -# libx11-xcb1 \ -# libxcb1 \ -# libxcomposite1 \ -# libxcursor1 \ -# libxdamage1 \ -# libxext6 \ -# libxfixes3 \ -# 
libxi6 \ -# libxrandr2 \ -# libxrender1 \ -# libxss1 \ -# libxtst6 \ -# lsb-release \ -# wget \ -# xdg-utils - -FROM node:14.18-alpine - -# Installs latest Chromium (92) package. -RUN apk add --no-cache \ - chromium \ - nss \ - freetype \ - harfbuzz \ - ca-certificates \ - ttf-freefont \ - nodejs \ - yarn - -# Add user so we don't need --no-sandbox. -RUN addgroup -S pptruser && adduser -S -g pptruser pptruser \ - && mkdir -p /home/pptruser/Downloads /app \ - && chown -R pptruser:pptruser /home/pptruser \ - && chown -R pptruser:pptruser /app - -# Run everything after as non-privileged user. -WORKDIR /app - -ENV CHROMIUM_PATH /usr/bin/chromium-browser -ENV LAUNCH_HEADLESS=true - -COPY package.json . -COPY yarn.lock . -COPY tsconfig.json . -COPY .prettierrc . -COPY .eslintrc . - -COPY /packages/content-handler/package.json ./packages/content-handler/package.json - -RUN yarn install --pure-lockfile - -ADD /packages/puppeteer-parse ./packages/puppeteer-parse -ADD /packages/content-handler ./packages/content-handler -RUN yarn workspace @omnivore/content-handler build - -# After building, fetch the production dependencies -RUN rm -rf /app/packages/puppeteer-parse/node_modules -RUN rm -rf /app/node_modules -RUN yarn install --pure-lockfile --production - -EXPOSE 8080 - -# USER pptruser -ENTRYPOINT ["yarn", "workspace", "@omnivore/puppeteer-parse", "start_preview"] diff --git a/packages/puppeteer-parse/index.js b/packages/puppeteer-parse/index.js index e158f06d7..d3fadf998 100644 --- a/packages/puppeteer-parse/index.js +++ b/packages/puppeteer-parse/index.js @@ -118,43 +118,43 @@ const userAgentForUrl = (url) => { // launch Puppeteer const getBrowserPromise = (async () => { - return puppeteer.launch({ - args: chromium.args, - defaultViewport: chromium.defaultViewport, - executablePath: await chromium.executablePath, - headless: chromium.headless, - ignoreHTTPSErrors: true, - }); // return puppeteer.launch({ - // args: [ - // '--allow-running-insecure-content', - // 
'--autoplay-policy=user-gesture-required', - // '--disable-component-update', - // '--disable-domain-reliability', - // '--disable-features=AudioServiceOutOfProcess,IsolateOrigins,site-per-process', - // '--disable-print-preview', - // '--disable-setuid-sandbox', - // '--disable-site-isolation-trials', - // '--disable-speech-api', - // '--disable-web-security', - // '--disk-cache-size=33554432', - // '--enable-features=SharedArrayBuffer', - // '--hide-scrollbars', - // '--ignore-gpu-blocklist', - // '--in-process-gpu', - // '--mute-audio', - // '--no-default-browser-check', - // '--no-pings', - // '--no-sandbox', - // '--no-zygote', - // '--use-gl=swiftshader', - // '--window-size=1920,1080', - // ].filter((item) => !!item), - // defaultViewport: { height: 1080, width: 1920 }, + // args: chromium.args, + // defaultViewport: chromium.defaultViewport, // executablePath: process.env.CHROMIUM_PATH, - // headless: !!process.env.LAUNCH_HEADLESS, - // timeout: 0, + // headless: chromium.headless, + // ignoreHTTPSErrors: true, // }); + return puppeteer.launch({ + args: [ + '--allow-running-insecure-content', + '--autoplay-policy=user-gesture-required', + '--disable-component-update', + '--disable-domain-reliability', + '--disable-features=AudioServiceOutOfProcess,IsolateOrigins,site-per-process', + '--disable-print-preview', + '--disable-setuid-sandbox', + '--disable-site-isolation-trials', + '--disable-speech-api', + '--disable-web-security', + '--disk-cache-size=33554432', + '--enable-features=SharedArrayBuffer', + '--hide-scrollbars', + '--ignore-gpu-blocklist', + '--in-process-gpu', + '--mute-audio', + '--no-default-browser-check', + '--no-pings', + '--no-sandbox', + '--no-zygote', + '--use-gl=swiftshader', + '--window-size=1920,1080', + ].filter((item) => !!item), + defaultViewport: { height: 1080, width: 1920 }, + executablePath: process.env.CHROMIUM_PATH, + headless: !!process.env.LAUNCH_HEADLESS, + timeout: 120000, // 2 minutes + }); })(); let logRecord, 
functionStartTime; From 53d6afe25f921133d2064234d4a7d0944a6e7f8c Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 10:47:58 +0800 Subject: [PATCH 09/54] Fix tests --- packages/content-fetch/package.json | 2 +- packages/inbound-email-handler/package.json | 5 +++-- .../inbound-email-handler/test/newsletter.test.ts | 1 + packages/inbound-email-handler/test/stub.test.ts | 13 ------------- packages/puppeteer-parse/package.json | 2 +- yarn.lock | 11 ----------- 6 files changed, 6 insertions(+), 28 deletions(-) delete mode 100644 packages/inbound-email-handler/test/stub.test.ts diff --git a/packages/content-fetch/package.json b/packages/content-fetch/package.json index 70915c84c..405f1a27c 100644 --- a/packages/content-fetch/package.json +++ b/packages/content-fetch/package.json @@ -4,7 +4,7 @@ "description": "Service that fetches page content from a URL", "main": "index.js", "dependencies": { - "@omnivore/content-handler": "file:./../content-handler", + "@omnivore/content-handler": "1.0.0", "axios": "^0.27.2", "dotenv": "^8.2.0", "express": "^4.17.1", diff --git a/packages/inbound-email-handler/package.json b/packages/inbound-email-handler/package.json index 0fef8043a..9f0ab5f32 100644 --- a/packages/inbound-email-handler/package.json +++ b/packages/inbound-email-handler/package.json @@ -25,12 +25,13 @@ "@types/node": "^14.11.2", "@types/rfc2047": "^2.0.1", "chai": "^4.3.6", - "eslint-plugin-prettier": "^4.0.0" + "eslint-plugin-prettier": "^4.0.0", + "mocha": "^10.0.0" }, "dependencies": { "@google-cloud/functions-framework": "3.1.2", "@google-cloud/pubsub": "^2.18.4", - "@omnivore/content-handler": "file:./../content-handler", + "@omnivore/content-handler": "1.0.0", "@sendgrid/client": "^7.6.0", "@sentry/serverless": "^6.16.1", "addressparser": "^1.0.1", diff --git a/packages/inbound-email-handler/test/newsletter.test.ts b/packages/inbound-email-handler/test/newsletter.test.ts index 253c294ab..369cf9ab5 100644 --- 
a/packages/inbound-email-handler/test/newsletter.test.ts +++ b/packages/inbound-email-handler/test/newsletter.test.ts @@ -1,3 +1,4 @@ +import 'mocha' import { expect } from 'chai' import { getConfirmationCode, diff --git a/packages/inbound-email-handler/test/stub.test.ts b/packages/inbound-email-handler/test/stub.test.ts deleted file mode 100644 index 173ca4917..000000000 --- a/packages/inbound-email-handler/test/stub.test.ts +++ /dev/null @@ -1,13 +0,0 @@ -import 'mocha' -import * as chai from 'chai' -import { expect } from 'chai' -import 'chai/register-should' -import chaiString from 'chai-string' - -chai.use(chaiString) - -describe('Stub test', () => { - it('should pass', () => { - expect(true).to.be.true - }) -}) diff --git a/packages/puppeteer-parse/package.json b/packages/puppeteer-parse/package.json index 96109b9d1..4ef6bb4d9 100644 --- a/packages/puppeteer-parse/package.json +++ b/packages/puppeteer-parse/package.json @@ -7,7 +7,7 @@ "@google-cloud/functions-framework": "^3.1.2", "@google-cloud/logging-winston": "^5.1.1", "@google-cloud/storage": "^5.18.1", - "@omnivore/content-handler": "file:./../content-handler", + "@omnivore/content-handler": "1.0.0", "@sentry/serverless": "^6.13.3", "axios": "^0.27.2", "chrome-aws-lambda": "^10.1.0", diff --git a/yarn.lock b/yarn.lock index ed80ba88e..fd1769c7e 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4312,17 +4312,6 @@ dependencies: "@octokit/openapi-types" "^9.5.0" -"@omnivore/content-handler@file:./packages/content-handler": - version "1.0.0" - dependencies: - addressparser "^1.0.1" - axios "^0.27.2" - linkedom "^0.14.16" - luxon "^3.0.4" - rfc2047 "^4.0.1" - underscore "^1.13.6" - uuid "^9.0.0" - "@opentelemetry/api-metrics@0.27.0": version "0.27.0" resolved "https://registry.yarnpkg.com/@opentelemetry/api-metrics/-/api-metrics-0.27.0.tgz#d8eca344ed1155f3ea8a8133ade827b4bb90efbf" From 767053e31b6cf0d08ab765cac353bf77cf37fe6d Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 11:27:55 +0800 Subject: 
[PATCH 10/54] mock redirect request --- .../content-handler/test/newsletter.test.ts | 46 ++++++++++--------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/packages/content-handler/test/newsletter.test.ts b/packages/content-handler/test/newsletter.test.ts index b8748b832..f2b51a124 100644 --- a/packages/content-handler/test/newsletter.test.ts +++ b/packages/content-handler/test/newsletter.test.ts @@ -21,6 +21,29 @@ const load = (path: string): string => { } describe('Newsletter email test', () => { + before(() => { + nock('https://email.mg2.substack.com') + .head( + '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY' + ) + .reply(302, undefined, { + Location: + 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217', + }) + .get('/p/companies-that-eat-people-217') + .reply(200, '') + + nock('https://u23463625.ct.sendgrid.net') + .head( + '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno' + ) + .reply(302, undefined, { + Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple', + }) + .get('/p/talked-guy-spent-30m-beeple') + .reply(200, '') + }) + describe('#getNewsletterUrl()', () => { it('returns url when email is from SubStack', async () => { const rawUrl = '' @@ -140,39 +163,20 @@ describe('Newsletter email test', () => { 
describe('findNewsletterUrl', async () => { it('gets the URL from the header if it is a substack newsletter', async () => { - nock('https://email.mg2.substack.com') - .head( - '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY' - ) - .reply(302, undefined, { - Location: - 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217', - }) - .get('/p/companies-that-eat-people-217') - .reply(200, '') const html = load('./test/data/substack-forwarded-newsletter.html') const url = await new SubstackHandler().findNewsletterUrl(html) // Not sure if the redirects from substack expire, this test could eventually fail expect(url).to.startWith( 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217' ) - }).timeout(10000) + }) it('gets the URL from the header if it is a beehiiv newsletter', async () => { - nock('https://u23463625.ct.sendgrid.net') - .head( - '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno' - ) - .reply(302, undefined, { - Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple', - }) - .get('/p/talked-guy-spent-30m-beeple') - .reply(200, '') const html = load('./test/data/beehiiv-newsletter.html') const url = await new BeehiivHandler().findNewsletterUrl(html) expect(url).to.startWith( 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' ) - 
}).timeout(10000) + }) it('returns undefined if it is not a newsletter', async () => { const html = load('./test/data/substack-forwarded-welcome-email.html') const url = await new SubstackHandler().findNewsletterUrl(html) From cdef752a812a579e231929c69a3a899f1f5b3ddf Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Tue, 4 Oct 2022 13:04:52 +0800 Subject: [PATCH 11/54] Embed the apple id button so it doesnt fail when the apple CDN returns 404 --- .../web/components/templates/LoginForm.tsx | 9 +- .../templates/auth/AppleIdButton.tsx | 117 ++++++++++++++++++ 2 files changed, 119 insertions(+), 7 deletions(-) create mode 100644 packages/web/components/templates/auth/AppleIdButton.tsx diff --git a/packages/web/components/templates/LoginForm.tsx b/packages/web/components/templates/LoginForm.tsx index 566276e80..2068d0c2c 100644 --- a/packages/web/components/templates/LoginForm.tsx +++ b/packages/web/components/templates/LoginForm.tsx @@ -8,6 +8,7 @@ import { appleAuthRedirectURI, } from '../../lib/appConfig' import AppleLogin from 'react-apple-login' +import { AppleIdButton } from './auth/AppleIdButton' const StyledTextSpan = styled('span', StyledText) @@ -90,19 +91,13 @@ export function LoginForm(props: LoginFormProps): JSX.Element { height: '40px', }} > - )} diff --git a/packages/web/components/templates/auth/AppleIdButton.tsx b/packages/web/components/templates/auth/AppleIdButton.tsx new file mode 100644 index 000000000..2275c4323 --- /dev/null +++ b/packages/web/components/templates/auth/AppleIdButton.tsx @@ -0,0 +1,117 @@ +// Based on react-apple-login + +import React from "react"; + +export interface AppleLoginProps { + clientId: string; + redirectURI: string; + scope: string; + state?: string; + responseType?: string | "code" | "id_token"; + responseMode?: string | "query" | "fragment" | "form_post"; + nonce?: string; +} + +export const AppleIdButton = (props: AppleLoginProps) => { + const { + clientId, + redirectURI, + state = "", + responseMode = "query", + 
responseType = "code", + nonce = "", + scope, + } = props; + + const onClick = async (e: any = null) => { + if (e) { + e.preventDefault(); + } + + let url = new URL(`https://appleid.apple.com/auth/authorize`) + url.searchParams.append('response_type', responseType) + url.searchParams.append('response_mode', responseMode) + url.searchParams.append('client_id', clientId) + url.searchParams.append('redirect_uri', encodeURIComponent(redirectURI)) + url.searchParams.append('state', state) + url.searchParams.append('nonce', nonce) + url.searchParams.append('scope', responseMode === "query" ? "" : scope) + window.location.href = url.toString() + + } + + return ( + <> +
+
+
+ + + + + + +  Continue with Apple + + + +
+
+
` + }} /> +
+ + ) +} From cf582df40617a542e0f2517a81b5d01aa766b1d6 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Tue, 4 Oct 2022 13:10:31 +0800 Subject: [PATCH 12/54] use const instead of let --- packages/web/components/templates/auth/AppleIdButton.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/web/components/templates/auth/AppleIdButton.tsx b/packages/web/components/templates/auth/AppleIdButton.tsx index 2275c4323..dbd610e64 100644 --- a/packages/web/components/templates/auth/AppleIdButton.tsx +++ b/packages/web/components/templates/auth/AppleIdButton.tsx @@ -28,7 +28,7 @@ export const AppleIdButton = (props: AppleLoginProps) => { e.preventDefault(); } - let url = new URL(`https://appleid.apple.com/auth/authorize`) + const url = new URL(`https://appleid.apple.com/auth/authorize`) url.searchParams.append('response_type', responseType) url.searchParams.append('response_mode', responseMode) url.searchParams.append('client_id', clientId) From dd508f2f63f1e89b10ee69ea1c42e332871bf6c7 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Tue, 4 Oct 2022 13:25:22 +0800 Subject: [PATCH 13/54] Dont double encode the redirect uri --- packages/web/components/templates/auth/AppleIdButton.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/web/components/templates/auth/AppleIdButton.tsx b/packages/web/components/templates/auth/AppleIdButton.tsx index dbd610e64..2457baea4 100644 --- a/packages/web/components/templates/auth/AppleIdButton.tsx +++ b/packages/web/components/templates/auth/AppleIdButton.tsx @@ -32,7 +32,7 @@ export const AppleIdButton = (props: AppleLoginProps) => { url.searchParams.append('response_type', responseType) url.searchParams.append('response_mode', responseMode) url.searchParams.append('client_id', clientId) - url.searchParams.append('redirect_uri', encodeURIComponent(redirectURI)) + url.searchParams.append('redirect_uri', redirectURI) url.searchParams.append('state', state) 
url.searchParams.append('nonce', nonce) url.searchParams.append('scope', responseMode === "query" ? "" : scope) From 8a4777011f2db74eb6ee333ac39531ec15c1a144 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Tue, 4 Oct 2022 13:40:26 +0800 Subject: [PATCH 14/54] Improve readability of dev.to pages --- packages/readabilityjs/Readability.js | 4 +- .../thevaluable.dev/expected-metadata.json | 12 + .../test-pages/thevaluable.dev/expected.html | 421 +++++++ .../test-pages/thevaluable.dev/source.html | 1079 +++++++++++++++++ .../test/test-pages/thevaluable.dev/url.txt | 1 + 5 files changed, 1515 insertions(+), 2 deletions(-) create mode 100644 packages/readabilityjs/test/test-pages/thevaluable.dev/expected-metadata.json create mode 100644 packages/readabilityjs/test/test-pages/thevaluable.dev/expected.html create mode 100644 packages/readabilityjs/test/test-pages/thevaluable.dev/source.html create mode 100644 packages/readabilityjs/test/test-pages/thevaluable.dev/url.txt diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 4afd4b095..31ce3c9bb 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -171,10 +171,10 @@ Readability.prototype = { // Readability-readerable.js. Please keep both copies in sync. 
articleNegativeLookBehindCandidates: /breadcrumbs|breadcrumb|utils|trilist/i, articleNegativeLookAheadCandidates: /outstream(.?)_|sub(.?)_|m_|omeda-promo-|in-article-advert|block-ad-.*/i, - unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h/i, + unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-meta|post-head|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h/i, // okMaybeItsACandidate: /and|article(?!-breadcrumb)|body|column|content|main|shadow|post-header/i, get okMaybeItsACandidate() { - return new RegExp(`and|(? +
+
+
+
+
+
+ + Is inheritance that evil? + +

“You used inheritance in your code! Are you crazy? It’s forbidden! It’s clearly written in the Laws and Mantras of Good Software Practice Everyone Must Follow™ hung in the toilets!”

+

Your thoughts about the nice Youtube video full of cute dogs you saw yesterday stop abruptly. Anxious, you look on your right: Dave, your colleague developer, is yelling at Davina, your desk neighbor. All three of you are working for the fantastic company MegaCorpMoneyMaker, the famous e-commerce which can sell ice to penguins.

+

“That’s true, Dave, you’re right. I used inheritance”, begins Davina. “No need to scream. You could have written it during the code review.”

+

Dave, disarmed by her calm and her honesty, replies: “I… that’s true but… well… I wanted to make an example! We should ban the Demon of Inheritance from the surface of Earth. It will destroy our codebase, our companies, our jobs, and our lives.” He’s now addressing the whole open-space. “Inheritance is evil! It has always been, and it will always be. Composition will save us all!”.

+

Inheritance is considered as a “pillar of OOP” in many articles, books, and other resources on software development. But, at the same time, many developers, like Dave, will recommend not using it in every possible context. Why is that?

+

We’ll try to answer this question throughout this article. In particular, we’ll see:

+
    +
  • The properties of this concept we call inheritance.
  • +
  • A brief history of inheritance to understand where it comes from.
  • +
  • How powerful single inheritance can be.
  • +
  • Composition versus single inheritance.
  • +
  • The legacy of inheritance.
  • +
  • Concrete use of inheritance.
  • +
  • How modern languages (Rust and Golang) implement inheritance.
  • +
+

The few examples of this article are written in PHP. Don’t worry if you don’t know it, it’s very easy to understand (when you don’t do complicated stuff with it). I don’t follow the Perfect Formatting the PHP Grandmasters follow, so don’t feel sad about that.

+

Dave and Davina are ready. Secure yourself and let’s go!

+

What’s Inheritance?

+

After Dave finishes his speech, Davina begins to explain her point of view.

+

“I don’t think inheritance is bad in every situation.”

+

She pauses, considering. “First, To be sure we understand each other, let’s decompose the concept of inheritance. We might discover some misconceptions and learn from each other.”

+

“Misconceptions…” repeat Dave, doubtful. “It’s perfectly clear for me, but go ahead. Let’s see if you understand it”.

+

Defining Inheritance

+

Davina remembers an interesting definition of inheritance encapsulating some core ideas:

+
+

A class may inherit - use by default - the fields and methods of its superclass. Inheritance is transitive, so a class may inherit from another class which inherits from another class, and so on, up to a base class (typically Object, possibly implicit/absent). Subclasses may override some methods and/or fields to alter the default behavior.

+
+

Let’s decompose this definition:

+
    +
  • + superclass: a class which has at least one subclass. +
  • +
  • + subclass: a class which is a descendant of at least one superclass.
      +
    • It can add new methods or properties, or use methods or properties of the superclass.
    • +
    • It can modify (or override) some methods or properties of the superclass.
    • +
    +
  • +
  • + base class : a superclass which is not a subclass. +
  • +
+

In many common programming languages, inheritance has also these two properties you can use together:

+
    +
  • + Multilevel inheritance: a subclass can be a superclass of other subclass(es). +
  • +
  • + Hierarchical inheritance: a superclass can have more than one subclass. +
  • +
+

Finally, inheritance systems can have one of the following properties but not both:

+
    +
  • + Single inheritance: a subclass can only have one superclass. +
  • +
  • + Multiple inheritance: a subclass can have more than one superclass. +
  • +
+

At that point, both Davina and Dave agree to speak mostly about single inheritance. Most programming languages won’t allow you to use multiple inheritance anyway. More on that later.

+

Since inheritance is a hierarchy of superclasses and subclasses, it’s easier to show them than to describe them. Davina draws with the precision of a beaver and the eye of an eagle the following:

+

+ + + Single, multiple, hierarchical, and multilevel inheritance + + +

+

Inheritance for Code Reuse

+

“What can we already see with what we have here?” asks Davina.

+

You answer this one: “We can see that a given subclass inherits all the properties and behaviors of all of its superclasses. It means that the leaves of our tree (the classes without subclasses) concentrate all the behaviors of all their parents”.

+

Imagine if humanity could conceive children with all the knowledge of each of their ancestors. How smart would they be? Well, maybe not as much as you think. The processing power of our brain is limited, that’s why we have difficulty keeping all the details of a complex codebase in our heads. As a result, it’s likely that our children would be lost in an ocean of knowledge.

+

Similarly, the more levels of hierarchy you have in your inheritance tree, the more complex the class inheriting from all this knowledge will be. We will have difficulty thinking and reasoning about these classes if we need to maintain or modify them.

+

Inheritance for Specialization

+

For now, we only talked about inheritance as a way to inherit the implementation of superclasses in subclasses. That’s not all: many programming languages allow you to substitute a superclass with one of its subclasses thanks to polymorphism. The subclasses can be seen as specializations of their superclasses.

+

This is a very important piece of the inheritance puzzle we’re laying out here. Let’s take this exciting example:

+
+
<?php declare(strict_types=1);
+
+class Parser
+{
+    public function count(string $filepath) {
+        printf("I'm counting lines of %s pretty hard!", $filepath);
+    }
+}
+
+class JSONParser extends Parser {
+    public function parse(string $filepath) {
+        printf("I'm parsing some JSON file %s pretty hard!", $filepath);
+    }
+}
+
+class Shipment
+{
+    private Parser $parser;
+
+    public function __construct(Parser $parser) {
+        $this->parser = $parser;
+    }
+
+    public function count(string $filepath) {
+        $this->parser->count($filepath);
+    }
+}
+
+$shipment = new Shipment(new JSONParser());
+$shipment->count("/my/superb/shipment.json");
+
+
+

What’s happening here?

+
    +
  1. We feed to the object Shipment a new JSONParser.
  2. +
Even if Shipment expects an object of type Parser, you can give JSONParser instead because it’s a subclass of Parser. That’s good old polymorphism here!
  4. +
  5. If you run this code, the surprising output “I’m counting lines of /my/superb/shipment.json pretty hard!” is displayed before your amazed eyes. I know, it feels like Christmas.
  6. +
+

Dave, who begins to get bored, shoots back: “I know what’s next. You’ll speak about the superclass Animal and its subclasses, Dog and Platypus. I’m not stupid! I’ve been a Senior Web Developer for 18.3 years now! The CTO told me that I was a Ninja of the Crown this morning and…”.

+

With a graceful and meaningful movement of her left hand, Davina stops Dave in his enthusiastic show-off. Her eyes light up and her calm but determined voice begins to fill the entire open space. You’re still following the conversation, as well as other colleagues who began, curious, to gather around your desks. The tension begins to rise.

+

“Not at all”, begins Davina. “OOP was never meant to represent objects or living creatures surrounding our daily life, like a dog, a car, or a coffee machine. It was invented to solve specific problems linked to software development. In particular, it was designed because we can’t reason about complex systems with our limited brainpower. Inheritance is part of this process of problem solving, and we should analyse it in this context.”

+

She continues. “How many times did you create a Cat class, or a Dog class? You didn’t, not even once. You implemented abstract concepts, like a login or a parser, which have nothing to do with these “real-life” examples. Even if some of your classes are loosely related to real life objects, like a Shipment class, you shouldn’t even use inheritance for those, as we’ll see later.”

+
+

… you should be wary of attaching too much importance to the notion that object-oriented systems are directly deduced from the “real world”.

+
+
+

… we don’t advocate underlining nouns and simplistically modeling things in the real world. […] Their correspondence to real-world things may be tenuous, at best.

+
+

The open space is now silent. Everybody begins to realize the lies which were taught to them all these years.

+

Davina goes on. “These real-life examples confuse you: you have the impression that you understand inheritance because animals and dogs are familiar. It’s only a mere illusion. Inheritance can bring a lot of complexity.”

+

Dave shrugs. Suddenly, Davina stands up, addressing what is now her audience. “Let’s see where inheritance comes from and why it was invented. I’ll tell you now The Story of Inheritance.”

+

You see by the window a lightning streaking across the sky. A storm is coming.

+

A Brief History of Inheritance

+ + Once upon a time in the land of inheritance + +

Inheritance has been a hot subject since the creation of the OOP paradigm itself. The first programming language implementing it was Simula, in the 60s. Simula also created most of the concepts we take for granted in OOP, like classes and objects.

+

The next big step for OOP and inheritance was Smalltalk, a programming language created by Alan Kay and his team at Xerox PARC. It’s interesting to note that the first implementation of Smalltalk didn’t include inheritance. From Alan Kay himself:

+
+

I didn’t like the way Simula I or Simula 67 did inheritance (though I thought Nygaard and Dahl were just tremendous thinkers and designers). So I decided to leave out inheritance as a built-in feature until I understood it better.

+
+

Dan Ingalls was part of Alan Kay’s team and ended up implementing five generations of Smalltalk environments. He liked inheritance, and implemented it in every version of Smalltalk following the first one. It’s where the disagreement with inheritance began in Software Development; this debate continues today.

+

What problem did Dan Ingalls try to solve with inheritance? Code reuse. He wanted a mechanism which could help programmers avoid repeating the same code in different classes. He wanted a system making the knowledge codified in a codebase more general.

+

Another language designer was heavily influenced by Simula: Bjarne Stroustrup, who created “C with classes”. The goal was to design and reason about complex systems more easily. The first implementations of the language were copying many ideas from Simula, including inheritance, while being different from Smalltalk.

+

“C with classes” became C++. As time passed, the language gained a lot of traction: more and more programmers were using it. Smalltalk, after a huge success, began its descent into the Pit of Forgotten Languages. At the end of the 80s, C++ was the only language implementing multiple inheritance, something many believed impossible to achieve.

+

But C++ wasn’t the only attempt to extend C with classes. Apple Computer had its own version called Objective-C. The language was extended by Steve Jobs’s team at NeXT at the beginning of the 90s, and, among other things, they implemented a construct sharing similar properties with single and multiple inheritance. Similar, but not identical: only the interface was inherited, not the implementation. They called this new construct a protocol.

+

Java, in the middle of the 90s, implemented exactly the same thing, with a different name: interface. I hate this name, it’s too easy to confuse this “interface” with the more general idea of interface (ways from the outside of a construct to act on its inside). That’s why I call this Java “interface” the interface construct in my articles.

+

Davina pauses, drinks a bit of water, and adds: “When you read in a random tutorial that a class Dog and a class Platypus inherit from a class Animal, do they speak about code reuse?”

+

You begin to imagine the “code” an Animal could have. Are we in the Matrix?

+

“Not at all. You speak about specialization: a Cat is a specialized form of Animal, although in this context it doesn’t really make sense either. These examples are just lame. Anyway, inheritance is a concept which can bring many powerful features, and that’s its main problem, as we’ll see below. That’s why it was discussed for so long and the concept was ultimately dumbed down”.

+

How Powerful Is Single Inheritance?

+ + Inheritance is as powerful as Hulk! + +

To understand the drawbacks of single inheritance your colleagues, friends, and dogs are complaining about, let’s decompose first what we can precisely do with single inheritance of implementation:

+
    +
  1. You can add some behavior in a subclass.
  2. +
  3. You can override (modify) the behavior of any superclass in a subclass.
  4. +
+

Let’s see what the possible benefits and drawbacks of these two approaches are.

+

Adding Behavior In a Subclass

+

Here’s a slightly modified version of our legendary parser:

+
+
<?php declare(strict_types=1);
+
+class Parser
+{
+    public function count(string $filepath) {
+        printf("I'm counting lines of %s pretty hard!", $filepath);
+    }
+}
+
+
+class JSONParser extends Parser {
+    public function parse(string $filepath) {
+        printf("I'm parsing some JSON file %s pretty hard!", $filepath);
+    }
+}
+
+class Shipment
+{
+    private JSONParser $parser;
+
+    public function __construct(JSONParser $parser) {
+        $this->parser = $parser;
+    }
+
+    public function count(string $filepath) {
+        $this->parser->count($filepath);
+    }
+
+    public function import(string $filepath) {
+        $this->parser->parse($filepath);
+    }
+
+}
+
+$shipment = new Shipment(new JSONParser());
+$shipment->count("/my/superb/shipment.json");
+$shipment->import("/my/superb/shipment.json");
+
+
+

The class JSONParser inherits the implementation of Parser and adds its own method parse.

+

How does our system support changes in this example? If we modify the behavior of the method count from Parser, every subclass inheriting from Parser will get the change too. In that sense, inheritance breaks encapsulation between the superclasses and their subclasses.

+
+

In languages with inheritance, a data abstraction implementation (i.e., a class) has two kinds of users. There are the “outsiders” who simply use the objects by calling the operations. But in addition there are the “insiders.” These are the subclasses, which are typically permitted to violate encapsulation.

+
+

Let’s imagine that Parser has two subclasses, and these subclasses have two more subclasses. You end up with a hierarchy on 3 levels. It doesn’t seem that much of a stretch, but you still end up with seven classes in total. If you modify the base class Parser, six other classes will be affected!

+

We didn’t create a huge inheritance tree here, but changing a superclass has a rippling effect in the whole hierarchy.

+
+

Hierarchical systems seem to have the property that something considered as an undivided entity on one level, is considered as a composite object on the next lower level of greater detail.

+
+

Don’t get me wrong: it can be beneficial if you want each change of a superclass to affect every subclass at every level below. Actually, it’s the main reason why inheritance was invented in the first place: being able to make the code more general and reuse it easily. But you need to be sure that your classes are very cohesive, that is, the change of any superclass needs to affect every subclass.

+

If your classes are not cohesive, you’ll have all the drawbacks of tightly coupled classes in your face: nobody will know if changing a superclass will either cover them with glory and fame or crash the entire system. Your codebase, while growing in complexity, will become impossible to reason about, because you don’t have enough brainpower to build in your head an accurate mental model of all the effects of a change. In short, you’ll end up with one of the problems the OOP paradigm tried to solve in the first place.

+

Overriding Behavior in a Subclass

+

Let’s continue further by adding a new element in our inheritance soup: overriding. Here’s another simple example:

+
+
<?php declare(strict_types=1);
+
+class Parser
+{
+    public function count(string $filepath) {
+        printf("I'm counting lines of %s pretty hard!\n", $filepath);
+    }
+}
+
+
+class JSONParser extends Parser {
+    public function count(string $filepath) {
+        printf("I'm counting JSON objects from file %s pretty hard!\n", $filepath);
+    }
+}
+
+class Shipment
+{
+    private Parser $parser;
+
+    public function __construct(Parser $parser) {
+        $this->parser = $parser;
+    }
+
+    public function count(string $filepath) {
+        $this->parser->count($filepath);
+    }
+}
+
+$shipment = new Shipment(new Parser());
+$shipment->count("/my/superb/shipment");
+
+
+
+

Here we override the method count in our subclass JSONParser. Now, our class Shipment will “work” if we pass either a Parser or a JSONParser to our new Shipment, in the sense that no type error will be thrown.

+

But will it work as intended? What parser to use when we want an instance of Shipment? A Parser? A JSONParser?

+

We could look at the implementation of Shipment, then at the implementation of both JSONParser and Parser to decide what behavior we need depending on the context. But we create objects and abstract behavior not to look at their implementations. It frees us some precious brain power to think about the part of the system we want to modify.

+

Mixing overriding and polymorphism is a recipe for disaster. In an inheritance tree with more classes and more overriding, you need to know which superclass overrides which behavior, whether the subclass overrides what the superclass overrode, and so on.

+

This confusion between inheritance of implementation and specialization led to the Liskov Substitution Principle (LSP).

+

The Liskov Substitution Principle

+ + The LSP is about substituting a parent class with its base class + +

Barbara Liskov was a researcher who won the Turing Award for her work on abstract data types. When she was asked to talk at the keynote of OOPSLA in 1987, she looked at the papers about inheritance hierarchies and how developers were using them. She was pretty disappointed.

+

This keynote led to the paper Data abstraction and hierarchy. From there, some began to speak about a “Liskov Substitution Principle”, often quoting the following:

+
+

If for each object o1 of type S there is an object o2 of type T such that for all programs P defined in terms of T, the behavior of P is unchanged when o1 is substituted for o2, then S is a subtype of T.

+
+

Who doesn’t like good old academic writing full of S, T, o2, and friends? Here’s a clearer way to define the same idea:

+
+

Objects of subtypes should behave like those of supertypes if used via supertype methods.

+
+

It’s with this paper that the concept of subtyping was born.

+

This solves the problem of substitution of subclasses. According to Liskov, if you want to use polymorphism with inheritance, you need to have proper subtyping. How? By following this rule: when you substitute one superclass by its subclass, the behavior of the whole system should be unchanged.

+

Said differently: don’t override anything.

+

With this definition of the LSP, and to come back to our example above, there won’t be any doubt about the consequences of substituting Parser with its subclass JSONParser. We are sure it won’t have unexpected results because JSONParser doesn’t override any behavior of its superclass. Said differently, you would always use the same method count from Parser whatever the subclass of Parser you use.

+

But between Barbara Liskov’s first statement and now, the LSP changed. From “the behavior should stay the same”, we now think that “the behavior shouldn’t break the application”. This last definition is more ambiguous: how do we know that our system still behaves correctly? What does it mean? Do we have every possible test to ensure that it’s the case? If our system doesn’t break but doesn’t follow the specifications either, is it a violation of the LSP?

+

“But wait!” interrupts suddenly Dave. “This is not the Liskov Substitution Principle! This is not how it’s defined in the Holy SOLID Principles!”

+

Davina sighs. “The SOLID principles should be the D principle. The last one is the only one we can still save. The others are misinterpretations of important ideas when they’re not bad ideas. The LSP is a misinterpretation: the definition given by Robert Martin has not much to do with the definition given by Barbara Liskov.”

+

Let’s look at Martin’s definition of the LSP:

+
+

All implementations of interfaces are subtypes of an interface.

+
+

From the example above, the class JSONParser implements the same interface as Parser, but one count method counts the number of lines and the other counts the number of JSON objects. Interface substitution is not what Barbara Liskov was speaking about, and it won’t save your codebase if you try to mix specialization and overriding.

+

Often, developers don’t like strong behavioral subtyping as Barbara Liskov defined it. That’s why the principle was transformed over time. Often, inheritance is used to override the implementation of a superclass. In that regard, it’s interesting to note that inheritance is only interesting for Liskov in the context of subtyping; she doesn’t see any value in using it for inheriting implementation.

+

Why? Because inheritance is not the only solution for code reuse. Many prefer using composition.

+

Composition vs Single Inheritance

+

While the open space is still silent, you begin to feel the tension dropping. The magic word has been pronounced: composition. While inheritance is a demon which tries to eat companies and their employees, composition is the solution to every possible disaster.

+

Composition, Delegation, or Aggregation?

+

“Are we speaking about composition, delegation, or aggregation here?” asks Dave. “What’s the difference?” asks another colleague.

+

Dave, with a smile, begins his explanation: “Look at the examples we were speaking about. A JSONParser is a parser, so inheritance makes sense in that case. But, for example, a Shipment has a parser, that’s why we used composition”.

+

You intervene: “Does it make sense to say that a shipment has a parser? No, here we’re speaking about delegation: the shipment uses a parser, it delegates a task to a parser object. That’s all.”

+

“Really?” begins another colleague. “Are you sure a JSON parser is a parser, or does it have the behavior of a parser?” Another colleague takes part in the conversation: “No! Our Shipment owns a Parser, so we’re speaking about aggregation here!”

+

Outside, the thunder growls again. Everybody begins to speak at the same time, throwing at each other is-a, has-a, part-of, add-to, and other pairs of very short words you can link with a hyphen.

+

Davina listens to the conversation carefully, and when everybody calms down, she gives her opinion: “I see these ‘is-a’ or ‘has-a’ tricks all over the Internet. I also saw many developers defining different flavors of composition, delegation, aggregation, and whatnot. In the end, our problems are often so specific they don’t fit any of these definitions. They are useless in practice. Don’t use them”.

+

She continues. “Using natural language tricks (like is-a or has-a) to decide what solution we should apply to a problem is ambiguous, as we just witnessed. It’s one of the reasons why mathematical notation was invented in the first place: to avoid the ambiguity of natural language. My advice: don’t use these tricks to decide what solution you should use.”

+

To understand what Davina is speaking about, let’s get back to our class Shipment:

+
+
<?php
+class Shipment
+{
+    private JSONParser $parser;
+
+    public function __construct(JSONParser $parser)
+    {
+        $this->parser = $parser;
+    }
+
+    public function import(string $filepath)
+    {
+        $this->parser->parse($filepath);
+    }
+}
+
+
+

When we want to create an instance of Shipment, we need to inject an object of type JSONParser. It doesn’t matter if it’s called aggregation, composition, or delegation. At the end, it boils down to the same simple mechanism: injecting an object into another one.

+

Is Composition Better Than Inheritance?

+

We all know the Mantra of Composition, the one which will bless your codebase with the benediction of The Hasa and the Partof Gods. If you don’t know it yet, I’m sure you’ll hear it a good hundred of times in your career:

+
+

Favor object composition over class inheritance.

+
+

This is from the book Design Patterns, written by the Gang of Four. With a name like that, I’m not sure if they were trying to force some general-but-specific (admire the paradox) solutions to our poor codebases or if their real goals were to create the mafia of software developers. One thing is certain: this book gave to beginners the perfect pretext to show how smart they are by instantly changing a healthy codebase into a legacy mess full of Singleton and Abstract Factories.

+

Like many, I’m no innocent: I’ve chanted the Mantra of Composition for years, without looking at the Mantra in its context. But context is important.

+

So, what are our godfathers, the Gang of Four, saying just after enlightening the world with their Mantra?

+
+

You should be able to get all the functionality you need just by assembling existing components through object composition. But this is rarely the case, because the set of available components is never quite rich enough in practice. Reuse by inheritance makes it easier to make new components that can be composed with old ones. Inheritance and object composition thus work together.

+
+

According to this book, we should favor composition not because inheritance is evil, but because nobody uses it correctly. Well, hopefully we understand it better now.

+

The Benefits of Composition

+

Composition is very useful indeed. Let’s imagine that we inject object A into object B. Here are the benefits:

+
    +
  • If Object A is properly encapsulated, nobody will destroy anything by changing object A’s inner implementation. Object B can suffer the change, but nothing else down the road.
  • +
  • You can use only part of the object’s implementation. Object B doesn’t automatically inherit from the whole object A.
  • +
+

To get back to our Shipment example, this means that the object JSONParser we inject is tightly coupled to the class Shipment, but this coupling stops there. If you instantiate Shipment later and you change the implementation of JSONParser, the class Shipment might need to change, and that’s all.

+

As we saw with inheritance, the problem of tight coupling (or the benefit of cohesion) will affect every layer down the inheritance tree.

+

The Drawbacks of Composition

+

Composition is not the best solution when you want to use many objects or objects with a lot of behavior.

+

Let’s say that you want to use 10 methods from 3 different objects and you want to add some implementation on top: you’ll need to inject your 3 objects, create 10 methods in your new class wrapping the 10 methods of the objects injected, and add more methods to take care of your new functionality.

+

“But I could directly call the object JSONParser from an instance of Shipment”, cut Dave.

+

“That’s true, answers Davina. But it would break the encapsulation of our objects Shipment in that case. It means that everything using the object Shipment would be tightly coupled to the object JSONParser. Encapsulation is broken.”

+

Composition doesn’t bring you the benefits of subtyping either. When you inject an object into a class, you’ll need to inject a precise object if your language has some sort of type checking, and nothing else. In that regard, it constrains you (which can be a good thing!). If your language doesn’t have any type checking and you can just give any object to the constructor of your class, it doesn’t mean that it will work as intended. The problem stays the same.

+

At least, an inheritance hierarchy can indicate what object you can use instead of another and, if it follows a strict form of LSP, nothing should break.

+

“That’s wrong!”, shoot Dave, suddenly. “What about the interface construct? I love these, and you can do some good polymorphism with them without using the Demon of Inheritance!”

+

“You’re right, answers Davina. But using interface constructs is only using another form of inheritance.”

+

Single and Multiple Inheritance Dumbed Down

+ + Over the years, inheritance has seen less powerful implementations. + +

As we saw, inheritance is very powerful, because you can mix reuse of implementation and subtyping in a hierarchy tree as deep as you want it to. This power is its biggest problem: many developers, not knowing all the implications we saw above, have a tendency to misuse inheritance, tightly coupling everything in huge inheritance hierarchies, which led to the Mantra of Composition. That’s why many gave up on single inheritance.

+

But multiple inheritance is even more flawed: the possibility for a subclass to have more than one superclass is making everything very ambiguous. As an example, you can look at the diamond problem. Additionally, multiple inheritance is very complex to implement in a programming language.

+

That’s why the designers of Objective-C and Java restricted inheritance with the protocol and the interface construct respectively. The benefits?

+
    +
  1. A class is forced to use the interface given by a protocol, which guarantees that each subtype has the same interface (but doesn’t guarantee that the substitution will work!).
  2. +
  3. There is no multilevel inheritance anymore.
  4. +
  5. No more inheritance of implementation: the protocol is only an interface.
  6. +
  7. Multiple inheritance of interfaces is much easier to manage than multiple inheritance of implementations.
  8. +
+
+

Please note that even Java includes a limited form of multiple inheritance: inheritance of interfaces.

+
+

Concrete Use of Inheritance

+

We saw already some potential use of inheritance, but can we be more concrete? Over the years I’ve come up with this set of rules:

+
    +
  1. Never use inheritance for classes which are related to the business domain.
  2. +
  3. Sometimes use inheritance for reusing implementation of classes bringing some mechanics.
  4. +
  5. If you use subtypes, always try to respect the original strictness of LSP as much as possible.
  6. +
  7. If you use the interface constructs for subtyping, keep in mind that the implementation of the interface can still break everything.
  8. +
+

These rules are from my experience. Don’t use them as Mantras working in every situation. We should experiment carefully with them and use our brains to see if the technical solutions fit the problem we have.

+

Inheritance and Domain Objects

+

Davina explains further: “When I speak about domain objects, I mean all the objects which are related to the business we work for. In our present case, in MegaCorpMoneyMaker, it would be classes like Shipment, Order, or Product.”

+

Introducing any form of hard coupling or premature abstractions with these objects is always dangerous. They are the representation of real world constructs, and since the real world changes in unpredictable ways, these objects will change in unpredictable ways too. Keep them isolated as much as possible from the mechanical parts of your system.

+

Inheritance and Mechanics

+

You ask Davina: “what do you mean by mechanics”?

+

“These classes are everything which is not a domain object. For example, our classes to parse files represent some mechanisms: they don’t represent anything from our business, they’re just general constructs to parse some files. Objects of this sort are often more general and can be applied in many more contexts than our precise business domain.”

+

For example, it’s not very likely that the world will come up tomorrow with a different definition of stacks. That’s why the object Stack won’t need many changes over time.

+

Anything representing mathematical constructs is a good example too. After all, Mathematics tries to be as disconnected as possible from the real world. It’s when you try to use mathematical concepts on the real world that everything begins to break. That’s what we call applied Mathematics.

+

If we think about it, inheritance creates a hierarchy whose elements are not encapsulated from each other, but the hierarchy itself is encapsulated from the outside. We create a new construct doing so, an aggregation of objects. In that case, inheritance can be useful if you have to codify a general and coherent set of ideas where the properties and behaviors of the different objects will rarely change, or when the whole hierarchy needs to change when one of its members changes predictably.

+

Modern Languages and Inheritance

+

Modern languages often take the decision to implement inheritance differently from “older” languages like Java, Python, Ruby, or PHP. They try hard to differentiate subtyping and inheritance of implementation. For example, in Rust’s documentation:

+
+

If a language must have inheritance to be an object-oriented language, then Rust is not one. There is no way to define a struct that inherits the parent struct’s fields and method implementations.

+
+

But Rust implements some form of inheritance I didn’t cover here: traits. If you need some polymorphism, Rust gives you generic programming like many other languages.

+

Another example: Golang. You can’t do any inheritance of implementation, only composition is allowed. You can also use interface constructs if you want some inheritance of interface.

+ + Inheritance is not evil, it's just too powerful. + +

What did we see in this article?

+
    +
  • Inheritance creates a hierarchical construct composed of superclasses, subclasses, and base classes.
  • +
  • Many common programming languages allow us to create a tree with infinite depth (multilevel inheritance) and width (hierarchical inheritance).
  • +
  • Your system can become hard to maintain if you mix two of the Three Power Gems of Inheritance in the same soup:
      +
    • Inheritance of implementation.
    • +
    • Substituting superclasses with their subclasses (subtyping if it follows some rules).
    • +
    • Multilevel inheritance.
    • +
    +
  • +
  • Most of the time, composition seems to be the best alternative to inheritance of implementation.
  • +
  • Everything is tightly coupled in an inheritance hierarchy, but this blurb of classes is still encapsulated from the outside.
  • +
  • Inheritance was a crude concept defined at the beginning of OOP and later refined with, for example, the interface construct.
  • +
  • Inheritance can be useful for the part of your system which won’t change too much (mechanical part).
  • +
+

If you need to retain one thing from all of that: don’t use DRY, or inheritance, or composition before you understand clearly what problem you’re trying to solve and its context. These concepts should be used when you refactor your code; consider the first writing as a messy draft and, in that spirit, defer all the important decisions making your design hard to change as much as you can.

+

What should be together and what should not (cohesion) is one of these important decisions. What should be under a layer of indirection using interface constructs is another.

+

If you think it’s a good idea to use an inheritance hierarchy, begin with a small one and see how it behaves in your system over time.

+

Davina concludes:

+

“The concept of inheritance was refined over the years and gave us the constructs we use today, like the interface construct. In that sense, inheritance is definitely a pillar of the OOP paradigm. But it’s true that mixing features which are not necessarily orthogonal makes inheritance difficult to harness in many programming languages.”

+

Everybody is silent now. The storm outside stopped. Dave is thinking hard, like everybody in the open space of MegaCorpMoneyMaker.

+

At the end, you should always read the documentation of the programming languages you’re using to see exactly how they implement inheritance. You’ll now be able to guess why the language’s designers made their decisions and how you can use their implementation of inheritance effectively.

+
+
+
+
+
\ No newline at end of file diff --git a/packages/readabilityjs/test/test-pages/thevaluable.dev/source.html b/packages/readabilityjs/test/test-pages/thevaluable.dev/source.html new file mode 100644 index 000000000..8d30838b1 --- /dev/null +++ b/packages/readabilityjs/test/test-pages/thevaluable.dev/source.html @@ -0,0 +1,1079 @@ + + + + + + + + + + + + + + + + + + + + Is Inheritance That Evil? + + + + + + + + + + + + + + + + + + +
+ +
+ The Valuable Dev + + +
+ +
+
+
+ +

+ Is Inheritance That Evil? +

+
+
+ Is inheritance that evil? +

+ “You used inheritance in your code! Are you crazy? It’s forbidden! It’s clearly written in the Laws and Mantras of Good Software Practice Everyone Must Follow™ hung in the toilets!” +

+

+ Your thoughts about the nice Youtube video full of cute dogs you saw yesterday stop abruptly. Anxious, you look on your right: Dave, your colleague developer, is yelling at Davina, your desk neighbor. All three of you are working for the fantastic company MegaCorpMoneyMaker, the famous e-commerce which can sell ice to penguins. +

+

+ “That’s true, Dave, you’re right. I used inheritance”, begins Davina. “No need to scream. You could have written it during the code review.” +

+

+ Dave, disarmed by her calm and her honesty, replies: “I… that’s true but… well… I wanted to make an example! We should ban the Demon of Inheritance from the surface of Earth. It will destroy our codebase, our companies, our jobs, and our lives.” He’s now addressing the whole open-space. “Inheritance is evil! It has always been, and it will always be. Composition will save us all!”. +

+

+ Inheritance is considered as a “pillar of OOP” in many articles, books, and other resources on software development. But, at the same time, many developers, like Dave, will recommend not using it in every possible context. Why is that? +

+

+ We’ll try to answer this question throughout this article. In particular, we’ll see: +

+
    +
  • The properties of this concept we call inheritance. +
  • +
  • A brief history of inheritance to understand where it comes from. +
  • +
  • How powerful single inheritance can be. +
  • +
  • Composition versus single inheritance. +
  • +
  • The legacy of inheritance. +
  • +
  • Concrete use of inheritance. +
  • +
  • How modern languages (Rust and Golang) implement inheritance. +
  • +
+

+ The few examples of this article are written in PHP. Don’t worry if you don’t know it, it’s very easy to understand (when you don’t do complicated stuff with it). I don’t follow the Perfect Formatting the PHP Grandmasters follow, so don’t feel sad about that. +

+

+ Dave and Davina are ready. Secure yourself and let’s go! +

+

+ What’s Inheritance? +

+

+ After Dave finishes his speech, Davina begins to explain her point of view. +

+

+ “I don’t think inheritance is bad in every situation.” +

+

+ She pauses, considering. “First, to be sure we understand each other, let’s decompose the concept of inheritance. We might discover some misconceptions and learn from each other.” +

+

+ “Misconceptions…” repeats Dave, doubtful. “It’s perfectly clear to me, but go ahead. Let’s see if you understand it”. +

+

+ Defining Inheritance +

+

+ Davina remembers an interesting definition of inheritance encapsulating some core ideas: +

+
+

+ A class may inherit - use by default - the fields and methods of its superclass. Inheritance is transitive, so a class may inherit from another class which inherits from another class, and so on, up to a base class (typically Object, possibly implicit/absent). Subclasses may override some methods and/or fields to alter the default behavior. +

+ +
+

+ Let’s decompose this definition: +

+
    +
  • + superclass: a class which has at least one subclass. +
  • +
  • + subclass: a class which is a descendant of at least one superclass. +
      +
    • It can add new methods or properties, or use methods or properties of the superclass. +
    • +
    • It can modify (or override) some methods or properties of the superclass. +
    • +
    +
  • +
  • + base class : a superclass which is not a subclass. +
  • +
+

+ In many common programming languages, inheritance has also these two properties you can use together: +

+
    +
  • + Multilevel inheritance: a subclass can be a superclass of other subclass(es). +
  • +
  • + Hierarchical inheritance: a superclass can have more than one subclass. +
  • +
+

+ Finally, inheritance systems can have one of the following properties but not both: +

+
    +
  • + Single inheritance: a subclass can only have one superclass. +
  • +
  • + Multiple inheritance: a subclass can have more than one superclass. +
  • +
+

+ At that point, both Davina and Dave agree to speak mostly about single inheritance. Most programming languages won’t allow you to use multiple inheritance anyway. More on that later. +

+

+ Since inheritance is a hierarchy of superclasses and subclasses, it’s easier to show them than to describe them. Davina draws with the precision of a beaver and the eye of an eagle the following: +

+

+ Single, multiple, hierarchical, and multilevel inheritance +

+

+ Inheritance for Code Reuse +

+

+ “What can we already see with what we have here?” asks Davina. +

+

+ You answer this one: “We can see that a given subclass inherits all the properties and behaviors of all of its superclasses. It means that the leaves of our tree (the subclasses without subclasses of their own) concentrate all the behaviors of all their parents”. +

+

+ Imagine if humanity could conceive children with all the knowledge of each of their ancestors. How smart would they be? Well, maybe not as much as you think. The processing power of our brain is limited, that’s why we have difficulty to put in our head all the details of a complex codebase. As a result, it’s likely that our children would be lost in an ocean of knowledge. +

+

+ Similarly, the more levels of hierarchy you have in your inheritance tree, the more complex the class inheriting from all this knowledge will be. We will have difficulties to think and reason about them if we need to maintain or modify them. +

+

+ Inheritance for Specialization +

+

+ For now, we only talked about inheritance as a way to inherit the implementation of superclasses in subclasses. That’s not all: many programming languages allow you to substitute a superclass by one of its subclass thanks to polymorphism. The subclasses can be seen as specialization of their superclasses. +

+

+ This is a very important piece of the inheritance puzzle we’re laying out here. Let’s take this exciting example: +

+
+
<?php declare(strict_types=1);
+
+class Parser
+{
+    public function count(string $filepath) {
+        printf("I'm counting lines of %s pretty hard!", $filepath);
+    }
+}
+
+class JSONParser extends Parser {
+    public function parse(string $filepath) {
+        printf("I'm parsing some JSON file %s pretty hard!", $filepath);
+    }
+}
+
+class Shipment
+{
+    private Parser $parser;
+
+    public function __construct(Parser $parser) {
+        $this->parser = $parser;
+    }
+
+    public function count(string $filepath) {
+        $this->parser->count($filepath);
+    }
+}
+
+$shipment = new Shipment(new JSONParser());
+$shipment->count("/my/superb/shipment.json");
+
+
+

+ What’s happening here? +

+
    +
  1. We feed to the object Shipment a new JSONParser. +
  2. +
  3. Even if Shipment expects an object of type Parser, you can give JSONParser instead because it’s a subclass of Parser. That’s good old polymorphism here! +
  4. +
  5. If you run this code, the surprising output “I’m counting lines of /my/superb/shipment.json pretty hard!” is displayed before your amazed eyes. I know, it feels like Christmas. +
  6. +
+

+ Dave, who begins to get bored, snaps: “I know what’s next. You’ll speak about the superclass Animal and its subclasses, Dog and Platypus. I’m not stupid! I’ve been a Senior Web Developer for 18.3 years now! The CTO told me that I was a Ninja of the Crown this morning and…” +

+

+ With a gracious and meaningful movement of her left hand, Davina stops Dave in his enthusiastic show-off. Her eyes light up and her calm but determined voice begins to fill the entire open space. You’re still following the conversation, as well as other colleagues who began, curious, to gather around your desks. The tension begins to rise. +

+

+ “Not at all”, begins Davina. “OOP was never meant to represent objects or living creatures surrounding our daily life, like a dog, a car, or a coffee machine. It was invented to solve specific problems linked to software development. In particular, it was designed because we can’t reason about complex systems with our limited brainpower. Inheritance is part of this process of problem solving, and we should analyse it in this context.” +

+

+ She continues. “How many times did you create a Cat class, or a Dog class? You didn’t, not even once. You implemented abstract concepts, like a login or a parser, which have nothing to do with these “real-life” examples. Even if some of your classes are loosely related to real life objects, like a Shipment class, you shouldn’t even use inheritance for those, as we’ll see later.” +

+
+

+ … you should be wary of attaching too much importance to the notion that object-oriented systems are directly deduced from the “real world”. +

+ +
+
+

+ … we don’t advocate underlining nouns and simplistically modeling things in the real world. […] Their correspondence to real-world things may be tenuous, at best. +

+
+ Rebecca Wirfs-Brock Source +
+
+

+ The open space is now silent. Everybody begins to realize the lies which were taught to them all these years. +

+

+ Davina goes on. “These real-life examples confuse you: you have the impression that you understand inheritance because animals and dogs are familiar. It’s only a mere illusion. Inheritance can bring a lot of complexity.” +

+

+ Dave shrugs. Suddenly, Davina stands up, addressing what is now her audience. “Let’s see where inheritance comes from and why it was invented. I’ll tell you now The Story of Inheritance.” +

+

+ You see by the window a lightning streaking across the sky. A storm is coming. +

+

+ A Brief History of Inheritance +

Once upon a time in the land of inheritance +

+ Inheritance has been a hot subject since the creation of the OOP paradigm itself. The first programming language implementing it was Simula in the 60s. Simula also created most of the concepts we take for granted in OOP, like classes and objects. +

+

+ The next big step for OOP and inheritance was Smalltalk, a programming language created by Alan Kay and his team at Xerox PARC. It’s interesting to note that the first implementation of Smalltalk didn’t include inheritance. From Alan Kay himself: +

+
+

+ I didn’t like the way Simula I or Simula 67 did inheritance (though I thought Nygaard and Dahl were just tremendous thinkers and designers). So I decided to leave out inheritance as a built-in feature until I understood it better. +

+ +
+

+ Dan Ingalls was part of Alan Kay’s team and ended up implementing five generations of Smalltalk environments. He liked inheritance, and implemented it in every version of Smalltalk following the first one. It’s where the disagreement with inheritance began in Software Development; this debate continues today. +

+

+ What problem did Dan Ingalls try to solve with inheritance? Code reuse. He wanted a mechanism which could help programmers avoid repeating the same code in different classes. He wanted a system making the knowledge codified in a codebase more general. +

+

+ Another language designer was heavily influenced by Simula: Bjarne Stroustrup, who created “C with classes”. The goal was to design and reason about complex system more easily. The first implementations of the language were copying many ideas from Simula, including inheritance, while being different from Smalltalk. +

+

+ “C with classes” became C++. As time passed, the language gained a lot of traction: more and more programmers were using it. Smalltalk, after a huge success, began its descent into the Pit of Forgotten Languages. At the end of the 80s, C++ was the only language implementing multiple inheritance, something many believed impossible to achieve. +

+

+ But C++ wasn’t the only attempt to extend C with classes. Apple Computer had its own version called Objective-C. The language was extended by Steve Jobs’s team at NeXT at the beginning of the 90s, and, among other things, they implemented a construct sharing similar properties with single and multiple inheritance. Similar, but not identical: only the interface was inherited, not the implementation. They called this new construct a protocol. +

+

+ Java, in the middle of the 90s, implemented exactly the same thing, with a different name: interface. I hate this name, it’s too easy to confuse this “interface” with the more general idea of interface (ways from the outside of a construct to act on its inside). That’s why I call this Java “interface” the interface construct in my articles. +

+

+ Davina pauses, drinks a bit of water, and adds: “When you read in a random tutorial that a class Dog and a class Platypus inherit from a class Animal, do they speak about code reuse?” +

+

+ You begin to imagine the “code” an Animal could have. Are we in the Matrix? +

+

+ “Not at all. You speak about specialization: a Cat is a specialized form of Animal, although in this context it doesn’t really make sense either. These examples are just lame. Anyway, inheritance is a concept which can bring many powerful features, and that’s its main problem, as we’ll see below. That’s why it was discussed for so long and the concept was ultimately dumbed down”. +

+

+ How Powerful Is Single Inheritance? +

Inheritance is as powerful as Hulk! +

+ To understand the drawbacks of single inheritance your colleagues, friends, and dogs are complaining about, let’s decompose first what we can precisely do with single inheritance of implementation: +

+
    +
  1. You can add some behavior in a subclass. +
  2. +
  3. You can override (modify) the behavior of any superclass in a subclass. +
  4. +
+

+ Let’s see what the possible benefits and drawbacks of these two approaches are. +

+

+ Adding Behavior In a Subclass +

+

+ Here’s a slightly modified version of our legendary parser: +

+
+
<?php declare(strict_types=1);
+
+class Parser
+{
+    public function count(string $filepath) {
+        printf("I'm counting lines of %s pretty hard!", $filepath);
+    }
+}
+
+
+class JSONParser extends Parser {
+    public function parse(string $filepath) {
+        printf("I'm parsing some JSON file %s pretty hard!", $filepath);
+    }
+}
+
+class Shipment
+{
+    private JSONParser $parser;
+
+    public function __construct(JSONParser $parser) {
+        $this->parser = $parser;
+    }
+
+    public function count(string $filepath) {
+        $this->parser->count($filepath);
+    }
+
+    public function import(string $filepath) {
+        $this->parser->parse($filepath);
+    }
+
+}
+
+$shipment = new Shipment(new JSONParser());
+$shipment->count("/my/superb/shipment.json");
+$shipment->import("/my/superb/shipment.json");
+
+
+

+ The class JSONParser inherits the implementation of Parser and adds its own method parse. +

+

+ How does our system support changes in this example? If we modify the behavior of the method count from Parser, every subclass inheriting from Parser will get the change too. In that sense, inheritance breaks encapsulation between the superclasses and their subclasses. +

+
+

+ In languages with inheritance, a data abstraction implementation (i.e., a class) has two kinds of users. There are the “outsiders” who simply use the objects by calling the operations. But in addition there are the “insiders.” These are the subclasses, which are typically permitted to violate encapsulation. +

+ +
+

+ Let’s imagine that Parser has two subclasses, and these subclasses have two more subclasses. You end up with a hierarchy on 3 levels. It doesn’t seem that much of a stretch, but you still end up with seven classes in total. If you modify the base class Parser, six other classes will be affected! +

+

+ We didn’t create a huge inheritance tree here, but changing a superclass has a rippling effect in the whole hierarchy. +

+
+

+ Hierarchical systems seem to have the property that something considered as an undivided entity on one level, is considered as a composite object on the next lower level of greater detail. +

+ +
+

+ Don’t get me wrong: it can be beneficial if you want each change of a superclass to affect every subclass at every level below. Actually, it’s the main reason why inheritance was invented in the first place: being able to make the code more general and reusing it easily. But you need to be sure that your classes are very cohesive, that is, the change of any superclass needs to affect every subclass. +

+

+ If your classes are not cohesive, you’ll have all the drawbacks of tightly coupled classes in your face: nobody will know if changing a superclass will either cover them with glory and fame or crash the entire system. Your codebase, while growing in complexity, will become impossible to reason about, because you don’t have enough brainpower to build in your head an accurate mental model of all the effects of a change. In short, you’ll end up with one of the problems the OOP paradigm tried to solve in the first place. +

+

+ Overriding Behavior in a Subclass +

+

+ Let’s continue further by adding a new element in our inheritance soup: overriding. Here’s another simple example: +

+
+
<?php declare(strict_types=1);
+
+class Parser
+{
+    public function count(string $filepath) {
+        printf("I'm counting lines of %s pretty hard!\n", $filepath);
+    }
+}
+
+
+class JSONParser extends Parser {
+    public function count(string $filepath) {
+        printf("I'm counting JSON objects from file %s pretty hard!\n", $filepath);
+    }
+}
+
+class Shipment
+{
+    private Parser $parser;
+
+    public function __construct(Parser $parser) {
+        $this->parser = $parser;
+    }
+
+    public function count(string $filepath) {
+        $this->parser->count($filepath);
+    }
+}
+
+$shipment = new Shipment(new Parser());
+$shipment->count("/my/superb/shipment");
+
+
+
+

+ We override here the method count in our subclass JSONParser. Now, our class Shipment will “work” if we pass to our new Shipment both Parser or JSONParser, in the sense that no type error will be thrown. +

+

+ But will it work as intended? What parser to use when we want an instance of Shipment? A Parser? A JSONParser? +

+

+ We could look at the implementation of Shipment, then at the implementation of both JSONParser and Parser to decide what behavior we need depending on the context. But we create objects and abstract behavior not to look at their implementations. It frees us some precious brain power to think about the part of the system we want to modify. +

+

+ Mixing overriding and polymorphism is a recipe for disaster. In an inheritance tree with more classes and more overriding, you need to know which superclass overrides which behavior, whether the subclass overrides what the superclass overrode, and so on. +

+

+ This confusion between inheritance of implementation and specialization led to the Liskov Substitution Principle (LSP). +

+

+ The Liskov Substitution Principle +

The LSP is about substituting a superclass with its subclass +

+ Barbara Liskov was a researcher who won the Turing Award for her work on abstract data types. When she was asked to talk at the keynote of OOPSLA in 1987, she looked at the papers about inheritance hierarchies and how developers were using them. She was pretty disappointed. +

+

+ This keynote led to the paper Data abstraction and hierarchy. From there, some began to speak about a “Liskov Substitution Principle”, often quoting the following: +

+
+

+ If for each object o1 of type S there is an object o2 of type T such that for all programs P defined in terms of T, the behavior of P is unchanged when o1 is substituted for o2, then S is a subtype of T. +

+ +
+

+ Who doesn’t like good old academic writing full of S, T, o2, and friends? Here’s a clearer way to define the same idea: +

+
+

+ Objects of subtypes should behave like those of supertypes if used via supertype methods. +

+ +
+

+ It’s with this paper that the concept of subtyping was born. +

+

+ This solves the problem of substitution of subclasses. According to Liskov, if you want to use polymorphism with inheritance, you need to have proper subtyping. How? By following this rule: when you substitute one superclass by its subclass, the behavior of the whole system should be unchanged. +

+

+ Said differently: don’t override anything. +

+

+ With this definition of the LSP, and to come back to our example above, there won’t be any doubt about the consequences of substituting Parser by its subclass JSONParser. We are sure it won’t have unexpected results because JSONParser doesn’t override any behavior of its superclass. Said differently, you would always use the same method count from Parser whatever the subclass of Parser you use. +

+

+ But between Barbara Liskov’s first statement and now, the LSP has changed. From “the behavior should stay the same”, we now think that “the behavior shouldn’t break the application”. This last definition is more ambiguous: how do we know that our system still behaves correctly? What does it mean? Do we have every possible test to ensure that it’s the case? If our system doesn’t break but doesn’t follow the specifications either, is it a violation of the LSP? +

+

+ “But wait!” interrupts suddenly Dave. “This is not the Liskov Substitution Principle! This is not how it’s defined in the Holy SOLID Principles!” +

+

+ Davina sighs. “The SOLID principles should be the D principle. The last one is the only one we can still save. The others are misinterpretations of important ideas when they’re not bad ideas. The LSP is a misinterpretation: the definition given by Robert Martin has not much to do with the definition given by Barbara Liskov.” +

+

+ Let’s look at Martin’s definition of the LSP: +

+
+

+ All implementations of interfaces are subtypes of an interface. +

+ +
+

+ From the example above, the class JSONParser implements the same interface as Parser, but one method count counts the number of lines and the other counts the number of JSON objects. Interface substitution is not what Barbara Liskov was speaking about, and it won’t save your codebase if you try to mix specialization and overriding. +

+

+ Often, developers don’t like strong behavioral subtyping as Barbara Liskov defined it. That’s why the principle was transformed over time. Often, inheritance is used to override the implementation of a superclass. In that regard, it’s interesting to note that inheritance is only interesting for Liskov in the context of subtyping; she doesn’t see any value in using it for inheriting implementation. +

+

+ Why? Because inheritance is not the only solution for code reuse. Many prefer using composition. +

+

+ Composition vs Single Inheritance +

+

+ While the open space is still silent, you begin to feel the tension dropping. The magic word has been pronounced: composition. While inheritance is a demon which tries to eat companies and their employees, composition is the solution to every possible disaster. +

+

+ Composition, Delegation, or Aggregation? +

+

+ “Are we speaking about composition, delegation, or aggregation here?” asks Dave. “What’s the difference?” asks another colleague. +

+

+ Dave, with a smile, begins his explanation: “Look at the examples we were speaking about. A JSONParser is a parser, so inheritance makes sense in that case. But, for example, a Shipment has a parser, that’s why we used composition”. +

+

+ You intervene: “Does it make sense to say that a shipment has a parser? No, here we’re speaking about delegation: the shipment uses a parser, it delegates a task to a parser object. That’s all.” +

+

+ “Really?” begins another colleague. “Are you sure a JSON parser is a parser, or does it have the behavior of a parser?” Another colleague takes part in the conversation: “No! Our Shipment owns a Parser, so we’re speaking about aggregation here!” +

+

+ Outside, the thunder growls again. Everybody begins to speak at the same time, throwing at each other is-a, has-a, part-of, add-to, and other pairs of very short words you can link with a hyphen. +

+

+ Davina listens to the conversation carefully, and when everybody calms down, she gives her opinion: “I see these ‘is-a’ or ‘has-a’ tricks all over the Internet. I also saw many developers defining different flavors of composition, delegation, aggregation, and whatnot. In the end, our problems are often so specific they don’t fit any of these definitions. They are useless in practice. Don’t use them.” +

+

+ She continues. “Using natural language tricks (like is-a or has-a) to decide what solution we should apply to a problem is ambiguous, as we just witnessed. It’s one of the reasons why mathematical notation was invented in the first place: to avoid the ambiguity of natural language. My advice: don’t use these tricks to decide what solution you should use.” +

+

+ To understand what Davina is speaking about, let’s get back to our class Shipment: +

+
+
<?php
+class Shipment
+{
+    private JSONParser $parser;
+
+    public function __construct(JSONParser $parser)
+    {
+        $this->parser = $parser;
+    }
+
+    public function import(string $filepath)
+    {
+        $this->parser->parse($filepath);
+    }
+}
+
+
+

+ When we want to create an instance of Shipment, we need to inject an object of type JSONParser. It doesn’t matter if it’s called aggregation, composition, or delegation. At the end, it boils down to the same simple mechanism: injecting an object into another one. +

+

+ Is Composition Better Than Inheritance? +

+

+ We all know the Mantra of Composition, the one which will bless your codebase with the benediction of The Hasa and the Partof Gods. If you don’t know it yet, I’m sure you’ll hear it a good hundred times in your career: +

+
+

+ Favor object composition over class inheritance. +

+
+

+ This is from the book Design Patterns, written by the Gang of Four. With a name like that, I’m not sure if they were trying to force some general-but-specific (admire the paradox) solutions to our poor codebases or if their real goals were to create the mafia of software developers. One thing is certain: this book gave to beginners the perfect pretext to show how smart they are by instantly changing a healthy codebase into a legacy mess full of Singleton and Abstract Factories. +

+

+ Like many, I’m no innocent: I’ve chanted the Mantra of Composition for years, without looking at the Mantra in its context. But context is important. +

+

+ So, what are our godfathers, the Gang of Four, saying just after enlightening the world with their Mantra? +

+
+

+ You should be able to get all the functionality you need just by assembling existing components through object composition. But this is rarely the case, because the set of available components is never quite rich enough in practice. Reuse by inheritance makes it easier to make new components that can be composed with old ones. Inheritance and object composition thus work together. +

+ +
+

+ According to this book, we should favor composition not because inheritance is evil, but because nobody uses it correctly. Well, hopefully we understand it better now. +

+

+ The Benefits of Composition +

+

+ Composition is very useful indeed. Let’s imagine that we inject object A into object B. Here are the benefits: +

+
    +
  • If object A is properly encapsulated, nobody will destroy anything by changing object A’s inner implementation. Object B can suffer from the change, but nothing else down the road. +
  • +
  • You can use only part of the object’s implementation. Object B doesn’t automatically inherit from the whole object A. +
  • +
+

+ To get back to our Shipment example, this means that the object JSONParser we inject is tightly coupled to the class Shipment, but this coupling stops there. If you instantiate Shipment later and you change the implementation of JSONParser, the class Shipment might need to change, and that’s all. +

+

+ As we saw with inheritance, the problem of tight coupling (or the benefit of cohesion) will affect every layer down the inheritance tree. +

+

+ The Drawbacks of Composition +

+

+ Composition is not the best solution when you want to use many objects or objects with a lot of behavior. +

+

+ Let’s say that you want to use 10 methods from 3 different objects and you want to add some implementation on top: you’ll need to inject your 3 objects, create 10 methods in your new class wrapping the 10 methods of the objects injected, and add more methods to take care of your new functionality. +

+

+ “But I could directly call the object JSONParser from an instance of Shipment”, cuts in Dave. +

+

+ “That’s true”, answers Davina. “But it would break the encapsulation of our Shipment objects in that case. It means that everything using the object Shipment would be tightly coupled to the object JSONParser. Encapsulation is broken.” +

+

+ Composition doesn’t bring you the benefits of subtyping either. When you inject an object into a class, you’ll need to inject a precise object if your language has some sort of type checking, and nothing else. In that regard, it constrains you (which can be a good thing!). If your language doesn’t have any type checking and you can just give any object to the constructor of your class, it doesn’t mean that it will work as intended. The problem stays the same. +

+

+ At least, an inheritance hierarchy can indicate what object you can use instead of another and, if it follows a strict form of LSP, nothing should break. +

+

+ “That’s wrong!”, shouts Dave, suddenly. “What about the interface construct? I love these, and you can do some good polymorphism with them without using the Demon of Inheritance!” +

+

+ “You’re right”, answers Davina. “But using interface constructs is only using another form of inheritance.” +

+

+ Single and Multiple Inheritance Dumbed Down +

Over the years, inheritance has seen less powerful implementations. +

+ As we saw, inheritance is very powerful, because you can mix reuse of implementation and subtyping in a hierarchy tree as deep as you want it to. This power is its biggest problem: many developers, not knowing all the implications we saw above, have a tendency to misuse inheritance, tightly coupling everything in huge inheritance hierarchies, which led to the Mantra of Composition. That’s why many gave up on single inheritance. +

+

+ But multiple inheritance is even more flawed: the possibility for a subclass to have more than one superclass is making everything very ambiguous. As an example, you can look at the diamond problem. Additionally, multiple inheritance is very complex to implement in a programming language. +

+

+ That’s why the designers of Objective-C and Java restricted inheritance with the protocol and the interface construct respectively. The benefits? +

+
    +
  1. A class is forced to use the interface given by a protocol, which guarantees that each subtype has the same interface (but doesn’t guarantee that the substitution will work!). +
  2. +
  3. There is no multilevel inheritance anymore. +
  4. +
  5. No more inheritance of implementation: the protocol is only an interface. +
  6. +
  7. Multiple inheritance of interfaces is much easier to manage than multiple inheritance of implementations. +
  8. +
+
+

+ “Please note that even Java includes a limited form of multiple inheritance: inheritance of interfaces.” +

+
+ Bjarne Stroustrup Source +
+
+

+ Concrete Use of Inheritance +

+

+ We already saw some potential uses of inheritance, but can we be more concrete? Over the years I’ve come up with this set of rules: +

+
    +
  1. Never use inheritance for classes which are related to the business domain. +
  2. +
  3. Sometimes use inheritance for reusing implementation of classes bringing some mechanics. +
  4. +
  5. If you use subtypes, always try to respect the original strictness of LSP as much as possible. +
  6. +
  7. If you use the interface constructs for subtyping, keep in mind that the implementation of the interface can still break everything. +
  8. +
+

+ These rules are from my experience. Don’t use them as Mantras working in every situation. You should experiment carefully with them and use your brain to see if the technical solutions fit the problem you have. +

+

+ Inheritance and Domain Objects +

+

+ Davina explains further: “When I speak about domain objects, I mean all the objects which are related to the business we work for. In our present case, in MegaCorpMoneyMaker, it would be classes like Shipment, Order, or Product.” +

+

+ Introducing any form of hard coupling or premature abstractions with these objects is always dangerous. They are the representation of real world constructs, and since the real world changes in unpredictable manners, these objects will change in unpredictable manners too. Keep them isolated as much as possible from the mechanical parts of your system. +

+

+ Inheritance and Mechanics +

+

+ You ask Davina: “what do you mean by mechanics”? +

+

+ “These classes are everything which is not a domain object. For example, our classes to parse files represent some mechanisms: they don’t represent anything from our business, they’re just general constructs to parse some files. Objects of this sort are often more general and can be applied in many more contexts than our precise business domain.” +

+

+ For example, it’s not very likely that the world will come up tomorrow with a different definition of stacks. That’s why the object Stack won’t need many changes over time. +

+

+ Anything representing a mathematical construct is a good example too. After all, Mathematics tries to be as disconnected as possible from the real world. It’s when you try to use mathematical concepts on the real world that everything begins to break. That’s what we call applied Mathematics. +

+

+ If we think about it, inheritance creates a hierarchy whose elements are not encapsulated from each other, but the hierarchy itself is encapsulated from the outside. We create a new construct doing so, an aggregation of objects. In that case, inheritance can be useful if you have to codify a general and coherent set of ideas where the properties and behaviors of the different objects will rarely change, or when the whole hierarchy needs to change when one of its members changes predictably. +

+

+ Modern Languages and Inheritance +

+

+ Modern languages often take the decision to implement inheritance differently from “older” languages like Java, Python, Ruby, or PHP. They try hard to differentiate subtyping and inheritance of implementation. For example, in Rust’s documentation: +

+
+

+ If a language must have inheritance to be an object-oriented language, then Rust is not one. There is no way to define a struct that inherits the parent struct’s fields and method implementations. +

+ +
+

+ But Rust implements some form of inheritance I didn’t cover here: traits. If you need some polymorphism, Rust gives you generic programming like many other languages. +

+

+ Another example: Golang. You can’t do any inheritance of implementation, only composition is allowed. You can also use interface constructs if you want some inheritance of interface. +

+

+ Inheritance Is Not Evil +

Inheritance is not evil, it's just too powerful. +

+ What did we see in this article? +

+
    +
  • Inheritance creates a hierarchical construct composed of superclasses, subclasses, and base classes. +
  • +
  • Many common programming languages allow us to create a tree with infinite depth (multilevel inheritance) and width (hierarchical inheritance). +
  • +
  • Your system can become hard to maintain if you mix two of the Three Power Gems of Inheritance in the same soup: +
      +
    • Inheritance of implementation. +
    • +
    • Substituting superclasses with their subclasses (subtyping if it follows some rules). +
    • +
    • Multilevel inheritance. +
    • +
    +
  • +
  • Most of the time, composition seems to be the best alternative to inheritance of implementation. +
  • +
  • Everything is tightly coupled in an inheritance hierarchy, but this blurb of classes is still encapsulated from the outside. +
  • +
  • Inheritance was a crude concept defined at the beginning of OOP and later refined with, for example, the interface construct. +
  • +
  • Inheritance can be useful for the part of your system which won’t change too much (mechanical part). +
  • +
+

+ If you need to retain one thing from all of that: don’t use DRY, or inheritance, or composition before you understand clearly what problem you’re trying to solve and its context. These concepts should be used when you refactor your code; consider the first writing as a messy draft and, in that spirit, defer, as much as you can, all the important decisions that make your design hard to change. +

+

+ What should be together and what should not (cohesion) is one of these important decisions. What should be under a layer of indirection using interface constructs is another. +

+

+ If you think it’s a good idea to use an inheritance hierarchy, begin with a small one and see how it behaves in your system over time. +

+

+ Davina concludes: +

+

+ “The concept of inheritance was refined over the years and gave us the constructs we use today, like the interface construct. In that sense, inheritance is definitely a pillar of the OOP paradigm. But it’s true that mixing features which are not necessarily orthogonal makes inheritance difficult to harness in many programming languages.” +

+

+ Everybody is silent now. The storm outside stopped. Dave is thinking hard, like everybody in the open space of MegaCorpMoneyMaker. +

+

+ At the end, you should always read the documentation of the programming languages you’re using to see exactly how they implement inheritance. You’ll now be able to guess why the language’s designers made their decisions and how you can use their implementation of inheritance effectively. +

+ +
+
+ +
+ Share Your Knowledge +
+
+ +
+
+
+ +
+ + +
+ Proudly generated by HUGO +
+
+
+ + + + diff --git a/packages/readabilityjs/test/test-pages/thevaluable.dev/url.txt b/packages/readabilityjs/test/test-pages/thevaluable.dev/url.txt new file mode 100644 index 000000000..194c6adcf --- /dev/null +++ b/packages/readabilityjs/test/test-pages/thevaluable.dev/url.txt @@ -0,0 +1 @@ +https://thevaluable.dev/guide-inheritance-oop/ \ No newline at end of file From 61303419a0aea4b3109dd1baec23e216e784c3d2 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 14:25:19 +0800 Subject: [PATCH 15/54] Add timeout --- packages/content-handler/src/newsletters/substack-handler.ts | 2 +- packages/content-handler/test/newsletter.test.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/content-handler/src/newsletters/substack-handler.ts b/packages/content-handler/src/newsletters/substack-handler.ts index 164068623..f66a85a52 100644 --- a/packages/content-handler/src/newsletters/substack-handler.ts +++ b/packages/content-handler/src/newsletters/substack-handler.ts @@ -40,7 +40,7 @@ export class SubstackHandler extends ContentHandler { findNewsletterHeaderHref(dom: Document): string | undefined { // Substack header links - const postLink = dom.querySelector('h1 a ') + const postLink = dom.querySelector('h1 a') if (postLink) { return postLink.getAttribute('href') || undefined } diff --git a/packages/content-handler/test/newsletter.test.ts b/packages/content-handler/test/newsletter.test.ts index f2b51a124..90197b00d 100644 --- a/packages/content-handler/test/newsletter.test.ts +++ b/packages/content-handler/test/newsletter.test.ts @@ -169,14 +169,14 @@ describe('Newsletter email test', () => { expect(url).to.startWith( 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217' ) - }) + }).timeout(10000) it('gets the URL from the header if it is a beehiiv newsletter', async () => { const html = load('./test/data/beehiiv-newsletter.html') const url = await new BeehiivHandler().findNewsletterUrl(html) expect(url).to.startWith( 
'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' ) - }) + }).timeout(10000) it('returns undefined if it is not a newsletter', async () => { const html = load('./test/data/substack-forwarded-welcome-email.html') const url = await new SubstackHandler().findNewsletterUrl(html) From 7187326d903eb511750e484e58a5ea143873a975 Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Tue, 4 Oct 2022 15:06:54 +0800 Subject: [PATCH 16/54] Use li-date and post-tag selectors instead of post-meta as that usually has useful data --- packages/readabilityjs/Readability.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/readabilityjs/Readability.js b/packages/readabilityjs/Readability.js index 31ce3c9bb..e7e67d2fe 100644 --- a/packages/readabilityjs/Readability.js +++ b/packages/readabilityjs/Readability.js @@ -171,7 +171,7 @@ Readability.prototype = { // Readability-readerable.js. Please keep both copies in sync. articleNegativeLookBehindCandidates: /breadcrumbs|breadcrumb|utils|trilist/i, articleNegativeLookAheadCandidates: /outstream(.?)_|sub(.?)_|m_|omeda-promo-|in-article-advert|block-ad-.*/i, - unlikelyCandidates: /\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-meta|post-head|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h/i, + unlikelyCandidates: 
/\bad\b|ai2html|banner|breadcrumbs|breadcrumb|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager(?!ow)|popup|yom-remote|copyright|keywords|outline|infinite-list|beta|recirculation|site-index|hide-for-print|post-end-share-cta|post-end-cta-full|post-footer|post-head|post-tag|li-date|main-navigation|programtic-ads|outstream_article|hfeed|comment-holder|back-to-top|show-up-next|onward-journey|topic-tracker|list-nav|block-ad-entity|adSpecs|gift-article-button|modal-title|in-story-masthead|share-tools|standard-dock|expanded-dock|margins-h/i, // okMaybeItsACandidate: /and|article(?!-breadcrumb)|body|column|content|main|shadow|post-header/i, get okMaybeItsACandidate() { return new RegExp(`and|(? Date: Tue, 4 Oct 2022 15:28:12 +0800 Subject: [PATCH 17/54] Add Dockerfile for pdfHandler --- packages/content-fetch/.dockerignore | 5 ++++ packages/inbound-email-handler/.dockerignore | 5 ++++ packages/pdf-handler/.dockerignore | 5 ++++ packages/pdf-handler/Dockerfile | 27 ++++++++++++++++++++ packages/pdf-handler/package.json | 5 +++- packages/pdf-handler/tsconfig.json | 2 +- packages/puppeteer-parse/.dockerignore | 4 +++ packages/puppeteer-parse/Dockerfile | 1 + 8 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 packages/content-fetch/.dockerignore create mode 100644 packages/inbound-email-handler/.dockerignore create mode 100644 packages/pdf-handler/.dockerignore create mode 100644 packages/pdf-handler/Dockerfile create mode 100644 packages/puppeteer-parse/.dockerignore diff --git a/packages/content-fetch/.dockerignore b/packages/content-fetch/.dockerignore new file mode 100644 index 000000000..77c017249 --- /dev/null +++ b/packages/content-fetch/.dockerignore @@ -0,0 +1,5 @@ +node_modules +.env* +Dockerfile +.dockerignore +*.yaml diff --git a/packages/inbound-email-handler/.dockerignore 
b/packages/inbound-email-handler/.dockerignore new file mode 100644 index 000000000..d8aea4ee6 --- /dev/null +++ b/packages/inbound-email-handler/.dockerignore @@ -0,0 +1,5 @@ +node_modules +build +.env* +Dockerfile +.dockerignore diff --git a/packages/pdf-handler/.dockerignore b/packages/pdf-handler/.dockerignore new file mode 100644 index 000000000..d8aea4ee6 --- /dev/null +++ b/packages/pdf-handler/.dockerignore @@ -0,0 +1,5 @@ +node_modules +build +.env* +Dockerfile +.dockerignore diff --git a/packages/pdf-handler/Dockerfile b/packages/pdf-handler/Dockerfile new file mode 100644 index 000000000..0666d57e5 --- /dev/null +++ b/packages/pdf-handler/Dockerfile @@ -0,0 +1,27 @@ +FROM node:14.18-alpine + +# Run everything after as non-privileged user. +WORKDIR /app + +COPY package.json . +COPY yarn.lock . +COPY tsconfig.json . +COPY .prettierrc . +COPY .eslintrc . + +COPY /packages/pdf-handler/package.json ./packages/pdf-handler/package.json + +RUN yarn install --pure-lockfile + +ADD /packages/pdf-handler ./packages/pdf-handler +RUN yarn workspace @omnivore/pdf-handler build + +# After building, fetch the production dependencies +RUN rm -rf /app/packages/pdf-handler/node_modules +RUN rm -rf /app/node_modules +RUN yarn install --pure-lockfile --production + +EXPOSE 8080 + +CMD ["yarn", "workspace", "@omnivore/pdf-handler", "start"] + diff --git a/packages/pdf-handler/package.json b/packages/pdf-handler/package.json index 4b91e76a1..0de3e5f9a 100644 --- a/packages/pdf-handler/package.json +++ b/packages/pdf-handler/package.json @@ -20,7 +20,10 @@ "deploy": "yarn build && yarn gcloud-deploy" }, "devDependencies": { - "@types/node": "^14.11.2" + "@types/node": "^14.11.2", + "chai": "^4.3.6", + "chai-string": "^1.5.0", + "mocha": "^10.0.0" }, "dependencies": { "@google-cloud/functions-framework": "3.1.2", diff --git a/packages/pdf-handler/tsconfig.json b/packages/pdf-handler/tsconfig.json index f450acf38..5220d6b3f 100644 --- a/packages/pdf-handler/tsconfig.json +++ 
b/packages/pdf-handler/tsconfig.json @@ -5,5 +5,5 @@ "rootDir": ".", "lib": ["dom"] }, - "include": ["src", "test"] + "include": ["src"] } diff --git a/packages/puppeteer-parse/.dockerignore b/packages/puppeteer-parse/.dockerignore new file mode 100644 index 000000000..2310bc768 --- /dev/null +++ b/packages/puppeteer-parse/.dockerignore @@ -0,0 +1,4 @@ +node_modules +.env* +Dockerfile +.dockerignore diff --git a/packages/puppeteer-parse/Dockerfile b/packages/puppeteer-parse/Dockerfile index 7faae17bf..a52f55122 100644 --- a/packages/puppeteer-parse/Dockerfile +++ b/packages/puppeteer-parse/Dockerfile @@ -85,6 +85,7 @@ WORKDIR /app ENV CHROMIUM_PATH /usr/bin/chromium-browser ENV LAUNCH_HEADLESS=true +ENV PORT 9090 COPY package.json . COPY yarn.lock . From 4ced49c4f36b43a3d8144d454c4d8d16d34808a4 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 15:38:05 +0800 Subject: [PATCH 18/54] Remove register/should --- packages/pdf-handler/test/pdf/pdf.test.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/pdf-handler/test/pdf/pdf.test.ts b/packages/pdf-handler/test/pdf/pdf.test.ts index 259fd7650..001958422 100644 --- a/packages/pdf-handler/test/pdf/pdf.test.ts +++ b/packages/pdf-handler/test/pdf/pdf.test.ts @@ -1,7 +1,6 @@ import 'mocha' import * as chai from 'chai' import { expect } from 'chai' -import 'chai/register-should' import chaiString from 'chai-string' import { getDocument, From 1024e5e5d6c6c6ad09a6d35a6378cd3c18368107 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 16:01:01 +0800 Subject: [PATCH 19/54] Add Dockerfile for text-to-speech --- packages/text-to-speech/.dockerignore | 5 +++++ packages/text-to-speech/Dockerfile | 27 +++++++++++++++++++++++ packages/text-to-speech/package.json | 4 +++- packages/text-to-speech/test/stub.test.ts | 13 ----------- 4 files changed, 35 insertions(+), 14 deletions(-) create mode 100644 packages/text-to-speech/.dockerignore create mode 100644 packages/text-to-speech/Dockerfile delete mode 
100644 packages/text-to-speech/test/stub.test.ts diff --git a/packages/text-to-speech/.dockerignore b/packages/text-to-speech/.dockerignore new file mode 100644 index 000000000..d8aea4ee6 --- /dev/null +++ b/packages/text-to-speech/.dockerignore @@ -0,0 +1,5 @@ +node_modules +build +.env* +Dockerfile +.dockerignore diff --git a/packages/text-to-speech/Dockerfile b/packages/text-to-speech/Dockerfile new file mode 100644 index 000000000..7e4fb5fea --- /dev/null +++ b/packages/text-to-speech/Dockerfile @@ -0,0 +1,27 @@ +FROM node:14.18-alpine + +# Run everything after as non-privileged user. +WORKDIR /app + +COPY package.json . +COPY yarn.lock . +COPY tsconfig.json . +COPY .prettierrc . +COPY .eslintrc . + +COPY /packages/text-to-speech/package.json ./packages/text-to-speech/package.json + +RUN yarn install --pure-lockfile + +ADD /packages/text-to-speech ./packages/text-to-speech +RUN yarn workspace @omnivore/text-to-speech-handler build + +# After building, fetch the production dependencies +RUN rm -rf /app/packages/text-to-speech/node_modules +RUN rm -rf /app/node_modules +RUN yarn install --pure-lockfile --production + +EXPOSE 8080 + +CMD ["yarn", "workspace", "@omnivore/text-to-speech-handler", "start"] + diff --git a/packages/text-to-speech/package.json b/packages/text-to-speech/package.json index e326eb8f9..e63b348b2 100644 --- a/packages/text-to-speech/package.json +++ b/packages/text-to-speech/package.json @@ -25,7 +25,9 @@ "@types/natural": "^5.1.1", "@types/node": "^14.11.2", "@types/underscore": "^1.11.4", - "eslint-plugin-prettier": "^4.0.0" + "chai": "^4.3.6", + "eslint-plugin-prettier": "^4.0.0", + "mocha": "^10.0.0" }, "dependencies": { "@google-cloud/functions-framework": "3.1.2", diff --git a/packages/text-to-speech/test/stub.test.ts b/packages/text-to-speech/test/stub.test.ts deleted file mode 100644 index 173ca4917..000000000 --- a/packages/text-to-speech/test/stub.test.ts +++ /dev/null @@ -1,13 +0,0 @@ -import 'mocha' -import * as chai from 'chai' 
-import { expect } from 'chai' -import 'chai/register-should' -import chaiString from 'chai-string' - -chai.use(chaiString) - -describe('Stub test', () => { - it('should pass', () => { - expect(true).to.be.true - }) -}) From ae4c01f2d3abd7c08e36f72dddd6e0d2b4876ba0 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Mon, 3 Oct 2022 17:23:13 +0800 Subject: [PATCH 20/54] Split utterance into chunks of 256 chars --- packages/text-to-speech/src/htmlToSsml.ts | 78 +++++++++++++-------- packages/text-to-speech/src/textToSpeech.ts | 27 +++---- 2 files changed, 57 insertions(+), 48 deletions(-) diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index 5e0741c8c..a64a370bf 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -255,7 +255,7 @@ export const stripEmojis = (text: string): string => { return text.replace(emojiRegex, '').replace(/\s+/g, ' ') } -const textToUtterance = ({ +const textToUtterances = ({ tokenizer, idx, textItems, @@ -269,32 +269,51 @@ const textToUtterance = ({ wordOffset: number voice?: string isHtml?: boolean -}): Utterance => { - const text = textItems.join('') - let textWithWordOffset = text - if (isHtml) { - try { - textWithWordOffset = htmlToText(text, { wordwrap: false }) - } catch (err) { - console.error( - 'Unable to convert HTML to text, html:', +}): Utterance[] => { + let text = textItems.join('') + if (!isHtml) { + // for title + const wordCount = tokenizer.tokenize(text).length + return [ + { + idx, text, - ', error:', - err - ) - textWithWordOffset = - parseHTML(text).document.documentElement.textContent ?? 
text - console.info('Converted HTML to text:', textWithWordOffset) + wordOffset, + wordCount, + voice, + }, + ] + } + + const utterances: Utterance[] = [] + try { + text = htmlToText(text, { wordwrap: false }) + } catch (err) { + console.error( + 'Unable to convert HTML to text, html:', + text, + ', error:', + err + ) + text = parseHTML(text).document.documentElement.textContent ?? text + console.info('Converted HTML to text:', text) + } + // split text into chunks of 256 characters to stream faster without breaking on words + const textChunks = text.match(/.{1,256}(?= |$)/g) + if (textChunks) { + for (const chunk of textChunks) { + const wordCount = tokenizer.tokenize(chunk).length + utterances.push({ + idx, + text: chunk, + wordOffset, + wordCount, + voice, + }) + wordOffset += wordCount } } - const wordCount = tokenizer.tokenize(textWithWordOffset).length - return { - idx, - text, - wordOffset, - wordCount, - voice, - } + return utterances } export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => { @@ -331,13 +350,13 @@ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => { let wordOffset = 0 if (title) { // first utterances is the title - const titleUtterance = textToUtterance({ + const titleUtterance = textToUtterances({ tokenizer, idx: '', textItems: [cleanText(title)], // title could have HTML entity names like & or emoji wordOffset, isHtml: false, - }) + })[0] utterances.push(titleUtterance) wordOffset += titleUtterance.wordCount } @@ -351,7 +370,7 @@ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => { // use paragraph as anchor const idx = i.toString() i = emitElement(textItems, node, true) - const utterance = textToUtterance({ + const newUtterances = textToUtterances({ tokenizer, idx, textItems, @@ -359,8 +378,9 @@ export const htmlToSpeechFile = (htmlInput: HtmlInput): SpeechFile => { voice: node.nodeName === 'BLOCKQUOTE' ? 
options.secondaryVoice : undefined, }) - utterance.wordCount > 0 && utterances.push(utterance) - wordOffset += utterance.wordCount + const wordCount = newUtterances.reduce((acc, u) => acc + u.wordCount, 0) + wordCount > 0 && utterances.push(...newUtterances) + wordOffset += wordCount } } diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index aa5f39713..c7c35fed3 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -138,28 +138,17 @@ export const synthesizeTextToSpeech = async ( } } // for ssml - let audioData: Buffer = Buffer.from([]) - // split ssml into chunks of 2000 characters to stream faster - // both within limit & without breaking on words and bookmarks - const ssmlChunks = input.text.match(/.{1,2000}(?= |$)(?! mark=)/g) - if (ssmlChunks) { - for (const ssmlChunk of ssmlChunks) { - const startSsmlChunk = startSsml(ssmlOptions) - const ssml = `${startSsmlChunk}${ssmlChunk}${endSsml()}` - // set the text offset to be the end of SSML start tag - wordOffset -= startSsmlChunk.length - const result = await speakSsmlAsyncPromise(ssml) - if (result.reason === ResultReason.Canceled) { - throw new Error(result.errorDetails) - } - timeOffset = timeOffset + result.audioDuration - wordOffset = wordOffset + ssmlChunk.length - audioData = Buffer.concat([audioData, Buffer.from(result.audioData)]) - } + const startSsmlTag = startSsml(ssmlOptions) + const ssml = `${startSsmlTag}${input.text}${endSsml()}` + // set the text offset to be the end of SSML start tag + wordOffset -= startSsmlTag.length + const result = await speakSsmlAsyncPromise(ssml) + if (result.reason === ResultReason.Canceled) { + throw new Error(result.errorDetails) } return { - audioData, + audioData: Buffer.from(result.audioData), speechMarks, } } catch (error) { From 39dcab5076bb1a80ebd23253894a5f3d7ccb17a5 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 16:28:04 +0800 Subject: 
[PATCH 21/54] Fix tests --- packages/text-to-speech/test/htmlToSsml.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/text-to-speech/test/htmlToSsml.test.ts b/packages/text-to-speech/test/htmlToSsml.test.ts index 5e3270ced..f49810d9e 100644 --- a/packages/text-to-speech/test/htmlToSsml.test.ts +++ b/packages/text-to-speech/test/htmlToSsml.test.ts @@ -235,6 +235,6 @@ describe('convert HTML to Speech file', () => { title: 'Wang Yi at the UN; Fu Zhenghua sentenced; Nvidia China sales', options: TEST_OPTIONS, }) - expect(speechFile.utterances).to.have.lengthOf(12) + expect(speechFile.utterances).to.have.lengthOf(21) }) }) From 690ce05b0ec8adf71b3bd03b34797e4730cd0da8 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 17:16:26 +0800 Subject: [PATCH 22/54] if we hit 256, look back for first ending sentence within 80 chars --- packages/text-to-speech/src/htmlToSsml.ts | 45 ++++++++++++++++------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index a64a370bf..6935de2ef 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -298,21 +298,40 @@ const textToUtterances = ({ text = parseHTML(text).document.documentElement.textContent ?? 
text console.info('Converted HTML to text:', text) } - // split text into chunks of 256 characters to stream faster without breaking on words - const textChunks = text.match(/.{1,256}(?= |$)/g) - if (textChunks) { - for (const chunk of textChunks) { - const wordCount = tokenizer.tokenize(chunk).length - utterances.push({ - idx, - text: chunk, - wordOffset, - wordCount, - voice, - }) - wordOffset += wordCount + // if we hit 256, look back for first ending sentence within 80 chars + const MAX_CHARS = 256 + const MAX_LOOKBACK = 80 + while (text.length > MAX_CHARS) { + let lookback = MAX_LOOKBACK + let end = MAX_CHARS - lookback + while (lookback > 0) { + if (text[end] === '.' || text[end] === '!' || text[end] === '?') { + break + } + end++ + lookback-- } + const utterance = text.substring(0, end + 1) + const wordCount = tokenizer.tokenize(utterance).length + utterances.push({ + idx, + text: utterance, + wordOffset, + wordCount, + voice, + }) + text = text.substring(end + 1) + wordOffset += wordCount } + + const wordCount = tokenizer.tokenize(text).length + utterances.push({ + idx, + text, + wordOffset, + wordCount, + voice, + }) return utterances } From 851e2643008a7fcb04181a331ebe0a653798961a Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 19:35:14 +0800 Subject: [PATCH 23/54] Add test case --- .../test/fixtures/{large.html => li.html} | 0 .../text-to-speech/test/htmlToSsml.test.ts | 19 ++++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) rename packages/text-to-speech/test/fixtures/{large.html => li.html} (100%) diff --git a/packages/text-to-speech/test/fixtures/large.html b/packages/text-to-speech/test/fixtures/li.html similarity index 100% rename from packages/text-to-speech/test/fixtures/large.html rename to packages/text-to-speech/test/fixtures/li.html diff --git a/packages/text-to-speech/test/htmlToSsml.test.ts b/packages/text-to-speech/test/htmlToSsml.test.ts index f49810d9e..20f9a1d79 100644 --- 
a/packages/text-to-speech/test/htmlToSsml.test.ts +++ b/packages/text-to-speech/test/htmlToSsml.test.ts @@ -227,7 +227,7 @@ describe('htmlToSpeechFile', () => { describe('convert HTML to Speech file', () => { it('converts each
  • to an utterance', () => { const html = fs.readFileSync( - path.resolve(__dirname, './fixtures/large.html'), + path.resolve(__dirname, './fixtures/li.html'), { encoding: 'utf-8' } ) const speechFile = htmlToSpeechFile({ @@ -237,4 +237,21 @@ describe('convert HTML to Speech file', () => { }) expect(speechFile.utterances).to.have.lengthOf(21) }) + + it('converts long utterances to multiple utterances', () => { + const html = `
    +
    +
    + All neural voices are multilingual and fluent in their own language and English. For example, if the input text in English is "I'm excited to try text to speech" and you set es-ES-ElviraNeural, the text is spoken in English with a Spanish accent. If the voice doesn't speak the language of the input text, the Speech service won't output synthesized audio. See the full list of supported neural voices. +
    +
    +
    +` + const speechFile = htmlToSpeechFile({ + content: html, + title: 'How to synthesize speech from text', + options: TEST_OPTIONS, + }) + expect(speechFile.utterances).to.have.lengthOf(3) + }) }) From 3bd6b3d13729fd7f4f41af1459446f2ec509acbb Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 10:25:24 +0800 Subject: [PATCH 24/54] Do not break on words and long sentence if exceeds 256 chars --- packages/text-to-speech/src/htmlToSsml.ts | 32 +++++++++++------------ 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index 6935de2ef..2cfad13c1 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -273,13 +273,12 @@ const textToUtterances = ({ let text = textItems.join('') if (!isHtml) { // for title - const wordCount = tokenizer.tokenize(text).length return [ { idx, text, wordOffset, - wordCount, + wordCount: tokenizer.tokenize(text).length, voice, }, ] @@ -302,15 +301,11 @@ const textToUtterances = ({ const MAX_CHARS = 256 const MAX_LOOKBACK = 80 while (text.length > MAX_CHARS) { - let lookback = MAX_LOOKBACK - let end = MAX_CHARS - lookback - while (lookback > 0) { - if (text[end] === '.' || text[end] === '!' 
|| text[end] === '?') { - break - } + let end = MAX_CHARS - MAX_LOOKBACK - 1 + while (end < text.length && !text[end].match(/[.!?]/)) { end++ - lookback-- } + const utterance = text.substring(0, end + 1) const wordCount = tokenizer.tokenize(utterance).length utterances.push({ @@ -324,14 +319,17 @@ const textToUtterances = ({ wordOffset += wordCount } - const wordCount = tokenizer.tokenize(text).length - utterances.push({ - idx, - text, - wordOffset, - wordCount, - voice, - }) + if (text.length > 0) { + const wordCount = tokenizer.tokenize(text).length + utterances.push({ + idx, + text, + wordOffset, + wordCount, + voice, + }) + } + return utterances } From e3959c4ab88ce9a23bd383e6a4d2658e0317956d Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 10:26:12 +0800 Subject: [PATCH 25/54] Fix tests --- packages/text-to-speech/test/htmlToSsml.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/text-to-speech/test/htmlToSsml.test.ts b/packages/text-to-speech/test/htmlToSsml.test.ts index 20f9a1d79..6f291ca86 100644 --- a/packages/text-to-speech/test/htmlToSsml.test.ts +++ b/packages/text-to-speech/test/htmlToSsml.test.ts @@ -235,7 +235,7 @@ describe('convert HTML to Speech file', () => { title: 'Wang Yi at the UN; Fu Zhenghua sentenced; Nvidia China sales', options: TEST_OPTIONS, }) - expect(speechFile.utterances).to.have.lengthOf(21) + expect(speechFile.utterances).to.have.lengthOf(19) }) it('converts long utterances to multiple utterances', () => { From e5c215fb9d41bf27f0852914d22408c7747f605d Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 10:34:51 +0800 Subject: [PATCH 26/54] Add test for long sentence --- packages/text-to-speech/test/htmlToSsml.test.ts | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/packages/text-to-speech/test/htmlToSsml.test.ts b/packages/text-to-speech/test/htmlToSsml.test.ts index 6f291ca86..3d2350378 100644 --- a/packages/text-to-speech/test/htmlToSsml.test.ts +++ 
b/packages/text-to-speech/test/htmlToSsml.test.ts @@ -254,4 +254,21 @@ describe('convert HTML to Speech file', () => { }) expect(speechFile.utterances).to.have.lengthOf(3) }) + + it('does not break long sentences', () => { + const html = `
    +
    +
    + This meeting did not offer any significant economic boosts, among other things it reviewed reports of the inspection teams sent to several provinces to check on implementation of economic stabilization measures, promised more administrative reforms, and cut toll fees for freight trucks by 10% and government-designated cargo port charges by 20% in Q4. +
    +
    +
    +` + const speechFile = htmlToSpeechFile({ + content: html, + title: 'Test long sentence', + options: TEST_OPTIONS, + }) + expect(speechFile.utterances).to.have.lengthOf(2) + }) }) From fd3047a8abd167a528a3243094151e5105d5e4e9 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 11:27:28 +0800 Subject: [PATCH 27/54] Escape HTML entities when synthesizing because we are sending raw text now --- packages/text-to-speech/src/textToSpeech.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index c7c35fed3..2c72a4852 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -8,6 +8,7 @@ import { SpeechSynthesizer, } from 'microsoft-cognitiveservices-speech-sdk' import { endSsml, htmlToSsmlItems, ssmlItemText, startSsml } from './htmlToSsml' +import * as _ from 'underscore' export interface TextToSpeechInput { text: string @@ -139,7 +140,8 @@ export const synthesizeTextToSpeech = async ( } // for ssml const startSsmlTag = startSsml(ssmlOptions) - const ssml = `${startSsmlTag}${input.text}${endSsml()}` + const text = _.escape(input.text) + const ssml = `${startSsmlTag}${text}${endSsml()}` // set the text offset to be the end of SSML start tag wordOffset -= startSsmlTag.length const result = await speakSsmlAsyncPromise(ssml) From 5e79529e11788d6b34bd8dd53dae3a03a9ae4fe6 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 17:40:21 +0800 Subject: [PATCH 28/54] Add sentence-level speech marks --- packages/text-to-speech/src/textToSpeech.ts | 24 ++++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index 2c72a4852..4426c6f19 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -1,8 +1,10 @@ import { CancellationDetails, 
CancellationReason, + PropertyId, ResultReason, SpeechConfig, + SpeechSynthesisBoundaryType, SpeechSynthesisOutputFormat, SpeechSynthesisResult, SpeechSynthesizer, @@ -30,7 +32,7 @@ export interface SpeechMark { start?: number length?: number word: string - type: 'word' | 'bookmark' + type: 'word' | 'bookmark' | 'punctuation' | 'sentence' } export const synthesizeTextToSpeech = async ( @@ -47,6 +49,11 @@ export const synthesizeTextToSpeech = async ( ) speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3 + // Required for sentence-level WordBoundary events + speechConfig.setProperty( + PropertyId.SpeechServiceResponse_RequestSentenceBoundary, + 'true' + ) // Create the speech synthesizer. const synthesizer = new SpeechSynthesizer(speechConfig) @@ -87,13 +94,14 @@ export const synthesizeTextToSpeech = async ( // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds. synthesizer.wordBoundary = (s, e) => { - speechMarks.push({ - word: e.text, - time: (timeOffset + e.audioOffset) / 10000, - start: wordOffset + e.textOffset, - length: e.wordLength, - type: 'word', - }) + e.boundaryType === SpeechSynthesisBoundaryType.Sentence && + speechMarks.push({ + word: e.text, + time: (timeOffset + e.audioOffset) / 10000, + start: wordOffset + e.textOffset, + length: e.wordLength, + type: 'sentence', + }) } synthesizer.bookmarkReached = (s, e) => { From 034f8335294d4b971cec20912d341cadeee3f74c Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Tue, 4 Oct 2022 19:23:26 +0800 Subject: [PATCH 29/54] Calculate the length of each sentence in speech marks --- packages/text-to-speech/src/textToSpeech.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index 4426c6f19..7d9dc0daf 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -99,7 
+99,7 @@ export const synthesizeTextToSpeech = async ( word: e.text, time: (timeOffset + e.audioOffset) / 10000, start: wordOffset + e.textOffset, - length: e.wordLength, + length: e.text.length, type: 'sentence', }) } From edd1b908ee22f45ee9b60cc4a0414a83a20353d0 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 09:05:46 +0800 Subject: [PATCH 30/54] Fix tests --- .../content-handler/test/newsletter.test.ts | 95 +++++++++++-------- 1 file changed, 57 insertions(+), 38 deletions(-) diff --git a/packages/content-handler/test/newsletter.test.ts b/packages/content-handler/test/newsletter.test.ts index 90197b00d..ff9396a12 100644 --- a/packages/content-handler/test/newsletter.test.ts +++ b/packages/content-handler/test/newsletter.test.ts @@ -21,29 +21,6 @@ const load = (path: string): string => { } describe('Newsletter email test', () => { - before(() => { - nock('https://email.mg2.substack.com') - .head( - '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY' - ) - .reply(302, undefined, { - Location: - 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217', - }) - .get('/p/companies-that-eat-people-217') - .reply(200, '') - - nock('https://u23463625.ct.sendgrid.net') - .head( - '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno' - ) - .reply(302, undefined, { - 
Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple', - }) - .get('/p/talked-guy-spent-30m-beeple') - .reply(200, '') - }) - describe('#getNewsletterUrl()', () => { it('returns url when email is from SubStack', async () => { const rawUrl = '' @@ -162,21 +139,63 @@ describe('Newsletter email test', () => { }) describe('findNewsletterUrl', async () => { - it('gets the URL from the header if it is a substack newsletter', async () => { - const html = load('./test/data/substack-forwarded-newsletter.html') - const url = await new SubstackHandler().findNewsletterUrl(html) - // Not sure if the redirects from substack expire, this test could eventually fail - expect(url).to.startWith( - 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217' - ) - }).timeout(10000) - it('gets the URL from the header if it is a beehiiv newsletter', async () => { - const html = load('./test/data/beehiiv-newsletter.html') - const url = await new BeehiivHandler().findNewsletterUrl(html) - expect(url).to.startWith( - 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' - ) - }).timeout(10000) + context('when email is from Substack', () => { + let scope: nock.Scope + + before(() => { + scope = nock('https://email.mg2.substack.com') + .head( + '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY' + ) + .reply(302, undefined, { + Location: + 
'https://newsletter.slowchinese.net/p/companies-that-eat-people-217', + }) + .get('/p/companies-that-eat-people-217') + .reply(200, '') + }) + after(() => { + scope.done() + }) + + it('gets the URL from the header', async () => { + const html = load('./test/data/substack-forwarded-newsletter.html') + const url = await new SubstackHandler().findNewsletterUrl(html) + // Not sure if the redirects from substack expire, this test could eventually fail + expect(url).to.startWith( + 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217' + ) + }) + }) + + context('when email is from beehiiv', () => { + let scope: nock.Scope + + before(() => { + scope = nock('https://u23463625.ct.sendgrid.net') + .head( + '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno' + ) + .reply(302, undefined, { + Location: 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple', + }) + .get('/p/talked-guy-spent-30m-beeple') + .reply(200, '') + }) + + after(() => { + scope.done() + }) + + it('gets the URL from the header', async () => { + const html = load('./test/data/beehiiv-newsletter.html') + const url = await new BeehiivHandler().findNewsletterUrl(html) + expect(url).to.startWith( + 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' + ) + }) + }) + it('returns undefined if it is not a newsletter', async () => { const html = load('./test/data/substack-forwarded-welcome-email.html') const url = await new SubstackHandler().findNewsletterUrl(html) From c2a44f49911b8bfd460c810d844ee648a5827ee5 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 10:04:41 +0800 Subject: [PATCH 31/54] Restore HTTP interceptor to the normal unmocked behaviour after testing --- packages/content-handler/test/newsletter.test.ts | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/packages/content-handler/test/newsletter.test.ts 
b/packages/content-handler/test/newsletter.test.ts index ff9396a12..ea97a0421 100644 --- a/packages/content-handler/test/newsletter.test.ts +++ b/packages/content-handler/test/newsletter.test.ts @@ -140,10 +140,8 @@ describe('Newsletter email test', () => { describe('findNewsletterUrl', async () => { context('when email is from Substack', () => { - let scope: nock.Scope - before(() => { - scope = nock('https://email.mg2.substack.com') + nock('https://email.mg2.substack.com') .head( '/c/eJxNkk2TojAQhn-N3KTyQfg4cGDGchdnYcsZx9K5UCE0EMVAkTiKv36iHnarupNUd7rfVJ4W3EDTj1M89No496Uw0wCxgovuwBgYnbOGsZBVjDHzKPWYU8VehUMWOlIX9Qhw4rKLzXgGZziXnRTcyF7dK0iIGMVOG_OS1aTmKPRDilgVhTQUPCQIcE0x-MFTmJ8rCUpA3KtuenR2urg1ZtAzmszI0tq_Z7m66y-ilQo0uAqMTQ7WRX8auJKg56blZg7WB-iHDuYEBzO6NP0R1IwuYFphQbbTjnTH9NBfs80nym4Zyj8uUvyKbtUyGr5eUz9fNDQ7JCxfJDo9dW1lY9lmj_JNivPbGmf2Pt_lN9tDit9b-WeTetni85Z9pDpVOd7L1E_Vy7egayNO23ZP34eSeLJeux1b0rer_xaZ7ykS78nuSjMY-nL98rparNZNcv07JCjN06_EkTFBxBqOUMACErnELUNMSxTUjLDQZwzcqa4bRjCfeejUEFefS224OLr2S5wxPtij7lVrs80d2CNseRV2P52VNFMBipcdVE-U5jkRD7hFAwpGOylVwU2Mfc9qBh7DoR89yVnWXhgQFHnIsbpVb6tU_B-hH_2yzWY' ) @@ -155,7 +153,7 @@ describe('Newsletter email test', () => { .reply(200, '') }) after(() => { - scope.done() + nock.restore() }) it('gets the URL from the header', async () => { @@ -165,14 +163,12 @@ describe('Newsletter email test', () => { expect(url).to.startWith( 'https://newsletter.slowchinese.net/p/companies-that-eat-people-217' ) - }) + }).timeout(10000) }) context('when email is from beehiiv', () => { - let scope: nock.Scope - before(() => { - scope = nock('https://u23463625.ct.sendgrid.net') + nock('https://u23463625.ct.sendgrid.net') .head( '/ss/c/AX1lEgEQaxtvFxLaVo0GBo_geajNrlI1TGeIcmMViR3pL3fEDZnbbkoeKcaY62QZk0KPFudUiUXc_uMLerV4nA/3k5/3TFZmreTR0qKSCgowABnVg/h30/zzLik7UXd1H_n4oyd5W8Xu639AYQQB2UXz-CsssSnno' ) @@ -184,7 +180,7 @@ describe('Newsletter email test', () => { }) after(() => { - scope.done() + nock.restore() }) it('gets the URL from the header', async 
() => { @@ -193,7 +189,7 @@ describe('Newsletter email test', () => { expect(url).to.startWith( 'https://www.milkroad.com/p/talked-guy-spent-30m-beeple' ) - }) + }).timeout(10000) }) it('returns undefined if it is not a newsletter', async () => { From 0071d88443122fcaa485f2ac23df8e657bed52cc Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 12:36:29 +0800 Subject: [PATCH 32/54] Stop deducting length of SSML starting tags in the offset --- packages/text-to-speech/src/textToSpeech.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index 7d9dc0daf..00cc06c58 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -59,7 +59,7 @@ export const synthesizeTextToSpeech = async ( const synthesizer = new SpeechSynthesizer(speechConfig) const speechMarks: SpeechMark[] = [] let timeOffset = 0 - let wordOffset = 0 + // let wordOffset = 0 synthesizer.synthesizing = function (s, e) { // convert arrayBuffer to stream and write to stream @@ -98,7 +98,7 @@ export const synthesizeTextToSpeech = async ( speechMarks.push({ word: e.text, time: (timeOffset + e.audioOffset) / 10000, - start: wordOffset + e.textOffset, + start: e.textOffset, length: e.text.length, type: 'sentence', }) @@ -151,7 +151,7 @@ export const synthesizeTextToSpeech = async ( const text = _.escape(input.text) const ssml = `${startSsmlTag}${text}${endSsml()}` // set the text offset to be the end of SSML start tag - wordOffset -= startSsmlTag.length + // wordOffset -= startSsmlTag.length const result = await speakSsmlAsyncPromise(ssml) if (result.reason === ResultReason.Canceled) { throw new Error(result.errorDetails) From 99f03133a753cda1b0d1f4264567a862c7ee0538 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 16:39:45 +0800 Subject: [PATCH 33/54] Use NLP lib to better detect sentence boundary and split utterances --- 
packages/text-to-speech/src/htmlToSsml.ts | 76 ++++++++++++++--------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index 2cfad13c1..3edd8066d 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -1,6 +1,6 @@ import { parseHTML } from 'linkedom' import * as _ from 'underscore' -import { WordPunctTokenizer } from 'natural' +import { SentenceTokenizer, WordPunctTokenizer } from 'natural' import { htmlToText } from 'html-to-text' // this code needs to be kept in sync with the @@ -297,38 +297,52 @@ const textToUtterances = ({ text = parseHTML(text).document.documentElement.textContent ?? text console.info('Converted HTML to text:', text) } - // if we hit 256, look back for first ending sentence within 80 chars + const MAX_CHARS = 256 - const MAX_LOOKBACK = 80 - while (text.length > MAX_CHARS) { - let end = MAX_CHARS - MAX_LOOKBACK - 1 - while (end < text.length && !text[end].match(/[.!?]/)) { - end++ + const sentenceTokenizer = new SentenceTokenizer() + const sentences = sentenceTokenizer.tokenize(text) + let currentText = '' + // max 256 chars per utterance + sentences.forEach((sentence, i) => { + const nextText = currentText + sentence + if (nextText.length > MAX_CHARS) { + if (currentText.length > 0) { + console.debug('Saving current text in the utterance:', currentText) + const wordCount = tokenizer.tokenize(currentText).length + utterances.push({ + idx, + text: currentText, + wordOffset, + wordCount, + voice, + }) + wordOffset += wordCount + currentText = sentence + } else { + console.debug('Sentence is too long to fit in an utterance:', sentence) + const wordCount = tokenizer.tokenize(sentence).length + utterances.push({ + idx, + text: sentence, + wordOffset, + wordCount, + voice, + }) + wordOffset += wordCount + } + } else { + currentText = nextText } - - const utterance = text.substring(0, end + 1) - const 
wordCount = tokenizer.tokenize(utterance).length - utterances.push({ - idx, - text: utterance, - wordOffset, - wordCount, - voice, - }) - text = text.substring(end + 1) - wordOffset += wordCount - } - - if (text.length > 0) { - const wordCount = tokenizer.tokenize(text).length - utterances.push({ - idx, - text, - wordOffset, - wordCount, - voice, - }) - } + if (i === sentences.length - 1 && currentText.length > 0) { + utterances.push({ + idx, + text: currentText, + wordOffset, + wordCount: tokenizer.tokenize(currentText).length, + voice, + }) + } + }) return utterances } From 174afe0085eb5b7e2a7eb572de8d5ed839bc643f Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 16:48:20 +0800 Subject: [PATCH 34/54] Add test for decimal point in a sentence --- .../text-to-speech/test/htmlToSsml.test.ts | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/packages/text-to-speech/test/htmlToSsml.test.ts b/packages/text-to-speech/test/htmlToSsml.test.ts index 3d2350378..4f53bdaf4 100644 --- a/packages/text-to-speech/test/htmlToSsml.test.ts +++ b/packages/text-to-speech/test/htmlToSsml.test.ts @@ -235,7 +235,7 @@ describe('convert HTML to Speech file', () => { title: 'Wang Yi at the UN; Fu Zhenghua sentenced; Nvidia China sales', options: TEST_OPTIONS, }) - expect(speechFile.utterances).to.have.lengthOf(19) + expect(speechFile.utterances).to.have.lengthOf(20) }) it('converts long utterances to multiple utterances', () => { @@ -271,4 +271,23 @@ describe('convert HTML to Speech file', () => { }) expect(speechFile.utterances).to.have.lengthOf(2) }) + + it('does not break on not decimal point in sentences', () => { + const html = `
    +
    +
    + If terms of the original $12.5 billion financing package remain the same, bankers may struggle to sell the risky Twitter buyout debt just as credit markets begin to crack, with yields at multiyear highs, they’re potentially on the hook for hundreds of millions of dollars of losses on the unsecured portion alone should they try to unload it to investors. +
    +
    +
    +` + const speechFile = htmlToSpeechFile({ + content: html, + title: 'Test long sentence with decimal point', + options: TEST_OPTIONS, + }) + expect(speechFile.utterances[1].text).to.eql( + 'If terms of the original $12.5 billion financing package remain the same, bankers may struggle to sell the risky Twitter buyout debt just as credit markets begin to crack, with yields at multiyear highs, they’re potentially on the hook for hundreds of millions of dollars of losses on the unsecured portion alone should they try to unload it to investors.' + ) + }) }) From d279790e81e49b3102abe32fd11104362a533b95 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 16:50:16 +0800 Subject: [PATCH 35/54] Detect word boundary only in speech marks --- packages/text-to-speech/src/textToSpeech.ts | 27 ++++++--------------- 1 file changed, 8 insertions(+), 19 deletions(-) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index 00cc06c58..cd4114c00 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -1,10 +1,8 @@ import { CancellationDetails, CancellationReason, - PropertyId, ResultReason, SpeechConfig, - SpeechSynthesisBoundaryType, SpeechSynthesisOutputFormat, SpeechSynthesisResult, SpeechSynthesizer, @@ -32,7 +30,7 @@ export interface SpeechMark { start?: number length?: number word: string - type: 'word' | 'bookmark' | 'punctuation' | 'sentence' + type: 'word' | 'bookmark' } export const synthesizeTextToSpeech = async ( @@ -49,17 +47,11 @@ export const synthesizeTextToSpeech = async ( ) speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Audio16Khz32KBitRateMonoMp3 - // Required for sentence-level WordBoundary events - speechConfig.setProperty( - PropertyId.SpeechServiceResponse_RequestSentenceBoundary, - 'true' - ) // Create the speech synthesizer. 
const synthesizer = new SpeechSynthesizer(speechConfig) const speechMarks: SpeechMark[] = [] let timeOffset = 0 - // let wordOffset = 0 synthesizer.synthesizing = function (s, e) { // convert arrayBuffer to stream and write to stream @@ -94,14 +86,13 @@ export const synthesizeTextToSpeech = async ( // The unit of e.audioOffset is tick (1 tick = 100 nanoseconds), divide by 10,000 to convert to milliseconds. synthesizer.wordBoundary = (s, e) => { - e.boundaryType === SpeechSynthesisBoundaryType.Sentence && - speechMarks.push({ - word: e.text, - time: (timeOffset + e.audioOffset) / 10000, - start: e.textOffset, - length: e.text.length, - type: 'sentence', - }) + speechMarks.push({ + word: e.text, + time: (timeOffset + e.audioOffset) / 10000, + start: e.textOffset, + length: e.text.length, + type: 'word', + }) } synthesizer.bookmarkReached = (s, e) => { @@ -150,8 +141,6 @@ export const synthesizeTextToSpeech = async ( const startSsmlTag = startSsml(ssmlOptions) const text = _.escape(input.text) const ssml = `${startSsmlTag}${text}${endSsml()}` - // set the text offset to be the end of SSML start tag - // wordOffset -= startSsmlTag.length const result = await speakSsmlAsyncPromise(ssml) if (result.reason === ResultReason.Canceled) { throw new Error(result.errorDetails) From 1b8d6d761c602087b42fec83f43f0db7f8b92f11 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 17:07:45 +0800 Subject: [PATCH 36/54] Stop synthesize code blocks --- packages/text-to-speech/src/htmlToSsml.ts | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index 3edd8066d..de785f744 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -69,7 +69,6 @@ const TOP_LEVEL_TAGS = [ 'H5', 'H6', 'LI', - 'CODE', ] function parseDomTree(pageNode: Element) { @@ -148,7 +147,15 @@ function emitElement( element: Element, isTopLevel: 
boolean ) { - const SKIP_TAGS = ['SCRIPT', 'STYLE', 'IMG', 'FIGURE', 'FIGCAPTION', 'IFRAME'] + const SKIP_TAGS = [ + 'SCRIPT', + 'STYLE', + 'IMG', + 'FIGURE', + 'FIGCAPTION', + 'IFRAME', + 'CODE', + ] const topLevelTags = ssmlTagsForTopLevelElement() const idx = element.getAttribute('data-omnivore-anchor-idx') @@ -302,12 +309,13 @@ const textToUtterances = ({ const sentenceTokenizer = new SentenceTokenizer() const sentences = sentenceTokenizer.tokenize(text) let currentText = '' - // max 256 chars per utterance + // split text to max 256 chars per utterance and + // use nlp lib to detect sentences and + // avoid splitting words and sentences sentences.forEach((sentence, i) => { const nextText = currentText + sentence if (nextText.length > MAX_CHARS) { if (currentText.length > 0) { - console.debug('Saving current text in the utterance:', currentText) const wordCount = tokenizer.tokenize(currentText).length utterances.push({ idx, @@ -319,7 +327,6 @@ const textToUtterances = ({ wordOffset += wordCount currentText = sentence } else { - console.debug('Sentence is too long to fit in an utterance:', sentence) const wordCount = tokenizer.tokenize(sentence).length utterances.push({ idx, From 321895844f2ae3d2f9e816c8343ae17955d41ab7 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 17:41:18 +0800 Subject: [PATCH 37/54] Revert back change on wordOffset --- packages/text-to-speech/src/textToSpeech.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index cd4114c00..d68789d29 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -52,6 +52,7 @@ export const synthesizeTextToSpeech = async ( const synthesizer = new SpeechSynthesizer(speechConfig) const speechMarks: SpeechMark[] = [] let timeOffset = 0 + let wordOffset = 0 synthesizer.synthesizing = function (s, e) { // convert arrayBuffer to stream and 
write to stream @@ -89,7 +90,7 @@ export const synthesizeTextToSpeech = async ( speechMarks.push({ word: e.text, time: (timeOffset + e.audioOffset) / 10000, - start: e.textOffset, + start: wordOffset + e.textOffset, length: e.text.length, type: 'word', }) @@ -139,6 +140,7 @@ export const synthesizeTextToSpeech = async ( } // for ssml const startSsmlTag = startSsml(ssmlOptions) + wordOffset -= startSsmlTag.length const text = _.escape(input.text) const ssml = `${startSsmlTag}${text}${endSsml()}` const result = await speakSsmlAsyncPromise(ssml) From 73de22f67809d6e4e332f118b9724ead72b70d21 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 17:42:24 +0800 Subject: [PATCH 38/54] Use wordLength in the event --- packages/text-to-speech/src/textToSpeech.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/text-to-speech/src/textToSpeech.ts b/packages/text-to-speech/src/textToSpeech.ts index d68789d29..da6c2e4c2 100644 --- a/packages/text-to-speech/src/textToSpeech.ts +++ b/packages/text-to-speech/src/textToSpeech.ts @@ -91,7 +91,7 @@ export const synthesizeTextToSpeech = async ( word: e.text, time: (timeOffset + e.audioOffset) / 10000, start: wordOffset + e.textOffset, - length: e.text.length, + length: e.wordLength, type: 'word', }) } From 811cec6c9384f3ba5756cf01bf7fa2656cf87e83 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Wed, 5 Oct 2022 17:55:10 +0800 Subject: [PATCH 39/54] add space to the end of sentence --- packages/text-to-speech/src/htmlToSsml.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/packages/text-to-speech/src/htmlToSsml.ts b/packages/text-to-speech/src/htmlToSsml.ts index de785f744..ef9d6af6e 100644 --- a/packages/text-to-speech/src/htmlToSsml.ts +++ b/packages/text-to-speech/src/htmlToSsml.ts @@ -313,6 +313,10 @@ const textToUtterances = ({ // use nlp lib to detect sentences and // avoid splitting words and sentences sentences.forEach((sentence, i) => { + if (i < sentences.length - 1) { + // add space to 
the end of sentence + sentence += ' ' + } const nextText = currentText + sentence if (nextText.length > MAX_CHARS) { if (currentText.length > 0) { From da9fa1b9628bedf32566fb0731ca6267ed8938be Mon Sep 17 00:00:00 2001 From: ShaneMaglangit Date: Wed, 5 Oct 2022 17:56:11 +0800 Subject: [PATCH 40/54] Chore: Correct .env.local to .env.template on the README for the frontend development section. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ccd44a561..dcc1143ff 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ with docker compose and the frontend locally: ```bash docker-compose up api content-fetch cd packages/web -cp .env.local .env +cp .env.template .env yarn dev ``` From dcfcd9f600c118bc1a5c1cc57ad6356a981bece4 Mon Sep 17 00:00:00 2001 From: ShaneMaglangit Date: Wed, 5 Oct 2022 18:10:23 +0800 Subject: [PATCH 41/54] Fix: Improve the icon alignment in toggle buttons --- .../web/components/templates/homeFeed/HomeFeedContainer.tsx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/web/components/templates/homeFeed/HomeFeedContainer.tsx b/packages/web/components/templates/homeFeed/HomeFeedContainer.tsx index 7f5f2a126..c3be5cc91 100644 --- a/packages/web/components/templates/homeFeed/HomeFeedContainer.tsx +++ b/packages/web/components/templates/homeFeed/HomeFeedContainer.tsx @@ -651,6 +651,9 @@ function HomeFeedGrid(props: HomeFeedContentProps): JSX.Element { const [, updateState] = useState({}) const StyledToggleButton = styled('button', { + display: 'flex', + alignItems: 'center', + justifyContent: 'center', p: '0px', backgroundColor: 'transparent', border: 'none', From d82ffa44324d1cd2fadb8b45336a53f6caddab39 Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Tue, 4 Oct 2022 08:45:48 -0700 Subject: [PATCH 42/54] add view highlights context menu to home feed items --- .../Sources/App/Views/Home/HomeFeedViewIOS.swift | 9 +++++++++ .../Sources/App/Views/Home/HomeFeedViewMac.swift | 2 ++ 
.../Sources/App/Views/Home/HomeFeedViewModel.swift | 1 + apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift | 1 + apple/OmnivoreKit/Sources/Views/FeedItem/GridCard.swift | 5 +++++ 5 files changed, 18 insertions(+) diff --git a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift index a4a1e91b4..58800211b 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift @@ -62,6 +62,9 @@ import Views .sheet(item: $viewModel.itemUnderTitleEdit) { item in LinkedItemTitleEditView(item: item) } + .sheet(item: $viewModel.itemForHighlightsView) { item in + Text("Highlights view for: \(item.unwrappedTitle)") // TODO: implement view + } .toolbar { ToolbarItem(placement: .barTrailing) { Button("", action: {}) @@ -255,6 +258,10 @@ import Views viewModel: viewModel ) .contextMenu { + Button( + action: { viewModel.itemForHighlightsView = item }, + label: { Label("View Highlights", systemImage: "highlighter") } + ) Button( action: { viewModel.itemUnderTitleEdit = item }, label: { Label("Edit Title/Description", systemImage: "textbox") } @@ -372,6 +379,8 @@ import Views func contextMenuActionHandler(item: LinkedItem, action: GridCardAction) { switch action { + case .viewHighlights: + viewModel.itemForHighlightsView = item case .toggleArchiveStatus: viewModel.setLinkArchived(dataService: dataService, objectID: item.objectID, archived: !item.isArchived) case .delete: diff --git a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewMac.swift b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewMac.swift index adced8706..49a12d1c9 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewMac.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewMac.swift @@ -36,6 +36,7 @@ import Views viewModel: viewModel ) .contextMenu { + // TODO: add highlights view button Button( action: { viewModel.itemUnderTitleEdit = 
item }, label: { Label("Edit Title/Description", systemImage: "textbox") } @@ -137,6 +138,7 @@ import Views .sheet(item: $viewModel.itemUnderTitleEdit) { item in LinkedItemTitleEditView(item: item) } + // TODO: add highlights view sheet .task { if viewModel.items.isEmpty { loadItems(isRefresh: true) diff --git a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewModel.swift b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewModel.swift index 543d23727..65d4f4d75 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewModel.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewModel.swift @@ -17,6 +17,7 @@ import Views @Published var showPushNotificationPrimer = false @Published var itemUnderLabelEdit: LinkedItem? @Published var itemUnderTitleEdit: LinkedItem? + @Published var itemForHighlightsView: LinkedItem? @Published var searchTerm = "" @Published var selectedLabels = [LinkedItemLabel]() @Published var negatedLabels = [LinkedItemLabel]() diff --git a/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift b/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift index 5ace227a6..5c8ac8803 100644 --- a/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift +++ b/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift @@ -15,4 +15,5 @@ public enum FeatureFlag { public static let enableSnooze = false public static let enableGridCardsOnPhone = false public static let enableTextToSpeechButton = true + public static let enableHighlightsView = true } diff --git a/apple/OmnivoreKit/Sources/Views/FeedItem/GridCard.swift b/apple/OmnivoreKit/Sources/Views/FeedItem/GridCard.swift index d3f1707e8..99223de4d 100644 --- a/apple/OmnivoreKit/Sources/Views/FeedItem/GridCard.swift +++ b/apple/OmnivoreKit/Sources/Views/FeedItem/GridCard.swift @@ -8,6 +8,7 @@ public enum GridCardAction { case editLabels case editTitle case downloadAudio + case viewHighlights } public struct GridCard: View { @@ -45,6 +46,10 @@ public struct GridCard: View { var contextMenuView: some View { 
Group { + Button( + action: { menuActionHandler(.viewHighlights) }, + label: { Label("View Highlights", systemImage: "highlighter") } + ) Button( action: { menuActionHandler(.editTitle) }, label: { Label("Edit Title/Description", systemImage: "textbox") } From 397fb5d9f70783fd84909ada9fbde62d482b882b Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Tue, 4 Oct 2022 09:14:45 -0700 Subject: [PATCH 43/54] add a basic highlights list view --- .../Views/Highlights/HighlightsListView.swift | 60 +++++++++++++++++++ .../Highlights/HighlightsListViewModel.swift | 13 ++++ .../App/Views/Home/HomeFeedViewIOS.swift | 2 +- .../Views/WebReader/WebReaderContainer.swift | 8 +++ 4 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift create mode 100644 apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift new file mode 100644 index 000000000..75332e587 --- /dev/null +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift @@ -0,0 +1,60 @@ +import Models +import Services +import SwiftUI +import Views + +struct HighlightsListView: View { + @EnvironmentObject var dataService: DataService + @Environment(\.presentationMode) private var presentationMode + @StateObject var viewModel = HighlightsListViewModel() + + let item: LinkedItem + + var innerBody: some View { + List { + Section { + ForEach(viewModel.highlights, id: \.self) { highlight in + Text(highlight.quote ?? 
"no quote") + } + } + } + .navigationTitle("Highlights") + #if os(iOS) + .navigationBarTitleDisplayMode(.inline) + .toolbar { + ToolbarItem(placement: .navigationBarTrailing) { + dismissButton + } + } + #else + .toolbar { + ToolbarItemGroup { + dismissButton + } + } + #endif + } + + var dismissButton: some View { + Button( + action: { presentationMode.wrappedValue.dismiss() }, + label: { Text("Done").foregroundColor(.appGrayTextContrast) } + ) + } + + var body: some View { + Group { + #if os(iOS) + NavigationView { + innerBody + } + #elseif os(macOS) + innerBody + .frame(minWidth: 400, minHeight: 400) + #endif + } + .task { + viewModel.load(item: item) + } + } +} diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift new file mode 100644 index 000000000..611b9a2aa --- /dev/null +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift @@ -0,0 +1,13 @@ +import CoreData +import Models +import Services +import SwiftUI +import Views + +@MainActor final class HighlightsListViewModel: ObservableObject { + @Published var highlights = [Highlight]() + + func load(item: LinkedItem) { + highlights = item.highlights.asArray(of: Highlight.self) + } +} diff --git a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift index 58800211b..f74763fcf 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift @@ -63,7 +63,7 @@ import Views LinkedItemTitleEditView(item: item) } .sheet(item: $viewModel.itemForHighlightsView) { item in - Text("Highlights view for: \(item.unwrappedTitle)") // TODO: implement view + HighlightsListView(item: item) } .toolbar { ToolbarItem(placement: .barTrailing) { diff --git a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift 
b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift index 3624aec4b..b13acc07a 100644 --- a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift +++ b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift @@ -12,6 +12,7 @@ struct WebReaderContainerView: View { @State private var showPreferencesPopover = false @State private var showLabelsModal = false @State private var showTitleEdit = false + @State private var showHighlightsView = false @State var showHighlightAnnotationModal = false @State var safariWebLink: SafariWebLink? @State private var navBarVisibilityRatio = 1.0 @@ -131,6 +132,10 @@ struct WebReaderContainerView: View { Menu( content: { Group { + Button( + action: { showHighlightsView = true }, + label: { Label("View Highlights", systemImage: "highlighter") } + ) Button( action: { showTitleEdit = true }, label: { Label("Edit Title/Description", systemImage: "textbox") } @@ -198,6 +203,9 @@ struct WebReaderContainerView: View { .sheet(isPresented: $showTitleEdit) { LinkedItemTitleEditView(item: item) } + .sheet(isPresented: $showHighlightsView) { + HighlightsListView(item: item) + } #if os(macOS) .buttonStyle(PlainButtonStyle()) #endif From 29d14b3e8d9f513208dde045e5c218b468f6b645 Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Tue, 4 Oct 2022 09:36:28 -0700 Subject: [PATCH 44/54] create highlight list card --- .../App/Views/Highlights/HighlightsListCard.swift | 10 ++++++++++ .../App/Views/Highlights/HighlightsListView.swift | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift new file mode 100644 index 000000000..200fd4c85 --- /dev/null +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift @@ -0,0 +1,10 @@ +import Models 
+import SwiftUI + +struct HighlightsListCard: View { + let highlight: Highlight + + var body: some View { + Text(highlight.quote ?? "no quote") + } +} diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift index 75332e587..157a998d4 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift @@ -14,11 +14,12 @@ struct HighlightsListView: View { List { Section { ForEach(viewModel.highlights, id: \.self) { highlight in - Text(highlight.quote ?? "no quote") + HighlightsListCard(highlight: highlight) } } } .navigationTitle("Highlights") + .listStyle(PlainListStyle()) #if os(iOS) .navigationBarTitleDisplayMode(.inline) .toolbar { From 18760d9043c18b820ad4b5c9d6f51b2c6ff9cb1b Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Tue, 4 Oct 2022 10:09:26 -0700 Subject: [PATCH 45/54] style highlights card --- .../Views/Highlights/HighlightsListCard.swift | 49 ++++++++++++++++++- .../Sources/Utils/FeatureFlags.swift | 2 +- 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift index 200fd4c85..7767abbb5 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift @@ -2,9 +2,56 @@ import Models import SwiftUI struct HighlightsListCard: View { + @State var isContextMenuOpen = false + let highlight: Highlight + var contextMenuView: some View { + Group { + Button( + action: {}, + label: { Label("Stubby One", systemImage: "highlighter") } + ) + Button( + action: {}, + label: { Label("Stubby Two", systemImage: "textbox") } + ) + } + } + var body: some View { - Text(highlight.quote ?? 
"no quote") + VStack(alignment: .leading) { + HStack { + Image(systemName: "highlighter") + + Text(highlight.shortId ?? "no short Id") + .font(.appHeadline) + .foregroundColor(.appGrayTextContrast) + .lineLimit(1) + + Spacer() + + Menu( + content: { contextMenuView }, + label: { + Image(systemName: "ellipsis") + .foregroundColor(.appGrayTextContrast) + .padding() + } + ) + .frame(width: 16, height: 16, alignment: .center) + .onTapGesture { isContextMenuOpen = true } + } + .padding(.top, 8) + + HStack { + Divider() + .frame(width: 6) + .overlay(Color.appYellow48) + + Text(highlight.quote ?? "") + } + .padding(.bottom, 8) + } } } diff --git a/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift b/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift index 5c8ac8803..18db882c2 100644 --- a/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift +++ b/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift @@ -13,7 +13,7 @@ public enum FeatureFlag { public static let enablePushNotifications = false public static let enableShareButton = false public static let enableSnooze = false - public static let enableGridCardsOnPhone = false + public static let enableGridCardsOnPhone = true public static let enableTextToSpeechButton = true public static let enableHighlightsView = true } From 79875706e9893a78b314483ba55642496075989e Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Tue, 4 Oct 2022 12:29:03 -0700 Subject: [PATCH 46/54] add note section to highlights view --- .../Views/Highlights/HighlightsListCard.swift | 35 +++++++++++++++++-- .../Highlights/HighlightsListViewModel.swift | 6 ++++ .../Sources/Utils/FeatureFlags.swift | 2 +- 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift index 7767abbb5..89cd3a8a2 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift +++ 
b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift @@ -19,12 +19,33 @@ struct HighlightsListCard: View { } } + var noteSection: some View { + Group { + HStack { + Image(systemName: "note.text") + + Text("Note") + .font(.appSubheadline) + .foregroundColor(.appGrayTextContrast) + .lineLimit(1) + + Spacer() + } + + Text(highlight.annotation ?? "") + } + } + + var addNoteSection: some View { + Text("Tap to add a note") + } + var body: some View { VStack(alignment: .leading) { HStack { Image(systemName: "highlighter") - Text(highlight.shortId ?? "no short Id") + Text(highlight.highlightCardTitle) .font(.appHeadline) .foregroundColor(.appGrayTextContrast) .lineLimit(1) @@ -49,7 +70,17 @@ struct HighlightsListCard: View { .frame(width: 6) .overlay(Color.appYellow48) - Text(highlight.quote ?? "") + VStack(alignment: .leading, spacing: 8) { + Text(highlight.quote ?? "") + + Divider() + + if highlight.annotation == nil || highlight.annotation?.isEmpty == true { + addNoteSection + } else { + noteSection + } + } } .padding(.bottom, 8) } diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift index 611b9a2aa..bf6905755 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift @@ -11,3 +11,9 @@ import Views highlights = item.highlights.asArray(of: Highlight.self) } } + +extension Highlight { + var highlightCardTitle: String { + "Highlight" + } +} diff --git a/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift b/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift index 18db882c2..5c8ac8803 100644 --- a/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift +++ b/apple/OmnivoreKit/Sources/Utils/FeatureFlags.swift @@ -13,7 +13,7 @@ public enum FeatureFlag { public static let enablePushNotifications = false public static let 
enableShareButton = false public static let enableSnooze = false - public static let enableGridCardsOnPhone = true + public static let enableGridCardsOnPhone = false public static let enableTextToSpeechButton = true public static let enableHighlightsView = true } From cffdfa4342d3a9129a6029c77d738dc97fe78310 Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Tue, 4 Oct 2022 22:09:44 -0700 Subject: [PATCH 47/54] sort highlights by createdAt --- .../App/Views/Highlights/HighlightsListCard.swift | 14 +++++++++++++- .../Views/Highlights/HighlightsListViewModel.swift | 6 +++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift index 89cd3a8a2..aa067a6fa 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift @@ -37,7 +37,19 @@ struct HighlightsListCard: View { } var addNoteSection: some View { - Text("Tap to add a note") + HStack { + Image(systemName: "note.text.badge.plus").foregroundColor(.appGrayTextContrast) + + Text("Add a Note") + .font(.appSubheadline) + .foregroundColor(.appGrayTextContrast) + .lineLimit(1) + + Spacer() + } + .onTapGesture { + print("add a note") + } } var body: some View { diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift index bf6905755..a407be8a2 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift @@ -8,7 +8,11 @@ import Views @Published var highlights = [Highlight]() func load(item: LinkedItem) { - highlights = item.highlights.asArray(of: Highlight.self) + let unsortedHighlights = item.highlights.asArray(of: Highlight.self) + + highlights = 
unsortedHighlights.sorted { + ($0.createdAt ?? Date()) < ($1.createdAt ?? Date()) + } } } From 49e911f023c87007b2309586aa3a6e58bc530ae7 Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Wed, 5 Oct 2022 09:53:56 -0700 Subject: [PATCH 48/54] show highlight annotation sheet when tapping on note or 'add a note' --- .../Views/Highlights/HighlightsListCard.swift | 23 ++++++++++++++++++- .../Article/HighlightAnnotationSheet.swift | 6 ++--- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift index aa067a6fa..fd4e3fd12 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift @@ -1,8 +1,11 @@ import Models import SwiftUI +import Views struct HighlightsListCard: View { @State var isContextMenuOpen = false + @State var annotation = String() + @State var showAnnotationModal = false let highlight: Highlight @@ -34,6 +37,10 @@ struct HighlightsListCard: View { Text(highlight.annotation ?? "") } + .onTapGesture { + annotation = highlight.annotation ?? "" + showAnnotationModal = true + } } var addNoteSection: some View { @@ -48,7 +55,8 @@ struct HighlightsListCard: View { Spacer() } .onTapGesture { - print("add a note") + annotation = highlight.annotation ?? 
"" + showAnnotationModal = true } } @@ -96,5 +104,18 @@ struct HighlightsListCard: View { } .padding(.bottom, 8) } + .sheet(isPresented: $showAnnotationModal) { + HighlightAnnotationSheet( + annotation: $annotation, + onSave: { + print("new annotation = \(annotation)") + showAnnotationModal = false + }, + onCancel: { + print("cancel annotation") + showAnnotationModal = false + } + ) + } } } diff --git a/apple/OmnivoreKit/Sources/Views/Article/HighlightAnnotationSheet.swift b/apple/OmnivoreKit/Sources/Views/Article/HighlightAnnotationSheet.swift index 30dc1def4..bf0a872c6 100644 --- a/apple/OmnivoreKit/Sources/Views/Article/HighlightAnnotationSheet.swift +++ b/apple/OmnivoreKit/Sources/Views/Article/HighlightAnnotationSheet.swift @@ -22,15 +22,13 @@ public struct HighlightAnnotationSheet: View { HStack { Button("Cancel", action: onCancel) Spacer() - HStack { - Image(systemName: "note.text") - Text("Note") - } + Label("Note", systemImage: "note.text") Spacer() Button("Save") { onSave() } } + .foregroundColor(.appGrayTextContrast) ScrollView { TextEditor(text: $annotation) From c5ad8a238c7d33b9dbb7c1e56d83f57de8312a55 Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Wed, 5 Oct 2022 10:57:21 -0700 Subject: [PATCH 49/54] persist annotation changes from highlights view --- .../Views/Highlights/HighlightsListCard.swift | 18 +++---- .../Views/Highlights/HighlightsListView.swift | 15 ++++-- .../Highlights/HighlightsListViewModel.swift | 50 +++++++++++++++---- .../App/Views/Home/HomeFeedViewIOS.swift | 2 +- .../Views/WebReader/WebReaderContainer.swift | 2 +- .../Sources/Models/DataModels/FeedItem.swift | 6 +++ 6 files changed, 68 insertions(+), 25 deletions(-) diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift index fd4e3fd12..f488db303 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift +++ 
b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift @@ -7,7 +7,8 @@ struct HighlightsListCard: View { @State var annotation = String() @State var showAnnotationModal = false - let highlight: Highlight + let highlightParams: HighlightListItemParams + let onSaveAnnotation: (String) -> Void var contextMenuView: some View { Group { @@ -35,10 +36,10 @@ struct HighlightsListCard: View { Spacer() } - Text(highlight.annotation ?? "") + Text(highlightParams.annotation) } .onTapGesture { - annotation = highlight.annotation ?? "" + annotation = highlightParams.annotation showAnnotationModal = true } } @@ -55,7 +56,7 @@ struct HighlightsListCard: View { Spacer() } .onTapGesture { - annotation = highlight.annotation ?? "" + annotation = highlightParams.annotation showAnnotationModal = true } } @@ -65,7 +66,7 @@ struct HighlightsListCard: View { HStack { Image(systemName: "highlighter") - Text(highlight.highlightCardTitle) + Text(highlightParams.title) .font(.appHeadline) .foregroundColor(.appGrayTextContrast) .lineLimit(1) @@ -91,11 +92,11 @@ struct HighlightsListCard: View { .overlay(Color.appYellow48) VStack(alignment: .leading, spacing: 8) { - Text(highlight.quote ?? 
"") + Text(highlightParams.quote) Divider() - if highlight.annotation == nil || highlight.annotation?.isEmpty == true { + if highlightParams.annotation.isEmpty { addNoteSection } else { noteSection @@ -108,11 +109,10 @@ struct HighlightsListCard: View { HighlightAnnotationSheet( annotation: $annotation, onSave: { - print("new annotation = \(annotation)") + onSaveAnnotation(annotation) showAnnotationModal = false }, onCancel: { - print("cancel annotation") showAnnotationModal = false } ) diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift index 157a998d4..b3c0e848a 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift @@ -1,3 +1,4 @@ +import CoreData import Models import Services import SwiftUI @@ -8,13 +9,19 @@ struct HighlightsListView: View { @Environment(\.presentationMode) private var presentationMode @StateObject var viewModel = HighlightsListViewModel() - let item: LinkedItem + let itemObjectID: NSManagedObjectID var innerBody: some View { List { Section { - ForEach(viewModel.highlights, id: \.self) { highlight in - HighlightsListCard(highlight: highlight) + ForEach(viewModel.highlightItems) { highlightParams in + HighlightsListCard(highlightParams: highlightParams) { newAnnotation in + viewModel.updateAnnotation( + highlightID: highlightParams.highlightID, + annotation: newAnnotation, + dataService: dataService + ) + } } } } @@ -55,7 +62,7 @@ struct HighlightsListView: View { #endif } .task { - viewModel.load(item: item) + viewModel.load(itemObjectID: itemObjectID, dataService: dataService) } } } diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift index a407be8a2..791d85c29 100644 --- 
a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift @@ -4,20 +4,50 @@ import Services import SwiftUI import Views -@MainActor final class HighlightsListViewModel: ObservableObject { - @Published var highlights = [Highlight]() +struct HighlightListItemParams: Identifiable { + let id = UUID() + let highlightID: String + let title: String + let annotation: String + let quote: String +} - func load(item: LinkedItem) { +@MainActor final class HighlightsListViewModel: ObservableObject { + @Published var highlightItems = [HighlightListItemParams]() + + func load(itemObjectID: NSManagedObjectID, dataService: DataService) { + if let linkedItem = dataService.viewContext.object(with: itemObjectID) as? LinkedItem { + loadHighlights(item: linkedItem) + } + } + + func updateAnnotation(highlightID: String, annotation: String, dataService: DataService) { + dataService.updateHighlightAttributes(highlightID: highlightID, annotation: annotation) + + if let index = highlightItems.firstIndex(where: { $0.highlightID == highlightID }) { + highlightItems[index] = HighlightListItemParams( + highlightID: highlightID, + title: highlightItems[index].title, + annotation: annotation, + quote: highlightItems[index].quote + ) + } + } + + private func loadHighlights(item: LinkedItem) { let unsortedHighlights = item.highlights.asArray(of: Highlight.self) - highlights = unsortedHighlights.sorted { + let highlights = unsortedHighlights.sorted { ($0.createdAt ?? Date()) < ($1.createdAt ?? Date()) } + + highlightItems = highlights.map { + HighlightListItemParams( + highlightID: $0.unwrappedID, + title: "Highlight", + annotation: $0.annotation ?? "", + quote: $0.quote ?? 
"" + ) + } } } - -extension Highlight { - var highlightCardTitle: String { - "Highlight" - } -} diff --git a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift index f74763fcf..85cb7c15b 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift @@ -63,7 +63,7 @@ import Views LinkedItemTitleEditView(item: item) } .sheet(item: $viewModel.itemForHighlightsView) { item in - HighlightsListView(item: item) + HighlightsListView(itemObjectID: item.objectID) } .toolbar { ToolbarItem(placement: .barTrailing) { diff --git a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift index b13acc07a..a005c3808 100644 --- a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift +++ b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift @@ -204,7 +204,7 @@ struct WebReaderContainerView: View { LinkedItemTitleEditView(item: item) } .sheet(isPresented: $showHighlightsView) { - HighlightsListView(item: item) + HighlightsListView(itemObjectID: item.objectID) } #if os(macOS) .buttonStyle(PlainButtonStyle()) diff --git a/apple/OmnivoreKit/Sources/Models/DataModels/FeedItem.swift b/apple/OmnivoreKit/Sources/Models/DataModels/FeedItem.swift index e96ba857c..e8785777e 100644 --- a/apple/OmnivoreKit/Sources/Models/DataModels/FeedItem.swift +++ b/apple/OmnivoreKit/Sources/Models/DataModels/FeedItem.swift @@ -87,6 +87,12 @@ public extension LinkedItem { } } + var sortedHighlights: [Highlight] { + highlights.asArray(of: Highlight.self).sorted { + ($0.createdAt ?? Date()) < ($1.createdAt ?? 
Date()) + } + } + var labelsJSONString: String { let labels = self.labels.asArray(of: LinkedItemLabel.self).map { label in [ From db9246553fb121c394cef06928a82e48a1d48d07 Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Wed, 5 Oct 2022 11:25:05 -0700 Subject: [PATCH 50/54] message web container view when highlight view is dismissed and mutations have occurred --- .../App/Views/Highlights/HighlightsListCard.swift | 2 ++ .../App/Views/Highlights/HighlightsListView.swift | 6 +++++- .../Sources/App/Views/Home/HomeFeedViewIOS.swift | 3 ++- .../App/Views/WebReader/WebReaderContainer.swift | 13 +++++++++++-- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift index f488db303..be1857d6f 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift @@ -8,6 +8,7 @@ struct HighlightsListCard: View { @State var showAnnotationModal = false let highlightParams: HighlightListItemParams + @Binding var hasHighlightMutations: Bool let onSaveAnnotation: (String) -> Void var contextMenuView: some View { @@ -111,6 +112,7 @@ struct HighlightsListCard: View { onSave: { onSaveAnnotation(annotation) showAnnotationModal = false + hasHighlightMutations = true }, onCancel: { showAnnotationModal = false diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift index b3c0e848a..ac23279f6 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift @@ -10,12 +10,16 @@ struct HighlightsListView: View { @StateObject var viewModel = HighlightsListViewModel() let itemObjectID: NSManagedObjectID + @Binding var hasHighlightMutations: Bool var 
innerBody: some View { List { Section { ForEach(viewModel.highlightItems) { highlightParams in - HighlightsListCard(highlightParams: highlightParams) { newAnnotation in + HighlightsListCard( + highlightParams: highlightParams, + hasHighlightMutations: $hasHighlightMutations + ) { newAnnotation in viewModel.updateAnnotation( highlightID: highlightParams.highlightID, annotation: newAnnotation, diff --git a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift index 85cb7c15b..b7d1cfa21 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Home/HomeFeedViewIOS.swift @@ -11,6 +11,7 @@ import Views private let enableGrid = UIDevice.isIPad || FeatureFlag.enableGridCardsOnPhone struct HomeFeedContainerView: View { + @State var hasHighlightMutations = false @EnvironmentObject var dataService: DataService @EnvironmentObject var audioController: AudioController @@ -63,7 +64,7 @@ import Views LinkedItemTitleEditView(item: item) } .sheet(item: $viewModel.itemForHighlightsView) { item in - HighlightsListView(itemObjectID: item.objectID) + HighlightsListView(itemObjectID: item.objectID, hasHighlightMutations: $hasHighlightMutations) } .toolbar { ToolbarItem(placement: .barTrailing) { diff --git a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift index a005c3808..11b7e0717 100644 --- a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift +++ b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift @@ -13,6 +13,7 @@ struct WebReaderContainerView: View { @State private var showLabelsModal = false @State private var showTitleEdit = false @State private var showHighlightsView = false + @State private var hasPerformedHighlightMutations = false @State var showHighlightAnnotationModal = false @State var safariWebLink: 
SafariWebLink? @State private var navBarVisibilityRatio = 1.0 @@ -44,6 +45,11 @@ struct WebReaderContainerView: View { } } + func onHighlightListViewDismissal() { + print("has mutations: \(hasPerformedHighlightMutations)") + hasPerformedHighlightMutations = false + } + private func handleHighlightAction(message: WKScriptMessage) { guard let messageBody = message.body as? [String: String] else { return } guard let actionID = messageBody["actionID"] else { return } @@ -203,8 +209,11 @@ struct WebReaderContainerView: View { .sheet(isPresented: $showTitleEdit) { LinkedItemTitleEditView(item: item) } - .sheet(isPresented: $showHighlightsView) { - HighlightsListView(itemObjectID: item.objectID) + .sheet(isPresented: $showHighlightsView, onDismiss: onHighlightListViewDismissal) { + HighlightsListView( + itemObjectID: item.objectID, + hasHighlightMutations: $hasPerformedHighlightMutations + ) } #if os(macOS) .buttonStyle(PlainButtonStyle()) From e36920e96ea5dfcfe33e66f30f5b76dcdd109be3 Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Wed, 5 Oct 2022 12:01:40 -0700 Subject: [PATCH 51/54] reload web view if article highlights had been mutated from the highlights list modal --- .../Sources/App/Views/WebReader/WebReader.swift | 5 ++++- .../App/Views/WebReader/WebReaderContainer.swift | 16 ++++++++++++++-- .../Views/WebReader/WebReaderCoordinator.swift | 1 + .../Models/DataModels/ArticleContent.swift | 1 + 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReader.swift b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReader.swift index f30aab097..78a8f3dce 100644 --- a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReader.swift +++ b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReader.swift @@ -70,6 +70,7 @@ struct WebReader: PlatformViewRepresentable { context.coordinator.linkHandler = openLinkAction context.coordinator.webViewActionHandler = webViewActionHandler
context.coordinator.updateNavBarVisibilityRatio = navBarVisibilityRatioUpdater + context.coordinator.articleContentID = articleContent.id loadContent(webView: webView) return webView @@ -101,8 +102,10 @@ struct WebReader: PlatformViewRepresentable { } // If the webview had been terminated `needsReload` will have been set to true - if context.coordinator.needsReload { + // Or if the articleContent value has changed then its id will be different from the coordinator's + if context.coordinator.needsReload || context.coordinator.articleContentID != articleContent.id { loadContent(webView: webView) + context.coordinator.articleContentID = articleContent.id context.coordinator.needsReload = false return } diff --git a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift index 11b7e0717..83c78fbf9 100644 --- a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift +++ b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderContainer.swift @@ -46,8 +46,20 @@ struct WebReaderContainerView: View { } func onHighlightListViewDismissal() { - print("has mutations: \(hasPerformedHighlightMutations)") - hasPerformedHighlightMutations = false + // Reload the web view if mutation happened in highlights list modal + guard hasPerformedHighlightMutations else { return } + + hasPerformedHighlightMutations.toggle() + + Task { + if let username = dataService.currentViewer?.username { + await viewModel.loadContent( + dataService: dataService, + username: username, + itemID: item.unwrappedID + ) + } + } } private func handleHighlightAction(message: WKScriptMessage) { diff --git a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderCoordinator.swift b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderCoordinator.swift index acaaafd18..a08377a45 100644 --- a/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderCoordinator.swift +++
b/apple/OmnivoreKit/Sources/App/Views/WebReader/WebReaderCoordinator.swift @@ -19,6 +19,7 @@ final class WebReaderCoordinator: NSObject { var previousShowNavBarActionID: UUID? var previousShareActionID: UUID? var updateNavBarVisibilityRatio: (Double) -> Void = { _ in } + var articleContentID = UUID() private var yOffsetAtStartOfDrag: Double? private var lastYOffset: Double = 0 private var hasDragged = false diff --git a/apple/OmnivoreKit/Sources/Models/DataModels/ArticleContent.swift b/apple/OmnivoreKit/Sources/Models/DataModels/ArticleContent.swift index 028dcbacc..86c5469d1 100644 --- a/apple/OmnivoreKit/Sources/Models/DataModels/ArticleContent.swift +++ b/apple/OmnivoreKit/Sources/Models/DataModels/ArticleContent.swift @@ -9,6 +9,7 @@ public enum ArticleContentStatus: String { } public struct ArticleContent { + public let id = UUID() public let title: String public let htmlContent: String public let highlightsJSONString: String From 36e5e768098cfffa313fb9e88c843217abc80969 Mon Sep 17 00:00:00 2001 From: Satindar Dhillon Date: Wed, 5 Oct 2022 12:35:35 -0700 Subject: [PATCH 52/54] add copy and delete options to highlight list cards --- .../Views/Highlights/HighlightsListCard.swift | 21 +++++++++++++--- .../Views/Highlights/HighlightsListView.swift | 25 +++++++++++++------ .../Highlights/HighlightsListViewModel.swift | 5 ++++ 3 files changed, 39 insertions(+), 12 deletions(-) diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift index be1857d6f..efa1cd851 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListCard.swift @@ -10,16 +10,29 @@ struct HighlightsListCard: View { let highlightParams: HighlightListItemParams @Binding var hasHighlightMutations: Bool let onSaveAnnotation: (String) -> Void + let onDeleteHighlight: () -> Void var contextMenuView: some View { 
Group { Button( - action: {}, - label: { Label("Stubby One", systemImage: "highlighter") } + action: { + #if os(iOS) + UIPasteboard.general.string = highlightParams.quote + #endif + + #if os(macOS) + let pasteBoard = NSPasteboard.general + pasteBoard.clearContents() + pasteBoard.writeObjects([highlightParams.quote as NSString]) + #endif + + Snackbar.show(message: "Highlight copied") + }, + label: { Label("Copy", systemImage: "doc.on.doc") } ) Button( - action: {}, - label: { Label("Stubby Two", systemImage: "textbox") } + action: onDeleteHighlight, + label: { Label("Delete", systemImage: "trash") } ) } } diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift index ac23279f6..b16a0c3bf 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift +++ b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListView.swift @@ -18,14 +18,23 @@ struct HighlightsListView: View { ForEach(viewModel.highlightItems) { highlightParams in HighlightsListCard( highlightParams: highlightParams, - hasHighlightMutations: $hasHighlightMutations - ) { newAnnotation in - viewModel.updateAnnotation( - highlightID: highlightParams.highlightID, - annotation: newAnnotation, - dataService: dataService - ) - } + hasHighlightMutations: $hasHighlightMutations, + onSaveAnnotation: { + viewModel.updateAnnotation( + highlightID: highlightParams.highlightID, + annotation: $0, + dataService: dataService + ) + }, + onDeleteHighlight: { + hasHighlightMutations = true + + viewModel.deleteHighlight( + highlightID: highlightParams.highlightID, + dataService: dataService + ) + } + ) } } } diff --git a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift index 791d85c29..6eca3b4e9 100644 --- a/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift +++ 
b/apple/OmnivoreKit/Sources/App/Views/Highlights/HighlightsListViewModel.swift @@ -34,6 +34,11 @@ struct HighlightListItemParams: Identifiable { } } + func deleteHighlight(highlightID: String, dataService: DataService) { + dataService.deleteHighlight(highlightID: highlightID) + highlightItems.removeAll { $0.highlightID == highlightID } + } + private func loadHighlights(item: LinkedItem) { let unsortedHighlights = item.highlights.asArray(of: Highlight.self) From 111639a31dfe588de52814591e0e8fe05fb5a59d Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 6 Oct 2022 11:17:11 +0800 Subject: [PATCH 53/54] Exposes port 9090:8080 in the container --- docker-compose.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 8b6a44619..cacd77f47 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -140,8 +140,8 @@ services: context: . dockerfile: ./packages/content-fetch/Dockerfile-local container_name: "omnivore-content-fetch" - expose: - - 9090 + ports: + - "9090:8080" environment: - JWT_SECRET=some_secret - VERIFICATION_TOKEN=some_token From bc9b50c3cb7e50790dbd1b039c10d71835f72958 Mon Sep 17 00:00:00 2001 From: Hongbo Wu Date: Thu, 6 Oct 2022 12:57:30 +0800 Subject: [PATCH 54/54] Remove dockerfile-local --- docker-compose.yml | 2 +- packages/content-fetch/Dockerfile-local | 51 ------------------------- 2 files changed, 1 insertion(+), 52 deletions(-) delete mode 100644 packages/content-fetch/Dockerfile-local diff --git a/docker-compose.yml b/docker-compose.yml index cacd77f47..170ca72fa 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -138,7 +138,7 @@ services: content-fetch: build: context: . 
- dockerfile: ./packages/content-fetch/Dockerfile-local + dockerfile: ./packages/content-fetch/Dockerfile container_name: "omnivore-content-fetch" ports: - "9090:8080" diff --git a/packages/content-fetch/Dockerfile-local b/packages/content-fetch/Dockerfile-local deleted file mode 100644 index 694ef1a08..000000000 --- a/packages/content-fetch/Dockerfile-local +++ /dev/null @@ -1,51 +0,0 @@ -FROM node:14.18-alpine - -# Installs latest Chromium (92) package. -RUN apk add --no-cache \ - chromium \ - nss \ - freetype \ - harfbuzz \ - ca-certificates \ - ttf-freefont \ - nodejs \ - gcc \ - g++ \ - python3 \ - make \ - yarn - -# Add user so we don't need --no-sandbox. -RUN addgroup -S pptruser && adduser -S -g pptruser pptruser \ - && mkdir -p /home/pptruser/Downloads /app \ - && chown -R pptruser:pptruser /home/pptruser \ - && chown -R pptruser:pptruser /app - -# Run everything after as non-privileged user. -WORKDIR /app - -ENV CHROMIUM_PATH /usr/bin/chromium-browser -ENV LAUNCH_HEADLESS=true - -COPY package.json . -COPY yarn.lock . -COPY tsconfig.json . -COPY .prettierrc . -COPY .eslintrc . - -COPY /packages/content-handler/package.json ./packages/content-handler/package.json - -RUN yarn install --pure-lockfile - -ADD /packages/content-fetch ./packages/content-fetch -ADD /packages/content-handler ./packages/content-handler -RUN yarn workspace @omnivore/content-handler build - -# After building, fetch the production dependencies -RUN rm -rf /app/packages/content-fetch/node_modules -RUN rm -rf /app/node_modules -RUN yarn install --pure-lockfile --production - -EXPOSE 8080 - -CMD ["yarn", "workspace", "@omnivore/content-fetch", "start"]