Improving Self-Hosting and Removing 3rd Party dependencies. (#4513)

* fix: Library Header layout shift

* Bump Github Actions versions.

* Self-Hosting Changes

* Fix Minio Environment Variable

* Just make pdfs successful, due to lack of PDFHandler

* Fix issue where flag was set wrong

* Added an NGINX Example file

* Add some documentation for self-hosting via Docker Compose

* Make some adjustments to Puppeteer due to failing sites.

* adjust timings

* Add start of Mail Service

* Fix Docker Files

* More email service stuff

* Add Guide to use Zapier for Email-Importing.

* Ensure that if no env is provided it uses the old email settings

* Add some instructions for self-hosted email

* Add SNS Endpoints for Mail Watcher

* Add steps and functionality for using SES and SNS for email

* Uncomment a few jobs.

* Added option for Firefox for parser. Was having issues with Chromium on Docker.

* Add missing space.

Co-authored-by: Russ Taylor <729694+russtaylor@users.noreply.github.com>

* Fix some wording on the Guide

* update browser extension to handle self-hosted instances

* add slight documentation to options page

* Fix MV

* Do raw handlers for Medium

* Fix images in Medium

* Update self-hosting/GUIDE.md

Co-authored-by: Mike Baker <1426795+mbaker3@users.noreply.github.com>

* Update Guide with other variables

* Add The Verge to JS-less handlers

* Update regex and image-proxy

* Update self-hosting/nginx/nginx.conf

Co-authored-by: Mike Baker <1426795+mbaker3@users.noreply.github.com>

* Update regex and image-proxy

* Update self-hosting/docker-compose/docker-compose.yml

Co-authored-by: Mike Baker <1426795+mbaker3@users.noreply.github.com>

* Fix Minio for Export

* Merge to main

* Update GUIDE with newer NGINX

* Update nginx config to include api/save route

* Enable Native PDF View for PDFS

* Enable Native PDF View for PDFS

* feat:lover packages test

* feat:working build

* feat:alpine build

* docs:api dockerfile docs

* Write a PDF.js wrapper to replace pspdfkit

* Revert changes for replication, set settings to have default mode

* build folder got removed due to gitignore on pdf

* Add Box shadow to pdf pages

* Add Toggle for Progress in PDFS, enabled native viewer toggle

* Update node version to LTS

* Update node version to LTS

* Fix Linting issues

* Fix Linting issues

* Make env variable nullable

* Add touchend listener for mobile

* Make changes to PDF for mobile

* fix(android): change serverUrl to selfhosted first

* feat:2 stage alpine content fetch

* feat:separated start script

* fix:changed to node 22

* Add back youtube functionality and add guide

* trigger build

* Fix cache issue on YouTube

* Allow empty AWS_S3_ENDPOINT

* Allow empty AWS_S3_ENDPOINT

* Add GCHR for all images

* Add GCHR For self hosting.

* Add GCHR For self hosting.

* Test prebuilt.

* Test prebuilt

* Test prebuilt...

* Fix web image

* Remove Web Image (For now)

* Move docker-compose to images

* Move docker-compose files to correct locations

* Remove the need for ARGS

* Update packages, and Typescript versions

* Fix

* Fix issues with build on Web

* Correct push

* Fix Linting issues

* Fix Trace import

* Add missing types

* Fix Tasks

* Add information into guide about self-build

* Fix issues with PDF Viewer

---------

Co-authored-by: keumky2 <keumky2@woowahan.com>
Co-authored-by: William Theaker <wtheaker@nvidia.com>
Co-authored-by: Russ Taylor <729694+russtaylor@users.noreply.github.com>
Co-authored-by: David Adams <david@dadams2.com>
Co-authored-by: Mike Baker <1426795+mbaker3@users.noreply.github.com>
Co-authored-by: m1xxos <66390094+m1xxos@users.noreply.github.com>
Co-authored-by: Adil <mr.adil777@gmail.com>
This commit is contained in:
Tom Rogers
2025-01-27 13:33:16 +01:00
committed by GitHub
parent 4cd5f2f02a
commit 4e582fb55d
339 changed files with 14859 additions and 11964 deletions

View File

@ -11,6 +11,7 @@
}, },
"plugins": ["@typescript-eslint"], "plugins": ["@typescript-eslint"],
"rules": { "rules": {
"semi": [2, "never"] "semi": [2, "never"],
"@typescript-eslint/no-unnecessary-type-assertion": [0, "never"]
} }
} }

View File

@ -13,11 +13,11 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v2 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- name: 'Login to GitHub container registry' - name: 'Login to GitHub container registry'
uses: docker/login-action@v1 uses: docker/login-action@v3
with: with:
registry: ghcr.io registry: ghcr.io
username: ${{github.actor}} username: ${{github.actor}}
@ -30,3 +30,4 @@ jobs:
run: | run: |
docker build --file packages/content-fetch/Dockerfile . --tag "ghcr.io/omnivore-app/content-fetch:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/content-fetch:latest docker build --file packages/content-fetch/Dockerfile . --tag "ghcr.io/omnivore-app/content-fetch:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/content-fetch:latest
docker push ghcr.io/omnivore-app/content-fetch:${GITHUB_SHA} docker push ghcr.io/omnivore-app/content-fetch:${GITHUB_SHA}

View File

@ -0,0 +1,64 @@
name: Build Self-Hosting Docker Images
on:
push:
branches:
- main
- self-host-updates
paths-ignore:
- 'apple/**'
- 'android/**'
jobs:
build-self-hostdocker-images:
name: Build self-host docker images
permissions:
contents: read
packages: write
attestations: write
id-token: write
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: 'Login to GitHub container registry'
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{github.actor}}
password: ${{secrets.GITHUB_TOKEN}}
- name: Build the backend docker image
run: |
docker build . --file packages/api/Dockerfile --tag "ghcr.io/omnivore-app/sh-backend:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-backend:latest
docker push ghcr.io/omnivore-app/sh-backend:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-backend:latest
- name: Build the content-fetch docker image
run: |
docker build --file packages/content-fetch/Dockerfile . --tag "ghcr.io/omnivore-app/sh-content-fetch:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-content-fetch:latest
docker push ghcr.io/omnivore-app/sh-content-fetch:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-content-fetch:latest
- name: Build the queue-processor docker image.
run: |
docker build . --file packages/api/queue-processor/Dockerfile --tag "ghcr.io/omnivore-app/sh-queue-processor:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-queue-processor:latest
docker push ghcr.io/omnivore-app/sh-queue-processor:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-queue-processor:latest
- name: Build the migrate docker image
run: |
docker build --file packages/db/Dockerfile . --tag "ghcr.io/omnivore-app/sh-migrate:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-migrate:latest
docker push ghcr.io/omnivore-app/sh-migrate:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-migrate:latest
- name: Build the image-proxy docker image
run: |
cp imageproxy/start_imageproxy.sh .
chmod +x start_imageproxy.sh
docker build --file imageproxy/Dockerfile . --tag "ghcr.io/omnivore-app/sh-image-proxy:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-image-proxy:latest
docker push ghcr.io/omnivore-app/sh-image-proxy:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-image-proxy:latest
- name: Build the mail-watch-server docker image
run: |
docker build --file packages/local-mail-watcher/Dockerfile . --tag "ghcr.io/omnivore-app/sh-local-mail-watcher:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-local-mail-watcher:latest
docker push ghcr.io/omnivore-app/sh-local-mail-watcher:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-local-mail-watcher:latest

View File

@ -9,7 +9,7 @@ jobs:
lint_migrations: lint_migrations:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- uses: actions/checkout@v1 - uses: actions/checkout@v4
- name: Fetch main branch - name: Fetch main branch
run: git fetch origin main:main run: git fetch origin main:main
- name: Find modified migrations - name: Find modified migrations

View File

@ -14,12 +14,12 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
with: with:
repository: ${{ github.event.pull_request.head.repo.full_name }} repository: ${{ github.event.pull_request.head.repo.full_name }}
ref: ${{ github.event.pull_request.head.ref }} ref: ${{ github.event.pull_request.head.ref }}
- name: Setup Go - name: Setup Go
uses: actions/setup-go@v3 uses: actions/setup-go@v5
with: with:
go-version: 1.19 go-version: 1.19
- name: Generate distiller output for readability - name: Generate distiller output for readability
@ -34,7 +34,7 @@ jobs:
go-domdistiller file -i $f/source.html -o $f/distiller.html go-domdistiller file -i $f/source.html -o $f/distiller.html
done done
- name: Setup Python - name: Setup Python
uses: actions/setup-python@v4 uses: actions/setup-python@v5
with: with:
python-version: 3.9 python-version: 3.9
- name: Generate static html - name: Generate static html

View File

@ -45,7 +45,7 @@ jobs:
ports: ports:
- 6379 - 6379
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Initialize the database - name: Initialize the database
@ -55,9 +55,9 @@ jobs:
env: env:
PGPASSWORD: postgres PGPASSWORD: postgres
- name: Use Node.js ${{ matrix.node-version }} - name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v2 uses: actions/setup-node@v4
with: with:
node-version: 18.16 node-version: 22.12.0
- name: Get yarn cache directory path - name: Get yarn cache directory path
id: yarn-cache-dir-path id: yarn-cache-dir-path
run: echo "::set-output name=dir::$(source ~/.nvm/nvm.sh && yarn cache dir)" run: echo "::set-output name=dir::$(source ~/.nvm/nvm.sh && yarn cache dir)"
@ -102,7 +102,7 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v2 uses: actions/checkout@v4
with: with:
fetch-depth: 0 fetch-depth: 0
- name: Build the API docker image - name: Build the API docker image

View File

@ -26,13 +26,13 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
- name: Checkout - name: Checkout
uses: actions/checkout@v3 uses: actions/checkout@v4
- name: Setup Pages - name: Setup Pages
uses: actions/configure-pages@v3 uses: actions/configure-pages@v5
- name: Upload artifact - name: Upload artifact
uses: actions/upload-pages-artifact@v1 uses: actions/upload-pages-artifact@v3
with: with:
path: 'packages/readabilityjs/test' path: 'packages/readabilityjs/test'
- name: Deploy to GitHub Pages - name: Deploy to GitHub Pages
id: deployment id: deployment
uses: actions/deploy-pages@v1 uses: actions/deploy-pages@v4

View File

@ -1 +1 @@
18.16 22.12.0

View File

@ -151,24 +151,7 @@ is done fetching your content you will see it in your library.
## How to deploy to your own server ## How to deploy to your own server
Omnivore was originally designed to be deployed on GCP and takes advantage A guide for running a self hosted server can be found [here](./self-hosting/GUIDE.md)
of some of GCP's PaaS features. We are working to make Omnivore more portable
so you can easily run the service on your own infrastructure. You can track
progress here: <https://github.com/omnivore-app/omnivore/issues/25>
To deploy Omnivore on your own hardware you will need to deploy three
dockerized services and configure access to a postgres service. To handle
PDF documents you will need to configure access to a Google Cloud Storage
bucket.
- `packages/api` - the backend API service
- `packages/web` - the web frontend (can easily be deployed to vercel)
- `packages/puppeteer-parse` - the content fetching service (can easily
be deployed as an AWS lambda or GCP Cloud Function)
Additionally, you will need to run our database migrations to initialize
your database. These are dockerized and can be run with the
`packages/db` service.
## License ## License

View File

@ -1,5 +1,12 @@
# Omnivore - Android # Omnivore - Android
## Setup with gradle
In case you do not have Android Studio and you do not want to install it, you may want to use gradlew scripts to build the application.
`./gradlew assembleDebug` should create `./app/build/outputs/apk/debug/app-debug.apk`
## Setup ## Setup
From the root directory run the following command: From the root directory run the following command:

View File

@ -10,6 +10,7 @@ import app.omnivore.omnivore.R
import app.omnivore.omnivore.core.data.DataService import app.omnivore.omnivore.core.data.DataService
import app.omnivore.omnivore.core.datastore.DatastoreRepository import app.omnivore.omnivore.core.datastore.DatastoreRepository
import app.omnivore.omnivore.core.datastore.omnivoreAuthToken import app.omnivore.omnivore.core.datastore.omnivoreAuthToken
import app.omnivore.omnivore.core.datastore.omnivoreSelfHostedApiServer
import app.omnivore.omnivore.graphql.generated.UpdatePageMutation import app.omnivore.omnivore.graphql.generated.UpdatePageMutation
import app.omnivore.omnivore.graphql.generated.type.UpdatePageInput import app.omnivore.omnivore.graphql.generated.type.UpdatePageInput
import app.omnivore.omnivore.utils.Constants import app.omnivore.omnivore.utils.Constants
@ -48,6 +49,12 @@ class EditInfoViewModel @Inject constructor(
datastoreRepo.getString(omnivoreAuthToken) datastoreRepo.getString(omnivoreAuthToken)
} }
fun baseUrl() = runBlocking {
datastoreRepo.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL
}
private fun serverUrl() = "${baseUrl()}/api/graphql"
fun editInfo(itemId: String, title: String, author: String?, description: String?) { fun editInfo(itemId: String, title: String, author: String?, description: String?) {
viewModelScope.launch { viewModelScope.launch {
isLoading = true isLoading = true
@ -62,7 +69,7 @@ class EditInfoViewModel @Inject constructor(
} }
val apolloClient = ApolloClient.Builder() val apolloClient = ApolloClient.Builder()
.serverUrl("${Constants.apiURL}/api/graphql") .serverUrl(serverUrl())
.addHttpHeader("Authorization", value = authToken) .addHttpHeader("Authorization", value = authToken)
.build() .build()

View File

@ -176,6 +176,12 @@ class OnboardingViewModel @Inject constructor(
resetPendingEmailUserCreds() resetPendingEmailUserCreds()
} }
fun baseUrl() = runBlocking {
datastoreRepository.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL
}
private fun serverUrl() = "${baseUrl()}/api/graphql"
fun validateUsername(potentialUsername: String) { fun validateUsername(potentialUsername: String) {
validateUsernameJob?.cancel() validateUsernameJob?.cancel()
@ -209,7 +215,7 @@ class OnboardingViewModel @Inject constructor(
} }
val apolloClient = val apolloClient =
ApolloClient.Builder().serverUrl("${Constants.apiURL}/api/graphql").build() ApolloClient.Builder().serverUrl(serverUrl()).build()
try { try {
val response = apolloClient.query( val response = apolloClient.query(

View File

@ -16,6 +16,7 @@ import androidx.work.WorkerParameters
import app.omnivore.omnivore.R import app.omnivore.omnivore.R
import app.omnivore.omnivore.core.datastore.DatastoreRepository import app.omnivore.omnivore.core.datastore.DatastoreRepository
import app.omnivore.omnivore.core.datastore.omnivoreAuthToken import app.omnivore.omnivore.core.datastore.omnivoreAuthToken
import app.omnivore.omnivore.core.datastore.omnivoreSelfHostedApiServer
import app.omnivore.omnivore.graphql.generated.SaveUrlMutation import app.omnivore.omnivore.graphql.generated.SaveUrlMutation
import app.omnivore.omnivore.graphql.generated.type.SaveUrlInput import app.omnivore.omnivore.graphql.generated.type.SaveUrlInput
import app.omnivore.omnivore.utils.Constants import app.omnivore.omnivore.utils.Constants
@ -59,11 +60,16 @@ class SaveURLWorker @AssistedInject constructor(
} }
} }
suspend fun baseUrl() =
datastoreRepository.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL
private suspend fun serverUrl() = "${baseUrl()}/api/graphql"
private suspend fun saveURL(url: String): Boolean { private suspend fun saveURL(url: String): Boolean {
val authToken = datastoreRepository.getString(omnivoreAuthToken) ?: return false val authToken = datastoreRepository.getString(omnivoreAuthToken) ?: return false
val apolloClient = ApolloClient.Builder() val apolloClient = ApolloClient.Builder()
.serverUrl("${Constants.apiURL}/api/graphql") .serverUrl(serverUrl())
.addHttpHeader("Authorization", value = authToken) .addHttpHeader("Authorization", value = authToken)
.build() .build()

View File

@ -11,6 +11,7 @@ import androidx.lifecycle.viewModelScope
import app.omnivore.omnivore.R import app.omnivore.omnivore.R
import app.omnivore.omnivore.core.datastore.DatastoreRepository import app.omnivore.omnivore.core.datastore.DatastoreRepository
import app.omnivore.omnivore.core.datastore.omnivoreAuthToken import app.omnivore.omnivore.core.datastore.omnivoreAuthToken
import app.omnivore.omnivore.core.datastore.omnivoreSelfHostedApiServer
import app.omnivore.omnivore.graphql.generated.SaveUrlMutation import app.omnivore.omnivore.graphql.generated.SaveUrlMutation
import app.omnivore.omnivore.graphql.generated.type.SaveUrlInput import app.omnivore.omnivore.graphql.generated.type.SaveUrlInput
import app.omnivore.omnivore.utils.Constants import app.omnivore.omnivore.utils.Constants
@ -71,6 +72,12 @@ class SaveViewModel @Inject constructor(
return null return null
} }
fun baseUrl() = runBlocking {
datastoreRepo.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL
}
private fun serverUrl() = "${baseUrl()}/api/graphql"
fun saveURL(url: String) { fun saveURL(url: String) {
viewModelScope.launch { viewModelScope.launch {
isLoading = true isLoading = true
@ -86,7 +93,7 @@ class SaveViewModel @Inject constructor(
} }
val apolloClient = ApolloClient.Builder() val apolloClient = ApolloClient.Builder()
.serverUrl("${Constants.apiURL}/api/graphql") .serverUrl(serverUrl())
.addHttpHeader("Authorization", value = authToken) .addHttpHeader("Authorization", value = authToken)
.build() .build()

Binary file not shown.

After

Width:  |  Height:  |  Size: 165 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 452 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 543 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 312 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 202 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 207 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 260 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 160 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 282 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 302 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 305 KiB

View File

@ -1,4 +1,4 @@
FROM willnorris/imageproxy:v0.10.0 as build FROM ghcr.io/willnorris/imageproxy:main as build
# Above imageproxy image is built from scratch image and is barebones # Above imageproxy image is built from scratch image and is barebones
# Switching over to ubuntu base image to allow us to debug better. # Switching over to ubuntu base image to allow us to debug better.

View File

@ -33,7 +33,7 @@
"graphql-tag": "^2.11.0", "graphql-tag": "^2.11.0",
"lerna": "^7.4.1", "lerna": "^7.4.1",
"prettier": "^2.5.1", "prettier": "^2.5.1",
"typescript": "4.5.2" "typescript": "5.7.3"
}, },
"volta": { "volta": {
"node": "18.16.1", "node": "18.16.1",

View File

@ -4,13 +4,27 @@
"project": "tsconfig.json" "project": "tsconfig.json"
}, },
"rules": { "rules": {
"@typescript-eslint/no-unsafe-argument": 0 "@typescript-eslint/no-unsafe-argument": 0,
"@typescript-eslint/no-unsafe-member-access": 0,
"@typescript-eslint/no-unnecessary-type-assertion": 0,
"@typescript-eslint/no-unsafe-assignment": 0,
"@typescript-eslint/no-unsafe-return": ["warn"],
"@typescript-eslint/no-unsafe-call" : ["warn"],
"@typescript-eslint/restrict-template-expressions": ["warn"],
"@typescript-eslint/no-misused-promises": ["warn"],
"@typescript-eslint/ban-ts-comment": ["warn"],
"@typescript-eslint/no-unused-vars": 0
}, },
"overrides": [ "overrides": [
{ {
"files": ["test/**/*.ts"], "files": ["test/**/*.ts"],
"rules": { "rules": {
"@typescript-eslint/no-unsafe-member-access": 0 "@typescript-eslint/no-unsafe-member-access": 0,
"@typescript-eslint/no-unnecessary-type-assertion": 0,
"@typescript-eslint/no-unsafe-assignment": 0,
"@typescript-eslint/no-unsafe-return": ["warn"],
"@typescript-eslint/no-unsafe-call" : ["warn"],
"@typescript-eslint/no-unused-vars": 0
} }
} }
] ]

View File

@ -1,51 +1,37 @@
FROM node:18.16 as builder FROM node:22.12 AS builder
WORKDIR /app WORKDIR /app
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
RUN apt-get update && apt-get install -y g++ make python3
COPY package.json . RUN apt-get update && apt-get install -y g++ make python3 && apt-get clean && rm -rf /var/lib/apt/lists/*
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
COPY /packages/readabilityjs/package.json ./packages/readabilityjs/package.json COPY package.json yarn.lock tsconfig.json .prettierrc .eslintrc ./
COPY /packages/api/package.json ./packages/api/package.json COPY packages ./packages
COPY /packages/text-to-speech/package.json ./packages/text-to-speech/package.json
COPY /packages/content-handler/package.json ./packages/content-handler/package.json
COPY /packages/liqe/package.json ./packages/liqe/package.json
COPY /packages/utils/package.json ./packages/utils/package.json
RUN yarn install --pure-lockfile # Remove all except needed packages
RUN find packages -mindepth 1 -type d \
! -regex '^packages/\(api\|readabilityjs\|text-to-speech\|content-handler\|liqe\|utils\)\(/.*\)?' \
-exec rm -rf {} +
ADD /packages/readabilityjs ./packages/readabilityjs RUN yarn install --pure-lockfile && \
ADD /packages/api ./packages/api yarn workspace @omnivore/utils build && \
ADD /packages/text-to-speech ./packages/text-to-speech yarn workspace @omnivore/text-to-speech-handler build && \
ADD /packages/content-handler ./packages/content-handler yarn workspace @omnivore/content-handler build && \
ADD /packages/liqe ./packages/liqe yarn workspace @omnivore/liqe build && \
ADD /packages/utils ./packages/utils yarn workspace @omnivore/api build && \
rm -rf /app/packages/api/node_modules /app/node_modules && \
yarn install --pure-lockfile --production
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/text-to-speech-handler build
RUN yarn workspace @omnivore/content-handler build
RUN yarn workspace @omnivore/liqe build
RUN yarn workspace @omnivore/api build
# After building, fetch the production dependencies FROM node:22.12-alpine AS runner
RUN rm -rf /app/packages/api/node_modules
RUN rm -rf /app/node_modules
RUN yarn install --pure-lockfile --production
FROM node:18.16 as runner
LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore" LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore"
RUN apt-get update && apt-get install -y netcat-openbsd RUN apk update && apk add netcat-openbsd && rm -rf /var/cache/apk/*
WORKDIR /app WORKDIR /app
ENV NODE_ENV production ENV NODE_ENV=production
ENV NODE_OPTIONS=--max-old-space-size=4096 ENV NODE_OPTIONS=--max-old-space-size=4096
ENV PORT=8080 ENV PORT=8080
@ -59,6 +45,7 @@ COPY --from=builder /app/packages/text-to-speech/ /app/packages/text-to-speech/
COPY --from=builder /app/packages/content-handler/ /app/packages/content-handler/ COPY --from=builder /app/packages/content-handler/ /app/packages/content-handler/
COPY --from=builder /app/packages/liqe/ /app/packages/liqe/ COPY --from=builder /app/packages/liqe/ /app/packages/liqe/
COPY --from=builder /app/packages/utils/ /app/packages/utils/ COPY --from=builder /app/packages/utils/ /app/packages/utils/
EXPOSE 8080 EXPOSE 8080
CMD ["yarn", "workspace", "@omnivore/api", "start"] CMD ["yarn", "workspace", "@omnivore/api", "start"]

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine FROM node:22.12-alpine
WORKDIR /app WORKDIR /app

View File

@ -120,11 +120,15 @@
"voca": "^1.4.0", "voca": "^1.4.0",
"winston": "^3.3.3", "winston": "^3.3.3",
"yaml": "^2.4.1", "yaml": "^2.4.1",
"youtubei": "^1.5.4" "youtubei": "^1.5.4",
"@aws-sdk/client-s3": "^3.679.0",
"@aws-sdk/s3-request-presigner": "^3.679.0",
"@aws-sdk/lib-storage": "^3.679.0"
}, },
"devDependencies": { "devDependencies": {
"@istanbuljs/nyc-config-typescript": "^1.0.2", "@istanbuljs/nyc-config-typescript": "^1.0.2",
"@types/addressparser": "^1.0.1", "@types/addressparser": "^1.0.1",
"@types/lodash": "^4.17.14",
"@types/analytics-node": "^3.1.7", "@types/analytics-node": "^3.1.7",
"@types/archiver": "^6.0.2", "@types/archiver": "^6.0.2",
"@types/bcryptjs": "^2.4.2", "@types/bcryptjs": "^2.4.2",
@ -168,12 +172,11 @@
"postgrator": "^4.2.0", "postgrator": "^4.2.0",
"sinon": "^14.0.0", "sinon": "^14.0.0",
"sinon-chai": "^3.7.0", "sinon-chai": "^3.7.0",
"ts-node-dev": "^1.1.8" "ts-node-dev": "^1.1.8",
}, "typescript": "5.7.3"
"engines": {
"node": "18.16.1"
}, },
"volta": { "volta": {
"extends": "../../package.json" "extends": "../../package.json"
} }
} }

View File

@ -0,0 +1,61 @@
FROM node:22.12 as builder
WORKDIR /app
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
RUN apt-get update && apt-get install -y g++ make python3
COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
COPY /packages/readabilityjs/package.json ./packages/readabilityjs/package.json
COPY /packages/api/package.json ./packages/api/package.json
COPY /packages/text-to-speech/package.json ./packages/text-to-speech/package.json
COPY /packages/content-handler/package.json ./packages/content-handler/package.json
COPY /packages/liqe/package.json ./packages/liqe/package.json
COPY /packages/utils/package.json ./packages/utils/package.json
RUN yarn install --pure-lockfile
ADD /packages/readabilityjs ./packages/readabilityjs
ADD /packages/api ./packages/api
ADD /packages/text-to-speech ./packages/text-to-speech
ADD /packages/content-handler ./packages/content-handler
ADD /packages/liqe ./packages/liqe
ADD /packages/utils ./packages/utils
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/text-to-speech-handler build
RUN yarn workspace @omnivore/content-handler build
RUN yarn workspace @omnivore/liqe build
RUN yarn workspace @omnivore/api build
# After building, fetch the production dependencies
RUN rm -rf /app/packages/api/node_modules
RUN rm -rf /app/node_modules
RUN yarn install --pure-lockfile --production
FROM node:22.12 as runner
LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore"
RUN apt-get update && apt-get install -y netcat-openbsd
WORKDIR /app
ENV NODE_ENV production
COPY --from=builder /app/packages/api/dist /app/packages/api/dist
COPY --from=builder /app/packages/readabilityjs/ /app/packages/readabilityjs/
COPY --from=builder /app/packages/api/package.json /app/packages/api/package.json
COPY --from=builder /app/packages/api/node_modules /app/packages/api/node_modules
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/package.json /app/package.json
COPY --from=builder /app/packages/text-to-speech/ /app/packages/text-to-speech/
COPY --from=builder /app/packages/content-handler/ /app/packages/content-handler/
COPY --from=builder /app/packages/liqe/ /app/packages/liqe/
COPY --from=builder /app/packages/utils/ /app/packages/utils/
CMD ["yarn", "workspace", "@omnivore/api", "start_queue_processor"]

View File

@ -23,6 +23,7 @@ export const appDataSource = new DataSource({
max: env.pg.pool.max, max: env.pg.pool.max,
idleTimeoutMillis: 10000, // 10 seconds idleTimeoutMillis: 10000, // 10 seconds
}, },
replication: env.pg.replication replication: env.pg.replication
? { ? {
master: { master: {
@ -42,5 +43,15 @@ export const appDataSource = new DataSource({
}, },
], ],
} }
: undefined, : {
defaultMode: 'master',
master: {
host: env.pg.host,
port: env.pg.port,
username: env.pg.userName,
password: env.pg.password,
database: env.pg.dbName,
},
slaves: [],
},
}) })

View File

@ -73,7 +73,8 @@ import { CACHED_READING_POSITION_PREFIX } from './services/cached_reading_positi
import { logger } from './utils/logger' import { logger } from './utils/logger'
import { getQueue } from './queue-processor' import { getQueue } from './queue-processor'
export const EXPORT_QUEUE_NAME = 'omnivore-export-queue' export const EXPORT_QUEUE_NAME =
process.env['EXPORT_QUEUE_NAME'] ?? 'omnivore-export-queue'
export const createWorker = (connection: ConnectionOptions) => export const createWorker = (connection: ConnectionOptions) =>
new Worker( new Worker(

View File

@ -1,10 +1,6 @@
import archiver, { Archiver } from 'archiver' import archiver, { Archiver } from 'archiver'
import { v4 as uuidv4 } from 'uuid' import { v4 as uuidv4 } from 'uuid'
import { import { LibraryItem, LibraryItemState } from '../entity/library_item'
ContentReaderType,
LibraryItem,
LibraryItemState,
} from '../entity/library_item'
import { TaskState } from '../generated/graphql' import { TaskState } from '../generated/graphql'
import { findExportById, saveExport } from '../services/export' import { findExportById, saveExport } from '../services/export'
import { findHighlightsByLibraryItemId } from '../services/highlights' import { findHighlightsByLibraryItemId } from '../services/highlights'
@ -17,12 +13,11 @@ import { sendExportJobEmail } from '../services/send_emails'
import { findActiveUser } from '../services/user' import { findActiveUser } from '../services/user'
import { logger } from '../utils/logger' import { logger } from '../utils/logger'
import { highlightToMarkdown } from '../utils/parser' import { highlightToMarkdown } from '../utils/parser'
import { import { env } from '../env'
contentFilePath, import { storage } from '../repository/storage/storage'
createGCSFile, import { File } from '../repository/storage/StorageClient'
generateUploadFilePathName, import { Readable } from 'stream'
} from '../utils/uploads' import { contentFilePath, generateUploadFilePathName } from '../utils/uploads'
import { batch } from 'googleapis/build/src/apis/batch'
import { getRepository } from '../repository' import { getRepository } from '../repository'
import { UploadFile } from '../entity/upload_file' import { UploadFile } from '../entity/upload_file'
@ -31,6 +26,12 @@ export interface ExportJobData {
exportId: string exportId: string
} }
const bucketName = env.fileUpload.gcsUploadBucket
const createGCSFile = (filename: string): File => {
return storage.createFile(bucketName, filename)
}
export const EXPORT_JOB_NAME = 'export' export const EXPORT_JOB_NAME = 'export'
const itemStateMappping = (state: LibraryItemState) => { const itemStateMappping = (state: LibraryItemState) => {
@ -61,7 +62,7 @@ const uploadContent = async (
const file = createGCSFile(filePath) const file = createGCSFile(filePath)
// check if file is already uploaded // check if file is already uploaded
const [exists] = await file.exists() const exists = await file.exists()
if (!exists) { if (!exists) {
logger.info(`File not found: ${filePath}`) logger.info(`File not found: ${filePath}`)
@ -81,10 +82,14 @@ const uploadContent = async (
contentType: 'text/html', contentType: 'text/html',
private: true, private: true,
}) })
archive.append(Readable.from(item.readableContent), {
name: `content/${libraryItem.slug}.html`,
})
} }
// append the existing file to the archive // append the existing file to the archive
archive.append(file.createReadStream(), { const content = await file.download()
archive.append(Readable.from(content.toString()), {
name: `content/${libraryItem.slug}.html`, name: `content/${libraryItem.slug}.html`,
}) })
} }
@ -97,17 +102,19 @@ const uploadPdfContent = async (
id: libraryItem.uploadFileId, id: libraryItem.uploadFileId,
}) })
if (!upload || !upload.fileName) { if (!upload || !upload.fileName) {
console.log(`upload does not have a filename: ${upload}`) console.log(
`upload does not have a filename: ${upload?.fileName ?? 'empty'}`
)
return return
} }
const filePath = generateUploadFilePathName(upload.id, upload.fileName) const filePath = generateUploadFilePathName(upload.id, upload.fileName)
const file = createGCSFile(filePath) const file = createGCSFile(filePath)
const [exists] = await file.exists() const exists = await file.exists()
if (exists) { if (exists) {
console.log(`adding PDF file: ${filePath}`) console.log(`adding PDF file: ${filePath}`)
// append the existing file to the archive // append the existing file to the archive
archive.append(file.createReadStream(), { archive.append(await file.download(), {
name: `content/${libraryItem.slug}.pdf`, name: `content/${libraryItem.slug}.pdf`,
}) })
} }
@ -238,9 +245,18 @@ export const exportJob = async (jobData: ExportJobData) => {
// Create a write stream // Create a write stream
const writeStream = file.createWriteStream({ const writeStream = file.createWriteStream({
metadata: { contentType: 'application/zip',
contentType: 'application/zip', })
},
const finishedPromise = new Promise<void>((resolve, reject) => {
if (writeStream.closed) {
resolve()
}
writeStream.on('finish', () => {
logger.info('File successfully written to GCS')
resolve()
})
writeStream.on('error', reject)
}) })
// Handle any errors in the streams // Handle any errors in the streams
@ -248,10 +264,6 @@ export const exportJob = async (jobData: ExportJobData) => {
logger.error('Error writing to GCS:', err) logger.error('Error writing to GCS:', err)
}) })
writeStream.on('finish', () => {
logger.info('File successfully written to GCS')
})
// Initialize archiver for zipping files // Initialize archiver for zipping files
const archive = archiver('zip', { const archive = archiver('zip', {
zlib: { level: 9 }, // Compression level zlib: { level: 9 }, // Compression level
@ -264,7 +276,6 @@ export const exportJob = async (jobData: ExportJobData) => {
// Pipe the archiver output to the write stream // Pipe the archiver output to the write stream
archive.pipe(writeStream) archive.pipe(writeStream)
let cursor = 0 let cursor = 0
try { try {
// fetch data from the database // fetch data from the database
@ -305,17 +316,14 @@ export const exportJob = async (jobData: ExportJobData) => {
} }
// Ensure that the writeStream has finished // Ensure that the writeStream has finished
await new Promise((resolve, reject) => { await finishedPromise
writeStream.on('finish', resolve)
writeStream.on('error', reject)
})
logger.info(`export completed, exported ${cursor} items`, { logger.info(`export completed, exported ${cursor} items`, {
userId, userId,
}) })
// generate a temporary signed url for the zip file // generate a temporary signed url for the zip file
const [signedUrl] = await file.getSignedUrl({ const signedUrl = await storage.signedUrl(bucketName, fullPath, {
action: 'read', action: 'read',
expires: Date.now() + 168 * 60 * 60 * 1000, // one week expires: Date.now() + 168 * 60 * 60 * 1000, // one week
}) })

View File

@ -281,26 +281,34 @@ export const processYouTubeVideo = async (
updatedLibraryItem.publishedAt = new Date(video.uploadDate) updatedLibraryItem.publishedAt = new Date(video.uploadDate)
} }
// if ('getTranscript' in video && duration > 0 && duration < 1801) { if (
// // If the video has a transcript available, put a placehold in and 'getTranscript' in video &&
// // enqueue a job to process the full transcript duration > 0 &&
// const updatedContent = await addTranscriptToReadableContent( duration <
// libraryItem.originalUrl, Number(
// libraryItem.readableContent, process.env['YOUTUBE_MAXIMUM_VIDEO_DURATION_TRANSCRIPT'] ?? 1801
// TRANSCRIPT_PLACEHOLDER_TEXT ) &&
// ) process.env['OPENAI_API_KEY']
) {
// If the video has a transcript available, put a placehold in and
// enqueue a job to process the full transcript
const updatedContent = await addTranscriptToReadableContent(
libraryItem.originalUrl,
libraryItem.readableContent,
TRANSCRIPT_PLACEHOLDER_TEXT
)
// if (updatedContent) { if (updatedContent) {
// updatedLibraryItem.readableContent = updatedContent updatedLibraryItem.readableContent = updatedContent
// } }
// await enqueueProcessYouTubeTranscript({ await enqueueProcessYouTubeTranscript({
// videoId, videoId,
// ...jobData, ...jobData,
// }) })
// } }
if (updatedLibraryItem !== {}) { if (Object.keys(updatedLibraryItem).length > 0) {
await updateLibraryItem( await updateLibraryItem(
jobData.libraryItemId, jobData.libraryItemId,
updatedLibraryItem, updatedLibraryItem,

View File

@ -56,7 +56,10 @@ import {
PROCESS_YOUTUBE_VIDEO_JOB_NAME, PROCESS_YOUTUBE_VIDEO_JOB_NAME,
} from './jobs/process-youtube-video' } from './jobs/process-youtube-video'
import { pruneTrashJob, PRUNE_TRASH_JOB } from './jobs/prune_trash' import { pruneTrashJob, PRUNE_TRASH_JOB } from './jobs/prune_trash'
import { refreshAllFeeds } from './jobs/rss/refreshAllFeeds' import {
REFRESH_ALL_FEEDS_JOB_NAME,
refreshAllFeeds,
} from './jobs/rss/refreshAllFeeds'
import { refreshFeed } from './jobs/rss/refreshFeed' import { refreshFeed } from './jobs/rss/refreshFeed'
import { savePageJob } from './jobs/save_page' import { savePageJob } from './jobs/save_page'
import { import {
@ -159,25 +162,25 @@ export const createWorker = (connection: ConnectionOptions) =>
async (job: Job) => { async (job: Job) => {
const executeJob = async (job: Job) => { const executeJob = async (job: Job) => {
switch (job.name) { switch (job.name) {
// case 'refresh-all-feeds': { case 'refresh-all-feeds': {
// const queue = await getQueue() const queue = await getQueue()
// const counts = await queue?.getJobCounts('prioritized') const counts = await queue?.getJobCounts('prioritized')
// if (counts && counts.wait > 1000) { if (counts && counts.wait > 1000) {
// return return
// } }
// return await refreshAllFeeds(appDataSource) return await refreshAllFeeds(appDataSource)
// } }
// case 'refresh-feed': { case 'refresh-feed': {
// return await refreshFeed(job.data) return await refreshFeed(job.data)
// } }
case 'save-page': { case 'save-page': {
return savePageJob(job.data, job.attemptsMade) return savePageJob(job.data, job.attemptsMade)
} }
// case 'update-pdf-content': { case 'update-pdf-content': {
// return updatePDFContentJob(job.data) return updatePDFContentJob(job.data)
// } }
// case THUMBNAIL_JOB: case THUMBNAIL_JOB:
// return findThumbnail(job.data) return findThumbnail(job.data)
case TRIGGER_RULE_JOB_NAME: case TRIGGER_RULE_JOB_NAME:
return triggerRule(job.data) return triggerRule(job.data)
case UPDATE_LABELS_JOB: case UPDATE_LABELS_JOB:
@ -194,10 +197,13 @@ export const createWorker = (connection: ConnectionOptions) =>
return exportItem(job.data) return exportItem(job.data)
// case AI_SUMMARIZE_JOB_NAME: // case AI_SUMMARIZE_JOB_NAME:
// return aiSummarize(job.data) // return aiSummarize(job.data)
// case PROCESS_YOUTUBE_VIDEO_JOB_NAME: case PROCESS_YOUTUBE_VIDEO_JOB_NAME:
// return processYouTubeVideo(job.data) return processYouTubeVideo(job.data)
// case PROCESS_YOUTUBE_TRANSCRIPT_JOB_NAME: case PROCESS_YOUTUBE_TRANSCRIPT_JOB_NAME:
// return processYouTubeTranscript(job.data) if (process.env['OPENAI_API_KEY']) {
return processYouTubeTranscript(job.data)
}
break
case EXPORT_ALL_ITEMS_JOB_NAME: case EXPORT_ALL_ITEMS_JOB_NAME:
return exportAllItems(job.data) return exportAllItems(job.data)
case SEND_EMAIL_JOB: case SEND_EMAIL_JOB:
@ -218,8 +224,8 @@ export const createWorker = (connection: ConnectionOptions) =>
// return updateHome(job.data) // return updateHome(job.data)
// case SCORE_LIBRARY_ITEM_JOB: // case SCORE_LIBRARY_ITEM_JOB:
// return scoreLibraryItem(job.data) // return scoreLibraryItem(job.data)
// case GENERATE_PREVIEW_CONTENT_JOB: case GENERATE_PREVIEW_CONTENT_JOB:
// return generatePreviewContent(job.data) return generatePreviewContent(job.data)
case PRUNE_TRASH_JOB: case PRUNE_TRASH_JOB:
return pruneTrashJob(job.data) return pruneTrashJob(job.data)
case EXPIRE_FOLDERS_JOB_NAME: case EXPIRE_FOLDERS_JOB_NAME:
@ -260,6 +266,17 @@ const setupCronJobs = async () => {
}, },
} }
) )
await queue.add(
REFRESH_ALL_FEEDS_JOB_NAME,
{},
{
priority: getJobPriority(REFRESH_ALL_FEEDS_JOB_NAME),
repeat: {
every: 14_400_000, // 4 Hours
},
}
)
} }
const main = async () => { const main = async () => {

View File

@ -0,0 +1,88 @@
import {
SignedUrlParameters,
StorageClient,
File,
SaveOptions,
SaveData,
} from './StorageClient'
import { Storage, File as GCSFile } from '@google-cloud/storage'
export class GcsStorageClient implements StorageClient {
private storage: Storage
constructor(keyFilename: string | undefined) {
this.storage = new Storage({
keyFilename,
})
}
private convertFileToGeneric(gcsFile: GCSFile): File {
return {
isPublic: async () => {
const [isPublic] = await gcsFile.isPublic()
return isPublic
},
exists: async () => (await gcsFile.exists())[0],
download: async () => (await gcsFile.download())[0],
bucket: gcsFile.bucket.name,
publicUrl: () => gcsFile.publicUrl(),
getMetadataMd5: async () => {
const [metadata] = await gcsFile.getMetadata()
return metadata.md5Hash
},
createWriteStream: (saveOptions: SaveOptions) =>
gcsFile.createWriteStream({
metadata: { contentType: saveOptions.contentType },
}),
save: (saveData: SaveData, saveOptions: SaveOptions) =>
gcsFile.save(saveData, saveOptions),
key: gcsFile.name,
}
}
downloadFile(bucket: string, filePath: string): Promise<File> {
const file = this.storage.bucket(bucket).file(filePath)
return Promise.resolve(this.convertFileToGeneric(file))
}
createFile(bucket: string, filePath: string): File {
return this.convertFileToGeneric(this.storage.bucket(bucket).file(filePath))
}
async getFilesFromPrefix(bucket: string, prefix: string): Promise<File[]> {
const [filesWithPrefix] = await this.storage
.bucket(bucket)
.getFiles({ prefix })
return filesWithPrefix.map((it: GCSFile) => this.convertFileToGeneric(it))
}
async signedUrl(
bucket: string,
filePath: string,
options: SignedUrlParameters
): Promise<string> {
const [url] = await this.storage
.bucket(bucket)
.file(filePath)
.getSignedUrl({ ...options, version: 'v4' })
return url
}
upload(
bucket: string,
filePath: string,
data: Buffer,
options: {
contentType?: string
public?: boolean
timeout?: number
}
): Promise<void> {
return this.storage
.bucket(bucket)
.file(filePath)
.save(data, { timeout: 30000, ...options })
}
}

View File

@ -0,0 +1,261 @@
import {
SignedUrlParameters,
StorageClient,
File,
SaveOptions,
SaveData,
} from './StorageClient'
import { Upload } from '@aws-sdk/lib-storage'
import {
GetObjectCommand,
GetObjectCommandOutput,
S3Client,
ListObjectsV2Command,
PutObjectCommand,
HeadObjectCommand,
S3ServiceException,
} from '@aws-sdk/client-s3'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import { Readable } from 'stream'
import * as stream from 'node:stream'
// While this is listed as S3, for self hosting we will use MinIO, which is
// S3 Compatible.
export class S3StorageClient implements StorageClient {
BlankFile = class implements File {
bucket: string
key: string
s3Client: S3StorageClient
downloadedFile: File | undefined
constructor(s3StorageClass: S3StorageClient, bucket: string, file: string) {
this.bucket = bucket
this.key = file
this.s3Client = s3StorageClass
}
isPublic() {
return Promise.resolve(true)
}
publicUrl() {
return `${this.s3Client.localUrl ?? ''}/${this.bucket}/${this.key}`
}
async download(): Promise<Buffer> {
this.downloadedFile = await this.s3Client.downloadFile(
this.bucket,
this.key
)
return this.downloadedFile.download()
}
async exists() {
try {
await this.s3Client.s3Client.send(
new HeadObjectCommand({
Bucket: this.bucket,
Key: this.key,
})
)
return true
} catch (e) {
if (
e instanceof S3ServiceException &&
e.$metadata.httpStatusCode == 404
) {
return false
}
throw e
}
}
save(saveData: SaveData, saveOptions: SaveOptions): Promise<void> {
return this.s3Client.upload(this.bucket, this.key, saveData, saveOptions)
}
createWriteStream(saveOptions: SaveOptions) {
return this.s3Client.createS3UploadStream(
this.bucket,
this.key,
saveOptions
)
}
getMetadataMd5() {
return this.downloadedFile?.getMetadataMd5() || Promise.resolve('')
}
}
private s3Client: S3Client
private signingS3Client: S3Client
private urlOverride: string | undefined
private localUrl: string | undefined
constructor(localUrl: string | undefined, urlOverride: string | undefined) {
this.localUrl = localUrl
this.urlOverride = urlOverride
this.s3Client = new S3Client({
forcePathStyle: true,
endpoint: urlOverride,
})
this.signingS3Client = new S3Client({
forcePathStyle: true,
endpoint: localUrl,
})
}
private createS3UploadStream = (
bucket: string,
key: string,
saveOptions: SaveOptions
) => {
const passThroughStream = new stream.PassThrough()
const upload = new Upload({
client: this.s3Client,
params: {
Bucket: bucket,
Key: key,
Body: passThroughStream,
ContentType: saveOptions.contentType,
},
})
void upload.done().then((res) => {
console.log(`Successfully Uploaded File ${res.Key ?? ''}`)
})
return passThroughStream
}
private convertFileToGeneric(
s3File: GetObjectCommandOutput,
bucket: string,
key: string
): File {
return {
exists: () => {
return Promise.resolve(s3File.$metadata.httpStatusCode == 200)
},
save: async () => Promise.resolve(),
isPublic: async () => Promise.resolve(true),
download: async () => this.getFileFromReadable(s3File.Body as Readable),
getMetadataMd5: () => Promise.resolve(s3File.ETag),
createWriteStream: (saveOptions: SaveOptions) =>
this.createS3UploadStream(bucket, key, saveOptions),
publicUrl: () => `${this.localUrl ?? ''}/${bucket}/${key}`,
bucket,
key,
}
}
private getFileFromReadable(stream: Readable): Promise<Buffer> {
return new Promise<Buffer>((resolve, reject) => {
const chunks: Buffer[] = []
stream.on('data', (chunk) => chunks.push(chunk))
stream.once('end', () => resolve(Buffer.concat(chunks)))
stream.once('error', reject)
})
}
async downloadFile(bucket: string, filePath: string): Promise<File> {
const s3File = await this.s3Client.send(
new GetObjectCommand({
Bucket: bucket,
Key: filePath, // path to the file you want to download,
})
)
return this.convertFileToGeneric(s3File, bucket, filePath)
}
createFile(bucket: string, filePath: string): File {
return new this.BlankFile(this, bucket, filePath) as unknown as File
}
async getFilesFromPrefix(bucket: string, prefix: string): Promise<File[]> {
const s3PrefixedFiles = await this.s3Client.send(
new ListObjectsV2Command({
Bucket: bucket,
Prefix: prefix, // path to the file you want to download,
})
)
const prefixKeys = s3PrefixedFiles.CommonPrefixes || []
return prefixKeys
.map(({ Prefix }) => Prefix)
.map((key: string | undefined) => {
return {
key: key || '',
exists: () => Promise.resolve(true),
isPublic: async () => Promise.resolve(true),
download: async () => {
const s3File = await this.s3Client.send(
new GetObjectCommand({
Bucket: bucket,
Key: key, // path to the file you want to download,
})
)
return this.getFileFromReadable(s3File.Body as Readable)
},
save: () => Promise.resolve(),
createWriteStream: (saveOptions: SaveOptions) =>
new stream.PassThrough(),
getMetadataMd5: () => Promise.resolve(key),
bucket: bucket,
publicUrl: () => `${this.localUrl ?? ''}/${bucket}/${key ?? ''}`,
}
})
}
async signedUrl(
bucket: string,
filePath: string,
options: SignedUrlParameters
): Promise<string> {
const command =
options.action == 'read'
? new GetObjectCommand({
Bucket: bucket,
Key: filePath, // path to the file you want to download,
})
: new PutObjectCommand({
Bucket: bucket,
Key: filePath, // path to the file you want to download,
})
// eslint-disable-next-line @typescript-eslint/no-unsafe-call
const url = await getSignedUrl(this.signingS3Client, command, {
expiresIn: 900,
})
return url
}
async upload(
bucket: string,
filePath: string,
data: SaveData,
options: {
contentType?: string
public?: boolean
timeout?: number
}
): Promise<void> {
await this.s3Client.send(
new PutObjectCommand({
Bucket: bucket,
Key: filePath,
Body: data.toString(),
ContentType: options.contentType,
})
)
}
}

View File

@ -0,0 +1,49 @@
import { PipelineSource, Writable } from 'stream'
/** Parameters accepted when generating a pre-signed URL for an object. */
export type SignedUrlParameters = {
  // Operation the URL authorizes; mirrors GCS signed-URL action names.
  action: 'read' | 'write' | 'delete' | 'resumable'
  // Absolute expiry as a Unix epoch timestamp in milliseconds
  // (callers pass e.g. Date.now() + one week).
  expires: number
}

/** Payload shapes accepted when writing an object. */
export type SaveData = string | Buffer | PipelineSource<string | Buffer>

/** Provider-agnostic subset of upload/write options. */
export type SaveOptions = {
  contentType?: string
  gzip?: string | boolean
  resumable?: boolean
  timeout?: number
  validation?: string | boolean
  // When true the object should not be publicly readable.
  private?: boolean | undefined
}

/**
 * Provider-agnostic handle to a stored object. Implementations exist for
 * Google Cloud Storage and S3-compatible stores (MinIO). Handles may be
 * lazy — obtaining one does not imply the object exists.
 */
export type File = {
  isPublic: () => Promise<boolean>
  publicUrl: () => string
  download: () => Promise<Buffer>
  exists: () => Promise<boolean>
  save: (saveData: SaveData, saveOptions: SaveOptions) => Promise<void>
  createWriteStream: (saveOptions: SaveOptions) => Writable
  // Content MD5 (or provider equivalent, e.g. S3 ETag); undefined if unknown.
  getMetadataMd5: () => Promise<string | undefined>
  bucket: string
  key: string
}

/** Abstraction over a blob-storage backend. */
export interface StorageClient {
  /** Returns a handle for an existing object at bucket/filePath. */
  downloadFile(bucket: string, filePath: string): Promise<File>
  /** Returns a handle for a (possibly new) object without network I/O. */
  createFile(bucket: string, filePath: string): File
  /** Lists objects whose keys begin with filePrefix. */
  getFilesFromPrefix(bucket: string, filePrefix: string): Promise<File[]>
  /** One-shot upload of an in-memory buffer. */
  upload(
    bucket: string,
    filePath: string,
    data: Buffer,
    options: { contentType?: string; public?: boolean; timeout?: number }
  ): Promise<void>
  /** Generates a time-limited signed URL for the object. */
  signedUrl(
    bucket: string,
    filePath: string,
    options: SignedUrlParameters
  ): Promise<string>
}

View File

@ -0,0 +1,10 @@
import { env } from '../../env'
import { S3StorageClient } from './S3StorageClient'
import { GcsStorageClient } from './GcsStorageClient'
// Process-wide storage backend singleton.
// Self-hosted deployments set GCS_USE_LOCAL_HOST=true and get the
// S3-compatible client pointed at MinIO (public URL + internal endpoint);
// otherwise Google Cloud Storage is used with an optional service-account
// key file.
// NOTE(review): `env.fileUpload?.` and `?? undefined` look redundant —
// fileUpload is declared non-optional in BackendEnv, and an empty-string
// key path would still be passed through; confirm whether '' should fall
// back to application-default credentials.
export const storage = env.fileUpload.useLocalStorage
  ? new S3StorageClient(
      env.fileUpload.localMinioUrl,
      env.fileUpload.internalMinioUrl
    )
  : new GcsStorageClient(env.fileUpload?.gcsUploadSAKeyFilePath ?? undefined)

View File

@ -510,12 +510,12 @@ export const saveArticleReadingProgressResolver = authorized<
} }
} }
if (env.redis.cache && env.redis.mq) { if (env.redis.cache && force) {
if (force) { // clear any cached values.
// clear any cached values. await clearCachedReadingPosition(uid, id)
await clearCachedReadingPosition(uid, id) }
}
if (env.redis.cache && env.redis.mq && !force) {
// If redis caching and queueing are available we delay this write // If redis caching and queueing are available we delay this write
const updatedProgress = const updatedProgress =
await dataSources.readingProgress.updateReadingProgress(uid, id, { await dataSources.readingProgress.updateReadingProgress(uid, id, {

View File

@ -31,7 +31,7 @@ async function fetchApplePublicKey(kid: string): Promise<string | null> {
try { try {
const key: jwksClient.SigningKey = await new Promise((resolve, reject) => { const key: jwksClient.SigningKey = await new Promise((resolve, reject) => {
client.getSigningKey(kid, (error, result) => { client.getSigningKey(kid, (error, result) => {
if (error) { if (error || result === undefined) {
return reject(error) return reject(error)
} }
return resolve(result) return resolve(result)

View File

@ -29,6 +29,7 @@ import {
import { analytics } from '../../utils/analytics' import { analytics } from '../../utils/analytics'
import { import {
comparePassword, comparePassword,
generateVerificationToken,
hashPassword, hashPassword,
setAuthInCookie, setAuthInCookie,
verifyToken, verifyToken,
@ -544,7 +545,7 @@ export function authRouter() {
try { try {
// hash password // hash password
const hashedPassword = await hashPassword(password) const hashedPassword = await hashPassword(password)
await createUser({ const [user] = await createUser({
email: trimmedEmail, email: trimmedEmail,
provider: 'EMAIL', provider: 'EMAIL',
sourceUserId: trimmedEmail, sourceUserId: trimmedEmail,
@ -553,12 +554,17 @@ export function authRouter() {
pictureUrl, pictureUrl,
bio, bio,
password: hashedPassword, password: hashedPassword,
pendingConfirmation: true, pendingConfirmation: !env.dev.autoVerify,
}) })
res.redirect( if (env.dev.autoVerify) {
`${env.client.url}/auth/verify-email?message=SIGNUP_SUCCESS` const token = await generateVerificationToken({ id: user.id })
) res.redirect(`${env.client.url}/auth/confirm-email/${token}`)
} else {
res.redirect(
`${env.client.url}/auth/verify-email?message=SIGNUP_SUCCESS`
)
}
} catch (e) { } catch (e) {
logger.info('email-signup exception:', e) logger.info('email-signup exception:', e)
if (isErrorWithCode(e)) { if (isErrorWithCode(e)) {

View File

@ -13,6 +13,7 @@ import {
} from '../auth_types' } from '../auth_types'
import { decodeGoogleToken } from '../google_auth' import { decodeGoogleToken } from '../google_auth'
import { createPendingUserToken, suggestedUsername } from '../jwt_helpers' import { createPendingUserToken, suggestedUsername } from '../jwt_helpers'
import { env } from '../../../env'
export async function createMobileSignUpResponse( export async function createMobileSignUpResponse(
isAndroid: boolean, isAndroid: boolean,
@ -66,7 +67,7 @@ export async function createMobileEmailSignUpResponse(
name: name.trim(), name: name.trim(),
username: username.trim().toLowerCase(), username: username.trim().toLowerCase(),
password: hashedPassword, password: hashedPassword,
pendingConfirmation: true, pendingConfirmation: !env.dev.autoVerify,
}) })
return { return {

View File

@ -64,6 +64,7 @@ export const createApp = (): Express => {
app.use(cookieParser()) app.use(cookieParser())
app.use(json({ limit: '100mb' })) app.use(json({ limit: '100mb' }))
app.use(urlencoded({ limit: '100mb', extended: true })) app.use(urlencoded({ limit: '100mb', extended: true }))
// @ts-ignore
app.use(compression()) app.use(compression())
// set to true if behind a reverse proxy/load balancer // set to true if behind a reverse proxy/load balancer

View File

@ -1,4 +1,4 @@
import { diff_match_patch } from 'diff-match-patch' import { diff_match_patch, patch_obj } from 'diff-match-patch'
import { DeepPartial, In } from 'typeorm' import { DeepPartial, In } from 'typeorm'
import { QueryDeepPartialEntity } from 'typeorm/query-builder/QueryPartialEntity' import { QueryDeepPartialEntity } from 'typeorm/query-builder/QueryPartialEntity'
import { EntityLabel } from '../entity/entity_label' import { EntityLabel } from '../entity/entity_label'
@ -14,7 +14,7 @@ import { deepDelete } from '../utils/helpers'
import { ItemEvent } from './library_item' import { ItemEvent } from './library_item'
const columnsToDelete = ['user', 'sharedAt', 'libraryItem'] as const const columnsToDelete = ['user', 'sharedAt', 'libraryItem'] as const
type ColumnsToDeleteType = typeof columnsToDelete[number] type ColumnsToDeleteType = (typeof columnsToDelete)[number]
export type HighlightEvent = Merge< export type HighlightEvent = Merge<
Omit<DeepPartial<Highlight>, ColumnsToDeleteType>, Omit<DeepPartial<Highlight>, ColumnsToDeleteType>,
EntityEvent EntityEvent
@ -40,7 +40,7 @@ export const batchGetHighlightsFromLibraryItemIds = async (
export const getHighlightLocation = (patch: string): number | undefined => { export const getHighlightLocation = (patch: string): number | undefined => {
const dmp = new diff_match_patch() const dmp = new diff_match_patch()
const patches = dmp.patch_fromText(patch) const patches = dmp.patch_fromText(patch) as unknown as patch_obj[]
return patches[0].start1 || undefined return patches[0].start1 || undefined
} }

View File

@ -16,7 +16,7 @@ import { deepDelete } from '../utils/helpers'
import { findLibraryItemIdsByLabelId, ItemEvent } from './library_item' import { findLibraryItemIdsByLabelId, ItemEvent } from './library_item'
const columnsToDelete = ['description', 'createdAt'] as const const columnsToDelete = ['description', 'createdAt'] as const
type ColumnsToDeleteType = typeof columnsToDelete[number] type ColumnsToDeleteType = (typeof columnsToDelete)[number]
export type LabelEvent = Merge< export type LabelEvent = Merge<
Omit<DeepPartial<Label>, ColumnsToDeleteType>, Omit<DeepPartial<Label>, ColumnsToDeleteType>,
EntityEvent EntityEvent

View File

@ -52,7 +52,7 @@ const columnsToDelete = [
'readableContent', 'readableContent',
'feedContent', 'feedContent',
] as const ] as const
type ColumnsToDeleteType = typeof columnsToDelete[number] type ColumnsToDeleteType = (typeof columnsToDelete)[number]
type ItemBaseEvent = Merge< type ItemBaseEvent = Merge<
Omit<DeepPartial<LibraryItem>, ColumnsToDeleteType>, Omit<DeepPartial<LibraryItem>, ColumnsToDeleteType>,
{ {

View File

@ -113,7 +113,9 @@ const createRandomEmailAddress = (userName: string, length: number): string => {
when rand is sdfsdf-: jacksonh-sdfsdf-e@inbox.omnivore.app when rand is sdfsdf-: jacksonh-sdfsdf-e@inbox.omnivore.app
when rand is abcdef: jacksonh-abcdefe@inbox.omnivore.app when rand is abcdef: jacksonh-abcdefe@inbox.omnivore.app
*/ */
return `${userName}-${nanoid(length)}e@${inbox}.omnivore.app` return `${userName}-${nanoid(length)}e@${
env.email.domain || `@${inbox}.omnivore.app`
}`
} }
export const findNewsletterEmailById = async ( export const findNewsletterEmailById = async (

View File

@ -137,7 +137,7 @@ export const uploadFile = async (
itemType, itemType,
uploadFile: { id: uploadFileData.id }, uploadFile: { id: uploadFileData.id },
slug: generateSlug(uploadFilePathName), slug: generateSlug(uploadFilePathName),
state: LibraryItemState.Processing, state: LibraryItemState.Succeeded,
contentReader: contentReaderForLibraryItem(itemType, uploadFileId), contentReader: contentReaderForLibraryItem(itemType, uploadFileId),
}, },
uid uid

View File

@ -9,7 +9,6 @@ import { NodeTracerProvider } from '@opentelemetry/node'
import { BatchSpanProcessor } from '@opentelemetry/tracing' import { BatchSpanProcessor } from '@opentelemetry/tracing'
import { EventEmitter } from 'events' import { EventEmitter } from 'events'
import { GraphQLInstrumentation } from '@opentelemetry/instrumentation-graphql' import { GraphQLInstrumentation } from '@opentelemetry/instrumentation-graphql'
import { setSpan } from '@opentelemetry/api/build/src/trace/context-utils'
const provider: NodeTracerProvider = new NodeTracerProvider() const provider: NodeTracerProvider = new NodeTracerProvider()
@ -46,6 +45,8 @@ if (
} }
if (exporter !== undefined) { if (exporter !== undefined) {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
provider.addSpanProcessor(new BatchSpanProcessor(exporter)) provider.addSpanProcessor(new BatchSpanProcessor(exporter))
console.info('tracing initialized') console.info('tracing initialized')
} }
@ -78,7 +79,7 @@ export async function traceAs<A>(
const childSpan = async (): Promise<A> => { const childSpan = async (): Promise<A> => {
const span = tracer.startSpan(spanName, { attributes }) const span = tracer.startSpan(spanName, { attributes })
const result = await api.context.with( const result = await api.context.with(
setSpan(api.context.active(), span), api.trace.setSpan(api.context.active(), span),
fn fn
) )
span.end() span.end()

View File

@ -73,6 +73,7 @@ export interface BackendEnv {
} }
dev: { dev: {
isLocal: boolean isLocal: boolean
autoVerify: boolean
} }
queue: { queue: {
location: string location: string
@ -94,6 +95,12 @@ export interface BackendEnv {
gcsUploadSAKeyFilePath: string gcsUploadSAKeyFilePath: string
gcsUploadPrivateBucket: string gcsUploadPrivateBucket: string
dailyUploadLimit: number dailyUploadLimit: number
useLocalStorage: boolean
localMinioUrl: string
internalMinioUrl: string
}
email: {
domain: string
} }
sender: { sender: {
message: string message: string
@ -197,10 +204,15 @@ const nullableEnvVars = [
'PG_REPLICA_USER', 'PG_REPLICA_USER',
'PG_REPLICA_PASSWORD', 'PG_REPLICA_PASSWORD',
'PG_REPLICA_DB', 'PG_REPLICA_DB',
'AUTO_VERIFY',
'INTERCOM_WEB_SECRET', 'INTERCOM_WEB_SECRET',
'INTERCOM_IOS_SECRET', 'INTERCOM_IOS_SECRET',
'INTERCOM_ANDROID_SECRET', 'INTERCOM_ANDROID_SECRET',
'EXPORT_TASK_HANDLER_URL', 'EXPORT_TASK_HANDLER_URL',
'LOCAL_MINIO_URL',
'GCS_USE_LOCAL_HOST',
'LOCAL_EMAIL_DOMAIN',
'AWS_S3_ENDPOINT_URL',
] // Allow some vars to be null/empty ] // Allow some vars to be null/empty
const envParser = const envParser =
@ -240,6 +252,7 @@ export function getEnv(): BackendEnv {
pool: { pool: {
max: parseInt(parse('PG_POOL_MAX'), 10), max: parseInt(parse('PG_POOL_MAX'), 10),
}, },
replication: parse('PG_REPLICATION') === 'true', replication: parse('PG_REPLICATION') === 'true',
replica: { replica: {
host: parse('PG_REPLICA_HOST'), host: parse('PG_REPLICA_HOST'),
@ -249,6 +262,9 @@ export function getEnv(): BackendEnv {
dbName: parse('PG_REPLICA_DB'), dbName: parse('PG_REPLICA_DB'),
}, },
} }
const email = {
domain: parse('LOCAL_EMAIL_DOMAIN'),
}
const server = { const server = {
jwtSecret: parse('JWT_SECRET'), jwtSecret: parse('JWT_SECRET'),
ssoJwtSecret: parse('SSO_JWT_SECRET'), ssoJwtSecret: parse('SSO_JWT_SECRET'),
@ -288,6 +304,7 @@ export function getEnv(): BackendEnv {
} }
const dev = { const dev = {
isLocal: parse('API_ENV') == 'local', isLocal: parse('API_ENV') == 'local',
autoVerify: parse('AUTO_VERIFY') === 'true',
} }
const queue = { const queue = {
location: parse('PUPPETEER_QUEUE_LOCATION'), location: parse('PUPPETEER_QUEUE_LOCATION'),
@ -318,6 +335,9 @@ export function getEnv(): BackendEnv {
dailyUploadLimit: parse('GCS_UPLOAD_DAILY_LIMIT') dailyUploadLimit: parse('GCS_UPLOAD_DAILY_LIMIT')
? parseInt(parse('GCS_UPLOAD_DAILY_LIMIT'), 10) ? parseInt(parse('GCS_UPLOAD_DAILY_LIMIT'), 10)
: 5, // default to 5 : 5, // default to 5
useLocalStorage: parse('GCS_USE_LOCAL_HOST') == 'true',
localMinioUrl: parse('LOCAL_MINIO_URL'),
internalMinioUrl: parse('AWS_S3_ENDPOINT_URL'),
} }
const sender = { const sender = {
message: parse('SENDER_MESSAGE'), message: parse('SENDER_MESSAGE'),
@ -374,6 +394,7 @@ export function getEnv(): BackendEnv {
return { return {
pg, pg,
client, client,
email,
server, server,
google, google,
posthog, posthog,

View File

@ -2,7 +2,6 @@
/* eslint-disable @typescript-eslint/restrict-template-expressions */ /* eslint-disable @typescript-eslint/restrict-template-expressions */
// Imports the Google Cloud Tasks library. // Imports the Google Cloud Tasks library.
import { CloudTasksClient, protos } from '@google-cloud/tasks' import { CloudTasksClient, protos } from '@google-cloud/tasks'
import { google } from '@google-cloud/tasks/build/protos/protos'
import axios from 'axios' import axios from 'axios'
import { nanoid } from 'nanoid' import { nanoid } from 'nanoid'
import { DeepPartial } from 'typeorm' import { DeepPartial } from 'typeorm'
@ -74,8 +73,8 @@ import { OmnivoreAuthorizationHeader } from './auth'
import { CreateTaskError } from './errors' import { CreateTaskError } from './errors'
import { stringToHash } from './helpers' import { stringToHash } from './helpers'
import { logError, logger } from './logger' import { logError, logger } from './logger'
import View = google.cloud.tasks.v2.Task.View
import { EXPORT_QUEUE_NAME } from '../export-processor' import { EXPORT_QUEUE_NAME } from '../export-processor'
import View = protos.google.cloud.tasks.v2.Task.View
// Instantiates a client. // Instantiates a client.
const client = new CloudTasksClient() const client = new CloudTasksClient()
@ -285,7 +284,7 @@ export const createAppEngineTask = async ({
export const getTask = async ( export const getTask = async (
taskName: string taskName: string
): Promise<google.cloud.tasks.v2.ITask> => { ): Promise<protos.google.cloud.tasks.v2.ITask> => {
// If we are in local environment // If we are in local environment
if (env.dev.isLocal) { if (env.dev.isLocal) {
return { name: taskName } as protos.google.cloud.tasks.v2.ITask return { name: taskName } as protos.google.cloud.tasks.v2.ITask
@ -307,7 +306,7 @@ export const getTask = async (
export const deleteTask = async ( export const deleteTask = async (
taskName: string taskName: string
): Promise<google.protobuf.IEmpty | null> => { ): Promise<protos.google.protobuf.IEmpty | null> => {
// If we are in local environment // If we are in local environment
if (env.dev.isLocal) { if (env.dev.isLocal) {
return taskName return taskName

View File

@ -37,7 +37,7 @@ export const apiLimiter = rateLimit({
} }
}, },
keyGenerator: (req) => { keyGenerator: (req) => {
return getTokenByRequest(req) || req.ip return getTokenByRequest(req) || req.ip || ''
}, },
store: getStore('api-rate-limit'), store: getStore('api-rate-limit'),
}) })
@ -56,7 +56,7 @@ export const apiHourLimiter = rateLimit({
} }
}, },
keyGenerator: (req) => { keyGenerator: (req) => {
return getTokenByRequest(req) || req.ip return getTokenByRequest(req) || req.ip || ''
}, },
store: getStore('api-hour-rate-limit'), store: getStore('api-hour-rate-limit'),
}) })

View File

@ -1,12 +1,12 @@
/* eslint-disable @typescript-eslint/no-unsafe-member-access */ /* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */ /* eslint-disable @typescript-eslint/no-unsafe-assignment */
import { File, GetSignedUrlConfig, Storage } from '@google-cloud/storage'
import axios from 'axios' import axios from 'axios'
import { ContentReaderType } from '../entity/library_item' import { ContentReaderType } from '../entity/library_item'
import { env } from '../env' import { env } from '../env'
import { PageType } from '../generated/graphql' import { PageType } from '../generated/graphql'
import { ContentFormat } from '../jobs/upload_content' import { ContentFormat } from '../jobs/upload_content'
import { logger } from './logger' import { logger } from './logger'
import { storage } from '../repository/storage/storage'
export const contentReaderForLibraryItem = ( export const contentReaderForLibraryItem = (
itemType: string, itemType: string,
@ -31,14 +31,12 @@ export const contentReaderForLibraryItem = (
* the default app engine service account on the IAM page. We also need to * the default app engine service account on the IAM page. We also need to
* enable IAM related APIs on the project. * enable IAM related APIs on the project.
*/ */
export const storage = env.fileUpload?.gcsUploadSAKeyFilePath
? new Storage({ keyFilename: env.fileUpload.gcsUploadSAKeyFilePath })
: new Storage()
const bucketName = env.fileUpload.gcsUploadBucket const bucketName = env.fileUpload.gcsUploadBucket
const maxContentLength = 10 * 1024 * 1024 // 10MB const maxContentLength = 10 * 1024 * 1024 // 10MB
export const countOfFilesWithPrefix = async (prefix: string) => { export const countOfFilesWithPrefix = async (prefix: string) => {
const [files] = await storage.bucket(bucketName).getFiles({ prefix }) const files = await storage.getFilesFromPrefix(bucketName, prefix)
return files.length return files.length
} }
@ -48,40 +46,29 @@ export const generateUploadSignedUrl = async (
selectedBucket?: string selectedBucket?: string
): Promise<string> => { ): Promise<string> => {
// These options will allow temporary uploading of file with requested content type // These options will allow temporary uploading of file with requested content type
const options: GetSignedUrlConfig = { const options = {
version: 'v4', version: 'v4',
action: 'write', action: 'write' as const,
expires: Date.now() + 15 * 60 * 1000, // 15 minutes expires: Date.now() + 15 * 60 * 1000, // 15 minutes
contentType: contentType, contentType: contentType,
} }
logger.info('signed url for: ', options) logger.info('signed url for: ', options)
// Get a v4 signed URL for uploading file return storage.signedUrl(selectedBucket || bucketName, filePathName, options)
const [url] = await storage
.bucket(selectedBucket || bucketName)
.file(filePathName)
.getSignedUrl(options)
return url
} }
export const generateDownloadSignedUrl = async ( export const generateDownloadSignedUrl = async (
filePathName: string, filePathName: string,
config?: { config?: {
bucketName?: string
expires?: number expires?: number
} }
): Promise<string> => { ): Promise<string> => {
const options: GetSignedUrlConfig = { const options = {
version: 'v4', action: 'read' as const,
action: 'read', expires: Date.now() + 240 * 60 * 1000, // four hours
expires: config?.expires ?? Date.now() + 240 * 60 * 1000, // four hours ...config,
} }
const [url] = await storage return storage.signedUrl(bucketName, filePathName, options)
.bucket(config?.bucketName || bucketName)
.file(filePathName)
.getSignedUrl(options)
logger.info(`generating download signed url: ${url}`)
return url
} }
export const getStorageFileDetails = async ( export const getStorageFileDetails = async (
@ -89,10 +76,10 @@ export const getStorageFileDetails = async (
fileName: string fileName: string
): Promise<{ md5Hash: string; fileUrl: string }> => { ): Promise<{ md5Hash: string; fileUrl: string }> => {
const filePathName = generateUploadFilePathName(id, fileName) const filePathName = generateUploadFilePathName(id, fileName)
const file = storage.bucket(bucketName).file(filePathName) const file = await storage.downloadFile(bucketName, filePathName)
const [metadata] = await file.getMetadata() const metadataMd5 = await file.getMetadataMd5()
// GCS returns MD5 Hash in base64 encoding, we convert it here to hex string // GCS returns MD5 Hash in base64 encoding, we convert it here to hex string
const md5Hash = Buffer.from(metadata.md5Hash || '', 'base64').toString('hex') const md5Hash = Buffer.from(metadataMd5 || '', 'base64').toString('hex')
return { md5Hash, fileUrl: file.publicUrl() } return { md5Hash, fileUrl: file.publicUrl() }
} }
@ -110,17 +97,10 @@ export const uploadToBucket = async (
options?: { contentType?: string; public?: boolean; timeout?: number }, options?: { contentType?: string; public?: boolean; timeout?: number },
selectedBucket?: string selectedBucket?: string
): Promise<void> => { ): Promise<void> => {
await storage await storage.upload(selectedBucket || bucketName, filePath, data, {
.bucket(selectedBucket || bucketName) timeout: 30000,
.file(filePath) ...options,
.save(data, { timeout: 30000, ...options }) // default timeout 30s })
}
export const createGCSFile = (
filename: string,
selectedBucket = bucketName
): File => {
return storage.bucket(selectedBucket).file(filename)
} }
export const downloadFromUrl = async ( export const downloadFromUrl = async (
@ -154,16 +134,14 @@ export const uploadToSignedUrl = async (
} }
export const isFileExists = async (filePath: string): Promise<boolean> => { export const isFileExists = async (filePath: string): Promise<boolean> => {
const [exists] = await storage.bucket(bucketName).file(filePath).exists() const file = await storage.downloadFile(bucketName, filePath)
const exists = await file.exists()
return exists return exists
} }
export const downloadFromBucket = async (filePath: string): Promise<Buffer> => { export const downloadFromBucket = async (filePath: string): Promise<Buffer> => {
const file = storage.bucket(bucketName).file(filePath) const file = await storage.downloadFile(bucketName, filePath)
return file.download()
// Download the file contents
const [data] = await file.download()
return data
} }
export const contentFilePath = ({ export const contentFilePath = ({

View File

@ -1,13 +1,13 @@
import { Storage } from '@google-cloud/storage' // import { Storage } from '@google-cloud/storage'
import sinon from 'sinon' // import sinon from 'sinon'
import * as uploads from '../src/utils/uploads' // import * as uploads from '../src/utils/uploads'
import { MockStorage } from './mock_storage' // import { MockStorage } from './mock_storage'
//
export const mochaHooks = { // export const mochaHooks = {
beforeEach() { // beforeEach() {
// Mock cloud storage // // Mock cloud storage
sinon // sinon
.stub(uploads, 'storage') // .stub(uploads, 'storage')
.value(new MockStorage() as unknown as Storage) // .value(new MockStorage() as unknown as Storage)
}, // },
} // }

View File

@ -4,6 +4,11 @@
"project": "tsconfig.json" "project": "tsconfig.json"
}, },
"rules": { "rules": {
"@typescript-eslint/no-unsafe-assignment": 0,
"@typescript-eslint/no-unnecessary-type-assertion": 0,
"@typescript-eslint/no-unsafe-member-access": ["warn"],
"@typescript-eslint/no-unsafe-call": ["warn"],
"@typescript-eslint/no-unsafe-argument": ["warn"],
"@typescript-eslint/no-floating-promises": [ "@typescript-eslint/no-floating-promises": [
"error", "error",
{ {

View File

@ -1,27 +1,22 @@
FROM node:18.16 FROM node:22.12-alpine AS build
LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore" LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore"
# Installs latest Chromium package. # Installs latest Chromium package and other dependencies.
RUN apt-get update && apt-get install -y \ RUN apk -U upgrade \
chromium \ && apk add --no-cache \
ca-certificates \
nodejs \
yarn \
g++ \ g++ \
make \ make \
python3 python3 \
py3-pip && \
rm -rf /var/cache/apk/*
WORKDIR /app WORKDIR /app
ENV CHROMIUM_PATH /usr/bin/chromium ENV CHROMIUM_PATH=/usr/bin/chromium
ENV FIREFOX_PATH=/usr/bin/firefox
ENV LAUNCH_HEADLESS=true ENV LAUNCH_HEADLESS=true
COPY package.json . COPY package.json yarn.lock tsconfig.json .prettierrc .eslintrc ./
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
COPY /packages/content-fetch/package.json ./packages/content-fetch/package.json COPY /packages/content-fetch/package.json ./packages/content-fetch/package.json
COPY /packages/content-handler/package.json ./packages/content-handler/package.json COPY /packages/content-handler/package.json ./packages/content-handler/package.json
COPY /packages/puppeteer-parse/package.json ./packages/puppeteer-parse/package.json COPY /packages/puppeteer-parse/package.json ./packages/puppeteer-parse/package.json
@ -29,21 +24,50 @@ COPY /packages/utils/package.json ./packages/utils/package.json
RUN yarn install --pure-lockfile RUN yarn install --pure-lockfile
ADD /packages/content-fetch ./packages/content-fetch COPY /packages/content-fetch ./packages/content-fetch
ADD /packages/content-handler ./packages/content-handler COPY /packages/content-handler ./packages/content-handler
ADD /packages/puppeteer-parse ./packages/puppeteer-parse COPY /packages/puppeteer-parse ./packages/puppeteer-parse
ADD /packages/utils ./packages/utils COPY /packages/utils ./packages/utils
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/content-handler build
RUN yarn workspace @omnivore/puppeteer-parse build
RUN yarn workspace @omnivore/content-fetch build
# After building, fetch the production dependencies RUN yarn workspace @omnivore/utils build && \
RUN rm -rf /app/packages/content-fetch/node_modules yarn workspace @omnivore/content-handler build && \
RUN rm -rf /app/node_modules yarn workspace @omnivore/puppeteer-parse build && \
RUN yarn install --pure-lockfile --production yarn workspace @omnivore/content-fetch build && \
rm -rf /app/packages/content-fetch/node_modules /app/node_modules && \
yarn install --pure-lockfile --production
# Running stage
FROM node:22.12-alpine
RUN echo @edge https://dl-cdn.alpinelinux.org/alpine/edge/community >> /etc/apk/repositories \
&& echo @edge https://dl-cdn.alpinelinux.org/alpine/edge/main >> /etc/apk/repositories \
&& echo @edge https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories \
&& apk -U upgrade \
&& apk add --no-cache \
firefox@edge \
freetype@edge \
ttf-freefont@edge \
nss@edge \
libstdc++@edge \
sqlite-libs@edge \
chromium@edge \
firefox-esr@edge \
ca-certificates@edge \
rm -rf /var/cache/apk/*
WORKDIR /app
ENV CHROMIUM_PATH=/usr/bin/chromium
ENV FIREFOX_PATH=/usr/bin/firefox
ENV LAUNCH_HEADLESS=true
COPY --from=build /app /app
EXPOSE 8080 EXPOSE 8080
CMD ["yarn", "workspace", "@omnivore/content-fetch", "start"] # In Firefox we can't use the adblocking sites. Adding them to the hosts file of the docker seems to work.
COPY /packages/content-fetch/start.sh .
RUN wget https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts && \
chmod +x start.sh
CMD ["./start.sh"]

View File

@ -305,7 +305,7 @@ export const processFetchContentJob = async (
const savedDate = savedAt ? new Date(savedAt) : new Date() const savedDate = savedAt ? new Date(savedAt) : new Date()
const { finalUrl, title, content, contentType } = fetchResult const { finalUrl, title, content, contentType } = fetchResult
if (content) { if (content && process.env['SKIP_UPLOAD_ORIGINAL'] !== 'true') {
await uploadOriginalContent(users, content, savedDate.getTime()) await uploadOriginalContent(users, content, savedDate.getTime())
} }

View File

@ -0,0 +1,3 @@
#!/bin/sh
# Append the downloaded hosts blocklist (fetched at image build time) to the
# container's /etc/hosts so ad/tracker domains resolve locally — Firefox has
# no extension-based adblock in this setup.
cat hosts >> /etc/hosts
# Start the content-fetch worker in the foreground (PID 1 of the container).
yarn workspace @omnivore/content-fetch start

View File

@ -39,6 +39,7 @@ import { WikipediaHandler } from './websites/wikipedia-handler'
import { YoutubeHandler } from './websites/youtube-handler' import { YoutubeHandler } from './websites/youtube-handler'
import { ZhihuHandler } from './websites/zhihu-handler' import { ZhihuHandler } from './websites/zhihu-handler'
import { TikTokHandler } from './websites/tiktok-handler' import { TikTokHandler } from './websites/tiktok-handler'
import { RawContentHandler } from './websites/raw-handler'
const validateUrlString = (url: string): boolean => { const validateUrlString = (url: string): boolean => {
const u = new URL(url) const u = new URL(url)
@ -66,6 +67,7 @@ const contentHandlers: ContentHandler[] = [
new DerstandardHandler(), new DerstandardHandler(),
new ImageHandler(), new ImageHandler(),
new MediumHandler(), new MediumHandler(),
new RawContentHandler(),
new PdfHandler(), new PdfHandler(),
new ScrapingBeeHandler(), new ScrapingBeeHandler(),
new TDotCoHandler(), new TDotCoHandler(),

View File

@ -1,4 +1,6 @@
import { ContentHandler, PreHandleResult } from '../content-handler' import { ContentHandler, PreHandleResult } from '../content-handler'
import axios from 'axios'
import { parseHTML } from 'linkedom'
export class MediumHandler extends ContentHandler { export class MediumHandler extends ContentHandler {
constructor() { constructor() {
@ -11,13 +13,52 @@ export class MediumHandler extends ContentHandler {
return u.hostname.endsWith('medium.com') return u.hostname.endsWith('medium.com')
} }
/**
 * Replaces Medium's <picture> elements with a plain <img> pointing at the
 * widest candidate in the <source> srcSet, so downstream parsing keeps the
 * article images.
 */
addImages(document: Document): Document {
  const pictures = document.querySelectorAll('picture')
  pictures.forEach((pict) => {
    const source = pict.querySelector('source')
    if (source) {
      const srcSet = source.getAttribute('srcSet')
      // Width of one "url 123w" entry; entries without a width descriptor
      // rank as 0 instead of throwing (the original dereferenced a[1]
      // unguarded and crashed on descriptor-less srcset entries).
      const widthOf = (entry: string[]): number =>
        entry[1] ? Number(entry[1].replace('w', '')) || 0 : 0
      const sources = (srcSet || '')
        .split(', ')
        .map((src) => src.split(' '))
        .sort((a, b) => widthOf(b) - widthOf(a)) // widest first

      // This should be the largest image in the source set.
      if (sources && sources.length && Array.isArray(sources[0])) {
        const url = sources[0][0]
        const img = document.createElement('img')
        img.src = url
        pict.after(img)
        pict.remove()
      }
    }
  })
  return document
}
async preHandle(url: string): Promise<PreHandleResult> { async preHandle(url: string): Promise<PreHandleResult> {
console.log('prehandling medium url', url) console.log('prehandling medium url', url)
try { try {
const res = new URL(url) const res = new URL(url)
res.searchParams.delete('source') res.searchParams.delete('source')
return Promise.resolve({ url: res.toString() })
const response = await axios.get(res.toString())
const dom = parseHTML(response.data).document
const imageAddedDom = this.addImages(dom)
return {
title: dom.title,
content: imageAddedDom.body.outerHTML,
url: res.toString(),
}
} catch (error) { } catch (error) {
console.error('error prehandling medium url', error) console.error('error prehandling medium url', error)
throw error throw error

View File

@ -0,0 +1,33 @@
import { ContentHandler, PreHandleResult } from '../content-handler'
import axios from 'axios'
import { parseHTML } from 'linkedom'
/**
 * Content handler that fetches a page over plain HTTP and returns the raw
 * response body unmodified, for sites where headless-browser rendering is
 * unnecessary or unreliable.
 */
export class RawContentHandler extends ContentHandler {
  constructor() {
    super()
    this.name = 'RawContentHandler'
  }

  /** True for hostnames (or subdomains) on the raw-fetch allow list. */
  shouldPreHandle(url: string): boolean {
    const { hostname } = new URL(url)
    return [
      'medium.com',
      'fastcompany.com',
      'fortelabs.com',
      'theverge.com',
    ].some((domain) => hostname.endsWith(domain))
  }

  /** Fetches the URL and returns its raw HTML plus the parsed <title>. */
  async preHandle(url: string): Promise<PreHandleResult> {
    try {
      const response = await axios.get(url)
      const raw = response.data as string
      const { document } = parseHTML(raw)
      return { title: document.title, content: raw, url: url }
    } catch (error) {
      console.error('error prehandling URL', error)
      throw error
    }
  }
}

View File

@ -24,7 +24,9 @@ export class WeixinQqHandler extends ContentHandler {
// create a meta node to store the publish time in ISO format // create a meta node to store the publish time in ISO format
const metaNode = dom.createElement('meta') const metaNode = dom.createElement('meta')
metaNode.setAttribute('name', 'date') metaNode.setAttribute('name', 'date')
metaNode.setAttribute('content', publishTimeISO) if (publishTimeISO) {
metaNode.setAttribute('content', publishTimeISO)
}
dom.querySelector('head')?.appendChild(metaNode) dom.querySelector('head')?.appendChild(metaNode)
} }

View File

@ -1,4 +1,4 @@
FROM node:18.16 FROM node:22.12
WORKDIR /app WORKDIR /app

View File

@ -1,4 +1,4 @@
FROM node:18.16 as builder FROM node:22.12 as builder
WORKDIR /app WORKDIR /app
@ -17,7 +17,7 @@ COPY /packages/discover/tsconfig.json ./packages/discover/tsconfig.json
RUN yarn install --pure-lockfile RUN yarn install --pure-lockfile
RUN yarn workspace @omnivore/discover build RUN yarn workspace @omnivore/discover build
FROM node:18.16 as runner FROM node:22.12 as runner
WORKDIR /app WORKDIR /app

View File

@ -3,9 +3,7 @@
/* eslint-disable @typescript-eslint/no-unsafe-return */ /* eslint-disable @typescript-eslint/no-unsafe-return */
/* eslint-disable @typescript-eslint/restrict-template-expressions */ /* eslint-disable @typescript-eslint/restrict-template-expressions */
import { OmnivoreArticle } from '../../../../../types/OmnivoreArticle'
import { slugify } from 'voca' import { slugify } from 'voca'
import { Observable, tap } from 'rxjs'
import { fromArrayLike } from 'rxjs/internal/observable/innerFrom' import { fromArrayLike } from 'rxjs/internal/observable/innerFrom'
import { mapOrNull } from '../../../../utils/reactive' import { mapOrNull } from '../../../../utils/reactive'
import { import {

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine FROM node:22.12-alpine
WORKDIR /app WORKDIR /app

View File

@ -24,7 +24,7 @@
"eslint-plugin-prettier": "^4.0.0" "eslint-plugin-prettier": "^4.0.0"
}, },
"dependencies": { "dependencies": {
"@google-cloud/functions-framework": "3.1.2", "@google-cloud/functions-framework": "3.4.5",
"@google-cloud/storage": "^7.0.1", "@google-cloud/storage": "^7.0.1",
"@omnivore-app/api": "^1.0.4", "@omnivore-app/api": "^1.0.4",
"@omnivore/utils": "1.0.0", "@omnivore/utils": "1.0.0",

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine FROM node:22.12-alpine
WORKDIR /app WORKDIR /app

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine FROM node:22.12-alpine
WORKDIR /app WORKDIR /app

View File

@ -9,7 +9,6 @@
"keywords": [], "keywords": [],
"license": "Apache-2.0", "license": "Apache-2.0",
"scripts": { "scripts": {
"test": "yarn mocha -r ts-node/register --config mocha-config.json",
"test:typecheck": "tsc --noEmit", "test:typecheck": "tsc --noEmit",
"lint": "eslint src --ext ts,js,tsx,jsx", "lint": "eslint src --ext ts,js,tsx,jsx",
"compile": "tsc", "compile": "tsc",
@ -34,7 +33,7 @@
}, },
"dependencies": { "dependencies": {
"@fast-csv/parse": "^5.0.0", "@fast-csv/parse": "^5.0.0",
"@google-cloud/functions-framework": "3.1.2", "@google-cloud/functions-framework": "3.4.5",
"@google-cloud/storage": "^7.0.1", "@google-cloud/storage": "^7.0.1",
"@omnivore/readability": "1.0.0", "@omnivore/readability": "1.0.0",
"@omnivore/utils": "1.0.0", "@omnivore/utils": "1.0.0",

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine FROM node:22.12-alpine
# Run everything after as non-privileged user. # Run everything after as non-privileged user.
WORKDIR /app WORKDIR /app

View File

@ -33,7 +33,7 @@
"mocha": "^10.0.0" "mocha": "^10.0.0"
}, },
"dependencies": { "dependencies": {
"@google-cloud/functions-framework": "3.1.2", "@google-cloud/functions-framework": "3.4.5",
"@google-cloud/storage": "^7.0.1", "@google-cloud/storage": "^7.0.1",
"@omnivore/utils": "1.0.0", "@omnivore/utils": "1.0.0",
"@sentry/serverless": "^7.77.0", "@sentry/serverless": "^7.77.0",

View File

@ -2,5 +2,9 @@
"extends": "../../.eslintrc", "extends": "../../.eslintrc",
"parserOptions": { "parserOptions": {
"project": "tsconfig.json" "project": "tsconfig.json"
},
"rules": {
"@typescript-eslint/no-unsafe-assignment": ["warn"],
"@typescript-eslint/no-unsafe-argument": ["warn"]
} }
} }

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine FROM node:22.12-alpine
# Run everything after as non-privileged user. # Run everything after as non-privileged user.
WORKDIR /app WORKDIR /app

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine FROM node:22.12-alpine
# Run everything after as non-privileged user. # Run everything after as non-privileged user.
WORKDIR /app WORKDIR /app

View File

@ -26,7 +26,7 @@
"eslint-plugin-prettier": "^4.0.0" "eslint-plugin-prettier": "^4.0.0"
}, },
"dependencies": { "dependencies": {
"@google-cloud/functions-framework": "3.1.2", "@google-cloud/functions-framework": "3.4.5",
"@google-cloud/storage": "^7.0.1", "@google-cloud/storage": "^7.0.1",
"@sentry/serverless": "^7.77.0", "@sentry/serverless": "^7.77.0",
"axios": "^1.2.2", "axios": "^1.2.2",

View File

@ -0,0 +1,13 @@
{
"extends": "../../.eslintrc",
"parserOptions": {
"project": "tsconfig.json"
},
"rules": {
"@typescript-eslint/no-unsafe-argument": "off",
"@typescript-eslint/no-explicit-any": "off",
"@typescript-eslint/strictNullChecks": "off",
"@typescript-eslint/no-unsafe-member-access": "off",
"@typescript-eslint/no-unsafe-assignment": "off"
}
}

131
packages/local-mail-watcher/.gitignore vendored Normal file
View File

@ -0,0 +1,131 @@
.idea/
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
.cache
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

View File

@ -0,0 +1,40 @@
FROM node:22.12 as builder
WORKDIR /app
RUN apt-get update && apt-get install -y g++ make python3
COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
COPY /packages/local-mail-watcher/src ./packages/local-mail-watcher/src
COPY /packages/local-mail-watcher/package.json ./packages/local-mail-watcher/package.json
COPY /packages/local-mail-watcher/tsconfig.json ./packages/local-mail-watcher/tsconfig.json
COPY /packages/utils/package.json ./packages/utils/package.json
RUN yarn install --pure-lockfile
ADD /packages/utils ./packages/utils
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/local-mail-watcher build
FROM node:22.12 as runner
WORKDIR /app
ENV NODE_ENV production
COPY --from=builder /app/packages/local-mail-watcher/dist /app/packages/local-mail-watcher/dist
COPY --from=builder /app/packages/local-mail-watcher/package.json /app/packages/local-mail-watcher/package.json
COPY --from=builder /app/packages/local-mail-watcher/node_modules /app/packages/local-mail-watcher/node_modules
COPY --from=builder /app/packages/utils/ /app/packages/utils/
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/package.json /app/package.json
CMD ["yarn", "workspace", "@omnivore/local-mail-watcher", "start"]

View File

@ -0,0 +1,39 @@
FROM node:22.12 as builder
WORKDIR /app
RUN apt-get update && apt-get install -y g++ make python3
COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
COPY /packages/local-mail-watcher/src ./packages/local-mail-watcher/src
COPY /packages/local-mail-watcher/package.json ./packages/local-mail-watcher/package.json
COPY /packages/local-mail-watcher/tsconfig.json ./packages/local-mail-watcher/tsconfig.json
COPY /packages/utils/package.json ./packages/utils/package.json
RUN yarn install --pure-lockfile
ADD /packages/utils ./packages/utils
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/local-mail-watcher build
FROM node:22.12 as runner
WORKDIR /app
ENV NODE_ENV production
COPY --from=builder /app/packages/local-mail-watcher/dist /app/packages/local-mail-watcher/dist
COPY --from=builder /app/packages/local-mail-watcher/package.json /app/packages/local-mail-watcher/package.json
COPY --from=builder /app/packages/local-mail-watcher/node_modules /app/packages/local-mail-watcher/node_modules
COPY --from=builder /app/packages/utils/ /app/packages/utils/
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/package.json /app/package.json
CMD ["yarn", "workspace", "@omnivore/local-mail-watcher", "start-watcher"]

View File

@ -0,0 +1,40 @@
{
"name": "@omnivore/local-mail-watcher",
"version": "0.0.1",
"scripts": {
"build": "tsc",
"dev": "ts-node-dev --files src/index.ts",
"start": "node dist/index.js",
"start-watcher": "node dist/watcher.js",
"lint": "eslint src --ext ts,js,tsx,jsx",
"lint:fix": "eslint src --fix --ext ts,js,tsx,jsx",
"test:typecheck": "tsc --noEmit"
},
"dependencies": {
"chokidar": "^4.0.1",
"mailparser": "^3.7.1",
"axios": "^1.7.7",
"express": "^4.21.1",
"bullmq": "^5.22.0",
"@omnivore/utils": "1.0.0"
},
"devDependencies": {
"@types/html-to-text": "^9.0.2",
"@types/jsdom": "^21.1.3",
"@types/mailparser": "^3.4.5",
"@types/axios" : "^0.14.4",
"@types/node": "^22.10.7",
"typescript": "^5.7.3",
"@types/express": "^5.0.0",
"@types/pg": "^8.10.5",
"@types/pg-format": "^1.0.3",
"@types/urlsafe-base64": "^1.0.28",
"@types/uuid": "^9.0.1",
"@types/voca": "^1.4.3",
"ts-node": "^10.9.1",
"tslib": "^2.6.2"
},
"volta": {
"extends": "../../package.json"
}
}

View File

@ -0,0 +1,66 @@
/** Connection settings for a single Redis endpoint. */
interface redisConfig {
  url?: string
  cert?: string
}

/** Fully-resolved configuration for the local mail watcher processes. */
interface WatcherEnv {
  filesystem: {
    filePath: string
  }
  redis: {
    mq: redisConfig
    cache: redisConfig
  }
  sns: {
    snsArn: string
  }
  apiKey: string
  apiEndpoint: string
}

/**
 * Builds a lookup bound to `env`: returns the non-empty string value of
 * `varName`, `undefined` when absent/empty, or throws when
 * `throwOnUndefined` is set.
 */
const envParser =
  (env: { [key: string]: string | undefined }) =>
  (varName: string, throwOnUndefined = false): string | undefined => {
    const raw = env[varName]
    if (typeof raw === 'string' && raw) {
      return raw
    }
    if (throwOnUndefined) {
      throw new Error(
        `Missing ${varName} with a non-empty value in process environment`
      )
    }
    return
  }

/**
 * Reads the watcher configuration from process.env.
 *
 * NOTE(review): the `!` assertions mean a missing required variable surfaces
 * later as `undefined` at the point of use rather than failing eagerly here —
 * confirm that is intended (index.ts does not need MAIL_FILE_PATH).
 */
export function getEnv(): WatcherEnv {
  const parse = envParser(process.env)

  return {
    filesystem: {
      filePath: parse('MAIL_FILE_PATH')!,
    },
    redis: {
      mq: {
        url: parse('MQ_REDIS_URL'),
        cert: parse('MQ_REDIS_CERT')?.replace(/\\n/g, '\n'), // replace \n with new line
      },
      cache: {
        url: parse('REDIS_URL'),
        cert: parse('REDIS_CERT')?.replace(/\\n/g, '\n'), // replace \n with new line
      },
    },
    sns: {
      snsArn: parse('SNS_ARN') || '',
    },
    apiKey: parse('WATCHER_API_KEY')!,
    apiEndpoint: parse('WATCHER_API_ENDPOINT')!,
  }
}

export const env = getEnv()

View File

@ -0,0 +1,122 @@
import { RedisDataSource } from '@omnivore/utils'
import express, { Express, Request, Response } from 'express'
import { env } from './env'
import { getQueue } from './lib/queue'
import { SnsMessage } from './types/SNS'
import { simpleParser } from 'mailparser'
import axios from 'axios'
import { convertToMailObject } from './lib/emailApi'
console.log('Starting worker...')

const app: Express = express()

// Parse text/plain bodies (SNS posts its JSON with a text/plain content type)
// as a raw string, up to 50mb.
app.use(express.text({ limit: '50mb' }))

// Force JSON for SNS
// NOTE(review): middleware order is load-bearing here — express.text() above
// has already consumed text/plain bodies into a string before this override
// rewrites the content type, so express.json() below skips the already-parsed
// body and /sns still receives req.body as a string. Confirm intentional.
app.use((req, res, next) => {
  req.headers['content-type'] = 'application/json'
  next()
})

app.use(express.json({ limit: '50mb' }))
app.use(express.urlencoded({ limit: '50mb', extended: true }))

// create redis source
// Separate connections for the cache and the BullMQ message queue; certs may
// be supplied for TLS-enabled Redis.
const redisDataSource = new RedisDataSource({
  cache: {
    url: process.env.REDIS_URL,
    cert: process.env.REDIS_CERT,
  },
  mq: {
    url: process.env.MQ_REDIS_URL,
    cert: process.env.MQ_REDIS_CERT,
  },
})

// NOTE: getQueue is async, so `queue` is a Promise<Queue> — handlers must
// await it before calling .add().
const queue = getQueue(redisDataSource.queueRedisClient)
/**
 * POST handler that authenticates the caller via the x-api-key header and
 * enqueues the request body as a 'save-newsletter' job on the backend queue.
 * Responds 401 on a missing or mismatched key, 200 once the job is queued.
 */
const addEmailEventToQueue = async (req: Request, res: Response) => {
  const providedKey = req.headers['x-api-key']

  if (!providedKey) {
    res.status(401).send('Unauthorized: API key is missing')
    return
  }

  // Loose equality kept deliberately: header values may be string[].
  if (providedKey != env.apiKey) {
    res.status(401).send('Unauthorized: Invalid API Key')
    return
  }

  const mailQueue = await queue
  await mailQueue.add('save-newsletter', req.body, {
    priority: 1,
    attempts: 1,
    delay: 500,
  })
  res.sendStatus(200)
}
// respond healthy to auto-scaler.
app.get('/_ah/health', (_req: Request, res: Response) => {
  res.sendStatus(200)
})

// Direct ingestion endpoint for pre-parsed mail payloads (used by the
// filesystem watcher via sendToEmailApi).
app.post('/mail', addEmailEventToQueue)
/**
 * AWS SNS webhook: confirms topic subscriptions and enqueues inbound SES
 * e-mails ('Received' notifications) as 'save-newsletter' jobs.
 * Responds 401 for foreign topics, 400 for unusable payloads, 200 on success.
 */
app.post('/sns', async (req, res) => {
  const bodyString = req.body as string
  const snsMessage = JSON.parse(bodyString) as SnsMessage
  console.log(`Received SNS Message`, snsMessage)
  console.log(`Sns Topic ARN ${snsMessage['TopicArn']}`)

  // Only accept messages published by the configured topic.
  if (snsMessage.TopicArn != env.sns.snsArn) {
    console.log(
      `Topic ARN: ${snsMessage.TopicArn} Doesnt Match ${env.sns.snsArn}, failing...`
    )
    res.status(401).send()
    return
  }

  // SNS sends a one-time SubscribeURL that must be fetched to activate the
  // subscription.
  if (snsMessage.Type == 'SubscriptionConfirmation') {
    console.log('Subscribing to topic')
    await axios.get(snsMessage.SubscribeURL)
    res.status(200).send()
    return
  }

  if (snsMessage.Type == 'Notification') {
    const message = JSON.parse(snsMessage.Message) as {
      notificationType: string
      content: string
    }
    if (message.notificationType != 'Received') {
      console.log('Not an email, failing...')
      res.status(400).send()
      // Fix: previously fell through here and still parsed/enqueued the
      // non-mail payload after having responded 400.
      return
    }

    // Parse the raw RFC822 message and hand it to the backend worker.
    const mailContent = await simpleParser(message.content)
    const mail = convertToMailObject(mailContent)
    console.log(mail)
    await (
      await queue
    ).add('save-newsletter', mail, {
      priority: 1,
      attempts: 1,
      delay: 500,
    })
    // Fix: respond exactly once — the original called res.sendStatus(200)
    // and then res.status(200).send(), a double-send.
    res.sendStatus(200)
    return
  }

  res.status(400).send()
})
// Bind to PORT when provided by the environment (e.g. the container runtime),
// defaulting to 8080 otherwise.
const port = process.env.PORT || 8080

// NOTE(review): the `server` handle is never read — consider wiring graceful
// shutdown (server.close) on SIGTERM.
const server = app.listen(port, () => {
  console.log('Mail Server started')
})

View File

@ -0,0 +1,25 @@
import { EmailContents } from '../types/EmailContents'
import axios from 'axios'
import { env } from '../env'
import { ParsedMail } from 'mailparser'
/**
 * POSTs a parsed e-mail payload to the Omnivore e-mail API, authenticated
 * with the watcher API key. Returns the axios response promise; times out
 * after 5 seconds.
 */
export const sendToEmailApi = (data: EmailContents) => {
  const headers = {
    ['x-api-key']: env.apiKey,
    'Content-Type': 'application/json',
  }
  return axios.post(env.apiEndpoint, data, { headers, timeout: 5000 })
}
/**
 * Flatten a mailparser `ParsedMail` into the `EmailContents` shape the
 * email API expects. Missing fields default to empty strings.
 */
export const convertToMailObject = (it: ParsedMail): EmailContents => {
  return {
    from: it.from?.value[0]?.address || '',
    // `to` may be a single AddressObject or an array of them. FIX: an empty
    // array previously crashed on `it.to[0].text` — guard with optional
    // chaining, consistent with the `from` line above.
    to: (Array.isArray(it.to) ? it.to[0]?.text : it.to?.text) || '',
    subject: it.subject || '',
    html: it.html || '',
    text: it.text || '',
    headers: it.headers,
  }
}

View File

@ -0,0 +1,27 @@
import { RedisDataSource } from '@omnivore/utils'
import { Queue, RedisClient } from 'bullmq'
export const QUEUE = 'omnivore-backend-queue'

/**
 * Build a BullMQ queue on the given Redis connection and wait until it is
 * ready. Jobs retry with exponential backoff (2s base); completed jobs are
 * kept for one hour, failed jobs for one day.
 */
export const getQueue = async (
  connection: RedisClient,
  queueName = QUEUE
): Promise<Queue> => {
  const defaultJobOptions = {
    backoff: {
      type: 'exponential',
      delay: 2000, // 2 seconds
    },
    removeOnComplete: {
      age: 3600, // keep up to 1 hour
    },
    removeOnFail: {
      age: 24 * 3600, // keep up to 1 day
    },
  }
  const queue = new Queue(queueName, { connection, defaultJobOptions })
  await queue.waitUntilReady()
  return queue
}

View File

@ -0,0 +1,20 @@
import { HeaderValue } from 'mailparser'
// Normalized email payload exchanged between the mail watcher and the
// email API. Produced by convertToMailObject, consumed by sendToEmailApi.
export type EmailContents = {
  from: string // sender address; '' when unavailable
  to: string // first recipient's display text; '' when unavailable
  subject: string
  html: string // HTML body; '' when the message has none
  text: string // plain-text body; '' when the message has none
  headers: Map<string, HeaderValue> // raw parsed headers from mailparser
  // Optional fields below are not populated by convertToMailObject;
  // presumably filled in by downstream processing — TODO confirm.
  unsubMailTo?: string // mailto: unsubscribe target
  unsubHttpUrl?: string // HTTP unsubscribe target
  forwardedFrom?: string
  replyTo?: string
  confirmationCode?: string
  uploadFile?: {
    fileName: string
    contentType: string
    id: string
  }
}

View File

@ -0,0 +1,7 @@
// Minimal shape of an AWS SNS HTTP(S) delivery payload — only the fields
// the /sns route reads.
export type SnsMessage = {
  Type: string // 'SubscriptionConfirmation' | 'Notification' | others
  TopicArn: string // checked against env.sns.snsArn before processing
  SubscribeURL: string // only present on SubscriptionConfirmation messages
  content: string
  Message: string // JSON-encoded inner payload (SES notification)
}

View File

@ -0,0 +1,22 @@
import chokidar from 'chokidar'
import { simpleParser } from 'mailparser'
import * as fs from 'node:fs'
import { convertToMailObject, sendToEmailApi } from './lib/emailApi'
import { env } from './env'
// Watch the inbound-mail directory: each newly added file is read, parsed as
// an RFC 822 message, and forwarded to the email API. When DELETE_FILE=true
// the file is removed after a successful send.
chokidar.watch(env.filesystem.filePath).on('add', (path, _event) => {
  console.log(path)
  const contents = fs.readFileSync(path).toString()
  void simpleParser(contents)
    .then(convertToMailObject)
    .then(async (emailData) => {
      await sendToEmailApi(emailData)
      console.log('Sent to email API')
    })
    .then(() => {
      if (process.env['DELETE_FILE'] == 'true') {
        fs.unlinkSync(path)
        // FIX: this log was previously outside the if, claiming deletion
        // even when the file was kept.
        console.log('Deleted File')
      }
    })
    .catch((err) => {
      // FIX: parse/send failures were previously unhandled promise
      // rejections; log them with the offending file path instead.
      console.error(`Failed to process ${path}`, err)
    })
})

View File

@ -0,0 +1,9 @@
{
"extends": "./../../tsconfig.json",
"compileOnSave": false,
"include": ["./src/**/*"],
"compilerOptions": {
"outDir": "dist",
"typeRoots": ["./../../node_modules/pgvector/types"]
}
}

Some files were not shown because too many files have changed in this diff Show More