Improving Self-Hosting and Removing 3rd Party dependencies. (#4513)

* fix: Library Header layout shift

* Bump Github Actions versions.

* Self-Hosting Changes

* Fix Minio Environment Variable

* Just make pdfs successful, due to lack of PDFHandler

* Fix issue where flag was set wrong

* Added an NGINX Example file

* Add some documentation for self-hosting via Docker Compose

* Make some adjustments to Puppeteer due to failing sites.

* adjust timings

* Add start of Mail Service

* Fix Docker Files

* More email service stuff

* Add Guide to use Zapier for Email-Importing.

* Ensure that if no env is provided it uses the old email settings

* Add some instructions for self-hosted email

* Add SNS Endpoints for Mail Watcher

* Add steps and functionality for using SES and SNS for email

* Uncomment a few jobs.

* Added option for Firefox for parser. Was having issues with Chromium on Docker.

* Add missing space.

Co-authored-by: Russ Taylor <729694+russtaylor@users.noreply.github.com>

* Fix some wording on the Guide

* update browser extension to handle self-hosted instances

* add slight documentation to options page

* Fix MV

* Do raw handlers for Medium

* Fix images in Medium

* Update self-hosting/GUIDE.md

Co-authored-by: Mike Baker <1426795+mbaker3@users.noreply.github.com>

* Update Guide with other variables

* Add The Verge to JS-less handlers

* Update regex and image-proxy

* Update self-hosting/nginx/nginx.conf

Co-authored-by: Mike Baker <1426795+mbaker3@users.noreply.github.com>

* Update regex and image-proxy

* Update self-hosting/docker-compose/docker-compose.yml

Co-authored-by: Mike Baker <1426795+mbaker3@users.noreply.github.com>

* Fix Minio for Export

* Merge to main

* Update GUIDE with newer NGINX

* Update nginx config to include api/save route

* Enable Native PDF View for PDFS

* Enable Native PDF View for PDFS

* feat:lover packages test

* feat:working build

* feat:alpine build

* docs:api dockerfile docs

* Write a PDF.js wrapper to replace pspdfkit

* Revert changes for replication, set settings to have default mode

* build folder got removed due to gitignore on pdf

* Add Box shadow to pdf pages

* Add Toggle for Progress in PDFS, enabled native viewer toggle

* Update node version to LTS

* Update node version to LTS

* Fix Linting issues

* Fix Linting issues

* Make env variable nullable

* Add touchend listener for mobile

* Make changes to PDF for mobile

* fix(android): change serverUrl to selfhosted first

* feat:2 stage alpine content fetch

* feat:separated start script

* fix:changed to node 22

* Add back youtube functionality and add guide

* trigger build

* Fix cache issue on YouTube

* Allow empty AWS_S3_ENDPOINT

* Allow empty AWS_S3_ENDPOINT

* Add GCHR for all images

* Add GCHR For self hosting.

* Add GCHR For self hosting.

* Test prebuilt.

* Test prebuilt

* Test prebuilt...

* Fix web image

* Remove Web Image (For now)

* Move docker-compose to images

* Move docker-compose files to correct locations

* Remove the need for ARGS

* Update packages, and Typescript versions

* Fix

* Fix issues with build on Web

* Correct push

* Fix Linting issues

* Fix Trace import

* Add missing types

* Fix Tasks

* Add information into guide about self-build

* Fix issues with PDF Viewer

---------

Co-authored-by: keumky2 <keumky2@woowahan.com>
Co-authored-by: William Theaker <wtheaker@nvidia.com>
Co-authored-by: Russ Taylor <729694+russtaylor@users.noreply.github.com>
Co-authored-by: David Adams <david@dadams2.com>
Co-authored-by: Mike Baker <1426795+mbaker3@users.noreply.github.com>
Co-authored-by: m1xxos <66390094+m1xxos@users.noreply.github.com>
Co-authored-by: Adil <mr.adil777@gmail.com>
This commit is contained in:
Tom Rogers
2025-01-27 13:33:16 +01:00
committed by GitHub
parent 4cd5f2f02a
commit 4e582fb55d
339 changed files with 14859 additions and 11964 deletions

View File

@ -11,6 +11,7 @@
},
"plugins": ["@typescript-eslint"],
"rules": {
"semi": [2, "never"]
"semi": [2, "never"],
"@typescript-eslint/no-unnecessary-type-assertion": [0, "never"]
}
}

View File

@ -13,11 +13,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: 'Login to GitHub container registry'
uses: docker/login-action@v1
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{github.actor}}
@ -30,3 +30,4 @@ jobs:
run: |
docker build --file packages/content-fetch/Dockerfile . --tag "ghcr.io/omnivore-app/content-fetch:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/content-fetch:latest
docker push ghcr.io/omnivore-app/content-fetch:${GITHUB_SHA}

View File

@ -0,0 +1,64 @@
name: Build Self-Hosting Docker Images
on:
push:
branches:
- main
- self-host-updates
paths-ignore:
- 'apple/**'
- 'android/**'
jobs:
build-self-hostdocker-images:
name: Build self-host docker images
permissions:
contents: read
packages: write
attestations: write
id-token: write
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: 'Login to GitHub container registry'
uses: docker/login-action@v1
with:
registry: ghcr.io
username: ${{github.actor}}
password: ${{secrets.GITHUB_TOKEN}}
- name: Build the backend docker image
run: |
docker build . --file packages/api/Dockerfile --tag "ghcr.io/omnivore-app/sh-backend:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-backend:latest
docker push ghcr.io/omnivore-app/sh-backend:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-backend:latest
- name: Build the content-fetch docker image
run: |
docker build --file packages/content-fetch/Dockerfile . --tag "ghcr.io/omnivore-app/sh-content-fetch:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-content-fetch:latest
docker push ghcr.io/omnivore-app/sh-content-fetch:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-content-fetch:latest
- name: Build the queue-processor docker image.
run: |
docker build . --file packages/api/queue-processor/Dockerfile --tag "ghcr.io/omnivore-app/sh-queue-processor:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-queue-processor:latest
docker push ghcr.io/omnivore-app/sh-queue-processor:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-queue-processor:latest
- name: Build the migrate docker image
run: |
docker build --file packages/db/Dockerfile . --tag "ghcr.io/omnivore-app/sh-migrate:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-migrate:latest
docker push ghcr.io/omnivore-app/sh-migrate:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-migrate:latest
- name: Build the image-proxy docker image
run: |
cp imageproxy/start_imageproxy.sh .
chmod +x start_imageproxy.sh
docker build --file imageproxy/Dockerfile . --tag "ghcr.io/omnivore-app/sh-image-proxy:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-image-proxy:latest
docker push ghcr.io/omnivore-app/sh-image-proxy:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-image-proxy:latest
- name: Build the mail-watch-server docker image
run: |
docker build --file packages/local-mail-watcher/Dockerfile . --tag "ghcr.io/omnivore-app/sh-local-mail-watcher:${GITHUB_SHA}" --tag ghcr.io/omnivore-app/sh-local-mail-watcher:latest
docker push ghcr.io/omnivore-app/sh-local-mail-watcher:${GITHUB_SHA}
docker push ghcr.io/omnivore-app/sh-local-mail-watcher:latest

View File

@ -9,7 +9,7 @@ jobs:
lint_migrations:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v4
- name: Fetch main branch
run: git fetch origin main:main
- name: Find modified migrations

View File

@ -14,12 +14,12 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
repository: ${{ github.event.pull_request.head.repo.full_name }}
ref: ${{ github.event.pull_request.head.ref }}
- name: Setup Go
uses: actions/setup-go@v3
uses: actions/setup-go@v5
with:
go-version: 1.19
- name: Generate distiller output for readability
@ -34,7 +34,7 @@ jobs:
go-domdistiller file -i $f/source.html -o $f/distiller.html
done
- name: Setup Python
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Generate static html

View File

@ -45,7 +45,7 @@ jobs:
ports:
- 6379
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Initialize the database
@ -55,9 +55,9 @@ jobs:
env:
PGPASSWORD: postgres
- name: Use Node.js ${{ matrix.node-version }}
uses: actions/setup-node@v2
uses: actions/setup-node@v4
with:
node-version: 18.16
node-version: 22.12.0
- name: Get yarn cache directory path
id: yarn-cache-dir-path
run: echo "::set-output name=dir::$(source ~/.nvm/nvm.sh && yarn cache dir)"
@ -102,7 +102,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Build the API docker image

View File

@ -26,13 +26,13 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
uses: actions/checkout@v4
- name: Setup Pages
uses: actions/configure-pages@v3
uses: actions/configure-pages@v5
- name: Upload artifact
uses: actions/upload-pages-artifact@v1
uses: actions/upload-pages-artifact@v3
with:
path: 'packages/readabilityjs/test'
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v1
uses: actions/deploy-pages@v4

View File

@ -1 +1 @@
18.16
22.12.0

View File

@ -151,24 +151,7 @@ is done fetching your content you will see it in your library.
## How to deploy to your own server
Omnivore was originally designed to be deployed on GCP and takes advantage
of some of GCP's PaaS features. We are working to make Omnivore more portable
so you can easily run the service on your own infrastructure. You can track
progress here: <https://github.com/omnivore-app/omnivore/issues/25>
To deploy Omnivore on your own hardware you will need to deploy three
dockerized services and configure access to a postgres service. To handle
PDF documents you will need to configure access to a Google Cloud Storage
bucket.
- `packages/api` - the backend API service
- `packages/web` - the web frontend (can easily be deployed to vercel)
- `packages/puppeteer-parse` - the content fetching service (can easily
be deployed as an AWS lambda or GCP Cloud Function)
Additionally, you will need to run our database migrations to initialize
your database. These are dockerized and can be run with the
`packages/db` service.
A guide for running a self hosted server can be found [here](./self-hosting/GUIDE.md)
## License

View File

@ -1,5 +1,12 @@
# Omnivore - Android
## Setup with gradle
In case you do not have Android Studio and you do not want to install it, you may want to use gradlew scripts to build the application.
`./gradlew assembleDebug` should create `./app/build/outputs/apk/debug/app-debug.apk`
## Setup
From the root directory run the following command:

View File

@ -10,6 +10,7 @@ import app.omnivore.omnivore.R
import app.omnivore.omnivore.core.data.DataService
import app.omnivore.omnivore.core.datastore.DatastoreRepository
import app.omnivore.omnivore.core.datastore.omnivoreAuthToken
import app.omnivore.omnivore.core.datastore.omnivoreSelfHostedApiServer
import app.omnivore.omnivore.graphql.generated.UpdatePageMutation
import app.omnivore.omnivore.graphql.generated.type.UpdatePageInput
import app.omnivore.omnivore.utils.Constants
@ -48,6 +49,12 @@ class EditInfoViewModel @Inject constructor(
datastoreRepo.getString(omnivoreAuthToken)
}
fun baseUrl() = runBlocking {
datastoreRepo.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL
}
private fun serverUrl() = "${baseUrl()}/api/graphql"
fun editInfo(itemId: String, title: String, author: String?, description: String?) {
viewModelScope.launch {
isLoading = true
@ -62,7 +69,7 @@ class EditInfoViewModel @Inject constructor(
}
val apolloClient = ApolloClient.Builder()
.serverUrl("${Constants.apiURL}/api/graphql")
.serverUrl(serverUrl())
.addHttpHeader("Authorization", value = authToken)
.build()

View File

@ -176,6 +176,12 @@ class OnboardingViewModel @Inject constructor(
resetPendingEmailUserCreds()
}
fun baseUrl() = runBlocking {
datastoreRepository.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL
}
private fun serverUrl() = "${baseUrl()}/api/graphql"
fun validateUsername(potentialUsername: String) {
validateUsernameJob?.cancel()
@ -209,7 +215,7 @@ class OnboardingViewModel @Inject constructor(
}
val apolloClient =
ApolloClient.Builder().serverUrl("${Constants.apiURL}/api/graphql").build()
ApolloClient.Builder().serverUrl(serverUrl()).build()
try {
val response = apolloClient.query(

View File

@ -16,6 +16,7 @@ import androidx.work.WorkerParameters
import app.omnivore.omnivore.R
import app.omnivore.omnivore.core.datastore.DatastoreRepository
import app.omnivore.omnivore.core.datastore.omnivoreAuthToken
import app.omnivore.omnivore.core.datastore.omnivoreSelfHostedApiServer
import app.omnivore.omnivore.graphql.generated.SaveUrlMutation
import app.omnivore.omnivore.graphql.generated.type.SaveUrlInput
import app.omnivore.omnivore.utils.Constants
@ -59,11 +60,16 @@ class SaveURLWorker @AssistedInject constructor(
}
}
suspend fun baseUrl() =
datastoreRepository.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL
private suspend fun serverUrl() = "${baseUrl()}/api/graphql"
private suspend fun saveURL(url: String): Boolean {
val authToken = datastoreRepository.getString(omnivoreAuthToken) ?: return false
val apolloClient = ApolloClient.Builder()
.serverUrl("${Constants.apiURL}/api/graphql")
.serverUrl(serverUrl())
.addHttpHeader("Authorization", value = authToken)
.build()

View File

@ -11,6 +11,7 @@ import androidx.lifecycle.viewModelScope
import app.omnivore.omnivore.R
import app.omnivore.omnivore.core.datastore.DatastoreRepository
import app.omnivore.omnivore.core.datastore.omnivoreAuthToken
import app.omnivore.omnivore.core.datastore.omnivoreSelfHostedApiServer
import app.omnivore.omnivore.graphql.generated.SaveUrlMutation
import app.omnivore.omnivore.graphql.generated.type.SaveUrlInput
import app.omnivore.omnivore.utils.Constants
@ -71,6 +72,12 @@ class SaveViewModel @Inject constructor(
return null
}
fun baseUrl() = runBlocking {
datastoreRepo.getString(omnivoreSelfHostedApiServer) ?: Constants.apiURL
}
private fun serverUrl() = "${baseUrl()}/api/graphql"
fun saveURL(url: String) {
viewModelScope.launch {
isLoading = true
@ -86,7 +93,7 @@ class SaveViewModel @Inject constructor(
}
val apolloClient = ApolloClient.Builder()
.serverUrl("${Constants.apiURL}/api/graphql")
.serverUrl(serverUrl())
.addHttpHeader("Authorization", value = authToken)
.build()

Binary file not shown.

After

Width:  |  Height:  |  Size: 165 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 86 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 452 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 543 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 312 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 202 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 207 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 63 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 48 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 120 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 260 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 160 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 140 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 282 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 302 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 305 KiB

View File

@ -1,4 +1,4 @@
FROM willnorris/imageproxy:v0.10.0 as build
FROM ghcr.io/willnorris/imageproxy:main as build
# Above imageproxy image is built from scratch image and is barebones
# Switching over to ubuntu base image to allow us to debug better.

View File

@ -33,7 +33,7 @@
"graphql-tag": "^2.11.0",
"lerna": "^7.4.1",
"prettier": "^2.5.1",
"typescript": "4.5.2"
"typescript": "5.7.3"
},
"volta": {
"node": "18.16.1",

View File

@ -4,13 +4,27 @@
"project": "tsconfig.json"
},
"rules": {
"@typescript-eslint/no-unsafe-argument": 0
"@typescript-eslint/no-unsafe-argument": 0,
"@typescript-eslint/no-unsafe-member-access": 0,
"@typescript-eslint/no-unnecessary-type-assertion": 0,
"@typescript-eslint/no-unsafe-assignment": 0,
"@typescript-eslint/no-unsafe-return": ["warn"],
"@typescript-eslint/no-unsafe-call" : ["warn"],
"@typescript-eslint/restrict-template-expressions": ["warn"],
"@typescript-eslint/no-misused-promises": ["warn"],
"@typescript-eslint/ban-ts-comment": ["warn"],
"@typescript-eslint/no-unused-vars": 0
},
"overrides": [
{
"files": ["test/**/*.ts"],
"rules": {
"@typescript-eslint/no-unsafe-member-access": 0
"@typescript-eslint/no-unsafe-member-access": 0,
"@typescript-eslint/no-unnecessary-type-assertion": 0,
"@typescript-eslint/no-unsafe-assignment": 0,
"@typescript-eslint/no-unsafe-return": ["warn"],
"@typescript-eslint/no-unsafe-call" : ["warn"],
"@typescript-eslint/no-unused-vars": 0
}
}
]

View File

@ -1,51 +1,37 @@
FROM node:18.16 as builder
FROM node:22.12 AS builder
WORKDIR /app
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
RUN apt-get update && apt-get install -y g++ make python3
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
RUN apt-get update && apt-get install -y g++ make python3 && apt-get clean && rm -rf /var/lib/apt/lists/*
COPY /packages/readabilityjs/package.json ./packages/readabilityjs/package.json
COPY /packages/api/package.json ./packages/api/package.json
COPY /packages/text-to-speech/package.json ./packages/text-to-speech/package.json
COPY /packages/content-handler/package.json ./packages/content-handler/package.json
COPY /packages/liqe/package.json ./packages/liqe/package.json
COPY /packages/utils/package.json ./packages/utils/package.json
COPY package.json yarn.lock tsconfig.json .prettierrc .eslintrc ./
COPY packages ./packages
RUN yarn install --pure-lockfile
# Remove all except needed packages
RUN find packages -mindepth 1 -type d \
! -regex '^packages/\(api\|readabilityjs\|text-to-speech\|content-handler\|liqe\|utils\)\(/.*\)?' \
-exec rm -rf {} +
ADD /packages/readabilityjs ./packages/readabilityjs
ADD /packages/api ./packages/api
ADD /packages/text-to-speech ./packages/text-to-speech
ADD /packages/content-handler ./packages/content-handler
ADD /packages/liqe ./packages/liqe
ADD /packages/utils ./packages/utils
RUN yarn install --pure-lockfile && \
yarn workspace @omnivore/utils build && \
yarn workspace @omnivore/text-to-speech-handler build && \
yarn workspace @omnivore/content-handler build && \
yarn workspace @omnivore/liqe build && \
yarn workspace @omnivore/api build && \
rm -rf /app/packages/api/node_modules /app/node_modules && \
yarn install --pure-lockfile --production
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/text-to-speech-handler build
RUN yarn workspace @omnivore/content-handler build
RUN yarn workspace @omnivore/liqe build
RUN yarn workspace @omnivore/api build
# After building, fetch the production dependencies
RUN rm -rf /app/packages/api/node_modules
RUN rm -rf /app/node_modules
RUN yarn install --pure-lockfile --production
FROM node:18.16 as runner
FROM node:22.12-alpine AS runner
LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore"
RUN apt-get update && apt-get install -y netcat-openbsd
RUN apk update && apk add netcat-openbsd && rm -rf /var/cache/apk/*
WORKDIR /app
ENV NODE_ENV production
ENV NODE_ENV=production
ENV NODE_OPTIONS=--max-old-space-size=4096
ENV PORT=8080
@ -59,6 +45,7 @@ COPY --from=builder /app/packages/text-to-speech/ /app/packages/text-to-speech/
COPY --from=builder /app/packages/content-handler/ /app/packages/content-handler/
COPY --from=builder /app/packages/liqe/ /app/packages/liqe/
COPY --from=builder /app/packages/utils/ /app/packages/utils/
EXPOSE 8080
CMD ["yarn", "workspace", "@omnivore/api", "start"]

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine
FROM node:22.12-alpine
WORKDIR /app

View File

@ -120,11 +120,15 @@
"voca": "^1.4.0",
"winston": "^3.3.3",
"yaml": "^2.4.1",
"youtubei": "^1.5.4"
"youtubei": "^1.5.4",
"@aws-sdk/client-s3": "^3.679.0",
"@aws-sdk/s3-request-presigner": "^3.679.0",
"@aws-sdk/lib-storage": "^3.679.0"
},
"devDependencies": {
"@istanbuljs/nyc-config-typescript": "^1.0.2",
"@types/addressparser": "^1.0.1",
"@types/lodash": "^4.17.14",
"@types/analytics-node": "^3.1.7",
"@types/archiver": "^6.0.2",
"@types/bcryptjs": "^2.4.2",
@ -168,11 +172,10 @@
"postgrator": "^4.2.0",
"sinon": "^14.0.0",
"sinon-chai": "^3.7.0",
"ts-node-dev": "^1.1.8"
},
"engines": {
"node": "18.16.1"
"ts-node-dev": "^1.1.8",
"typescript": "5.7.3"
},
"volta": {
"extends": "../../package.json"
}

View File

@ -0,0 +1,61 @@
FROM node:22.12 as builder
WORKDIR /app
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD true
RUN apt-get update && apt-get install -y g++ make python3
COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
COPY /packages/readabilityjs/package.json ./packages/readabilityjs/package.json
COPY /packages/api/package.json ./packages/api/package.json
COPY /packages/text-to-speech/package.json ./packages/text-to-speech/package.json
COPY /packages/content-handler/package.json ./packages/content-handler/package.json
COPY /packages/liqe/package.json ./packages/liqe/package.json
COPY /packages/utils/package.json ./packages/utils/package.json
RUN yarn install --pure-lockfile
ADD /packages/readabilityjs ./packages/readabilityjs
ADD /packages/api ./packages/api
ADD /packages/text-to-speech ./packages/text-to-speech
ADD /packages/content-handler ./packages/content-handler
ADD /packages/liqe ./packages/liqe
ADD /packages/utils ./packages/utils
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/text-to-speech-handler build
RUN yarn workspace @omnivore/content-handler build
RUN yarn workspace @omnivore/liqe build
RUN yarn workspace @omnivore/api build
# After building, fetch the production dependencies
RUN rm -rf /app/packages/api/node_modules
RUN rm -rf /app/node_modules
RUN yarn install --pure-lockfile --production
FROM node:22.12 as runner
LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore"
RUN apt-get update && apt-get install -y netcat-openbsd
WORKDIR /app
ENV NODE_ENV production
COPY --from=builder /app/packages/api/dist /app/packages/api/dist
COPY --from=builder /app/packages/readabilityjs/ /app/packages/readabilityjs/
COPY --from=builder /app/packages/api/package.json /app/packages/api/package.json
COPY --from=builder /app/packages/api/node_modules /app/packages/api/node_modules
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/package.json /app/package.json
COPY --from=builder /app/packages/text-to-speech/ /app/packages/text-to-speech/
COPY --from=builder /app/packages/content-handler/ /app/packages/content-handler/
COPY --from=builder /app/packages/liqe/ /app/packages/liqe/
COPY --from=builder /app/packages/utils/ /app/packages/utils/
CMD ["yarn", "workspace", "@omnivore/api", "start_queue_processor"]

View File

@ -23,6 +23,7 @@ export const appDataSource = new DataSource({
max: env.pg.pool.max,
idleTimeoutMillis: 10000, // 10 seconds
},
replication: env.pg.replication
? {
master: {
@ -42,5 +43,15 @@ export const appDataSource = new DataSource({
},
],
}
: undefined,
: {
defaultMode: 'master',
master: {
host: env.pg.host,
port: env.pg.port,
username: env.pg.userName,
password: env.pg.password,
database: env.pg.dbName,
},
slaves: [],
},
})

View File

@ -73,7 +73,8 @@ import { CACHED_READING_POSITION_PREFIX } from './services/cached_reading_positi
import { logger } from './utils/logger'
import { getQueue } from './queue-processor'
export const EXPORT_QUEUE_NAME = 'omnivore-export-queue'
export const EXPORT_QUEUE_NAME =
process.env['EXPORT_QUEUE_NAME'] ?? 'omnivore-export-queue'
export const createWorker = (connection: ConnectionOptions) =>
new Worker(

View File

@ -1,10 +1,6 @@
import archiver, { Archiver } from 'archiver'
import { v4 as uuidv4 } from 'uuid'
import {
ContentReaderType,
LibraryItem,
LibraryItemState,
} from '../entity/library_item'
import { LibraryItem, LibraryItemState } from '../entity/library_item'
import { TaskState } from '../generated/graphql'
import { findExportById, saveExport } from '../services/export'
import { findHighlightsByLibraryItemId } from '../services/highlights'
@ -17,12 +13,11 @@ import { sendExportJobEmail } from '../services/send_emails'
import { findActiveUser } from '../services/user'
import { logger } from '../utils/logger'
import { highlightToMarkdown } from '../utils/parser'
import {
contentFilePath,
createGCSFile,
generateUploadFilePathName,
} from '../utils/uploads'
import { batch } from 'googleapis/build/src/apis/batch'
import { env } from '../env'
import { storage } from '../repository/storage/storage'
import { File } from '../repository/storage/StorageClient'
import { Readable } from 'stream'
import { contentFilePath, generateUploadFilePathName } from '../utils/uploads'
import { getRepository } from '../repository'
import { UploadFile } from '../entity/upload_file'
@ -31,6 +26,12 @@ export interface ExportJobData {
exportId: string
}
const bucketName = env.fileUpload.gcsUploadBucket
const createGCSFile = (filename: string): File => {
return storage.createFile(bucketName, filename)
}
export const EXPORT_JOB_NAME = 'export'
const itemStateMappping = (state: LibraryItemState) => {
@ -61,7 +62,7 @@ const uploadContent = async (
const file = createGCSFile(filePath)
// check if file is already uploaded
const [exists] = await file.exists()
const exists = await file.exists()
if (!exists) {
logger.info(`File not found: ${filePath}`)
@ -81,10 +82,14 @@ const uploadContent = async (
contentType: 'text/html',
private: true,
})
archive.append(Readable.from(item.readableContent), {
name: `content/${libraryItem.slug}.html`,
})
}
// append the existing file to the archive
archive.append(file.createReadStream(), {
const content = await file.download()
archive.append(Readable.from(content.toString()), {
name: `content/${libraryItem.slug}.html`,
})
}
@ -97,17 +102,19 @@ const uploadPdfContent = async (
id: libraryItem.uploadFileId,
})
if (!upload || !upload.fileName) {
console.log(`upload does not have a filename: ${upload}`)
console.log(
`upload does not have a filename: ${upload?.fileName ?? 'empty'}`
)
return
}
const filePath = generateUploadFilePathName(upload.id, upload.fileName)
const file = createGCSFile(filePath)
const [exists] = await file.exists()
const exists = await file.exists()
if (exists) {
console.log(`adding PDF file: ${filePath}`)
// append the existing file to the archive
archive.append(file.createReadStream(), {
archive.append(await file.download(), {
name: `content/${libraryItem.slug}.pdf`,
})
}
@ -238,9 +245,18 @@ export const exportJob = async (jobData: ExportJobData) => {
// Create a write stream
const writeStream = file.createWriteStream({
metadata: {
contentType: 'application/zip',
},
contentType: 'application/zip',
})
const finishedPromise = new Promise<void>((resolve, reject) => {
if (writeStream.closed) {
resolve()
}
writeStream.on('finish', () => {
logger.info('File successfully written to GCS')
resolve()
})
writeStream.on('error', reject)
})
// Handle any errors in the streams
@ -248,10 +264,6 @@ export const exportJob = async (jobData: ExportJobData) => {
logger.error('Error writing to GCS:', err)
})
writeStream.on('finish', () => {
logger.info('File successfully written to GCS')
})
// Initialize archiver for zipping files
const archive = archiver('zip', {
zlib: { level: 9 }, // Compression level
@ -264,7 +276,6 @@ export const exportJob = async (jobData: ExportJobData) => {
// Pipe the archiver output to the write stream
archive.pipe(writeStream)
let cursor = 0
try {
// fetch data from the database
@ -305,17 +316,14 @@ export const exportJob = async (jobData: ExportJobData) => {
}
// Ensure that the writeStream has finished
await new Promise((resolve, reject) => {
writeStream.on('finish', resolve)
writeStream.on('error', reject)
})
await finishedPromise
logger.info(`export completed, exported ${cursor} items`, {
userId,
})
// generate a temporary signed url for the zip file
const [signedUrl] = await file.getSignedUrl({
const signedUrl = await storage.signedUrl(bucketName, fullPath, {
action: 'read',
expires: Date.now() + 168 * 60 * 60 * 1000, // one week
})

View File

@ -281,26 +281,34 @@ export const processYouTubeVideo = async (
updatedLibraryItem.publishedAt = new Date(video.uploadDate)
}
// if ('getTranscript' in video && duration > 0 && duration < 1801) {
// // If the video has a transcript available, put a placehold in and
// // enqueue a job to process the full transcript
// const updatedContent = await addTranscriptToReadableContent(
// libraryItem.originalUrl,
// libraryItem.readableContent,
// TRANSCRIPT_PLACEHOLDER_TEXT
// )
if (
'getTranscript' in video &&
duration > 0 &&
duration <
Number(
process.env['YOUTUBE_MAXIMUM_VIDEO_DURATION_TRANSCRIPT'] ?? 1801
) &&
process.env['OPENAI_API_KEY']
) {
// If the video has a transcript available, put a placehold in and
// enqueue a job to process the full transcript
const updatedContent = await addTranscriptToReadableContent(
libraryItem.originalUrl,
libraryItem.readableContent,
TRANSCRIPT_PLACEHOLDER_TEXT
)
// if (updatedContent) {
// updatedLibraryItem.readableContent = updatedContent
// }
if (updatedContent) {
updatedLibraryItem.readableContent = updatedContent
}
// await enqueueProcessYouTubeTranscript({
// videoId,
// ...jobData,
// })
// }
await enqueueProcessYouTubeTranscript({
videoId,
...jobData,
})
}
if (updatedLibraryItem !== {}) {
if (Object.keys(updatedLibraryItem).length > 0) {
await updateLibraryItem(
jobData.libraryItemId,
updatedLibraryItem,

View File

@ -56,7 +56,10 @@ import {
PROCESS_YOUTUBE_VIDEO_JOB_NAME,
} from './jobs/process-youtube-video'
import { pruneTrashJob, PRUNE_TRASH_JOB } from './jobs/prune_trash'
import { refreshAllFeeds } from './jobs/rss/refreshAllFeeds'
import {
REFRESH_ALL_FEEDS_JOB_NAME,
refreshAllFeeds,
} from './jobs/rss/refreshAllFeeds'
import { refreshFeed } from './jobs/rss/refreshFeed'
import { savePageJob } from './jobs/save_page'
import {
@ -159,25 +162,25 @@ export const createWorker = (connection: ConnectionOptions) =>
async (job: Job) => {
const executeJob = async (job: Job) => {
switch (job.name) {
// case 'refresh-all-feeds': {
// const queue = await getQueue()
// const counts = await queue?.getJobCounts('prioritized')
// if (counts && counts.wait > 1000) {
// return
// }
// return await refreshAllFeeds(appDataSource)
// }
// case 'refresh-feed': {
// return await refreshFeed(job.data)
// }
case 'refresh-all-feeds': {
const queue = await getQueue()
const counts = await queue?.getJobCounts('prioritized')
if (counts && counts.wait > 1000) {
return
}
return await refreshAllFeeds(appDataSource)
}
case 'refresh-feed': {
return await refreshFeed(job.data)
}
case 'save-page': {
return savePageJob(job.data, job.attemptsMade)
}
// case 'update-pdf-content': {
// return updatePDFContentJob(job.data)
// }
// case THUMBNAIL_JOB:
// return findThumbnail(job.data)
case 'update-pdf-content': {
return updatePDFContentJob(job.data)
}
case THUMBNAIL_JOB:
return findThumbnail(job.data)
case TRIGGER_RULE_JOB_NAME:
return triggerRule(job.data)
case UPDATE_LABELS_JOB:
@ -194,10 +197,13 @@ export const createWorker = (connection: ConnectionOptions) =>
return exportItem(job.data)
// case AI_SUMMARIZE_JOB_NAME:
// return aiSummarize(job.data)
// case PROCESS_YOUTUBE_VIDEO_JOB_NAME:
// return processYouTubeVideo(job.data)
// case PROCESS_YOUTUBE_TRANSCRIPT_JOB_NAME:
// return processYouTubeTranscript(job.data)
case PROCESS_YOUTUBE_VIDEO_JOB_NAME:
return processYouTubeVideo(job.data)
case PROCESS_YOUTUBE_TRANSCRIPT_JOB_NAME:
if (process.env['OPENAI_API_KEY']) {
return processYouTubeTranscript(job.data)
}
break
case EXPORT_ALL_ITEMS_JOB_NAME:
return exportAllItems(job.data)
case SEND_EMAIL_JOB:
@ -218,8 +224,8 @@ export const createWorker = (connection: ConnectionOptions) =>
// return updateHome(job.data)
// case SCORE_LIBRARY_ITEM_JOB:
// return scoreLibraryItem(job.data)
// case GENERATE_PREVIEW_CONTENT_JOB:
// return generatePreviewContent(job.data)
case GENERATE_PREVIEW_CONTENT_JOB:
return generatePreviewContent(job.data)
case PRUNE_TRASH_JOB:
return pruneTrashJob(job.data)
case EXPIRE_FOLDERS_JOB_NAME:
@ -260,6 +266,17 @@ const setupCronJobs = async () => {
},
}
)
await queue.add(
REFRESH_ALL_FEEDS_JOB_NAME,
{},
{
priority: getJobPriority(REFRESH_ALL_FEEDS_JOB_NAME),
repeat: {
every: 14_400_000, // 4 Hours
},
}
)
}
const main = async () => {

View File

@ -0,0 +1,88 @@
import {
SignedUrlParameters,
StorageClient,
File,
SaveOptions,
SaveData,
} from './StorageClient'
import { Storage, File as GCSFile } from '@google-cloud/storage'
export class GcsStorageClient implements StorageClient {
private storage: Storage
constructor(keyFilename: string | undefined) {
this.storage = new Storage({
keyFilename,
})
}
private convertFileToGeneric(gcsFile: GCSFile): File {
return {
isPublic: async () => {
const [isPublic] = await gcsFile.isPublic()
return isPublic
},
exists: async () => (await gcsFile.exists())[0],
download: async () => (await gcsFile.download())[0],
bucket: gcsFile.bucket.name,
publicUrl: () => gcsFile.publicUrl(),
getMetadataMd5: async () => {
const [metadata] = await gcsFile.getMetadata()
return metadata.md5Hash
},
createWriteStream: (saveOptions: SaveOptions) =>
gcsFile.createWriteStream({
metadata: { contentType: saveOptions.contentType },
}),
save: (saveData: SaveData, saveOptions: SaveOptions) =>
gcsFile.save(saveData, saveOptions),
key: gcsFile.name,
}
}
downloadFile(bucket: string, filePath: string): Promise<File> {
const file = this.storage.bucket(bucket).file(filePath)
return Promise.resolve(this.convertFileToGeneric(file))
}
createFile(bucket: string, filePath: string): File {
return this.convertFileToGeneric(this.storage.bucket(bucket).file(filePath))
}
async getFilesFromPrefix(bucket: string, prefix: string): Promise<File[]> {
const [filesWithPrefix] = await this.storage
.bucket(bucket)
.getFiles({ prefix })
return filesWithPrefix.map((it: GCSFile) => this.convertFileToGeneric(it))
}
async signedUrl(
bucket: string,
filePath: string,
options: SignedUrlParameters
): Promise<string> {
const [url] = await this.storage
.bucket(bucket)
.file(filePath)
.getSignedUrl({ ...options, version: 'v4' })
return url
}
upload(
bucket: string,
filePath: string,
data: Buffer,
options: {
contentType?: string
public?: boolean
timeout?: number
}
): Promise<void> {
return this.storage
.bucket(bucket)
.file(filePath)
.save(data, { timeout: 30000, ...options })
}
}

View File

@ -0,0 +1,261 @@
import {
SignedUrlParameters,
StorageClient,
File,
SaveOptions,
SaveData,
} from './StorageClient'
import { Upload } from '@aws-sdk/lib-storage'
import {
GetObjectCommand,
GetObjectCommandOutput,
S3Client,
ListObjectsV2Command,
PutObjectCommand,
HeadObjectCommand,
S3ServiceException,
} from '@aws-sdk/client-s3'
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
import { Readable } from 'stream'
import * as stream from 'node:stream'
// While this is listed as S3, for self hosting we will use MinIO, which is
// S3 Compatible.
export class S3StorageClient implements StorageClient {
BlankFile = class implements File {
bucket: string
key: string
s3Client: S3StorageClient
downloadedFile: File | undefined
constructor(s3StorageClass: S3StorageClient, bucket: string, file: string) {
this.bucket = bucket
this.key = file
this.s3Client = s3StorageClass
}
isPublic() {
return Promise.resolve(true)
}
publicUrl() {
return `${this.s3Client.localUrl ?? ''}/${this.bucket}/${this.key}`
}
async download(): Promise<Buffer> {
this.downloadedFile = await this.s3Client.downloadFile(
this.bucket,
this.key
)
return this.downloadedFile.download()
}
async exists() {
try {
await this.s3Client.s3Client.send(
new HeadObjectCommand({
Bucket: this.bucket,
Key: this.key,
})
)
return true
} catch (e) {
if (
e instanceof S3ServiceException &&
e.$metadata.httpStatusCode == 404
) {
return false
}
throw e
}
}
save(saveData: SaveData, saveOptions: SaveOptions): Promise<void> {
return this.s3Client.upload(this.bucket, this.key, saveData, saveOptions)
}
createWriteStream(saveOptions: SaveOptions) {
return this.s3Client.createS3UploadStream(
this.bucket,
this.key,
saveOptions
)
}
getMetadataMd5() {
return this.downloadedFile?.getMetadataMd5() || Promise.resolve('')
}
}
private s3Client: S3Client
private signingS3Client: S3Client
private urlOverride: string | undefined
private localUrl: string | undefined
constructor(localUrl: string | undefined, urlOverride: string | undefined) {
this.localUrl = localUrl
this.urlOverride = urlOverride
this.s3Client = new S3Client({
forcePathStyle: true,
endpoint: urlOverride,
})
this.signingS3Client = new S3Client({
forcePathStyle: true,
endpoint: localUrl,
})
}
private createS3UploadStream = (
bucket: string,
key: string,
saveOptions: SaveOptions
) => {
const passThroughStream = new stream.PassThrough()
const upload = new Upload({
client: this.s3Client,
params: {
Bucket: bucket,
Key: key,
Body: passThroughStream,
ContentType: saveOptions.contentType,
},
})
void upload.done().then((res) => {
console.log(`Successfully Uploaded File ${res.Key ?? ''}`)
})
return passThroughStream
}
private convertFileToGeneric(
s3File: GetObjectCommandOutput,
bucket: string,
key: string
): File {
return {
exists: () => {
return Promise.resolve(s3File.$metadata.httpStatusCode == 200)
},
save: async () => Promise.resolve(),
isPublic: async () => Promise.resolve(true),
download: async () => this.getFileFromReadable(s3File.Body as Readable),
getMetadataMd5: () => Promise.resolve(s3File.ETag),
createWriteStream: (saveOptions: SaveOptions) =>
this.createS3UploadStream(bucket, key, saveOptions),
publicUrl: () => `${this.localUrl ?? ''}/${bucket}/${key}`,
bucket,
key,
}
}
private getFileFromReadable(stream: Readable): Promise<Buffer> {
return new Promise<Buffer>((resolve, reject) => {
const chunks: Buffer[] = []
stream.on('data', (chunk) => chunks.push(chunk))
stream.once('end', () => resolve(Buffer.concat(chunks)))
stream.once('error', reject)
})
}
async downloadFile(bucket: string, filePath: string): Promise<File> {
const s3File = await this.s3Client.send(
new GetObjectCommand({
Bucket: bucket,
Key: filePath, // path to the file you want to download,
})
)
return this.convertFileToGeneric(s3File, bucket, filePath)
}
createFile(bucket: string, filePath: string): File {
return new this.BlankFile(this, bucket, filePath) as unknown as File
}
async getFilesFromPrefix(bucket: string, prefix: string): Promise<File[]> {
const s3PrefixedFiles = await this.s3Client.send(
new ListObjectsV2Command({
Bucket: bucket,
Prefix: prefix, // path to the file you want to download,
})
)
const prefixKeys = s3PrefixedFiles.CommonPrefixes || []
return prefixKeys
.map(({ Prefix }) => Prefix)
.map((key: string | undefined) => {
return {
key: key || '',
exists: () => Promise.resolve(true),
isPublic: async () => Promise.resolve(true),
download: async () => {
const s3File = await this.s3Client.send(
new GetObjectCommand({
Bucket: bucket,
Key: key, // path to the file you want to download,
})
)
return this.getFileFromReadable(s3File.Body as Readable)
},
save: () => Promise.resolve(),
createWriteStream: (saveOptions: SaveOptions) =>
new stream.PassThrough(),
getMetadataMd5: () => Promise.resolve(key),
bucket: bucket,
publicUrl: () => `${this.localUrl ?? ''}/${bucket}/${key ?? ''}`,
}
})
}
async signedUrl(
bucket: string,
filePath: string,
options: SignedUrlParameters
): Promise<string> {
const command =
options.action == 'read'
? new GetObjectCommand({
Bucket: bucket,
Key: filePath, // path to the file you want to download,
})
: new PutObjectCommand({
Bucket: bucket,
Key: filePath, // path to the file you want to download,
})
// eslint-disable-next-line @typescript-eslint/no-unsafe-call
const url = await getSignedUrl(this.signingS3Client, command, {
expiresIn: 900,
})
return url
}
async upload(
bucket: string,
filePath: string,
data: SaveData,
options: {
contentType?: string
public?: boolean
timeout?: number
}
): Promise<void> {
await this.s3Client.send(
new PutObjectCommand({
Bucket: bucket,
Key: filePath,
Body: data.toString(),
ContentType: options.contentType,
})
)
}
}

View File

@ -0,0 +1,49 @@
import { PipelineSource, Writable } from 'stream'
/** Parameters accepted when generating a pre-signed URL. */
export type SignedUrlParameters = {
  // Operation the URL grants access to.
  action: 'read' | 'write' | 'delete' | 'resumable'
  // Absolute expiry time in milliseconds since the epoch
  // (callers pass Date.now() + ttl; see uploads.ts).
  expires: number
}

// Payload shape accepted by save()/upload(); mirrors GCS SaveData.
export type SaveData = string | Buffer | PipelineSource<string | Buffer>

/** Options accepted when persisting data; mirrors GCS SaveOptions. */
export type SaveOptions = {
  contentType?: string
  gzip?: string | boolean
  resumable?: boolean
  timeout?: number
  validation?: string | boolean
  private?: boolean | undefined
}

/**
 * Provider-agnostic handle to a stored object. Implementations may be lazy:
 * exists()/download() can hit the backend on demand rather than at
 * construction time.
 */
export type File = {
  isPublic: () => Promise<boolean>
  publicUrl: () => string
  download: () => Promise<Buffer>
  exists: () => Promise<boolean>
  save: (saveData: SaveData, saveOptions: SaveOptions) => Promise<void>
  createWriteStream: (saveOptions: SaveOptions) => Writable
  // MD5 of the stored object if the backend reports one
  // (base64-encoded for GCS — see uploads.ts).
  getMetadataMd5: () => Promise<string | undefined>
  bucket: string
  key: string
}

/**
 * Abstraction over blob storage so the application can run against either
 * Google Cloud Storage or any S3-compatible store (e.g. MinIO).
 */
export interface StorageClient {
  downloadFile(bucket: string, filePath: string): Promise<File>
  createFile(bucket: string, filePath: string): File
  getFilesFromPrefix(bucket: string, filePrefix: string): Promise<File[]>
  upload(
    bucket: string,
    filePath: string,
    data: Buffer,
    options: { contentType?: string; public?: boolean; timeout?: number }
  ): Promise<void>
  signedUrl(
    bucket: string,
    filePath: string,
    options: SignedUrlParameters
  ): Promise<string>
}

View File

@ -0,0 +1,10 @@
import { env } from '../../env'
import { S3StorageClient } from './S3StorageClient'
import { GcsStorageClient } from './GcsStorageClient'
// Select the storage backend once at startup:
//  - when useLocalStorage (GCS_USE_LOCAL_HOST=true): an S3-compatible client
//    pointed at MinIO — localMinioUrl is the externally reachable endpoint
//    used for signing/public URLs, internalMinioUrl the in-cluster one;
//  - otherwise: Google Cloud Storage, optionally with a service-account key.
export const storage = env.fileUpload.useLocalStorage
  ? new S3StorageClient(
      env.fileUpload.localMinioUrl,
      env.fileUpload.internalMinioUrl
    )
  : new GcsStorageClient(env.fileUpload?.gcsUploadSAKeyFilePath ?? undefined)

View File

@ -510,12 +510,12 @@ export const saveArticleReadingProgressResolver = authorized<
}
}
if (env.redis.cache && env.redis.mq) {
if (force) {
// clear any cached values.
await clearCachedReadingPosition(uid, id)
}
if (env.redis.cache && force) {
// clear any cached values.
await clearCachedReadingPosition(uid, id)
}
if (env.redis.cache && env.redis.mq && !force) {
// If redis caching and queueing are available we delay this write
const updatedProgress =
await dataSources.readingProgress.updateReadingProgress(uid, id, {

View File

@ -31,7 +31,7 @@ async function fetchApplePublicKey(kid: string): Promise<string | null> {
try {
const key: jwksClient.SigningKey = await new Promise((resolve, reject) => {
client.getSigningKey(kid, (error, result) => {
if (error) {
if (error || result === undefined) {
return reject(error)
}
return resolve(result)

View File

@ -29,6 +29,7 @@ import {
import { analytics } from '../../utils/analytics'
import {
comparePassword,
generateVerificationToken,
hashPassword,
setAuthInCookie,
verifyToken,
@ -544,7 +545,7 @@ export function authRouter() {
try {
// hash password
const hashedPassword = await hashPassword(password)
await createUser({
const [user] = await createUser({
email: trimmedEmail,
provider: 'EMAIL',
sourceUserId: trimmedEmail,
@ -553,12 +554,17 @@ export function authRouter() {
pictureUrl,
bio,
password: hashedPassword,
pendingConfirmation: true,
pendingConfirmation: !env.dev.autoVerify,
})
res.redirect(
`${env.client.url}/auth/verify-email?message=SIGNUP_SUCCESS`
)
if (env.dev.autoVerify) {
const token = await generateVerificationToken({ id: user.id })
res.redirect(`${env.client.url}/auth/confirm-email/${token}`)
} else {
res.redirect(
`${env.client.url}/auth/verify-email?message=SIGNUP_SUCCESS`
)
}
} catch (e) {
logger.info('email-signup exception:', e)
if (isErrorWithCode(e)) {

View File

@ -13,6 +13,7 @@ import {
} from '../auth_types'
import { decodeGoogleToken } from '../google_auth'
import { createPendingUserToken, suggestedUsername } from '../jwt_helpers'
import { env } from '../../../env'
export async function createMobileSignUpResponse(
isAndroid: boolean,
@ -66,7 +67,7 @@ export async function createMobileEmailSignUpResponse(
name: name.trim(),
username: username.trim().toLowerCase(),
password: hashedPassword,
pendingConfirmation: true,
pendingConfirmation: !env.dev.autoVerify,
})
return {

View File

@ -64,6 +64,7 @@ export const createApp = (): Express => {
app.use(cookieParser())
app.use(json({ limit: '100mb' }))
app.use(urlencoded({ limit: '100mb', extended: true }))
// @ts-ignore
app.use(compression())
// set to true if behind a reverse proxy/load balancer

View File

@ -1,4 +1,4 @@
import { diff_match_patch } from 'diff-match-patch'
import { diff_match_patch, patch_obj } from 'diff-match-patch'
import { DeepPartial, In } from 'typeorm'
import { QueryDeepPartialEntity } from 'typeorm/query-builder/QueryPartialEntity'
import { EntityLabel } from '../entity/entity_label'
@ -14,7 +14,7 @@ import { deepDelete } from '../utils/helpers'
import { ItemEvent } from './library_item'
const columnsToDelete = ['user', 'sharedAt', 'libraryItem'] as const
type ColumnsToDeleteType = typeof columnsToDelete[number]
type ColumnsToDeleteType = (typeof columnsToDelete)[number]
export type HighlightEvent = Merge<
Omit<DeepPartial<Highlight>, ColumnsToDeleteType>,
EntityEvent
@ -40,7 +40,7 @@ export const batchGetHighlightsFromLibraryItemIds = async (
export const getHighlightLocation = (patch: string): number | undefined => {
const dmp = new diff_match_patch()
const patches = dmp.patch_fromText(patch)
const patches = dmp.patch_fromText(patch) as unknown as patch_obj[]
return patches[0].start1 || undefined
}

View File

@ -16,7 +16,7 @@ import { deepDelete } from '../utils/helpers'
import { findLibraryItemIdsByLabelId, ItemEvent } from './library_item'
const columnsToDelete = ['description', 'createdAt'] as const
type ColumnsToDeleteType = typeof columnsToDelete[number]
type ColumnsToDeleteType = (typeof columnsToDelete)[number]
export type LabelEvent = Merge<
Omit<DeepPartial<Label>, ColumnsToDeleteType>,
EntityEvent

View File

@ -52,7 +52,7 @@ const columnsToDelete = [
'readableContent',
'feedContent',
] as const
type ColumnsToDeleteType = typeof columnsToDelete[number]
type ColumnsToDeleteType = (typeof columnsToDelete)[number]
type ItemBaseEvent = Merge<
Omit<DeepPartial<LibraryItem>, ColumnsToDeleteType>,
{

View File

@ -113,7 +113,9 @@ const createRandomEmailAddress = (userName: string, length: number): string => {
when rand is sdfsdf-: jacksonh-sdfsdf-e@inbox.omnivore.app
when rand is abcdef: jacksonh-abcdefe@inbox.omnivore.app
*/
return `${userName}-${nanoid(length)}e@${inbox}.omnivore.app`
return `${userName}-${nanoid(length)}e@${
  env.email.domain || `${inbox}.omnivore.app`
}`
}
export const findNewsletterEmailById = async (

View File

@ -137,7 +137,7 @@ export const uploadFile = async (
itemType,
uploadFile: { id: uploadFileData.id },
slug: generateSlug(uploadFilePathName),
state: LibraryItemState.Processing,
state: LibraryItemState.Succeeded,
contentReader: contentReaderForLibraryItem(itemType, uploadFileId),
},
uid

View File

@ -9,7 +9,6 @@ import { NodeTracerProvider } from '@opentelemetry/node'
import { BatchSpanProcessor } from '@opentelemetry/tracing'
import { EventEmitter } from 'events'
import { GraphQLInstrumentation } from '@opentelemetry/instrumentation-graphql'
import { setSpan } from '@opentelemetry/api/build/src/trace/context-utils'
const provider: NodeTracerProvider = new NodeTracerProvider()
@ -46,6 +45,8 @@ if (
}
if (exporter !== undefined) {
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
provider.addSpanProcessor(new BatchSpanProcessor(exporter))
console.info('tracing initialized')
}
@ -78,7 +79,7 @@ export async function traceAs<A>(
const childSpan = async (): Promise<A> => {
const span = tracer.startSpan(spanName, { attributes })
const result = await api.context.with(
setSpan(api.context.active(), span),
api.trace.setSpan(api.context.active(), span),
fn
)
span.end()

View File

@ -73,6 +73,7 @@ export interface BackendEnv {
}
dev: {
isLocal: boolean
autoVerify: boolean
}
queue: {
location: string
@ -94,6 +95,12 @@ export interface BackendEnv {
gcsUploadSAKeyFilePath: string
gcsUploadPrivateBucket: string
dailyUploadLimit: number
useLocalStorage: boolean
localMinioUrl: string
internalMinioUrl: string
}
email: {
domain: string
}
sender: {
message: string
@ -197,10 +204,15 @@ const nullableEnvVars = [
'PG_REPLICA_USER',
'PG_REPLICA_PASSWORD',
'PG_REPLICA_DB',
'AUTO_VERIFY',
'INTERCOM_WEB_SECRET',
'INTERCOM_IOS_SECRET',
'INTERCOM_ANDROID_SECRET',
'EXPORT_TASK_HANDLER_URL',
'LOCAL_MINIO_URL',
'GCS_USE_LOCAL_HOST',
'LOCAL_EMAIL_DOMAIN',
'AWS_S3_ENDPOINT_URL',
] // Allow some vars to be null/empty
const envParser =
@ -240,6 +252,7 @@ export function getEnv(): BackendEnv {
pool: {
max: parseInt(parse('PG_POOL_MAX'), 10),
},
replication: parse('PG_REPLICATION') === 'true',
replica: {
host: parse('PG_REPLICA_HOST'),
@ -249,6 +262,9 @@ export function getEnv(): BackendEnv {
dbName: parse('PG_REPLICA_DB'),
},
}
const email = {
domain: parse('LOCAL_EMAIL_DOMAIN'),
}
const server = {
jwtSecret: parse('JWT_SECRET'),
ssoJwtSecret: parse('SSO_JWT_SECRET'),
@ -288,6 +304,7 @@ export function getEnv(): BackendEnv {
}
const dev = {
isLocal: parse('API_ENV') == 'local',
autoVerify: parse('AUTO_VERIFY') === 'true',
}
const queue = {
location: parse('PUPPETEER_QUEUE_LOCATION'),
@ -318,6 +335,9 @@ export function getEnv(): BackendEnv {
dailyUploadLimit: parse('GCS_UPLOAD_DAILY_LIMIT')
? parseInt(parse('GCS_UPLOAD_DAILY_LIMIT'), 10)
: 5, // default to 5
useLocalStorage: parse('GCS_USE_LOCAL_HOST') == 'true',
localMinioUrl: parse('LOCAL_MINIO_URL'),
internalMinioUrl: parse('AWS_S3_ENDPOINT_URL'),
}
const sender = {
message: parse('SENDER_MESSAGE'),
@ -374,6 +394,7 @@ export function getEnv(): BackendEnv {
return {
pg,
client,
email,
server,
google,
posthog,

View File

@ -2,7 +2,6 @@
/* eslint-disable @typescript-eslint/restrict-template-expressions */
// Imports the Google Cloud Tasks library.
import { CloudTasksClient, protos } from '@google-cloud/tasks'
import { google } from '@google-cloud/tasks/build/protos/protos'
import axios from 'axios'
import { nanoid } from 'nanoid'
import { DeepPartial } from 'typeorm'
@ -74,8 +73,8 @@ import { OmnivoreAuthorizationHeader } from './auth'
import { CreateTaskError } from './errors'
import { stringToHash } from './helpers'
import { logError, logger } from './logger'
import View = google.cloud.tasks.v2.Task.View
import { EXPORT_QUEUE_NAME } from '../export-processor'
import View = protos.google.cloud.tasks.v2.Task.View
// Instantiates a client.
const client = new CloudTasksClient()
@ -285,7 +284,7 @@ export const createAppEngineTask = async ({
export const getTask = async (
taskName: string
): Promise<google.cloud.tasks.v2.ITask> => {
): Promise<protos.google.cloud.tasks.v2.ITask> => {
// If we are in local environment
if (env.dev.isLocal) {
return { name: taskName } as protos.google.cloud.tasks.v2.ITask
@ -307,7 +306,7 @@ export const getTask = async (
export const deleteTask = async (
taskName: string
): Promise<google.protobuf.IEmpty | null> => {
): Promise<protos.google.protobuf.IEmpty | null> => {
// If we are in local environment
if (env.dev.isLocal) {
return taskName

View File

@ -37,7 +37,7 @@ export const apiLimiter = rateLimit({
}
},
keyGenerator: (req) => {
return getTokenByRequest(req) || req.ip
return getTokenByRequest(req) || req.ip || ''
},
store: getStore('api-rate-limit'),
})
@ -56,7 +56,7 @@ export const apiHourLimiter = rateLimit({
}
},
keyGenerator: (req) => {
return getTokenByRequest(req) || req.ip
return getTokenByRequest(req) || req.ip || ''
},
store: getStore('api-hour-rate-limit'),
})

View File

@ -1,12 +1,12 @@
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
import { File, GetSignedUrlConfig, Storage } from '@google-cloud/storage'
import axios from 'axios'
import { ContentReaderType } from '../entity/library_item'
import { env } from '../env'
import { PageType } from '../generated/graphql'
import { ContentFormat } from '../jobs/upload_content'
import { logger } from './logger'
import { storage } from '../repository/storage/storage'
export const contentReaderForLibraryItem = (
itemType: string,
@ -31,14 +31,12 @@ export const contentReaderForLibraryItem = (
* the default app engine service account on the IAM page. We also need to
* enable IAM related APIs on the project.
*/
export const storage = env.fileUpload?.gcsUploadSAKeyFilePath
? new Storage({ keyFilename: env.fileUpload.gcsUploadSAKeyFilePath })
: new Storage()
const bucketName = env.fileUpload.gcsUploadBucket
const maxContentLength = 10 * 1024 * 1024 // 10MB
/** Count objects under `prefix` in the default upload bucket. */
export const countOfFilesWithPrefix = async (prefix: string) => {
  // Diff garbling left the old GCS-specific line and the new abstracted one
  // interleaved here ("files" declared twice); this is the intended version.
  const files = await storage.getFilesFromPrefix(bucketName, prefix)
  return files.length
}
@ -48,40 +46,29 @@ export const generateUploadSignedUrl = async (
selectedBucket?: string
): Promise<string> => {
// These options will allow temporary uploading of file with requested content type
const options: GetSignedUrlConfig = {
const options = {
version: 'v4',
action: 'write',
action: 'write' as const,
expires: Date.now() + 15 * 60 * 1000, // 15 minutes
contentType: contentType,
}
logger.info('signed url for: ', options)
// Get a v4 signed URL for uploading file
const [url] = await storage
.bucket(selectedBucket || bucketName)
.file(filePathName)
.getSignedUrl(options)
return url
return storage.signedUrl(selectedBucket || bucketName, filePathName, options)
}
export const generateDownloadSignedUrl = async (
filePathName: string,
config?: {
bucketName?: string
expires?: number
}
): Promise<string> => {
const options: GetSignedUrlConfig = {
version: 'v4',
action: 'read',
expires: config?.expires ?? Date.now() + 240 * 60 * 1000, // four hours
const options = {
action: 'read' as const,
expires: Date.now() + 240 * 60 * 1000, // four hours
...config,
}
const [url] = await storage
.bucket(config?.bucketName || bucketName)
.file(filePathName)
.getSignedUrl(options)
logger.info(`generating download signed url: ${url}`)
return url
return storage.signedUrl(bucketName, filePathName, options)
}
export const getStorageFileDetails = async (
@ -89,10 +76,10 @@ export const getStorageFileDetails = async (
fileName: string
): Promise<{ md5Hash: string; fileUrl: string }> => {
const filePathName = generateUploadFilePathName(id, fileName)
const file = storage.bucket(bucketName).file(filePathName)
const [metadata] = await file.getMetadata()
const file = await storage.downloadFile(bucketName, filePathName)
const metadataMd5 = await file.getMetadataMd5()
// GCS returns MD5 Hash in base64 encoding, we convert it here to hex string
const md5Hash = Buffer.from(metadata.md5Hash || '', 'base64').toString('hex')
const md5Hash = Buffer.from(metadataMd5 || '', 'base64').toString('hex')
return { md5Hash, fileUrl: file.publicUrl() }
}
@ -110,17 +97,10 @@ export const uploadToBucket = async (
options?: { contentType?: string; public?: boolean; timeout?: number },
selectedBucket?: string
): Promise<void> => {
await storage
.bucket(selectedBucket || bucketName)
.file(filePath)
.save(data, { timeout: 30000, ...options }) // default timeout 30s
}
export const createGCSFile = (
filename: string,
selectedBucket = bucketName
): File => {
return storage.bucket(selectedBucket).file(filename)
await storage.upload(selectedBucket || bucketName, filePath, data, {
timeout: 30000,
...options,
})
}
export const downloadFromUrl = async (
@ -154,16 +134,14 @@ export const uploadToSignedUrl = async (
}
/** Check whether `filePath` exists in the default upload bucket. */
export const isFileExists = async (filePath: string): Promise<boolean> => {
  // downloadFile returns a lazy handle; exists() performs the actual check.
  const file = await storage.downloadFile(bucketName, filePath)
  return file.exists()
}
/** Download the full contents of `filePath` from the default bucket. */
export const downloadFromBucket = async (filePath: string): Promise<Buffer> => {
  const file = await storage.downloadFile(bucketName, filePath)
  // Fetch the object contents as a Buffer.
  return file.download()
}
export const contentFilePath = ({

View File

@ -1,13 +1,13 @@
import { Storage } from '@google-cloud/storage'
import sinon from 'sinon'
import * as uploads from '../src/utils/uploads'
import { MockStorage } from './mock_storage'
export const mochaHooks = {
beforeEach() {
// Mock cloud storage
sinon
.stub(uploads, 'storage')
.value(new MockStorage() as unknown as Storage)
},
}
// import { Storage } from '@google-cloud/storage'
// import sinon from 'sinon'
// import * as uploads from '../src/utils/uploads'
// import { MockStorage } from './mock_storage'
//
// export const mochaHooks = {
// beforeEach() {
// // Mock cloud storage
// sinon
// .stub(uploads, 'storage')
// .value(new MockStorage() as unknown as Storage)
// },
// }

View File

@ -4,6 +4,11 @@
"project": "tsconfig.json"
},
"rules": {
"@typescript-eslint/no-unsafe-assignment": 0,
"@typescript-eslint/no-unnecessary-type-assertion": 0,
"@typescript-eslint/no-unsafe-member-access": ["warn"],
"@typescript-eslint/no-unsafe-call": ["warn"],
"@typescript-eslint/no-unsafe-argument": ["warn"],
"@typescript-eslint/no-floating-promises": [
"error",
{

View File

@ -1,27 +1,22 @@
FROM node:18.16
FROM node:22.12-alpine AS build
LABEL org.opencontainers.image.source="https://github.com/omnivore-app/omnivore"
# Installs latest Chromium package.
RUN apt-get update && apt-get install -y \
chromium \
ca-certificates \
nodejs \
yarn \
# Installs latest Chromium package and other dependencies.
RUN apk -U upgrade \
&& apk add --no-cache \
g++ \
make \
python3
python3 \
py3-pip && \
rm -rf /var/cache/apk/*
WORKDIR /app
ENV CHROMIUM_PATH /usr/bin/chromium
ENV CHROMIUM_PATH=/usr/bin/chromium
ENV FIREFOX_PATH=/usr/bin/firefox
ENV LAUNCH_HEADLESS=true
COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
COPY package.json yarn.lock tsconfig.json .prettierrc .eslintrc ./
COPY /packages/content-fetch/package.json ./packages/content-fetch/package.json
COPY /packages/content-handler/package.json ./packages/content-handler/package.json
COPY /packages/puppeteer-parse/package.json ./packages/puppeteer-parse/package.json
@ -29,21 +24,50 @@ COPY /packages/utils/package.json ./packages/utils/package.json
RUN yarn install --pure-lockfile
ADD /packages/content-fetch ./packages/content-fetch
ADD /packages/content-handler ./packages/content-handler
ADD /packages/puppeteer-parse ./packages/puppeteer-parse
ADD /packages/utils ./packages/utils
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/content-handler build
RUN yarn workspace @omnivore/puppeteer-parse build
RUN yarn workspace @omnivore/content-fetch build
COPY /packages/content-fetch ./packages/content-fetch
COPY /packages/content-handler ./packages/content-handler
COPY /packages/puppeteer-parse ./packages/puppeteer-parse
COPY /packages/utils ./packages/utils
# After building, fetch the production dependencies
RUN rm -rf /app/packages/content-fetch/node_modules
RUN rm -rf /app/node_modules
RUN yarn install --pure-lockfile --production
RUN yarn workspace @omnivore/utils build && \
yarn workspace @omnivore/content-handler build && \
yarn workspace @omnivore/puppeteer-parse build && \
yarn workspace @omnivore/content-fetch build && \
rm -rf /app/packages/content-fetch/node_modules /app/node_modules && \
yarn install --pure-lockfile --production
# Running stage
FROM node:22.12-alpine
RUN echo @edge https://dl-cdn.alpinelinux.org/alpine/edge/community >> /etc/apk/repositories \
&& echo @edge https://dl-cdn.alpinelinux.org/alpine/edge/main >> /etc/apk/repositories \
&& echo @edge https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories \
&& apk -U upgrade \
&& apk add --no-cache \
firefox@edge \
freetype@edge \
ttf-freefont@edge \
nss@edge \
libstdc++@edge \
sqlite-libs@edge \
chromium@edge \
firefox-esr@edge \
ca-certificates@edge && \
rm -rf /var/cache/apk/*
WORKDIR /app
ENV CHROMIUM_PATH=/usr/bin/chromium
ENV FIREFOX_PATH=/usr/bin/firefox
ENV LAUNCH_HEADLESS=true
COPY --from=build /app /app
EXPOSE 8080
CMD ["yarn", "workspace", "@omnivore/content-fetch", "start"]
# In Firefox we can't use the adblocking sites. Adding them to the hosts file of the docker seems to work.
COPY /packages/content-fetch/start.sh .
RUN wget https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts && \
chmod +x start.sh
CMD ["./start.sh"]

View File

@ -305,7 +305,7 @@ export const processFetchContentJob = async (
const savedDate = savedAt ? new Date(savedAt) : new Date()
const { finalUrl, title, content, contentType } = fetchResult
if (content) {
if (content && process.env['SKIP_UPLOAD_ORIGINAL'] !== 'true') {
await uploadOriginalContent(users, content, savedDate.getTime())
}

View File

@ -0,0 +1,3 @@
#!/bin/sh
# Container entry point for the content-fetch service.
# Append the hosts blocklist (downloaded into the image at build time) to
# /etc/hosts: per the Dockerfile, Firefox cannot use the in-browser
# ad-blocking lists, so ad/tracker domains are blocked at the hosts level.
cat hosts >> /etc/hosts
yarn workspace @omnivore/content-fetch start

View File

@ -39,6 +39,7 @@ import { WikipediaHandler } from './websites/wikipedia-handler'
import { YoutubeHandler } from './websites/youtube-handler'
import { ZhihuHandler } from './websites/zhihu-handler'
import { TikTokHandler } from './websites/tiktok-handler'
import { RawContentHandler } from './websites/raw-handler'
const validateUrlString = (url: string): boolean => {
const u = new URL(url)
@ -66,6 +67,7 @@ const contentHandlers: ContentHandler[] = [
new DerstandardHandler(),
new ImageHandler(),
new MediumHandler(),
new RawContentHandler(),
new PdfHandler(),
new ScrapingBeeHandler(),
new TDotCoHandler(),

View File

@ -1,4 +1,6 @@
import { ContentHandler, PreHandleResult } from '../content-handler'
import axios from 'axios'
import { parseHTML } from 'linkedom'
export class MediumHandler extends ContentHandler {
constructor() {
@ -11,13 +13,52 @@ export class MediumHandler extends ContentHandler {
return u.hostname.endsWith('medium.com')
}
addImages(document: Document): Document {
const pictures = document.querySelectorAll('picture')
pictures.forEach((pict) => {
const source = pict.querySelector('source')
if (source) {
const srcSet = source.getAttribute('srcSet')
const sources = (srcSet || '')
.split(', ')
.map((src) => src.split(' '))
.sort((a, b) =>
Number(a[1].replace('w', '')) > Number(b[1].replace('w', ''))
? -1
: 1
)
// This should be the largest image in the source set.
if (sources && sources.length && Array.isArray(sources[0])) {
const url = sources[0][0]
const img = document.createElement('img')
img.src = url
pict.after(img)
pict.remove()
}
}
})
return document
}
async preHandle(url: string): Promise<PreHandleResult> {
console.log('prehandling medium url', url)
try {
const res = new URL(url)
res.searchParams.delete('source')
return Promise.resolve({ url: res.toString() })
const response = await axios.get(res.toString())
const dom = parseHTML(response.data).document
const imageAddedDom = this.addImages(dom)
return {
title: dom.title,
content: imageAddedDom.body.outerHTML,
url: res.toString(),
}
} catch (error) {
console.error('error prehandling medium url', error)
throw error

View File

@ -0,0 +1,33 @@
import { ContentHandler, PreHandleResult } from '../content-handler'
import axios from 'axios'
import { parseHTML } from 'linkedom'
/**
 * Fetches a page's raw HTML directly (no headless browser) for hosts known
 * to serve complete server-rendered content.
 */
export class RawContentHandler extends ContentHandler {
  constructor() {
    super()
    this.name = 'RawContentHandler'
  }

  /** True when the URL's host is one of the raw-fetchable sites. */
  shouldPreHandle(url: string): boolean {
    const u = new URL(url)
    const hostnames = [
      'medium.com',
      'fastcompany.com',
      'fortelabs.com',
      'theverge.com',
    ]
    // Match the domain itself or any subdomain. A bare endsWith() check
    // would also match unrelated hosts such as "nottheverge.com".
    return hostnames.some(
      (h) => u.hostname === h || u.hostname.endsWith(`.${h}`)
    )
  }

  /**
   * GET the page and return its raw HTML plus the parsed <title>.
   * Rethrows on network failure so the caller can fall back.
   */
  async preHandle(url: string): Promise<PreHandleResult> {
    try {
      const response = await axios.get(url)
      const dom = parseHTML(response.data).document
      return { title: dom.title, content: response.data as string, url: url }
    } catch (error) {
      console.error('error prehandling URL', error)
      throw error
    }
  }
}

View File

@ -24,7 +24,9 @@ export class WeixinQqHandler extends ContentHandler {
// create a meta node to store the publish time in ISO format
const metaNode = dom.createElement('meta')
metaNode.setAttribute('name', 'date')
metaNode.setAttribute('content', publishTimeISO)
if (publishTimeISO) {
metaNode.setAttribute('content', publishTimeISO)
}
dom.querySelector('head')?.appendChild(metaNode)
}

View File

@ -1,4 +1,4 @@
FROM node:18.16
FROM node:22.12
WORKDIR /app

View File

@ -1,4 +1,4 @@
FROM node:18.16 as builder
FROM node:22.12 as builder
WORKDIR /app
@ -17,7 +17,7 @@ COPY /packages/discover/tsconfig.json ./packages/discover/tsconfig.json
RUN yarn install --pure-lockfile
RUN yarn workspace @omnivore/discover build
FROM node:18.16 as runner
FROM node:22.12 as runner
WORKDIR /app

View File

@ -3,9 +3,7 @@
/* eslint-disable @typescript-eslint/no-unsafe-return */
/* eslint-disable @typescript-eslint/restrict-template-expressions */
import { OmnivoreArticle } from '../../../../../types/OmnivoreArticle'
import { slugify } from 'voca'
import { Observable, tap } from 'rxjs'
import { fromArrayLike } from 'rxjs/internal/observable/innerFrom'
import { mapOrNull } from '../../../../utils/reactive'
import {

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine
FROM node:22.12-alpine
WORKDIR /app

View File

@ -24,7 +24,7 @@
"eslint-plugin-prettier": "^4.0.0"
},
"dependencies": {
"@google-cloud/functions-framework": "3.1.2",
"@google-cloud/functions-framework": "3.4.5",
"@google-cloud/storage": "^7.0.1",
"@omnivore-app/api": "^1.0.4",
"@omnivore/utils": "1.0.0",

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine
FROM node:22.12-alpine
WORKDIR /app

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine
FROM node:22.12-alpine
WORKDIR /app

View File

@ -9,7 +9,6 @@
"keywords": [],
"license": "Apache-2.0",
"scripts": {
"test": "yarn mocha -r ts-node/register --config mocha-config.json",
"test:typecheck": "tsc --noEmit",
"lint": "eslint src --ext ts,js,tsx,jsx",
"compile": "tsc",
@ -34,7 +33,7 @@
},
"dependencies": {
"@fast-csv/parse": "^5.0.0",
"@google-cloud/functions-framework": "3.1.2",
"@google-cloud/functions-framework": "3.4.5",
"@google-cloud/storage": "^7.0.1",
"@omnivore/readability": "1.0.0",
"@omnivore/utils": "1.0.0",

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine
FROM node:22.12-alpine
# Run everything after as non-privileged user.
WORKDIR /app

View File

@ -33,7 +33,7 @@
"mocha": "^10.0.0"
},
"dependencies": {
"@google-cloud/functions-framework": "3.1.2",
"@google-cloud/functions-framework": "3.4.5",
"@google-cloud/storage": "^7.0.1",
"@omnivore/utils": "1.0.0",
"@sentry/serverless": "^7.77.0",

View File

@ -2,5 +2,9 @@
"extends": "../../.eslintrc",
"parserOptions": {
"project": "tsconfig.json"
},
"rules": {
"@typescript-eslint/no-unsafe-assignment": ["warn"],
"@typescript-eslint/no-unsafe-argument": ["warn"]
}
}

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine
FROM node:22.12-alpine
# Run everything after as non-privileged user.
WORKDIR /app

View File

@ -1,4 +1,4 @@
FROM node:18.16-alpine
FROM node:22.12-alpine
# Run everything after as non-privileged user.
WORKDIR /app

View File

@ -26,7 +26,7 @@
"eslint-plugin-prettier": "^4.0.0"
},
"dependencies": {
"@google-cloud/functions-framework": "3.1.2",
"@google-cloud/functions-framework": "3.4.5",
"@google-cloud/storage": "^7.0.1",
"@sentry/serverless": "^7.77.0",
"axios": "^1.2.2",

View File

@ -0,0 +1,13 @@
{
"extends": "../../.eslintrc",
"parserOptions": {
"project": "tsconfig.json"
},
"rules": {
"@typescript-eslint/no-unsafe-argument": "off",
"@typescript-eslint/no-explicit-any": "off",
"@typescript-eslint/strictNullChecks": "off",
"@typescript-eslint/no-unsafe-member-access": "off",
"@typescript-eslint/no-unsafe-assignment": "off"
}
}

131
packages/local-mail-watcher/.gitignore vendored Normal file
View File

@ -0,0 +1,131 @@
.idea/
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
.pnpm-debug.log*
# Diagnostic reports (https://nodejs.org/api/report.html)
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
.grunt
# Bower dependency directory (https://bower.io/)
bower_components
# node-waf configuration
.lock-wscript
# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release
# Dependency directories
node_modules/
jspm_packages/
# Snowpack dependency directory (https://snowpack.dev/)
web_modules/
# TypeScript cache
*.tsbuildinfo
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Optional stylelint cache
.stylelintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# dotenv environment variable files
.env
.env.development.local
.env.test.local
.env.production.local
.env.local
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
out
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
# Comment in the public line in if your project uses Gatsby and not Next.js
# https://nextjs.org/blog/next-9-1#public-directory-support
# public
# vuepress build output
.vuepress/dist
# vuepress v2.x temp and cache directory
.temp
.cache
# Docusaurus cache and generated files
.docusaurus
# Serverless directories
.serverless/
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# yarn v2
.yarn/cache
.yarn/unplugged
.yarn/build-state.yml
.yarn/install-state.gz
.pnp.*

View File

@ -0,0 +1,40 @@
# Build stage: install workspace deps and compile @omnivore/local-mail-watcher.
FROM node:22.12 AS builder
WORKDIR /app

# Native toolchain for node-gyp based dependencies.
RUN apt-get update && apt-get install -y g++ make python3

COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
COPY /packages/local-mail-watcher/src ./packages/local-mail-watcher/src
COPY /packages/local-mail-watcher/package.json ./packages/local-mail-watcher/package.json
COPY /packages/local-mail-watcher/tsconfig.json ./packages/local-mail-watcher/tsconfig.json
COPY /packages/utils/package.json ./packages/utils/package.json

RUN yarn install --pure-lockfile

ADD /packages/utils ./packages/utils
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/local-mail-watcher build

# Runtime stage: carry over only built artifacts and node_modules.
FROM node:22.12 AS runner
WORKDIR /app

ENV NODE_ENV=production

COPY --from=builder /app/packages/local-mail-watcher/dist /app/packages/local-mail-watcher/dist
COPY --from=builder /app/packages/local-mail-watcher/package.json /app/packages/local-mail-watcher/package.json
COPY --from=builder /app/packages/local-mail-watcher/node_modules /app/packages/local-mail-watcher/node_modules
COPY --from=builder /app/packages/utils/ /app/packages/utils/
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/package.json /app/package.json

CMD ["yarn", "workspace", "@omnivore/local-mail-watcher", "start"]

View File

@ -0,0 +1,39 @@
# Build stage: install workspace deps and compile @omnivore/local-mail-watcher.
FROM node:22.12 AS builder
WORKDIR /app

# Native toolchain for node-gyp based dependencies.
RUN apt-get update && apt-get install -y g++ make python3

COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .prettierrc .
COPY .eslintrc .
COPY /packages/local-mail-watcher/src ./packages/local-mail-watcher/src
COPY /packages/local-mail-watcher/package.json ./packages/local-mail-watcher/package.json
COPY /packages/local-mail-watcher/tsconfig.json ./packages/local-mail-watcher/tsconfig.json
COPY /packages/utils/package.json ./packages/utils/package.json

RUN yarn install --pure-lockfile

ADD /packages/utils ./packages/utils
RUN yarn workspace @omnivore/utils build
RUN yarn workspace @omnivore/local-mail-watcher build

# Runtime stage: carry over only built artifacts and node_modules.
FROM node:22.12 AS runner
WORKDIR /app

ENV NODE_ENV=production

COPY --from=builder /app/packages/local-mail-watcher/dist /app/packages/local-mail-watcher/dist
COPY --from=builder /app/packages/local-mail-watcher/package.json /app/packages/local-mail-watcher/package.json
COPY --from=builder /app/packages/local-mail-watcher/node_modules /app/packages/local-mail-watcher/node_modules
COPY --from=builder /app/packages/utils/ /app/packages/utils/
COPY --from=builder /app/node_modules /app/node_modules
COPY --from=builder /app/package.json /app/package.json

# Filesystem-watcher entrypoint (vs. the SNS/HTTP server image).
CMD ["yarn", "workspace", "@omnivore/local-mail-watcher", "start-watcher"]

View File

@ -0,0 +1,40 @@
{
"name": "@omnivore/local-mail-watcher",
"version": "0.0.1",
"scripts": {
"build": "tsc",
"dev": "ts-node-dev --files src/index.ts",
"start": "node dist/index.js",
"start-watcher": "node dist/watcher.js",
"lint": "eslint src --ext ts,js,tsx,jsx",
"lint:fix": "eslint src --fix --ext ts,js,tsx,jsx",
"test:typecheck": "tsc --noEmit"
},
"dependencies": {
"chokidar": "^4.0.1",
"mailparser": "^3.7.1",
"axios": "^1.7.7",
"express": "^4.21.1",
"bullmq": "^5.22.0",
"@omnivore/utils": "1.0.0"
},
"devDependencies": {
"@types/html-to-text": "^9.0.2",
"@types/jsdom": "^21.1.3",
"@types/mailparser": "^3.4.5",
"@types/axios" : "^0.14.4",
"@types/node": "^22.10.7",
"typescript": "^5.7.3",
"@types/express": "^5.0.0",
"@types/pg": "^8.10.5",
"@types/pg-format": "^1.0.3",
"@types/urlsafe-base64": "^1.0.28",
"@types/uuid": "^9.0.1",
"@types/voca": "^1.4.3",
"ts-node": "^10.9.1",
"tslib": "^2.6.2"
},
"volta": {
"extends": "../../package.json"
}
}

View File

@ -0,0 +1,66 @@
// Connection settings for a single Redis endpoint; cert is a PEM string
// when TLS is required.
interface redisConfig {
  url?: string
  cert?: string
}

// Environment-derived configuration shared by both mail-watcher
// entrypoints (the SNS/HTTP server and the filesystem watcher).
interface WatcherEnv {
  filesystem: {
    // Path watched by chokidar for newly delivered mail files.
    filePath: string
  }
  redis: {
    // BullMQ (message-queue) Redis connection.
    mq: redisConfig
    // Cache Redis connection.
    cache: redisConfig
  }
  sns: {
    // ARN of the SNS topic allowed to post to the /sns endpoint.
    snsArn: string
  }
  // Shared secret expected in the x-api-key header / sent to the API.
  apiKey: string
  // URL the watcher forwards parsed emails to.
  apiEndpoint: string
}
/**
 * Builds a lookup function over the given environment map. The returned
 * function yields the variable's value when it is a non-empty string;
 * otherwise it returns undefined, or throws when throwOnUndefined is set.
 */
const envParser =
  (env: { [key: string]: string | undefined }) =>
  (varName: string, throwOnUndefined = false): string | undefined => {
    const raw = env[varName]
    const present = typeof raw === 'string' && raw !== ''
    if (present) {
      return raw
    }
    if (throwOnUndefined) {
      throw new Error(
        `Missing ${varName} with a non-empty value in process environment`
      )
    }
    return undefined
  }
/**
 * Reads the watcher configuration from process.env.
 *
 * NOTE(review): the non-null assertions below perform no runtime check —
 * a missing MAIL_FILE_PATH / WATCHER_API_KEY simply yields undefined at
 * runtime. envParser supports throwOnUndefined; confirm whether these
 * should use it (throwing here would break the entrypoint that does not
 * need the variable, since env is built at module load).
 */
export function getEnv(): WatcherEnv {
  const parse = envParser(process.env)

  // replace escaped "\n" sequences with real newlines (PEM certs)
  const decodeCert = (cert?: string) => cert?.replace(/\\n/g, '\n')

  return {
    apiKey: parse('WATCHER_API_KEY')!,
    apiEndpoint: parse('WATCHER_API_ENDPOINT')!,
    sns: {
      snsArn: parse('SNS_ARN') || '',
    },
    filesystem: {
      filePath: parse('MAIL_FILE_PATH')!,
    },
    redis: {
      mq: {
        url: parse('MQ_REDIS_URL'),
        cert: decodeCert(parse('MQ_REDIS_CERT')),
      },
      cache: {
        url: parse('REDIS_URL'),
        cert: decodeCert(parse('REDIS_CERT')),
      },
    },
  }
}

// Evaluated once at module load; both entrypoints import this object.
export const env = getEnv()

View File

@ -0,0 +1,122 @@
import { RedisDataSource } from '@omnivore/utils'
import express, { Express, Request, Response } from 'express'
import { env } from './env'
import { getQueue } from './lib/queue'
import { SnsMessage } from './types/SNS'
import { simpleParser } from 'mailparser'
import axios from 'axios'
import { convertToMailObject } from './lib/emailApi'
console.log('Starting worker...')

const app: Express = express()

// Accept bodies as raw text first so the /sns handler can JSON.parse the
// payload itself (SNS presumably posts with a text content type — TODO
// confirm against AWS behavior).
app.use(express.text({ limit: '50mb' }))

// Force JSON for SNS
app.use((req, res, next) => {
  req.headers['content-type'] = 'application/json'
  next()
})

app.use(express.json({ limit: '50mb' }))
app.use(express.urlencoded({ limit: '50mb', extended: true }))

// create redis source
const redisDataSource = new RedisDataSource({
  cache: {
    url: process.env.REDIS_URL,
    cert: process.env.REDIS_CERT,
  },
  mq: {
    url: process.env.MQ_REDIS_URL,
    cert: process.env.MQ_REDIS_CERT,
  },
})

// NOTE: getQueue is async, so this is a Promise; handlers await it per use.
const queue = getQueue(redisDataSource.queueRedisClient)
/**
 * POST /mail handler: authenticates via the x-api-key header and enqueues
 * the raw request body as a "save-newsletter" job for the backend workers.
 */
const addEmailEventToQueue = async (req: Request, res: Response) => {
  const apiKey = req.headers['x-api-key']
  if (!apiKey) {
    res.status(401).send('Unauthorized: API key is missing')
    return
  }
  // Strict comparison also rejects a repeated header (string[]), which
  // loose != would accidentally accept via array-to-string coercion.
  if (apiKey !== env.apiKey) {
    res.status(401).send('Unauthorized: Invalid API Key')
    return
  }
  try {
    const mailQueue = await queue
    await mailQueue.add('save-newsletter', req.body, {
      priority: 1,
      attempts: 1,
      delay: 500,
    })
    res.sendStatus(200)
  } catch (error) {
    // Without this, a failed enqueue rejects the async handler and the
    // request hangs with no response under Express 4.
    console.error('error adding email event to queue', error)
    res.status(500).send('Failed to enqueue email')
  }
}
// respond healthy to auto-scaler.
app.get('/_ah/health', (_req: Request, res: Response) => {
  res.sendStatus(200)
})

// Direct mail delivery endpoint (used by the filesystem watcher).
app.post('/mail', addEmailEventToQueue)
/**
 * AWS SNS webhook endpoint. Confirms topic subscriptions and enqueues
 * "Received" email notifications as "save-newsletter" jobs. Only messages
 * from the configured topic ARN are accepted.
 */
app.post('/sns', async (req, res) => {
  const bodyString = req.body as string
  const snsMessage = JSON.parse(bodyString) as SnsMessage

  console.log(`Received SNS Message`, snsMessage)
  console.log(`Sns Topic ARN ${snsMessage['TopicArn']}`)

  if (snsMessage.TopicArn != env.sns.snsArn) {
    console.log(
      `Topic ARN: ${snsMessage.TopicArn} Doesnt Match ${env.sns.snsArn}, failing...`
    )
    res.status(401).send()
    return
  }

  if (snsMessage.Type == 'SubscriptionConfirmation') {
    console.log('Subscribing to topic')
    // Visiting the SubscribeURL completes the SNS handshake.
    await axios.get(snsMessage.SubscribeURL)
    res.status(200).send()
    return
  }

  if (snsMessage.Type == 'Notification') {
    const message = JSON.parse(snsMessage.Message) as {
      notificationType: string
      content: string
    }

    if (message.notificationType != 'Received') {
      console.log('Not an email, failing...')
      res.status(400).send()
      // BUG FIX: previously fell through, parsed the non-email payload,
      // and then attempted a second response.
      return
    }

    const mailContent = await simpleParser(message.content)
    const mail = convertToMailObject(mailContent)
    console.log(mail)

    await (
      await queue
    ).add('save-newsletter', mail, {
      priority: 1,
      attempts: 1,
      delay: 500,
    })

    // Respond exactly once (the original called sendStatus(200) and then
    // status(200).send(), triggering ERR_HTTP_HEADERS_SENT).
    res.sendStatus(200)
    return
  }

  res.status(400).send()
})
// Default to 8080 when PORT is not supplied by the environment.
const port = process.env.PORT || 8080

const server = app.listen(port, () => {
  console.log('Mail Server started')
})

View File

@ -0,0 +1,25 @@
import { EmailContents } from '../types/EmailContents'
import axios from 'axios'
import { env } from '../env'
import { ParsedMail } from 'mailparser'
/**
 * Posts a parsed email payload to the configured email API endpoint,
 * authenticating with the watcher API key. Returns the axios promise.
 */
export const sendToEmailApi = (data: EmailContents) => {
  const headers = {
    ['x-api-key']: env.apiKey,
    'Content-Type': 'application/json',
  }
  return axios.post(env.apiEndpoint, data, { headers, timeout: 5000 })
}
/**
 * Normalizes a mailparser ParsedMail into the flat EmailContents shape
 * posted to the email API. Missing fields collapse to empty strings.
 */
export const convertToMailObject = (it: ParsedMail): EmailContents => {
  return {
    from: it.from?.value[0]?.address || '',
    // "to" may be a single address object or an array of them; the
    // original threw on it.to[0].text when the array was empty.
    to: (Array.isArray(it.to) ? it.to[0]?.text : it.to?.text) || '',
    subject: it.subject || '',
    // mailparser's html is `string | false`; false collapses to ''.
    html: it.html || '',
    text: it.text || '',
    headers: it.headers,
  }
}

View File

@ -0,0 +1,27 @@
import { RedisDataSource } from '@omnivore/utils'
import { Queue, RedisClient } from 'bullmq'
// Name of the BullMQ queue shared with the Omnivore backend workers.
export const QUEUE = 'omnivore-backend-queue'

/**
 * Creates a BullMQ queue on the given Redis connection and waits until
 * the underlying client is ready before handing it back.
 */
export const getQueue = async (
  connection: RedisClient,
  queueName = QUEUE
): Promise<Queue> => {
  const defaultJobOptions = {
    backoff: {
      type: 'exponential',
      delay: 2000, // 2 seconds
    },
    removeOnComplete: {
      age: 3600, // keep up to 1 hour
    },
    removeOnFail: {
      age: 24 * 3600, // keep up to 1 day
    },
  }
  const queue = new Queue(queueName, { connection, defaultJobOptions })
  await queue.waitUntilReady()
  return queue
}

View File

@ -0,0 +1,20 @@
import { HeaderValue } from 'mailparser'
// Flattened representation of an inbound email, as posted to the email
// API by both the SNS server and the filesystem watcher.
export type EmailContents = {
  from: string
  to: string
  subject: string
  // Rendered HTML body ('' when the message is plain text only).
  html: string
  text: string
  // Raw parsed headers from mailparser.
  headers: Map<string, HeaderValue>
  // presumably unsubscribe targets from List-Unsubscribe — confirm with
  // the API consumer; not populated by convertToMailObject in this package.
  unsubMailTo?: string
  unsubHttpUrl?: string
  forwardedFrom?: string
  replyTo?: string
  confirmationCode?: string
  uploadFile?: {
    fileName: string
    contentType: string
    id: string
  }
}

View File

@ -0,0 +1,7 @@
// Minimal shape of an AWS SNS webhook payload (only the fields this
// service reads).
export type SnsMessage = {
  // 'SubscriptionConfirmation' or 'Notification' (see /sns handler).
  Type: string
  TopicArn: string
  // Handshake URL; present on SubscriptionConfirmation messages.
  SubscribeURL: string
  content: string
  // JSON-encoded notification body.
  Message: string
}

View File

@ -0,0 +1,22 @@
import chokidar from 'chokidar'
import { simpleParser } from 'mailparser'
import * as fs from 'node:fs'
import { convertToMailObject, sendToEmailApi } from './lib/emailApi'
import { env } from './env'
/**
 * Watches the mail drop path and forwards each newly added message file
 * to the email API; deletes the file afterwards when DELETE_FILE=true.
 */
chokidar.watch(env.filesystem.filePath).on('add', (path, _stats) => {
  console.log(path)
  const contents = fs.readFileSync(path).toString()
  void simpleParser(contents)
    .then(convertToMailObject)
    .then(async (emailData) => {
      await sendToEmailApi(emailData)
      console.log('Sent to email API')
    })
    .then(() => {
      if (process.env['DELETE_FILE'] == 'true') {
        fs.unlinkSync(path)
        // BUG FIX: this log previously fired even when the file was kept.
        console.log('Deleted File')
      }
    })
    .catch((error) => {
      // Keep the watcher alive on per-message failures instead of
      // surfacing an unhandled promise rejection.
      console.error('error processing mail file', path, error)
    })
})

View File

@ -0,0 +1,9 @@
{
"extends": "./../../tsconfig.json",
"compileOnSave": false,
"include": ["./src/**/*"],
"compilerOptions": {
"outDir": "dist",
"typeRoots": ["./../../node_modules/pgvector/types"]
}
}

Some files were not shown because too many files have changed in this diff Show More