Merge pull request #3024 from omnivore-app/integratoin

Improvements to integrations
This commit is contained in:
Hongbo Wu
2023-11-03 17:20:17 +08:00
committed by GitHub
40 changed files with 1238 additions and 912 deletions

View File

@ -14,6 +14,13 @@ export enum IntegrationType {
Import = 'IMPORT',
}
// Filter controlling which items an IMPORT-type integration fetches from the
// third-party service; stored per integration row (see importItemState column
// below). String values mirror the GraphQL ImportItemState enum.
export enum ImportItemState {
  Unread = 'UNREAD',
  Unarchived = 'UNARCHIVED',
  Archived = 'ARCHIVED',
  All = 'ALL',
}
@Entity({ name: 'integrations' })
export class Integration {
@PrimaryGeneratedColumn('uuid')
@ -49,4 +56,7 @@ export class Integration {
@Column('text', { nullable: true })
taskName?: string | null
@Column('enum', { enum: ImportItemState, nullable: true })
importItemState?: ImportItemState | null
}

View File

@ -970,6 +970,13 @@ export type ImportFromIntegrationSuccess = {
success: Scalars['Boolean'];
};
// Import-item state filter for IMPORT-type integrations.
// NOTE: this file appears to be GraphQL codegen output (Scalars/ResolversTypes
// style) — prefer regenerating from the schema over hand-editing.
export enum ImportItemState {
  All = 'ALL',
  Archived = 'ARCHIVED',
  Unarchived = 'UNARCHIVED',
  Unread = 'UNREAD'
}
export type Integration = {
__typename?: 'Integration';
createdAt: Scalars['Date'];
@ -2375,7 +2382,9 @@ export enum SetIntegrationErrorCode {
// Input for the setIntegration mutation. NOTE: this file appears to be
// GraphQL codegen output — prefer regenerating from the schema over
// hand-editing.
export type SetIntegrationInput = {
  enabled: Scalars['Boolean'];
  /** Present when updating an existing integration; omitted on create. */
  id?: InputMaybe<Scalars['ID']>;
  /** Only meaningful for IMPORT-type integrations; the server defaults it to UNARCHIVED. */
  importItemState?: InputMaybe<ImportItemState>;
  name: Scalars['String'];
  syncedAt?: InputMaybe<Scalars['Date']>;
  token: Scalars['String'];
  type?: InputMaybe<IntegrationType>;
};
@ -3494,6 +3503,7 @@ export type ResolversTypes = {
ImportFromIntegrationErrorCode: ImportFromIntegrationErrorCode;
ImportFromIntegrationResult: ResolversTypes['ImportFromIntegrationError'] | ResolversTypes['ImportFromIntegrationSuccess'];
ImportFromIntegrationSuccess: ResolverTypeWrapper<ImportFromIntegrationSuccess>;
ImportItemState: ImportItemState;
Int: ResolverTypeWrapper<Scalars['Int']>;
Integration: ResolverTypeWrapper<Integration>;
IntegrationType: IntegrationType;

View File

@ -863,6 +863,13 @@ type ImportFromIntegrationSuccess {
success: Boolean!
}
# Which items an IMPORT-type integration should pull from the source service.
enum ImportItemState {
  ALL
  ARCHIVED
  UNARCHIVED
  UNREAD
}
type Integration {
createdAt: Date!
enabled: Boolean!
@ -1834,7 +1841,9 @@ enum SetIntegrationErrorCode {
# Input for the setIntegration mutation (create or update an integration).
input SetIntegrationInput {
  enabled: Boolean!
  # Omit on create; supply to update an existing integration.
  id: ID
  # Only meaningful for IMPORT-type integrations (server defaults to UNARCHIVED).
  importItemState: ImportItemState
  name: String!
  syncedAt: Date
  token: String!
  type: IntegrationType
}

View File

@ -1,5 +1,9 @@
import { DeepPartial } from 'typeorm'
import { Integration, IntegrationType } from '../../entity/integration'
import {
ImportItemState,
Integration,
IntegrationType,
} from '../../entity/integration'
import { env } from '../../env'
import {
DeleteIntegrationError,
@ -18,10 +22,11 @@ import {
SetIntegrationErrorCode,
SetIntegrationSuccess,
} from '../../generated/graphql'
import { createIntegrationToken } from '../../routers/auth/jwt_helpers'
import {
findIntegration,
findIntegrations,
getIntegrationService,
getIntegrationClient,
removeIntegration,
saveIntegration,
updateIntegration,
@ -29,8 +34,8 @@ import {
import { analytics } from '../../utils/analytics'
import {
deleteTask,
enqueueExportToIntegration,
enqueueImportFromIntegration,
enqueueSyncWithIntegration,
} from '../../utils/createTask'
import { authorized } from '../../utils/helpers'
@ -45,6 +50,11 @@ export const setIntegrationResolver = authorized<
user: { id: uid },
id: input.id || undefined,
type: input.type || IntegrationType.Export,
syncedAt: input.syncedAt ? new Date(input.syncedAt) : undefined,
importItemState:
input.type === IntegrationType.Import
? input.importItemState || ImportItemState.Unarchived // default to unarchived
: undefined,
}
if (input.id) {
// Update
@ -59,7 +69,7 @@ export const setIntegrationResolver = authorized<
integrationToSave.taskName = existingIntegration.taskName
} else {
// Create
const integrationService = getIntegrationService(input.name)
const integrationService = getIntegrationClient(input.name)
// authorize and get access token
const token = await integrationService.accessToken(input.token)
if (!token) {
@ -73,12 +83,27 @@ export const setIntegrationResolver = authorized<
// save integration
const integration = await saveIntegration(integrationToSave, uid)
if (
integrationToSave.type === IntegrationType.Export &&
(!integrationToSave.id || integrationToSave.enabled)
) {
if (integrationToSave.type === IntegrationType.Export && !input.id) {
const authToken = await createIntegrationToken({
uid,
token: integration.token,
})
if (!authToken) {
log.error('failed to create auth token', {
integrationId: integration.id,
})
return {
errorCodes: [SetIntegrationErrorCode.BadRequest],
}
}
// create a task to sync all the pages if new integration or enable integration (export type)
const taskName = await enqueueSyncWithIntegration(uid, input.name)
const taskName = await enqueueExportToIntegration(
integration.id,
integration.name,
0,
authToken
)
log.info('enqueued task', taskName)
// update task name in integration
@ -190,7 +215,7 @@ export const importFromIntegrationResolver = authorized<
ImportFromIntegrationSuccess,
ImportFromIntegrationError,
MutationImportFromIntegrationArgs
>(async (_, { integrationId }, { claims: { uid }, log, signToken }) => {
>(async (_, { integrationId }, { claims: { uid }, log }) => {
log.info('importFromIntegrationResolver')
try {
@ -202,15 +227,23 @@ export const importFromIntegrationResolver = authorized<
}
}
const exp = Math.floor(Date.now() / 1000) + 60 * 60 * 24 // 1 day
const authToken = (await signToken(
{ uid, exp },
env.server.jwtSecret
)) as string
const authToken = await createIntegrationToken({
uid: integration.user.id,
token: integration.token,
})
if (!authToken) {
return {
errorCodes: [ImportFromIntegrationErrorCode.BadRequest],
}
}
// create a task to import all the pages
const taskName = await enqueueImportFromIntegration(
integration.id,
authToken
integration.name,
integration.syncedAt?.getTime() || 0,
authToken,
integration.importItemState || ImportItemState.Unarchived
)
// update task name in integration
await updateIntegration(integration.id, { taskName }, uid)

View File

@ -40,3 +40,8 @@ export function isPendingUserTokenPayload(
'username' in object
)
}
// Claims embedded in the short-lived JWT that authenticates integration
// import/export task handlers.
export type IntegrationTokenPayload = {
  uid: string // Omnivore user id the task acts on behalf of
  token: string // the integration's third-party access token
}

View File

@ -4,6 +4,7 @@ import { promisify } from 'util'
import { env } from '../../env'
import { logger } from '../../utils/logger'
import {
IntegrationTokenPayload,
isPendingUserTokenPayload,
PendingUserTokenPayload,
} from './auth_types'
@ -85,3 +86,22 @@ export function suggestedUsername(name: string): string {
const suffix = Math.floor(Math.random() * 10000)
return `${prefix}${suffix}`
}
/**
 * Signs a short-lived JWT carrying the integration claims, used to
 * authenticate the integration import/export task handlers.
 *
 * @param payload - uid and third-party access token to embed as claims
 * @returns the signed token, or undefined when signing fails
 */
export async function createIntegrationToken(
  payload: IntegrationTokenPayload
): Promise<string | undefined> {
  try {
    // exp is in seconds per the JWT spec; token is valid for 1 day
    const exp = Math.floor(Date.now() / 1000) + 60 * 60 * 24
    const authToken = await signToken(
      {
        ...payload,
        exp,
      },
      env.server.jwtSecret
    )
    // Log only the uid — payload.token is a third-party credential and must
    // not be written to logs.
    logger.info('createIntegrationToken', { uid: payload.uid })
    return authToken as string
  } catch (err) {
    // Record the failure instead of swallowing it silently; callers treat
    // undefined as an error condition.
    logger.error('failed to sign integration token', err)
    return undefined
  }
}

View File

@ -1,55 +1,22 @@
/* eslint-disable @typescript-eslint/no-misused-promises */
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
/* eslint-disable @typescript-eslint/no-unsafe-member-access */
import { stringify } from 'csv-stringify'
import express from 'express'
import { DateTime } from 'luxon'
import { v4 as uuidv4 } from 'uuid'
import { IntegrationType } from '../../entity/integration'
import { LibraryItem } from '../../entity/library_item'
import { EntityType, readPushSubscription } from '../../pubsub'
import { Claims } from '../../resolvers/types'
import {
findIntegration,
getIntegrationService,
updateIntegration,
} from '../../services/integrations'
import {
findLibraryItemById,
searchLibraryItems,
} from '../../services/library_item'
import { getClaimsByToken } from '../../utils/auth'
import { Integration, IntegrationType } from '../../entity/integration'
import { readPushSubscription } from '../../pubsub'
import { getRepository } from '../../repository'
import { enqueueExportToIntegration } from '../../utils/createTask'
import { logger } from '../../utils/logger'
import { DateFilter } from '../../utils/search'
import { createGCSFile } from '../../utils/uploads'
// Shape of the decoded pub/sub message body handled by this router.
export interface Message {
  type?: EntityType // kind of entity that changed (page/highlight/label)
  id?: string // entity id — used when type is PAGE
  userId: string // owner of the data to sync (required)
  pageId?: string // set when type is LABEL
  articleId?: string // set when type is HIGHLIGHT
}
// Task payload for the /import handler: identifies which integration to
// import pages from.
interface ImportEvent {
  integrationId: string
}
// Type guard narrowing an incoming request body to ImportEvent.
// Takes `unknown` instead of `any` and checks for a non-null object first,
// so null/undefined input returns false instead of throwing a TypeError
// (`'x' in null` throws at runtime).
const isImportEvent = (event: unknown): event is ImportEvent =>
  typeof event === 'object' && event !== null && 'integrationId' in event
import { createIntegrationToken } from '../auth/jwt_helpers'
export function integrationsServiceRouter() {
const router = express.Router()
router.post('/:integrationName/:action', async (req, res) => {
logger.info('start to sync with integration', {
action: req.params.action,
integrationName: req.params.integrationName,
})
router.post('/export', async (req, res) => {
logger.info('start to sync with integration')
try {
const { message: msgStr, expired } = readPushSubscription(req)
if (!msgStr) {
return res.status(200).send('Bad Request')
}
@ -59,119 +26,39 @@ export function integrationsServiceRouter() {
return res.status(200).send('Expired')
}
const data: Message = JSON.parse(msgStr)
const userId = data.userId
const type = data.type
if (!userId) {
logger.info('No userId found in message')
res.status(200).send('Bad Request')
return
}
const integration = await findIntegration(
{
name: req.params.integrationName.toUpperCase(),
type: IntegrationType.Export,
// find all active integrations
const integrations = await getRepository(Integration).find({
where: {
enabled: true,
type: IntegrationType.Export,
},
userId
)
if (!integration) {
logger.info('No active integration found for user', { userId })
res.status(200).send('No integration found')
return
}
relations: ['user'],
})
const action = req.params.action.toUpperCase()
const integrationService = getIntegrationService(integration.name)
if (action === 'SYNC_UPDATED') {
// get updated page by id
let id: string | undefined
switch (type) {
case EntityType.PAGE:
id = data.id
break
case EntityType.HIGHLIGHT:
id = data.articleId
break
case EntityType.LABEL:
id = data.pageId
break
}
if (!id) {
logger.info('No id found in message')
res.status(200).send('Bad Request')
return
}
const item = await findLibraryItemById(id, userId)
if (!item) {
logger.info('No item found for id', { id })
res.status(200).send('No page found')
return
}
// sync updated item with integration
logger.info('syncing updated item with integration', {
integrationId: integration.id,
itemId: item.id,
})
const synced = await integrationService.export(integration, [item])
if (!synced) {
logger.info('failed to sync item', {
integrationId: integration.id,
itemId: item.id,
// create a task to sync with each integration
await Promise.all(
integrations.map(async (integration) => {
const authToken = await createIntegrationToken({
uid: integration.user.id,
token: integration.token,
})
return res.status(400).send('Failed to sync')
}
} else if (action === 'SYNC_ALL') {
// sync all pages of the user
const size = 50
for (
let hasNextPage = true,
count = 0,
after = 0,
items: LibraryItem[] = [];
hasNextPage;
after += size, hasNextPage = count > after
) {
const syncedAt = integration.syncedAt
// only sync pages that were updated after syncedAt
const dateFilters: DateFilter[] = []
syncedAt &&
dateFilters.push({ field: 'updatedAt', startDate: syncedAt })
const { libraryItems } = await searchLibraryItems(
{ from: after, size, dateFilters },
userId
)
items = libraryItems
const itemIds = items.map((p) => p.id)
logger.info('syncing items', { pageIds: itemIds })
const synced = await integrationService.export(integration, items)
if (!synced) {
logger.error('failed to sync items', {
pageIds: itemIds,
if (!authToken) {
logger.error('failed to create auth token', {
integrationId: integration.id,
})
return res.status(400).send('Failed to sync')
return
}
}
// delete task name if completed
await updateIntegration(
integration.id,
{
taskName: null,
},
userId
)
} else {
logger.info('unknown action', { action })
res.status(200).send('Unknown action')
return
}
const syncAt = integration.syncedAt?.getTime() || 0
return enqueueExportToIntegration(
integration.id,
integration.name,
syncAt,
authToken
)
})
)
} catch (err) {
logger.error('sync with integrations failed', err)
return res.status(500).send(err)
@ -180,121 +67,5 @@ export function integrationsServiceRouter() {
res.status(200).send('OK')
})
// import pages from integration task handler
router.post('/import', async (req, res) => {
logger.info('start cloud task to import pages from integration')
const token = req.cookies?.auth || req.headers?.authorization
let claims: Claims | undefined
try {
claims = await getClaimsByToken(token)
if (!claims) {
return res.status(401).send('UNAUTHORIZED')
}
} catch (err) {
logger.error('failed to get claims from token', err)
return res.status(401).send('UNAUTHORIZED')
}
if (!isImportEvent(req.body)) {
logger.info('Invalid message')
return res.status(400).send('Bad Request')
}
let writeStream: NodeJS.WritableStream | undefined
try {
const userId = claims.uid
const integration = await findIntegration(
{
id: req.body.integrationId,
enabled: true,
type: IntegrationType.Import,
},
userId
)
if (!integration) {
logger.info('No active integration found for user', { userId })
return res.status(200).send('No integration found')
}
const integrationService = getIntegrationService(integration.name)
// import pages from integration
logger.info('importing pages from integration', {
integrationId: integration.id,
})
let offset = 0
const since = integration.syncedAt?.getTime() || 0
let syncedAt = since
// get pages from integration
const retrieved = await integrationService.retrieve({
token: integration.token,
since,
offset,
})
syncedAt = retrieved.since || Date.now()
let retrievedData = retrieved.data
// if there are pages to import
if (retrievedData.length > 0) {
// write the list of urls to a csv file and upload it to gcs
// path style: imports/<uid>/<date>/<type>-<uuid>.csv
const dateStr = DateTime.now().toISODate()
const fileUuid = uuidv4()
const fullPath = `imports/${userId}/${dateStr}/URL_LIST-${fileUuid}.csv`
// open a write_stream to the file
const file = createGCSFile(fullPath)
writeStream = file.createWriteStream({
contentType: 'text/csv',
})
// stringify the data and pipe it to the write_stream
const stringifier = stringify({
header: true,
columns: ['url', 'state', 'labels'],
})
stringifier.pipe(writeStream)
// paginate api calls to the integration
do {
// write the list of urls, state and labels to the stream
retrievedData.forEach((row) => stringifier.write(row))
// get next pages from the integration
offset += retrievedData.length
const retrieved = await integrationService.retrieve({
token: integration.token,
since,
offset,
})
syncedAt = retrieved.since || Date.now()
retrievedData = retrieved.data
logger.info('retrieved data', {
total: offset,
size: retrievedData.length,
})
} while (retrievedData.length > 0 && offset < 20000) // limit to 20k pages
}
// update the integration's syncedAt and remove taskName
await updateIntegration(
integration.id,
{
syncedAt: new Date(syncedAt),
taskName: null,
},
userId
)
} catch (err) {
logger.error('import pages from integration failed', err)
return res.status(500).send(err)
} finally {
writeStream?.end()
}
res.status(200).send('OK')
})
return router
}

View File

@ -1972,12 +1972,21 @@ const schema = gql`
ALREADY_EXISTS
}
enum ImportItemState {
UNREAD
UNARCHIVED
ARCHIVED
ALL
}
input SetIntegrationInput {
id: ID
name: String!
type: IntegrationType
token: String!
enabled: Boolean!
syncedAt: Date
importItemState: ImportItemState
}
union IntegrationsResult = IntegrationsSuccess | IntegrationsError

View File

@ -1,19 +1,19 @@
import { DeepPartial, FindOptionsWhere } from 'typeorm'
import { Integration } from '../../entity/integration'
import { authTrx } from '../../repository'
import { IntegrationService } from './integration'
import { PocketIntegration } from './pocket'
import { ReadwiseIntegration } from './readwise'
import { IntegrationClient } from './integration'
import { PocketClient } from './pocket'
import { ReadwiseClient } from './readwise'
const integrations: IntegrationService[] = [
new ReadwiseIntegration(),
new PocketIntegration(),
const integrations: IntegrationClient[] = [
new ReadwiseClient(),
new PocketClient(),
]
export const getIntegrationService = (name: string): IntegrationService => {
export const getIntegrationClient = (name: string): IntegrationClient => {
const service = integrations.find((s) => s.name === name)
if (!service) {
throw new Error(`Integration service not found: ${name}`)
throw new Error(`Integration client not found: ${name}`)
}
return service
}

View File

@ -1,5 +1,4 @@
import { Integration } from '../../entity/integration'
import { LibraryItem, LibraryItemState } from '../../entity/library_item'
import { LibraryItemState } from '../../entity/library_item'
export interface RetrievedData {
url: string
@ -19,19 +18,9 @@ export interface RetrieveRequest {
offset?: number
}
export abstract class IntegrationService {
abstract name: string
export interface IntegrationClient {
name: string
apiUrl: string
accessToken = async (token: string): Promise<string | null> => {
return Promise.resolve(null)
}
export = async (
integration: Integration,
items: LibraryItem[]
): Promise<boolean> => {
return Promise.resolve(false)
}
retrieve = async (req: RetrieveRequest): Promise<RetrievedResult> => {
return Promise.resolve({ data: [] })
}
accessToken(token: string): Promise<string | null>
}

View File

@ -3,7 +3,7 @@ import { LibraryItemState } from '../../entity/library_item'
import { env } from '../../env'
import { logger } from '../../utils/logger'
import {
IntegrationService,
IntegrationClient,
RetrievedResult,
RetrieveRequest,
} from './integration'
@ -51,16 +51,16 @@ interface Author {
name: string
}
export class PocketIntegration extends IntegrationService {
export class PocketClient implements IntegrationClient {
name = 'POCKET'
POCKET_API_URL = 'https://getpocket.com/v3'
apiUrl = 'https://getpocket.com/v3'
headers = {
'Content-Type': 'application/json',
'X-Accept': 'application/json',
}
accessToken = async (token: string): Promise<string | null> => {
const url = `${this.POCKET_API_URL}/oauth/authorize`
const url = `${this.apiUrl}/oauth/authorize`
try {
const response = await axios.post<{ access_token: string }>(
url,
@ -90,7 +90,7 @@ export class PocketIntegration extends IntegrationService {
count = 100,
offset = 0
): Promise<PocketResponse | null> => {
const url = `${this.POCKET_API_URL}/get`
const url = `${this.apiUrl}/get`
try {
const response = await axios.post<PocketResponse>(
url,

View File

@ -1,13 +1,6 @@
import axios from 'axios'
import { updateIntegration } from '.'
import { HighlightType } from '../../entity/highlight'
import { Integration } from '../../entity/integration'
import { LibraryItem } from '../../entity/library_item'
import { env } from '../../env'
import { wait } from '../../utils/helpers'
import { logger } from '../../utils/logger'
import { findHighlightsByLibraryItemId, getHighlightUrl } from '../highlights'
import { IntegrationService } from './integration'
import { IntegrationClient } from './integration'
interface ReadwiseHighlight {
// The highlight text, (technically the only field required in a highlight object)
@ -36,12 +29,12 @@ interface ReadwiseHighlight {
highlight_url?: string
}
export const READWISE_API_URL = 'https://readwise.io/api/v2'
export class ReadwiseIntegration extends IntegrationService {
export class ReadwiseClient implements IntegrationClient {
name = 'READWISE'
apiUrl = 'https://readwise.io/api/v2'
accessToken = async (token: string): Promise<string | null> => {
const authUrl = `${env.readwise.apiUrl || READWISE_API_URL}/auth`
const authUrl = `${this.apiUrl}/auth`
try {
const response = await axios.get(authUrl, {
headers: {
@ -58,110 +51,4 @@ export class ReadwiseIntegration extends IntegrationService {
return null
}
}
// Exports the given library items' highlights to Readwise. Returns true on
// success; on success the integration's syncedAt is advanced to now.
export = async (
integration: Integration,
items: LibraryItem[]
): Promise<boolean> => {
let result = true
// Convert each item's highlights into Readwise payload objects (one array
// per item; flattened before sending).
const highlights = await Promise.all(
items.map((item) =>
this.libraryItemToReadwiseHighlight(item, integration.user.id)
)
)
// If there are no highlights, we will skip the sync
// NOTE(review): `highlights` is an array of per-item arrays, so this check
// is true whenever `items` is non-empty even if every item has zero
// highlights — confirm whether an empty POST to Readwise is intended.
if (highlights.length > 0) {
result = await this.syncWithReadwise(integration.token, highlights.flat())
}
// update integration syncedAt if successful
if (result) {
logger.info('updating integration syncedAt')
await updateIntegration(
integration.id,
{
syncedAt: new Date(),
},
integration.user.id
)
}
return result
}
// Maps one library item's highlights to Readwise highlight payloads.
// Loads the highlights from the DB when they are not already attached to
// the item. Highlights that are not of type Highlight, or have no quote,
// are dropped.
libraryItemToReadwiseHighlight = async (
item: LibraryItem,
userId: string
): Promise<ReadwiseHighlight[]> => {
let highlights = item.highlights
if (!highlights) {
highlights = await findHighlightsByLibraryItemId(item.id, userId)
}
// Readwise category: tweets for Twitter content, articles otherwise.
const category = item.siteName === 'Twitter' ? 'tweets' : 'articles'
return highlights
.map((highlight) => {
// filter out highlights that are not of type highlight or have no quote
if (
highlight.highlightType !== HighlightType.Highlight ||
!highlight.quote
) {
return undefined
}
return {
text: highlight.quote,
title: item.title,
author: item.author || undefined,
highlight_url: getHighlightUrl(item.slug, highlight.id),
highlighted_at: new Date(highlight.createdAt).toISOString(),
category,
image_url: item.thumbnail || undefined,
// location: highlight.highlightPositionAnchorIndex || undefined,
location_type: 'order',
note: highlight.annotation || undefined,
source_type: 'omnivore',
source_url: item.originalUrl,
}
})
// drop the undefined entries produced above; the cast reflects the filter
.filter((highlight) => highlight !== undefined) as ReadwiseHighlight[]
}
// POSTs a batch of highlights to the Readwise highlights endpoint.
// On HTTP 429 it honors the Retry-After header (default 10s) and retries
// recursively up to 3 times; any other failure returns false.
syncWithReadwise = async (
token: string,
highlights: ReadwiseHighlight[],
retryCount = 0
): Promise<boolean> => {
const url = `${env.readwise.apiUrl || READWISE_API_URL}/highlights`
try {
const response = await axios.post(
url,
{
highlights,
},
{
headers: {
Authorization: `Token ${token}`,
'Content-Type': 'application/json',
},
timeout: 5000, // 5 seconds
}
)
return response.status === 200
} catch (error) {
logger.error(error)
if (axios.isAxiosError(error)) {
if (error.response?.status === 429 && retryCount < 3) {
logger.info('Readwise API rate limit exceeded, retrying...')
// wait for Retry-After seconds in the header if rate limited
// max retry count is 3
const retryAfter = error.response?.headers['retry-after'] || '10' // default to 10 seconds
await wait(parseInt(retryAfter, 10) * 1000)
return this.syncWithReadwise(token, highlights, retryCount + 1)
}
}
return false
}
}
}

View File

@ -36,7 +36,12 @@ const FORCE_PUPPETEER_URLS = [
TWEET_URL_REGEX,
/^((?:https?:)?\/\/)?((?:www|m)\.)?((?:youtube\.com|youtu.be))(\/(?:[\w-]+\?v=|embed\/|v\/)?)([\w-]+)(\S+)?$/,
]
const ALREADY_PARSED_SOURCES = ['puppeteer-parse', 'csv-importer', 'rss-feeder']
// Sources whose saved content arrives already parsed — presumably these skip
// the backend fetch/parse step; confirm at the use site (not visible here).
const ALREADY_PARSED_SOURCES = [
  'puppeteer-parse',
  'csv-importer',
  'rss-feeder',
  'pocket',
]
const createSlug = (url: string, title?: Maybe<string> | undefined) => {
const { pathname } = new URL(url)
@ -93,7 +98,8 @@ export const savePage = async (
state: input.state || undefined,
rssFeedUrl: input.rssFeedUrl,
})
const isImported = input.source === 'csv-importer'
const isImported =
input.source === 'csv-importer' || input.source === 'pocket'
// always parse in backend if the url is in the force puppeteer list
if (shouldParseInBackend(input)) {

View File

@ -70,6 +70,8 @@ interface BackendEnv {
recommendationTaskHandlerUrl: string
thumbnailTaskHandlerUrl: string
rssFeedTaskHandlerUrl: string
integrationExporterUrl: string
integrationImporterUrl: string
}
fileUpload: {
gcsUploadBucket: string
@ -163,6 +165,8 @@ const nullableEnvVars = [
'SENDGRID_VERIFICATION_TEMPLATE_ID',
'REMINDER_TASK_HANDLER_URL',
'TRUST_PROXY',
'INTEGRATION_EXPORTER_URL',
'INTEGRATION_IMPORTER_URL',
] // Allow some vars to be null/empty
/* If not in GAE and Prod/QA/Demo env (f.e. on localhost/dev env), allow following env vars to be null */
@ -253,6 +257,8 @@ export function getEnv(): BackendEnv {
recommendationTaskHandlerUrl: parse('RECOMMENDATION_TASK_HANDLER_URL'),
thumbnailTaskHandlerUrl: parse('THUMBNAIL_TASK_HANDLER_URL'),
rssFeedTaskHandlerUrl: parse('RSS_FEED_TASK_HANDLER_URL'),
integrationExporterUrl: parse('INTEGRATION_EXPORTER_URL'),
integrationImporterUrl: parse('INTEGRATION_IMPORTER_URL'),
}
const imageProxy = {
url: parse('IMAGE_PROXY_URL'),

View File

@ -6,6 +6,7 @@ import { google } from '@google-cloud/tasks/build/protos/protos'
import axios from 'axios'
import { nanoid } from 'nanoid'
import { DeepPartial } from 'typeorm'
import { ImportItemState } from '../entity/integration'
import { Recommendation } from '../entity/recommendation'
import { env } from '../env'
import {
@ -328,47 +329,6 @@ export const enqueueReminder = async (
return createdTasks[0].name
}
// Enqueues a low-priority Cloud Task asking the integration task handler's
// `/<integrationName>/sync_all` route to sync all of a user's pages.
// Returns the created task's name; locally (no Google Cloud project) it
// returns a random id without enqueueing anything.
export const enqueueSyncWithIntegration = async (
userId: string,
integrationName: string
): Promise<string> => {
const { GOOGLE_CLOUD_PROJECT, PUBSUB_VERIFICATION_TOKEN } = process.env
// use pubsub data format to send the userId to the task handler
const payload = {
message: {
data: Buffer.from(
JSON.stringify({
userId,
})
).toString('base64'),
publishTime: new Date().toISOString(),
},
}
// If there is no Google Cloud Project Id exposed, it means that we are in local environment
if (env.dev.isLocal || !GOOGLE_CLOUD_PROJECT) {
// no real queue locally — return a fake task name
return nanoid()
}
const createdTasks = await createHttpTaskWithToken({
project: GOOGLE_CLOUD_PROJECT,
payload,
// the pubsub verification token authenticates the push-style request
taskHandlerUrl: `${
env.queue.integrationTaskHandlerUrl
}/${integrationName.toLowerCase()}/sync_all?token=${PUBSUB_VERIFICATION_TOKEN}`,
priority: 'low',
})
if (!createdTasks || !createdTasks[0].name) {
logger.error(`Unable to get the name of the task`, {
payload,
createdTasks,
})
// callers persist the task name on the integration row, so a missing
// name is a hard failure
throw new CreateTaskError(`Unable to get the name of the task`)
}
return createdTasks[0].name
}
export const enqueueTextToSpeech = async ({
userId,
text,
@ -498,23 +458,29 @@ export const enqueueRecommendation = async (
export const enqueueImportFromIntegration = async (
integrationId: string,
authToken: string
integrationName: string,
syncAt: number, // unix timestamp in milliseconds
authToken: string,
state: ImportItemState
): Promise<string> => {
const { GOOGLE_CLOUD_PROJECT } = process.env
const payload = {
integrationId,
integrationName,
syncAt,
state,
}
const headers = {
Cookie: `auth=${authToken}`,
[OmnivoreAuthorizationHeader]: authToken,
}
// If there is no Google Cloud Project Id exposed, it means that we are in local environment
if (env.dev.isLocal || !GOOGLE_CLOUD_PROJECT) {
if (env.queue.integrationTaskHandlerUrl) {
if (env.queue.integrationImporterUrl) {
// Calling the handler function directly.
setTimeout(() => {
axios
.post(`${env.queue.integrationTaskHandlerUrl}/import`, payload, {
.post(env.queue.integrationImporterUrl, payload, {
headers,
})
.catch((error) => {
@ -528,7 +494,58 @@ export const enqueueImportFromIntegration = async (
const createdTasks = await createHttpTaskWithToken({
project: GOOGLE_CLOUD_PROJECT,
payload,
taskHandlerUrl: `${env.queue.integrationTaskHandlerUrl}/import`,
taskHandlerUrl: env.queue.integrationImporterUrl,
priority: 'low',
requestHeaders: headers,
})
if (!createdTasks || !createdTasks[0].name) {
logger.error(`Unable to get the name of the task`, {
payload,
createdTasks,
})
throw new CreateTaskError(`Unable to get the name of the task`)
}
return createdTasks[0].name
}
export const enqueueExportToIntegration = async (
integrationId: string,
integrationName: string,
syncAt: number, // unix timestamp in milliseconds
authToken: string
): Promise<string> => {
const { GOOGLE_CLOUD_PROJECT } = process.env
const payload = {
integrationId,
integrationName,
syncAt,
}
const headers = {
[OmnivoreAuthorizationHeader]: authToken,
}
// If there is no Google Cloud Project Id exposed, it means that we are in local environment
if (env.dev.isLocal || !GOOGLE_CLOUD_PROJECT) {
if (env.queue.integrationExporterUrl) {
// Calling the handler function directly.
setTimeout(() => {
axios
.post(env.queue.integrationExporterUrl, payload, {
headers,
})
.catch((error) => {
logError(error)
})
}, 0)
}
return nanoid()
}
const createdTasks = await createHttpTaskWithToken({
project: GOOGLE_CLOUD_PROJECT,
payload,
taskHandlerUrl: env.queue.integrationExporterUrl,
priority: 'low',
requestHeaders: headers,
})

View File

@ -11,7 +11,6 @@ import {
saveIntegration,
updateIntegration,
} from '../../src/services/integrations'
import { READWISE_API_URL } from '../../src/services/integrations/readwise'
import { deleteUser } from '../../src/services/user'
import { createTestUser } from '../db'
import { generateFakeUuid, graphqlRequest, request } from '../util'
@ -19,6 +18,8 @@ import { generateFakeUuid, graphqlRequest, request } from '../util'
chai.use(sinonChai)
describe('Integrations resolvers', () => {
const READWISE_API_URL = 'https://readwise.io/api/v2'
let loginUser: User
let authToken: string
@ -265,17 +266,6 @@ describe('Integrations resolvers', () => {
expect(res.body.data.setIntegration.integration.enabled).to.be
.true
})
it('creates new cloud task to sync all existing articles and highlights', async () => {
const res = await graphqlRequest(
query(integrationId, integrationName, token, enabled),
authToken
)
const integration = await findIntegration({
id: res.body.data.setIntegration.integration.id,
}, loginUser.id)
expect(integration?.taskName).not.to.be.null
})
})
})
})

View File

@ -1,423 +0,0 @@
import { Storage } from '@google-cloud/storage'
import { expect } from 'chai'
import { DateTime } from 'luxon'
import 'mocha'
import nock from 'nock'
import sinon from 'sinon'
import { Highlight } from '../../src/entity/highlight'
import { Integration, IntegrationType } from '../../src/entity/integration'
import { LibraryItem } from '../../src/entity/library_item'
import { User } from '../../src/entity/user'
import { env } from '../../src/env'
import { PubSubRequestBody } from '../../src/pubsub'
import { createHighlight, getHighlightUrl } from '../../src/services/highlights'
import {
deleteIntegrations,
saveIntegration,
updateIntegration,
} from '../../src/services/integrations'
import { READWISE_API_URL } from '../../src/services/integrations/readwise'
import { deleteLibraryItemById } from '../../src/services/library_item'
import { deleteUser } from '../../src/services/user'
import { createTestLibraryItem, createTestUser } from '../db'
import { MockBucket } from '../mock_storage'
import { request } from '../util'
// End-to-end tests for the /svc/pubsub/integrations routes: pubsub-driven
// sync with Readwise and HTTP-triggered import from Pocket. External HTTP
// calls (Readwise, Pocket) are intercepted with nock; GCS is stubbed.
describe('Integrations routers', () => {
  const baseUrl = '/svc/pubsub/integrations'

  let token: string
  let user: User
  let authToken: string

  before(async () => {
    // Create a test user and log in via the local debug endpoint to obtain
    // the auth cookie used by the import route below.
    user = await createTestUser('fakeUser')
    const res = await request
      .post('/local/debug/fake-user-login')
      .send({ fakeEmail: user.email })
    const body = res.body as { authToken: string }
    authToken = body.authToken
  })

  after(async () => {
    await deleteUser(user.id)
  })

  describe('sync with integrations', () => {
    // Builds the pubsub push URL: /svc/pubsub/integrations/<name>/<action>?token=<token>
    const endpoint = (token: string, name = 'name', action = 'action') =>
      `${baseUrl}/${name}/${action}?token=${token}`

    let action: string
    let data: PubSubRequestBody
    let integrationName: string

    context('when token is invalid', () => {
      before(() => {
        token = 'invalid-token'
      })

      // Pubsub push handlers must answer 200 even on bad input, otherwise
      // the message would be redelivered indefinitely.
      it('returns 200', async () => {
        return request.post(endpoint(token)).send(data).expect(200)
      })
    })

    context('when token is valid', () => {
      before(() => {
        token = process.env.PUBSUB_VERIFICATION_TOKEN as string
      })

      context('when data is expired', () => {
        before(() => {
          // publishTime well outside the freshness window (12 hours ago).
          data = {
            message: {
              data: Buffer.from(
                JSON.stringify({ userId: 'userId', type: 'page' })
              ).toString('base64'),
              publishTime: DateTime.now().minus({ hours: 12 }).toISO(),
            },
          }
        })

        it('returns 200 with Expired', async () => {
          const res = await request.post(endpoint(token)).send(data).expect(200)
          expect(res.text).to.eql('Expired')
        })
      })

      context('when userId is empty', () => {
        before(() => {
          data = {
            message: {
              data: Buffer.from(
                JSON.stringify({ userId: '', type: 'page' })
              ).toString('base64'),
              publishTime: new Date().toISOString(),
            },
          }
        })

        it('returns 200', async () => {
          return request.post(endpoint(token)).send(data).expect(200)
        })
      })

      context('when user exists', () => {
        context('when integration not found', () => {
          before(() => {
            // Valid user, but no READWISE integration saved for them yet.
            integrationName = 'READWISE'
            data = {
              message: {
                data: Buffer.from(
                  JSON.stringify({ userId: user.id, type: 'page' })
                ).toString('base64'),
                publishTime: new Date().toISOString(),
              },
            }
          })

          it('returns 200 with No integration found', async () => {
            const res = await request
              .post(endpoint(token, integrationName))
              .send(data)
              .expect(200)
            expect(res.text).to.eql('No integration found')
          })
        })

        context('when integration is readwise and enabled', () => {
          let integration: Integration
          let item: LibraryItem
          let highlight: Highlight
          let highlightsData: any

          before(async () => {
            integration = await saveIntegration(
              {
                user,
                name: 'READWISE',
                token: 'token',
              },
              user.id
            )
            integrationName = integration.name
            // create page
            item = await createTestLibraryItem(user.id)
            // create highlight
            const highlightPositionPercent = 25
            highlight = await createHighlight(
              {
                patch: 'test patch',
                quote: 'test quote',
                shortId: 'test shortId',
                highlightPositionPercent,
                user,
                libraryItem: item,
              },
              item.id,
              user.id
            )
            // create highlights data for integration request
            // NOTE: this payload must match what the Readwise client sends,
            // because nock below matches the request body exactly.
            highlightsData = {
              highlights: [
                {
                  text: highlight.quote,
                  title: item.title,
                  author: item.author ?? undefined,
                  highlight_url: getHighlightUrl(item.slug, highlight.id),
                  highlighted_at: highlight.createdAt.toISOString(),
                  category: 'articles',
                  image_url: item.thumbnail ?? undefined,
                  // location: highlightPositionPercent,
                  location_type: 'order',
                  note: highlight.annotation ?? undefined,
                  source_type: 'omnivore',
                  source_url: item.originalUrl,
                },
              ],
            }
          })

          after(async () => {
            await deleteIntegrations(user.id, [integration.id])
            await deleteLibraryItemById(item.id)
          })

          context('when action is sync_updated', () => {
            before(() => {
              action = 'sync_updated'
            })

            context('when entity type is page', () => {
              before(() => {
                data = {
                  message: {
                    data: Buffer.from(
                      JSON.stringify({
                        userId: user.id,
                        type: 'page',
                        id: item.id,
                      })
                    ).toString('base64'),
                    publishTime: new Date().toISOString(),
                  },
                }
                // mock Readwise Highlight API
                nock(READWISE_API_URL, {
                  reqheaders: {
                    Authorization: `Token ${integration.token}`,
                    'Content-Type': 'application/json',
                  },
                })
                  .post('/highlights', highlightsData)
                  .reply(200)
              })

              it('returns 200 with OK', async () => {
                const res = await request
                  .post(endpoint(token, integrationName, action))
                  .send(data)
                  .expect(200)
                expect(res.text).to.eql('OK')
              })

              context('when readwise highlight API reaches rate limits', () => {
                before(() => {
                  // mock Readwise Highlight API with rate limits
                  // retry after 1 second
                  nock(READWISE_API_URL, {
                    reqheaders: {
                      Authorization: `Token ${integration.token}`,
                      'Content-Type': 'application/json',
                    },
                  })
                    .post('/highlights')
                    .reply(429, 'Rate Limited', { 'Retry-After': '1' })
                  // mock Readwise Highlight API after 1 second
                  nock(READWISE_API_URL, {
                    reqheaders: {
                      Authorization: `Token ${integration.token}`,
                      'Content-Type': 'application/json',
                    },
                  })
                    .post('/highlights')
                    .delay(1000)
                    .reply(200)
                })

                it('returns 200 with OK', async () => {
                  const res = await request
                    .post(endpoint(token, integrationName, action))
                    .send(data)
                    .expect(200)
                  expect(res.text).to.eql('OK')
                })
              })
            })

            context('when entity type is highlight', () => {
              before(() => {
                data = {
                  message: {
                    data: Buffer.from(
                      JSON.stringify({
                        userId: user.id,
                        type: 'highlight',
                        articleId: item.id,
                      })
                    ).toString('base64'),
                    publishTime: new Date().toISOString(),
                  },
                }
                // mock Readwise Highlight API
                nock(READWISE_API_URL, {
                  reqheaders: {
                    Authorization: `Token ${integration.token}`,
                    'Content-Type': 'application/json',
                  },
                })
                  .post('/highlights', highlightsData)
                  .reply(200)
              })

              it('returns 200 with OK', async () => {
                const res = await request
                  .post(endpoint(token, integrationName, action))
                  .send(data)
                  .expect(200)
                expect(res.text).to.eql('OK')
              })
            })
          })

          context('when action is sync_all', () => {
            before(async () => {
              action = 'sync_all'
              data = {
                message: {
                  data: Buffer.from(
                    JSON.stringify({
                      userId: user.id,
                    })
                  ).toString('base64'),
                  publishTime: new Date().toISOString(),
                },
              }
              // mock Readwise Highlight API
              nock(READWISE_API_URL, {
                reqheaders: {
                  Authorization: `Token ${integration.token}`,
                  'Content-Type': 'application/json',
                },
              })
                .post('/highlights', highlightsData)
                .reply(200)
              // Reset syncedAt so sync_all picks the item up again.
              await updateIntegration(
                integration.id,
                {
                  syncedAt: null,
                  taskName: 'some task name',
                },
                user.id
              )
            })

            it('returns 200 with OK', async () => {
              const res = await request
                .post(endpoint(token, integrationName, action))
                .send(data)
                .expect(200)
              expect(res.text).to.eql('OK')
            })
          })
        })
      })
    })
  })

  describe('import from integrations router', () => {
    let integration: Integration

    before(async () => {
      token = 'test token'
      // create integration
      integration = await saveIntegration(
        {
          user: { id: user.id },
          name: 'POCKET',
          token,
          type: IntegrationType.Import,
        },
        user.id
      )
      // mock Pocket API: first page returns one non-deleted item with two
      // tags; the second page (offset 1) is empty so pagination stops.
      const reqBody = {
        access_token: token,
        consumer_key: env.pocket.consumerKey,
        state: 'all',
        detailType: 'complete',
        since: 0,
        sort: 'oldest',
        count: 100,
        offset: 0,
      }
      nock('https://getpocket.com', {
        reqheaders: {
          'content-type': 'application/json',
          'x-accept': 'application/json',
        },
      })
        .post('/v3/get', reqBody)
        .reply(200, {
          complete: 1,
          list: {
            '123': {
              given_url: 'https://omnivore.app/pocket-import-test,test',
              state: '0',
              tags: {
                '1234': {
                  tag: 'test',
                },
                '1235': {
                  tag: 'new',
                },
              },
            },
          },
          since: Date.now() / 1000,
        })
        .post('/v3/get', {
          ...reqBody,
          offset: 1,
        })
        .reply(200, {
          list: {},
        })
      // mock cloud storage
      const mockBucket = new MockBucket('test')
      sinon.replace(
        Storage.prototype,
        'bucket',
        sinon.fake.returns(mockBucket as never)
      )
    })

    after(async () => {
      sinon.restore()
      await deleteIntegrations(user.id, [integration.id])
    })

    context('when integration is pocket', () => {
      it('returns 200 with OK', async () => {
        return request
          .post(`${baseUrl}/import`)
          .send({
            integrationId: integration.id,
          })
          .set('Cookie', `auth=${authToken}`)
          .expect(200)
      })
    })
  })
})

View File

@ -6,6 +6,10 @@
"compilerOptions": {
"outDir": "dist"
},
"include": ["src", "test"],
"include": [
"src",
"test",
"../integration-handler/test/integrations.test.ts"
],
"exclude": ["./src/generated", "./test"]
}

View File

@ -0,0 +1,16 @@
-- Type: DO
-- Name: add_import_item_state_to_integration
-- Description: Add import_item_state column to integration table
BEGIN;
-- Mirrors the ImportItemState enum declared on the TypeScript Integration entity.
CREATE type import_item_state_type AS ENUM (
    'UNREAD',
    'UNARCHIVED',
    'ARCHIVED',
    'ALL'
);
-- Nullable on purpose: existing integrations have no import state configured.
ALTER TABLE omnivore.integrations ADD COLUMN import_item_state import_item_state_type;
COMMIT;

View File

@ -0,0 +1,11 @@
-- Type: UNDO
-- Name: add_import_item_state_to_integration
-- Description: Add import_item_state column to integration table
BEGIN;
-- Drop the column before the type it depends on; IF EXISTS keeps the undo idempotent.
ALTER TABLE omnivore.integrations DROP COLUMN IF EXISTS import_item_state;
DROP TYPE IF EXISTS import_item_state_type;
COMMIT;

View File

@ -46,7 +46,7 @@ const parseDate = (date: string): Date => {
export const importCsv = async (ctx: ImportContext, stream: Stream) => {
// create metrics in redis
await createMetrics(ctx.redisClient, ctx.userId, ctx.taskId, 'csv-importer')
await createMetrics(ctx.redisClient, ctx.userId, ctx.taskId, ctx.source)
const parser = parse({
headers: true,

View File

@ -61,6 +61,7 @@ export type ImportContext = {
contentHandler: ContentHandler
redisClient: RedisClient
taskId: string
source: string
}
type importHandlerFunc = (ctx: ImportContext, stream: Stream) => Promise<void>
@ -176,13 +177,28 @@ const handlerForFile = (name: string): importHandlerFunc | undefined => {
const fileName = path.parse(name).name
if (fileName.startsWith('MATTER')) {
return importMatterArchive
} else if (fileName.startsWith('URL_LIST')) {
} else if (fileName.startsWith('URL_LIST') || fileName.startsWith('POCKET')) {
return importCsv
}
return undefined
}
const importSource = (name: string): string => {
const fileName = path.parse(name).name
if (fileName.startsWith('MATTER')) {
return 'matter-history'
}
if (fileName.startsWith('URL_LIST')) {
return 'csv-importer'
}
if (fileName.startsWith('POCKET')) {
return 'pocket'
}
return 'unknown'
}
const urlHandler = async (
ctx: ImportContext,
url: URL,
@ -196,7 +212,7 @@ const urlHandler = async (
const result = await importURL(
ctx.userId,
url,
'csv-importer',
ctx.source,
ctx.taskId,
state,
labels && labels.length > 0 ? labels : undefined,
@ -309,7 +325,7 @@ const handleEvent = async (data: StorageEvent, redisClient: RedisClient) => {
.file(data.name)
.createReadStream()
const ctx = {
const ctx: ImportContext = {
userId,
countImported: 0,
countFailed: 0,
@ -317,6 +333,7 @@ const handleEvent = async (data: StorageEvent, redisClient: RedisClient) => {
contentHandler,
redisClient,
taskId: data.name,
source: importSource(data.name),
}
await handler(ctx, stream)

View File

@ -28,5 +28,6 @@ export const stubImportCtx = async (): Promise<ImportContext> => {
},
redisClient,
taskId: '',
source: 'csv-importer',
}
}

View File

@ -0,0 +1,5 @@
node_modules
build
.env*
Dockerfile
.dockerignore

View File

@ -0,0 +1,2 @@
node_modules/
dist/

View File

@ -0,0 +1,6 @@
{
"extends": "../../.eslintrc",
"parserOptions": {
"project": "tsconfig.json"
}
}

View File

@ -0,0 +1,16 @@
# This file specifies files that are *not* uploaded to Google Cloud Platform
# using gcloud. It follows the same syntax as .gitignore, with the addition of
# "#!include" directives (which insert the entries of the given .gitignore-style
# file at that point).
#
# For more information, run:
# $ gcloud topic gcloudignore
#
.gcloudignore
# If you would like to upload your .git directory, .gitignore file or files
# from your .gitignore file, remove the corresponding line
# below:
.git
.gitignore
node_modules

View File

@ -0,0 +1,26 @@
# Image for the integration-handler 'exporter' cloud function.
FROM node:18.16-alpine
# Run everything after as non-privileged user.
WORKDIR /app
# Copy workspace manifests first so the dependency install layer is cached.
COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .eslintrc .
COPY /packages/integration-handler/package.json ./packages/integration-handler/package.json
RUN yarn install --pure-lockfile
COPY /packages/integration-handler ./packages/integration-handler
RUN yarn workspace @omnivore/integration-handler build
# After building, fetch the production dependencies
RUN rm -rf /app/packages/integration-handler/node_modules
RUN rm -rf /app/node_modules
RUN yarn install --pure-lockfile --production
EXPOSE 8080
# Serve the 'exporter' target via the functions-framework (see package.json scripts).
CMD ["yarn", "workspace", "@omnivore/integration-handler", "start_exporter"]

View File

@ -0,0 +1,26 @@
# Image for the integration-handler 'importer' cloud function.
FROM node:18.16-alpine
# Run everything after as non-privileged user.
WORKDIR /app
# Copy workspace manifests first so the dependency install layer is cached.
COPY package.json .
COPY yarn.lock .
COPY tsconfig.json .
COPY .eslintrc .
COPY /packages/integration-handler/package.json ./packages/integration-handler/package.json
RUN yarn install --pure-lockfile
COPY /packages/integration-handler ./packages/integration-handler
RUN yarn workspace @omnivore/integration-handler build
# After building, fetch the production dependencies
RUN rm -rf /app/packages/integration-handler/node_modules
RUN rm -rf /app/node_modules
RUN yarn install --pure-lockfile --production
EXPOSE 8080
# Serve the 'importer' target via the functions-framework (see package.json scripts).
CMD ["yarn", "workspace", "@omnivore/integration-handler", "start_importer"]

View File

@ -0,0 +1,5 @@
{
"extension": ["ts"],
"spec": "test/**/*.test.ts",
"require": "test/babel-register.js"
}

View File

@ -0,0 +1,42 @@
{
"name": "@omnivore/integration-handler",
"version": "1.0.0",
"description": "",
"main": "build/src/index.js",
"files": [
"build/src"
],
"keywords": [],
"license": "Apache-2.0",
"scripts": {
"test": "yarn mocha -r ts-node/register --config mocha-config.json",
"lint": "eslint src --ext ts,js,tsx,jsx",
"compile": "tsc",
"build": "tsc",
"start_exporter": "functions-framework --target=exporter",
"start_importer": "functions-framework --target=importer"
},
"devDependencies": {
"@types/chai": "^4.3.4",
"@types/jsonwebtoken": "^8.5.0",
"@types/luxon": "^1.25.0",
"@types/mocha": "^10.0.1",
"@types/node": "^14.11.2",
"@types/uuid": "^9.0.0",
"eslint-plugin-prettier": "^4.0.0"
},
"dependencies": {
"@google-cloud/functions-framework": "3.1.2",
"@google-cloud/storage": "^7.0.1",
"@sentry/serverless": "^7.30.0",
"axios": "^1.2.2",
"csv-stringify": "^6.4.0",
"dotenv": "^16.0.1",
"jsonwebtoken": "^8.5.1",
"luxon": "^3.2.1",
"uuid": "^9.0.0"
},
"volta": {
"extends": "../../package.json"
}
}

View File

@ -0,0 +1,277 @@
import { File, Storage } from '@google-cloud/storage'
import * as Sentry from '@sentry/serverless'
import { stringify } from 'csv-stringify'
import * as dotenv from 'dotenv' // see https://github.com/motdotla/dotenv#how-do-i-use-dotenv-with-import
import * as jwt from 'jsonwebtoken'
import { DateTime } from 'luxon'
import { v4 as uuidv4 } from 'uuid'
import { getIntegrationClient, updateIntegration } from './integrations'
import { State } from './integrations/integration'
import { search } from './item'
// Payload posted by the backend to trigger an export/import run.
interface IntegrationRequest {
  integrationId: string
  // Last successful sync time, unix timestamp in milliseconds.
  syncAt: number
  integrationName: string
  // Optional read-state filter for imports (importer defaults to UNARCHIVED).
  state?: State
}
// JWT claims carried in the 'Omnivore-Authorization' header.
interface Claims {
  uid: string // Omnivore user id
  token: string // third-party integration API token
}
// Load .env before any process.env reads below.
dotenv.config()

// Error reporting for both cloud functions; tracing disabled.
Sentry.GCPFunction.init({
  dsn: process.env.SENTRY_DSN,
  tracesSampleRate: 0,
})

// Shared GCS client; credentials come from the runtime environment.
const storage = new Storage()
/**
 * Resolve after the given delay.
 *
 * @param ms - delay in milliseconds before the returned promise resolves
 */
export const wait = (ms: number): Promise<void> =>
  new Promise<void>((resolve) => setTimeout(resolve, ms))
/**
 * Runtime type guard for the body of an exporter/importer request.
 *
 * Fix: the original applied the `in` operator directly to `body`, which
 * throws a TypeError when the request body is null, undefined, or a
 * primitive (e.g. a raw string body). Guard for a non-null object first,
 * and take `unknown` instead of `any` so callers must narrow.
 *
 * @param body - parsed request body of unknown shape
 * @returns true when all required IntegrationRequest keys are present
 */
function isIntegrationRequest(body: unknown): body is IntegrationRequest {
  return (
    typeof body === 'object' &&
    body !== null &&
    'integrationId' in body &&
    'syncAt' in body &&
    'integrationName' in body
  )
}
// Handle to an object inside a GCS bucket; creating it does not touch the
// network — I/O happens only when the file is read or written.
const createGCSFile = (bucket: string, filename: string): File =>
  storage.bucket(bucket).file(filename)
/**
 * HTTP cloud function: push items updated since the integration's last sync
 * to a third-party service (e.g. Readwise), page by page, advancing the
 * integration's syncedAt after each successfully exported page.
 *
 * Auth: 'Omnivore-Authorization' header carrying a JWT with Claims
 * (uid = Omnivore user id, token = third-party API token).
 */
export const exporter = Sentry.GCPFunction.wrapHttpFunction(
  async (req, res) => {
    console.log('start to export to integration')

    const JWT_SECRET = process.env.JWT_SECRET
    const REST_BACKEND_ENDPOINT = process.env.REST_BACKEND_ENDPOINT
    if (!JWT_SECRET || !REST_BACKEND_ENDPOINT) {
      return res.status(500).send('Environment not configured correctly')
    }

    const token = req.get('Omnivore-Authorization')
    if (!token) {
      return res.status(401).send({ errorCode: 'INVALID_TOKEN' })
    }

    let claims: Claims
    try {
      claims = jwt.verify(token, JWT_SECRET) as Claims
    } catch (e) {
      console.error(e)
      return res.status(401).send('UNAUTHORIZED')
    }

    try {
      if (!isIntegrationRequest(req.body)) {
        console.error('Invalid message')
        // 200 on a malformed payload so the caller does not retry it.
        return res.status(200).send('Bad Request')
      }

      const { integrationId, syncAt, integrationName } = req.body
      const client = getIntegrationClient(integrationName)

      // get paginated items from the backend
      let hasMore = true
      let after = '0'
      while (hasMore) {
        console.log('searching for items...')
        const response = await search(
          REST_BACKEND_ENDPOINT,
          token,
          client.highlightOnly,
          new Date(syncAt),
          '50',
          after
        )

        if (!response) {
          console.error('failed to search for items', {
            integrationId,
          })
          return res.status(400).send('Failed to search')
        }

        hasMore = response.data.search.pageInfo.hasNextPage
        after = response.data.search.pageInfo.endCursor

        const items = response.data.search.edges.map((edge) => edge.node)
        if (items.length === 0) {
          break
        }

        console.log('exporting items...')
        const synced = await client.export(claims.token, items)
        if (!synced) {
          console.error('failed to export item', {
            integrationId,
          })
          return res.status(400).send('Failed to sync')
        }

        console.log('updating integration...')
        // update integration syncedAt if successful
        // (search sorts oldest-first, so the last item's updatedAt is the
        // new high-water mark)
        const updated = await updateIntegration(
          REST_BACKEND_ENDPOINT,
          integrationId,
          items[items.length - 1].updatedAt,
          integrationName,
          claims.token,
          token,
          'EXPORT'
        )
        if (!updated) {
          console.error('failed to update integration', {
            integrationId,
          })
          return res.status(400).send('Failed to update integration')
        }

        // avoid rate limiting
        await wait(500)
      }

      console.log('done')
    } catch (err) {
      console.error('export with integration failed', err)
      return res.status(500).send(err)
    }

    res.sendStatus(200)
  }
)
/**
 * HTTP cloud function: pull items from a third-party service (e.g. Pocket),
 * write them as a CSV (columns: url, state, labels) to the GCS upload
 * bucket, and advance the integration's syncedAt. The uploaded CSV is
 * processed downstream by the import handler.
 *
 * Auth: 'Omnivore-Authorization' header carrying a JWT with Claims
 * (uid = Omnivore user id, token = third-party API token).
 */
export const importer = Sentry.GCPFunction.wrapHttpFunction(
  async (req, res) => {
    console.log('start to import from integration')

    const JWT_SECRET = process.env.JWT_SECRET
    const REST_BACKEND_ENDPOINT = process.env.REST_BACKEND_ENDPOINT
    const GCS_BUCKET = process.env.GCS_UPLOAD_BUCKET
    if (!JWT_SECRET || !REST_BACKEND_ENDPOINT || !GCS_BUCKET) {
      return res.status(500).send('Environment not configured correctly')
    }

    const token = req.get('Omnivore-Authorization')
    if (!token) {
      return res.status(401).send({ errorCode: 'INVALID_TOKEN' })
    }

    let claims: Claims
    try {
      claims = jwt.verify(token, JWT_SECRET) as Claims
    } catch (e) {
      console.error(e)
      return res.status(401).send('UNAUTHORIZED')
    }

    if (!isIntegrationRequest(req.body)) {
      console.error('Invalid message')
      // 200 on a malformed payload so the caller does not retry it.
      return res.status(200).send('Bad Request')
    }

    // Kept outside the try so the finally block can close it on any path.
    let writeStream: NodeJS.WritableStream | undefined
    try {
      const userId = claims.uid
      const integrationClient = getIntegrationClient(req.body.integrationName)

      let offset = 0
      let syncedAt = req.body.syncAt
      const since = syncedAt
      const state = req.body.state || State.UNARCHIVED // default to unarchived

      console.log('importing pages from integration...')
      // get pages from integration
      const retrieved = await integrationClient.retrieve({
        token: claims.token,
        since,
        offset,
        state,
      })
      syncedAt = retrieved.since || Date.now()

      console.log('uploading items...')
      let retrievedData = retrieved.data

      // if there are pages to import
      if (retrievedData.length > 0) {
        // write the list of urls to a csv file and upload it to gcs
        // path style: imports/<uid>/<date>/<type>-<uuid>.csv
        const dateStr = DateTime.now().toISODate()
        const fileUuid = uuidv4()
        const fullPath = `imports/${userId}/${dateStr}/${integrationClient.name}-${fileUuid}.csv`
        // open a write_stream to the file
        const file = createGCSFile(GCS_BUCKET, fullPath)
        writeStream = file.createWriteStream({
          contentType: 'text/csv',
        })

        // stringify the data and pipe it to the write_stream
        const stringifier = stringify({
          header: true,
          columns: ['url', 'state', 'labels'],
        })
        stringifier.pipe(writeStream)

        // paginate api calls to the integration
        do {
          // write the list of urls, state and labels to the stream
          retrievedData.forEach((row) => stringifier.write(row))

          // get next pages from the integration
          offset += retrievedData.length
          const retrieved = await integrationClient.retrieve({
            token: claims.token,
            since,
            offset,
            state,
          })
          syncedAt = retrieved.since || Date.now()
          retrievedData = retrieved.data
          console.log('retrieved data', {
            total: offset,
            size: retrievedData.length,
          })

          console.log('uploading integration...')
          // update the integration's syncedAt and remove taskName
          // NOTE(review): this runs once per page, not once per run — each
          // successful page advances the stored syncedAt.
          const result = await updateIntegration(
            REST_BACKEND_ENDPOINT,
            req.body.integrationId,
            new Date(syncedAt),
            req.body.integrationName,
            claims.token,
            token,
            'IMPORT'
          )
          if (!result) {
            console.error('failed to update integration', {
              integrationId: req.body.integrationId,
            })
            return res.status(400).send('Failed to update integration')
          }
        } while (retrievedData.length > 0 && offset < 20000) // limit to 20k pages
      }

      console.log('done')
    } catch (err) {
      console.error('import pages from integration failed', err)
      return res.status(500).send(err)
    } finally {
      console.log('closing write stream')
      writeStream?.end()
    }

    res.sendStatus(200)
  }
)

View File

@ -0,0 +1,86 @@
import axios from 'axios'
import { IntegrationClient } from './integration'
import { PocketClient } from './pocket'
import { ReadwiseClient } from './readwise'
// Shape of the GraphQL response for the SetIntegration mutation. The result
// is a union of success and error branches, so errorCodes is only populated
// on the error branch.
interface SetIntegrationResponse {
  data: {
    setIntegration: {
      integration: {
        id: string
      }
      errorCodes: string[]
    }
  }
}
// All integration clients this handler knows about.
const clients: IntegrationClient[] = [new ReadwiseClient(), new PocketClient()]

/**
 * Look up an integration client by name (e.g. 'READWISE', 'POCKET').
 *
 * @throws Error when no registered client matches the given name.
 */
export const getIntegrationClient = (name: string): IntegrationClient => {
  for (const candidate of clients) {
    if (candidate.name === name) {
      return candidate
    }
  }
  throw new Error(`Integration client not found: ${name}`)
}
/**
 * Record a successful sync by calling the backend's setIntegration mutation,
 * which updates the integration's syncedAt and keeps it enabled.
 *
 * @param apiEndpoint - base URL of the backend API
 * @param id - integration id to update
 * @param syncedAt - new sync high-water mark
 * @param name - integration name (e.g. 'READWISE', 'POCKET')
 * @param integrationToken - third-party API token stored on the integration
 * @param token - auth cookie value for authenticating with the backend
 * @param type - 'EXPORT' or 'IMPORT'
 * @returns true on success; false when the mutation reports error codes or
 *   the request fails (errors are logged, never thrown)
 */
export const updateIntegration = async (
  apiEndpoint: string,
  id: string,
  syncedAt: Date,
  name: string,
  integrationToken: string,
  token: string,
  type: string
): Promise<boolean> => {
  const requestData = JSON.stringify({
    query: `
  mutation SetIntegration($input: SetIntegrationInput!) {
    setIntegration(input: $input) {
      ... on SetIntegrationSuccess {
        integration {
          id
          enabled
        }
      }
      ... on SetIntegrationError {
        errorCodes
      }
    }
  }`,
    variables: {
      input: {
        id,
        syncedAt,
        name,
        token: integrationToken,
        enabled: true,
        type,
      },
    },
  })

  try {
    const response = await axios.post<SetIntegrationResponse>(
      `${apiEndpoint}/graphql`,
      requestData,
      {
        headers: {
          Cookie: `auth=${token};`,
          'Content-Type': 'application/json',
          'X-OmnivoreClient': 'integration-handler',
        },
      }
    )
    // errorCodes is only present on the error branch of the result union.
    if (response.data.data.setIntegration.errorCodes) {
      console.error(response.data.data.setIntegration.errorCodes)
      return false
    }
    return true
  } catch (error) {
    console.error(error)
    return false
  }
}

View File

@ -0,0 +1,41 @@
import { Item } from '../item'
// Read-state filter for retrieved items. Mirrors the backend's
// ImportItemState enum values.
export enum State {
  ARCHIVED = 'ARCHIVED',
  UNREAD = 'UNREAD',
  UNARCHIVED = 'UNARCHIVED',
  ALL = 'ALL',
}
// One imported row: becomes a line in the uploaded CSV (url, state, labels).
export interface RetrievedData {
  url: string
  labels?: string[]
  state?: string
}
// One page of results returned by IntegrationClient.retrieve.
export interface RetrievedResult {
  data: RetrievedData[]
  hasMore?: boolean
  since?: number // unix timestamp in milliseconds
}
// Parameters for IntegrationClient.retrieve (offset-based pagination).
export interface RetrieveRequest {
  token: string
  since?: number // unix timestamp in milliseconds
  count?: number
  offset?: number
  state: State
}
/**
 * Base class for integration clients. Subclasses override `export` (push
 * Omnivore items to the service) and/or `retrieve` (pull items from it);
 * the defaults are no-ops so a client only implements the direction it
 * supports.
 */
export abstract class IntegrationClient {
  abstract name: string
  abstract apiUrl: string
  // When true, the exporter only searches for items that have highlights.
  highlightOnly = true

  // Push items to the service. Default: not supported, report failure.
  export = async (token: string, items: Item[]): Promise<boolean> => {
    return Promise.resolve(false)
  }

  // Pull a page of items from the service. Default: empty result.
  retrieve = async (req: RetrieveRequest): Promise<RetrievedResult> => {
    return Promise.resolve({ data: [] })
  }
}

View File

@ -0,0 +1,155 @@
import axios from 'axios'
import {
IntegrationClient,
RetrievedResult,
RetrieveRequest,
State,
} from './integration'
// Response of Pocket's /v3/get endpoint, as consumed here.
interface PocketResponse {
  status: number // 1 if success
  complete: number // 1 if all items have been returned
  list: {
    [key: string]: PocketItem // keyed by item_id
  }
  since: number // unix timestamp in seconds
  search_meta: {
    search_type: string
  }
  error: string
}

// A single saved item as returned by Pocket (all scalars are strings).
interface PocketItem {
  item_id: string
  resolved_id: string
  given_url: string
  resolved_url: string
  given_title: string
  resolved_title: string
  favorite: string
  status: string // '0' normal, '1' archived, '2' deleted (see statusToState)
  excerpt: string
  word_count: string
  tags?: {
    [key: string]: Tag
  }
  authors?: {
    [key: string]: Author
  }
}

interface Tag {
  item_id: string
  tag: string
}

interface Author {
  item_id: string
  author_id: string
  name: string
}
/**
 * Import-side integration client for Pocket. Retrieves saved items via
 * Pocket's /v3/get endpoint and maps them into Omnivore import rows.
 */
export class PocketClient extends IntegrationClient {
  name = 'POCKET'
  apiUrl = 'https://getpocket.com/v3'
  // X-Accept is required for Pocket to respond with JSON.
  headers = {
    'Content-Type': 'application/json',
    'X-Accept': 'application/json',
  }

  /**
   * Raw call to Pocket's /v3/get.
   *
   * @param since - unix timestamp in seconds (Pocket's convention)
   * @returns parsed response, or null on transport error / timeout
   */
  retrievePocketData = async (
    accessToken: string,
    since: number, // unix timestamp in seconds
    count = 100,
    offset = 0,
    state = 'all'
  ): Promise<PocketResponse | null> => {
    const url = `${this.apiUrl}/get`
    try {
      const response = await axios.post<PocketResponse>(
        url,
        {
          consumer_key: process.env.POCKET_CONSUMER_KEY,
          access_token: accessToken,
          state,
          detailType: 'complete', // include tags/authors in the response
          since,
          sort: 'oldest', // oldest-first keeps offset pagination stable
          count,
          offset,
        },
        {
          headers: this.headers,
          timeout: 10000, // 10 seconds
        }
      )
      return response.data
    } catch (error) {
      console.error('error retrievePocketData: ', error)
      return null
    }
  }

  /**
   * Retrieve one page of Pocket items mapped to import rows.
   * `since` is milliseconds here (converted to Pocket's seconds on the way
   * out, and back to milliseconds on the way in).
   *
   * @throws Error when Pocket is unreachable or reports an error.
   */
  retrieve = async ({
    token,
    since = 0,
    count = 100,
    offset = 0,
    state,
  }: RetrieveRequest): Promise<RetrievedResult> => {
    // Map our State onto Pocket's state parameter. UNARCHIVED has no Pocket
    // equivalent, so we fetch 'all' and filter archived items out below.
    let pocketItemState = 'all'
    switch (state) {
      case State.ARCHIVED:
        pocketItemState = 'archive'
        break
      case State.UNREAD:
        pocketItemState = 'unread'
        break
    }

    const pocketData = await this.retrievePocketData(
      token,
      since / 1000,
      count,
      offset,
      pocketItemState
    )
    if (!pocketData) {
      throw new Error('Error retrieving pocket data')
    }

    const pocketItems = Object.values(pocketData.list)
    // Pocket item status codes -> Omnivore import states.
    const statusToState: Record<string, string> = {
      '0': 'SUCCEEDED',
      '1': 'ARCHIVED',
      '2': 'DELETED',
    }
    const data = pocketItems
      .map((item) => ({
        url: item.given_url,
        labels: item.tags
          ? Object.values(item.tags).map((tag) => tag.tag)
          : undefined,
        state: statusToState[item.status],
      }))
      .filter((item) => {
        // Always drop deleted items; drop archived ones when the caller
        // asked for UNARCHIVED only.
        if (item.state === 'DELETED') {
          return false
        }
        return state !== State.UNARCHIVED || item.state !== 'ARCHIVED'
      })

    if (pocketData.error) {
      throw new Error(`Error retrieving pocket data: ${pocketData.error}`)
    }

    return {
      data,
      since: pocketData.since * 1000, // seconds -> milliseconds
    }
  }
}

View File

@ -0,0 +1,114 @@
import axios from 'axios'
import { wait } from '..'
import { highlightUrl, Item } from '../item'
import { IntegrationClient } from './integration'
// Payload for one highlight in Readwise's POST /highlights API.
interface ReadwiseHighlight {
  // The highlight text, (technically the only field required in a highlight object)
  text: string
  // The title of the page the highlight is on
  title?: string
  // The author of the page the highlight is on
  author?: string
  // The URL of the page image
  image_url?: string
  // The URL of the page
  source_url?: string
  // A meaningful unique identifier for your app
  source_type?: string
  // One of: books, articles, tweets or podcasts
  category?: string
  // Annotation note attached to the specific highlight
  note?: string
  // Highlight's location in the source text. Used to order the highlights
  location?: number
  // One of: page, order or time_offset
  location_type?: string
  // A datetime representing when the highlight was taken in the ISO 8601 format
  highlighted_at?: string
  // Unique url of the specific highlight
  highlight_url?: string
}
/**
 * Export-side integration client for Readwise: pushes Omnivore highlights
 * to Readwise's POST /highlights API, with retry on rate limiting.
 */
export class ReadwiseClient extends IntegrationClient {
  name = 'READWISE'
  apiUrl = 'https://readwise.io/api/v2'

  /**
   * Export the highlights of the given items. An empty batch counts as
   * success (nothing to do).
   */
  export = async (token: string, items: Item[]): Promise<boolean> => {
    let result = true
    const highlights = items.flatMap(this.itemToReadwiseHighlight)
    // If there are no highlights, we will skip the sync
    if (highlights.length > 0) {
      result = await this.syncWithReadwise(token, highlights)
    }
    return result
  }

  // Map one Omnivore item to Readwise highlight payloads. Only entries of
  // type 'HIGHLIGHT' that carry a quote are exported.
  itemToReadwiseHighlight = (item: Item): ReadwiseHighlight[] => {
    const category = item.siteName === 'Twitter' ? 'tweets' : 'articles'
    return item.highlights
      .map((highlight) => {
        // filter out highlights that are not of type highlight or have no quote
        if (highlight.type !== 'HIGHLIGHT' || !highlight.quote) {
          return undefined
        }
        return {
          text: highlight.quote,
          title: item.title,
          author: item.author || undefined,
          highlight_url: highlightUrl(item.slug, highlight.id),
          highlighted_at: new Date(highlight.createdAt).toISOString(),
          category,
          image_url: item.image || undefined,
          location_type: 'order',
          note: highlight.annotation || undefined,
          source_type: 'omnivore',
          source_url: item.url,
        }
      })
      .filter((highlight) => highlight !== undefined) as ReadwiseHighlight[]
  }

  /**
   * POST the highlights to Readwise. On HTTP 429, waits for the
   * Retry-After header (default 10s) and retries up to 3 times.
   *
   * @returns true when Readwise responds with 200
   */
  syncWithReadwise = async (
    token: string,
    highlights: ReadwiseHighlight[],
    retryCount = 0
  ): Promise<boolean> => {
    const url = `${this.apiUrl}/highlights`
    try {
      const response = await axios.post(
        url,
        {
          highlights,
        },
        {
          headers: {
            Authorization: `Token ${token}`,
            'Content-Type': 'application/json',
          },
          timeout: 5000, // 5 seconds
        }
      )
      return response.status === 200
    } catch (error) {
      console.error(error)
      if (axios.isAxiosError(error)) {
        if (error.response?.status === 429 && retryCount < 3) {
          console.log('Readwise API rate limit exceeded, retrying...')
          // wait for Retry-After seconds in the header if rate limited
          // max retry count is 3
          const retryAfter = error.response?.headers['retry-after'] || '10' // default to 10 seconds
          await wait(parseInt(retryAfter, 10) * 1000)
          return this.syncWithReadwise(token, highlights, retryCount + 1)
        }
      }
      return false
    }
  }
}

View File

@ -0,0 +1,115 @@
import axios from 'axios'
// Shape of the backend's GraphQL Search response, as consumed here.
interface SearchResponse {
  data: {
    search: {
      edges: Edge[]
      pageInfo: {
        hasNextPage: boolean
        endCursor: string
      }
    }
  }
  errors?: {
    message: string
  }[]
}

interface Edge {
  node: Item
}

// A library item with the fields the exporter needs.
export interface Item {
  id: string
  title: string
  image: string | null
  author: string | null
  siteName: string | null
  highlights: Highlight[]
  slug: string
  url: string
  updatedAt: Date
}

interface Highlight {
  id: string
  quote: string
  annotation: string | null
  type: string // e.g. 'HIGHLIGHT'; other types are skipped on export
  createdAt: string
}
/**
 * Search the Omnivore backend for items updated since `updatedSince`,
 * sorted oldest-first, optionally restricted to items with highlights.
 *
 * Fix: the original passed `first` and `after` in the GraphQL variables but
 * never declared them on the operation nor applied them to the `search`
 * field, so pagination was silently ignored and every call returned the
 * first page (making the exporter's cursor loop spin on the same items).
 * The operation now declares and forwards both. `first` is converted to a
 * number because the GraphQL Int type does not coerce strings.
 *
 * @param apiEndpoint - base URL of the backend API
 * @param token - auth cookie value for the backend request
 * @param highlightOnly - restrict results to items that have highlights
 * @param updatedSince - lower bound on item updatedAt
 * @param first - page size as a stringified number (default '50')
 * @param after - pagination cursor from the previous page's endCursor
 * @returns parsed response, or null on transport error (logged, not thrown)
 */
export const search = async (
  apiEndpoint: string,
  token: string,
  highlightOnly: boolean,
  updatedSince: Date,
  first = '50',
  after = '0'
): Promise<SearchResponse | null> => {
  const query = `updated:${updatedSince.toISOString()} ${
    highlightOnly ? 'has:highlights' : ''
  } sort:updated-asc`

  const requestData = JSON.stringify({
    query: `query Search($query: String, $first: Int, $after: String) {
    search(query: $query, first: $first, after: $after) {
      ... on SearchSuccess {
        edges {
          node {
            id
            slug
            url
            updatedAt
            title
            image
            author
            siteName
            highlights {
              id
              quote
              annotation
              type
              createdAt
            }
          }
        }
        pageInfo {
          hasNextPage
          endCursor
        }
      }
      ... on SearchError {
        errorCodes
      }
    }
  }`,
    variables: {
      query,
      // GraphQL Int variables must be numbers, not numeric strings.
      first: parseInt(first, 10),
      after,
    },
  })

  try {
    const response = await axios.post<SearchResponse>(
      `${apiEndpoint}/graphql`,
      requestData,
      {
        headers: {
          Cookie: `auth=${token};`,
          'Content-Type': 'application/json',
          'X-OmnivoreClient': 'integration-handler',
        },
      }
    )
    return response.data
  } catch (error) {
    console.error(error)
    return null
  }
}
/**
 * Canonical public URL of a highlight: the item's reader page with the
 * highlight id as the URL fragment.
 */
export const highlightUrl = (slug: string, highlightId: string): string => {
  const readerPage = `https://omnivore.app/me/${slug}`
  return `${readerPage}#${highlightId}`
}

View File

@ -0,0 +1,3 @@
// Hook Babel into require() so mocha can load TypeScript test files directly
// (see mocha-config.json's "require" entry).
const register = require('@babel/register').default;
register({ extensions: ['.ts', '.tsx', '.js', '.jsx'] });

View File

@ -0,0 +1,8 @@
import 'mocha'
import { expect } from 'chai'
// Placeholder spec so the mocha runner has at least one test to execute.
describe('stub test', () => {
  it('should pass', () => {
    expect(true).to.be.true
  })
})

View File

@ -0,0 +1,11 @@
{
"extends": "./../../tsconfig.json",
"ts-node": { "files": true },
"compilerOptions": {
"outDir": "build",
"rootDir": ".",
// Generate d.ts files
"declaration": true
},
"include": ["src/**/*", "test/**/*"]
}