Files
omnivore/packages/api/src/utils/helpers.ts
Jackson Harper e652a6ea8c Rebased version of the elastic PR (#225)
* Add elastic to our docker compose

* add AND/OR/NOT search operations

* add elastic and create article in elastic

* change error code when elastic throws error

* add search pages in elastic

* add search by labels

* Add elastic to GitHub Action

* Update elastic version

* Fix port for elastic

* add url in search query

* Set elastic features when running tests

* add debug logs

* Use localhost instead of service hostname

* refresh elastic after create/update

* update search labels query

* add typescript support

* search pages in elastic

* fix search queries

* use elastic for saving page

* fix test failure

* update getArticle api to use elastic

* use generic get page function

* add elastic migration python script

* fix bulk helper param

* save elastic page id in article_saving_request instead of postgres article_id

* fix page archiving and deleting

* add tests for deleteArticle

* remove custom date type in elastic mappings, which does not exist in older versions of elastic

* fix timestamp format issue

* add tests for save reading progress

* add tests for save file

* optimize search results

* add alias to index

* update migration script to receive env var as params

* Add failing test to validate we don't decrease reading progress

This test is failing with Elastic because we aren't fetching
the reading progress from elastic here, and are fetching it
from postgres.

* Rename readingProgress to readingProgressPercent

This is the name stored in elastic, so fixes issues pulling the
value out.

* Linting

* Add failing test for creating highlights w/elastic

This test fails because the highlight can't be looked up. Is there
a different ID we should be passing in to query for highlights,
or do we need to update the query to look for elastic_id?

* add tests code coverage threshold

* update nyc config

* include more files in test coverage

* change alias name

* update updateContent to update pages in elastic

* remove debug log

* fix createhighlight test

* search pages by alias in elastic

* update set labels and delete labels in elastic

* migration script enumeration

* make BULK_SIZE an env var

* fix pdf search indexing

* debug github action exit issue

* call pubsub when create/update/delete page in elastic

* fix json parsing bug and reduce reading data from file

* replace a deprecated pubsub api call

* debug github action exit issue

* debug github action exit issue

* add handler to upload elastic page data to GCS

* fix tests

* Use http_auth instead of basic_auth

* add index creation and existing postgres tables update in migration script

* fix a typo to connect to elastic

* rename readingProgress to readingProgressPercent

* migrate elastic_page_id in highlights and article_saving_request tables

* update migration script to include number of updated rows

* update db migration query

* read index mappings from file

* fix upload pages to gcs

* fix tests failure due to pageContext

* fix upload file id not exist error

* Handle savedAt & isArchived attributes w/out querying elastic

* Fix prettier issues

* fix content-type mismatching

* revert pageId to linkId because frontend was not deployed yet

* fix newsletters and attachment not saved in elastic

* put linkId in article for setting labels

* exclude originalHtml in the result of searching to improve performance

* exclude content in the result of searching to improve performance

* remove score sorting

* do not refresh immediately to reduce searching and indexing time

* do not replace the backup data in gcs

* fix no article id defined in articleSavingRequest

* add logging of elastic api running time

* reduce home feed pagination size to 15

* reduce home feed pagination size to 10

* stop revalidating first page

* do not use a separate api to fetch reading progress

* Remove unused comment

* get reading progress if not exists

* replace ngram tokenizer with standard tokenizer

* fix tests

* remove .env.local

* add sort keyword in searching to sort by score

Co-authored-by: Hongbo Wu <hongbo@omnivore.app>
2022-03-16 12:08:59 +08:00

250 lines
6.6 KiB
TypeScript

/* eslint-disable @typescript-eslint/no-unsafe-assignment */
import {
ArticleSavingRequest,
ArticleSavingRequestStatus,
CreateArticleError,
CreateArticleErrorCode,
FeedArticle,
Profile,
ResolverFn,
} from '../generated/graphql'
import { Claims, WithDataSourcesContext } from '../resolvers/types'
import {
MembershipTier,
RegistrationType,
UserData,
} from '../datalayer/user/model'
import crypto from 'crypto'
import slugify from 'voca/slugify'
import { Merge } from '../util'
import { ArticleSavingRequestData } from '../datalayer/article_saving_request/model'
import { CreateArticlesSuccessPartial } from '../resolvers'
/** Loosely typed bag of properties accepted by {@link keysToCamelCase}. */
interface InputObject {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  [key: string]: any
}

/**
 * Renames every own enumerable `snake_case` key of `object` to `camelCase`.
 *
 * The object is mutated in place and also returned. Keys without an
 * underscore are left alone. Only top-level keys are renamed; nested objects
 * are not visited. If the camel-cased name already exists on the object its
 * value is overwritten.
 *
 * Empty segments (consecutive, leading or trailing underscores) are tolerated:
 * `a__b` becomes `aB` and `trailing_` becomes `trailing`. Keys consisting only
 * of underscores are kept as they are, since they have no camelCase form.
 *
 * @param object - object whose keys are renamed in place
 * @returns the same object instance that was passed in
 */
export const keysToCamelCase = (object: InputObject): InputObject => {
  // Object.keys takes a snapshot, so adding/removing keys while looping is safe.
  for (const key of Object.keys(object)) {
    const parts = key.split('_')
    if (parts.length <= 1) continue

    const newKey =
      parts[0] +
      parts
        .slice(1)
        // charAt(0) yields '' for an empty segment instead of throwing.
        .map((part) => part.charAt(0).toUpperCase() + part.slice(1))
        .join('')

    // Underscore-only keys would collapse to '', which is not a useful name.
    if (newKey === '') continue

    const value = object[key]
    object[newKey] = value
    delete object[key]
  }
  return object
}
/**
 * Computes the MD5 digest of a string as lowercase hexadecimal.
 *
 * @param str - text to hash
 * @param convertToUUID - when true, the 32 hex characters are grouped as
 *   8-4-4-4-12 and joined with dashes so the result has the shape of a UUID
 * @returns the hex digest, optionally in UUID-like form
 * @example
 * // returns "098f6bcd4621d373cade4e832627b4f6"
 * stringToHash('test')
 * @example
 * // returns "098f6bcd-4621-d373-cade-4e832627b4f6"
 * stringToHash('test', true)
 */
export const stringToHash = (str: string, convertToUUID = false): string => {
  const digest = crypto.createHash('md5').update(str).digest('hex')
  if (convertToUUID) {
    const groups = [
      digest.slice(0, 8),
      digest.slice(8, 12),
      digest.slice(12, 16),
      digest.slice(16, 20),
      digest.slice(20),
    ]
    return groups.join('-').toLowerCase()
  }
  return digest
}
/**
 * Wraps a resolver so that it only runs for requests whose context carries
 * claims with a `uid`.
 *
 * When `ctx.claims?.uid` is falsy the wrapped resolver is not called and the
 * result is `{ errorCodes: ['UNAUTHORIZED'] }`. Otherwise the resolver is
 * invoked with a shallow copy of the context in which `claims` is known to be
 * present.
 *
 * @param resolver - resolver that requires `claims` on its context
 * @returns a resolver usable with the plain data-sources context
 */
export function authorized<
  TSuccess,
  TError extends { errorCodes: string[] },
  /* eslint-disable @typescript-eslint/no-explicit-any */
  TArgs = any,
  TParent = any
  /* eslint-enable @typescript-eslint/no-explicit-any */
>(
  resolver: ResolverFn<
    TSuccess | TError,
    TParent,
    WithDataSourcesContext & { claims: Claims },
    TArgs
  >
): ResolverFn<TSuccess | TError, TParent, WithDataSourcesContext, TArgs> {
  return (parent, args, ctx, info) => {
    const claims = ctx.claims
    // Guard clause: reject before touching the wrapped resolver.
    if (!claims?.uid) {
      return { errorCodes: ['UNAUTHORIZED'] } as TError
    }
    const authorizedCtx = { ...ctx, claims }
    return resolver(parent, args, authorizedCtx, info)
  }
}
/**
 * Guesses the column delimiter of delimited text (CSV/TSV and similar).
 *
 * Double-quoted sections are removed first because they may contain
 * delimiter characters that are not column separators. The first five
 * non-empty lines of what remains are then sampled: a candidate wins when it
 * splits every sampled line into the same number of columns and that number
 * is greater than one. Candidates are tried in the order given.
 *
 * Blank lines (including the one produced by a trailing newline) are ignored,
 * and both `\n` and `\r\n` line endings are supported.
 *
 * @param text - raw file contents
 * @param delimiters - candidate delimiters, in priority order
 * @param defaultDelimiter - returned when no candidate fits or the text has
 *   no non-empty lines
 * @returns the detected delimiter, or `defaultDelimiter`
 */
export const findDelimiter = (
  text: string,
  delimiters = ['\t', ',', ':', ';'],
  defaultDelimiter = '\t'
): string => {
  const sample = text
    // remove escaped sections that can contain false-positive delimiters;
    // [^"] also covers \r and \n, so quoted multi-line fields are stripped whole
    .replace(/"[^"]*"/g, '')
    .split(/\r?\n/)
    // a blank line would always count as a single column and veto every candidate
    .filter((row) => row.length > 0)
    .slice(0, 5)

  // `every` is vacuously true on an empty sample, so bail out explicitly.
  if (sample.length === 0) return defaultDelimiter

  const detected = delimiters.find((candidate) => {
    const expectedColumns = sample[0].split(candidate).length
    return (
      expectedColumns > 1 &&
      sample.every((row) => row.split(candidate).length === expectedColumns)
    )
  })
  return detected || defaultDelimiter
}
// FIXME: Remove this Date stub after nullable types will be fixed
/**
 * Converts a stored user record (optionally enriched with social counters)
 * into the user shape returned by the API.
 *
 * All fields of `user` are copied over, then the following are overridden:
 * - `source` and `membership` are cast to their enum types;
 * - `createdAt` falls back to the current time when it is falsy;
 * - the numeric counters fall back to `0` when they are falsy;
 * - `viewerIsFollowing` is true when either `viewerIsFollowing` or
 *   `isFriend` is truthy, otherwise `false`;
 * - `picture` and `profile.pictureUrl` are taken from
 *   `user.profile.picture_url`;
 * - `sharedArticles` is always an empty list.
 *
 * @param user - user record merged with optional per-viewer fields
 * @returns the API-facing user object
 */
export const userDataToUser = (
  user: Merge<
    UserData,
    {
      isFriend?: boolean
      followersCount?: number
      friendsCount?: number
      sharedArticlesCount?: number
      sharedHighlightsCount?: number
      sharedNotesCount?: number
      viewerIsFollowing?: boolean
    }
  >
): {
  id: string
  name: string
  source: RegistrationType
  membership: MembershipTier
  email?: string | null
  phone?: string | null
  picture?: string | null
  googleId?: string | null
  createdAt: Date
  isFriend?: boolean | null
  isFullUser: boolean
  viewerIsFollowing?: boolean | null
  sourceUserId: string
  friendsCount?: number
  followersCount?: number
  sharedArticles: FeedArticle[]
  sharedArticlesCount?: number
  sharedHighlightsCount?: number
  sharedNotesCount?: number
  profile: Profile
} => {
  const membership = user.membership as MembershipTier
  const pictureUrl = user.profile.picture_url

  return {
    ...user,
    name: user.name,
    source: user.source as RegistrationType,
    membership,
    createdAt: user.createdAt || new Date(),
    friendsCount: user.friendsCount || 0,
    followersCount: user.followersCount || 0,
    isFullUser: isFullUser(membership),
    viewerIsFollowing: user.viewerIsFollowing || user.isFriend || false,
    picture: pictureUrl,
    sharedArticles: [],
    sharedArticlesCount: user.sharedArticlesCount || 0,
    sharedHighlightsCount: user.sharedHighlightsCount || 0,
    sharedNotesCount: user.sharedNotesCount || 0,
    profile: {
      ...user.profile,
      pictureUrl,
    },
  }
}
/**
 * Tells whether a membership tier grants full access.
 *
 * @param membership - the user's membership tier
 * @returns `false` only for `MembershipTier.WaitList`, `true` for every other
 *   value
 */
export const isFullUser = (membership: MembershipTier): boolean => {
  // Strict comparison: both operands are MembershipTier values, so no
  // coercion is wanted.
  return membership !== MembershipTier.WaitList
}
/**
 * Builds a slug from a title: the slugified title truncated to 64 characters,
 * a dash, and the current timestamp (`Date.now()`) in hexadecimal.
 *
 * @param title - text to derive the slug from
 * @returns the generated slug
 */
export const generateSlug = (title: string): string => {
  const base = slugify(title).substring(0, 64)
  const suffix = Date.now().toString(16)
  return `${base}-${suffix}`
}
/** Upper bound on content length: 5e7 = 50,000,000 (50MB per the inline note). */
export const MAX_CONTENT_LENGTH = 5e7 //50MB
/**
 * Records a failed article save on its saving request and passes the error
 * through.
 *
 * When a saving request is supplied, it is updated inside `ctx.authTrx` to
 * status `Failed` with the first error code of `result`. Without a saving
 * request nothing is written.
 *
 * @param result - the error being returned to the caller
 * @param ctx - resolver context providing `authTrx` and the models
 * @param articleSavingReqest - saving request to mark as failed, if any
 * @returns `result`, unchanged
 */
export const articleSavingRequestError = async (
  result: CreateArticleError,
  ctx: WithDataSourcesContext,
  articleSavingReqest?: ArticleSavingRequestData
): Promise<CreateArticleError | CreateArticlesSuccessPartial> => {
  if (articleSavingReqest) {
    const requestId = articleSavingReqest.id
    await ctx.authTrx((tx) => {
      const failure = {
        status: ArticleSavingRequestStatus.Failed,
        errorCode: result.errorCodes[0],
      }
      return ctx.models.articleSavingRequest.update(requestId, failure, tx)
    })
  }
  return result
}
/**
 * Records a successful article save on its saving request and passes the
 * result through.
 *
 * When a saving request id is supplied, the request is updated inside
 * `ctx.authTrx` to status `Succeeded` and its `elasticPageId` is set to
 * `articleId`. Without an id nothing is written.
 *
 * @param result - the success payload being returned to the caller
 * @param ctx - resolver context providing `authTrx` and the models
 * @param articleSavingReqestId - id of the saving request to update, if any
 * @param articleId - id stored as the request's `elasticPageId`
 * @returns `result`, unchanged
 */
export const articleSavingRequestPopulate = async (
  result: CreateArticlesSuccessPartial,
  ctx: WithDataSourcesContext,
  articleSavingReqestId: string | undefined,
  articleId: string | undefined
): Promise<CreateArticleError | CreateArticlesSuccessPartial> => {
  if (articleSavingReqestId) {
    await ctx.authTrx((tx) => {
      const success = {
        status: ArticleSavingRequestStatus.Succeeded,
        elasticPageId: articleId,
      }
      return ctx.models.articleSavingRequest.update(
        articleSavingReqestId,
        success,
        tx
      )
    })
  }
  return result
}
/**
 * Converts a stored saving request plus its owner into the API-facing
 * `ArticleSavingRequest`.
 *
 * All stored fields are copied; `user` is produced by `userDataToUser`, and
 * `status` / `errorCode` are cast to their enum types.
 *
 * @param user - owner of the saving request
 * @param articleSavingRequest - stored saving request record
 * @returns the API-facing saving request
 */
export const articleSavingRequestDataToArticleSavingRequest = (
  user: UserData,
  articleSavingRequest: ArticleSavingRequestData
): ArticleSavingRequest => {
  const status = articleSavingRequest.status as ArticleSavingRequestStatus
  const errorCode = articleSavingRequest.errorCode as CreateArticleErrorCode
  return {
    ...articleSavingRequest,
    user: userDataToUser(user),
    status,
    errorCode,
  }
}
/**
 * Normalizes a date-like value into a usable `Date`, or `undefined` when it
 * cannot be trusted.
 *
 * Returns `undefined` when the input is missing or an empty string, when it
 * does not represent a valid point in time (unparseable string or an Invalid
 * Date object), or when its local-time year is greater than 9999.
 *
 * @param date - a `Date`, a date string, or `undefined`
 * @returns a new `Date` instance (never the one passed in), or `undefined`
 */
export const validatedDate = (
  date: Date | string | undefined
): Date | undefined => {
  try {
    // Sometimes readability returns a string for the date
    const candidate = typeof date === 'string' ? new Date(date) : date
    if (!candidate) return undefined

    const year = candidate.getFullYear()
    // getFullYear() is NaN for an Invalid Date; `NaN > 9999` is false, so the
    // invalid case has to be rejected explicitly.
    if (Number.isNaN(year)) return undefined
    // Make sure the date year is not greater than 9999
    if (year > 9999) return undefined

    return new Date(candidate)
  } catch (e) {
    // Reached when a non-Date, non-string value slips through at runtime.
    console.log('error validating date', date, e)
    return undefined
  }
}