Files
omnivore/packages/api/src/utils/helpers.ts
2024-07-10 17:59:14 +08:00

294 lines
7.5 KiB
TypeScript

/* eslint-disable @typescript-eslint/no-unsafe-assignment */
import languages from '@cospired/i18n-iso-languages'
import { countWords } from 'alfaaz'
import crypto from 'crypto'
import { FingerprintGenerator } from 'fingerprint-generator'
import Redis from 'ioredis'
import { parseHTML } from 'linkedom'
import normalizeUrl from 'normalize-url'
import path from 'path'
import _ from 'underscore'
import slugify from 'voca/slugify'
import { LibraryItem, LibraryItemState } from '../entity/library_item'
import { CreateArticleError } from '../generated/graphql'
import { createPubSubClient } from '../pubsub'
import { validateUrl } from '../services/create_page_save_request'
import { updateLibraryItem } from '../services/library_item'
import { logger } from './logger'
/**
 * Loosely typed, string-keyed object. Used in this file as the parameter and
 * return type of `keysToCamelCase`; values are intentionally untyped (`any`).
 */
interface InputObject {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
[key: string]: any
}
/**
 * Matches tweet status links on twitter.com.
 *
 * Accepts an optional "#!/" prefix after the host, "status" or "statuses" as
 * the path keyword, and an optional trailing path after the numeric id.
 * Capture group 1 is the word-character path segment before the keyword
 * (presumably the account handle); capture group 2 is the run of digits after it.
 *
 * The pattern is not anchored, so it matches anywhere inside a longer string,
 * and it has no `g` flag, so `test()` on it is stateless.
 */
export const TWEET_URL_REGEX =
/twitter\.com\/(?:#!\/)?(\w+)\/status(?:es)?\/(\d+)(?:\/.*)?/
/**
 * Renames every snake_case key of `object` to camelCase, in place.
 *
 * Only the object's own enumerable top-level keys are considered; nested
 * objects are not visited. Keys without an underscore are left untouched.
 * Empty segments (from leading, trailing or doubled underscores) contribute
 * nothing to the new key instead of throwing, e.g. `foo__bar` -> `fooBar`.
 * A key consisting solely of underscores is left as it is.
 *
 * @param object - object to mutate
 * @returns the same object instance that was passed in
 */
export const keysToCamelCase = (object: InputObject): InputObject => {
  // Object.keys returns a snapshot, so renaming keys while iterating is safe.
  for (const key of Object.keys(object)) {
    const [head, ...rest] = key.split('_')
    if (rest.length === 0) continue

    // charAt(0) yields '' for an empty segment, whereas indexing with [0]
    // yields undefined and would make toUpperCase() throw.
    const newKey =
      head +
      rest.map((part) => part.charAt(0).toUpperCase() + part.slice(1)).join('')

    // A key made only of underscores has no camelCase form; keep it.
    if (newKey === '') continue

    const value = object[key]
    delete object[key]
    object[newKey] = value
  }
  return object
}
/**
 * Returns the MD5 digest of a string as lowercase hex, optionally grouped
 * like a UUID.
 *
 * The UUID form is only the 32 hex digits split into 8-4-4-4-12 groups joined
 * by dashes; no UUID version or variant bits are set.
 *
 * @param str - string to hash
 * @param convertToUUID - when true, return the dash-separated form
 * @returns the 32-character hex digest, or its 36-character grouped form
 * @example
 * // returns "098f6bcd4621d373cade4e832627b4f6"
 * stringToHash('test')
 * @example
 * // returns "098f6bcd-4621-d373-cade-4e832627b4f6"
 * stringToHash('test', true)
 */
export const stringToHash = (str: string, convertToUUID = false): string => {
  // digest('hex') already yields lowercase digits, so no case folding is needed.
  const md5Hash = crypto.createHash('md5').update(str).digest('hex')
  if (!convertToUUID) return md5Hash

  return [
    md5Hash.slice(0, 8),
    md5Hash.slice(8, 12),
    md5Hash.slice(12, 16),
    md5Hash.slice(16, 20),
    md5Hash.slice(20),
  ].join('-')
}
/**
 * Guesses the column delimiter of delimited text by sampling its first rows.
 *
 * Double-quoted sections are removed first, because they may contain
 * characters that look like delimiters. Blank rows (for example the empty
 * string left after a trailing newline) are ignored, then up to five rows are
 * sampled. The first candidate that splits every sampled row into the same
 * number of columns, and into more than one column, wins.
 *
 * @param text - raw delimited text
 * @param delimiters - candidates, tried in order
 * @param defaultDelimiter - returned when no candidate fits or no rows remain
 * @returns the detected delimiter, or `defaultDelimiter`
 */
export const findDelimiter = (
  text: string,
  delimiters = ['\t', ',', ':', ';'],
  defaultDelimiter = '\t'
): string => {
  const sampleRows = text
    // [\s\S] also spans "\r", so quoted fields containing CRLF are removed too
    .replace(/"[\s\S]*?"/g, '')
    .split('\n')
    // a blank row has a single column and would disqualify every candidate
    .filter((row) => row !== '' && row !== '\r')
    .slice(0, 5)

  // every() is vacuously true on an empty array; do not guess without data
  if (sampleRows.length === 0) return defaultDelimiter

  const detected = delimiters.find((candidate) => {
    const expectedColumns = sampleRows[0].split(candidate).length
    return (
      expectedColumns > 1 &&
      sampleRows.every((row) => row.split(candidate).length === expectedColumns)
    )
  })

  return detected || defaultDelimiter
}
/**
 * Builds a slug from a title: the slugified title cut to at most 64
 * characters, a dash, then the current epoch-millisecond timestamp in hex.
 *
 * @param title - human-readable title
 * @returns slug of the form `<slugified-title>-<hex timestamp>`
 */
export const generateSlug = (title: string): string => {
  const slugPrefix = slugify(title).substring(0, 64)
  const timestampHex = Date.now().toString(16)
  return `${slugPrefix}-${timestampHex}`
}
/** Upper bound on content length: 50,000,000 (about 50MB, assuming the unit is bytes). */
export const MAX_CONTENT_LENGTH = 50_000_000
/**
 * Passes an article-creation error through, first marking the related library
 * item as failed when an item id is supplied.
 *
 * @param result - error result to hand back to the caller
 * @param userId - user id forwarded to `updateLibraryItem`
 * @param pageId - id of the library item to flag; nothing is updated when absent
 * @param pubsub - pubsub client forwarded to `updateLibraryItem`; a new one is
 *   created per call by default
 * @returns `result`, unchanged
 */
export const errorHandler = async (
  result: CreateArticleError,
  userId: string,
  pageId?: string | null,
  pubsub = createPubSubClient()
): Promise<CreateArticleError> => {
  if (pageId) {
    await updateLibraryItem(
      pageId,
      { state: LibraryItemState.Failed },
      userId,
      pubsub
    )
  }

  return result
}
/**
 * Tells whether an item still in the Processing state was saved more than
 * 30 seconds ago.
 *
 * @param libraryItem - item to inspect; `savedAt` is only read when the state
 *   is Processing
 * @returns true when the item is Processing and older than 30 seconds
 */
export const isParsingTimeout = (libraryItem: LibraryItem): boolean => {
  if (libraryItem.state !== LibraryItemState.Processing) return false

  const timeoutMs = 1000 * 30
  const cutoff = Date.now() - timeoutMs
  return libraryItem.savedAt.getTime() < cutoff
}
/**
 * Normalizes a date-like value into a usable `Date`, or `undefined`.
 *
 * Strings are parsed with the `Date` constructor (readability sometimes
 * returns the date as a string). The result is rejected when it is missing,
 * when it cannot be parsed (an Invalid Date), or when its year is greater
 * than 9999.
 *
 * @param date - a `Date`, a date string, or `undefined`
 * @returns a new `Date` instance, or `undefined` when the input is unusable
 */
export const validatedDate = (
  date: Date | string | undefined
): Date | undefined => {
  try {
    if (!date) return undefined

    // Always build a fresh instance so the caller's Date is never shared.
    const parsed = new Date(date)

    // An unparseable input yields an Invalid Date whose time value is NaN;
    // the year check alone would let it through because NaN > 9999 is false.
    if (Number.isNaN(parsed.getTime())) return undefined

    // Make sure the date year is not greater than 9999
    if (parsed.getFullYear() > 9999) return undefined

    return parsed
  } catch (e) {
    logger.error('error validating date', { date, error: e })
    return undefined
  }
}
/**
 * Derives a filesystem-safe file name from the last path segment of a URL.
 *
 * The URL is normalized (hash stripped, `www.` kept), the last path segment
 * is URI-decoded, and every character other than ASCII letters, digits, `-`,
 * `_` and `.` is removed.
 *
 * @param urlStr - absolute URL; `new URL` throws when it is not one
 * @returns the sanitized file name, possibly an empty string
 */
export const fileNameForFilePath = (urlStr: string): string => {
  const url = normalizeUrl(new URL(urlStr).href, {
    stripHash: true,
    stripWWW: false,
  })
  const rawName = path.basename(new URL(url).pathname)

  let decodedName: string
  try {
    decodedName = decodeURI(rawName)
  } catch {
    // decodeURI throws URIError on malformed percent-escapes, which the URL
    // parser accepts. Fall back to the raw segment; the filter below strips
    // the leftover '%' characters anyway.
    decodedName = rawName
  }

  return decodedName.replace(/[^a-zA-Z0-9-_.]/g, '')
}
/**
 * Produces a display title from a URL: the URI-decoded last path segment with
 * a trailing `.pdf` extension removed.
 *
 * @param url - URL to take the title from
 * @returns the decoded segment; when parsing or decoding throws, the error is
 *   logged and `url` is returned unchanged
 */
export const titleForFilePath = (url: string): string => {
  try {
    const { pathname } = new URL(url)
    return decodeURI(path.basename(pathname, '.pdf'))
  } catch (e) {
    logger.error(e)
    return url
  }
}
/**
 * Checks that a string has the canonical UUID text layout: hex digits in
 * 8-4-4-4-12 groups separated by dashes, in either letter case.
 *
 * Only the layout is checked; version and variant bits are not inspected.
 *
 * @param str - candidate string
 * @returns true when the whole string is a UUID-shaped value
 */
export const validateUuid = (str: string): boolean => {
  // No `g` flag: a global regex keeps lastIndex between test() calls, which
  // is a well-known source of alternating results once the regex is shared.
  const uuidPattern =
    /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i
  return uuidPattern.test(str)
}
/**
 * Type guard for strings. Accepts both primitive strings and boxed `String`
 * objects.
 *
 * @param check - value to test
 * @returns true when `check` is a string primitive or a `String` instance
 */
export const isString = (check: any): check is string => {
  if (typeof check === 'string') return true
  return check instanceof String
}
/**
 * Returns a promise that resolves once a `setTimeout` timer of `ms`
 * milliseconds fires.
 *
 * @param ms - delay passed to `setTimeout`
 */
export const wait = (ms: number): Promise<void> =>
  new Promise<void>((done) => {
    setTimeout(done, ms)
  })
/**
 * Counts the words in a piece of text.
 *
 * When `isHtml` is set, the input is parsed as HTML and only the text content
 * of `<body>` is counted (an empty string when there is none).
 *
 * @param text - plain text or HTML markup
 * @param isHtml - treat `text` as HTML
 * @returns the word count, or 0 when parsing or counting throws
 */
export const wordsCount = (text: string, isHtml?: boolean): number => {
  try {
    const plainText = isHtml
      ? parseHTML(text).window.document.body.textContent || ''
      : text
    return countWords(plainText)
  } catch {
    return 0
  }
}
/**
 * Tells whether a string begins with the `data:image/` prefix.
 *
 * Only the prefix is checked (case-sensitively); the payload and its encoding
 * are not validated.
 *
 * @param str - candidate string
 */
export const isBase64Image = (str: string): boolean => {
  const prefix = 'data:image/'
  return str.slice(0, prefix.length) === prefix
}
/**
 * Generates a random color as an uppercase `#RRGGBB` hex string.
 *
 * @returns a value between `#000000` and `#FFFFFF`, both inclusive
 */
export const generateRandomColor = (): string => {
  // Math.random() is in [0, 1), so scaling by 0x1000000 (16777216) and
  // flooring covers 0x000000..0xFFFFFF. Scaling by 0xFFFFFF would make
  // #FFFFFF unreachable.
  const colorCount = 0x1000000
  const value = Math.floor(Math.random() * colorCount)
  return '#' + value.toString(16).padStart(6, '0').toUpperCase()
}
/**
 * Converts HTML entities in a string back to their characters by delegating
 * to underscore's `unescape`.
 *
 * @param html - text that may contain HTML entities
 */
export const unescapeHtml = (html: string): string => _.unescape(html)
/**
 * Tells whether a string passes `validateUrl`.
 *
 * @param str - candidate URL
 * @returns true when `validateUrl` does not throw; otherwise the rejection is
 *   logged at info level and false is returned
 */
export const isUrl = (str: string): boolean => {
  let valid = true
  try {
    validateUrl(str)
  } catch {
    logger.info('not an url', { url: str })
    valid = false
  }
  return valid
}
/**
 * Normalizes a URL and removes tracking query parameters.
 *
 * `utm_*` parameters are always removed. For tweet links (as matched by
 * `TWEET_URL_REGEX`) the `s` and `t` parameters are removed as well, e.g.
 * https://twitter.com/omnivore/status/1673218959624093698?s=12&t=R91quPajs0E53Yds-fhv2g
 * The hash is stripped; `www.` and a trailing slash are kept.
 *
 * @param url - URL to clean
 */
export const cleanUrl = (url: string) => {
  const utmParams = /^utm_\w+/i
  const removeQueryParameters: (RegExp | string)[] = TWEET_URL_REGEX.test(url)
    ? [utmParams, 's', 't']
    : [utmParams]

  return normalizeUrl(url, {
    stripHash: true,
    stripWWW: false,
    removeQueryParameters,
    removeTrailingSlash: false,
  })
}
/**
 * Returns a copy of an object with the given top-level keys removed.
 *
 * Despite the name, the copy is shallow (object spread) and only top-level
 * keys are deleted; nested values are shared with the input. The input object
 * itself is not modified.
 *
 * @param obj - source object
 * @param keys - top-level keys to leave out of the copy
 * @returns the copy, typed as `Omit<T, K>`
 */
export const deepDelete = <T, K extends keyof T>(
  obj: T,
  keys: readonly K[]
) => {
  const clone = { ...obj }
  for (const key of keys) {
    delete clone[key]
  }
  return clone as Omit<T, K>
}
/**
 * Tells whether a URL string starts with a slash.
 *
 * This is a plain prefix check, so strings starting with `//` also count,
 * while path-relative forms such as `foo/bar` or `./foo` do not.
 *
 * @param url - URL string to test
 */
export const isRelativeUrl = (url: string): boolean => url.charAt(0) === '/'
/**
 * Resolves a URL against a base URL and returns the absolute result.
 *
 * @param url - relative or absolute URL
 * @param baseUrl - base to resolve against; `new URL` throws when the pair
 *   cannot be parsed
 * @returns the serialized absolute URL
 */
export const getAbsoluteUrl = (url: string, baseUrl: string): string => {
  const resolved = new URL(url, baseUrl)
  return resolved.toString()
}
/**
 * Records in redis that a user recently saved a URL, so rss-feeder won't try
 * to re-save it.
 *
 * The key `recent-saved-item:<userId>:<url>` is written with value 1 and a
 * 26-hour expiry, and only when it does not exist yet (`NX`).
 *
 * @param redisClient - redis client used for the write
 * @param userId - id of the saving user
 * @param url - saved URL
 * @returns the result of the redis `SET` call; when the call throws, the
 *   error is logged and `undefined` is returned
 */
export const setRecentlySavedItemInRedis = async (
  redisClient: Redis,
  userId: string,
  url: string
) => {
  const key = `recent-saved-item:${userId}:${url}`
  const twentySixHoursInSeconds = 26 * 60 * 60

  try {
    // `return await` keeps a rejected SET inside this try block.
    return await redisClient.set(key, 1, 'EX', twentySixHoursInSeconds, 'NX')
  } catch (err) {
    logger.error('error setting recently saved item in redis', {
      redisKey: key,
      error: err,
    })
  }
}
/**
 * Classifies the client behind a User-Agent string.
 *
 * Plugin names are checked first (currently obsidian and logseq) and are
 * returned in lowercase. Otherwise a user agent mentioning a known browser is
 * reported as `web`, and anything else as `other`.
 *
 * @param userAgent - raw User-Agent header value
 * @returns `obsidian`, `logseq`, `web` or `other`
 */
export const getClientFromUserAgent = (userAgent: string): string => {
  const pluginMatch = /(obsidian|logseq)/i.exec(userAgent)
  if (pluginMatch !== null) return pluginMatch[0].toLowerCase()

  const isBrowser = /(chrome|safari|firefox|edge|opera)/i.test(userAgent)
  return isBrowser ? 'web' : 'other'
}
/**
 * Looks up the ISO 639-1 (alpha-2) code for an English language name,
 * falling back to `en` when the lookup yields nothing.
 *
 * The exported name is a misspelling of "language"; it is kept as is because
 * it is part of the module's public surface.
 *
 * @param language - language name in English
 */
export const lanaugeToCode = (language: string): string => {
  const alpha2Code = languages.getAlpha2Code(language, 'en')
  return alpha2Code || 'en'
}
/**
 * Creates a fingerprint using a new `FingerprintGenerator` with its default
 * options.
 *
 * @returns whatever `FingerprintGenerator#getFingerprint` produces
 */
export const generateFingerprint = () => {
  const generator = new FingerprintGenerator()
  return generator.getFingerprint()
}