Implement a reader for epubs

This commit is contained in:
Jackson Harper
2023-04-15 08:22:54 +08:00
parent cd5ed5a4f3
commit 69df32d428
14 changed files with 218 additions and 23 deletions

View File

@ -244,6 +244,7 @@ export enum BulkActionType {
}
/**
 * Identifies which reader an item's content is meant for: EPUB, PDF or web.
 *
 * The string values match the member names of the GraphQL `ContentReader`
 * enum (`EPUB`, `PDF`, `WEB`).
 *
 * NOTE(review): this enum appears to be imported from a generated GraphQL
 * types module elsewhere in the project — confirm whether hand-written
 * comments here survive code generation.
 */
export enum ContentReader {
Epub = 'EPUB',
Pdf = 'PDF',
Web = 'WEB'
}

View File

@ -203,6 +203,7 @@ enum BulkActionType {
}
# Identifies which reader an item's content is meant for: EPUB, PDF or web.
enum ContentReader {
EPUB
PDF
WEB
}

View File

@ -98,10 +98,12 @@ import {
} from '../../utils/parser'
import { parseSearchQuery, SortBy, SortOrder } from '../../utils/search'
import {
contentReaderForPageType,
getStorageFileDetails,
makeStorageFilePublic,
} from '../../utils/uploads'
import { WithDataSourcesContext } from '../types'
import { pageTypeForContentType } from '../upload_files'
enum ArticleFormat {
Markdown = 'markdown',
@ -258,7 +260,7 @@ export const createArticleResolver = authorized<
uploadFileHash = uploadFileDetails.md5Hash
userArticleUrl = uploadFileDetails.fileUrl
canonicalUrl = uploadFile.url
pageType = PageType.File
pageType = pageTypeForContentType(uploadFile.contentType)
title = titleForFilePath(uploadFile.url)
} else if (
source !== 'puppeteer-parse' &&
@ -950,8 +952,7 @@ export const searchResolver = authorized<
...r,
image: r.image && createImageProxyUrl(r.image, 260, 260),
isArchived: !!r.archivedAt,
contentReader:
r.pageType === PageType.File ? ContentReader.Pdf : ContentReader.Web,
contentReader: contentReaderForPageType(r.pageType),
originalArticleUrl: r.url,
publishedAt: validatedDate(r.publishedAt),
ownedByViewer: r.userId === claims.uid,
@ -1054,10 +1055,7 @@ export const updatesSinceResolver = authorized<
...p,
image: p.image && createImageProxyUrl(p.image, 260, 260),
isArchived: !!p.archivedAt,
contentReader:
p.pageType === PageType.File
? ContentReader.Pdf
: ContentReader.Web,
contentReader: contentReaderForPageType(p.pageType),
} as SearchItem,
cursor: endCursor,
itemID: p.id,

View File

@ -20,6 +20,7 @@ import {
import { userDataToUser, validatedDate, wordsCount } from '../utils/helpers'
import { createImageProxyUrl } from '../utils/imageproxy'
import {
contentReaderForPageType,
generateDownloadSignedUrl,
generateUploadFilePathName,
} from '../utils/uploads'
@ -375,7 +376,8 @@ export const functionResolvers = {
Article: {
async url(article: Article, _: unknown, ctx: WithDataSourcesContext) {
if (
article.pageType == PageType.File &&
(article.pageType == PageType.File ||
article.pageType == PageType.Book) &&
ctx.claims &&
article.uploadFileId
) {
@ -468,9 +470,7 @@ export const functionResolvers = {
return !!page?.archivedAt || false
},
contentReader(article: { pageType: PageType }) {
return article.pageType === PageType.File
? ContentReader.Pdf
: ContentReader.Web
return contentReaderForPageType(article.pageType)
},
highlights(
article: { id: string; userId?: string; highlights?: Highlight[] },
@ -551,7 +551,11 @@ export const functionResolvers = {
},
SearchItem: {
async url(item: SearchItem, _: unknown, ctx: WithDataSourcesContext) {
if (item.pageType == PageType.File && ctx.claims && item.uploadFileId) {
if (
(item.pageType == PageType.File || item.pageType == PageType.Book) &&
ctx.claims &&
item.uploadFileId
) {
const upload = await ctx.models.uploadFile.get(item.uploadFileId)
if (!upload || !upload.fileName) {
return undefined

View File

@ -27,6 +27,13 @@ const isFileUrl = (url: string): boolean => {
return parsedUrl.protocol == 'file:'
}
/**
 * Picks the page type for an uploaded file from its MIME content type.
 *
 * EPUB uploads (`application/epub+zip`) become `PageType.Book`; every other
 * content type is treated as a generic `PageType.File`.
 *
 * Media types are case-insensitive and may carry parameters
 * (e.g. `; charset=binary`), so only the normalized `type/subtype` part is
 * compared.
 *
 * @param contentType - MIME content type reported for the upload
 * @returns `PageType.Book` for EPUBs, otherwise `PageType.File`
 */
export const pageTypeForContentType = (contentType: string): PageType => {
  // Tolerate a missing value from untyped callers instead of throwing;
  // such uploads fall through to the generic file type, as before.
  const mediaType =
    typeof contentType === 'string'
      ? (contentType.split(';')[0] ?? '').trim().toLowerCase()
      : ''

  return mediaType === 'application/epub+zip' ? PageType.Book : PageType.File
}
export const uploadFileRequestResolver: ResolverFn<
UploadFileRequestResult,
unknown,
@ -145,7 +152,7 @@ export const uploadFileRequestResolver: ResolverFn<
title: title,
hash: uploadFilePathName,
content: '',
pageType: PageType.File,
pageType: pageTypeForContentType(input.contentType),
uploadFileId: uploadFileData.id,
slug: generateSlug(uploadFilePathName),
createdAt: new Date(),

View File

@ -36,6 +36,7 @@ const schema = gql`
enum ContentReader {
WEB
PDF
EPUB
}
input SortParams {

View File

@ -1,4 +1,6 @@
import { Page, PageType } from '../elastic/types'
import { ContentReader } from '../generated/graphql'
import { contentReaderForPageType } from '../utils/uploads'
import { FeatureName, isOptedIn } from './features'
/*
@ -8,7 +10,10 @@ export const shouldSynthesize = async (
userId: string,
page: Page
): Promise<boolean> => {
if (page.pageType === PageType.File || !page.content) {
if (
contentReaderForPageType(page.pageType) === ContentReader.Web ||
!page.content
) {
// we don't synthesize files for now
return false
}

View File

@ -2,6 +2,19 @@
/* eslint-disable @typescript-eslint/no-unsafe-assignment */
import { File, GetSignedUrlConfig, Storage } from '@google-cloud/storage'
import { env } from '../env'
import { ContentReader, PageType } from '../generated/graphql'
/**
 * Maps a page type to the reader used for its content.
 *
 * `PageType.Book` maps to the EPUB reader, `PageType.File` to the PDF reader,
 * and every other page type falls back to the web reader.
 *
 * This function is pure and performs no logging, so it can be called once per
 * item when mapping lists of results.
 *
 * @param pageType - the page type of the stored item
 * @returns the content reader matching that page type
 */
export const contentReaderForPageType = (
  pageType: PageType
): ContentReader => {
  switch (pageType) {
    case PageType.Book:
      return ContentReader.Epub
    case PageType.File:
      return ContentReader.Pdf
    default:
      return ContentReader.Web
  }
}
/* On GAE/Prod, we shall rely on default app engine service account credentials.
* Two changes needed: 1) add default service account to our uploads GCS Bucket
@ -39,6 +52,7 @@ export const generateUploadSignedUrl = async (
expires: Date.now() + 15 * 60 * 1000, // 15 minutes
contentType: contentType,
}
console.log('signed url for: ', options)
// Get a v4 signed URL for uploading file
const [url] = await storage
@ -60,6 +74,7 @@ export const generateDownloadSignedUrl = async (
.bucket(bucketName)
.file(filePathName)
.getSignedUrl(options)
console.log('generating download signed url', url)
return url
}