From a02bb2f5c9a87c05e3c525d6e198a52fcbaab4dc Mon Sep 17 00:00:00 2001 From: Jackson Harper Date: Tue, 24 May 2022 21:01:01 -0700 Subject: [PATCH] Extract contentType from safari This fixes issues where we'd incorrectly identify a PDF as HTML. Safari will use our preprocess JavaScript file even if the file is a PDF, so we rely on JS to detect the content type here. --- .../Sources/Models/PageScrapePayload.swift | 16 ++++++++++++---- apple/Sources/ShareExtension/ShareExtension.js | 1 + 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/apple/OmnivoreKit/Sources/Models/PageScrapePayload.swift b/apple/OmnivoreKit/Sources/Models/PageScrapePayload.swift index bdaebab00..5b6ff9939 100644 --- a/apple/OmnivoreKit/Sources/Models/PageScrapePayload.swift +++ b/apple/OmnivoreKit/Sources/Models/PageScrapePayload.swift @@ -19,11 +19,18 @@ public struct PageScrapePayload { public let url: String public let contentType: ContentType - init(url: String, title: String?, html: String?) { + init(url: String, title: String?, html: String?, contentType: String?) { self.url = url self.title = title self.html = html - self.contentType = url.hasSuffix(".pdf") ? .pdf : .html + + // If the content type was specified and we know it's a PDF, use that + // otherwise fall back to using file extensions. + if let contentType = contentType, contentType == "application/pdf" { + self.contentType = .pdf + } else { + self.contentType = url.hasSuffix(".pdf") ? .pdf : .html + } } } @@ -207,7 +214,7 @@ public enum PageScraper { private extension PageScrapePayload { static func make(url: URL?) -> PageScrapePayload? { guard let url = url else { return nil } - return PageScrapePayload(url: url.absoluteString, title: nil, html: nil) + return PageScrapePayload(url: url.absoluteString, title: nil, html: nil, contentType: nil) } static func make(item: NSSecureCoding?) -> PageScrapePayload? { @@ -216,7 +223,8 @@ private extension PageScrapePayload { guard let url = results?["url"] as? 
String else { return nil } let html = results?["documentHTML"] as? String let title = results?["title"] as? String + let contentType = results?["contentType"] as? String - return PageScrapePayload(url: url, title: title, html: html) + return PageScrapePayload(url: url, title: title, html: html, contentType: contentType) } } diff --git a/apple/Sources/ShareExtension/ShareExtension.js b/apple/Sources/ShareExtension/ShareExtension.js index 671777f7c..fd7c4c0b6 100644 --- a/apple/Sources/ShareExtension/ShareExtension.js +++ b/apple/Sources/ShareExtension/ShareExtension.js @@ -5,6 +5,7 @@ ShareExtension.prototype = { arguments.completionFunction({ 'url': window.location.href, 'title': document.title.toString(), + 'contentType': document.contentType, 'documentHTML': new XMLSerializer().serializeToString(document), }); }