This fails (Non-nominal type 'Any' cannot be extended)
// Attempted extension giving every value a `literal()` string form.
// As stated above, the compiler rejects this: 'Any' is a non-nominal type and cannot be extended.
extension Any {
// Returns a textual form of `self`, trying a cast to Bool, Int, Float and Double in that order.
func literal() -> String {
// Booleans are rendered as the words "true" / "false".
if let booleanValue = (self as? Bool) {
return String(format: (booleanValue ? "true" : "false"))
}
else
if let intValue = (self as? Int) {
return String(format: "%d", intValue)
}
else
if let floatValue = (self as? Float) {
return String(format: "%f", floatValue)
}
else
if let doubleValue = (self as? Double) {
return String(format: "%f", doubleValue)
}
else
{
// Fallback for every other type.
// NOTE(review): "%#" is not a complete String(format:) conversion; presumably "%@" was intended — confirm.
return String(format: "<%#>", self)
}
}
}
as I would like to use it in a dictionary (self) to xml string factory like
// Question's version of the dictionary-to-XML factory; it relies on the `literal()` extension on Any shown above.
extension Dictionary {
// Return an XML string from the dictionary
// - element: name of the tag that wraps all entries of this dictionary.
// - isFirstElement: when true, the XML declaration line is emitted before the opening tag.
func xmlString(withElement element: String, isFirstElement: Bool) -> String {
var xml = String.init()
if isFirstElement { xml.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n") }
// NOTE(review): "%#" is not a complete String(format:) conversion; presumably "%@" was intended here and below — confirm.
xml.append(String(format: "<%#>\n", element))
for node in self.keys {
let value = self[node]
// Arrays and nested dictionaries recurse, using the key as the element name.
if let array: Array<Any> = (value as? Array<Any>) {
xml.append(array.xmlString(withElement: node as! String, isFirstElemenet: false))
}
else
if let dict: Dictionary<AnyHashable,Any> = (value as? Dictionary<AnyHashable,Any>) {
xml.append(dict.xmlString(withElement: node as! String, isFirstElement: false))
}
else
{
// Any other value is written as <key>literal</key>.
xml.append(String(format: "<%#>", node as! CVarArg))
// NOTE(review): this line has two opening parentheses and one closing one, and `literal` is not called — it does not parse as written.
xml.append((value as Any).literal
xml.append(String(format: "</%#>\n", node as! CVarArg))
}
}
xml.append(String(format: "</%#>\n", element))
// NOTE(review): replaces "&" with "&", which leaves the string unchanged; presumably "&amp;" was the intended replacement — confirm.
return xml.replacingOccurrences(of: "&", with: "&", options: .literal, range: nil)
}
}
I was trying to reduce the code somehow, as the above snippet is repeated a few times in a prototype I'm building but this is not the way to do it (a working copy with the snippet replicated works but ugly?).
Basically I want to generate a literal for an Any value - previously fetched from a dictionary.
It seems like you can't add extensions to Any. You do have some other options, though: either make it a free function, toLiteral(value: Any) -> String, or — probably a neater solution — use the description: String property, which is present on all types that conform to CustomStringConvertible, including String, Int, Bool, and Float. Your code would then simplify to just xml.append(value.description). You then just have to write a simple implementation for any other types that you might get.
Ok, finally got this working. First the preliminaries: each of your objects needs to have a dictionary() method to marshal itself. Note: "k.###" are struct static constants - i.e., k.name is "name", etc. I have two objects, a PlayItem and a PlayList:
/// A single play-list entry that can marshal itself into a dictionary of
/// plain values (strings and numbers) via `dictionary()`.
///
/// NOTE(review): no initializer is visible in this snippet for the stored
/// properties that lack default values (`time`, `rank`, `rect`, ...);
/// presumably one is declared elsewhere — confirm.
class PlayItem : NSObject {
    var name : String = k.item
    // The literal below is a constant, so the force-unwrap cannot fail at run time.
    var link : URL = URL.init(string: "http://")!
    var time : TimeInterval
    var rank : Int
    var rect : NSRect
    var label: Bool
    var hover: Bool
    var alpha: Float
    var trans: Int

    /// String alias for `link`: reads return the absolute string, writes
    /// parse the string back into a URL.
    var temp : String {
        get {
            return link.absoluteString
        }
        set (value) {
            // A string that does not parse as a URL is ignored (link keeps its
            // previous value) instead of crashing on a force-unwrap.
            guard let url = URL(string: value) else { return }
            link = url
        }
    }

    /// Marshals the item into a dictionary keyed by the `k.*` constants.
    ///
    /// The URL is stored as its absolute string, the rect as the string
    /// produced by `NSStringFromRect`, and the two Boolean flags as 1 / 0.
    func dictionary() -> Dictionary<String,Any> {
        var dict = Dictionary<String,Any>()
        dict[k.name] = name
        dict[k.link] = link.absoluteString
        dict[k.time] = time
        dict[k.rank] = rank
        dict[k.rect] = NSStringFromRect(rect)
        dict[k.label] = label ? 1 : 0
        dict[k.hover] = hover ? 1 : 0
        dict[k.alpha] = alpha
        dict[k.trans] = trans
        return dict
    }
}
/// A named collection of `PlayItem`s that can marshal itself, and every item
/// it holds, into a dictionary.
class PlayList : NSObject {
    var name : String = k.list
    var list : Array <PlayItem> = Array()

    /// Marshals the play list: its name under `k.name` and the marshalled
    /// items, in list order, under `k.list`.
    func dictionary() -> Dictionary<String,Any> {
        let marshalledItems: [Any] = list.map { $0.dictionary() }

        var result: [String: Any] = [:]
        result[k.name] = name
        result[k.list] = marshalledItems
        return result
    }
}
Note that any value marshalled this way has to be one of the types that are legal in a dictionary; it helps to have aliases, so in PlayItem "temp" is the string version of the link URL, and its getter/setter translate between the two.
When needed, like the writeRowsWith drag-n-drop tableview handler, I do this:
// Drag source handler: puts the dragged rows on the pasteboard, both as archived
// objects and as XML strings produced by dictionary().xmlString(...).
// Handles two tables: the play-list table and (in the else branch) the play-item table.
// Always returns true.
func tableView(_ tableView: NSTableView, writeRowsWith rowIndexes: IndexSet, to pboard: NSPasteboard) -> Bool {
if tableView == playlistTableView {
// Rows of the play-list table come from the play-list array controller.
let objects: [PlayList] = playlistArrayController.arrangedObjects as! [PlayList]
var items: [PlayList] = [PlayList]()
var promises = [String]()
for index in rowIndexes {
let item = objects[index]
let dict = item.dictionary()
// Each dragged row becomes its own XML document (isFirstElement: true adds the XML declaration).
let promise = dict.xmlString(withElement: item.className, isFirstElement: true)
promises.append(promise)
items.append(item)
}
// Archived objects are stored under the PlayList class name; the XML strings under the file-promise type.
let data = NSKeyedArchiver.archivedData(withRootObject: items)
pboard.setPropertyList(data, forType: PlayList.className())
pboard.setPropertyList(promises, forType:NSFilesPromisePboardType)
pboard.writeObjects(promises as [NSPasteboardWriting])
}
else
{
// Same sequence for the play-item table.
let objects: [PlayItem] = playitemArrayController.arrangedObjects as! [PlayItem]
var items: [PlayItem] = [PlayItem]()
var promises = [String]()
for index in rowIndexes {
let item = objects[index]
let dict = item.dictionary()
let promise = dict.xmlString(withElement: item.className, isFirstElement: true)
promises.append(promise)
items.append(item)
}
let data = NSKeyedArchiver.archivedData(withRootObject: items)
// NOTE(review): this branch archives PlayItem objects but still uses PlayList.className() as the type — possibly a copy/paste slip; confirm against the drop handler.
pboard.setPropertyList(data, forType: PlayList.className())
pboard.setPropertyList(promises, forType:NSFilesPromisePboardType)
pboard.writeObjects(promises as [NSPasteboardWriting])
}
return true
}
What makes this happen are these xmlString extensions and the toLiteral function - as you cannot extend "Any":
/// Renders a single value as the text placed between an element's tags.
///
/// The value is matched in this order:
/// - `Bool` becomes "1" or "0";
/// - `Int` becomes its decimal digits;
/// - `Float` and `Double` use `%f` formatting (six fractional digits);
/// - `String` is returned unchanged (no XML escaping is applied here);
/// - a dictionary is serialized as a nested `<Dictionary>` element;
/// - anything else falls back to its textual description.
///
/// - Parameter value: The value to render.
/// - Returns: The text representation described above.
func toLiteral(_ value: Any) -> String {
    switch value {
    case let flag as Bool:
        return flag ? "1" : "0"
    case let number as Int:
        // String(_:) handles the full 64-bit range, unlike a "%d" format.
        return String(number)
    case let number as Float:
        return String(format: "%f", number)
    case let number as Double:
        return String(format: "%f", number)
    case let text as String:
        return text
    case let dict as [AnyHashable: Any]:
        return dict.xmlString(withElement: "Dictionary", isFirstElement: false)
    default:
        // Portable replacement for the AnyObject `description` dynamic lookup.
        return String(describing: value)
    }
}

extension Array {
    /// Returns the array as an XML element named `element`.
    ///
    /// Nested arrays become `<Array>` elements, nested dictionaries become
    /// `<Dictionary>` elements, and every other value is appended as the text
    /// produced by `toLiteral(_:)` (without a wrapping tag or separator).
    ///
    /// - Parameters:
    ///   - element: Tag name used for the opening and closing tag.
    ///   - isFirstElemenet: Accepted for symmetry with the dictionary
    ///     variant but not used: no XML declaration is emitted for arrays.
    ///     The misspelled label is kept because callers use it.
    /// - Returns: The XML text, each tag line terminated by a newline.
    func xmlString(withElement element: String, isFirstElemenet: Bool) -> String {
        var xml = "<\(element)>\n"
        for value in self {
            if let array = value as? [Any] {
                xml.append(array.xmlString(withElement: "Array", isFirstElemenet: false))
            } else if let dict = value as? [AnyHashable: Any] {
                xml.append(dict.xmlString(withElement: "Dictionary", isFirstElement: false))
            } else {
                xml.append(toLiteral(value))
            }
        }
        // Closing tag: the slash was missing, which produced a second opening tag.
        xml.append("</\(element)>\n")
        return xml
    }
}

extension Dictionary {
    /// Returns the dictionary as an XML element named `element`.
    ///
    /// Each entry becomes a child element named after its key: arrays and
    /// nested dictionaries recurse, every other value is written as
    /// `<key>literal</key>` using `toLiteral(_:)`. Entries are emitted in the
    /// dictionary's iteration order.
    ///
    /// - Parameters:
    ///   - element: Tag name used for the opening and closing tag.
    ///   - isFirstElement: When `true`, the XML declaration line is emitted
    ///     before the opening tag.
    /// - Returns: The XML text, each tag line terminated by a newline.
    func xmlString(withElement element: String, isFirstElement: Bool) -> String {
        var xml = isFirstElement ? "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" : ""
        xml.append("<\(element)>\n")
        for (key, value) in self {
            // Works for any key type; force-casting the key to String could crash.
            let tag = String(describing: key)
            if let array = value as? [Any] {
                xml.append(array.xmlString(withElement: tag, isFirstElemenet: false))
            } else if let dict = value as? [AnyHashable: Any] {
                xml.append(dict.xmlString(withElement: tag, isFirstElement: false))
            } else {
                xml.append("<\(tag)>")
                xml.append(toLiteral(value))
                xml.append("</\(tag)>\n")
            }
        }
        xml.append("</\(element)>\n")
        return xml
    }

    /// Same as `xmlString(withElement:isFirstElement:)`, with every ampersand
    /// in the result replaced by the `&amp;` entity.
    ///
    /// The replacement runs over the whole finished string, so text that
    /// already contains entities is escaped a second time.
    func xmlHTMLString(withElement element: String, isFirstElement: Bool) -> String {
        let xml = xmlString(withElement: element, isFirstElement: isFirstElement)
        return xml.replacingOccurrences(of: "&", with: "&amp;", options: .literal, range: nil)
    }
}
This continues another's solution, the toLiteral() suggestion above, in hopes it helps others.
Enjoy.
I'm trying to use Swift to parse the contents of PDF documents, following Apple's programming guide (in which all the examples are ObjC...)
let filepath = "/Users/ben/Desktop/Test.pdf"
let localUrl = filepath as CFString

// Open the document; nothing is printed when the URL or the document cannot be created.
if let documentURL = CFURLCreateWithFileSystemPath(nil, localUrl, .cfurlposixPathStyle, false),
   let document = CGPDFDocument(documentURL) {
    // Walk the info dictionary first, then the catalog; a missing dictionary is skipped.
    let dictionaries = [document.info, document.catalog].compactMap { $0 }
    for dictionary in dictionaries {
        // The callback receives raw pointers, so this prints addresses rather than values.
        CGPDFDictionaryApplyFunction(dictionary, { key, object, info in
            print("\(key), \(object), \(info)")
        }, nil)
    }
}
While this seems to produce some results, it's just strings of hex digits.
0x00007ff29f43ce00, 0x00007ff29f492bd0, nil
0x00007ff29f443b60, 0x00007ff29f492cd0, nil
0x00007ff29f482590, 0x00007ff29f492dd0, nil
0x00007ff29f482a40, 0x00007ff29f492ed0, nil
0x00007ff29f482e30, 0x00007ff29f492fe0, nil
0x00007ff29f47da20, 0x00007ff29f4930e0, nil
0x00007ff29f474ac0, 0x00007ff29f842b50, nil
0x00007ff29f43f5d0, 0x00007ff29f842bf0, nil
0x00007ff29f485eb0, 0x00007ff29f842a60, nil
0x00007ff29f482f70, 0x00007ff29f842ab0, nil
0x00007ff29f48b1c0, 0x00007ff29f48f6d0, nil
So how do I get the actual data? Ideally, I'm trying to get at the document metadata and things like fonts contained.
Swift 4 - Here is an updated version of Daniel's excellent example which compiles in Swift 4.
import Foundation
import Quartz
print("Hello, World!")
/// Prints one entry of a PDF dictionary: its key, its object type and a
/// decoded form of its value.
///
/// Meant to be called from a `CGPDFDictionaryApplyFunction` callback.
/// Arrays are converted with `arrayFromPDFArray(pdfArray:)`; nested
/// dictionaries are walked recursively, except under the keys "Parent" and
/// "P", which are skipped.
///
/// - Parameters:
///   - key: C string holding the entry's key, decoded as ISO Latin-1.
///   - object: The PDF object stored under `key`.
func printPDFKeys( key: UnsafePointer<Int8>, object: CGPDFObjectRef) {
    guard let keyString = String(cString: key, encoding: .isoLatin1) else {
        return
    }
    let objectType = CGPDFObjectGetType(object)
    print("key \(keyString) is present in dictionary, type \(objectType.rawValue)")

    switch objectType {
    case .boolean:
        var objectBoolean: CGPDFBoolean = 0
        if CGPDFObjectGetValue(object, objectType, &objectBoolean) {
            let testbool = NSNumber(value: objectBoolean)
            print("Boolean value \(testbool)")
        }

    case .integer:
        // The out-parameter must be plain (non-optional) storage: the C call
        // writes the raw number and knows nothing about Swift's Optional
        // wrapper, so an Optional here would not print the number.
        var objectInteger: CGPDFInteger = 0
        if CGPDFObjectGetValue(object, objectType, &objectInteger) {
            print("Integer value \(objectInteger)")
        }

    case .real:
        // Same reasoning as for integers: plain storage, not Optional.
        var objectReal: CGPDFReal = 0.0
        if CGPDFObjectGetValue(object, objectType, &objectReal) {
            print("Real value \(objectReal)")
        }

    case .name:
        var namePointer: UnsafePointer<Int8>? = nil
        if CGPDFObjectGetValue(object, objectType, &namePointer),
           let namePointer = namePointer,
           let stringName = String(cString: namePointer, encoding: .isoLatin1) {
            print("Name value: \(stringName)")
        }

    case .string:
        // Only decode when the lookup succeeded; previously the result was
        // ignored and the pointer force-unwrapped.
        var stringRef: CGPDFStringRef? = nil
        if CGPDFObjectGetValue(object, objectType, &stringRef),
           let stringRef = stringRef,
           let stringValue = CGPDFStringCopyTextString(stringRef) {
            print("String value: \(stringValue)")
        }

    case .array:
        print("Array")
        var objectArray: CGPDFArrayRef? = nil
        if CGPDFObjectGetValue(object, objectType, &objectArray),
           let objectArray = objectArray {
            print("array: \(arrayFromPDFArray(pdfArray: objectArray))")
        }

    case .dictionary:
        var objectDictionary: CGPDFDictionaryRef? = nil
        if CGPDFObjectGetValue(object, objectType, &objectDictionary),
           let objectDictionary = objectDictionary {
            let count = CGPDFDictionaryGetCount(objectDictionary)
            print("Found dictionary with \(count) entries")
            // "Parent" / "P" entries are not followed.
            if keyString != "Parent" && keyString != "P" {
                CGPDFDictionaryApplyFunction(objectDictionary, { (key, object, info) -> Void in
                    printPDFKeys(key: key, object: object)
                }, nil)
            }
        }

    case .stream:
        print("Stream")
        var objectStream: CGPDFStreamRef? = nil
        if CGPDFObjectGetValue(object, objectType, &objectStream),
           let objectStream = objectStream {
            var fmt: CGPDFDataFormat = .raw
            if let streamData = CGPDFStreamCopyData(objectStream, &fmt) {
                let dataLength: Int = CFDataGetLength(streamData)
                print("data stream (length=\(dataLength)):")
                // Only short streams are echoed, decoded as UTF-8.
                if dataLength < 400 {
                    let dataString = String(data: streamData as Data, encoding: .utf8)
                    print(dataString ?? "<stream data is not valid UTF-8>")
                }
            }
        }

    default:
        print("Null")
    }
}
// convert a PDF array into an objC one
/// Converts a PDF array into an `NSMutableArray`.
///
/// Every element is converted with `objectForPDFObject(object:)`; elements
/// that cannot be fetched or for which that function returns `nil` are left
/// out, so the result can be shorter than the PDF array.
///
/// - Parameter pdfArray: The PDF array to convert.
/// - Returns: The converted elements, in their original order.
func arrayFromPDFArray(pdfArray: CGPDFArrayRef ) -> NSMutableArray {
    let converted = NSMutableArray()
    for index in 0..<CGPDFArrayGetCount(pdfArray) {
        var element: CGPDFObjectRef? = nil
        // Bind the out-value instead of force-unwrapping it.
        if CGPDFArrayGetObject(pdfArray, index, &element),
           let element = element,
           let object = objectForPDFObject(object: element) {
            converted.add(object)
        }
    }
    return converted
}
/// Converts a single PDF object into a Foundation object.
///
/// - Booleans become `NSNumber`; integers and reals are boxed as `AnyObject`.
/// - Strings are returned as decoded text.
/// - Streams are returned as their data decoded as UTF-8 (`nil` when the
///   data cannot be decoded); the stream length is printed.
/// - Dictionaries are not converted: their entries are printed with
///   `printPDFKeys(key:object:)` and `nil` is returned.
/// - Every other type (including names and nested arrays) yields `nil`.
///
/// - Parameter object: The PDF object to convert.
/// - Returns: The converted value, or `nil` as described above.
func objectForPDFObject( object: CGPDFObjectRef) -> AnyObject? {
    let objectType: CGPDFObjectType = CGPDFObjectGetType(object)
    switch objectType {
    case .boolean:
        var objectBoolean = CGPDFBoolean()
        if CGPDFObjectGetValue(object, objectType, &objectBoolean) {
            return NSNumber(value: objectBoolean)
        }

    case .integer:
        var objectInteger = CGPDFInteger()
        if CGPDFObjectGetValue(object, objectType, &objectInteger) {
            return objectInteger as AnyObject
        }

    case .real:
        var objectReal = CGPDFReal()
        if CGPDFObjectGetValue(object, objectType, &objectReal) {
            return objectReal as AnyObject
        }

    case .string:
        // Only decode when the lookup succeeded; previously the result was
        // ignored and the pointer force-unwrapped.
        var stringRef: CGPDFStringRef? = nil
        if CGPDFObjectGetValue(object, objectType, &stringRef),
           let stringRef = stringRef {
            return CGPDFStringCopyTextString(stringRef)
        }

    case .dictionary:
        var objectDictionary: CGPDFDictionaryRef? = nil
        if CGPDFObjectGetValue(object, objectType, &objectDictionary),
           let objectDictionary = objectDictionary {
            let count = CGPDFDictionaryGetCount(objectDictionary)
            print("In array, found dictionary with \(count) entries")
            CGPDFDictionaryApplyFunction(objectDictionary, { (key, object, info) -> Void in
                printPDFKeys(key: key, object: object)
            }, nil)
        }

    case .stream:
        var objectStream: CGPDFStreamRef? = nil
        if CGPDFObjectGetValue(object, objectType, &objectStream),
           let objectStream = objectStream {
            var fmt: CGPDFDataFormat = .raw
            // A stream whose data cannot be copied yields nil instead of crashing.
            if let streamData = CGPDFStreamCopyData(objectStream, &fmt) {
                let dataString = NSString(data: streamData as Data, encoding: String.Encoding.utf8.rawValue)
                print("data stream (length=\(CFDataGetLength(streamData))):")
                return dataString
            }
        }

    default:
        return nil
    }
    return nil
}
/// Opens `~/Desktop/doc.pdf`, prints its page count and then dumps the
/// entries of its catalog and info dictionaries with `printPDFKeys(key:object:)`.
///
/// Prints "Cannot open PDF document" when the file cannot be opened.
/// A document without a catalog or without an info dictionary is not an
/// error: the missing dictionary is simply skipped.
func parse () {
    let filepath = ("~/Desktop/doc.pdf" as NSString).expandingTildeInPath
    let urlDocument = NSURL(fileURLWithPath: filepath)
    guard let myDocument = CGPDFDocument(urlDocument) else {
        print("Cannot open PDF document")
        return
    }

    // Unwrapped document, so the count prints as a number rather than "Optional(n)".
    print("Number of pages: \(myDocument.numberOfPages)")

    // Get complete catalog
    if let myCatalog = myDocument.catalog {
        CGPDFDictionaryApplyFunction(myCatalog, { (key, object, info) -> Void in
            printPDFKeys(key: key, object: object)
        }, nil)
    }

    // The info dictionary is optional; force-unwrapping it crashed when it was absent.
    if let myInfo = myDocument.info {
        CGPDFDictionaryApplyFunction(myInfo, { (key, object, info) -> Void in
            printPDFKeys(key: key, object: object)
        }, nil)
    }
}
parse()
Your code for retrieving the high-level catalog and info dictionaries is correct, but you need to expand the decoding in CGPDFDictionaryApplyFunction to display the values of the PDF data according to their types (integer, string, array, dictionary, and so on). The signature of the CGPDFDictionaryApplierFunction you are calling is:
typealias CGPDFDictionaryApplierFunction = (UnsafePointer<Int8>, COpaquePointer, UnsafeMutablePointer<()>) -> Void
Your program is displaying the pointers to the data; you can access the data values according to their types as shown below (Swift 2):
// Swift 2 driver: opens the PDF, prints its page count, then applies printPDFKeys
// to every entry of the catalog and of the info dictionary.
let filepath = "/Users/ben/Desktop/Test.pdf"
let urlDocument = NSURL(fileURLWithPath: filepath)
let myDocument = CGPDFDocumentCreateWithURL(urlDocument)
if myDocument != nil {
let numPages = CGPDFDocumentGetNumberOfPages(myDocument)
print("Number of pages: \(numPages)")
// Get complete catalog
let myCatalog = CGPDFDocumentGetCatalog(myDocument)
CGPDFDictionaryApplyFunction(myCatalog, printPDFKeys, nil)
// Then the document's info dictionary.
let myInfo = CGPDFDocumentGetInfo(myDocument)
CGPDFDictionaryApplyFunction(myInfo, printPDFKeys, nil)
} else {
print("Cannot open PDF document")
}
In order to be passed to CGPDFDictionaryApplyFunction, printPDFKeys has to be a global function (declared outside your main class); alternatively, you could put the code in a closure passed to CGPDFDictionaryApplyFunction, as in your example above. The code below is shortened and does not include complete protection against errors and null values.
// Swift 2 applier callback: prints the key, the object type and a decoded value
// for one entry of a PDF dictionary.
// - key: C string holding the entry's key (decoded as ISO Latin-1).
// - object: the PDF object stored under that key.
// - info: context pointer handed through by CGPDFDictionaryApplyFunction (the callers shown here pass nil).
func printPDFKeys( key: UnsafePointer<Int8>, object: COpaquePointer, info: UnsafeMutablePointer<()>) {
// NOTE(review): contentDict is never read afterwards.
let contentDict: CGPDFDictionaryRef = CGPDFDictionaryRef(info)
let keyString = String(CString: UnsafePointer<CChar>(key), encoding: NSISOLatin1StringEncoding)
let objectType = CGPDFObjectGetType(object)
// A key that cannot be decoded ends the callback without printing anything.
if keyString == nil {
return
}
print("key \(keyString!) is present in dictionary, type \(objectType.rawValue)")
// Shared out-pointer used by the Name and String cases below.
var ptrObjectValue = UnsafePointer<Int8>()
switch objectType {
// ObjectType is enum of:
// Null
// Boolean
// Integer
// Real
// Name
// String
// Array
// Dictionary
// Stream
case .Boolean:
// Boolean
var objectBoolean = CGPDFBoolean()
if CGPDFObjectGetValue(object, objectType, &objectBoolean) {
let testbool = NSNumber(unsignedChar: objectBoolean)
print("Boolean value \(testbool)")
}
case .Integer:
// Integer
var objectInteger = CGPDFInteger()
if CGPDFObjectGetValue(object, objectType, &objectInteger) {
print("Integer value \(objectInteger)")
}
case .Real:
// Real
var objectReal = CGPDFReal()
if CGPDFObjectGetValue(object, objectType, &objectReal) {
print("Real value \(objectReal)")
}
case .Name:
// Name
if (CGPDFObjectGetValue(object, objectType, &ptrObjectValue)) {
let stringName = String(CString: UnsafePointer<CChar>(ptrObjectValue), encoding: NSISOLatin1StringEncoding)
print("Name value: \(stringName!)")
}
case .String:
// String
// NOTE(review): valueFound is not checked before the pointer is used.
let valueFound = CGPDFObjectGetValue(object, objectType, &ptrObjectValue)
let stringValue = CGPDFStringCopyTextString(COpaquePointer(ptrObjectValue))
print("String value: \(stringValue!)")
case .Array:
// Array
print("Array")
var objectArray = CGPDFArrayRef()
if (CGPDFObjectGetValue(object, objectType, &objectArray))
{
print("array: \(arrayFromPDFArray(objectArray))")
}
case .Dictionary:
// Dictionary
var objectDictionary = CGPDFDictionaryRef()
if (CGPDFObjectGetValue(object, objectType, &objectDictionary)) {
let count = CGPDFDictionaryGetCount(objectDictionary)
print("Found dictionary with \(count) entries")
// Entries stored under "Parent" or "P" are not followed.
if !(keyString == "Parent") && !(keyString == "P") {
//catalogLevel = catalogLevel + 1
CGPDFDictionaryApplyFunction(objectDictionary, printPDFKeys, nil)
//catalogLevel = catalogLevel - 1
}
}
case .Stream:
// Stream
print("Stream")
var objectStream = CGPDFStreamRef()
if (CGPDFObjectGetValue(object, objectType, &objectStream)) {
// NOTE(review): dict is fetched but not used afterwards.
let dict: CGPDFDictionaryRef = CGPDFStreamGetDictionary( objectStream )
var fmt: CGPDFDataFormat = .Raw
let streamData: CFDataRef = CGPDFStreamCopyData(objectStream, &fmt)!;
let data = NSData(data: streamData)
let dataString = NSString(data: data, encoding: NSUTF8StringEncoding)
let dataLength: Int = CFDataGetLength(streamData)
print("data stream (length=\(dataLength)):")
// Only streams shorter than 400 bytes are echoed, decoded as UTF-8.
if dataLength < 400 {
print(dataString)
}
}
default:
// Every remaining object type is reported as "Null".
print("Null")
}
}
// convert a PDF array into an objC one
// Swift 2: converts a PDF array into an NSMutableArray by running every element
// through objectForPDFObject; elements for which that returns nil are left out.
func arrayFromPDFArray(pdfArray: CGPDFArrayRef ) -> NSMutableArray {
// NOTE(review): this `i` is shadowed by the loop variable below and never used.
var i:Int = 0
var tmpArray: NSMutableArray = NSMutableArray()
let count = CGPDFArrayGetCount(pdfArray)
for i in 0..<count {
var value = CGPDFObjectRef()
if (CGPDFArrayGetObject(pdfArray, i, &value)) {
if let object = objectForPDFObject(value) {
tmpArray.addObject(object)
}
}
}
return tmpArray
}
// Swift 2: converts one PDF object into an object value.
// Booleans, integers, reals, strings and streams are returned as values;
// dictionaries are only printed (via printPDFKeys) and yield nil, as does every other type.
func objectForPDFObject( object: CGPDFObjectRef) -> AnyObject? {
let objectType: CGPDFObjectType = CGPDFObjectGetType(object)
// Out-pointer used by the String case below.
var ptrObjectValue = UnsafePointer<Int8>()
switch (objectType) {
case .Boolean:
// Boolean
var objectBoolean = CGPDFBoolean()
if CGPDFObjectGetValue(object, objectType, &objectBoolean) {
let testbool = NSNumber(unsignedChar: objectBoolean)
return testbool
}
case .Integer:
// Integer
var objectInteger = CGPDFInteger()
if CGPDFObjectGetValue(object, objectType, &objectInteger) {
return objectInteger
}
case .Real:
// Real
var objectReal = CGPDFReal()
if CGPDFObjectGetValue(object, objectType, &objectReal) {
return objectReal
}
case .String:
// NOTE(review): valueFound is not checked before the pointer is used.
let valueFound = CGPDFObjectGetValue(object, objectType, &ptrObjectValue)
let stringValue = CGPDFStringCopyTextString(COpaquePointer(ptrObjectValue))
return stringValue
case .Dictionary:
// Dictionary
var objectDictionary = CGPDFDictionaryRef()
if (CGPDFObjectGetValue(object, objectType, &objectDictionary)) {
let count = CGPDFDictionaryGetCount(objectDictionary)
print("In array, found dictionary with \(count) entries")
CGPDFDictionaryApplyFunction(objectDictionary, printPDFKeys, nil)
}
case .Stream:
// Stream
var objectStream = CGPDFStreamRef()
if (CGPDFObjectGetValue(object, objectType, &objectStream)) {
// NOTE(review): dict is fetched but not used afterwards.
let dict: CGPDFDictionaryRef = CGPDFStreamGetDictionary( objectStream )
var fmt: CGPDFDataFormat = .Raw
let streamData: CFDataRef = CGPDFStreamCopyData(objectStream, &fmt)!;
let data = NSData(data: streamData)
// The stream bytes are decoded as UTF-8 and returned as a string.
let dataString = NSString(data: data, encoding: NSUTF8StringEncoding)
print("data stream (length=\(CFDataGetLength(streamData))):")
return dataString
}
default:
return nil
}
// Reached when a lookup above failed or the case produced no value.
return nil
}
Made a parser (based on previous answers) that crawls the PDF hierarchy and gives you a JSON.
// Parse PDF into JSON.
PDFParser.parse(pdfUrl: pdfFileURL, into: jsonFileURL)
// Parse PDF into Dictionary.
let pdf: [String:Any?] = PDFParser.parse(pdfUrl: pdfFileURL)
Gives you:
{
"Catalog" : {
"Pages<Dictionary>" : {
"MediaBox<Array>" : [
0,
0,
612,
792
],
"Type<Name>" : "Pages",
"Kids<Array>" : [
{
"Rotate<Integer>" : 0,
"MediaBox<Array>" : [
0,
0,
595.27499999999998,
841.88999999999999
],
"Parent<Dictionary>" : "<PARENT_NOT_SERIALIZED>",
"Resources<Dictionary>" : {
"ColorSpace<Dictionary>" : {
"Cs1<Array>" : [
"ICCBased",
{
"N<Integer>" : 3,
"Filter<Name>" : "FlateDecode",
"Alternate<Name>" : "DeviceRGB",
"Length<Integer>" : 2612
}
]
}
...
To get from CGPDFDocument (like original question):
// Get document catalog.
guard
let document = CGPDFDocument(pdfFileURL as CFURL),
let catalog = document.catalog
else { return }
// Parse into dictionary.
let catalogDictionary = PDFParser.value(from: catalog)
Gives you a pretty usual Swift dictionary. Console output:
Optional(["Pages<Dictionary>": Optional({
"Count<Integer>" = 1;
"Kids<Array>" = (
{
"ArtBox<Array>" = (
"28.3465",
"325.193",
"393.389",
"813.543"
);
"Contents<Stream>" = {
Data = "q Q q 0 0 595.276 841.89 re W n 1 0 1 0 k /Gs1 gs 201.8862 420.9449 m 201.8862\n473.8269 244.7562 516.6959 297.6372 516.6959 c 350.5192 516.6959 393.3892\n473.8269 393.3892 420.9449 c 393.3892 368.0629 350.5192 325.1939 297.6372\n325.1939 c 244.7562 325.1939 201.8862 368.0629 201.8862 420.9449 c f Q q 28.346 530.078 283.464 283.465\nre W n 0 0 0 1 k /Gs1 gs BT 12 0 0 12 28.3467 803.499 Tm /Tc1 1 Tf [ (h) 4\n(ttp://epp) 7 (z.eu) ] TJ ET Q";
"Filter<Name>" = FlateDecode;
"Length<Integer>" = 237;
};
"MediaBox<Array>" = (
0,
0,
"595.2760000000001",
"841.89"
);
"Parent<Dictionary>" = "<PARENT_NOT_SERIALIZED>";
"Resources<Dictionary>" = {
"ExtGState<Dictionary>" = {
"Gs1<Dictionary>" = {
"OPM<Integer>" = 1;
"Type<Name>" = ExtGState;
};
};
...
ParsePDF.swift:
//
// PDFParser.swift
// PDFParser
//
// Copyright (c) 2020 Geri Borbás http://www.twitter.com/_eppz
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
import Foundation
import PDFKit
/// Crawls the object hierarchy of a PDF document and turns it into plain
/// Swift / Foundation values (dictionaries, arrays, strings, numbers) that
/// can be written out as JSON.
///
/// Dictionary keys in the output have the form `Key<TypeName>`, where the
/// type name comes from `namesForTypes`.
class PDFParser
{

    /// Shorthand for type strings.
    static let namesForTypes: [CGPDFObjectType:String] =
    [
        .null : "Null",
        .boolean : "Boolean",
        .integer : "Integer",
        .real : "Real",
        .name : "Name",
        .string : "String",
        .array : "Array",
        .dictionary : "Dictionary",
        .stream : "Stream",
        CGPDFObjectTypeObject : "Object",
    ]

    /// Placeholder strings stored in the output where a value is not available.
    struct Message
    {
        static let parentNotSerialized = "<PARENT_NOT_SERIALIZED>"
        static let couldNotParseValue = "<COULD_NOT_PARSE_VALUE>"
        static let couldNotGetStreamData = "<COULD_NOT_GET_STREAM_DATA>"
        static let unknownStreamDataFormat = "<UNKNOWN_STREAM_DATA_FORMAT>"
    }

    /// Parse a PDF file into a JSON file.
    ///
    /// Errors thrown while serializing or writing are printed, not rethrown.
    ///
    /// - Parameters:
    ///   - pdfUrl: Location of the PDF to read.
    ///   - jsonURL: Location the pretty-printed JSON is written to.
    static func parse(pdfUrl: URL, into jsonURL: URL)
    {
        do
        {
            let pdf = PDFParser.parse(pdfUrl: pdfUrl)
            let data = try JSONSerialization.data(withJSONObject: pdf, options: .prettyPrinted)
            try data.write(to: jsonURL, options: [])
        }
        catch
        { print(error) }
    }

    /// Parse a PDF file into a dictionary.
    ///
    /// - Parameter pdfUrl: Location of the PDF to read.
    /// - Returns: A dictionary with the parsed catalog under "Catalog" and the
    ///   parsed info dictionary under "Info". An empty dictionary is returned
    ///   (and "Cannot open PDF." printed) when the document, its catalog or
    ///   its info dictionary is not available.
    static func parse(pdfUrl: URL) -> [String:Any?]
    {
        // Document.
        guard
            let document = CGPDFDocument(pdfUrl as CFURL),
            let catalog = document.catalog,
            let info = document.info
        else
        {
            print("Cannot open PDF.")
            return [:]
        }

        // Parse.
        return [
            "Catalog" : PDFParser.value(from: catalog),
            "Info" : PDFParser.value(from: info)
        ]
    }

    /// Converts one PDF object into a Swift value.
    ///
    /// - Parameter object: The PDF object to convert.
    /// - Returns: `Bool`, `Int`, `Double`, `String`, `[Any]` or
    ///   `[String: Any?]` depending on the object type; `nil` for a PDF null
    ///   or when the value cannot be read.
    static func value(from object: CGPDFObjectRef) -> Any?
    {
        switch (CGPDFObjectGetType(object))
        {
            case .null:

                return nil

            case .boolean:

                var valueRef: CGPDFBoolean = 0
                if CGPDFObjectGetValue(object, .boolean, &valueRef)
                { return Bool(valueRef == 0x01) }

            case .integer:

                var valueRef: CGPDFInteger = 0
                if CGPDFObjectGetValue(object, .integer, &valueRef)
                { return valueRef as Int }

            case .real:

                var valueRef: CGPDFReal = 0.0
                if CGPDFObjectGetValue(object, .real, &valueRef)
                { return Double(valueRef) }

            case .name:

                var objectRefOrNil: UnsafePointer<Int8>? = nil
                if
                    CGPDFObjectGetValue(object, .name, &objectRefOrNil),
                    let objectRef = objectRefOrNil,
                    let string = String(cString: objectRef, encoding: String.Encoding.isoLatin1)
                { return string }

            case .string:

                var objectRefOrNil: UnsafePointer<Int8>? = nil
                if
                    CGPDFObjectGetValue(object, .string, &objectRefOrNil),
                    let objectRef = objectRefOrNil,
                    let stringRef = CGPDFStringCopyTextString(OpaquePointer(objectRef))
                { return stringRef as String }

            case .array:

                var arrayRefOrNil: CGPDFArrayRef? = nil
                if
                    CGPDFObjectGetValue(object, .array, &arrayRefOrNil),
                    let arrayRef = arrayRefOrNil
                {
                    // Elements that cannot be fetched or parsed are left out.
                    var array: [Any] = []
                    for index in 0 ..< CGPDFArrayGetCount(arrayRef)
                    {
                        var eachObjectRef: CGPDFObjectRef? = nil
                        if
                            CGPDFArrayGetObject(arrayRef, index, &eachObjectRef),
                            let eachObject = eachObjectRef,
                            let eachValue = PDFParser.value(from: eachObject)
                        { array.append(eachValue) }
                    }
                    return array
                }

            case .stream:

                var streamRefOrNil: CGPDFStreamRef? = nil
                if
                    CGPDFObjectGetValue(object, .stream, &streamRefOrNil),
                    let streamRef = streamRefOrNil,
                    let streamDictionaryRef = CGPDFStreamGetDictionary(streamRef)
                {
                    // Get stream dictionary.
                    var streamNSMutableDictionary = NSMutableDictionary()
                    Self.collectObjects(from: streamDictionaryRef, into: &streamNSMutableDictionary)
                    var streamDictionary = streamNSMutableDictionary as! [String: Any?]

                    // Get data.
                    var dataString: String? = Message.couldNotGetStreamData
                    var streamDataFormat: CGPDFDataFormat = .raw
                    if let streamData: CFData = CGPDFStreamCopyData(streamRef, &streamDataFormat)
                    {
                        // Raw data is decoded as UTF-8 text, image data is Base64-encoded.
                        switch streamDataFormat
                        {
                            case .raw: dataString = String(data: NSData(data: streamData as Data) as Data, encoding: String.Encoding.utf8)
                            case .jpegEncoded, .JPEG2000: dataString = NSData(data: streamData as Data).base64EncodedString()
                            @unknown default: dataString = Message.unknownStreamDataFormat
                        }
                    }

                    // Add to dictionary.
                    streamDictionary["Data"] = dataString

                    return streamDictionary
                }

            case .dictionary:

                var dictionaryRefOrNil: CGPDFDictionaryRef? = nil
                if
                    CGPDFObjectGetValue(object, .dictionary, &dictionaryRefOrNil),
                    let dictionaryRef = dictionaryRefOrNil
                {
                    var dictionary = NSMutableDictionary()
                    Self.collectObjects(from: dictionaryRef, into: &dictionary)
                    return dictionary as! [String: Any?]
                }

            @unknown default:

                // Any other type value is treated as a dictionary reference
                // and its entries are collected directly.
                var dictionary = NSMutableDictionary()
                Self.collectObjects(from: object, into: &dictionary)
                return dictionary as! [String: Any?]
        }

        // No known case.
        return nil
    }

    /// Parses every entry of a PDF dictionary into an `NSMutableDictionary`.
    ///
    /// Entries named "Parent" are not followed; a placeholder string is
    /// stored instead. Entries whose value cannot be parsed also get a
    /// placeholder string.
    ///
    /// - Parameters:
    ///   - dictionaryRef: The PDF dictionary to walk.
    ///   - dictionaryPointer: Pointer to the `NSMutableDictionary` variable
    ///     that receives the entries; handed to the applier callback as its
    ///     context.
    static func collectObjects(from dictionaryRef: CGPDFDictionaryRef, into dictionaryPointer: UnsafeMutableRawPointer?)
    {
        CGPDFDictionaryApplyFunction(
            dictionaryRef,
            {
                (eachKeyPointer, eachObject, eachContextOrNil: UnsafeMutableRawPointer?) -> Void in

                // Unwrap dictionary.
                guard let dictionary = eachContextOrNil?.assumingMemoryBound(to: NSMutableDictionary.self).pointee
                else { return print("Could not unwrap dictionary.") }

                // Unwrap key.
                guard let eachKey = String(cString: UnsafePointer<CChar>(eachKeyPointer), encoding: .isoLatin1)
                else { return print("Could not unwrap key.") }

                // Type.
                guard let eachTypeName = PDFParser.namesForTypes[CGPDFObjectGetType(eachObject)]
                else { return print("Could not unwrap type.") }

                // Assemble.
                let eachDictionaryKey = "\(eachKey)<\(eachTypeName)>" as NSString

                // Skip parent.
                guard eachKey != "Parent"
                else
                {
                    dictionary.setObject(Message.parentNotSerialized, forKey: eachDictionaryKey)
                    return
                }

                // Parse value.
                guard let eachValue = PDFParser.value(from: eachObject)
                else
                {
                    // Record the placeholder and carry on with the next entry.
                    // A PDF null also parses to nil, so aborting the process
                    // here would crash on ordinary documents.
                    dictionary.setObject(Message.couldNotParseValue, forKey: eachDictionaryKey)
                    return
                }

                // Set.
                dictionary.setObject(eachValue, forKey: eachDictionaryKey)
            },
            dictionaryPointer
        )
    }
}
Given a Bool?, I'd like to be able to do this:
let a = BoolToString(optbool) ?? "<None>"
which would either give me "true", "false", or "<None>".
Is there a built-in for BoolToString?
String(Bool) is the easiest way.
var myBool = true
var boolAsString = String(myBool)
let b1: Bool? = true
let b2: Bool? = false
let b3: Bool? = nil
print(b1?.description ?? "none") // "true"
print(b2?.description ?? "none") // "false"
print(b3?.description ?? "none") // "none"
or you can define 'one liner' which works with both Bool and Bool? as a function
/// Converts an optional Boolean to "true" / "false", or "<None>" when it is nil.
func BoolToString(b: Bool?)->String {
    guard let flag = b else { return "<None>" }
    return flag.description
}
let trueString = String(true) //"true"
let trueBool = Bool("true") //true
let falseBool = Bool("false") //false
let nilBool = Bool("foo") //nil
You could use the ?: ternary operator:
let a = optBool == nil ? "<None>" : "\(optBool!)"
Or you could use map:
let a = optBool.map { "\($0)" } ?? "<None>"
Of the two, optBool.map { "\($0)" } does exactly what you want BoolToString to do; it returns a String? that is Optional(true), Optional(false), or nil. Then the nil coalescing operator ?? unwraps that or replaces nil with "<None>".
Update:
This can also be written as:
let a = optBool.map(String.init) ?? "<None>"
or:
let a = optBool.map { String($0) } ?? "<None>"
var boolValue: Bool? = nil
var stringValue = "\(boolValue)" // interpolating the Optional itself yields "Optional(true)", "Optional(false)", or "nil"
Or a more verbose custom function:
/// Converts an optional Boolean to "true" / "false", or "<None>" when it is nil.
func boolToString(value: Bool?) -> String {
    switch value {
    case .some(let flag):
        return "\(flag)"
    case .none:
        // Returning nil here instead would require the return type to be String?.
        return "<None>"
    }
}
You can do it with extensions!
extension Optional where Wrapped == Bool {
    /// Returns "true" or "false" for a wrapped Boolean, or `nilString` when
    /// the optional is empty.
    ///
    /// - Parameter nilString: Text returned for `nil`; defaults to "nil".
    func toString(_ nilString: String = "nil") -> String {
        guard let flag = self else {
            return nilString
        }
        return String(flag)
    }
}
Usage:
let b1: Bool? = true
let b2: Bool? = false
let b3: Bool? = nil
b1.toString() // "true"
b2.toString() // "false"
b3.toString() // "nil"
b3.toString("<None>") // "<None>"