Compare commits
	
		
			22 Commits
		
	
	
		
			c++-librar
			...
			8a00d7064b
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						
						
							
						
						8a00d7064b
	
				 | 
					
					
						|||
| 
						
						
							
						
						b358467791
	
				 | 
					
					
						|||
| 
						
						
							
						
						d0f80aa6ae
	
				 | 
					
					
						|||
| 
						
						
							
						
						97ffe3d46e
	
				 | 
					
					
						|||
| 
						
						
							
						
						1a3c7a8282
	
				 | 
					
					
						|||
| 
						
						
							
						
						d8171b9ac4
	
				 | 
					
					
						|||
| 
						
						
							
						
						bcb24af926
	
				 | 
					
					
						|||
| 
						
						
							
						
						c0927c05dd
	
				 | 
					
					
						|||
| 
						
						
							
						
						5e87223b5d
	
				 | 
					
					
						|||
| 
						
						
							
						
						58a4ba1d05
	
				 | 
					
					
						|||
| 
						
						
							
						
						350cf89e51
	
				 | 
					
					
						|||
| 
						
						
							
						
						d4ff9a6e89
	
				 | 
					
					
						|||
| 
						
						
							
						
						a5176b5bbb
	
				 | 
					
					
						|||
| 
						
						
							
						
						af6a937033
	
				 | 
					
					
						|||
| 
						
						
							
						
						8913f8ba9c
	
				 | 
					
					
						|||
| 
						
						
							
						
						524eea9b2f
	
				 | 
					
					
						|||
| 
						
						
							
						
						3ce08d33f6
	
				 | 
					
					
						|||
| 
						
						
							
						
						a75f990565
	
				 | 
					
					
						|||
| 
						
						
							
						
						46fa50749f
	
				 | 
					
					
						|||
| 
						
						
							
						
						796a9640d3
	
				 | 
					
					
						|||
| 
						
						
							
						
						2d08100b58
	
				 | 
					
					
						|||
| 
						
						
							
						
						1224d9be47
	
				 | 
					
					
						
							
								
								
									
										8
									
								
								.clang-format
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								.clang-format
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
			
		||||
BasedOnStyle: Chromium
 | 
			
		||||
ColumnLimit: 100
 | 
			
		||||
IndentCaseLabels: false
 | 
			
		||||
AccessModifierOffset: -2
 | 
			
		||||
ContinuationIndentWidth: 2
 | 
			
		||||
SpaceAfterTemplateKeyword: false
 | 
			
		||||
SpaceAfterCStyleCast: true
 | 
			
		||||
SpacesBeforeTrailingComments: 2
 | 
			
		||||
							
								
								
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								.gitignore
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,8 @@
 | 
			
		||||
/builddir
 | 
			
		||||
/pdf-simple-sign.cflags
 | 
			
		||||
/pdf-simple-sign.config
 | 
			
		||||
/pdf-simple-sign.creator
 | 
			
		||||
/pdf-simple-sign.creator.user
 | 
			
		||||
/pdf-simple-sign.cxxflags
 | 
			
		||||
/pdf-simple-sign.files
 | 
			
		||||
/pdf-simple-sign.includes
 | 
			
		||||
							
								
								
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							@@ -1,4 +1,4 @@
 | 
			
		||||
Copyright (c) 2017, Přemysl Eric Janouch <p@janouch.name>
 | 
			
		||||
Copyright (c) 2017 - 2021, Přemysl Eric Janouch <p@janouch.name>
 | 
			
		||||
 | 
			
		||||
Permission to use, copy, modify, and/or distribute this software for any
 | 
			
		||||
purpose with or without fee is hereby granted.
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										25
									
								
								NEWS
									
									
									
									
									
								
							
							
						
						
									
										25
									
								
								NEWS
									
									
									
									
									
								
							@@ -1,3 +1,28 @@
 | 
			
		||||
1.1.1 (2020-09-06)
 | 
			
		||||
 | 
			
		||||
 * Fix a dysfunctional example in the manual
 | 
			
		||||
 | 
			
		||||
 * Go: write the xref table in a deterministic order
 | 
			
		||||
 | 
			
		||||
 * Add a trivial test suite, based on pdfsig from poppler-utils
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
1.1 (2020-09-05)
 | 
			
		||||
 | 
			
		||||
 * Make it possible to change the signature reservation with an option
 | 
			
		||||
 | 
			
		||||
 * Return errors rather than mangle documents in some cases,
 | 
			
		||||
   notably with pre-existing PDF forms
 | 
			
		||||
 | 
			
		||||
 * Avoid downgrading the document's PDF version to 1.6
 | 
			
		||||
 | 
			
		||||
 * A few fixes for PDF parsing and serialisation
 | 
			
		||||
 | 
			
		||||
 * Add an instructive man page
 | 
			
		||||
 | 
			
		||||
 * Add a native Go port of the utility, also usable as a library
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
1.0 (2018-08-03)
 | 
			
		||||
 | 
			
		||||
 * Initial release
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										29
									
								
								README.adoc
									
									
									
									
									
								
							
							
						
						
									
										29
									
								
								README.adoc
									
									
									
									
									
								
							@@ -1,27 +1,22 @@
 | 
			
		||||
pdf-simple-sign
 | 
			
		||||
===============
 | 
			
		||||
:compact-option:
 | 
			
		||||
 | 
			
		||||
'pdf-simple-sign' is a simple PDF signer intended for documents produced by
 | 
			
		||||
the Cairo library.  As such, it currently comes with some restrictions:
 | 
			
		||||
 | 
			
		||||
 * the document may not have any forms or signatures already, as they will be
 | 
			
		||||
   overwitten
 | 
			
		||||
 * the document may not employ cross-reference streams, or must constitute
 | 
			
		||||
   a hybrid-reference file at least
 | 
			
		||||
 * the document may not be newer than PDF 1.6 already, or it will get downgraded
 | 
			
		||||
   to that version
 | 
			
		||||
 * the signature may take at most 4 kilobytes as a compile-time limit,
 | 
			
		||||
   which should be enough space even for one intermediate certificate
 | 
			
		||||
 | 
			
		||||
The signature is attached to the first page and has no appearance.
 | 
			
		||||
the Cairo library, GNU troff, ImageMagick, or similar.
 | 
			
		||||
 | 
			
		||||
I don't aim to extend the functionality any further.  The project is fairly
 | 
			
		||||
self-contained and it should be easy to grasp and change to suit your needs.
 | 
			
		||||
 | 
			
		||||
Documentation
 | 
			
		||||
-------------
 | 
			
		||||
See the link:pdf-simple-sign.adoc[man page] for information about usage.
 | 
			
		||||
The rest of this README will concern itself with externalities.
 | 
			
		||||
 | 
			
		||||
image:https://pkg.go.dev/badge/janouch.name/pdf-simple-sign@master/pdf["PkgGoDev", link="https://pkg.go.dev/janouch.name/pdf-simple-sign@master/pdf"]
 | 
			
		||||
 | 
			
		||||
Building
 | 
			
		||||
--------
 | 
			
		||||
Build dependencies: Meson, a C++11 compiler, pkg-config +
 | 
			
		||||
Build dependencies: Meson, Asciidoctor, a C++11 compiler, pkg-config +
 | 
			
		||||
Runtime dependencies: libcrypto (OpenSSL 1.1 API)
 | 
			
		||||
 | 
			
		||||
 $ git clone https://git.janouch.name/p/pdf-simple-sign.git
 | 
			
		||||
@@ -34,10 +29,10 @@ In addition to the C++ version, also included is a native Go port:
 | 
			
		||||
 | 
			
		||||
 $ go get janouch.name/pdf-simple-sign/cmd/pdf-simple-sign
 | 
			
		||||
 | 
			
		||||
Usage
 | 
			
		||||
-----
 | 
			
		||||
And a crude external VFS for Midnight Commander, that may be used to extract
 | 
			
		||||
all streams from a given PDF file:
 | 
			
		||||
 | 
			
		||||
 $ ./pdf-simple-sign document.pdf document.signed.pdf KeyAndCerts.p12 password
 | 
			
		||||
 $ go get janouch.name/pdf-simple-sign/cmd/extfs-pdf
 | 
			
		||||
 | 
			
		||||
Contributing and Support
 | 
			
		||||
------------------------
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										132
									
								
								cmd/extfs-pdf/main.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										132
									
								
								cmd/extfs-pdf/main.go
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,132 @@
 | 
			
		||||
//
 | 
			
		||||
// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name>
 | 
			
		||||
//
 | 
			
		||||
// Permission to use, copy, modify, and/or distribute this software for any
 | 
			
		||||
// purpose with or without fee is hereby granted.
 | 
			
		||||
//
 | 
			
		||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 | 
			
		||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 | 
			
		||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 | 
			
		||||
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 | 
			
		||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 | 
			
		||||
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 | 
			
		||||
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
// extfs-pdf is an external VFS plugin for Midnight Commander.
 | 
			
		||||
// More serious image extractors should rewrite this to use pdfimages(1).
 | 
			
		||||
package main
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"flag"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"os"
 | 
			
		||||
 | 
			
		||||
	"janouch.name/pdf-simple-sign/pdf"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// die formats a message in the manner of fmt.Sprintf, writes it to standard
// error with a trailing newline, and terminates the process with the given
// exit status.  It never returns.
func die(status int, format string, args ...interface{}) {
 | 
			
		||||
	os.Stderr.WriteString(fmt.Sprintf(format+"\n", args...))
 | 
			
		||||
	os.Exit(status)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// usage prints the expected command line, prefixed with the program name
// taken from os.Args[0], and exits with status 1 by way of die.
func usage() {
 | 
			
		||||
	die(1, "Usage: %s [-h] COMMAND DOCUMENT [ARG...]", os.Args[0])
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// streamSuffix chooses a file name suffix for the given object from the
// "Filter" entry of its dictionary.  When that entry is a name, a few known
// image filters are mapped to fixed extensions and any other filter name is
// returned unchanged; when it is not a name, the result is "stream".
func streamSuffix(o *pdf.Object) string {
 | 
			
		||||
	if filter, _ := o.Dict["Filter"]; filter.Kind == pdf.Name {
 | 
			
		||||
		switch filter.String {
 | 
			
		||||
		case "JBIG2Decode":
 | 
			
		||||
			// This is the file extension used by pdfimages(1).
 | 
			
		||||
			// This is not a complete JBIG2 standalone file.
 | 
			
		||||
			return "jb2e"
 | 
			
		||||
		case "JPXDecode":
 | 
			
		||||
			return "jp2"
 | 
			
		||||
		case "DCTDecode":
 | 
			
		||||
			return "jpg"
 | 
			
		||||
		default:
 | 
			
		||||
			return filter.String
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return "stream"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// list prints one listing line per object returned by updater.ListIndirect,
// named N-GENERATION and sized by the length of its serialized form.  Objects
// of kind pdf.Stream get a second line, N-GENERATION.SUFFIX, sized by the
// length of the stream data, with SUFFIX chosen by streamSuffix.
//
// A failed Get is reported on standard error and the object is still listed,
// with a size of 0.
//
// NOTE(review): the fixed permissions, owner and date columns are presumably
// what Midnight Commander expects from an extfs "list" command -- confirm.
// NOTE(review): after a failed Get, object is used as returned; presumably it
// is a zero value whose Kind is not pdf.Stream -- confirm.
func list(updater *pdf.Updater) {
 | 
			
		||||
	for _, o := range updater.ListIndirect() {
 | 
			
		||||
		object, err := updater.Get(o.N, o.Generation)
 | 
			
		||||
		size := 0
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			fmt.Fprintf(os.Stderr, "%s\n", err)
 | 
			
		||||
		} else {
 | 
			
		||||
			// Accidental transformation, retrieving original data is more work.
 | 
			
		||||
			size = len(object.Serialize())
 | 
			
		||||
		}
 | 
			
		||||
		fmt.Printf("-r--r--r-- 1 0 0 %d 01-01-1970 00:00 %d-%d\n",
 | 
			
		||||
			size, o.N, o.Generation)
 | 
			
		||||
		if object.Kind == pdf.Stream {
 | 
			
		||||
			fmt.Printf("-r--r--r-- 1 0 0 %d 01-01-1970 00:00 %d-%d.%s\n",
 | 
			
		||||
				len(object.Stream), o.N, o.Generation, streamSuffix(&object))
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// copyout writes the object named by storedFilename to the file extractTo.
//
// storedFilename is scanned as N-GENERATION, optionally followed by a suffix;
// at least the two numbers must be present.  Without a suffix the serialized
// object is written, with a suffix the contents of object.Stream are written
// instead.  The output file is created with mode 0666.
//
// Any failure (unparsable name, object lookup, file write) terminates the
// program through die with exit status 3.
func copyout(updater *pdf.Updater, storedFilename, extractTo string) {
 | 
			
		||||
	var (
 | 
			
		||||
		n, generation uint
 | 
			
		||||
		suffix        string
 | 
			
		||||
	)
 | 
			
		||||
	m, err := fmt.Sscanf(storedFilename, "%d-%d%s", &n, &generation, &suffix)
 | 
			
		||||
	if m < 2 {
 | 
			
		||||
		die(3, "%s: %s", storedFilename, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	object, err := updater.Get(n, generation)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		die(3, "%s: %s", storedFilename, err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	content := []byte(object.Serialize())
 | 
			
		||||
	if suffix != "" {
 | 
			
		||||
		content = object.Stream
 | 
			
		||||
	}
 | 
			
		||||
	if err = os.WriteFile(extractTo, content, 0666); err != nil {
 | 
			
		||||
		die(3, "%s", err)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// main parses the command line as COMMAND DOCUMENT [ARG...], reads the whole
// document into memory, wraps it in a pdf.Updater and dispatches on COMMAND:
//
//   - "list" takes no further arguments and calls list,
//   - "copyout" takes exactly two further arguments (stored name, target
//     path) and calls copyout.
//
// Exit statuses set here: 1 for a wrong argument count, an unreadable
// document or an unsupported command, 2 when pdf.NewUpdater fails.
func main() {
 | 
			
		||||
	flag.Usage = usage
 | 
			
		||||
	flag.Parse()
 | 
			
		||||
	if flag.NArg() < 2 {
 | 
			
		||||
		usage()
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	command, documentPath := flag.Arg(0), flag.Arg(1)
 | 
			
		||||
	doc, err := os.ReadFile(documentPath)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		die(1, "%s", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	updater, err := pdf.NewUpdater(doc)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		die(2, "%s", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	switch command {
 | 
			
		||||
	default:
 | 
			
		||||
		die(1, "unsupported command: %s", command)
 | 
			
		||||
	case "list":
 | 
			
		||||
		if flag.NArg() != 2 {
 | 
			
		||||
			usage()
 | 
			
		||||
		} else {
 | 
			
		||||
			list(updater)
 | 
			
		||||
		}
 | 
			
		||||
	case "copyout":
 | 
			
		||||
		if flag.NArg() != 4 {
 | 
			
		||||
			usage()
 | 
			
		||||
		} else {
 | 
			
		||||
			copyout(updater, flag.Arg(2), flag.Arg(3))
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
//
 | 
			
		||||
// Copyright (c) 2018, Přemysl Eric Janouch <p@janouch.name>
 | 
			
		||||
// Copyright (c) 2018 - 2020, Přemysl Eric Janouch <p@janouch.name>
 | 
			
		||||
//
 | 
			
		||||
// Permission to use, copy, modify, and/or distribute this software for any
 | 
			
		||||
// purpose with or without fee is hereby granted.
 | 
			
		||||
@@ -20,8 +20,9 @@ import (
 | 
			
		||||
	"flag"
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"io/ioutil"
 | 
			
		||||
	"janouch.name/pdf-simple-sign/pdf"
 | 
			
		||||
	"os"
 | 
			
		||||
 | 
			
		||||
	"janouch.name/pdf-simple-sign/pdf"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// #include <unistd.h>
 | 
			
		||||
@@ -39,10 +40,13 @@ func die(status int, format string, args ...interface{}) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func usage() {
 | 
			
		||||
	die(1, "Usage: %s [-h] INPUT-FILENAME OUTPUT-FILENAME "+
 | 
			
		||||
	die(1, "Usage: %s [-h] [-r RESERVATION] INPUT-FILENAME OUTPUT-FILENAME "+
 | 
			
		||||
		"PKCS12-PATH PKCS12-PASS", os.Args[0])
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var reservation = flag.Int(
 | 
			
		||||
	"r", 4096, "signature reservation as a number of bytes")
 | 
			
		||||
 | 
			
		||||
func main() {
 | 
			
		||||
	flag.Usage = usage
 | 
			
		||||
	flag.Parse()
 | 
			
		||||
@@ -51,7 +55,7 @@ func main() {
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	inputPath, outputPath := flag.Arg(0), flag.Arg(1)
 | 
			
		||||
	pdfDocument, err := ioutil.ReadFile(inputPath)
 | 
			
		||||
	doc, err := ioutil.ReadFile(inputPath)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		die(1, "%s", err)
 | 
			
		||||
	}
 | 
			
		||||
@@ -63,10 +67,10 @@ func main() {
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		die(3, "%s", err)
 | 
			
		||||
	}
 | 
			
		||||
	if pdfDocument, err = pdf.Sign(pdfDocument, key, certs); err != nil {
 | 
			
		||||
	if doc, err = pdf.Sign(doc, key, certs, *reservation); err != nil {
 | 
			
		||||
		die(4, "error: %s", err)
 | 
			
		||||
	}
 | 
			
		||||
	if err = ioutil.WriteFile(outputPath, pdfDocument, 0666); err != nil {
 | 
			
		||||
	if err = ioutil.WriteFile(outputPath, doc, 0666); err != nil {
 | 
			
		||||
		die(5, "%s", err)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										2
									
								
								go.mod
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								go.mod
									
									
									
									
									
								
							@@ -1,6 +1,6 @@
 | 
			
		||||
module janouch.name/pdf-simple-sign
 | 
			
		||||
 | 
			
		||||
go 1.14
 | 
			
		||||
go 1.17
 | 
			
		||||
 | 
			
		||||
require (
 | 
			
		||||
	go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										4
									
								
								go.sum
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								go.sum
									
									
									
									
									
								
							@@ -1,9 +1,5 @@
 | 
			
		||||
go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8 h1:W3oGFPlHBLgXdsbPVixWFMYsuPhm81/Qww3XAgBbn/0=
 | 
			
		||||
go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8/go.mod h1:5fWP3IVYEMc04wC+lMJAfkmNmKAl2P1swVv8VS+URZ8=
 | 
			
		||||
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1 h1:A/5uWzF44DlIgdm/PQFwfMkW0JX+cIcQi/SwLAmZP5M=
 | 
			
		||||
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk=
 | 
			
		||||
golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85 h1:et7+NAX3lLIk5qUCTA9QelBjGE/NkhzYw/mhnr0s7nI=
 | 
			
		||||
golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
 | 
			
		||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 | 
			
		||||
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de h1:ikNHVSjEfnvz6sxdSPCaPt572qowuyMDMJLLm3Db3ig=
 | 
			
		||||
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										27
									
								
								meson.build
									
									
									
									
									
								
							
							
						
						
									
										27
									
								
								meson.build
									
									
									
									
									
								
							@@ -1,12 +1,23 @@
 | 
			
		||||
project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'])
 | 
			
		||||
project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'],
 | 
			
		||||
	version : '1.1.1')
 | 
			
		||||
 | 
			
		||||
conf = configuration_data()
 | 
			
		||||
conf.set_quoted('PROJECT_NAME', meson.project_name())
 | 
			
		||||
conf.set_quoted('PROJECT_VERSION', meson.project_version())
 | 
			
		||||
configure_file(output : 'config.h', configuration : conf)
 | 
			
		||||
 | 
			
		||||
cryptodep = dependency('libcrypto')
 | 
			
		||||
 | 
			
		||||
executable('pdf-simple-sign', 'pdf-simple-sign.cpp', 'pdf.cpp',
 | 
			
		||||
executable('pdf-simple-sign', 'pdf-simple-sign.cpp',
 | 
			
		||||
	install : true,
 | 
			
		||||
	dependencies : cryptodep)
 | 
			
		||||
 | 
			
		||||
install_headers('pdf-simple-sign.h')
 | 
			
		||||
library('pdf-simple-sign', 'pdf.cpp',
 | 
			
		||||
	soversion : 0,
 | 
			
		||||
	install : true,
 | 
			
		||||
	dependencies : cryptodep)
 | 
			
		||||
asciidoctor = find_program('asciidoctor')
 | 
			
		||||
foreach page : ['pdf-simple-sign']
 | 
			
		||||
	custom_target('manpage for ' + page,
 | 
			
		||||
		input: page + '.adoc', output: page + '.1',
 | 
			
		||||
		command: [asciidoctor, '-b', 'manpage',
 | 
			
		||||
			'-a', 'release-version=' + meson.project_version(),
 | 
			
		||||
			'@INPUT@', '-o', '@OUTPUT@'],
 | 
			
		||||
		install: true,
 | 
			
		||||
		install_dir: join_paths(get_option('mandir'), 'man1'))
 | 
			
		||||
endforeach
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										80
									
								
								pdf-simple-sign.adoc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										80
									
								
								pdf-simple-sign.adoc
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,80 @@
 | 
			
		||||
pdf-simple-sign(1)
 | 
			
		||||
==================
 | 
			
		||||
:doctype: manpage
 | 
			
		||||
:manmanual: pdf-simple-sign Manual
 | 
			
		||||
:mansource: pdf-simple-sign {release-version}
 | 
			
		||||
 | 
			
		||||
Name
 | 
			
		||||
----
 | 
			
		||||
pdf-simple-sign - a simple PDF signer
 | 
			
		||||
 | 
			
		||||
Synopsis
 | 
			
		||||
--------
 | 
			
		||||
*pdf-simple-sign* [_OPTION_]... _INPUT.pdf_ _OUTPUT.pdf_ _KEY-PAIR.p12_ _PASSWORD_
 | 
			
		||||
 | 
			
		||||
Description
 | 
			
		||||
-----------
 | 
			
		||||
'pdf-simple-sign' is a simple PDF signer intended for documents produced by
 | 
			
		||||
the Cairo library, GNU troff, ImageMagick, or similar.  As such, it currently
 | 
			
		||||
comes with some restrictions:
 | 
			
		||||
 | 
			
		||||
 * the document may not have any forms or signatures already, as they would be
 | 
			
		||||
   overwritten,
 | 
			
		||||
 * the document may not employ cross-reference streams, or must constitute
 | 
			
		||||
   a hybrid-reference file at least.
 | 
			
		||||
 | 
			
		||||
The key and certificate pair is accepted in the PKCS#12 format.  The _PASSWORD_
 | 
			
		||||
must be supplied on the command line, and may be empty if it is not needed.
 | 
			
		||||
 | 
			
		||||
The signature is attached to the first page and has no appearance.
 | 
			
		||||
 | 
			
		||||
If signature data don't fit within the default reservation of 4 kibibytes,
 | 
			
		||||
you might need to adjust it using the *-r* option, or throw out any unnecessary
 | 
			
		||||
intermediate certificates.
 | 
			
		||||
 | 
			
		||||
Options
 | 
			
		||||
-------
 | 
			
		||||
*-r* _RESERVATION_, *--reservation*=_RESERVATION_::
 | 
			
		||||
  Set aside _RESERVATION_ amount of bytes for the resulting signature.
 | 
			
		||||
  Feel free to try a few values in a loop.  The program itself has no
 | 
			
		||||
  conceptions about the data, so it can't make accurate predictions.
 | 
			
		||||
 | 
			
		||||
*-h*, *--help*::
 | 
			
		||||
  Display a help message and exit.
 | 
			
		||||
 | 
			
		||||
*-V*, *--version*::
 | 
			
		||||
  Output version information and exit.
 | 
			
		||||
 | 
			
		||||
Examples
 | 
			
		||||
--------
 | 
			
		||||
Create a self-signed certificate, make a document containing the current date,
 | 
			
		||||
sign it and verify the attached signature:
 | 
			
		||||
 | 
			
		||||
 $ openssl req -newkey rsa:2048 -subj /CN=Test -nodes \
 | 
			
		||||
   -keyout key.pem -x509 -addext keyUsage=digitalSignature \
 | 
			
		||||
   -out cert.pem 2>/dev/null
 | 
			
		||||
 $ openssl pkcs12 -inkey key.pem -in cert.pem \
 | 
			
		||||
   -export -passout pass: -out key-pair.p12
 | 
			
		||||
 $ date | groff -T pdf > test.pdf
 | 
			
		||||
 $ pdf-simple-sign test.pdf test.signed.pdf key-pair.p12 ""
 | 
			
		||||
 $ pdfsig test.signed.pdf
 | 
			
		||||
 Digital Signature Info of: test.signed.pdf
 | 
			
		||||
 Signature #1:
 | 
			
		||||
   - Signer Certificate Common Name: Test
 | 
			
		||||
   - Signer full Distinguished Name: CN=Test
 | 
			
		||||
   - Signing Time: Sep 05 2020 19:41:22
 | 
			
		||||
   - Signing Hash Algorithm: SHA-256
 | 
			
		||||
   - Signature Type: adbe.pkcs7.detached
 | 
			
		||||
   - Signed Ranges: [0 - 6522], [14716 - 15243]
 | 
			
		||||
   - Total document signed
 | 
			
		||||
   - Signature Validation: Signature is Valid.
 | 
			
		||||
   - Certificate Validation: Certificate issuer isn't Trusted.
 | 
			
		||||
 | 
			
		||||
Reporting bugs
 | 
			
		||||
--------------
 | 
			
		||||
Use https://git.janouch.name/p/pdf-simple-sign to report bugs, request features,
 | 
			
		||||
or submit pull requests.
 | 
			
		||||
 | 
			
		||||
See also
 | 
			
		||||
--------
 | 
			
		||||
*openssl*(1), *pdfsig*(1)
 | 
			
		||||
@@ -16,23 +16,922 @@
 | 
			
		||||
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
#include <cmath>
 | 
			
		||||
#include <cstdio>
 | 
			
		||||
#include <cstdlib>
 | 
			
		||||
#include <cstring>
 | 
			
		||||
#include <cerrno>
 | 
			
		||||
#include <cstdarg>
 | 
			
		||||
#undef NDEBUG
 | 
			
		||||
#include <cassert>
 | 
			
		||||
 | 
			
		||||
#include <map>
 | 
			
		||||
#include <memory>
 | 
			
		||||
#include <regex>
 | 
			
		||||
#include <set>
 | 
			
		||||
#include <vector>
 | 
			
		||||
 | 
			
		||||
#if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
 | 
			
		||||
#error Need libstdc++ >= 4.9 for <regex>
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
#include <getopt.h>
 | 
			
		||||
#include <openssl/err.h>
 | 
			
		||||
#include <openssl/pkcs12.h>
 | 
			
		||||
#include <openssl/x509v3.h>
 | 
			
		||||
#include <unistd.h>
 | 
			
		||||
 | 
			
		||||
#include "pdf-simple-sign.h"
 | 
			
		||||
#include "config.h"
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
using uint = unsigned int;
 | 
			
		||||
using ushort = unsigned short;
 | 
			
		||||
 | 
			
		||||
/// Join the strings in @a v into one, with @a delim between consecutive elements.
/// An empty vector yields an empty string.
static std::string concatenate(const std::vector<std::string>& v, const std::string& delim) {
 | 
			
		||||
  std::string res;
 | 
			
		||||
  if (v.empty())
 | 
			
		||||
    return res;
 | 
			
		||||
  for (const auto& s : v)
 | 
			
		||||
    res += s + delim;
 | 
			
		||||
  // The loop appends a delimiter after every element, cut off the last one
  return res.substr(0, res.length() - delim.length());
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Format the arguments in the manner of printf and return the result as a std::string.
/// The format is processed twice: once to measure the output, once to produce it.
// NOTE(review): the return value of std::snprintf is not checked; a negative result
// (reported on encoding errors) would end up in the unsigned size computation -- confirm
// whether callers can ever trigger that.
template<typename... Args>
 | 
			
		||||
std::string ssprintf(const std::string& format, Args... args) {
 | 
			
		||||
  // With a null buffer of zero size nothing is written, only the needed length is returned;
  // one more byte is reserved for the terminating NUL
  size_t size = std::snprintf(nullptr, 0, format.c_str(), args...) + 1;
 | 
			
		||||
  std::unique_ptr<char[]> buf(new char[size]);
 | 
			
		||||
  std::snprintf(buf.get(), size, format.c_str(), args...);
 | 
			
		||||
  // Leave the terminating NUL out of the resulting string
  return std::string(buf.get(), buf.get() + size - 1);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
/// PDF token/object thingy.  Objects may be composed either from one or a sequence of tokens.
 | 
			
		||||
/// The PDF Reference doesn't actually speak of tokens, though ISO 32000-1:2008 does.
 | 
			
		||||
/// Which of the payload members below carry meaning depends on the value of @a type,
/// as noted next to each member.
struct pdf_object {
 | 
			
		||||
  enum type {
 | 
			
		||||
    END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
 | 
			
		||||
    // Simple tokens
 | 
			
		||||
    B_ARRAY, E_ARRAY, B_DICT, E_DICT,
 | 
			
		||||
    // Higher-level objects
 | 
			
		||||
    ARRAY, DICT, OBJECT, REFERENCE,
 | 
			
		||||
  } type = END;
 | 
			
		||||
 | 
			
		||||
  std::string string;                      ///< END (error message), COMMENT/KEYWORD/NAME/STRING
 | 
			
		||||
  double number = 0.;                      ///< BOOL, NUMERIC
 | 
			
		||||
  std::vector<pdf_object> array;           ///< ARRAY, OBJECT
 | 
			
		||||
  std::map<std::string, pdf_object> dict;  ///< DICT, in the future also STREAM
 | 
			
		||||
  uint n = 0, generation = 0;              ///< OBJECT, REFERENCE
 | 
			
		||||
 | 
			
		||||
  // Converting constructors, one per kind of payload; the last two fix the type themselves
  pdf_object(enum type type = END)                          : type(type) {}
 | 
			
		||||
  pdf_object(enum type type, double v)                      : type(type), number(v) {}
 | 
			
		||||
  pdf_object(enum type type, const std::string& v)          : type(type), string(v) {}
 | 
			
		||||
  pdf_object(enum type type, uint n, uint g)                : type(type), n(n), generation(g) {}
 | 
			
		||||
  pdf_object(const std::vector<pdf_object>& array)          : type(ARRAY), array(array) {}
 | 
			
		||||
  pdf_object(const std::map<std::string, pdf_object>& dict) : type(DICT), dict(dict) {}
 | 
			
		||||
 | 
			
		||||
  // Copying and moving use the compiler-generated memberwise operations
  pdf_object(const pdf_object&)            = default;
 | 
			
		||||
  pdf_object(pdf_object&&)                 = default;
 | 
			
		||||
  pdf_object& operator=(const pdf_object&) = default;
 | 
			
		||||
  pdf_object& operator=(pdf_object&&)      = default;
 | 
			
		||||
 | 
			
		||||
  /// Return whether this is a number without a fractional part
 | 
			
		||||
  bool is_integer() const {
 | 
			
		||||
    // std::modf() returns the fractional part, the integral part in tmp is not needed
    double tmp;
 | 
			
		||||
    return type == NUMERIC && std::modf(number, &tmp) == 0.;
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Basic lexical analyser for the Portable Document Format, giving limited error information
 | 
			
		||||
struct pdf_lexer {
 | 
			
		||||
  const unsigned char* p;
 | 
			
		||||
  pdf_lexer(const char* s) : p(reinterpret_cast<const unsigned char*>(s)) {}
 | 
			
		||||
 | 
			
		||||
  static constexpr const char* oct_alphabet = "01234567";
 | 
			
		||||
  static constexpr const char* dec_alphabet = "0123456789";
 | 
			
		||||
  static constexpr const char* hex_alphabet = "0123456789abcdefABCDEF";
 | 
			
		||||
  static constexpr const char* whitespace = "\t\n\f\r ";
 | 
			
		||||
  static constexpr const char* delimiters = "()<>[]{}/%";
 | 
			
		||||
 | 
			
		||||
  bool eat_newline(int ch) {
 | 
			
		||||
    if (ch == '\r') {
 | 
			
		||||
      if (*p == '\n') p++;
 | 
			
		||||
      return true;
 | 
			
		||||
    }
 | 
			
		||||
    return ch == '\n';
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object string() {
 | 
			
		||||
    std::string value;
 | 
			
		||||
    int parens = 1;
 | 
			
		||||
    while (1) {
 | 
			
		||||
      if (!*p) return {pdf_object::END, "unexpected end of string"};
 | 
			
		||||
      auto ch = *p++;
 | 
			
		||||
      if (eat_newline(ch)) ch = '\n';
 | 
			
		||||
      else if (ch == '(') { parens++; }
 | 
			
		||||
      else if (ch == ')') { if (!--parens) break; }
 | 
			
		||||
      else if (ch == '\\') {
 | 
			
		||||
        if (!*p) return {pdf_object::END, "unexpected end of string"};
 | 
			
		||||
        switch ((ch = *p++)) {
 | 
			
		||||
        case 'n': ch = '\n'; break;
 | 
			
		||||
        case 'r': ch = '\r'; break;
 | 
			
		||||
        case 't': ch = '\t'; break;
 | 
			
		||||
        case 'b': ch = '\b'; break;
 | 
			
		||||
        case 'f': ch = '\f'; break;
 | 
			
		||||
        default:
 | 
			
		||||
          if (eat_newline(ch))
 | 
			
		||||
            continue;
 | 
			
		||||
          std::string octal;
 | 
			
		||||
          if (ch && strchr(oct_alphabet, ch)) {
 | 
			
		||||
            octal += ch;
 | 
			
		||||
            if (*p && strchr(oct_alphabet, *p)) octal += *p++;
 | 
			
		||||
            if (*p && strchr(oct_alphabet, *p)) octal += *p++;
 | 
			
		||||
            ch = std::stoi(octal, nullptr, 8);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      value += ch;
 | 
			
		||||
    }
 | 
			
		||||
    return {pdf_object::STRING, value};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object string_hex() {
 | 
			
		||||
    std::string value, buf;
 | 
			
		||||
    while (*p != '>') {
 | 
			
		||||
      if (!*p) return {pdf_object::END, "unexpected end of hex string"};
 | 
			
		||||
      if (!strchr(hex_alphabet, *p))
 | 
			
		||||
        return {pdf_object::END, "invalid hex string"};
 | 
			
		||||
      buf += *p++;
 | 
			
		||||
      if (buf.size() == 2) {
 | 
			
		||||
        value += char(std::stoi(buf, nullptr, 16));
 | 
			
		||||
        buf.clear();
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    p++;
 | 
			
		||||
    if (!buf.empty()) value += char(std::stoi(buf + '0', nullptr, 16));
 | 
			
		||||
    return {pdf_object::STRING, value};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object name() {
 | 
			
		||||
    std::string value;
 | 
			
		||||
    while (!strchr(whitespace, *p) && !strchr(delimiters, *p)) {
 | 
			
		||||
      auto ch = *p++;
 | 
			
		||||
      if (ch == '#') {
 | 
			
		||||
        std::string hexa;
 | 
			
		||||
        if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
 | 
			
		||||
        if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
 | 
			
		||||
        if (hexa.size() != 2)
 | 
			
		||||
          return {pdf_object::END, "invalid name hexa escape"};
 | 
			
		||||
        ch = char(std::stoi(hexa, nullptr, 16));
 | 
			
		||||
      }
 | 
			
		||||
      value += ch;
 | 
			
		||||
    }
 | 
			
		||||
    if (value.empty()) return {pdf_object::END, "unexpected end of name"};
 | 
			
		||||
    return {pdf_object::NAME, value};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object comment() {
 | 
			
		||||
    std::string value;
 | 
			
		||||
    while (*p && *p != '\r' && *p != '\n')
 | 
			
		||||
      value += *p++;
 | 
			
		||||
    return {pdf_object::COMMENT, value};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // XXX maybe invalid numbers should rather be interpreted as keywords
 | 
			
		||||
  pdf_object number() {
 | 
			
		||||
    std::string value;
 | 
			
		||||
    if (*p == '-')
 | 
			
		||||
      value += *p++;
 | 
			
		||||
    bool real = false, digits = false;
 | 
			
		||||
    while (*p) {
 | 
			
		||||
      if (strchr(dec_alphabet, *p))
 | 
			
		||||
        digits = true;
 | 
			
		||||
      else if (*p == '.' && !real)
 | 
			
		||||
        real = true;
 | 
			
		||||
      else
 | 
			
		||||
        break;
 | 
			
		||||
      value += *p++;
 | 
			
		||||
    }
 | 
			
		||||
    if (!digits) return {pdf_object::END, "invalid number"};
 | 
			
		||||
    return {pdf_object::NUMERIC, std::stod(value, nullptr)};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object next() {
 | 
			
		||||
    if (!*p)
 | 
			
		||||
      return {pdf_object::END};
 | 
			
		||||
    if (strchr("-0123456789.", *p))
 | 
			
		||||
      return number();
 | 
			
		||||
 | 
			
		||||
    // {} end up being keywords, we might want to error out on those
 | 
			
		||||
    std::string value;
 | 
			
		||||
    while (!strchr(whitespace, *p) && !strchr(delimiters, *p))
 | 
			
		||||
      value += *p++;
 | 
			
		||||
    if (!value.empty()) {
 | 
			
		||||
      if (value == "null")  return {pdf_object::NIL};
 | 
			
		||||
      if (value == "true")  return {pdf_object::BOOL, 1};
 | 
			
		||||
      if (value == "false") return {pdf_object::BOOL, 0};
 | 
			
		||||
      return {pdf_object::KEYWORD, value};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    switch (char ch = *p++) {
 | 
			
		||||
    case '/': return name();
 | 
			
		||||
    case '%': return comment();
 | 
			
		||||
    case '(': return string();
 | 
			
		||||
    case '[': return {pdf_object::B_ARRAY};
 | 
			
		||||
    case ']': return {pdf_object::E_ARRAY};
 | 
			
		||||
    case '<':
 | 
			
		||||
      if (*p++ == '<')
 | 
			
		||||
        return {pdf_object::B_DICT};
 | 
			
		||||
      p--;
 | 
			
		||||
      return string_hex();
 | 
			
		||||
    case '>':
 | 
			
		||||
      if (*p++ == '>')
 | 
			
		||||
        return {pdf_object::E_DICT};
 | 
			
		||||
      p--;
 | 
			
		||||
      return {pdf_object::END, "unexpected '>'"};
 | 
			
		||||
    default:
 | 
			
		||||
      if (eat_newline(ch))
 | 
			
		||||
        return {pdf_object::NL};
 | 
			
		||||
      if (strchr(whitespace, ch))
 | 
			
		||||
        return next();
 | 
			
		||||
      return {pdf_object::END, "unexpected input"};
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// FIXME lines /should not/ be longer than 255 characters, some wrapping is in order
 | 
			
		||||
static std::string pdf_serialize(const pdf_object& o) {
 | 
			
		||||
  switch (o.type) {
 | 
			
		||||
  case pdf_object::NL:      return "\n";
 | 
			
		||||
  case pdf_object::NIL:     return "null";
 | 
			
		||||
  case pdf_object::BOOL:    return o.number ? "true" : "false";
 | 
			
		||||
  case pdf_object::NUMERIC: {
 | 
			
		||||
    if (o.is_integer()) return std::to_string((long long) o.number);
 | 
			
		||||
    return std::to_string(o.number);
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::KEYWORD: return o.string;
 | 
			
		||||
  case pdf_object::NAME: {
 | 
			
		||||
    std::string escaped = "/";
 | 
			
		||||
    for (char c : o.string) {
 | 
			
		||||
      if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
 | 
			
		||||
        escaped += ssprintf("#%02x", c);
 | 
			
		||||
      else
 | 
			
		||||
        escaped += c;
 | 
			
		||||
    }
 | 
			
		||||
    return escaped;
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::STRING: {
 | 
			
		||||
    std::string escaped;
 | 
			
		||||
    for (char c : o.string) {
 | 
			
		||||
      if (c == '\\' || c == '(' || c == ')')
 | 
			
		||||
        escaped += '\\';
 | 
			
		||||
      escaped += c;
 | 
			
		||||
    }
 | 
			
		||||
    return "(" + escaped + ")";
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::B_ARRAY: return "[";
 | 
			
		||||
  case pdf_object::E_ARRAY: return "]";
 | 
			
		||||
  case pdf_object::B_DICT:  return "<<";
 | 
			
		||||
  case pdf_object::E_DICT:  return ">>";
 | 
			
		||||
  case pdf_object::ARRAY: {
 | 
			
		||||
    std::vector<std::string> v;
 | 
			
		||||
    for (const auto& i : o.array)
 | 
			
		||||
      v.push_back(pdf_serialize(i));
 | 
			
		||||
    return "[ " + concatenate(v, " ") + " ]";
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::DICT: {
 | 
			
		||||
    std::string s;
 | 
			
		||||
    for (const auto i : o.dict)
 | 
			
		||||
      // FIXME the key is also supposed to be escaped by pdf_serialize()
 | 
			
		||||
      s += " /" + i.first + " " + pdf_serialize(i.second);
 | 
			
		||||
    return "<<" + s + " >>";
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::OBJECT:
 | 
			
		||||
    return ssprintf("%u %u obj\n", o.n, o.generation) + pdf_serialize(o.array.at(0)) + "\nendobj";
 | 
			
		||||
  case pdf_object::REFERENCE:
 | 
			
		||||
    return ssprintf("%u %u R", o.n, o.generation);
 | 
			
		||||
  default:
 | 
			
		||||
    assert(!"unsupported token for serialization");
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
/// Utility class to help read and possibly incrementally update PDF files
 | 
			
		||||
class pdf_updater {
 | 
			
		||||
  struct ref {
 | 
			
		||||
    size_t offset = 0;     ///< File offset or N of the next free entry
 | 
			
		||||
    uint generation = 0;   ///< Object generation
 | 
			
		||||
    bool free = true;      ///< Whether this N has been deleted
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  std::vector<ref> xref;   ///< Cross-reference table
 | 
			
		||||
  size_t xref_size = 0;    ///< Current cross-reference table size, correlated to xref.size()
 | 
			
		||||
  std::set<uint> updated;  ///< List of updated objects
 | 
			
		||||
 | 
			
		||||
  pdf_object parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
 | 
			
		||||
  pdf_object parse_R(std::vector<pdf_object>& stack) const;
 | 
			
		||||
  pdf_object parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
 | 
			
		||||
  std::string load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries);
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
  /// The new trailer dictionary to be written, initialized with the old one
 | 
			
		||||
  std::map<std::string, pdf_object> trailer;
 | 
			
		||||
 | 
			
		||||
  std::string& document;
 | 
			
		||||
  pdf_updater(std::string& document) : document(document) {}
 | 
			
		||||
 | 
			
		||||
  /// Build the cross-reference table and prepare a new trailer dictionary
 | 
			
		||||
  std::string initialize();
 | 
			
		||||
  /// Try to extract the claimed PDF version as a positive decimal number, e.g. 17 for PDF 1.7.
 | 
			
		||||
  /// Returns zero on failure.
 | 
			
		||||
  int version(const pdf_object& root) const;
 | 
			
		||||
  /// Retrieve an object by its number and generation -- may return NIL or END with an error
 | 
			
		||||
  pdf_object get(uint n, uint generation) const;
 | 
			
		||||
  /// Allocate a new object number
 | 
			
		||||
  uint allocate();
 | 
			
		||||
  /// Append an updated object to the end of the document
 | 
			
		||||
  void update(uint n, std::function<void()> fill);
 | 
			
		||||
  /// Write an updated cross-reference table and trailer
 | 
			
		||||
  void flush_updates();
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
/// If the object is an error, forward its message, otherwise return err.
 | 
			
		||||
static std::string pdf_error(const pdf_object& o, const char* err) {
 | 
			
		||||
  if (o.type != pdf_object::END || o.string.empty()) return err;
 | 
			
		||||
  return o.string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Finish parsing an indirect object after its "obj" keyword has been read.
/// The object number and generation are popped off the stack, and everything up to the
/// matching "endobj" keyword is collected in the array of the returned OBJECT.
pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
  if (stack.size() < 2)
    return {pdf_object::END, "missing object ID pair"};

  // The generation lies on top of the stack, with the object number right below it
  const pdf_object generation = std::move(stack.back());
  stack.pop_back();
  const pdf_object number = std::move(stack.back());
  stack.pop_back();

  const auto fits_uint = [](const pdf_object& o) {
    return o.is_integer() && o.number >= 0 && o.number <= UINT_MAX;
  };
  if (!fits_uint(generation) || !fits_uint(number))
    return {pdf_object::END, "invalid object ID pair"};

  pdf_object result{pdf_object::OBJECT};
  result.n = number.number;
  result.generation = generation.number;

  // The object's own array doubles as the operand stack for whatever is nested within
  for (;;) {
    auto item = parse(lex, result.array);
    if (item.type == pdf_object::END)
      return {pdf_object::END, pdf_error(item, "object doesn't end")};
    if (item.type == pdf_object::KEYWORD && item.string == "endobj")
      return result;
    result.array.push_back(std::move(item));
  }
}
 | 
			
		||||
 | 
			
		||||
/// Build an indirect reference after its "R" keyword has been read,
/// popping the object number and generation off the stack.
pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
  if (stack.size() < 2)
    return {pdf_object::END, "missing reference ID pair"};

  // The generation lies on top of the stack, with the object number right below it
  const pdf_object generation = std::move(stack.back());
  stack.pop_back();
  const pdf_object number = std::move(stack.back());
  stack.pop_back();

  const auto fits_uint = [](const pdf_object& o) {
    return o.is_integer() && o.number >= 0 && o.number <= UINT_MAX;
  };
  if (!fits_uint(generation) || !fits_uint(number))
    return {pdf_object::END, "invalid reference ID pair"};

  pdf_object result{pdf_object::REFERENCE};
  result.n = number.number;
  result.generation = generation.number;
  return result;
}
 | 
			
		||||
 | 
			
		||||
/// Read an object at the lexer's position.  Not a strict parser.
 | 
			
		||||
pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
 | 
			
		||||
  auto token = lex.next();
 | 
			
		||||
  switch (token.type) {
 | 
			
		||||
  case pdf_object::NL:
 | 
			
		||||
  case pdf_object::COMMENT:
 | 
			
		||||
    // These are not important to parsing, not even for this procedure's needs
 | 
			
		||||
    return parse(lex, stack);
 | 
			
		||||
  case pdf_object::B_ARRAY: {
 | 
			
		||||
    std::vector<pdf_object> array;
 | 
			
		||||
    while (1) {
 | 
			
		||||
      auto object = parse(lex, array);
 | 
			
		||||
      if (object.type == pdf_object::END)
 | 
			
		||||
        return {pdf_object::END, pdf_error(object, "array doesn't end")};
 | 
			
		||||
      if (object.type == pdf_object::E_ARRAY)
 | 
			
		||||
        break;
 | 
			
		||||
      array.push_back(std::move(object));
 | 
			
		||||
    }
 | 
			
		||||
    return array;
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::B_DICT: {
 | 
			
		||||
    std::vector<pdf_object> array;
 | 
			
		||||
    while (1) {
 | 
			
		||||
      auto object = parse(lex, array);
 | 
			
		||||
      if (object.type == pdf_object::END)
 | 
			
		||||
        return {pdf_object::END, pdf_error(object, "dictionary doesn't end")};
 | 
			
		||||
      if (object.type == pdf_object::E_DICT)
 | 
			
		||||
        break;
 | 
			
		||||
      array.push_back(std::move(object));
 | 
			
		||||
    }
 | 
			
		||||
    if (array.size() % 2)
 | 
			
		||||
      return {pdf_object::END, "unbalanced dictionary"};
 | 
			
		||||
    std::map<std::string, pdf_object> dict;
 | 
			
		||||
    for (size_t i = 0; i < array.size(); i += 2) {
 | 
			
		||||
      if (array[i].type != pdf_object::NAME)
 | 
			
		||||
        return {pdf_object::END, "invalid dictionary key type"};
 | 
			
		||||
      dict.insert({array[i].string, std::move(array[i + 1])});
 | 
			
		||||
    }
 | 
			
		||||
    return dict;
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::KEYWORD:
 | 
			
		||||
    // Appears in the document body, typically needs to access the cross-reference table
 | 
			
		||||
    // TODO use the xref to read /Length etc. once we actually need to read such objects;
 | 
			
		||||
    //   presumably streams can use the pdf_object::string member
 | 
			
		||||
    if (token.string == "stream") return {pdf_object::END, "streams are not supported yet"};
 | 
			
		||||
    if (token.string == "obj")    return parse_obj(lex, stack);
 | 
			
		||||
    if (token.string == "R")      return parse_R(stack);
 | 
			
		||||
    return token;
 | 
			
		||||
  default:
 | 
			
		||||
    return token;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Load one cross-reference section, from the "xref" keyword up to and including "trailer".
///
/// Entries whose object number is already present in loaded_entries are skipped,
/// so that sections loaded earlier take precedence.
/// Returns an empty string on success, or an error message.
std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries) {
  std::vector<pdf_object> throwaway_stack;
  {
    auto keyword = parse(lex, throwaway_stack);
    if (keyword.type != pdf_object::KEYWORD || keyword.string != "xref")
      return "invalid xref table";
  }
  while (1) {
    auto object = parse(lex, throwaway_stack);
    if (object.type == pdf_object::END)
      return pdf_error(object, "unexpected EOF while looking for the trailer");
    if (object.type == pdf_object::KEYWORD && object.string == "trailer")
      break;

    // Each subsection starts with the first object number and the number of entries
    auto second = parse(lex, throwaway_stack);
    if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX ||
        !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
      return "invalid xref section header";

    const size_t start = object.number;
    const size_t count = second.number;
    // Object numbers are of type "uint" everywhere else, so subsections must not run past
    // its range: the number would get truncated within loaded_entries but not within xref
    if (count && start > UINT_MAX - (count - 1))
      return "invalid xref section header";

    for (size_t i = 0; i < count; i++) {
      auto off = parse(lex, throwaway_stack);
      auto gen = parse(lex, throwaway_stack);
      auto key = parse(lex, throwaway_stack);
      if (!off.is_integer() || off.number < 0 || off.number > document.length() ||
          !gen.is_integer() || gen.number < 0 || gen.number > 65535 ||
          key.type != pdf_object::KEYWORD)
        return "invalid xref entry";

      bool free = true;
      if (key.string == "n")
        free = false;
      else if (key.string != "f")
        return "invalid xref entry";

      auto n = start + i;
      if (loaded_entries.count(n))
        continue;
      // NOTE(review): a sparse table with a huge object number makes this allocate
      //   an entry for every number below it -- consider imposing a limit
      if (n >= xref.size())
        xref.resize(n + 1);
      loaded_entries.insert(n);

      auto& ref = xref[n];
      ref.generation = gen.number;
      ref.offset = off.number;
      ref.free = free;
    }
  }
  return "";
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
std::string pdf_updater::initialize() {
 | 
			
		||||
  // We only need to look for startxref roughly within the last kibibyte of the document
 | 
			
		||||
  static std::regex haystack_re(R"([\s\S]*\sstartxref\s+(\d+)\s+%%EOF)");
 | 
			
		||||
  std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);
 | 
			
		||||
 | 
			
		||||
  std::smatch m;
 | 
			
		||||
  if (!std::regex_search(haystack, m, haystack_re, std::regex_constants::match_continuous))
 | 
			
		||||
    return "cannot find startxref";
 | 
			
		||||
 | 
			
		||||
  size_t xref_offset = std::stoul(m.str(1)), last_xref_offset = xref_offset;
 | 
			
		||||
  std::set<size_t> loaded_xrefs;
 | 
			
		||||
  std::set<uint> loaded_entries;
 | 
			
		||||
 | 
			
		||||
  std::vector<pdf_object> throwaway_stack;
 | 
			
		||||
  while (1) {
 | 
			
		||||
    if (loaded_xrefs.count(xref_offset))
 | 
			
		||||
      return "circular xref offsets";
 | 
			
		||||
    if (xref_offset >= document.length())
 | 
			
		||||
      return "invalid xref offset";
 | 
			
		||||
 | 
			
		||||
    pdf_lexer lex(document.c_str() + xref_offset);
 | 
			
		||||
    auto err = load_xref(lex, loaded_entries);
 | 
			
		||||
    if (!err.empty()) return err;
 | 
			
		||||
 | 
			
		||||
    auto trailer = parse(lex, throwaway_stack);
 | 
			
		||||
    if (trailer.type != pdf_object::DICT)
 | 
			
		||||
      return pdf_error(trailer, "invalid trailer dictionary");
 | 
			
		||||
    if (loaded_xrefs.empty())
 | 
			
		||||
      this->trailer = trailer.dict;
 | 
			
		||||
    loaded_xrefs.insert(xref_offset);
 | 
			
		||||
 | 
			
		||||
    const auto prev_offset = trailer.dict.find("Prev");
 | 
			
		||||
    if (prev_offset == trailer.dict.end())
 | 
			
		||||
      break;
 | 
			
		||||
    // FIXME do not read offsets and sizes as floating point numbers
 | 
			
		||||
    if (!prev_offset->second.is_integer() || prev_offset->second.number < 0)
 | 
			
		||||
      return "invalid Prev offset";
 | 
			
		||||
    xref_offset = prev_offset->second.number;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  trailer["Prev"] = {pdf_object::NUMERIC, double(last_xref_offset)};
 | 
			
		||||
  const auto last_size = trailer.find("Size");
 | 
			
		||||
  if (last_size == trailer.end() || !last_size->second.is_integer() ||
 | 
			
		||||
      last_size->second.number <= 0)
 | 
			
		||||
    return "invalid or missing cross-reference table Size";
 | 
			
		||||
 | 
			
		||||
  xref_size = last_size->second.number;
 | 
			
		||||
  return "";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int pdf_updater::version(const pdf_object& root) const {
 | 
			
		||||
  auto version = root.dict.find("Version");
 | 
			
		||||
  if (version != root.dict.end() && version->second.type == pdf_object::NAME) {
 | 
			
		||||
    const auto& v = version->second.string;
 | 
			
		||||
    if (isdigit(v[0]) && v[1] == '.' && isdigit(v[2]) && !v[3])
 | 
			
		||||
      return (v[0] - '0') * 10 + (v[2] - '0');
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // We only need to look for the comment roughly within the first kibibyte of the document
 | 
			
		||||
  static std::regex version_re(R"((?:^|[\r\n])%(?:!PS-Adobe-\d\.\d )?PDF-(\d)\.(\d)[\r\n])");
 | 
			
		||||
  std::string haystack = document.substr(0, 1024);
 | 
			
		||||
 | 
			
		||||
  std::smatch m;
 | 
			
		||||
  if (std::regex_search(haystack, m, version_re, std::regex_constants::match_default))
 | 
			
		||||
    return std::stoul(m.str(1)) * 10 + std::stoul(m.str(2));
 | 
			
		||||
 | 
			
		||||
  return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Retrieve an object by its number and generation.
///
/// Returns NIL when the cross-reference table has no usable entry for it,
/// and END with an error message when the object cannot be parsed.
pdf_object pdf_updater::get(uint n, uint generation) const {
  // The claimed table size comes from the trailer and may exceed what has actually been
  // loaded, so the index needs to be checked against both
  if (n >= xref_size || n >= xref.size())
    return {pdf_object::NIL};

  const auto& ref = xref[n];
  if (ref.free || ref.generation != generation || ref.offset >= document.length())
    return {pdf_object::NIL};

  pdf_lexer lex(document.c_str() + ref.offset);
  std::vector<pdf_object> stack;
  while (1) {
    auto object = parse(lex, stack);
    if (object.type == pdf_object::END)
      return object;
    // Operands are kept on the stack until the "obj" keyword turns them into an OBJECT
    if (object.type != pdf_object::OBJECT)
      stack.push_back(std::move(object));
    else if (object.n != n || object.generation != generation)
      return {pdf_object::END, "object mismatch"};
    else if (object.array.empty())
      // Report this as an error, rather than having at() throw
      return {pdf_object::END, "object is empty"};
    else
      return std::move(object.array.at(0));
  }
}
 | 
			
		||||
 | 
			
		||||
/// Hand out the next unused object number, growing the cross-reference table as needed
uint pdf_updater::allocate() {
  assert(xref_size < UINT_MAX);

  const auto fresh = xref_size;
  xref_size += 1;
  if (xref_size > xref.size())
    xref.resize(xref_size);

  // We don't make sure it gets a subsection in the update yet because we
  // make no attempts at fixing the linked list of free items either
  return fresh;
}
 | 
			
		||||
 | 
			
		||||
void pdf_updater::update(uint n, std::function<void()> fill) {
 | 
			
		||||
  auto& ref = xref.at(n);
 | 
			
		||||
  ref.offset = document.length() + 1;
 | 
			
		||||
  ref.free = false;
 | 
			
		||||
  updated.insert(n);
 | 
			
		||||
 | 
			
		||||
  document += ssprintf("\n%u %u obj\n", n, ref.generation);
 | 
			
		||||
  // Separately so that the callback can use document.length() to get the current offset
 | 
			
		||||
  fill();
 | 
			
		||||
  document += "\nendobj";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void pdf_updater::flush_updates() {
 | 
			
		||||
  std::map<uint, size_t> groups;
 | 
			
		||||
  for (auto i = updated.cbegin(); i != updated.cend(); ) {
 | 
			
		||||
    size_t start = *i, count = 1;
 | 
			
		||||
    while (++i != updated.cend() && *i == start + count)
 | 
			
		||||
      count++;
 | 
			
		||||
    groups[start] = count;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Taking literally "Each cross-reference section begins with a line containing the keyword xref.
 | 
			
		||||
  // Following this line are one or more cross-reference subsections." from 3.4.3 in PDF Reference
 | 
			
		||||
  if (groups.empty())
 | 
			
		||||
    groups[0] = 0;
 | 
			
		||||
 | 
			
		||||
  auto startxref = document.length() + 1;
 | 
			
		||||
  document += "\nxref\n";
 | 
			
		||||
  for (const auto& g : groups) {
 | 
			
		||||
    document += ssprintf("%u %zu\n", g.first, g.second);
 | 
			
		||||
    for (size_t i = 0; i < g.second; i++) {
 | 
			
		||||
      auto& ref = xref[g.first + i];
 | 
			
		||||
      document += ssprintf("%010zu %05u %c \n", ref.offset, ref.generation, "nf"[!!ref.free]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
 | 
			
		||||
  document +=
 | 
			
		||||
    "trailer\n" + pdf_serialize(trailer) + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
/// Make a PDF object representing the given point in time
 | 
			
		||||
static pdf_object pdf_date(time_t timestamp) {
 | 
			
		||||
  struct tm parts;
 | 
			
		||||
  assert(localtime_r(×tamp, &parts));
 | 
			
		||||
 | 
			
		||||
  char buf[64];
 | 
			
		||||
  assert(strftime(buf, sizeof buf, "D:%Y%m%d%H%M%S", &parts));
 | 
			
		||||
 | 
			
		||||
  std::string offset = "Z";
 | 
			
		||||
  auto offset_min = parts.tm_gmtoff / 60;
 | 
			
		||||
  if (parts.tm_gmtoff < 0)
 | 
			
		||||
    offset = ssprintf("-%02ld'%02ld'", -offset_min / 60, -offset_min % 60);
 | 
			
		||||
  if (parts.tm_gmtoff > 0)
 | 
			
		||||
    offset = ssprintf("+%02ld'%02ld'", +offset_min / 60, +offset_min % 60);
 | 
			
		||||
  return {pdf_object::STRING, buf + offset};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Descend the page tree from the given node, always following the first kid,
/// until a /Page dictionary is found.
///
/// Returns the page dictionary with its n and generation members filled in,
/// or NIL when the tree is malformed, including when it contains a cycle.
static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_generation) {
  // Remember the visited nodes, so that an evil circular graph cannot keep us going forever;
  // iterating rather than recursing also keeps deep trees from exhausting the call stack
  std::set<std::pair<uint, uint>> visited;
  while (visited.emplace(node_n, node_generation).second) {
    auto obj = pdf.get(node_n, node_generation);
    if (obj.type != pdf_object::DICT)
      return {pdf_object::NIL};

    // Out of convenience; these aren't filled normally
    obj.n = node_n;
    obj.generation = node_generation;

    auto type = obj.dict.find("Type");
    if (type == obj.dict.end() || type->second.type != pdf_object::NAME)
      return {pdf_object::NIL};
    if (type->second.string == "Page")
      return obj;
    if (type->second.string != "Pages")
      return {pdf_object::NIL};

    // XXX technically speaking, this may be an indirect reference.  The correct way to solve this
    //   seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
    auto kids = obj.dict.find("Kids");
    if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY ||
        kids->second.array.empty() ||
        kids->second.array.at(0).type != pdf_object::REFERENCE)
      return {pdf_object::NIL};

    // Copy the numbers out while the dictionary that holds them is still alive
    const auto& first_kid = kids->second.array.at(0);
    node_n = first_kid.n;
    node_generation = first_kid.generation;
  }
  return {pdf_object::NIL};
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
static std::string pkcs12_path, pkcs12_pass;
 | 
			
		||||
 | 
			
		||||
// /All/ bytes are checked, except for the signature hexstring itself
 | 
			
		||||
static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, size_t sign_len) {
 | 
			
		||||
  size_t tail_off = sign_off + sign_len, tail_len = document.size() - tail_off;
 | 
			
		||||
  if (pkcs12_path.empty())
 | 
			
		||||
    return "undefined path to the signing key";
 | 
			
		||||
 | 
			
		||||
  auto pkcs12_fp = fopen(pkcs12_path.c_str(), "r");
 | 
			
		||||
  if (!pkcs12_fp)
 | 
			
		||||
    return pkcs12_path + ": " + strerror(errno);
 | 
			
		||||
 | 
			
		||||
  // Abandon hope, all ye who enter OpenSSL!  Half of it is undocumented.
 | 
			
		||||
  OpenSSL_add_all_algorithms();
 | 
			
		||||
  ERR_load_crypto_strings();
 | 
			
		||||
  ERR_clear_error();
 | 
			
		||||
 | 
			
		||||
  PKCS12* p12 = nullptr;
 | 
			
		||||
  EVP_PKEY* private_key = nullptr;
 | 
			
		||||
  X509* certificate = nullptr;
 | 
			
		||||
  STACK_OF(X509)* chain = nullptr;
 | 
			
		||||
  PKCS7* p7 = nullptr;
 | 
			
		||||
  int len = 0, sign_flags = PKCS7_DETACHED | PKCS7_BINARY | PKCS7_NOSMIMECAP | PKCS7_PARTIAL;
 | 
			
		||||
  BIO* p7bio = nullptr;
 | 
			
		||||
  unsigned char* buf = nullptr;
 | 
			
		||||
 | 
			
		||||
  // OpenSSL error reasons will usually be of more value than any distinction I can come up with
 | 
			
		||||
  std::string err = "OpenSSL failure";
 | 
			
		||||
 | 
			
		||||
  if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr)) ||
 | 
			
		||||
      !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
 | 
			
		||||
    err = pkcs12_path + ": parse failure";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
  if (!private_key || !certificate) {
 | 
			
		||||
    err = pkcs12_path + ": must contain a private key and a valid certificate chain";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
  // Prevent useless signatures -- makes pdfsig from poppler happy at least (and NSS by extension)
 | 
			
		||||
  if (!(X509_get_key_usage(certificate) & (KU_DIGITAL_SIGNATURE | KU_NON_REPUDIATION))) {
 | 
			
		||||
    err = "the certificate's key usage must include digital signatures or non-repudiation";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
  if (!(X509_get_extended_key_usage(certificate) & (XKU_SMIME | XKU_ANYEKU))) {
 | 
			
		||||
    err = "the certificate's extended key usage must include S/MIME";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
#if 0  // This happily ignores XKU_ANYEKU and I want my tiny world to make a tiny bit more sense
 | 
			
		||||
  if (X509_check_purpose(certificate, X509_PURPOSE_SMIME_SIGN, false /* not a CA certificate */)) {
 | 
			
		||||
    err = "the certificate can't be used for S/MIME digital signatures";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  // The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
 | 
			
		||||
  if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags)) ||
 | 
			
		||||
      !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
 | 
			
		||||
    goto error;
 | 
			
		||||
  // For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
 | 
			
		||||
  //   PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
 | 
			
		||||
  for (int i = 0; i < sk_X509_num(chain); i++)
 | 
			
		||||
    if (!PKCS7_add_certificate(p7, sk_X509_value(chain, i)))
 | 
			
		||||
      goto error;
 | 
			
		||||
 | 
			
		||||
  // Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
 | 
			
		||||
  // a copy of the whole document.  Hopefully this writes directly into a digest BIO.
 | 
			
		||||
  if (!(p7bio = PKCS7_dataInit(p7, nullptr)) ||
 | 
			
		||||
      (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off) ||
 | 
			
		||||
      (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len) ||
 | 
			
		||||
      BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
 | 
			
		||||
    goto error;
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  {
 | 
			
		||||
    // Debugging: openssl cms -inform PEM -in pdf_signature.pem -noout -cmsout -print
 | 
			
		||||
    // Context: https://stackoverflow.com/a/29253469
 | 
			
		||||
    auto fp = fopen("pdf_signature.pem", "wb");
 | 
			
		||||
    assert(PEM_write_PKCS7(fp, p7) && !fclose(fp));
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  if ((len = i2d_PKCS7(p7, &buf)) < 0)
 | 
			
		||||
    goto error;
 | 
			
		||||
  if (size_t(len) * 2 > sign_len - 2 /* hexstring quotes */) {
 | 
			
		||||
    // The obvious solution is to increase the allocation... or spend a week reading specifications
 | 
			
		||||
    // while losing all faith in humanity as a species, and skip the PKCS7 API entirely
 | 
			
		||||
    err = ssprintf("not enough space reserved for the signature (%zu nibbles vs %zu nibbles)",
 | 
			
		||||
                   sign_len - 2, size_t(len) * 2);
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
  for (int i = 0; i < len; i++) {
 | 
			
		||||
    document[sign_off + 2 * i + 1] = "0123456789abcdef"[buf[i] / 16];
 | 
			
		||||
    document[sign_off + 2 * i + 2] = "0123456789abcdef"[buf[i] % 16];
 | 
			
		||||
  }
 | 
			
		||||
  err.clear();
 | 
			
		||||
 | 
			
		||||
error:
 | 
			
		||||
  OPENSSL_free(buf);
 | 
			
		||||
  BIO_free_all(p7bio);
 | 
			
		||||
  PKCS7_free(p7);
 | 
			
		||||
  sk_X509_pop_free(chain, X509_free);
 | 
			
		||||
  X509_free(certificate);
 | 
			
		||||
  EVP_PKEY_free(private_key);
 | 
			
		||||
  PKCS12_free(p12);
 | 
			
		||||
 | 
			
		||||
  // In any case, clear the error stack (it's a queue, really) to avoid confusion elsewhere
 | 
			
		||||
  while (auto code = ERR_get_error())
 | 
			
		||||
    if (auto reason = ERR_reason_error_string(code))
 | 
			
		||||
      err = err + "; " + reason;
 | 
			
		||||
 | 
			
		||||
  fclose(pkcs12_fp);
 | 
			
		||||
  return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
/// The presumption here is that the document is valid and that it doesn't employ cross-reference
 | 
			
		||||
/// streams from PDF 1.5, or at least constitutes a hybrid-reference file.  The results with
 | 
			
		||||
/// PDF 2.0 (2017) are currently unknown as the standard costs money.
 | 
			
		||||
///
 | 
			
		||||
/// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
 | 
			
		||||
/// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
 | 
			
		||||
/// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
 | 
			
		||||
static std::string pdf_sign(std::string& document, ushort reservation) {
 | 
			
		||||
  pdf_updater pdf(document);
 | 
			
		||||
  auto err = pdf.initialize();
 | 
			
		||||
  if (!err.empty())
 | 
			
		||||
    return err;
 | 
			
		||||
 | 
			
		||||
  auto root_ref = pdf.trailer.find("Root");
 | 
			
		||||
  if (root_ref == pdf.trailer.end() || root_ref->second.type != pdf_object::REFERENCE)
 | 
			
		||||
    return "trailer does not contain a reference to Root";
 | 
			
		||||
  auto root = pdf.get(root_ref->second.n, root_ref->second.generation);
 | 
			
		||||
  if (root.type != pdf_object::DICT)
 | 
			
		||||
    return "invalid Root dictionary reference";
 | 
			
		||||
 | 
			
		||||
  // 8.7 Digital Signatures - /signature dictionary/
 | 
			
		||||
  auto sigdict_n = pdf.allocate();
 | 
			
		||||
  size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
 | 
			
		||||
  pdf.update(sigdict_n, [&] {
 | 
			
		||||
    // The timestamp is important for Adobe Acrobat Reader DC.  The ideal would be to use RFC 3161.
 | 
			
		||||
    pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
 | 
			
		||||
                        "   /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
 | 
			
		||||
    byterange_off = pdf.document.size();
 | 
			
		||||
    pdf.document.append((byterange_len = 32 /* fine for a gigabyte */), ' ');
 | 
			
		||||
    pdf.document.append("\n   /Contents <");
 | 
			
		||||
    sign_off = pdf.document.size();
 | 
			
		||||
    pdf.document.append((sign_len = reservation * 2), '0');
 | 
			
		||||
    pdf.document.append("> >>");
 | 
			
		||||
 | 
			
		||||
    // We actually need to exclude the hexstring quotes from signing
 | 
			
		||||
    sign_off -= 1;
 | 
			
		||||
    sign_len += 2;
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
  // 8.6.3 Field Types - Signature Fields
 | 
			
		||||
  pdf_object sigfield{pdf_object::DICT};
 | 
			
		||||
  sigfield.dict.insert({"FT", {pdf_object::NAME, "Sig"}});
 | 
			
		||||
  sigfield.dict.insert({"V", {pdf_object::REFERENCE, sigdict_n, 0}});
 | 
			
		||||
  // 8.4.5 Annotations Types - Widget Annotations
 | 
			
		||||
  // We can merge the Signature Annotation and omit Kids here
 | 
			
		||||
  sigfield.dict.insert({"Subtype", {pdf_object::NAME, "Widget"}});
 | 
			
		||||
  sigfield.dict.insert({"F", {pdf_object::NUMERIC, 2 /* Hidden */}});
 | 
			
		||||
  sigfield.dict.insert({"T", {pdf_object::STRING, "Signature1"}});
 | 
			
		||||
  sigfield.dict.insert({"Rect", {std::vector<pdf_object>{
 | 
			
		||||
    {pdf_object::NUMERIC, 0},
 | 
			
		||||
    {pdf_object::NUMERIC, 0},
 | 
			
		||||
    {pdf_object::NUMERIC, 0},
 | 
			
		||||
    {pdf_object::NUMERIC, 0},
 | 
			
		||||
  }}});
 | 
			
		||||
 | 
			
		||||
  auto sigfield_n = pdf.allocate();
 | 
			
		||||
  pdf.update(sigfield_n, [&] { pdf.document += pdf_serialize(sigfield); });
 | 
			
		||||
 | 
			
		||||
  auto pages_ref = root.dict.find("Pages");
 | 
			
		||||
  if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
 | 
			
		||||
    return "invalid Pages reference";
 | 
			
		||||
  auto page = pdf_get_first_page(pdf, pages_ref->second.n, pages_ref->second.generation);
 | 
			
		||||
  if (page.type != pdf_object::DICT)
 | 
			
		||||
    return "invalid or unsupported page tree";
 | 
			
		||||
 | 
			
		||||
  auto& annots = page.dict["Annots"];
 | 
			
		||||
  if (annots.type != pdf_object::ARRAY) {
 | 
			
		||||
    // TODO indirectly referenced arrays might not be that hard to support
 | 
			
		||||
    if (annots.type != pdf_object::END)
 | 
			
		||||
      return "unexpected Annots";
 | 
			
		||||
 | 
			
		||||
    annots = {pdf_object::ARRAY};
 | 
			
		||||
  }
 | 
			
		||||
  annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
 | 
			
		||||
  pdf.update(page.n, [&] { pdf.document += pdf_serialize(page); });
 | 
			
		||||
 | 
			
		||||
  // 8.6.1 Interactive Form Dictionary
 | 
			
		||||
  if (root.dict.count("AcroForm"))
 | 
			
		||||
    return "the document already contains forms, they would be overwritten";
 | 
			
		||||
 | 
			
		||||
  root.dict["AcroForm"] = {std::map<std::string, pdf_object>{
 | 
			
		||||
    {"Fields", {std::vector<pdf_object>{
 | 
			
		||||
      {pdf_object::REFERENCE, sigfield_n, 0}
 | 
			
		||||
    }}},
 | 
			
		||||
    {"SigFlags", {pdf_object::NUMERIC, 3 /* SignaturesExist | AppendOnly */}}
 | 
			
		||||
  }};
 | 
			
		||||
 | 
			
		||||
  // Upgrade the document version for SHA-256 etc.
 | 
			
		||||
  if (pdf.version(root) < 16)
 | 
			
		||||
    root.dict["Version"] = {pdf_object::NAME, "1.6"};
 | 
			
		||||
 | 
			
		||||
  pdf.update(root_ref->second.n, [&] { pdf.document += pdf_serialize(root); });
 | 
			
		||||
  pdf.flush_updates();
 | 
			
		||||
 | 
			
		||||
  // Now that we know the length of everything, store byte ranges of what we're about to sign,
 | 
			
		||||
  // which must be everything but the resulting signature itself
 | 
			
		||||
  size_t tail_off = sign_off + sign_len, tail_len = pdf.document.size() - tail_off;
 | 
			
		||||
  auto ranges = ssprintf("[0 %zu %zu %zu]", sign_off, tail_off, tail_len);
 | 
			
		||||
  if (ranges.length() > byterange_len)
 | 
			
		||||
    return "not enough space reserved for /ByteRange";
 | 
			
		||||
  pdf.document.replace(byterange_off, std::min(ranges.length(), byterange_len), ranges);
 | 
			
		||||
  return pdf_fill_in_signature(pdf.document, sign_off, sign_len);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
__attribute__((format(printf, 2, 3)))
 | 
			
		||||
static void die(int status, const char* format, ...) {
 | 
			
		||||
  va_list ap;
 | 
			
		||||
  va_start(ap, format);
 | 
			
		||||
  if (isatty(fileno(stderr)))
 | 
			
		||||
    vfprintf(stderr, ("\x1b[31m" + std::string(format) + "\x1b[0m\n").c_str(), ap);
 | 
			
		||||
    vfprintf(stderr, ssprintf("\x1b[31m%s\x1b[0m\n", format).c_str(), ap);
 | 
			
		||||
  else
 | 
			
		||||
    vfprintf(stderr, format, ap);
 | 
			
		||||
  va_end(ap);
 | 
			
		||||
@@ -41,26 +940,39 @@ static void die(int status, const char* format, ...) {
 | 
			
		||||
 | 
			
		||||
int main(int argc, char* argv[]) {
 | 
			
		||||
  auto invocation_name = argv[0];
 | 
			
		||||
  auto usage = [=]{
 | 
			
		||||
    die(1, "Usage: %s [-h] INPUT-FILENAME OUTPUT-FILENAME PKCS12-PATH PKCS12-PASS",
 | 
			
		||||
            invocation_name);
 | 
			
		||||
  auto usage = [=] {
 | 
			
		||||
    die(1, "Usage: %s [-h] [-r RESERVATION] INPUT-FILENAME OUTPUT-FILENAME PKCS12-PATH PKCS12-PASS",
 | 
			
		||||
        invocation_name);
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  static struct option opts[] = {
 | 
			
		||||
    {"help", no_argument, 0, 'h'},
 | 
			
		||||
    {"version", no_argument, 0, 'V'},
 | 
			
		||||
    {"reservation", required_argument, 0, 'r'},
 | 
			
		||||
    {nullptr, 0, 0, 0},
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Reserved space in bytes for the certificate, digest, encrypted digest, ...
 | 
			
		||||
  long reservation = 4096;
 | 
			
		||||
  while (1) {
 | 
			
		||||
    int option_index = 0;
 | 
			
		||||
    auto c = getopt_long(argc, const_cast<char* const*>(argv),
 | 
			
		||||
                         "h", opts, &option_index);
 | 
			
		||||
    auto c = getopt_long(argc, const_cast<char* const*>(argv), "hVr:", opts, &option_index);
 | 
			
		||||
    if (c == -1)
 | 
			
		||||
      break;
 | 
			
		||||
 | 
			
		||||
    char* end = nullptr;
 | 
			
		||||
    switch (c) {
 | 
			
		||||
    case 'h': usage(); break;
 | 
			
		||||
    default: usage();
 | 
			
		||||
    case 'r':
 | 
			
		||||
      errno = 0, reservation = strtol(optarg, &end, 10);
 | 
			
		||||
      if (errno || *end || reservation <= 0 || reservation > USHRT_MAX)
 | 
			
		||||
        die(1, "%s: must be a positive number", optarg);
 | 
			
		||||
      break;
 | 
			
		||||
    case 'V':
 | 
			
		||||
      die(0, "%s", PROJECT_NAME " " PROJECT_VERSION);
 | 
			
		||||
      break;
 | 
			
		||||
    case 'h':
 | 
			
		||||
    default:
 | 
			
		||||
      usage();
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
@@ -72,6 +984,8 @@ int main(int argc, char* argv[]) {
 | 
			
		||||
 | 
			
		||||
  const char* input_path  = argv[0];
 | 
			
		||||
  const char* output_path = argv[1];
 | 
			
		||||
  pkcs12_path = argv[2];
 | 
			
		||||
  pkcs12_pass = argv[3];
 | 
			
		||||
 | 
			
		||||
  std::string pdf_document;
 | 
			
		||||
  if (auto fp = fopen(input_path, "rb")) {
 | 
			
		||||
@@ -85,7 +999,7 @@ int main(int argc, char* argv[]) {
 | 
			
		||||
    die(1, "%s: %s", input_path, strerror(errno));
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  auto err = pdf_simple_sign(pdf_document, argv[2], argv[3]);
 | 
			
		||||
  auto err = pdf_sign(pdf_document, ushort(reservation));
 | 
			
		||||
  if (!err.empty()) {
 | 
			
		||||
    die(2, "Error: %s", err.c_str());
 | 
			
		||||
  }
 | 
			
		||||
 
 | 
			
		||||
@@ -1,28 +0,0 @@
 | 
			
		||||
// vim: set sw=2 ts=2 sts=2 et tw=100:
 | 
			
		||||
//
 | 
			
		||||
// pdf-simple-sign: simple PDF signer
 | 
			
		||||
//
 | 
			
		||||
// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
 | 
			
		||||
//
 | 
			
		||||
// Permission to use, copy, modify, and/or distribute this software for any
 | 
			
		||||
// purpose with or without fee is hereby granted.
 | 
			
		||||
//
 | 
			
		||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 | 
			
		||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 | 
			
		||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 | 
			
		||||
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 | 
			
		||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 | 
			
		||||
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 | 
			
		||||
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <string>
 | 
			
		||||
 | 
			
		||||
/// Sign basic PDF documents, as generated by e.g. Cairo, using the key-certificate pair
 | 
			
		||||
/// stored in the PKCS#12 file named `pkcs12_path`, with password `pkcs12_pass`.
 | 
			
		||||
/// Returns a non-empty error string on failure.
 | 
			
		||||
std::string pdf_simple_sign(std::string& document,
 | 
			
		||||
                            const std::string& pkcs12_path,
 | 
			
		||||
                            const std::string& pkcs12_pass);
 | 
			
		||||
							
								
								
									
										906
									
								
								pdf.cpp
									
									
									
									
									
								
							
							
						
						
									
										906
									
								
								pdf.cpp
									
									
									
									
									
								
							@@ -1,906 +0,0 @@
 | 
			
		||||
// vim: set sw=2 ts=2 sts=2 et tw=100:
 | 
			
		||||
//
 | 
			
		||||
// pdf-simple-sign: simple PDF signer
 | 
			
		||||
//
 | 
			
		||||
// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
 | 
			
		||||
//
 | 
			
		||||
// Permission to use, copy, modify, and/or distribute this software for any
 | 
			
		||||
// purpose with or without fee is hereby granted.
 | 
			
		||||
//
 | 
			
		||||
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 | 
			
		||||
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 | 
			
		||||
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 | 
			
		||||
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 | 
			
		||||
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 | 
			
		||||
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 | 
			
		||||
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 | 
			
		||||
//
 | 
			
		||||
 | 
			
		||||
#include <cmath>
#undef NDEBUG
#include <cassert>

#include <map>
#include <memory>
#include <regex>
#include <set>
#include <stdexcept>
#include <vector>

#if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
#error Need libstdc++ >= 4.9 for <regex>
#endif

#include <openssl/err.h>
#include <openssl/x509v3.h>
#include <openssl/pkcs12.h>

#include "pdf-simple-sign.h"
 | 
			
		||||
 | 
			
		||||
namespace {
 | 
			
		||||
 | 
			
		||||
using uint = unsigned int;
 | 
			
		||||
 | 
			
		||||
/// Join all strings in `v` into one, placing `delim` between each two neighbours.
/// An empty vector produces an empty string.
static std::string concatenate(const std::vector<std::string>& v, const std::string& delim) {
  std::string joined;
  for (auto it = v.begin(); it != v.end(); ++it) {
    // The delimiter only goes in between items, never in front of the first one
    if (it != v.begin())
      joined += delim;
    joined += *it;
  }
  return joined;
}
 | 
			
		||||
 | 
			
		||||
/// Format the arguments printf-style and return the result as a std::string.
///
/// `format` and `args` are passed to std::snprintf() unchanged, so the usual printf rules
/// about matching conversion specifications apply.  Returns an empty string when
/// std::snprintf() reports an error (a negative result) rather than computing a bogus length.
template<typename... Args>
static std::string ssprintf(const std::string& format, Args... args) {
  // First pass with no buffer only measures the output
  int len = std::snprintf(nullptr, 0, format.c_str(), args...);
  if (len < 0)
    return {};

  // One more byte for the terminating NUL that snprintf always writes
  size_t size = size_t(len) + 1;
  std::unique_ptr<char[]> buf(new char[size]);
  if (std::snprintf(buf.get(), size, format.c_str(), args...) < 0)
    return {};
  return std::string(buf.get(), buf.get() + len);
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
/// A PDF token or object.  An object is made up of either a single token or a sequence of them.
/// Note that the PDF Reference itself doesn't speak of tokens.
struct pdf_object {
  enum type {
    END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
    // Simple tokens
    B_ARRAY, E_ARRAY, B_DICT, E_DICT,
    // Higher-level objects
    ARRAY, DICT, OBJECT, REFERENCE,
  } type = END;

  /// END (error message), COMMENT/KEYWORD/NAME/STRING
  std::string string;
  /// BOOL, NUMERIC
  double number = 0.;
  /// ARRAY, OBJECT
  std::vector<pdf_object> array;
  /// DICT, in the future also STREAM
  std::map<std::string, pdf_object> dict;
  /// OBJECT, REFERENCE
  unsigned int n = 0, generation = 0;

  /// Object of the given type with no payload
  pdf_object(enum type type = END) : type(type) {}
  /// Object carrying a number
  pdf_object(enum type type, double v) : type(type), number(v) {}
  /// Object carrying a string
  pdf_object(enum type type, const std::string& v) : type(type), string(v) {}
  /// Object carrying an object number and a generation
  pdf_object(enum type type, unsigned int n, unsigned int g) : type(type), n(n), generation(g) {}
  /// ARRAY object with the given items
  pdf_object(const std::vector<pdf_object>& array) : type(ARRAY), array(array) {}
  /// DICT object with the given entries
  pdf_object(const std::map<std::string, pdf_object>& dict) : type(DICT), dict(dict) {}

  pdf_object(const pdf_object&) = default;
  pdf_object(pdf_object&&) = default;
  pdf_object& operator=(const pdf_object&) = default;
  pdf_object& operator=(pdf_object&&) = default;

  /// Tell whether this is a NUMERIC object whose value has no fractional part
  bool is_integer() const {
    if (type != NUMERIC)
      return false;

    double whole = 0.;
    return std::modf(number, &whole) == 0.;
  }
};
 | 
			
		||||
 | 
			
		||||
/// Basic lexical analyser for the Portable Document Format, giving limited error information
 | 
			
		||||
struct pdf_lexer {
 | 
			
		||||
  const unsigned char* p;
 | 
			
		||||
  pdf_lexer(const char* s) : p(reinterpret_cast<const unsigned char*>(s)) {}
 | 
			
		||||
 | 
			
		||||
  static constexpr const char* oct_alphabet = "01234567";
 | 
			
		||||
  static constexpr const char* dec_alphabet = "0123456789";
 | 
			
		||||
  static constexpr const char* hex_alphabet = "0123456789abcdefABCDEF";
 | 
			
		||||
  static constexpr const char* whitespace = "\t\n\f\r ";
 | 
			
		||||
  static constexpr const char* delimiters = "()<>[]{}/%";
 | 
			
		||||
 | 
			
		||||
  bool eat_newline(int ch) {
 | 
			
		||||
    if (ch == '\r') {
 | 
			
		||||
      if (*p == '\n') p++;
 | 
			
		||||
      return true;
 | 
			
		||||
    }
 | 
			
		||||
    return ch == '\n';
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object string() {
 | 
			
		||||
    std::string value;
 | 
			
		||||
    int parens = 1;
 | 
			
		||||
    while (1) {
 | 
			
		||||
      if (!*p) return {pdf_object::END, "unexpected end of string"};
 | 
			
		||||
      auto ch = *p++;
 | 
			
		||||
      if (eat_newline(ch)) ch = '\n';
 | 
			
		||||
      else if (ch == '(') { parens++; }
 | 
			
		||||
      else if (ch == ')') { if (!--parens) break; }
 | 
			
		||||
      else if (ch == '\\') {
 | 
			
		||||
        if (!*p) return {pdf_object::END, "unexpected end of string"};
 | 
			
		||||
        switch ((ch = *p++)) {
 | 
			
		||||
        case 'n': ch = '\n'; break;
 | 
			
		||||
        case 'r': ch = '\r'; break;
 | 
			
		||||
        case 't': ch = '\t'; break;
 | 
			
		||||
        case 'b': ch = '\b'; break;
 | 
			
		||||
        case 'f': ch = '\f'; break;
 | 
			
		||||
        default:
 | 
			
		||||
          if (eat_newline(ch))
 | 
			
		||||
            continue;
 | 
			
		||||
          std::string octal;
 | 
			
		||||
          if (ch && strchr(oct_alphabet, ch)) {
 | 
			
		||||
            octal += ch;
 | 
			
		||||
            if (*p && strchr(oct_alphabet, *p)) octal += *p++;
 | 
			
		||||
            if (*p && strchr(oct_alphabet, *p)) octal += *p++;
 | 
			
		||||
            ch = std::stoi(octal, nullptr, 8);
 | 
			
		||||
          }
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      value += ch;
 | 
			
		||||
    }
 | 
			
		||||
    return {pdf_object::STRING, value};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object string_hex() {
 | 
			
		||||
    std::string value, buf;
 | 
			
		||||
    while (*p != '>') {
 | 
			
		||||
      if (!*p) return {pdf_object::END, "unexpected end of hex string"};
 | 
			
		||||
      if (!strchr(hex_alphabet, *p))
 | 
			
		||||
        return {pdf_object::END, "invalid hex string"};
 | 
			
		||||
      buf += *p++;
 | 
			
		||||
      if (buf.size() == 2) {
 | 
			
		||||
        value += char(std::stoi(buf, nullptr, 16));
 | 
			
		||||
        buf.clear();
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    p++;
 | 
			
		||||
    if (!buf.empty()) value += char(std::stoi(buf + '0', nullptr, 16));
 | 
			
		||||
    return {pdf_object::STRING, value};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object name() {
 | 
			
		||||
    std::string value;
 | 
			
		||||
    while (!strchr(whitespace, *p) && !strchr(delimiters, *p)) {
 | 
			
		||||
      auto ch = *p++;
 | 
			
		||||
      if (ch == '#') {
 | 
			
		||||
        std::string hexa;
 | 
			
		||||
        if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
 | 
			
		||||
        if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
 | 
			
		||||
        if (hexa.size() != 2)
 | 
			
		||||
          return {pdf_object::END, "invalid name hexa escape"};
 | 
			
		||||
        ch = char(std::stoi(hexa, nullptr, 16));
 | 
			
		||||
      }
 | 
			
		||||
      value += ch;
 | 
			
		||||
    }
 | 
			
		||||
    if (value.empty()) return {pdf_object::END, "unexpected end of name"};
 | 
			
		||||
    return {pdf_object::NAME, value};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object comment() {
 | 
			
		||||
    std::string value;
 | 
			
		||||
    while (*p && *p != '\r' && *p != '\n')
 | 
			
		||||
      value += *p++;
 | 
			
		||||
    return {pdf_object::COMMENT, value};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // XXX maybe invalid numbers should rather be interpreted as keywords
 | 
			
		||||
  pdf_object number() {
 | 
			
		||||
    std::string value;
 | 
			
		||||
    if (*p == '-')
 | 
			
		||||
      value += *p++;
 | 
			
		||||
    bool real = false, digits = false;
 | 
			
		||||
    while (*p) {
 | 
			
		||||
      if (strchr(dec_alphabet, *p))
 | 
			
		||||
        digits = true;
 | 
			
		||||
      else if (*p == '.' && !real)
 | 
			
		||||
        real = true;
 | 
			
		||||
      else
 | 
			
		||||
        break;
 | 
			
		||||
      value += *p++;
 | 
			
		||||
    }
 | 
			
		||||
    if (!digits) return {pdf_object::END, "invalid number"};
 | 
			
		||||
    return {pdf_object::NUMERIC, std::stod(value, nullptr)};
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  pdf_object next() {
 | 
			
		||||
    if (!*p)
 | 
			
		||||
      return {pdf_object::END};
 | 
			
		||||
    if (strchr("-0123456789.", *p))
 | 
			
		||||
      return number();
 | 
			
		||||
 | 
			
		||||
    // {} end up being keywords, we might want to error out on those
 | 
			
		||||
    std::string value;
 | 
			
		||||
    while (!strchr(whitespace, *p) && !strchr(delimiters, *p))
 | 
			
		||||
      value += *p++;
 | 
			
		||||
    if (!value.empty()) {
 | 
			
		||||
      if (value == "null")  return {pdf_object::NIL};
 | 
			
		||||
      if (value == "true")  return {pdf_object::BOOL, 1};
 | 
			
		||||
      if (value == "false") return {pdf_object::BOOL, 0};
 | 
			
		||||
      return {pdf_object::KEYWORD, value};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    switch (char ch = *p++) {
 | 
			
		||||
    case '/': return name();
 | 
			
		||||
    case '%': return comment();
 | 
			
		||||
    case '(': return string();
 | 
			
		||||
    case '[': return {pdf_object::B_ARRAY};
 | 
			
		||||
    case ']': return {pdf_object::E_ARRAY};
 | 
			
		||||
    case '<':
 | 
			
		||||
      if (*p++ == '<')
 | 
			
		||||
        return {pdf_object::B_DICT};
 | 
			
		||||
      p--;
 | 
			
		||||
      return string_hex();
 | 
			
		||||
    case '>':
 | 
			
		||||
      if (*p++ == '>')
 | 
			
		||||
        return {pdf_object::E_DICT};
 | 
			
		||||
      p--;
 | 
			
		||||
      return {pdf_object::END, "unexpected '>'"};
 | 
			
		||||
    default:
 | 
			
		||||
      if (eat_newline(ch))
 | 
			
		||||
        return {pdf_object::NL};
 | 
			
		||||
      if (strchr(whitespace, ch))
 | 
			
		||||
        return next();
 | 
			
		||||
      return {pdf_object::END, "unexpected input"};
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// FIXME lines /should not/ be longer than 255 characters, some wrapping is in order
 | 
			
		||||
static std::string pdf_serialize(const pdf_object& o) {
 | 
			
		||||
  switch (o.type) {
 | 
			
		||||
  case pdf_object::NL:      return "\n";
 | 
			
		||||
  case pdf_object::NIL:     return "null";
 | 
			
		||||
  case pdf_object::BOOL:    return o.number ? "true" : "false";
 | 
			
		||||
  case pdf_object::NUMERIC:
 | 
			
		||||
  {
 | 
			
		||||
    if (o.is_integer()) return std::to_string((long long) o.number);
 | 
			
		||||
    return std::to_string(o.number);
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::KEYWORD: return o.string;
 | 
			
		||||
  case pdf_object::NAME:
 | 
			
		||||
  {
 | 
			
		||||
    std::string escaped = "/";
 | 
			
		||||
    for (char c : o.string) {
 | 
			
		||||
      if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
 | 
			
		||||
        escaped += ssprintf("#%02x", c);
 | 
			
		||||
      else
 | 
			
		||||
        escaped += c;
 | 
			
		||||
    }
 | 
			
		||||
    return escaped;
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::STRING:
 | 
			
		||||
  {
 | 
			
		||||
    std::string escaped;
 | 
			
		||||
    for (char c : o.string) {
 | 
			
		||||
      if (c == '\\' || c == '(' || c == ')')
 | 
			
		||||
        escaped += '\\';
 | 
			
		||||
      escaped += c;
 | 
			
		||||
    }
 | 
			
		||||
    return "(" + escaped + ")";
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::B_ARRAY: return "[";
 | 
			
		||||
  case pdf_object::E_ARRAY: return "]";
 | 
			
		||||
  case pdf_object::B_DICT:  return "<<";
 | 
			
		||||
  case pdf_object::E_DICT:  return ">>";
 | 
			
		||||
  case pdf_object::ARRAY:
 | 
			
		||||
  {
 | 
			
		||||
    std::vector<std::string> v;
 | 
			
		||||
    for (const auto& i : o.array)
 | 
			
		||||
      v.push_back(pdf_serialize(i));
 | 
			
		||||
    return "[ " + concatenate(v, " ") + " ]";
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::DICT:
 | 
			
		||||
  {
 | 
			
		||||
    std::string s;
 | 
			
		||||
    for (const auto& i : o.dict)
 | 
			
		||||
      // FIXME the key is also supposed to be escaped by pdf_serialize()
 | 
			
		||||
      s += " /" + i.first + " " + pdf_serialize(i.second);
 | 
			
		||||
    return "<<" + s + " >>";
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::OBJECT:
 | 
			
		||||
    return ssprintf("%u %u obj\n", o.n, o.generation) + pdf_serialize(o.array.at(0)) + "\nendobj";
 | 
			
		||||
  case pdf_object::REFERENCE:
 | 
			
		||||
    return ssprintf("%u %u R", o.n, o.generation);
 | 
			
		||||
  default:
 | 
			
		||||
    assert(!"unsupported token for serialization");
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
/// Utility class to help read and possibly incrementally update PDF files
 | 
			
		||||
class pdf_updater {
 | 
			
		||||
  struct ref {
 | 
			
		||||
    size_t offset = 0;     ///< File offset or N of the next free entry
 | 
			
		||||
    uint generation = 0;   ///< Object generation
 | 
			
		||||
    bool free = true;      ///< Whether this N has been deleted
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  std::vector<ref> xref;   ///< Cross-reference table
 | 
			
		||||
  size_t xref_size = 0;    ///< Current cross-reference table size, correlated to xref.size()
 | 
			
		||||
  std::set<uint> updated;  ///< List of updated objects
 | 
			
		||||
 | 
			
		||||
  pdf_object parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
 | 
			
		||||
  pdf_object parse_R(std::vector<pdf_object>& stack) const;
 | 
			
		||||
  pdf_object parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
 | 
			
		||||
  std::string load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries);
 | 
			
		||||
 | 
			
		||||
public:
 | 
			
		||||
  /// The new trailer dictionary to be written, initialized with the old one
 | 
			
		||||
  std::map<std::string, pdf_object> trailer;
 | 
			
		||||
 | 
			
		||||
  std::string& document;
 | 
			
		||||
  pdf_updater(std::string& document) : document(document) {}
 | 
			
		||||
 | 
			
		||||
  /// Build the cross-reference table and prepare a new trailer dictionary
 | 
			
		||||
  std::string initialize();
 | 
			
		||||
  /// Retrieve an object by its number and generation -- may return NIL or END with an error
 | 
			
		||||
  pdf_object get(uint n, uint generation) const;
 | 
			
		||||
  /// Allocate a new object number
 | 
			
		||||
  uint allocate();
 | 
			
		||||
  /// Append an updated object to the end of the document
 | 
			
		||||
  void update(uint n, std::function<void()> fill);
 | 
			
		||||
  /// Write an updated cross-reference table and trailer
 | 
			
		||||
  void flush_updates();
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
/// If the object is an error, forward its message, otherwise return err.
 | 
			
		||||
static std::string pdf_error(const pdf_object& o, const char* err) {
 | 
			
		||||
  if (o.type != pdf_object::END || o.string.empty()) return err;
 | 
			
		||||
  return o.string;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Parse the remainder of an indirect object after its "obj" keyword has been read.
/// The object number and generation are taken off the top of the stack, and everything
/// up to the "endobj" keyword is collected into the resulting OBJECT's array.
/// Returns an END object carrying an error message on failure.
pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
  if (stack.size() < 2)
    return {pdf_object::END, "missing object ID pair"};

  // The generation sits on top of the stack, the object number right below it
  auto generation = stack.back();
  stack.pop_back();
  auto number = stack.back();
  stack.pop_back();

  const auto unusable = [](const pdf_object& id) {
    return !id.is_integer() || id.number < 0 || id.number > UINT_MAX;
  };
  if (unusable(generation) || unusable(number))
    return {pdf_object::END, "invalid object ID pair"};

  pdf_object result{pdf_object::OBJECT};
  result.n = number.number;
  result.generation = generation.number;

  // The object's own array doubles as the parsing stack for its contents
  for (;;) {
    auto item = parse(lex, result.array);
    if (item.type == pdf_object::END)
      return {pdf_object::END, pdf_error(item, "object doesn't end")};
    if (item.type == pdf_object::KEYWORD && item.string == "endobj")
      return result;
    result.array.push_back(std::move(item));
  }
}
 | 
			
		||||
 | 
			
		||||
/// Build an indirect REFERENCE from the object number and generation found on top of
/// the stack, removing both.  Returns an END object carrying an error message on failure.
pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
  if (stack.size() < 2)
    return {pdf_object::END, "missing reference ID pair"};

  // The generation sits on top of the stack, the object number right below it
  auto generation = stack.back();
  stack.pop_back();
  auto number = stack.back();
  stack.pop_back();

  const auto unusable = [](const pdf_object& id) {
    return !id.is_integer() || id.number < 0 || id.number > UINT_MAX;
  };
  if (unusable(generation) || unusable(number))
    return {pdf_object::END, "invalid reference ID pair"};

  pdf_object result{pdf_object::REFERENCE};
  result.n = number.number;
  result.generation = generation.number;
  return result;
}
 | 
			
		||||
 | 
			
		||||
/// Read an object at the lexer's position.  Not a strict parser.
 | 
			
		||||
pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
 | 
			
		||||
  auto token = lex.next();
 | 
			
		||||
  switch (token.type) {
 | 
			
		||||
  case pdf_object::NL:
 | 
			
		||||
  case pdf_object::COMMENT:
 | 
			
		||||
    // These are not important to parsing, not even for this procedure's needs
 | 
			
		||||
    return parse(lex, stack);
 | 
			
		||||
  case pdf_object::B_ARRAY:
 | 
			
		||||
  {
 | 
			
		||||
    std::vector<pdf_object> array;
 | 
			
		||||
    while (1) {
 | 
			
		||||
      auto object = parse(lex, array);
 | 
			
		||||
      if (object.type == pdf_object::END)
 | 
			
		||||
        return {pdf_object::END, pdf_error(object, "array doesn't end")};
 | 
			
		||||
      if (object.type == pdf_object::E_ARRAY)
 | 
			
		||||
        break;
 | 
			
		||||
      array.push_back(std::move(object));
 | 
			
		||||
    }
 | 
			
		||||
    return array;
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::B_DICT:
 | 
			
		||||
  {
 | 
			
		||||
    std::vector<pdf_object> array;
 | 
			
		||||
    while (1) {
 | 
			
		||||
      auto object = parse(lex, array);
 | 
			
		||||
      if (object.type == pdf_object::END)
 | 
			
		||||
        return {pdf_object::END, pdf_error(object, "dictionary doesn't end")};
 | 
			
		||||
      if (object.type == pdf_object::E_DICT)
 | 
			
		||||
        break;
 | 
			
		||||
      array.push_back(std::move(object));
 | 
			
		||||
    }
 | 
			
		||||
    if (array.size() % 2)
 | 
			
		||||
      return {pdf_object::END, "unbalanced dictionary"};
 | 
			
		||||
    std::map<std::string, pdf_object> dict;
 | 
			
		||||
    for (size_t i = 0; i < array.size(); i += 2) {
 | 
			
		||||
      if (array[i].type != pdf_object::NAME)
 | 
			
		||||
        return {pdf_object::END, "invalid dictionary key type"};
 | 
			
		||||
      dict.insert({array[i].string, std::move(array[i + 1])});
 | 
			
		||||
    }
 | 
			
		||||
    return dict;
 | 
			
		||||
  }
 | 
			
		||||
  case pdf_object::KEYWORD:
 | 
			
		||||
    // Appears in the document body, typically needs to access the cross-reference table
 | 
			
		||||
    // TODO use the xref to read /Length etc. once we actually need to read such objects;
 | 
			
		||||
    //   presumably streams can use the pdf_object::string member
 | 
			
		||||
    if (token.string == "stream") return {pdf_object::END, "streams are not supported yet"};
 | 
			
		||||
    if (token.string == "obj")    return parse_obj(lex, stack);
 | 
			
		||||
    if (token.string == "R")      return parse_R(stack);
 | 
			
		||||
    return token;
 | 
			
		||||
  default:
 | 
			
		||||
    return token;
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Load one cross-reference section, starting at the "xref" keyword and stopping right
/// after the "trailer" keyword.  Entries for object numbers already present in
/// loaded_entries are skipped, so sections must be loaded from the newest to the oldest.
/// Returns an empty string on success, or an error message.
std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries) {
  std::vector<pdf_object> throwaway_stack;
  {
    auto keyword = parse(lex, throwaway_stack);
    if (keyword.type != pdf_object::KEYWORD || keyword.string != "xref")
      return "invalid xref table";
  }
  while (1) {
    // Each subsection starts with the first object number and the number of entries
    auto object = parse(lex, throwaway_stack);
    if (object.type == pdf_object::END)
      return pdf_error(object, "unexpected EOF while looking for the trailer");
    if (object.type == pdf_object::KEYWORD && object.string == "trailer")
      break;

    auto second = parse(lex, throwaway_stack);
    if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX
     || !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
      return "invalid xref section header";

    // Object numbers are tracked as uint, so a subsection running past UINT_MAX would
    // have its numbers silently truncated; the sum is exact in double arithmetic
    if (second.number > 0 && object.number + second.number - 1 > UINT_MAX)
      return "invalid xref section header";

    const size_t start = object.number;
    const size_t count = second.number;
    for (size_t i = 0; i < count; i++) {
      auto off = parse(lex, throwaway_stack);
      auto gen = parse(lex, throwaway_stack);
      auto key = parse(lex, throwaway_stack);
      if (!off.is_integer() || off.number < 0 || off.number > document.length()
       || !gen.is_integer() || gen.number < 0 || gen.number > 65535
       || key.type != pdf_object::KEYWORD)
        return "invalid xref entry";

      bool free = true;
      if (key.string == "n")
        free = false;
      else if (key.string != "f")
        return "invalid xref entry";

      // An entry loaded earlier for the same object number takes precedence
      auto n = start + i;
      if (loaded_entries.count(n))
        continue;
      // NOTE(review): a very large object number still makes this allocate a huge table
      if (n >= xref.size())
        xref.resize(n + 1);
      loaded_entries.insert(n);

      auto& ref = xref[n];
      ref.generation = gen.number;
      ref.offset = off.number;
      ref.free = free;
    }
  }
  return "";
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
std::string pdf_updater::initialize() {
 | 
			
		||||
  // We only need to look for startxref roughly within the last kibibyte of the document
 | 
			
		||||
  static std::regex haystack_re("[\\s\\S]*\\sstartxref\\s+(\\d+)\\s+%%EOF");
 | 
			
		||||
  std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);
 | 
			
		||||
 | 
			
		||||
  std::smatch m;
 | 
			
		||||
  if (!std::regex_search(haystack, m, haystack_re, std::regex_constants::match_continuous))
 | 
			
		||||
    return "cannot find startxref";
 | 
			
		||||
 | 
			
		||||
  size_t xref_offset = std::stoul(m.str(1)), last_xref_offset = xref_offset;
 | 
			
		||||
  std::set<size_t> loaded_xrefs;
 | 
			
		||||
  std::set<uint> loaded_entries;
 | 
			
		||||
 | 
			
		||||
  std::vector<pdf_object> throwaway_stack;
 | 
			
		||||
  while (1) {
 | 
			
		||||
    if (loaded_xrefs.count(xref_offset))
 | 
			
		||||
      return "circular xref offsets";
 | 
			
		||||
    if (xref_offset >= document.length())
 | 
			
		||||
      return "invalid xref offset";
 | 
			
		||||
 | 
			
		||||
    pdf_lexer lex(document.c_str() + xref_offset);
 | 
			
		||||
    auto err = load_xref(lex, loaded_entries);
 | 
			
		||||
    if (!err.empty()) return err;
 | 
			
		||||
 | 
			
		||||
    auto trailer = parse(lex, throwaway_stack);
 | 
			
		||||
    if (trailer.type != pdf_object::DICT)
 | 
			
		||||
      return pdf_error(trailer, "invalid trailer dictionary");
 | 
			
		||||
    if (loaded_xrefs.empty())
 | 
			
		||||
      this->trailer = trailer.dict;
 | 
			
		||||
    loaded_xrefs.insert(xref_offset);
 | 
			
		||||
 | 
			
		||||
    const auto prev_offset = trailer.dict.find("Prev");
 | 
			
		||||
    if (prev_offset == trailer.dict.end())
 | 
			
		||||
      break;
 | 
			
		||||
    // FIXME we don't check for size_t over or underflow
 | 
			
		||||
    if (!prev_offset->second.is_integer())
 | 
			
		||||
      return "invalid Prev offset";
 | 
			
		||||
    xref_offset = prev_offset->second.number;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  trailer["Prev"] = {pdf_object::NUMERIC, double(last_xref_offset)};
 | 
			
		||||
  const auto last_size = trailer.find("Size");
 | 
			
		||||
  if (last_size == trailer.end() || !last_size->second.is_integer() ||
 | 
			
		||||
      last_size->second.number <= 0)
 | 
			
		||||
    return "invalid or missing cross-reference table Size";
 | 
			
		||||
 | 
			
		||||
  xref_size = last_size->second.number;
 | 
			
		||||
  return "";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Retrieve an object by its number and generation.
/// Returns NIL when the object isn't in use, isn't known, or its generation differs,
/// and an END object carrying an error message when it can't be parsed.
pdf_object pdf_updater::get(uint n, uint generation) const {
  // The Size declared by the trailer may exceed what the loaded sections have defined,
  // so the table itself needs to be checked as well before indexing into it
  if (n >= xref_size || n >= xref.size())
    return {pdf_object::NIL};

  const auto& ref = xref[n];
  if (ref.free || ref.generation != generation || ref.offset >= document.length())
    return {pdf_object::NIL};

  pdf_lexer lex(document.c_str() + ref.offset);
  std::vector<pdf_object> stack;
  while (1) {
    auto object = parse(lex, stack);
    if (object.type == pdf_object::END)
      return object;
    if (object.type != pdf_object::OBJECT) {
      // Presumably the object number and generation that precede the "obj" keyword
      stack.push_back(std::move(object));
      continue;
    }
    if (object.n != n || object.generation != generation)
      return {pdf_object::END, "object mismatch"};
    // "N G obj endobj" has no contents to return; report it rather than throw
    if (object.array.empty())
      return {pdf_object::END, "empty object"};
    return std::move(object.array.front());
  }
}
 | 
			
		||||
 | 
			
		||||
/// Reserve the next unused object number, growing the cross-reference table as needed.
/// The new entry stays marked as free until update() is called for it.
uint pdf_updater::allocate() {
  assert(xref_size < UINT_MAX);

  const auto fresh = xref_size;
  xref_size += 1;
  if (xref_size > xref.size())
    xref.resize(xref_size);

  // We don't make sure it gets a subsection in the update yet because we
  // make no attempts at fixing the linked list of free items either
  return fresh;
}
 | 
			
		||||
 | 
			
		||||
void pdf_updater::update(uint n, std::function<void()> fill) {
 | 
			
		||||
  auto& ref = xref.at(n);
 | 
			
		||||
  ref.offset = document.length() + 1;
 | 
			
		||||
  ref.free = false;
 | 
			
		||||
  updated.insert(n);
 | 
			
		||||
 | 
			
		||||
  document += ssprintf("\n%u %u obj\n", n, ref.generation);
 | 
			
		||||
  // Separately so that the callback can use document.length() to get the current offset
 | 
			
		||||
  fill();
 | 
			
		||||
  document += "\nendobj";
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void pdf_updater::flush_updates() {
 | 
			
		||||
  std::map<uint, size_t> groups;
 | 
			
		||||
  for (auto i = updated.cbegin(); i != updated.cend(); ) {
 | 
			
		||||
    size_t start = *i, count = 1;
 | 
			
		||||
    while (++i != updated.cend() && *i == start + count)
 | 
			
		||||
      count++;
 | 
			
		||||
    groups[start] = count;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Taking literally "Each cross-reference section begins with a line containing the keyword xref.
 | 
			
		||||
  // Following this line are one or more cross-reference subsections." from 3.4.3 in PDF Reference
 | 
			
		||||
  if (groups.empty())
 | 
			
		||||
    groups[0] = 0;
 | 
			
		||||
 | 
			
		||||
  auto startxref = document.length() + 1;
 | 
			
		||||
  document += "\nxref\n";
 | 
			
		||||
  for (const auto& g : groups) {
 | 
			
		||||
    document += ssprintf("%u %zu\n", g.first, g.second);
 | 
			
		||||
    for (size_t i = 0; i < g.second; i++) {
 | 
			
		||||
      auto& ref = xref[g.first + i];
 | 
			
		||||
      document += ssprintf("%010zu %05u %c \n", ref.offset, ref.generation, "nf"[!!ref.free]);
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
 | 
			
		||||
  document += "trailer\n" + pdf_serialize(trailer)
 | 
			
		||||
    + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
/// Make a PDF object representing the given point in time
 | 
			
		||||
static pdf_object pdf_date(time_t timestamp) {
 | 
			
		||||
  struct tm parts;
 | 
			
		||||
  assert(localtime_r(×tamp, &parts));
 | 
			
		||||
 | 
			
		||||
  char buf[64];
 | 
			
		||||
  assert(strftime(buf, sizeof buf, "D:%Y%m%d%H%M%S", &parts));
 | 
			
		||||
 | 
			
		||||
  std::string offset = "Z";
 | 
			
		||||
  auto offset_min = parts.tm_gmtoff / 60;
 | 
			
		||||
  if (parts.tm_gmtoff < 0)
 | 
			
		||||
    offset = ssprintf("-%02ld'%02ld'", -offset_min / 60, -offset_min % 60);
 | 
			
		||||
  if (parts.tm_gmtoff > 0)
 | 
			
		||||
    offset = ssprintf("+%02ld'%02ld'", +offset_min / 60, +offset_min % 60);
 | 
			
		||||
  return {pdf_object::STRING, buf + offset};
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Descend from the given page tree node through the first kid of each /Pages node until
/// a /Page dictionary is found.  The returned dictionary has its n and generation filled in.
/// Returns NIL when the tree is malformed, unsupported, or contains a cycle.
static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_generation) {
  // Remember the visited object numbers, so that a circular graph can't trap us forever
  std::set<uint> visited;
  while (visited.insert(node_n).second) {
    auto obj = pdf.get(node_n, node_generation);
    if (obj.type != pdf_object::DICT)
      return {pdf_object::NIL};

    // Out of convenience; these aren't filled normally
    obj.n = node_n;
    obj.generation = node_generation;

    auto type = obj.dict.find("Type");
    if (type == obj.dict.end() || type->second.type != pdf_object::NAME)
      return {pdf_object::NIL};
    if (type->second.string == "Page")
      return obj;
    if (type->second.string != "Pages")
      return {pdf_object::NIL};

    // XXX technically speaking, this may be an indirect reference.  The correct way to solve this
    //   seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
    auto kids = obj.dict.find("Kids");
    if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY
     || kids->second.array.empty()
     || kids->second.array.front().type != pdf_object::REFERENCE)
      return {pdf_object::NIL};

    // Copy the IDs out before obj goes out of scope at the end of this iteration
    const auto& first_kid = kids->second.array.front();
    node_n = first_kid.n;
    node_generation = first_kid.generation;
  }
  return {pdf_object::NIL};
}
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
// /All/ bytes are checked, except for the signature hexstring itself
 | 
			
		||||
static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, size_t sign_len,
 | 
			
		||||
                                         const std::string& pkcs12_path,
 | 
			
		||||
                                         const std::string& pkcs12_pass) {
 | 
			
		||||
  size_t tail_off = sign_off + sign_len, tail_len = document.size() - tail_off;
 | 
			
		||||
  if (pkcs12_path.empty())
 | 
			
		||||
    return "undefined path to the signing key";
 | 
			
		||||
 | 
			
		||||
  auto pkcs12_fp = fopen(pkcs12_path.c_str(), "r");
 | 
			
		||||
  if (!pkcs12_fp)
 | 
			
		||||
    return pkcs12_path + ": " + strerror(errno);
 | 
			
		||||
 | 
			
		||||
  // Abandon hope, all ye who enter OpenSSL!  Half of it is undocumented.
 | 
			
		||||
  OpenSSL_add_all_algorithms();
 | 
			
		||||
  ERR_load_crypto_strings();
 | 
			
		||||
  ERR_clear_error();
 | 
			
		||||
 | 
			
		||||
  PKCS12* p12 = nullptr;
 | 
			
		||||
  EVP_PKEY* private_key = nullptr;
 | 
			
		||||
  X509* certificate = nullptr;
 | 
			
		||||
  STACK_OF(X509)* chain = nullptr;
 | 
			
		||||
  PKCS7* p7 = nullptr;
 | 
			
		||||
  int len = 0, sign_flags = PKCS7_DETACHED | PKCS7_BINARY | PKCS7_NOSMIMECAP | PKCS7_PARTIAL;
 | 
			
		||||
  BIO* p7bio = nullptr;
 | 
			
		||||
  unsigned char* buf = nullptr;
 | 
			
		||||
 | 
			
		||||
  // OpenSSL error reasons will usually be of more value than any distinction I can come up with
 | 
			
		||||
  std::string err = "OpenSSL failure";
 | 
			
		||||
 | 
			
		||||
  if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr))
 | 
			
		||||
   || !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
 | 
			
		||||
    err = pkcs12_path + ": parse failure";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
  if (!private_key || !certificate) {
 | 
			
		||||
    err = pkcs12_path + ": must contain a private key and a valid certificate chain";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
  // Prevent useless signatures -- makes pdfsig from poppler happy at least (and NSS by extension)
 | 
			
		||||
  if (!(X509_get_key_usage(certificate) & (KU_DIGITAL_SIGNATURE | KU_NON_REPUDIATION))) {
 | 
			
		||||
    err = "the certificate's key usage must include digital signatures or non-repudiation";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
  if (!(X509_get_extended_key_usage(certificate) & (XKU_SMIME | XKU_ANYEKU))) {
 | 
			
		||||
    err = "the certificate's extended key usage must include S/MIME";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
#if 0  // This happily ignores XKU_ANYEKU and I want my tiny world to make a tiny bit more sense
 | 
			
		||||
  if (X509_check_purpose(certificate, X509_PURPOSE_SMIME_SIGN, false /* not a CA certificate */)) {
 | 
			
		||||
    err = "the certificate can't be used for S/MIME digital signatures";
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  // The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
 | 
			
		||||
  if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags))
 | 
			
		||||
   || !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
 | 
			
		||||
    goto error;
 | 
			
		||||
  // For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
 | 
			
		||||
  //   PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
 | 
			
		||||
  for (int i = 0; i < sk_X509_num(chain); i++)
 | 
			
		||||
    if (!PKCS7_add_certificate(p7, sk_X509_value(chain, i)))
 | 
			
		||||
      goto error;
 | 
			
		||||
 | 
			
		||||
  // Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
 | 
			
		||||
  // a copy of the whole document.  Hopefully this writes directly into a digest BIO.
 | 
			
		||||
  if (!(p7bio = PKCS7_dataInit(p7, nullptr))
 | 
			
		||||
   || (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off)
 | 
			
		||||
   || (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len)
 | 
			
		||||
   || BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
 | 
			
		||||
    goto error;
 | 
			
		||||
 | 
			
		||||
#if 0
 | 
			
		||||
  {
 | 
			
		||||
    // Debugging: openssl cms -inform PEM -in pdf_signature.pem -noout -cmsout -print
 | 
			
		||||
    // Context: https://stackoverflow.com/a/29253469
 | 
			
		||||
    auto fp = fopen("pdf_signature.pem", "wb");
 | 
			
		||||
    assert(PEM_write_PKCS7(fp, p7) && !fclose(fp));
 | 
			
		||||
  }
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  if ((len = i2d_PKCS7(p7, &buf)) < 0)
 | 
			
		||||
    goto error;
 | 
			
		||||
  if (size_t(len) * 2 > sign_len - 2 /* hexstring quotes */) {
 | 
			
		||||
    // The obvious solution is to increase the allocation... or spend a week reading specifications
 | 
			
		||||
    // while losing all faith in humanity as a species, and skip the PKCS7 API entirely
 | 
			
		||||
    err = ssprintf("not enough space reserved for the signature (%zu nibbles vs %zu nibbles)",
 | 
			
		||||
                   sign_len - 2, size_t(len) * 2);
 | 
			
		||||
    goto error;
 | 
			
		||||
  }
 | 
			
		||||
  for (int i = 0; i < len; i++) {
 | 
			
		||||
    document[sign_off + 2 * i + 1] = "0123456789abcdef"[buf[i] / 16];
 | 
			
		||||
    document[sign_off + 2 * i + 2] = "0123456789abcdef"[buf[i] % 16];
 | 
			
		||||
  }
 | 
			
		||||
  err.clear();
 | 
			
		||||
 | 
			
		||||
error:
 | 
			
		||||
  OPENSSL_free(buf);
 | 
			
		||||
  BIO_free_all(p7bio);
 | 
			
		||||
  PKCS7_free(p7);
 | 
			
		||||
  sk_X509_pop_free(chain, X509_free);
 | 
			
		||||
  X509_free(certificate);
 | 
			
		||||
  EVP_PKEY_free(private_key);
 | 
			
		||||
  PKCS12_free(p12);
 | 
			
		||||
 | 
			
		||||
  // In any case, clear the error stack (it's a queue, really) to avoid confusion elsewhere
 | 
			
		||||
  while (auto code = ERR_get_error())
 | 
			
		||||
    if (auto reason = ERR_reason_error_string(code))
 | 
			
		||||
      err = err + "; " + reason;
 | 
			
		||||
 | 
			
		||||
  fclose(pkcs12_fp);
 | 
			
		||||
  return err;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
}  // anonymous namespace
 | 
			
		||||
 | 
			
		||||
// -------------------------------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
// The presumption here is that the document is valid and that it doesn't employ cross-reference
 | 
			
		||||
// streams from PDF 1.5, or at least constitutes a hybrid-reference file.  The results with
 | 
			
		||||
// PDF 2.0 (2017) are currently unknown as the standard costs money.
 | 
			
		||||
//
 | 
			
		||||
// Carelessly assumes that the version of the original document is at most PDF 1.6.
 | 
			
		||||
//
 | 
			
		||||
// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
 | 
			
		||||
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
 | 
			
		||||
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
 | 
			
		||||
std::string pdf_simple_sign(std::string& document,
 | 
			
		||||
                            const std::string& pkcs12_path,
 | 
			
		||||
                            const std::string& pkcs12_pass) {
 | 
			
		||||
  pdf_updater pdf(document);
 | 
			
		||||
  auto err = pdf.initialize();
 | 
			
		||||
  if (!err.empty())
 | 
			
		||||
    return err;
 | 
			
		||||
 | 
			
		||||
  auto root_ref = pdf.trailer.find("Root");
 | 
			
		||||
  if (root_ref == pdf.trailer.end() || root_ref->second.type != pdf_object::REFERENCE)
 | 
			
		||||
    return "trailer does not contain a reference to Root";
 | 
			
		||||
  auto root = pdf.get(root_ref->second.n, root_ref->second.generation);
 | 
			
		||||
  if (root.type != pdf_object::DICT)
 | 
			
		||||
    return "invalid Root dictionary reference";
 | 
			
		||||
 | 
			
		||||
  // 8.7 Digital Signatures - /signature dictionary/
 | 
			
		||||
  auto sigdict_n = pdf.allocate();
 | 
			
		||||
  size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
 | 
			
		||||
  pdf.update(sigdict_n, [&]{
 | 
			
		||||
    // The timestamp is important for Adobe Acrobat Reader DC.  The ideal would be to use RFC 3161.
 | 
			
		||||
    pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
 | 
			
		||||
                        "   /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
 | 
			
		||||
    byterange_off = pdf.document.size();
 | 
			
		||||
    pdf.document.append((byterange_len = 32 /* fine for a gigabyte */), ' ');
 | 
			
		||||
    pdf.document.append("\n   /Contents <");
 | 
			
		||||
    sign_off = pdf.document.size();
 | 
			
		||||
    pdf.document.append((sign_len = 8192 /* certificate, digest, encrypted digest, ... */), '0');
 | 
			
		||||
    pdf.document.append("> >>");
 | 
			
		||||
 | 
			
		||||
    // We actually need to exclude the hexstring quotes from signing
 | 
			
		||||
    sign_off -= 1;
 | 
			
		||||
    sign_len += 2;
 | 
			
		||||
  });
 | 
			
		||||
 | 
			
		||||
  // 8.6.3 Field Types - Signature Fields
 | 
			
		||||
  pdf_object sigfield{pdf_object::DICT};
 | 
			
		||||
  sigfield.dict.insert({"FT", {pdf_object::NAME, "Sig"}});
 | 
			
		||||
  sigfield.dict.insert({"V", {pdf_object::REFERENCE, sigdict_n, 0}});
 | 
			
		||||
  // 8.4.5 Annotations Types - Widget Annotations
 | 
			
		||||
  // We can merge the Signature Annotation and omit Kids here
 | 
			
		||||
  sigfield.dict.insert({"Subtype", {pdf_object::NAME, "Widget"}});
 | 
			
		||||
  sigfield.dict.insert({"F", {pdf_object::NUMERIC, 2 /* Hidden */}});
 | 
			
		||||
  sigfield.dict.insert({"T", {pdf_object::STRING, "Signature1"}});
 | 
			
		||||
  sigfield.dict.insert({"Rect", {std::vector<pdf_object>{
 | 
			
		||||
    {pdf_object::NUMERIC, 0},
 | 
			
		||||
    {pdf_object::NUMERIC, 0},
 | 
			
		||||
    {pdf_object::NUMERIC, 0},
 | 
			
		||||
    {pdf_object::NUMERIC, 0},
 | 
			
		||||
  }}});
 | 
			
		||||
 | 
			
		||||
  auto sigfield_n = pdf.allocate();
 | 
			
		||||
  pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); });
 | 
			
		||||
 | 
			
		||||
  auto pages_ref = root.dict.find("Pages");
 | 
			
		||||
  if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
 | 
			
		||||
    return "invalid Pages reference";
 | 
			
		||||
  auto page = pdf_get_first_page(pdf, pages_ref->second.n, pages_ref->second.generation);
 | 
			
		||||
  if (page.type != pdf_object::DICT)
 | 
			
		||||
    return "invalid or unsupported page tree";
 | 
			
		||||
 | 
			
		||||
  // XXX assuming this won't be an indirectly referenced array
 | 
			
		||||
  auto& annots = page.dict["Annots"];
 | 
			
		||||
  if (annots.type != pdf_object::ARRAY)
 | 
			
		||||
    annots = {pdf_object::ARRAY};
 | 
			
		||||
  annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
 | 
			
		||||
  pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });
 | 
			
		||||
 | 
			
		||||
  // 8.6.1 Interactive Form Dictionary
 | 
			
		||||
  // XXX assuming there are no forms already, overwriting everything
 | 
			
		||||
  root.dict["AcroForm"] = {std::map<std::string, pdf_object>{
 | 
			
		||||
    {"Fields", {std::vector<pdf_object>{
 | 
			
		||||
      {pdf_object::REFERENCE, sigfield_n, 0}
 | 
			
		||||
    }}},
 | 
			
		||||
    {"SigFlags", {pdf_object::NUMERIC, 3 /* SignaturesExist | AppendOnly */}}
 | 
			
		||||
  }};
 | 
			
		||||
 | 
			
		||||
  // Upgrade the document version for SHA-256 etc.
 | 
			
		||||
  // XXX assuming that it's not newer than 1.6 already -- while Cairo can't currently use a newer
 | 
			
		||||
  //   version that 1.5, it's not a bad idea to use cairo_pdf_surface_restrict_to_version()
 | 
			
		||||
  root.dict["Version"] = {pdf_object::NAME, "1.6"};
 | 
			
		||||
  pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
 | 
			
		||||
  pdf.flush_updates();
 | 
			
		||||
 | 
			
		||||
  // Now that we know the length of everything, store byte ranges of what we're about to sign,
 | 
			
		||||
  // which must be everything but the resulting signature itself
 | 
			
		||||
  size_t tail_off = sign_off + sign_len, tail_len = pdf.document.size() - tail_off;
 | 
			
		||||
  auto ranges = ssprintf("[0 %zu %zu %zu]", sign_off, tail_off, tail_len);
 | 
			
		||||
  if (ranges.length() > byterange_len)
 | 
			
		||||
    return "not enough space reserved for /ByteRange";
 | 
			
		||||
  pdf.document.replace(byterange_off, std::min(ranges.length(), byterange_len), ranges);
 | 
			
		||||
  return pdf_fill_in_signature(pdf.document, sign_off, sign_len, pkcs12_path, pkcs12_pass);
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										211
									
								
								pdf/pdf.go
									
									
									
									
									
								
							
							
						
						
									
										211
									
								
								pdf/pdf.go
									
									
									
									
									
								
							@@ -1,5 +1,5 @@
 | 
			
		||||
//
 | 
			
		||||
// Copyright (c) 2018, Přemysl Eric Janouch <p@janouch.name>
 | 
			
		||||
// Copyright (c) 2018 - 2021, Přemysl Eric Janouch <p@janouch.name>
 | 
			
		||||
//
 | 
			
		||||
// Permission to use, copy, modify, and/or distribute this software for any
 | 
			
		||||
// purpose with or without fee is hereby granted.
 | 
			
		||||
@@ -32,6 +32,7 @@ import (
 | 
			
		||||
	"crypto/ecdsa"
 | 
			
		||||
	"crypto/rsa"
 | 
			
		||||
	"crypto/x509"
 | 
			
		||||
 | 
			
		||||
	"go.mozilla.org/pkcs7"
 | 
			
		||||
	"golang.org/x/crypto/pkcs12"
 | 
			
		||||
)
 | 
			
		||||
@@ -58,20 +59,22 @@ const (
 | 
			
		||||
	// higher-level objects
 | 
			
		||||
	Array
 | 
			
		||||
	Dict
 | 
			
		||||
	Stream
 | 
			
		||||
	Indirect
 | 
			
		||||
	Reference
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// Object is a PDF token/object thingy.  Objects may be composed either from
 | 
			
		||||
// Object is a PDF token/object thingy. Objects may be composed either from
 | 
			
		||||
// one or a sequence of tokens. The PDF Reference doesn't actually speak
 | 
			
		||||
// of tokens.
 | 
			
		||||
// of tokens, though ISO 32000-1:2008 does.
 | 
			
		||||
type Object struct {
 | 
			
		||||
	Kind ObjectKind
 | 
			
		||||
 | 
			
		||||
	String        string            // Comment/Keyword/Name/String
 | 
			
		||||
	Number        float64           // Bool, Numeric
 | 
			
		||||
	Array         []Object          // Array, Indirect
 | 
			
		||||
	Dict          map[string]Object // Dict, in the future also Stream
 | 
			
		||||
	Dict          map[string]Object // Dict, Stream
 | 
			
		||||
	Stream        []byte            // Stream
 | 
			
		||||
	N, Generation uint              // Indirect, Reference
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@@ -457,6 +460,10 @@ func (o *Object) Serialize() string {
 | 
			
		||||
			fmt.Fprint(b, " /", k, " ", v.Serialize())
 | 
			
		||||
		}
 | 
			
		||||
		return "<<" + b.String() + " >>"
 | 
			
		||||
	case Stream:
 | 
			
		||||
		d := NewDict(o.Dict)
 | 
			
		||||
		d.Dict["Length"] = NewNumeric(float64(len(o.Stream)))
 | 
			
		||||
		return d.Serialize() + "\nstream\n" + string(o.Stream) + "\nendstream"
 | 
			
		||||
	case Indirect:
 | 
			
		||||
		return fmt.Sprintf("%d %d obj\n%s\nendobj", o.N, o.Generation,
 | 
			
		||||
			o.Array[0].Serialize())
 | 
			
		||||
@@ -496,6 +503,65 @@ type Updater struct {
 | 
			
		||||
	Trailer map[string]Object
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ListIndirect returns the whole cross-reference table as Reference Objects.
 | 
			
		||||
func (u *Updater) ListIndirect() []Object {
 | 
			
		||||
	result := []Object{}
 | 
			
		||||
	for i := 0; i < len(u.xref); i++ {
 | 
			
		||||
		if u.xref[i].nonfree {
 | 
			
		||||
			result = append(result, NewReference(uint(i), u.xref[i].generation))
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return result
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (u *Updater) parseStream(lex *Lexer, stack *[]Object) (Object, error) {
 | 
			
		||||
	lenStack := len(*stack)
 | 
			
		||||
	if lenStack < 1 {
 | 
			
		||||
		return newError("missing stream dictionary")
 | 
			
		||||
	}
 | 
			
		||||
	dict := (*stack)[lenStack-1]
 | 
			
		||||
	if dict.Kind != Dict {
 | 
			
		||||
		return newError("stream not preceded by a dictionary")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	*stack = (*stack)[:lenStack-1]
 | 
			
		||||
	length, ok := dict.Dict["Length"]
 | 
			
		||||
	if !ok {
 | 
			
		||||
		return newError("missing stream Length")
 | 
			
		||||
	}
 | 
			
		||||
	length, err := u.Dereference(length)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return length, err
 | 
			
		||||
	}
 | 
			
		||||
	if !length.IsUint() || length.Number > math.MaxInt {
 | 
			
		||||
		return newError("stream Length not an unsigned integer")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Expect exactly one newline.
 | 
			
		||||
	if nl, err := lex.Next(); err != nil {
 | 
			
		||||
		return nl, err
 | 
			
		||||
	} else if nl.Kind != NL {
 | 
			
		||||
		return newError("stream does not start with a newline")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	size := int(length.Number)
 | 
			
		||||
	if len(lex.P) < size {
 | 
			
		||||
		return newError("stream is longer than the document")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	dict.Kind = Stream
 | 
			
		||||
	dict.Stream = lex.P[:size]
 | 
			
		||||
	lex.P = lex.P[size:]
 | 
			
		||||
 | 
			
		||||
	// Skip any number of trailing newlines or comments.
 | 
			
		||||
	if end, err := u.parse(lex, stack); err != nil {
 | 
			
		||||
		return end, err
 | 
			
		||||
	} else if end.Kind != Keyword || end.String != "endstream" {
 | 
			
		||||
		return newError("improperly terminated stream")
 | 
			
		||||
	}
 | 
			
		||||
	return dict, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) {
 | 
			
		||||
	lenStack := len(*stack)
 | 
			
		||||
	if lenStack < 2 {
 | 
			
		||||
@@ -589,15 +655,11 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
 | 
			
		||||
		}
 | 
			
		||||
		return NewDict(dict), nil
 | 
			
		||||
	case Keyword:
 | 
			
		||||
		// Appears in the document body, typically needs
 | 
			
		||||
		// to access the cross-reference table.
 | 
			
		||||
		//
 | 
			
		||||
		// TODO(p): Use the xref to read /Length etc. once we
 | 
			
		||||
		// actually need to read such objects; presumably
 | 
			
		||||
		// streams can use the Object.String member.
 | 
			
		||||
		switch token.String {
 | 
			
		||||
		case "stream":
 | 
			
		||||
			return newError("streams are not supported yet")
 | 
			
		||||
			// Appears in the document body,
 | 
			
		||||
			// typically needs to access the cross-reference table.
 | 
			
		||||
			return u.parseStream(lex, stack)
 | 
			
		||||
		case "obj":
 | 
			
		||||
			return u.parseIndirect(lex, stack)
 | 
			
		||||
		case "R":
 | 
			
		||||
@@ -669,7 +731,7 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
 | 
			
		||||
 | 
			
		||||
// -----------------------------------------------------------------------------
 | 
			
		||||
 | 
			
		||||
var haystackRE = regexp.MustCompile(`(?s:.*)\sstartxref\s+(\d+)\s+%%EOF`)
 | 
			
		||||
var trailerRE = regexp.MustCompile(`(?s:.*)\sstartxref\s+(\d+)\s+%%EOF`)
 | 
			
		||||
 | 
			
		||||
// NewUpdater initializes an Updater, building the cross-reference table and
 | 
			
		||||
// preparing a new trailer dictionary.
 | 
			
		||||
@@ -684,7 +746,7 @@ func NewUpdater(document []byte) (*Updater, error) {
 | 
			
		||||
		haystack = haystack[len(haystack)-1024:]
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	m := haystackRE.FindSubmatch(haystack)
 | 
			
		||||
	m := trailerRE.FindSubmatch(haystack)
 | 
			
		||||
	if m == nil {
 | 
			
		||||
		return nil, errors.New("cannot find startxref")
 | 
			
		||||
	}
 | 
			
		||||
@@ -721,7 +783,7 @@ func NewUpdater(document []byte) (*Updater, error) {
 | 
			
		||||
		if !ok {
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
		// FIXME: We don't check for size_t over or underflow.
 | 
			
		||||
		// FIXME: Do not read offsets and sizes as floating point numbers.
 | 
			
		||||
		if !prevOffset.IsInteger() {
 | 
			
		||||
			return nil, errors.New("invalid Prev offset")
 | 
			
		||||
		}
 | 
			
		||||
@@ -738,10 +800,33 @@ func NewUpdater(document []byte) (*Updater, error) {
 | 
			
		||||
	return u, nil
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var versionRE = regexp.MustCompile(
 | 
			
		||||
	`(?:^|[\r\n])%(?:!PS-Adobe-\d\.\d )?PDF-(\d)\.(\d)[\r\n]`)
 | 
			
		||||
 | 
			
		||||
// Version extracts the claimed PDF version as a positive decimal number,
 | 
			
		||||
// e.g. 17 for PDF 1.7. Returns zero on failure.
 | 
			
		||||
func (u *Updater) Version(root *Object) int {
 | 
			
		||||
	if version, ok := root.Dict["Version"]; ok && version.Kind == Name {
 | 
			
		||||
		if v := version.String; len(v) == 3 && v[1] == '.' &&
 | 
			
		||||
			v[0] >= '0' && v[0] <= '9' && v[2] >= '0' && v[2] <= '9' {
 | 
			
		||||
			return int(v[0]-'0')*10 + int(v[2]-'0')
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// We only need to look for the comment roughly within
 | 
			
		||||
	// the first kibibyte of the document.
 | 
			
		||||
	haystack := u.Document
 | 
			
		||||
	if len(haystack) > 1024 {
 | 
			
		||||
		haystack = haystack[:1024]
 | 
			
		||||
	}
 | 
			
		||||
	if m := versionRE.FindSubmatch(haystack); m != nil {
 | 
			
		||||
		return int(m[1][0]-'0')*10 + int(m[2][0]-'0')
 | 
			
		||||
	}
 | 
			
		||||
	return 0
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Get retrieves an object by its number and generation--may return
 | 
			
		||||
// Nil or End with an error.
 | 
			
		||||
//
 | 
			
		||||
// TODO(p): We should fix all uses of this not to eat the error.
 | 
			
		||||
func (u *Updater) Get(n, generation uint) (Object, error) {
 | 
			
		||||
	if n >= u.xrefSize {
 | 
			
		||||
		return New(Nil), nil
 | 
			
		||||
@@ -770,6 +855,14 @@ func (u *Updater) Get(n, generation uint) (Object, error) {
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Dereference dereferences Reference objects, and passes the other kinds through.
 | 
			
		||||
func (u *Updater) Dereference(o Object) (Object, error) {
 | 
			
		||||
	if o.Kind != Reference {
 | 
			
		||||
		return o, nil
 | 
			
		||||
	}
 | 
			
		||||
	return u.Get(o.N, o.Generation)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Allocate allocates a new object number.
 | 
			
		||||
func (u *Updater) Allocate() uint {
 | 
			
		||||
	n := u.xrefSize
 | 
			
		||||
@@ -827,30 +920,19 @@ func (u *Updater) FlushUpdates() {
 | 
			
		||||
		return updated[i] < updated[j]
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	groups := make(map[uint]uint)
 | 
			
		||||
	for i := 0; i < len(updated); {
 | 
			
		||||
		start, count := updated[i], uint(1)
 | 
			
		||||
		for i++; i != len(updated) && updated[i] == start+count; i++ {
 | 
			
		||||
			count++
 | 
			
		||||
		}
 | 
			
		||||
		groups[start] = count
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Taking literally "Each cross-reference section begins with a line
 | 
			
		||||
	// containing the keyword xref. Following this line are one or more
 | 
			
		||||
	// cross-reference subsections." from 3.4.3 in PDF Reference.
 | 
			
		||||
	if len(groups) == 0 {
 | 
			
		||||
		groups[0] = 0
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	buf := bytes.NewBuffer(u.Document)
 | 
			
		||||
	startXref := buf.Len() + 1
 | 
			
		||||
	buf.WriteString("\nxref\n")
 | 
			
		||||
 | 
			
		||||
	for start, count := range groups {
 | 
			
		||||
		fmt.Fprintf(buf, "%d %d\n", start, count)
 | 
			
		||||
		for i := uint(0); i < count; i++ {
 | 
			
		||||
			ref := u.xref[start+uint(i)]
 | 
			
		||||
	for i := 0; i < len(updated); {
 | 
			
		||||
		start, stop := updated[i], updated[i]+1
 | 
			
		||||
		for i++; i < len(updated) && updated[i] == stop; i++ {
 | 
			
		||||
			stop++
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		fmt.Fprintf(buf, "%d %d\n", start, stop-start)
 | 
			
		||||
		for ; start < stop; start++ {
 | 
			
		||||
			ref := u.xref[start]
 | 
			
		||||
			if ref.nonfree {
 | 
			
		||||
				fmt.Fprintf(buf, "%010d %05d n \n", ref.offset, ref.generation)
 | 
			
		||||
			} else {
 | 
			
		||||
@@ -859,6 +941,13 @@ func (u *Updater) FlushUpdates() {
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Taking literally "Each cross-reference section begins with a line
 | 
			
		||||
	// containing the keyword xref. Following this line are one or more
 | 
			
		||||
	// cross-reference subsections." from 3.4.3 in PDF Reference.
 | 
			
		||||
	if len(updated) == 0 {
 | 
			
		||||
		fmt.Fprintf(buf, "%d %d\n", 0, 0)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	u.Trailer["Size"] = NewNumeric(float64(u.xrefSize))
 | 
			
		||||
	trailer := NewDict(u.Trailer)
 | 
			
		||||
 | 
			
		||||
@@ -884,15 +973,15 @@ func NewDate(ts time.Time) Object {
 | 
			
		||||
 | 
			
		||||
// GetFirstPage retrieves the first page of the given page (sub)tree reference,
 | 
			
		||||
// or returns a Nil object if unsuccessful.
 | 
			
		||||
func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
 | 
			
		||||
	obj, _ := u.Get(nodeN, nodeGeneration)
 | 
			
		||||
	if obj.Kind != Dict {
 | 
			
		||||
func (u *Updater) GetFirstPage(node Object) Object {
 | 
			
		||||
	obj, err := u.Dereference(node)
 | 
			
		||||
	if err != nil || obj.Kind != Dict {
 | 
			
		||||
		return New(Nil)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// Out of convenience; these aren't filled normally.
 | 
			
		||||
	obj.N = nodeN
 | 
			
		||||
	obj.Generation = nodeGeneration
 | 
			
		||||
	obj.N = node.N
 | 
			
		||||
	obj.Generation = node.Generation
 | 
			
		||||
 | 
			
		||||
	if typ, ok := obj.Dict["Type"]; !ok || typ.Kind != Name {
 | 
			
		||||
		return New(Nil)
 | 
			
		||||
@@ -912,7 +1001,7 @@ func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// XXX: Nothing prevents us from recursing in an evil circular graph.
 | 
			
		||||
	return u.GetFirstPage(kids.Array[0].N, kids.Array[0].Generation)
 | 
			
		||||
	return u.GetFirstPage(kids.Array[0])
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// -----------------------------------------------------------------------------
 | 
			
		||||
@@ -1089,15 +1178,14 @@ func FillInSignature(document []byte, signOff, signLen int,
 | 
			
		||||
// There must be at least one certificate, matching the private key.
 | 
			
		||||
// The certificates must form a chain.
 | 
			
		||||
//
 | 
			
		||||
// A good default for the reservation is around 4096 (the value is in bytes).
 | 
			
		||||
//
 | 
			
		||||
// The presumption here is that the document is valid and that it doesn't
 | 
			
		||||
// employ cross-reference streams from PDF 1.5, or at least constitutes
 | 
			
		||||
// a hybrid-reference file. The results with PDF 2.0 (2017) are currently
 | 
			
		||||
// unknown as the standard costs money.
 | 
			
		||||
//
 | 
			
		||||
// Carelessly assumes that the version of the original document is at most
 | 
			
		||||
// PDF 1.6.
 | 
			
		||||
func Sign(document []byte,
 | 
			
		||||
	key crypto.PrivateKey, certs []*x509.Certificate) ([]byte, error) {
 | 
			
		||||
func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
 | 
			
		||||
	reservation int) ([]byte, error) {
 | 
			
		||||
	pdf, err := NewUpdater(document)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, err
 | 
			
		||||
@@ -1107,7 +1195,10 @@ func Sign(document []byte,
 | 
			
		||||
	if !ok || rootRef.Kind != Reference {
 | 
			
		||||
		return nil, errors.New("trailer does not contain a reference to Root")
 | 
			
		||||
	}
 | 
			
		||||
	root, _ := pdf.Get(rootRef.N, rootRef.Generation)
 | 
			
		||||
	root, err := pdf.Dereference(rootRef)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("Root dictionary retrieval failed: %s", err)
 | 
			
		||||
	}
 | 
			
		||||
	if root.Kind != Dict {
 | 
			
		||||
		return nil, errors.New("invalid Root dictionary reference")
 | 
			
		||||
	}
 | 
			
		||||
@@ -1129,7 +1220,7 @@ func Sign(document []byte,
 | 
			
		||||
		buf.WriteString("\n   /Contents <")
 | 
			
		||||
 | 
			
		||||
		signOff = buf.Len()
 | 
			
		||||
		signLen = 8192 // cert, digest, encrypted digest, ...
 | 
			
		||||
		signLen = reservation * 2 // cert, digest, encrypted digest, ...
 | 
			
		||||
		buf.Write(bytes.Repeat([]byte{'0'}, signLen))
 | 
			
		||||
		buf.WriteString("> >>")
 | 
			
		||||
 | 
			
		||||
@@ -1161,14 +1252,18 @@ func Sign(document []byte,
 | 
			
		||||
	if !ok || pagesRef.Kind != Reference {
 | 
			
		||||
		return nil, errors.New("invalid Pages reference")
 | 
			
		||||
	}
 | 
			
		||||
	page := pdf.GetFirstPage(pagesRef.N, pagesRef.Generation)
 | 
			
		||||
	page := pdf.GetFirstPage(pagesRef)
 | 
			
		||||
	if page.Kind != Dict {
 | 
			
		||||
		return nil, errors.New("invalid or unsupported page tree")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	// XXX: Assuming this won't be an indirectly referenced array.
 | 
			
		||||
	annots := page.Dict["Annots"]
 | 
			
		||||
	if annots.Kind != Array {
 | 
			
		||||
		// TODO(p): Indirectly referenced arrays might not be
 | 
			
		||||
		// that hard to support.
 | 
			
		||||
		if annots.Kind != End {
 | 
			
		||||
			return nil, errors.New("unexpected Annots")
 | 
			
		||||
		}
 | 
			
		||||
		annots = NewArray(nil)
 | 
			
		||||
	}
 | 
			
		||||
	annots.Array = append(annots.Array, NewReference(sigfieldN, 0))
 | 
			
		||||
@@ -1179,17 +1274,21 @@ func Sign(document []byte,
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	// 8.6.1 Interactive Form Dictionary
 | 
			
		||||
	// XXX: Assuming there are no forms already, overwriting everything.
 | 
			
		||||
	if _, ok := root.Dict["AcroForm"]; ok {
 | 
			
		||||
		return nil, errors.New("the document already contains forms, " +
 | 
			
		||||
			"they would be overwritten")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	root.Dict["AcroForm"] = NewDict(map[string]Object{
 | 
			
		||||
		"Fields":   NewArray([]Object{NewReference(sigfieldN, 0)}),
 | 
			
		||||
		"SigFlags": NewNumeric(3 /* SignaturesExist | AppendOnly */),
 | 
			
		||||
	})
 | 
			
		||||
 | 
			
		||||
	// Upgrade the document version for SHA-256 etc.
 | 
			
		||||
	// XXX: Assuming that it's not newer than 1.6 already--while Cairo can't
 | 
			
		||||
	// currently use a newer version than 1.5, it's not a bad idea to use
 | 
			
		||||
	// cairo_pdf_surface_restrict_to_version().
 | 
			
		||||
	root.Dict["Version"] = NewName("1.6")
 | 
			
		||||
	if pdf.Version(&root) < 16 {
 | 
			
		||||
		root.Dict["Version"] = NewName("1.6")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	pdf.Update(rootRef.N, func(buf BytesWriter) {
 | 
			
		||||
		buf.WriteString(root.Serialize())
 | 
			
		||||
	})
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										77
									
								
								test.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										77
									
								
								test.sh
									
									
									
									
									
										Executable file
									
								
							@@ -0,0 +1,77 @@
 | 
			
		||||
#!/bin/sh -e
 | 
			
		||||
# Test basic functionality of both versions
 | 
			
		||||
# Usage: ./test.sh builddir/pdf-simple-sign cmd/pdf-simple-sign/pdf-simple-sign
 | 
			
		||||
 | 
			
		||||
# log MESSAGE: print a "-- MESSAGE" progress line to stdout, wrapped in the
# terminal's italics (tput sitm) and attribute-reset (tput sgr0) sequences.
# printf '%s' keeps backslashes and option-like text in MESSAGE literal,
# which echo does not guarantee under /bin/sh.
log() { printf '%s-- %s%s\n' "$(tput sitm)" "$1" "$(tput sgr0)"; }
 | 
			
		||||
# die MESSAGE: print a "-- MESSAGE" failure line in bold (tput bold, reset by
# tput sgr0) and terminate the script with exit status 1. The message goes to
# stderr so that it is not lost when stdout is redirected or piped.
die() { printf '%s-- %s%s\n' "$(tput bold)" "$1" "$(tput sgr0)" >&2; exit 1; }
 | 
			
		||||
 | 
			
		||||
# Get rid of old test files
 | 
			
		||||
rm -rf tmp
 | 
			
		||||
mkdir tmp
 | 
			
		||||
 | 
			
		||||
# Create documents in various tools
 | 
			
		||||
log "Creating source documents"
 | 
			
		||||
inkscape --pipe --export-filename=tmp/cairo.pdf <<'EOF' 2>/dev/null || :
 | 
			
		||||
<svg xmlns="http://www.w3.org/2000/svg"><text x="5" y="10">Hello</text></svg>
 | 
			
		||||
EOF
 | 
			
		||||
 | 
			
		||||
date | tee tmp/lowriter.txt | groff -T pdf > tmp/groff.pdf || :
 | 
			
		||||
lowriter --convert-to pdf tmp/lowriter.txt --outdir tmp >/dev/null || :
 | 
			
		||||
convert rose: tmp/imagemagick.pdf || :
 | 
			
		||||
 | 
			
		||||
# Create a root CA certificate pair
 | 
			
		||||
log "Creating certificates"
 | 
			
		||||
openssl req -newkey rsa:2048 -subj "/CN=Test CA" -nodes \
 | 
			
		||||
	-keyout tmp/ca.key.pem -x509 -out tmp/ca.cert.pem 2>/dev/null
 | 
			
		||||
 | 
			
		||||
# Create a private NSS database and insert our test CA there
 | 
			
		||||
rm -rf tmp/nssdir
 | 
			
		||||
mkdir tmp/nssdir
 | 
			
		||||
certutil -N --empty-password -d sql:tmp/nssdir
 | 
			
		||||
certutil -d sql:tmp/nssdir -A -n root -t ,C, -a -i tmp/ca.cert.pem
 | 
			
		||||
 | 
			
		||||
# Create a leaf certificate pair
 | 
			
		||||
cat > tmp/cert.cfg <<'EOF'
 | 
			
		||||
[smime]
 | 
			
		||||
basicConstraints = CA:FALSE
 | 
			
		||||
keyUsage = digitalSignature
 | 
			
		||||
extendedKeyUsage = emailProtection
 | 
			
		||||
nsCertType = email
 | 
			
		||||
EOF
 | 
			
		||||
 | 
			
		||||
openssl req -newkey rsa:2048 -subj "/CN=Test Leaf" -nodes \
 | 
			
		||||
	-keyout tmp/key.pem -out tmp/cert.csr 2>/dev/null
 | 
			
		||||
openssl x509 -req -in tmp/cert.csr -out tmp/cert.pem \
 | 
			
		||||
	-CA tmp/ca.cert.pem -CAkey tmp/ca.key.pem -set_serial 1 \
 | 
			
		||||
	-extensions smime -extfile tmp/cert.cfg 2>/dev/null
 | 
			
		||||
openssl verify -CAfile tmp/ca.cert.pem tmp/cert.pem >/dev/null
 | 
			
		||||
openssl pkcs12 -inkey tmp/key.pem -in tmp/cert.pem \
 | 
			
		||||
	-export -passout pass: -out tmp/key-pair.p12
 | 
			
		||||
 | 
			
		||||
for tool in "$@"; do
 | 
			
		||||
	rm -f tmp/*.signed.pdf
 | 
			
		||||
	for source in tmp/*.pdf; do
 | 
			
		||||
		log "Testing $tool with $source"
 | 
			
		||||
		result=${source%.pdf}.signed.pdf
 | 
			
		||||
		$tool "$source" "$result" tmp/key-pair.p12 ""
 | 
			
		||||
		pdfsig -nssdir sql:tmp/nssdir "$result" | grep Validation
 | 
			
		||||
	done
 | 
			
		||||
 | 
			
		||||
	log "Testing $tool for expected failures"
 | 
			
		||||
	$tool "$result" "$source.fail.pdf" tmp/key-pair.p12 "" \
 | 
			
		||||
		&& die "Double signing shouldn't succeed"
 | 
			
		||||
	$tool -r 1 "$source" "$source.fail.pdf" tmp/key-pair.p12 "" \
 | 
			
		||||
		&& die "Too low reservations shouldn't succeed"
 | 
			
		||||
 | 
			
		||||
	# Our generators do not use PDF versions higher than 1.5
 | 
			
		||||
	log "Testing $tool for version detection"
 | 
			
		||||
	grep -q "/Version /1.6" "$result" \
 | 
			
		||||
		|| die "Version detection seems to misbehave (no upgrade)"
 | 
			
		||||
 | 
			
		||||
	sed '1s/%PDF-1../%PDF-1.7/' "$source" > "$source.alt"
 | 
			
		||||
	$tool "$source.alt" "$result.alt" tmp/key-pair.p12 ""
 | 
			
		||||
	grep -q "/Version /1.6" "$result.alt" \
 | 
			
		||||
		&& die "Version detection seems to misbehave (downgraded)"
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
log "OK"
 | 
			
		||||
		Reference in New Issue
	
	Block a user