Bump version, update NEWS

Go: avoid non-deterministic output
The code has even turned out simpler.
2020-09-06 05:16:40 +02:00 · 2020-09-06 05:16:40 +02:00 · 2020-09-06 05:16:39 +02:00 · 2020-09-05 21:32:05 +02:00 · 2020-09-05 20:10:48 +02:00 · 2020-09-05 20:10:47 +02:00
11 changed files with 555 additions and 233 deletions
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-Copyright (c) 2017, Přemysl Janouch <p@janouch.name>
+Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>

 Permission to use, copy, modify, and/or distribute this software for any
 purpose with or without fee is hereby granted.
--- a/25
+++ b/25
@@ -1,3 +1,28 @@
+1.1.1 (2020-09-06)
+
+ * Fix a dysfunctional example in the manual
+
+ * Go: write the xref table in a deterministic order
+
+ * Add a trivial test suite, based on pdfsig from poppler-utils
+
+
+1.1 (2020-09-05)
+
+ * Make it possible to change the signature reservation with an option
+
+ * Return errors rather than mangle documents in some cases,
+   notably with pre-existing PDF forms
+
+ * Avoid downgrading the document's PDF version to 1.6
+
+ * A few fixes for PDF parsing and serialisation
+
+ * Add an instructive man page
+
+ * Add a native Go port of the utility, also usable as a library
+
+
 1.0 (2018-08-03)

 * Initial release
--- a/README.adoc
+++ b/README.adoc
@@ -1,27 +1,20 @@
 pdf-simple-sign
 ===============
-:compact-option:

 'pdf-simple-sign' is a simple PDF signer intended for documents produced by
-the Cairo library.  As such, it currently comes with some restrictions:
-
- * the document may not have any forms or signatures already, as they will be
-   overwitten
- * the document may not employ cross-reference streams, or must constitute
-   a hybrid-reference file at least
- * the document may not be newer than PDF 1.6 already, or it will get downgraded
-   to that version
- * the signature may take at most 4 kilobytes as a compile-time limit,
-   which should be enough space even for one intermediate certificate
-
-The signature is attached to the first page and has no appearance.
+the Cairo library, GNU troff, ImageMagick, or similar.

 I don't aim to extend the functionality any further.  The project is fairly
 self-contained and it should be easy to grasp and change to suit to your needs.

+Documentation
+-------------
+See the link:pdf-simple-sign.adoc[man page] for information about usage.
+The rest of this README will concern itself with externalities.
+
 Building
 --------
-Build dependencies: Meson, a C++11 compiler, pkg-config +
+Build dependencies: Meson, Asciidoctor, a C++11 compiler, pkg-config +
 Runtime dependencies: libcrypto (OpenSSL 1.1 API)

 $ git clone https://git.janouch.name/p/pdf-simple-sign.git
@@ -34,11 +27,6 @@ In addition to the C++ version, also included is a native Go port:

 $ go get janouch.name/pdf-simple-sign/cmd/pdf-simple-sign

-Usage
-----
-
- $ ./pdf-simple-sign document.pdf document.signed.pdf KeyAndCerts.p12 password
-
 Contributing and Support
 ------------------------
 Use https://git.janouch.name/p/pdf-simple-sign to report bugs, request features,
--- a/cmd/pdf-simple-sign/main.go
+++ b/cmd/pdf-simple-sign/main.go
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2018, Přemysl Janouch <p@janouch.name>
+// Copyright (c) 2018 - 2020, Přemysl Eric Janouch <p@janouch.name>
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted.
@@ -20,8 +20,9 @@ import (
 	"flag"
 	"fmt"
 	"io/ioutil"
-	"janouch.name/pdf-simple-sign/pdf"
 	"os"
+
+	"janouch.name/pdf-simple-sign/pdf"
 )

 // #include <unistd.h>
@@ -39,10 +40,13 @@ func die(status int, format string, args ...interface{}) {
 }

 func usage() {
-	die(1, "Usage: %s [-h] INPUT-FILENAME OUTPUT-FILENAME "+
+	die(1, "Usage: %s [-h] [-r RESERVATION] INPUT-FILENAME OUTPUT-FILENAME "+
 		"PKCS12-PATH PKCS12-PASS", os.Args[0])
 }

+var reservation = flag.Int(
+	"r", 4096, "signature reservation as a number of bytes")
+
 func main() {
 	flag.Usage = usage
 	flag.Parse()
@@ -51,7 +55,7 @@ func main() {
 	}

 	inputPath, outputPath := flag.Arg(0), flag.Arg(1)
-	pdfDocument, err := ioutil.ReadFile(inputPath)
+	doc, err := ioutil.ReadFile(inputPath)
 	if err != nil {
 		die(1, "%s", err)
 	}
@@ -63,10 +67,10 @@ func main() {
 	if err != nil {
 		die(3, "%s", err)
 	}
-	if pdfDocument, err = pdf.Sign(pdfDocument, key, certs); err != nil {
-		die(2, "error: %s", err)
+	if doc, err = pdf.Sign(doc, key, certs, *reservation); err != nil {
+		die(4, "error: %s", err)
 	}
-	if err = ioutil.WriteFile(outputPath, pdfDocument, 0666); err != nil {
-		die(3, "%s", err)
+	if err = ioutil.WriteFile(outputPath, doc, 0666); err != nil {
+		die(5, "%s", err)
 	}
 }
--- a/go.mod
+++ b/go.mod
@@ -0,0 +1,8 @@
+module janouch.name/pdf-simple-sign
+
+go 1.14
+
+require (
+	go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1
+	golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de
+)
--- a/go.sum
+++ b/go.sum
@@ -0,0 +1,13 @@
+go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8 h1:W3oGFPlHBLgXdsbPVixWFMYsuPhm81/Qww3XAgBbn/0=
+go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8/go.mod h1:5fWP3IVYEMc04wC+lMJAfkmNmKAl2P1swVv8VS+URZ8=
+go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1 h1:A/5uWzF44DlIgdm/PQFwfMkW0JX+cIcQi/SwLAmZP5M=
+go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk=
+golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85 h1:et7+NAX3lLIk5qUCTA9QelBjGE/NkhzYw/mhnr0s7nI=
+golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
+golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
+golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de h1:ikNHVSjEfnvz6sxdSPCaPt572qowuyMDMJLLm3Db3ig=
+golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
+golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
+golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
+golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
--- a/meson.build
+++ b/meson.build
@@ -1,5 +1,23 @@
-project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'])
+project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'],
+	version : '1.1.1')
+
+conf = configuration_data()
+conf.set('PROJECT_NAME', '"' + meson.project_name() + '"')
+conf.set('PROJECT_VERSION', '"' + meson.project_version() + '"')
+configure_file(output : 'config.h', configuration : conf)
+
 cryptodep = dependency('libcrypto')
 executable('pdf-simple-sign', 'pdf-simple-sign.cpp',
 	install : true,
 	dependencies : cryptodep)
+
+asciidoctor = find_program('asciidoctor')
+foreach page : ['pdf-simple-sign']
+	custom_target('manpage for ' + page,
+		input: page + '.adoc', output: page + '.1',
+		command: [asciidoctor, '-b', 'manpage',
+			'-a', 'release-version=' + meson.project_version(),
+			'@INPUT@', '-o', '@OUTPUT@'],
+		install: true,
+		install_dir: join_paths(get_option('mandir'), 'man1'))
+endforeach
--- a/pdf-simple-sign.adoc
+++ b/pdf-simple-sign.adoc
@@ -0,0 +1,80 @@
+pdf-simple-sign(1)
+==================
+:doctype: manpage
+:manmanual: pdf-simple-sign Manual
+:mansource: pdf-simple-sign {release-version}
+
+Name
+----
+pdf-simple-sign - a simple PDF signer
+
+Synopsis
+--------
+*pdf-simple-sign* [_OPTION_]... _INPUT.pdf_ _OUTPUT.pdf_ _KEY-PAIR.p12_ _PASSWORD_
+
+Description
+-----------
+'pdf-simple-sign' is a simple PDF signer intended for documents produced by
+the Cairo library, GNU troff, ImageMagick, or similar.  As such, it currently
+comes with some restrictions:
+
+ * the document may not have any forms or signatures already, as they would be
+   overwritten,
+ * the document may not employ cross-reference streams, or must constitute
+   a hybrid-reference file at least.
+
+The key and certificate pair is accepted in the PKCS#12 format.  The _PASSWORD_
+must be supplied on the command line, and may be empty if it is not needed.
+
+The signature is attached to the first page and has no appearance.
+
+If signature data don't fit within the default reservation of 4 kibibytes,
+you might need to adjust it using the *-r* option, or throw out any unnecessary
+intermediate certificates.
+
+Options
+-------
+*-r* _RESERVATION_, *--reservation*=_RESERVATION_::
+  Set aside _RESERVATION_ bytes for the resulting signature.
+  Feel free to try a few values in a loop.  The program itself has no
+  conception of the data, so it can't make accurate predictions.
+
+*-h*, *--help*::
+  Display a help message and exit.
+
+*-V*, *--version*::
+  Output version information and exit.
+
+Examples
+--------
+Create a self-signed certificate, make a document containing the current date,
+sign it and verify the attached signature:
+
+ $ openssl req -newkey rsa:2048 -subj /CN=Test -nodes \
+   -keyout key.pem -x509 -addext keyUsage=digitalSignature \
+   -out cert.pem 2>/dev/null
+ $ openssl pkcs12 -inkey key.pem -in cert.pem \
+   -export -passout pass: -out key-pair.p12
+ $ date | groff -T pdf > test.pdf
+ $ pdf-simple-sign test.pdf test.signed.pdf key-pair.p12 ""
+ $ pdfsig test.signed.pdf
+ Digital Signature Info of: test.signed.pdf
+ Signature #1:
+   - Signer Certificate Common Name: Test
+   - Signer full Distinguished Name: CN=Test
+   - Signing Time: Sep 05 2020 19:41:22
+   - Signing Hash Algorithm: SHA-256
+   - Signature Type: adbe.pkcs7.detached
+   - Signed Ranges: [0 - 6522], [14716 - 15243]
+   - Total document signed
+   - Signature Validation: Signature is Valid.
+   - Certificate Validation: Certificate issuer isn't Trusted.
+
+Reporting bugs
+--------------
+Use https://git.janouch.name/p/pdf-simple-sign to report bugs, request features,
+or submit pull requests.
+
+See also
+--------
+*openssl*(1), *pdfsig*(1)
--- a/pdf-simple-sign.cpp
+++ b/pdf-simple-sign.cpp
@@ -2,7 +2,7 @@
 //
 // pdf-simple-sign: simple PDF signer
 //
-// Copyright (c) 2017, Přemysl Janouch <p@janouch.name>
+// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted.
@@ -37,9 +37,12 @@
 #include <openssl/x509v3.h>
 #include <openssl/pkcs12.h>

+#include "config.h"
+
 // -------------------------------------------------------------------------------------------------

 using uint = unsigned int;
+using ushort = unsigned short;

 static std::string concatenate(const std::vector<std::string>& v, const std::string& delim) {
  std::string res;
@@ -342,6 +345,9 @@ public:

  /// Build the cross-reference table and prepare a new trailer dictionary
  std::string initialize();
+  /// Try to extract the claimed PDF version as a positive decimal number, e.g. 17 for PDF 1.7.
+  /// Returns zero on failure.
+  int version(const pdf_object& root) const;
  /// Retrieve an object by its number and generation -- may return NIL or END with an error
  pdf_object get(uint n, uint generation) const;
  /// Allocate a new object number
@@ -512,7 +518,7 @@ std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entrie

 std::string pdf_updater::initialize() {
  // We only need to look for startxref roughly within the last kibibyte of the document
-  static std::regex haystack_re("[\\s\\S]*\\sstartxref\\s+(\\d+)\\s+%%EOF");
+  static std::regex haystack_re(R"([\s\S]*\sstartxref\s+(\d+)\s+%%EOF)");
  std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);

  std::smatch m;
@@ -560,6 +566,25 @@ std::string pdf_updater::initialize() {
  return "";
 }

+int pdf_updater::version(const pdf_object& root) const {
+  auto version = root.dict.find("Version");
+  if (version != root.dict.end() && version->second.type == pdf_object::NAME) {
+    const auto& v = version->second.string;
+    if (isdigit(v[0]) && v[1] == '.' && isdigit(v[2]) && !v[3])
+      return (v[0] - '0') * 10 + (v[2] - '0');
+  }
+
+  // We only need to look for the comment roughly within the first kibibyte of the document
+  static std::regex version_re(R"((?:^|[\r\n])%(?:!PS-Adobe-\d\.\d )?PDF-(\d)\.(\d)[\r\n])");
+  std::string haystack = document.substr(0, 1024);
+
+  std::smatch m;
+  if (std::regex_search(haystack, m, version_re, std::regex_constants::match_default))
+    return std::stoul(m.str(1)) * 10 + std::stoul(m.str(2));
+
+  return 0;
+}
+
 pdf_object pdf_updater::get(uint n, uint generation) const {
  if (n >= xref_size)
    return {pdf_object::NIL};
@@ -806,12 +831,10 @@ error:
 /// streams from PDF 1.5, or at least constitutes a hybrid-reference file.  The results with
 /// PDF 2.0 (2017) are currently unknown as the standard costs money.
 ///
-/// Carelessly assumes that the version of the original document is at most PDF 1.6.
-///
 /// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
 /// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
 /// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
-static std::string pdf_sign(std::string& document) {
+static std::string pdf_sign(std::string& document, ushort reservation) {
  pdf_updater pdf(document);
  auto err = pdf.initialize();
  if (!err.empty())
@@ -835,7 +858,7 @@ static std::string pdf_sign(std::string& document) {
    pdf.document.append((byterange_len = 32 /* fine for a gigabyte */), ' ');
    pdf.document.append("\n   /Contents <");
    sign_off = pdf.document.size();
-    pdf.document.append((sign_len = 8192 /* certificate, digest, encrypted digest, ... */), '0');
+    pdf.document.append((sign_len = reservation * 2), '0');
    pdf.document.append("> >>");

    // We actually need to exclude the hexstring quotes from signing
@@ -869,15 +892,21 @@ static std::string pdf_sign(std::string& document) {
  if (page.type != pdf_object::DICT)
    return "invalid or unsupported page tree";

-  // XXX assuming this won't be an indirectly referenced array
  auto& annots = page.dict["Annots"];
-  if (annots.type != pdf_object::ARRAY)
+  if (annots.type != pdf_object::ARRAY) {
+    // TODO indirectly referenced arrays might not be that hard to support
+    if (annots.type != pdf_object::END)
+      return "unexpected Annots";
+
    annots = {pdf_object::ARRAY};
+  }
  annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
  pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });

  // 8.6.1 Interactive Form Dictionary
-  // XXX assuming there are no forms already, overwriting everything
+  if (root.dict.count("AcroForm"))
+    return "the document already contains forms, they would be overwritten";
+
  root.dict["AcroForm"] = {std::map<std::string, pdf_object>{
    {"Fields", {std::vector<pdf_object>{
      {pdf_object::REFERENCE, sigfield_n, 0}
@@ -886,9 +915,9 @@ static std::string pdf_sign(std::string& document) {
  }};

  // Upgrade the document version for SHA-256 etc.
-  // XXX assuming that it's not newer than 1.6 already -- while Cairo can't currently use a newer
-  //   version that 1.5, it's not a bad idea to use cairo_pdf_surface_restrict_to_version()
-  root.dict["Version"] = {pdf_object::NAME, "1.6"};
+  if (pdf.version(root) < 16)
+    root.dict["Version"] = {pdf_object::NAME, "1.6"};
+
  pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
  pdf.flush_updates();

@@ -919,25 +948,39 @@ static void die(int status, const char* format, ...) {
 int main(int argc, char* argv[]) {
  auto invocation_name = argv[0];
  auto usage = [=]{
-    die(1, "Usage: %s [-h] INPUT-FILENAME OUTPUT-FILENAME PKCS12-PATH PKCS12-PASS",
+    die(1, "Usage: %s [-h] [-r RESERVATION] INPUT-FILENAME OUTPUT-FILENAME PKCS12-PATH PKCS12-PASS",
            invocation_name);
  };

  static struct option opts[] = {
    {"help", no_argument, 0, 'h'},
+    {"version", no_argument, 0, 'V'},
+    {"reservation", required_argument, 0, 'r'},
    {nullptr, 0, 0, 0},
  };

+  // Reserved space in bytes for the certificate, digest, encrypted digest, ...
+  long reservation = 4096;
  while (1) {
    int option_index = 0;
    auto c = getopt_long(argc, const_cast<char* const*>(argv),
-                         "h", opts, &option_index);
+                         "hVr:", opts, &option_index);
    if (c == -1)
      break;

+    char* end = nullptr;
    switch (c) {
-    case 'h': usage(); break;
-    default: usage();
+    case 'r':
+      errno = 0, reservation = strtol(optarg, &end, 10);
+      if (errno || *end || reservation <= 0 || reservation > USHRT_MAX)
+        die(1, "%s: must be a positive number", optarg);
+      break;
+    case 'V':
+      die(0, "%s", PROJECT_NAME " " PROJECT_VERSION);
+      break;
+    case 'h':
+    default:
+      usage();
    }
  }

@@ -964,7 +1007,7 @@ int main(int argc, char* argv[]) {
    die(1, "%s: %s", input_path, strerror(errno));
  }

-  auto err = pdf_sign(pdf_document);
+  auto err = pdf_sign(pdf_document, ushort(reservation));
  if (!err.empty()) {
    die(2, "Error: %s", err.c_str());
  }
--- a/pdf/pdf.go
+++ b/pdf/pdf.go
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2018, Přemysl Janouch <p@janouch.name>
+// Copyright (c) 2018 - 2020, Přemysl Eric Janouch <p@janouch.name>
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted.
@@ -32,6 +32,7 @@ import (
 	"crypto/ecdsa"
 	"crypto/rsa"
 	"crypto/x509"
+
 	"go.mozilla.org/pkcs7"
 	"golang.org/x/crypto/pkcs12"
 )
@@ -65,21 +66,14 @@ const (
 // Object is a PDF token/object thingy.  Objects may be composed either from
 // one or a sequence of tokens. The PDF Reference doesn't actually speak
 // of tokens.
-//
-// TODO(p): We probably want constructors like NewString, NewBool, NewArray, ...
 type Object struct {
 	Kind ObjectKind

-	// End (error message), Comment/Keyword/Name/String
-	String string
-	// Bool, Numeric
-	Number float64
-	// Array, Indirect
-	Array []Object
-	// Dict, in the future also Stream
-	Dict map[string]Object
-	// Indirect, Reference
-	N, Generation uint
+	String        string            // Comment/Keyword/Name/String
+	Number        float64           // Bool, Numeric
+	Array         []Object          // Array, Indirect
+	Dict          map[string]Object // Dict, in the future also Stream
+	N, Generation uint              // Indirect, Reference
 }

 // IsInteger checks if the PDF object is an integer number.
@@ -93,6 +87,48 @@ func (o *Object) IsUint() bool {
 	return o.IsInteger() && o.Number >= 0 && o.Number <= float64(^uint(0))
 }

+// A slew of constructors that will hopefully all get inlined.
+
+// New returns a new Object of the given kind, with default values.
+func New(kind ObjectKind) Object { return Object{Kind: kind} }
+
+func NewComment(c string) Object { return Object{Kind: Comment, String: c} }
+func NewKeyword(k string) Object { return Object{Kind: Keyword, String: k} }
+
+func NewBool(b bool) Object {
+	var b64 float64
+	if b {
+		b64 = 1
+	}
+	return Object{Kind: Bool, Number: b64}
+}
+
+func NewNumeric(n float64) Object { return Object{Kind: Numeric, Number: n} }
+func NewName(n string) Object     { return Object{Kind: Name, String: n} }
+func NewString(s string) Object   { return Object{Kind: String, String: s} }
+
+func NewArray(a []Object) Object {
+	return Object{Kind: Array, Array: a}
+}
+
+func NewDict(d map[string]Object) Object {
+	if d == nil {
+		d = make(map[string]Object)
+	}
+	return Object{Kind: Dict, Dict: d}
+}
+
+func NewIndirect(o Object, n, generation uint) Object {
+	return Object{Kind: Indirect, N: n, Generation: generation,
+		Array: []Object{o}}
+}
+
+func NewReference(n, generation uint) Object {
+	return Object{Kind: Reference, N: n, Generation: generation}
+}
+
+func newError(msg string) (Object, error) { return New(End), errors.New(msg) }
+
 // -----------------------------------------------------------------------------

 const (
@@ -106,21 +142,21 @@ const (
 // Lexer is a basic lexical analyser for the Portable Document Format,
 // giving limited error information.
 type Lexer struct {
-	p []byte // input buffer
+	P []byte // input buffer
 }

 func (lex *Lexer) read() (byte, bool) {
-	if len(lex.p) > 0 {
-		ch := lex.p[0]
-		lex.p = lex.p[1:]
+	if len(lex.P) > 0 {
+		ch := lex.P[0]
+		lex.P = lex.P[1:]
 		return ch, true
 	}
 	return 0, false
 }

 func (lex *Lexer) peek() (byte, bool) {
-	if len(lex.p) > 0 {
-		return lex.p[0], true
+	if len(lex.P) > 0 {
+		return lex.P[0], true
 	}
 	return 0, false
 }
@@ -165,13 +201,13 @@ func (lex *Lexer) unescape(ch byte) byte {
 	return ch
 }

-func (lex *Lexer) string() Object {
+func (lex *Lexer) string() (Object, error) {
 	var value []byte
 	parens := 1
 	for {
 		ch, ok := lex.read()
 		if !ok {
-			return Object{Kind: End, String: "unexpected end of string"}
+			return newError("unexpected end of string")
 		}
 		if lex.eatNewline(ch) {
 			ch = '\n'
@@ -183,7 +219,7 @@ func (lex *Lexer) string() Object {
 			}
 		} else if ch == '\\' {
 			if ch, ok = lex.read(); !ok {
-				return Object{Kind: End, String: "unexpected end of string"}
+				return newError("unexpected end of string")
 			} else if lex.eatNewline(ch) {
 				continue
 			} else {
@@ -192,19 +228,19 @@ func (lex *Lexer) string() Object {
 		}
 		value = append(value, ch)
 	}
-	return Object{Kind: String, String: string(value)}
+	return NewString(string(value)), nil
 }

-func (lex *Lexer) stringHex() Object {
+func (lex *Lexer) stringHex() (Object, error) {
 	var value, buf []byte
 	for {
 		ch, ok := lex.read()
 		if !ok {
-			return Object{Kind: End, String: "unexpected end of hex string"}
+			return newError("unexpected end of hex string")
 		} else if ch == '>' {
 			break
 		} else if strings.IndexByte(hexAlphabet, ch) < 0 {
-			return Object{Kind: End, String: "invalid hex string"}
+			return newError("invalid hex string")
 		} else if buf = append(buf, ch); len(buf) == 2 {
 			u, _ := strconv.ParseUint(string(buf), 16, 8)
 			value = append(value, byte(u))
@@ -215,10 +251,10 @@ func (lex *Lexer) stringHex() Object {
 		u, _ := strconv.ParseUint(string(buf)+"0", 16, 8)
 		value = append(value, byte(u))
 	}
-	return Object{Kind: String, String: string(value)}
+	return NewString(string(value)), nil
 }

-func (lex *Lexer) name() Object {
+func (lex *Lexer) name() (Object, error) {
 	var value []byte
 	for {
 		ch, ok := lex.peek()
@@ -237,7 +273,7 @@ func (lex *Lexer) name() Object {
 				lex.read()
 			}
 			if len(hexa) != 2 {
-				return Object{Kind: End, String: "invalid name hexa escape"}
+				return newError("invalid name hexa escape")
 			}
 			u, _ := strconv.ParseUint(string(value), 16, 8)
 			ch = byte(u)
@@ -245,12 +281,12 @@ func (lex *Lexer) name() Object {
 		value = append(value, ch)
 	}
 	if len(value) == 0 {
-		return Object{Kind: End, String: "unexpected end of name"}
+		return newError("unexpected end of name")
 	}
-	return Object{Kind: Name, String: string(value)}
+	return NewName(string(value)), nil
 }

-func (lex *Lexer) comment() Object {
+func (lex *Lexer) comment() (Object, error) {
 	var value []byte
 	for {
 		ch, ok := lex.peek()
@@ -260,11 +296,11 @@ func (lex *Lexer) comment() Object {
 		value = append(value, ch)
 		lex.read()
 	}
-	return Object{Kind: Comment, String: string(value)}
+	return NewComment(string(value)), nil
 }

 // XXX: Maybe invalid numbers should rather be interpreted as keywords.
-func (lex *Lexer) number() Object {
+func (lex *Lexer) number() (Object, error) {
 	var value []byte
 	ch, ok := lex.peek()
 	if ch == '-' {
@@ -287,16 +323,16 @@ func (lex *Lexer) number() Object {
 		lex.read()
 	}
 	if !digits {
-		return Object{Kind: End, String: "invalid number"}
+		return newError("invalid number")
 	}
 	f, _ := strconv.ParseFloat(string(value), 64)
-	return Object{Kind: Numeric, Number: f}
+	return NewNumeric(f), nil
 }

-func (lex *Lexer) Next() Object {
+func (lex *Lexer) Next() (Object, error) {
 	ch, ok := lex.peek()
 	if !ok {
-		return Object{Kind: End}
+		return New(End), nil
 	}
 	if strings.IndexByte("-0123456789.", ch) >= 0 {
 		return lex.number()
@@ -315,13 +351,13 @@ func (lex *Lexer) Next() Object {
 	switch v := string(value); v {
 	case "":
 	case "null":
-		return Object{Kind: Nil}
+		return New(Nil), nil
 	case "true":
-		return Object{Kind: Bool, Number: 1}
+		return NewBool(true), nil
 	case "false":
-		return Object{Kind: Bool, Number: 0}
+		return NewBool(false), nil
 	default:
-		return Object{Kind: Keyword, String: v}
+		return NewKeyword(v), nil
 	}

 	switch ch, _ := lex.read(); ch {
@@ -332,29 +368,29 @@ func (lex *Lexer) Next() Object {
 	case '(':
 		return lex.string()
 	case '[':
-		return Object{Kind: BArray}
+		return New(BArray), nil
 	case ']':
-		return Object{Kind: EArray}
+		return New(EArray), nil
 	case '<':
 		if ch, _ := lex.peek(); ch == '<' {
 			lex.read()
-			return Object{Kind: BDict}
+			return New(BDict), nil
 		}
 		return lex.stringHex()
 	case '>':
 		if ch, _ := lex.peek(); ch == '>' {
 			lex.read()
-			return Object{Kind: EDict}
+			return New(EDict), nil
 		}
-		return Object{Kind: End, String: "unexpected '>'"}
+		return newError("unexpected '>'")
 	default:
 		if lex.eatNewline(ch) {
-			return Object{Kind: NL}
+			return New(NL), nil
 		}
 		if strings.IndexByte(whitespace, ch) >= 0 {
 			return lex.Next()
 		}
-		return Object{Kind: End, String: "unexpected input"}
+		return newError("unexpected input")
 	}
 }

@@ -461,10 +497,10 @@ type Updater struct {
 	Trailer map[string]Object
 }

-func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) Object {
+func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) {
 	lenStack := len(*stack)
 	if lenStack < 2 {
-		return Object{Kind: End, String: "missing object ID pair"}
+		return newError("missing object ID pair")
 	}

 	n := (*stack)[lenStack-2]
@@ -472,28 +508,30 @@ func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) Object {
 	*stack = (*stack)[:lenStack-2]

 	if !g.IsUint() || !n.IsUint() {
-		return Object{Kind: End, String: "invalid object ID pair"}
+		return newError("invalid object ID pair")
 	}

-	obj := Object{
-		Kind: Indirect, N: uint(n.Number), Generation: uint(g.Number)}
+	var inner []Object
 	for {
-		object := u.parse(lex, &obj.Array)
+		object, _ := u.parse(lex, &inner)
 		if object.Kind == End {
-			return Object{Kind: End, String: "object doesn't end"}
+			return newError("object doesn't end")
 		}
 		if object.Kind == Keyword && object.String == "endobj" {
 			break
 		}
-		obj.Array = append(obj.Array, object)
+		inner = append(inner, object)
 	}
-	return obj
+	if len(inner) != 1 {
+		return newError("indirect objects must contain exactly one object")
+	}
+	return NewIndirect(inner[0], uint(n.Number), uint(g.Number)), nil
 }

-func (u *Updater) parseR(stack *[]Object) Object {
+func (u *Updater) parseR(stack *[]Object) (Object, error) {
 	lenStack := len(*stack)
 	if lenStack < 2 {
-		return Object{Kind: End, String: "missing reference ID pair"}
+		return newError("missing reference ID pair")
 	}

 	n := (*stack)[lenStack-2]
@@ -501,15 +539,16 @@ func (u *Updater) parseR(stack *[]Object) Object {
 	*stack = (*stack)[:lenStack-2]

 	if !g.IsUint() || !n.IsUint() {
-		return Object{Kind: End, String: "invalid reference ID pair"}
+		return newError("invalid reference ID pair")
 	}
-	return Object{
-		Kind: Reference, N: uint(n.Number), Generation: uint(g.Number)}
+	return NewReference(uint(n.Number), uint(g.Number)), nil
 }

-/// parse reads an object at the lexer's position. Not a strict parser.
-func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
-	switch token := lex.Next(); token.Kind {
+// parse reads an object at the lexer's position. Not a strict parser.
+//
+// TODO(p): We should fix all uses of this not to eat the error.
+func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
+	switch token, err := lex.Next(); token.Kind {
 	case NL, Comment:
 		// These are not important to parsing,
 		// not even for this procedure's needs.
@@ -517,22 +556,22 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
 	case BArray:
 		var array []Object
 		for {
-			object := u.parse(lex, &array)
+			object, _ := u.parse(lex, &array)
 			if object.Kind == End {
-				return Object{Kind: End, String: "array doesn't end"}
+				return newError("array doesn't end")
 			}
 			if object.Kind == EArray {
 				break
 			}
 			array = append(array, object)
 		}
-		return Object{Kind: Array, Array: array}
+		return NewArray(array), nil
 	case BDict:
 		var array []Object
 		for {
-			object := u.parse(lex, &array)
+			object, _ := u.parse(lex, &array)
 			if object.Kind == End {
-				return Object{Kind: End, String: "dictionary doesn't end"}
+				return newError("dictionary doesn't end")
 			}
 			if object.Kind == EDict {
 				break
@@ -540,17 +579,16 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
 			array = append(array, object)
 		}
 		if len(array)%2 != 0 {
-			return Object{Kind: End, String: "unbalanced dictionary"}
+			return newError("unbalanced dictionary")
 		}
 		dict := make(map[string]Object)
 		for i := 0; i < len(array); i += 2 {
 			if array[i].Kind != Name {
-				return Object{
-					Kind: End, String: "invalid dictionary key type"}
+				return newError("invalid dictionary key type")
 			}
 			dict[array[i].String] = array[i+1]
 		}
-		return Object{Kind: Dict, Dict: dict}
+		return NewDict(dict), nil
 	case Keyword:
 		// Appears in the document body, typically needs
 		// to access the cross-reference table.
@@ -560,7 +598,7 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
 		// streams can use the Object.String member.
 		switch token.String {
 		case "stream":
-			return Object{Kind: End, String: "streams are not supported yet"}
+			return newError("streams are not supported yet")
 		case "obj":
 			return u.parseIndirect(lex, stack)
 		case "R":
@@ -568,18 +606,18 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
 		}
 		fallthrough
 	default:
-		return token
+		return token, err
 	}
 }

 func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
 	var throwawayStack []Object
-	if keyword := u.parse(lex,
+	if keyword, _ := u.parse(lex,
 		&throwawayStack); keyword.Kind != Keyword || keyword.String != "xref" {
 		return errors.New("invalid xref table")
 	}
 	for {
-		object := u.parse(lex, &throwawayStack)
+		object, _ := u.parse(lex, &throwawayStack)
 		if object.Kind == End {
 			return errors.New("unexpected EOF while looking for the trailer")
 		}
@@ -587,16 +625,16 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
 			break
 		}

-		second := u.parse(lex, &throwawayStack)
+		second, _ := u.parse(lex, &throwawayStack)
 		if !object.IsUint() || !second.IsUint() {
 			return errors.New("invalid xref section header")
 		}

 		start, count := uint(object.Number), uint(second.Number)
 		for i := uint(0); i < count; i++ {
-			off := u.parse(lex, &throwawayStack)
-			gen := u.parse(lex, &throwawayStack)
-			key := u.parse(lex, &throwawayStack)
+			off, _ := u.parse(lex, &throwawayStack)
+			gen, _ := u.parse(lex, &throwawayStack)
+			key, _ := u.parse(lex, &throwawayStack)
 			if !off.IsInteger() || off.Number < 0 ||
 				off.Number > float64(len(u.Document)) ||
 				!gen.IsInteger() || gen.Number < 0 || gen.Number > 65535 ||
@@ -632,11 +670,12 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {

 // -----------------------------------------------------------------------------

-var haystackRE = regexp.MustCompile(`(?s:.*)\sstartxref\s+(\d+)\s+%%EOF`)
+var trailerRE = regexp.MustCompile(`(?s:.*)\sstartxref\s+(\d+)\s+%%EOF`)

-// Initialize builds the cross-reference table and prepares
-// a new trailer dictionary.
-func (u *Updater) Initialize() error {
+// NewUpdater initializes an Updater, building the cross-reference table and
+// preparing a new trailer dictionary.
+func NewUpdater(document []byte) (*Updater, error) {
+	u := &Updater{Document: document}
 	u.updated = make(map[uint]struct{})

 	// We only need to look for startxref roughly within
@@ -646,33 +685,33 @@ func (u *Updater) Initialize() error {
 		haystack = haystack[len(haystack)-1024:]
 	}

-	m := haystackRE.FindSubmatch(haystack)
+	m := trailerRE.FindSubmatch(haystack)
 	if m == nil {
-		return errors.New("cannot find startxref")
+		return nil, errors.New("cannot find startxref")
 	}

 	xrefOffset, _ := strconv.ParseInt(string(m[1]), 10, 64)
 	lastXrefOffset := xrefOffset
-	loadedXrefs := map[int64]struct{}{}
-	loadedEntries := map[uint]struct{}{}
+	loadedXrefs := make(map[int64]struct{})
+	loadedEntries := make(map[uint]struct{})

 	var throwawayStack []Object
 	for {
 		if _, ok := loadedXrefs[xrefOffset]; ok {
-			return errors.New("circular xref offsets")
+			return nil, errors.New("circular xref offsets")
 		}
 		if xrefOffset >= int64(len(u.Document)) {
-			return errors.New("invalid xref offset")
+			return nil, errors.New("invalid xref offset")
 		}

 		lex := Lexer{u.Document[xrefOffset:]}
 		if err := u.loadXref(&lex, loadedEntries); err != nil {
-			return err
+			return nil, err
 		}

-		trailer := u.parse(&lex, &throwawayStack)
+		trailer, _ := u.parse(&lex, &throwawayStack)
 		if trailer.Kind != Dict {
-			return errors.New("invalid trailer dictionary")
+			return nil, errors.New("invalid trailer dictionary")
 		}
 		if len(loadedXrefs) == 0 {
 			u.Trailer = trailer.Dict
@@ -685,48 +724,74 @@ func (u *Updater) Initialize() error {
 		}
 		// FIXME: We don't check for size_t over or underflow.
 		if !prevOffset.IsInteger() {
-			return errors.New("invalid Prev offset")
+			return nil, errors.New("invalid Prev offset")
 		}
 		xrefOffset = int64(prevOffset.Number)
 	}

-	u.Trailer["Prev"] = Object{
-		Kind: Numeric, Number: float64(lastXrefOffset)}
+	u.Trailer["Prev"] = NewNumeric(float64(lastXrefOffset))

 	lastSize, ok := u.Trailer["Size"]
 	if !ok || !lastSize.IsInteger() || lastSize.Number <= 0 {
-		return errors.New("invalid or missing cross-reference table Size")
+		return nil, errors.New("invalid or missing cross-reference table Size")
 	}
 	u.xrefSize = uint(lastSize.Number)
-	return nil
+	return u, nil
+}
+
+var versionRE = regexp.MustCompile(
+	`(?:^|[\r\n])%(?:!PS-Adobe-\d\.\d )?PDF-(\d)\.(\d)[\r\n]`)
+
+// Version returns the claimed PDF version as a decimal number (17 for 1.7),
+// preferring root's Version entry to the header comment, or zero on failure.
+func (u *Updater) Version(root *Object) int {
+	if version, ok := root.Dict["Version"]; ok && version.Kind == Name {
+		if v := version.String; len(v) == 3 && v[1] == '.' &&
+			v[0] >= '0' && v[0] <= '9' && v[2] >= '0' && v[2] <= '9' {
+			return int(v[0]-'0')*10 + int(v[2]-'0')
+		}
+	}
+
+	// We only need to look for the %PDF-x.y header comment roughly within
+	// the first kibibyte of the document.
+	haystack := u.Document
+	if len(haystack) > 1024 {
+		haystack = haystack[:1024]
+	}
+	if m := versionRE.FindSubmatch(haystack); m != nil {
+		return int(m[1][0]-'0')*10 + int(m[2][0]-'0')
+	}
+	return 0
 }

 // Get retrieves an object by its number and generation--may return
 // Nil or End with an error.
-func (u *Updater) Get(n, generation uint) Object {
+//
+// TODO(p): We should fix all uses of this not to eat the error.
+func (u *Updater) Get(n, generation uint) (Object, error) {
 	if n >= u.xrefSize {
-		return Object{Kind: Nil}
+		return New(Nil), nil
 	}

 	ref := u.xref[n]
 	if !ref.nonfree || ref.generation != generation ||
 		ref.offset >= int64(len(u.Document)) {
-		return Object{Kind: Nil}
+		return New(Nil), nil
 	}

 	lex := Lexer{u.Document[ref.offset:]}
 	var stack []Object
 	for {
-		object := u.parse(&lex, &stack)
+		object, err := u.parse(&lex, &stack)
 		if object.Kind == End {
-			return object
+			return object, err
 		}
 		if object.Kind != Indirect {
 			stack = append(stack, object)
 		} else if object.N != n || object.Generation != generation {
-			return Object{Kind: End, String: "object mismatch"}
+			return newError("object mismatch")
 		} else {
-			return object.Array[0]
+			return object.Array[0], nil
 		}
 	}
 }
@@ -757,7 +822,8 @@ type BytesWriter interface {
 	WriteString(s string) (n int, err error)
 }

-// Update appends an updated object to the end of the document.
+// Update appends an updated object to the end of the document. The fill
+// callback must write exactly one PDF object.
 func (u *Updater) Update(n uint, fill func(buf BytesWriter)) {
 	oldRef := u.xref[n]
 	u.updated[n] = struct{}{}
@@ -787,30 +853,19 @@ func (u *Updater) FlushUpdates() {
 		return updated[i] < updated[j]
 	})

-	groups := make(map[uint]uint)
-	for i := 0; i < len(updated); {
-		start, count := updated[i], uint(1)
-		for i++; i != len(updated) && updated[i] == start+count; i++ {
-			count++
-		}
-		groups[start] = count
-	}
-
-	// Taking literally "Each cross-reference section begins with a line
-	// containing the keyword xref. Following this line are one or more
-	// cross-reference subsections." from 3.4.3 in PDF Reference.
-	if len(groups) == 0 {
-		groups[0] = 0
-	}
-
 	buf := bytes.NewBuffer(u.Document)
 	startXref := buf.Len() + 1
 	buf.WriteString("\nxref\n")

-	for start, count := range groups {
-		fmt.Fprintf(buf, "%d %d\n", start, count)
-		for i := uint(0); i < count; i++ {
-			ref := u.xref[start+uint(i)]
+	for i := 0; i < len(updated); {
+		start, stop := updated[i], updated[i]+1
+		for i++; i < len(updated) && updated[i] == stop; i++ {
+			stop++
+		}
+
+		fmt.Fprintf(buf, "%d %d\n", start, stop-start)
+		for ; start < stop; start++ {
+			ref := u.xref[start]
 			if ref.nonfree {
 				fmt.Fprintf(buf, "%010d %05d n \n", ref.offset, ref.generation)
 			} else {
@@ -819,8 +874,15 @@ func (u *Updater) FlushUpdates() {
 		}
 	}

-	u.Trailer["Size"] = Object{Kind: Numeric, Number: float64(u.xrefSize)}
-	trailer := Object{Kind: Dict, Dict: u.Trailer}
+	// Taking literally "Each cross-reference section begins with a line
+	// containing the keyword xref. Following this line are one or more
+	// cross-reference subsections." from 3.4.3 in PDF Reference.
+	if len(updated) == 0 {
+		fmt.Fprintf(buf, "%d %d\n", 0, 0)
+	}
+
+	u.Trailer["Size"] = NewNumeric(float64(u.xrefSize))
+	trailer := NewDict(u.Trailer)

 	fmt.Fprintf(buf, "trailer\n%s\nstartxref\n%d\n%%%%EOF\n",
 		trailer.Serialize(), startXref)
@@ -829,8 +891,8 @@ func (u *Updater) FlushUpdates() {

 // -----------------------------------------------------------------------------

-// PdfDate makes a PDF object representing the given point in time.
-func PdfDate(ts time.Time) Object {
+// NewDate makes a PDF object representing the given point in time.
+func NewDate(ts time.Time) Object {
 	buf := ts.AppendFormat(nil, "D:20060102150405")
 	// "Z07'00'" doesn't work, we need to do some of it manually.
 	if _, offset := ts.Zone(); offset != 0 {
@@ -839,14 +901,15 @@ func PdfDate(ts time.Time) Object {
 	} else {
 		buf = append(buf, 'Z')
 	}
-	return Object{Kind: String, String: string(buf)}
+	return NewString(string(buf))
 }

-// PdfGetFirstPage retrieves the first page of the document or a Nil object.
-func PdfGetFirstPage(pdf *Updater, nodeN, nodeGeneration uint) Object {
-	obj := pdf.Get(nodeN, nodeGeneration)
+// GetFirstPage retrieves the first page of the given page (sub)tree reference,
+// or returns a Nil object if unsuccessful.
+func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
+	obj, _ := u.Get(nodeN, nodeGeneration)
 	if obj.Kind != Dict {
-		return Object{Kind: Nil}
+		return New(Nil)
 	}

 	// Out of convenience; these aren't filled normally.
@@ -854,11 +917,11 @@ func PdfGetFirstPage(pdf *Updater, nodeN, nodeGeneration uint) Object {
 	obj.Generation = nodeGeneration

 	if typ, ok := obj.Dict["Type"]; !ok || typ.Kind != Name {
-		return Object{Kind: Nil}
+		return New(Nil)
 	} else if typ.String == "Page" {
 		return obj
 	} else if typ.String != "Pages" {
-		return Object{Kind: Nil}
+		return New(Nil)
 	}

 	// XXX: Technically speaking, this may be an indirect reference.
@@ -867,11 +930,11 @@ func PdfGetFirstPage(pdf *Updater, nodeN, nodeGeneration uint) Object {
 	kids, ok := obj.Dict["Kids"]
 	if !ok || kids.Kind != Array || len(kids.Array) == 0 ||
 		kids.Array[0].Kind != Reference {
-		return Object{Kind: Nil}
+		return New(Nil)
 	}

 	// XXX: Nothing prevents us from recursing in an evil circular graph.
-	return PdfGetFirstPage(pdf, kids.Array[0].N, kids.Array[0].Generation)
+	return u.GetFirstPage(kids.Array[0].N, kids.Array[0].Generation)
 }

 // -----------------------------------------------------------------------------
@@ -968,8 +1031,9 @@ func PKCS12Parse(p12 []byte, password string) (

 // FillInSignature signs PDF contents and writes the signature into the given
 // window that has been reserved for this specific purpose.
+// This is a very low-level function.
 func FillInSignature(document []byte, signOff, signLen int,
-	key crypto.PublicKey, certs []*x509.Certificate) error {
+	key crypto.PrivateKey, certs []*x509.Certificate) error {
 	if signOff < 0 || signOff > len(document) ||
 		signLen < 2 || signOff+signLen > len(document) {
 		return errors.New("invalid signing window")
@@ -1039,23 +1103,24 @@ func FillInSignature(document []byte, signOff, signLen int,
 	return nil
 }

+// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
+// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
+// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
+
 // Sign signs the given document, growing and returning the passed-in slice.
+// There must be at least one certificate, matching the private key.
+// The certificates must form a chain.
+//
+// A good default for the reservation is around 4096 (the value is in bytes).
 //
 // The presumption here is that the document is valid and that it doesn't
 // employ cross-reference streams from PDF 1.5, or at least constitutes
 // a hybrid-reference file. The results with PDF 2.0 (2017) are currently
 // unknown as the standard costs money.
-//
-// Carelessly assumes that the version of the original document is at most
-// PDF 1.6.
-//
-// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
-// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
-// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
-func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
-	[]byte, error) {
-	pdf := &Updater{Document: document}
-	if err := pdf.Initialize(); err != nil {
+func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
+	reservation int) ([]byte, error) {
+	pdf, err := NewUpdater(document)
+	if err != nil {
 		return nil, err
 	}

@@ -1063,7 +1128,7 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
 	if !ok || rootRef.Kind != Reference {
 		return nil, errors.New("trailer does not contain a reference to Root")
 	}
-	root := pdf.Get(rootRef.N, rootRef.Generation)
+	root, _ := pdf.Get(rootRef.N, rootRef.Generation)
 	if root.Kind != Dict {
 		return nil, errors.New("invalid Root dictionary reference")
 	}
@@ -1074,7 +1139,7 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
 	pdf.Update(sigdictN, func(buf BytesWriter) {
 		// The timestamp is important for Adobe Acrobat Reader DC.
 		// The ideal would be to use RFC 3161.
-		now := PdfDate(time.Now())
+		now := NewDate(time.Now())
 		buf.WriteString("<< /Type/Sig /Filter/Adobe.PPKLite" +
 			" /SubFilter/adbe.pkcs7.detached\n" +
 			"   /M" + now.Serialize() + " /ByteRange ")
@@ -1085,7 +1150,7 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
 		buf.WriteString("\n   /Contents <")

 		signOff = buf.Len()
-		signLen = 8192 // cert, digest, encripted digest, ...
+		signLen = reservation * 2 // cert, digest, encrypted digest, ...
 		buf.Write(bytes.Repeat([]byte{'0'}, signLen))
 		buf.WriteString("> >>")

@@ -1094,22 +1159,19 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
 		signLen += 2
 	})

-	sigfield := Object{Kind: Dict, Dict: map[string]Object{
+	sigfield := NewDict(map[string]Object{
 		// 8.6.3 Field Types - Signature Fields
-		"FT": {Kind: Name, String: "Sig"},
-		"V":  {Kind: Reference, N: sigdictN, Generation: 0},
+		"FT": NewName("Sig"),
+		"V":  NewReference(sigdictN, 0),
 		// 8.4.5 Annotations Types - Widget Annotations
 		// We can merge the Signature Annotation and omit Kids here.
-		"Subtype": {Kind: Name, String: "Widget"},
-		"F":       {Kind: Numeric, Number: 2 /* Hidden */},
-		"T":       {Kind: String, String: "Signature1"},
-		"Rect": {Kind: Array, Array: []Object{
-			{Kind: Numeric, Number: 0},
-			{Kind: Numeric, Number: 0},
-			{Kind: Numeric, Number: 0},
-			{Kind: Numeric, Number: 0},
-		}},
-	}}
+		"Subtype": NewName("Widget"),
+		"F":       NewNumeric(2 /* Hidden */),
+		"T":       NewString("Signature1"),
+		"Rect": NewArray([]Object{
+			NewNumeric(0), NewNumeric(0), NewNumeric(0), NewNumeric(0),
+		}),
+	})

 	sigfieldN := pdf.Allocate()
 	pdf.Update(sigfieldN, func(buf BytesWriter) {
@@ -1120,18 +1182,21 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
 	if !ok || pagesRef.Kind != Reference {
 		return nil, errors.New("invalid Pages reference")
 	}
-	page := PdfGetFirstPage(pdf, pagesRef.N, pagesRef.Generation)
+	page := pdf.GetFirstPage(pagesRef.N, pagesRef.Generation)
 	if page.Kind != Dict {
 		return nil, errors.New("invalid or unsupported page tree")
 	}

-	// XXX: Assuming this won't be an indirectly referenced array.
 	annots := page.Dict["Annots"]
 	if annots.Kind != Array {
-		annots = Object{Kind: Array}
+		// TODO(p): Indirectly referenced arrays might not be
+		// that hard to support.
+		if annots.Kind != End {
+			return nil, errors.New("unexpected Annots")
+		}
+		annots = NewArray(nil)
 	}
-	annots.Array = append(annots.Array, Object{
-		Kind: Reference, N: sigfieldN, Generation: 0})
+	annots.Array = append(annots.Array, NewReference(sigfieldN, 0))

 	page.Dict["Annots"] = annots
 	pdf.Update(page.N, func(buf BytesWriter) {
@@ -1139,20 +1204,21 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
 	})

 	// 8.6.1 Interactive Form Dictionary
-	// XXX: Assuming there are no forms already, overwriting everything.
-	root.Dict["AcroForm"] = Object{Kind: Dict, Dict: map[string]Object{
-		"Fields": {Kind: Array, Array: []Object{
-			{Kind: Reference, N: sigfieldN, Generation: 0},
-		}},
-		"SigFlags": {Kind: Numeric,
-			Number: 3 /* SignaturesExist | AppendOnly */},
-	}}
+	if _, ok := root.Dict["AcroForm"]; ok {
+		return nil, errors.New("the document already contains forms, " +
+			"they would be overwritten")
+	}
+
+	root.Dict["AcroForm"] = NewDict(map[string]Object{
+		"Fields":   NewArray([]Object{NewReference(sigfieldN, 0)}),
+		"SigFlags": NewNumeric(3 /* SignaturesExist | AppendOnly */),
+	})

 	// Upgrade the document version for SHA-256 etc.
-	// XXX: Assuming that it's not newer than 1.6 already--while Cairo can't
-	// currently use a newer version that 1.5, it's not a bad idea to use
-	// cairo_pdf_surface_restrict_to_version().
-	root.Dict["Version"] = Object{Kind: Name, String: "1.6"}
+	if pdf.Version(&root) < 16 {
+		root.Dict["Version"] = NewName("1.6")
+	}
+
 	pdf.Update(rootRef.N, func(buf BytesWriter) {
 		buf.WriteString(root.Serialize())
 	})
--- a/test.sh
+++ b/test.sh
@@ -0,0 +1,77 @@
+#!/bin/sh -e
+# Test basic functionality of both versions (the C++ build and the Go port)
+# Usage: ./test.sh builddir/pdf-simple-sign cmd/pdf-simple-sign/pdf-simple-sign
+
+log() { echo "`tput sitm`-- $1`tput sgr0`"; }
+die() { echo "`tput bold`-- $1`tput sgr0`"; exit 1; }
+
+# Get rid of old test files
+rm -rf tmp
+mkdir tmp
+
+# Create documents in various tools
+log "Creating source documents"
+inkscape --pipe --export-filename=tmp/cairo.pdf <<'EOF' 2>/dev/null || :
+<svg xmlns="http://www.w3.org/2000/svg"><text x="5" y="10">Hello</text></svg>
+EOF
+
+date | tee tmp/lowriter.txt | groff -T pdf > tmp/groff.pdf || :
+lowriter --convert-to pdf tmp/lowriter.txt --outdir tmp >/dev/null || :
+convert rose: tmp/imagemagick.pdf || :
+
+# Create a root CA certificate pair
+log "Creating certificates"
+openssl req -newkey rsa:2048 -subj "/CN=Test CA" -nodes \
+	-keyout tmp/ca.key.pem -x509 -out tmp/ca.cert.pem 2>/dev/null
+
+# Create a private NSS database and insert our test CA there
+rm -rf tmp/nssdir
+mkdir tmp/nssdir
+certutil -N --empty-password -d sql:tmp/nssdir
+certutil -d sql:tmp/nssdir -A -n root -t ,C, -a -i tmp/ca.cert.pem
+
+# Create a leaf certificate pair
+cat > tmp/cert.cfg <<'EOF'
+[smime]
+basicConstraints = CA:FALSE
+keyUsage = digitalSignature
+extendedKeyUsage = emailProtection
+nsCertType = email
+EOF
+
+openssl req -newkey rsa:2048 -subj "/CN=Test Leaf" -nodes \
+	-keyout tmp/key.pem -out tmp/cert.csr 2>/dev/null
+openssl x509 -req -in tmp/cert.csr -out tmp/cert.pem \
+	-CA tmp/ca.cert.pem -CAkey tmp/ca.key.pem -set_serial 1 \
+	-extensions smime -extfile tmp/cert.cfg 2>/dev/null
+openssl verify -CAfile tmp/ca.cert.pem tmp/cert.pem >/dev/null
+openssl pkcs12 -inkey tmp/key.pem -in tmp/cert.pem \
+	-export -passout pass: -out tmp/key-pair.p12
+
+for tool in "$@"; do
+	rm -f tmp/*.signed.pdf
+	for source in tmp/*.pdf; do
+		log "Testing $tool with $source"
+		result=${source%.pdf}.signed.pdf
+		$tool "$source" "$result" tmp/key-pair.p12 ""
+		pdfsig -nssdir sql:tmp/nssdir "$result" | grep Validation
+	done
+
+	log "Testing $tool for expected failures"
+	$tool "$result" "$source.fail.pdf" tmp/key-pair.p12 "" \
+		&& die "Double signing shouldn't succeed"
+	$tool -r 1 "$source" "$source.fail.pdf" tmp/key-pair.p12 "" \
+		&& die "Too low reservations shouldn't succeed"
+
+	# Our generators do not use PDF versions higher than 1.5
+	log "Testing $tool for version detection"
+	grep -q "/Version /1.6" "$result" \
+		|| die "Version detection seems to misbehave (no upgrade)"
+
+	sed '1s/%PDF-1../%PDF-1.7/' "$source" > "$source.alt"
+	$tool "$source.alt" "$result.alt" tmp/key-pair.p12 ""
+	grep -q "/Version /1.6" "$result.alt" \
+		&& die "Version detection seems to misbehave (downgraded)"
+done
+
+log "OK"
Author	SHA1	Message	Date
Přemysl Eric Janouch	a5176b5bbb	Bump version, update NEWS	2020-09-06 05:16:40 +02:00
Přemysl Eric Janouch	af6a937033	Go: avoid non-deterministic output — The code has even turned out simpler.	2020-09-06 05:16:40 +02:00
Přemysl Eric Janouch	8913f8ba9c	Add a test script to verify basic function	2020-09-06 05:16:39 +02:00
Přemysl Eric Janouch	524eea9b2f	Manual: fix the example — Things managed to work once but for rather arbitrary reasons.	2020-09-05 21:32:05 +02:00
Přemysl Eric Janouch	3ce08d33f6	Bump version, update NEWS	2020-09-05 20:10:48 +02:00
Přemysl Eric Janouch	a75f990565	Add an instructive man page	2020-09-05 20:10:47 +02:00
Přemysl Eric Janouch	46fa50749f	Add a --version option — And fix that --reservation was missing from the optstring.	2020-09-05 20:08:41 +02:00
Přemysl Eric Janouch	796a9640d3	Make it possible to change the signature reservation	2020-09-04 18:33:12 +02:00
Přemysl Eric Janouch	2d08100b58	Avoid downgrading the document's PDF version	2020-09-04 18:30:09 +02:00
Přemysl Eric Janouch	1224d9be47	Return errors rather than mangle documents	2020-09-04 16:05:14 +02:00
Přemysl Eric Janouch	486cafa6b4	Go: update dependencies	2020-08-12 06:15:41 +02:00
Přemysl Eric Janouch	a0696cdb88	Name change	2020-08-12 06:14:03 +02:00
Přemysl Janouch	be8480f8af	Consistency	2018-12-14 02:52:05 +01:00
Přemysl Janouch	f9f3171c02	Use Go modules	2018-12-01 22:43:11 +01:00
Přemysl Janouch	0ea296de67	Go: less API stupidity coming from the C++ heritage	2018-10-04 14:46:12 +02:00
Přemysl Janouch	9d2412398a	Go: additional small fixes	2018-10-04 14:14:04 +02:00
Přemysl Janouch	62206ed344	Go: documentation cleanup	2018-10-04 13:18:37 +02:00
Přemysl Janouch	9ac8360979	Go: use multiple return values — The compiler has made it more obvious where we eat error messages.	2018-10-04 13:09:29 +02:00
Přemysl Janouch	50578fe99f	Go: add Object constructors	2018-10-04 12:51:23 +02:00
Přemysl Janouch	eedd9a550c	Go: cleanups	2018-10-04 12:11:43 +02:00