Update documentation

Add an external VFS for Midnight Commander
Go: enable listing all indirect objects
2021-12-09 15:28:01 +01:00 · 2021-12-09 15:24:25 +01:00 · 2021-12-09 14:07:15 +01:00 · 2021-12-09 14:07:14 +01:00 · 2021-12-08 21:33:26 +01:00 · 2021-12-08 20:49:06 +01:00
13 changed files with 403 additions and 107 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -0,0 +1,8 @@
 BasedOnStyle: Chromium
 ColumnLimit: 100
 IndentCaseLabels: false
 AccessModifierOffset: -2
 ContinuationIndentWidth: 2
 SpaceAfterTemplateKeyword: false
 SpaceAfterCStyleCast: true
 SpacesBeforeTrailingComments: 2
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,8 @@
 /builddir
 /pdf-simple-sign.cflags
 /pdf-simple-sign.config
 /pdf-simple-sign.creator
 /pdf-simple-sign.creator.user
 /pdf-simple-sign.cxxflags
 /pdf-simple-sign.files
 /pdf-simple-sign.includes
--- a/2
+++ b/2
@@ -1,4 +1,4 @@
-Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
+Copyright (c) 2017 - 2021, Přemysl Eric Janouch <p@janouch.name>
 Permission to use, copy, modify, and/or distribute this software for any
 purpose with or without fee is hereby granted.
--- a/9
+++ b/9
@@ -1,3 +1,12 @@
 1.1.1 (2020-09-06)
 * Fix a dysfunctional example in the manual
 * Go: write the xref table in a deterministic order
 * Add a trivial test suite, based on pdfsig from poppler-utils
 1.1 (2020-09-05)
 * Make it possible to change the signature reservation with an option
--- a/README.adoc
+++ b/README.adoc
@@ -12,6 +12,8 @@ Documentation
 See the link:pdf-simple-sign.adoc[man page] for information about usage.
 The rest of this README will concern itself with externalities.
 image:https://pkg.go.dev/badge/janouch.name/pdf-simple-sign@master/pdf["PkgGoDev", link="https://pkg.go.dev/janouch.name/pdf-simple-sign@master/pdf"]
 Building
 --------
 Build dependencies: Meson, Asciidoctor, a C++11 compiler, pkg-config +
@@ -27,6 +29,11 @@ In addition to the C++ version, also included is a native Go port:
 $ go get janouch.name/pdf-simple-sign/cmd/pdf-simple-sign
 And a crude external VFS for Midnight Commander that may be used to extract
 all streams from a given PDF file:
 $ go get janouch.name/pdf-simple-sign/cmd/extfs-pdf
 Contributing and Support
 ------------------------
 Use https://git.janouch.name/p/pdf-simple-sign to report bugs, request features,
--- a/cmd/extfs-pdf/main.go
+++ b/cmd/extfs-pdf/main.go
@@ -0,0 +1,132 @@
 //
 // Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name>
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted.
 //
 // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
 // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 //
 // extfs-pdf is an external VFS plugin for Midnight Commander.
 // More serious image extractors should rewrite this to use pdfimages(1).
 package main
 import (
 	"flag"
 	"fmt"
 	"os"
 	"janouch.name/pdf-simple-sign/pdf"
 )
 // die prints the formatted message, followed by a newline, to standard error
 // and terminates the process with the given exit status. It never returns.
 func die(status int, format string, args ...interface{}) {
 	fmt.Fprintf(os.Stderr, format+"\n", args...)
 	os.Exit(status)
 }
 // usage reports the expected command line on standard error and exits
 // with status 1, by way of die.
 func usage() {
 	program := os.Args[0]
 	die(1, "Usage: %s [-h] COMMAND DOCUMENT [ARG...]", program)
 }
 // streamSuffix chooses the file name suffix under which a stream object's
 // data is listed. A stream whose Filter entry is a single name gets a suffix
 // derived from that name; any other stream is listed as "stream".
 func streamSuffix(o *pdf.Object) string {
 	filter := o.Dict["Filter"]
 	if filter.Kind != pdf.Name {
 		return "stream"
 	}
 	name := filter.String
 	switch name {
 	case "DCTDecode":
 		return "jpg"
 	case "JPXDecode":
 		return "jp2"
 	case "JBIG2Decode":
 		// pdfimages(1) uses this extension; the data does not form
 		// a complete standalone JBIG2 file.
 		return "jb2e"
 	}
 	// Filters without a known file type are exposed under their own name.
 	return name
 }
 // list prints one extfs listing line for every indirect object returned by
 // ListIndirect, named "N-GENERATION", plus a second line named
 // "N-GENERATION.SUFFIX" for the stream data of each stream object.
 //
 // Objects that cannot be retrieved are reported on standard error
 // and are still listed, with a size of zero.
 func list(updater *pdf.Updater) {
 	for _, ref := range updater.ListIndirect() {
 		var serializedLen int
 		obj, err := updater.Get(ref.N, ref.Generation)
 		if err == nil {
 			// The size is that of our own serialization, which may differ
 			// from the bytes in the document; getting those is more work.
 			serializedLen = len(obj.Serialize())
 		} else {
 			fmt.Fprintf(os.Stderr, "%s\n", err)
 		}
 		fmt.Printf("-r--r--r-- 1 0 0 %d 01-01-1970 00:00 %d-%d\n",
 			serializedLen, ref.N, ref.Generation)
 		if obj.Kind != pdf.Stream {
 			continue
 		}
 		fmt.Printf("-r--r--r-- 1 0 0 %d 01-01-1970 00:00 %d-%d.%s\n",
 			len(obj.Stream), ref.N, ref.Generation, streamSuffix(&obj))
 	}
 }
 // copyout writes the object named by storedFilename into the file extractTo.
 //
 // storedFilename has the form "N-GENERATION", optionally followed by
 // a suffix. Without a suffix, the serialized object is written;
 // with any suffix, the object's stream data is written instead.
 // Every failure terminates the process with exit status 3.
 func copyout(updater *pdf.Updater, storedFilename, extractTo string) {
 	var number, gen uint
 	var ext string
 	matched, err := fmt.Sscanf(storedFilename, "%d-%d%s", &number, &gen, &ext)
 	// The suffix is optional, so only the two numbers are required.
 	if matched < 2 {
 		die(3, "%s: %s", storedFilename, err)
 	}
 	object, err := updater.Get(number, gen)
 	if err != nil {
 		die(3, "%s: %s", storedFilename, err)
 	}
 	data := []byte(object.Serialize())
 	if ext != "" {
 		data = object.Stream
 	}
 	if err := os.WriteFile(extractTo, data, 0666); err != nil {
 		die(3, "%s", err)
 	}
 }
 // main parses the command line, loads the PDF document named by the second
 // argument, and dispatches to the requested command ("list" or "copyout").
 //
 // It exits with status 1 on usage errors, unknown commands and unreadable
 // documents, and with status 2 when the document cannot be parsed.
 func main() {
 	flag.Usage = usage
 	flag.Parse()
 	argc := flag.NArg()
 	if argc < 2 {
 		usage()
 	}
 	command, documentPath := flag.Arg(0), flag.Arg(1)
 	doc, err := os.ReadFile(documentPath)
 	if err != nil {
 		die(1, "%s", err)
 	}
 	updater, err := pdf.NewUpdater(doc)
 	if err != nil {
 		die(2, "%s", err)
 	}
 	// usage and die exit the process, so the statements following them
 	// only run once the argument count has been validated.
 	switch command {
 	case "list":
 		if argc != 2 {
 			usage()
 		}
 		list(updater)
 	case "copyout":
 		if argc != 4 {
 			usage()
 		}
 		copyout(updater, flag.Arg(2), flag.Arg(3))
 	default:
 		die(1, "unsupported command: %s", command)
 	}
 }
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module janouch.name/pdf-simple-sign
-go 1.14
+go 1.17
 require (
 	go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1
--- a/go.sum
+++ b/go.sum
@@ -1,9 +1,5 @@
 go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8 h1:W3oGFPlHBLgXdsbPVixWFMYsuPhm81/Qww3XAgBbn/0=
 go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8/go.mod h1:5fWP3IVYEMc04wC+lMJAfkmNmKAl2P1swVv8VS+URZ8=
 go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1 h1:A/5uWzF44DlIgdm/PQFwfMkW0JX+cIcQi/SwLAmZP5M=
 go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk=
 golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85 h1:et7+NAX3lLIk5qUCTA9QelBjGE/NkhzYw/mhnr0s7nI=
 golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
 golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de h1:ikNHVSjEfnvz6sxdSPCaPt572qowuyMDMJLLm3Db3ig=
 golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
--- a/meson.build
+++ b/meson.build
@@ -1,9 +1,9 @@
 project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'],
-	version : '1.1')
+	version : '1.1.1')
 conf = configuration_data()
-conf.set('PROJECT_NAME', '"' + meson.project_name() + '"')
+conf.set_quoted('PROJECT_NAME', meson.project_name())
-conf.set('PROJECT_VERSION', '"' + meson.project_version() + '"')
+conf.set_quoted('PROJECT_VERSION', meson.project_version())
 configure_file(output : 'config.h', configuration : conf)
 cryptodep = dependency('libcrypto')
--- a/pdf-simple-sign.adoc
+++ b/pdf-simple-sign.adoc
@@ -50,10 +50,11 @@ Examples
 Create a self-signed certificate, make a document containing the current date,
 sign it and verify the attached signature:
- $ openssl req -newkey rsa:2048 -subj "/CN=Test" -nodes
+ $ openssl req -newkey rsa:2048 -subj /CN=Test -nodes \
-   -keyout key.pem -x509 -out cert.pem 2>/dev/null
+   -keyout key.pem -x509 -addext keyUsage=digitalSignature \
   -out cert.pem 2>/dev/null
 $ openssl pkcs12 -inkey key.pem -in cert.pem \
-   -export -passout pass:test -out key-cert.p12
+   -export -passout pass: -out key-pair.p12
 $ date | groff -T pdf > test.pdf
 $ pdf-simple-sign test.pdf test.signed.pdf key-pair.p12 ""
 $ pdfsig test.signed.pdf
--- a/pdf-simple-sign.cpp
+++ b/pdf-simple-sign.cpp
@@ -16,26 +16,26 @@
 // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 //
 #include <cstdio>
 #include <cmath>
 #include <cstdio>
 #undef NDEBUG
 #include <cassert>
 #include <vector>
 #include <map>
 #include <regex>
 #include <memory>
 #include <regex>
 #include <set>
 #include <vector>
 #if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
 #error Need libstdc++ >= 4.9 for <regex>
 #endif
 #include <unistd.h>
 #include <getopt.h>
 #include <openssl/err.h>
 #include <openssl/x509v3.h>
 #include <openssl/pkcs12.h>
 #include <openssl/x509v3.h>
 #include <unistd.h>
 #include "config.h"
@@ -55,7 +55,7 @@ static std::string concatenate(const std::vector<std::string>& v, const std::str
 template<typename... Args>
 std::string ssprintf(const std::string& format, Args... args) {
-  size_t size = std::snprintf(nullptr, 0, format.c_str(), args... ) + 1;
+  size_t size = std::snprintf(nullptr, 0, format.c_str(), args...) + 1;
  std::unique_ptr<char[]> buf(new char[size]);
  std::snprintf(buf.get(), size, format.c_str(), args...);
  return std::string(buf.get(), buf.get() + size - 1);
@@ -64,7 +64,7 @@ std::string ssprintf(const std::string& format, Args... args) {
 // -------------------------------------------------------------------------------------------------
 /// PDF token/object thingy.  Objects may be composed either from one or a sequence of tokens.
-/// The PDF Reference doesn't actually speak of tokens.
+/// The PDF Reference doesn't actually speak of tokens, though ISO 32000-1:2008 does.
 struct pdf_object {
  enum type {
    END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
@@ -262,14 +262,12 @@ static std::string pdf_serialize(const pdf_object& o) {
  case pdf_object::NL:      return "\n";
  case pdf_object::NIL:     return "null";
  case pdf_object::BOOL:    return o.number ? "true" : "false";
-  case pdf_object::NUMERIC:
+  case pdf_object::NUMERIC: {
  {
    if (o.is_integer()) return std::to_string((long long) o.number);
    return std::to_string(o.number);
  }
  case pdf_object::KEYWORD: return o.string;
-  case pdf_object::NAME:
+  case pdf_object::NAME: {
  {
    std::string escaped = "/";
    for (char c : o.string) {
      if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
@@ -279,8 +277,7 @@ static std::string pdf_serialize(const pdf_object& o) {
    }
    return escaped;
  }
-  case pdf_object::STRING:
+  case pdf_object::STRING: {
  {
    std::string escaped;
    for (char c : o.string) {
      if (c == '\\' || c == '(' || c == ')')
@@ -293,15 +290,13 @@ static std::string pdf_serialize(const pdf_object& o) {
  case pdf_object::E_ARRAY: return "]";
  case pdf_object::B_DICT:  return "<<";
  case pdf_object::E_DICT:  return ">>";
-  case pdf_object::ARRAY:
+  case pdf_object::ARRAY: {
  {
    std::vector<std::string> v;
    for (const auto& i : o.array)
      v.push_back(pdf_serialize(i));
    return "[ " + concatenate(v, " ") + " ]";
  }
-  case pdf_object::DICT:
+  case pdf_object::DICT: {
  {
    std::string s;
    for (const auto i : o.dict)
      // FIXME the key is also supposed to be escaped by pdf_serialize()
@@ -372,8 +367,8 @@ pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack
  auto g = stack.back(); stack.pop_back();
  auto n = stack.back(); stack.pop_back();
-  if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
+  if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX ||
-   || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
+      !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
    return {pdf_object::END, "invalid object ID pair"};
  pdf_object obj{pdf_object::OBJECT};
@@ -397,8 +392,8 @@ pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
  auto g = stack.back(); stack.pop_back();
  auto n = stack.back(); stack.pop_back();
-  if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
+  if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX ||
-   || !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
+      !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
    return {pdf_object::END, "invalid reference ID pair"};
  pdf_object ref{pdf_object::REFERENCE};
@@ -415,8 +410,7 @@ pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) co
  case pdf_object::COMMENT:
    // These are not important to parsing, not even for this procedure's needs
    return parse(lex, stack);
-  case pdf_object::B_ARRAY:
+  case pdf_object::B_ARRAY: {
  {
    std::vector<pdf_object> array;
    while (1) {
      auto object = parse(lex, array);
@@ -428,8 +422,7 @@ pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) co
    }
    return array;
  }
-  case pdf_object::B_DICT:
+  case pdf_object::B_DICT: {
  {
    std::vector<pdf_object> array;
    while (1) {
      auto object = parse(lex, array);
@@ -477,8 +470,8 @@ std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entrie
      break;
    auto second = parse(lex, throwaway_stack);
-    if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX
+    if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX ||
-     || !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
+        !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
      return "invalid xref section header";
    const size_t start = object.number;
@@ -487,9 +480,9 @@ std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entrie
      auto off = parse(lex, throwaway_stack);
      auto gen = parse(lex, throwaway_stack);
      auto key = parse(lex, throwaway_stack);
-      if (!off.is_integer() || off.number < 0 || off.number > document.length()
+      if (!off.is_integer() || off.number < 0 || off.number > document.length() ||
-       || !gen.is_integer() || gen.number < 0 || gen.number > 65535
+          !gen.is_integer() || gen.number < 0 || gen.number > 65535 ||
-       || key.type != pdf_object::KEYWORD)
+          key.type != pdf_object::KEYWORD)
        return "invalid xref entry";
      bool free = true;
@@ -550,8 +543,8 @@ std::string pdf_updater::initialize() {
    const auto prev_offset = trailer.dict.find("Prev");
    if (prev_offset == trailer.dict.end())
      break;
-    // FIXME we don't check for size_t over or underflow
+    // FIXME do not read offsets and sizes as floating point numbers
-    if (!prev_offset->second.is_integer())
+    if (!prev_offset->second.is_integer() || prev_offset->second.number < 0)
      return "invalid Prev offset";
    xref_offset = prev_offset->second.number;
  }
@@ -657,8 +650,8 @@ void pdf_updater::flush_updates() {
  }
  trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
-  document += "trailer\n" + pdf_serialize(trailer)
+  document +=
-    + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
+    "trailer\n" + pdf_serialize(trailer) + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
 }
 // -------------------------------------------------------------------------------------------------
@@ -700,9 +693,9 @@ static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_ge
  // XXX technically speaking, this may be an indirect reference.  The correct way to solve this
  //   seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
  auto kids = obj.dict.find("Kids");
-  if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY
+  if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY ||
-   || kids->second.array.empty()
+      kids->second.array.empty() ||
-   || kids->second.array.at(0).type != pdf_object::REFERENCE)
+      kids->second.array.at(0).type != pdf_object::REFERENCE)
    return {pdf_object::NIL};
  // XXX nothing prevents us from recursing in an evil circular graph
@@ -740,8 +733,8 @@ static std::string pdf_fill_in_signature(std::string& document, size_t sign_off,
  // OpenSSL error reasons will usually be of more value than any distinction I can come up with
  std::string err = "OpenSSL failure";
-  if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr))
+  if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr)) ||
-   || !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
+      !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
    err = pkcs12_path + ": parse failure";
    goto error;
  }
@@ -766,8 +759,8 @@ static std::string pdf_fill_in_signature(std::string& document, size_t sign_off,
 #endif
  // The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
-  if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags))
+  if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags)) ||
-   || !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
+      !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
    goto error;
  // For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
  //   PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
@@ -777,10 +770,10 @@ static std::string pdf_fill_in_signature(std::string& document, size_t sign_off,
  // Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
  // a copy of the whole document.  Hopefully this writes directly into a digest BIO.
-  if (!(p7bio = PKCS7_dataInit(p7, nullptr))
+  if (!(p7bio = PKCS7_dataInit(p7, nullptr)) ||
-   || (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off)
+      (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off) ||
-   || (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len)
+      (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len) ||
-   || BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
+      BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
    goto error;
 #if 0
@@ -850,7 +843,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) {
  // 8.7 Digital Signatures - /signature dictionary/
  auto sigdict_n = pdf.allocate();
  size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
-  pdf.update(sigdict_n, [&]{
+  pdf.update(sigdict_n, [&] {
    // The timestamp is important for Adobe Acrobat Reader DC.  The ideal would be to use RFC 3161.
    pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
                        "   /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
@@ -883,7 +876,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) {
  }}});
  auto sigfield_n = pdf.allocate();
-  pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); });
+  pdf.update(sigfield_n, [&] { pdf.document += pdf_serialize(sigfield); });
  auto pages_ref = root.dict.find("Pages");
  if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
@@ -901,7 +894,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) {
    annots = {pdf_object::ARRAY};
  }
  annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
-  pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });
+  pdf.update(page.n, [&] { pdf.document += pdf_serialize(page); });
  // 8.6.1 Interactive Form Dictionary
  if (root.dict.count("AcroForm"))
@@ -918,7 +911,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) {
  if (pdf.version(root) < 16)
    root.dict["Version"] = {pdf_object::NAME, "1.6"};
-  pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
+  pdf.update(root_ref->second.n, [&] { pdf.document += pdf_serialize(root); });
  pdf.flush_updates();
  // Now that we know the length of everything, store byte ranges of what we're about to sign,
@@ -947,9 +940,9 @@ static void die(int status, const char* format, ...) {
 int main(int argc, char* argv[]) {
  auto invocation_name = argv[0];
-  auto usage = [=]{
+  auto usage = [=] {
    die(1, "Usage: %s [-h] [-r RESERVATION] INPUT-FILENAME OUTPUT-FILENAME PKCS12-PATH PKCS12-PASS",
-            invocation_name);
+        invocation_name);
  };
  static struct option opts[] = {
@@ -963,8 +956,7 @@ int main(int argc, char* argv[]) {
  long reservation = 4096;
  while (1) {
    int option_index = 0;
-    auto c = getopt_long(argc, const_cast<char* const*>(argv),
+    auto c = getopt_long(argc, const_cast<char* const*>(argv), "hVr:", opts, &option_index);
                         "hVr:", opts, &option_index);
    if (c == -1)
      break;
--- a/pdf/pdf.go
+++ b/pdf/pdf.go
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2018 - 2020, Přemysl Eric Janouch <p@janouch.name>
+// Copyright (c) 2018 - 2021, Přemysl Eric Janouch <p@janouch.name>
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted.
@@ -59,20 +59,22 @@ const (
 	// higher-level objects
 	Array
 	Dict
 	Stream
 	Indirect
 	Reference
 )
-// Object is a PDF token/object thingy.  Objects may be composed either from
+// Object is a PDF token/object thingy. Objects may be composed either from
 // one or a sequence of tokens. The PDF Reference doesn't actually speak
-// of tokens.
+// of tokens, though ISO 32000-1:2008 does.
 type Object struct {
 	Kind ObjectKind
 	String        string            // Comment/Keyword/Name/String
 	Number        float64           // Bool, Numeric
 	Array         []Object          // Array, Indirect
-	Dict          map[string]Object // Dict, in the future also Stream
+	Dict          map[string]Object // Dict, Stream
 	Stream        []byte            // Stream
 	N, Generation uint              // Indirect, Reference
 }
@@ -458,6 +460,10 @@ func (o *Object) Serialize() string {
 			fmt.Fprint(b, " /", k, " ", v.Serialize())
 		}
 		return "<<" + b.String() + " >>"
 	case Stream:
 		d := NewDict(o.Dict)
 		d.Dict["Length"] = NewNumeric(float64(len(o.Stream)))
 		return d.Serialize() + "\nstream\n" + string(o.Stream) + "\nendstream"
 	case Indirect:
 		return fmt.Sprintf("%d %d obj\n%s\nendobj", o.N, o.Generation,
 			o.Array[0].Serialize())
@@ -497,6 +503,65 @@ type Updater struct {
 	Trailer map[string]Object
 }
 // ListIndirect returns a Reference Object for each cross-reference table
 // entry whose nonfree flag is set, in ascending order of object number.
 // The result is never nil, even when there are no such entries.
 func (u *Updater) ListIndirect() []Object {
 	refs := []Object{}
 	for n, entry := range u.xref {
 		if !entry.nonfree {
 			continue
 		}
 		refs = append(refs, NewReference(uint(n), entry.generation))
 	}
 	return refs
 }
 // parseStream finishes parsing a stream object once the "stream" keyword
 // has been read. It pops the preceding dictionary off the stack, reads
 // as many bytes from the lexer as the dictionary's Length entry states,
 // and requires the "endstream" keyword to follow.
 //
 // On success, it returns the popped dictionary turned into a Stream object
 // whose Stream member holds the data. On failure, it returns an error,
 // either its own or one passed through from Dereference, the lexer or parse.
 func (u *Updater) parseStream(lex *Lexer, stack *[]Object) (Object, error) {
 	lenStack := len(*stack)
 	if lenStack < 1 {
 		return newError("missing stream dictionary")
 	}
 	// The stream dictionary is whatever was parsed last.
 	dict := (*stack)[lenStack-1]
 	if dict.Kind != Dict {
 		return newError("stream not preceded by a dictionary")
 	}
 	// Pop the dictionary, it is returned as part of the stream instead.
 	*stack = (*stack)[:lenStack-1]
 	length, ok := dict.Dict["Length"]
 	if !ok {
 		return newError("missing stream Length")
 	}
 	// Length may be given as a reference to another object.
 	length, err := u.Dereference(length)
 	if err != nil {
 		return length, err
 	}
 	// The value is converted to an int further below.
 	if !length.IsUint() || length.Number > math.MaxInt {
 		return newError("stream Length not an unsigned integer")
 	}
 	// Expect exactly one newline.
 	if nl, err := lex.Next(); err != nil {
 		return nl, err
 	} else if nl.Kind != NL {
 		return newError("stream does not start with a newline")
 	}
 	size := int(length.Number)
 	if len(lex.P) < size {
 		return newError("stream is longer than the document")
 	}
 	dict.Kind = Stream
 	// The data is not copied, it is a subslice of the lexer's input.
 	dict.Stream = lex.P[:size]
 	// Move the lexer past the stream data.
 	lex.P = lex.P[size:]
 	// Skip any number of trailing newlines or comments.
 	if end, err := u.parse(lex, stack); err != nil {
 		return end, err
 	} else if end.Kind != Keyword || end.String != "endstream" {
 		return newError("improperly terminated stream")
 	}
 	return dict, nil
 }
 func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) {
 	lenStack := len(*stack)
 	if lenStack < 2 {
@@ -590,15 +655,11 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
 		}
 		return NewDict(dict), nil
 	case Keyword:
 		// Appears in the document body, typically needs
 		// to access the cross-reference table.
 		//
 		// TODO(p): Use the xref to read /Length etc. once we
 		// actually need to read such objects; presumably
 		// streams can use the Object.String member.
 		switch token.String {
 		case "stream":
-			return newError("streams are not supported yet")
+			// Appears in the document body,
 			// typically needs to access the cross-reference table.
 			return u.parseStream(lex, stack)
 		case "obj":
 			return u.parseIndirect(lex, stack)
 		case "R":
@@ -722,7 +783,7 @@ func NewUpdater(document []byte) (*Updater, error) {
 		if !ok {
 			break
 		}
-		// FIXME: We don't check for size_t over or underflow.
+		// FIXME: Do not read offsets and sizes as floating point numbers.
 		if !prevOffset.IsInteger() {
 			return nil, errors.New("invalid Prev offset")
 		}
@@ -766,8 +827,6 @@ func (u *Updater) Version(root *Object) int {
 // Get retrieves an object by its number and generation--may return
 // Nil or End with an error.
 //
 // TODO(p): We should fix all uses of this not to eat the error.
 func (u *Updater) Get(n, generation uint) (Object, error) {
 	if n >= u.xrefSize {
 		return New(Nil), nil
@@ -796,6 +855,14 @@ func (u *Updater) Get(n, generation uint) (Object, error) {
 	}
 }
 // Dereference resolves Reference objects through Get, and returns
 // objects of any other kind unchanged, with a nil error.
 func (u *Updater) Dereference(o Object) (Object, error) {
 	if o.Kind == Reference {
 		return u.Get(o.N, o.Generation)
 	}
 	return o, nil
 }
 // Allocate allocates a new object number.
 func (u *Updater) Allocate() uint {
 	n := u.xrefSize
@@ -853,30 +920,19 @@ func (u *Updater) FlushUpdates() {
 		return updated[i] < updated[j]
 	})
 	groups := make(map[uint]uint)
 	for i := 0; i < len(updated); {
 		start, count := updated[i], uint(1)
 		for i++; i != len(updated) && updated[i] == start+count; i++ {
 			count++
 		}
 		groups[start] = count
 	}
 	// Taking literally "Each cross-reference section begins with a line
 	// containing the keyword xref. Following this line are one or more
 	// cross-reference subsections." from 3.4.3 in PDF Reference.
 	if len(groups) == 0 {
 		groups[0] = 0
 	}
 	buf := bytes.NewBuffer(u.Document)
 	startXref := buf.Len() + 1
 	buf.WriteString("\nxref\n")
-	for start, count := range groups {
+	for i := 0; i < len(updated); {
-		fmt.Fprintf(buf, "%d %d\n", start, count)
+		start, stop := updated[i], updated[i]+1
-		for i := uint(0); i < count; i++ {
+		for i++; i < len(updated) && updated[i] == stop; i++ {
-			ref := u.xref[start+uint(i)]
+			stop++
 		}
 		fmt.Fprintf(buf, "%d %d\n", start, stop-start)
 		for ; start < stop; start++ {
 			ref := u.xref[start]
 			if ref.nonfree {
 				fmt.Fprintf(buf, "%010d %05d n \n", ref.offset, ref.generation)
 			} else {
@@ -885,6 +941,13 @@ func (u *Updater) FlushUpdates() {
 		}
 	}
 	// Taking literally "Each cross-reference section begins with a line
 	// containing the keyword xref. Following this line are one or more
 	// cross-reference subsections." from 3.4.3 in PDF Reference.
 	if len(updated) == 0 {
 		fmt.Fprintf(buf, "%d %d\n", 0, 0)
 	}
 	u.Trailer["Size"] = NewNumeric(float64(u.xrefSize))
 	trailer := NewDict(u.Trailer)
@@ -910,15 +973,15 @@ func NewDate(ts time.Time) Object {
 // GetFirstPage retrieves the first page of the given page (sub)tree reference,
 // or returns a Nil object if unsuccessful.
-func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
+func (u *Updater) GetFirstPage(node Object) Object {
-	obj, _ := u.Get(nodeN, nodeGeneration)
+	obj, err := u.Dereference(node)
-	if obj.Kind != Dict {
+	if err != nil || obj.Kind != Dict {
 		return New(Nil)
 	}
 	// Out of convenience; these aren't filled normally.
-	obj.N = nodeN
+	obj.N = node.N
-	obj.Generation = nodeGeneration
+	obj.Generation = node.Generation
 	if typ, ok := obj.Dict["Type"]; !ok || typ.Kind != Name {
 		return New(Nil)
@@ -938,7 +1001,7 @@ func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
 	}
 	// XXX: Nothing prevents us from recursing in an evil circular graph.
-	return u.GetFirstPage(kids.Array[0].N, kids.Array[0].Generation)
+	return u.GetFirstPage(kids.Array[0])
 }
 // -----------------------------------------------------------------------------
@@ -1132,7 +1195,10 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
 	if !ok || rootRef.Kind != Reference {
 		return nil, errors.New("trailer does not contain a reference to Root")
 	}
-	root, _ := pdf.Get(rootRef.N, rootRef.Generation)
+	root, err := pdf.Dereference(rootRef)
 	if err != nil {
 		return nil, fmt.Errorf("Root dictionary retrieval failed: %s", err)
 	}
 	if root.Kind != Dict {
 		return nil, errors.New("invalid Root dictionary reference")
 	}
@@ -1186,7 +1252,7 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
 	if !ok || pagesRef.Kind != Reference {
 		return nil, errors.New("invalid Pages reference")
 	}
-	page := pdf.GetFirstPage(pagesRef.N, pagesRef.Generation)
+	page := pdf.GetFirstPage(pagesRef)
 	if page.Kind != Dict {
 		return nil, errors.New("invalid or unsupported page tree")
 	}
--- a/test.sh
+++ b/test.sh
@@ -0,0 +1,77 @@
 #!/bin/sh -e
 # Test basic functionality of both versions
 # Usage: ./test.sh builddir/pdf-simple-sign cmd/pdf-simple-sign/pdf-simple-sign
 # log prints its first argument as a progress message, prefixed with "-- "
 # and wrapped in the tput sequences for italics (sitm) and reset (sgr0).
 log() { echo "`tput sitm`-- $1`tput sgr0`"; }
 # die prints its first argument as an error message, prefixed with "-- "
 # and wrapped in the tput sequences for bold and reset (sgr0),
 # then exits the script with status 1.
 die() { echo "`tput bold`-- $1`tput sgr0`"; exit 1; }
 # Get rid of old test files
 rm -rf tmp
 mkdir tmp
 # Create documents in various tools
 log "Creating source documents"
 inkscape --pipe --export-filename=tmp/cairo.pdf <<'EOF' 2>/dev/null || :
 <svg xmlns="http://www.w3.org/2000/svg"><text x="5" y="10">Hello</text></svg>
 EOF
 date | tee tmp/lowriter.txt | groff -T pdf > tmp/groff.pdf || :
 lowriter --convert-to pdf tmp/lowriter.txt --outdir tmp >/dev/null || :
 convert rose: tmp/imagemagick.pdf || :
 # Create a root CA certificate pair
 log "Creating certificates"
 openssl req -newkey rsa:2048 -subj "/CN=Test CA" -nodes \
 	-keyout tmp/ca.key.pem -x509 -out tmp/ca.cert.pem 2>/dev/null
 # Create a private NSS database and insert our test CA there
 rm -rf tmp/nssdir
 mkdir tmp/nssdir
 certutil -N --empty-password -d sql:tmp/nssdir
 certutil -d sql:tmp/nssdir -A -n root -t ,C, -a -i tmp/ca.cert.pem
 # Create a leaf certificate pair
 cat > tmp/cert.cfg <<'EOF'
 [smime]
 basicConstraints = CA:FALSE
 keyUsage = digitalSignature
 extendedKeyUsage = emailProtection
 nsCertType = email
 EOF
 openssl req -newkey rsa:2048 -subj "/CN=Test Leaf" -nodes \
 	-keyout tmp/key.pem -out tmp/cert.csr 2>/dev/null
 openssl x509 -req -in tmp/cert.csr -out tmp/cert.pem \
 	-CA tmp/ca.cert.pem -CAkey tmp/ca.key.pem -set_serial 1 \
 	-extensions smime -extfile tmp/cert.cfg 2>/dev/null
 openssl verify -CAfile tmp/ca.cert.pem tmp/cert.pem >/dev/null
 openssl pkcs12 -inkey tmp/key.pem -in tmp/cert.pem \
 	-export -passout pass: -out tmp/key-pair.p12
 # Run the whole test sequence for each tool given on the command line
 for tool in "$@"; do
 	# Start without any results from the previously tested tool
 	rm -f tmp/*.signed.pdf
 	# Sign each source document and show the validation result from pdfsig
 	for source in tmp/*.pdf; do
 		log "Testing $tool with $source"
 		result=${source%.pdf}.signed.pdf
 		$tool "$source" "$result" tmp/key-pair.p12 ""
 		pdfsig -nssdir sql:tmp/nssdir "$result" | grep Validation
 	done
 	# From here on, $source and $result keep their values
 	# from the last iteration of the loop above
 	log "Testing $tool for expected failures"
 	$tool "$result" "$source.fail.pdf" tmp/key-pair.p12 "" \
 		&& die "Double signing shouldn't succeed"
 	$tool -r 1 "$source" "$source.fail.pdf" tmp/key-pair.p12 "" \
 		&& die "Too low reservations shouldn't succeed"
 	# Our generators do not use PDF versions higher than 1.5
 	log "Testing $tool for version detection"
 	grep -q "/Version /1.6" "$result" \
 		|| die "Version detection seems to misbehave (no upgrade)"
 	# Pretend that the source is a PDF 1.7 document,
 	# in which case signing must not add a /Version of 1.6
 	sed '1s/%PDF-1../%PDF-1.7/' "$source" > "$source.alt"
 	$tool "$source.alt" "$result.alt" tmp/key-pair.p12 ""
 	grep -q "/Version /1.6" "$result.alt" \
 		&& die "Version detection seems to misbehave (downgraded)"
 done
 log "OK"
Author	SHA1	Message	Date
Přemysl Eric Janouch	8a00d7064b	Update documentation	2021-12-09 15:28:01 +01:00
Přemysl Eric Janouch	b358467791	Add an external VFS for Midnight Commander	2021-12-09 15:24:25 +01:00
Přemysl Eric Janouch	d0f80aa6ae	Go: enable listing all indirect objects	2021-12-09 14:07:15 +01:00
Přemysl Eric Janouch	97ffe3d46e	Go: implement stream parsing/serialization	2021-12-09 14:07:14 +01:00
Přemysl Eric Janouch	1a3c7a8282	Go: add Updater.Dereference()	2021-12-08 21:33:26 +01:00
Přemysl Eric Janouch	d8171b9ac4	Go: improve error handling	2021-12-08 20:49:06 +01:00
Přemysl Eric Janouch	bcb24af926	Minor revision	2021-12-08 20:39:02 +01:00
Přemysl Eric Janouch	c0927c05dd	Add .gitignore	2021-11-06 12:28:25 +01:00
Přemysl Eric Janouch	5e87223b5d	Add clang-format configuration, clean up	2021-11-06 12:27:39 +01:00
Přemysl Eric Janouch	58a4ba1d05	meson.build: use set_quoted()	2021-11-06 11:42:57 +01:00
Přemysl Eric Janouch	350cf89e51	Bump Go modules to 1.17	2021-08-19 05:36:46 +02:00
Přemysl Eric Janouch	d4ff9a6e89	README.adoc: add a PkgGoDev badge	2020-09-11 00:15:58 +02:00
Přemysl Eric Janouch	a5176b5bbb	Bump version, update NEWS	2020-09-06 05:16:40 +02:00
Přemysl Eric Janouch	af6a937033	Go: avoid non-deterministic output The code has even turned out simpler.	2020-09-06 05:16:40 +02:00
Přemysl Eric Janouch	8913f8ba9c	Add a test script to verify basic function	2020-09-06 05:16:39 +02:00
Přemysl Eric Janouch	524eea9b2f	Manual: fix the example Things managed to work once but for rather arbitrary reasons.	2020-09-05 21:32:05 +02:00