Compare commits

...

22 Commits
v1.1 ... master

Author SHA1 Message Date
Přemysl Eric Janouch 55a17a69b7
README.adoc: update package information 5 months ago
Přemysl Eric Janouch 3781aa8e85
Don't fail tests when gropdf isn't installed 5 months ago
Přemysl Eric Janouch 69b939c707
Fix tests, document new limitation 5 months ago
Přemysl Eric Janouch 87681d15ba
Go: bump modules 5 months ago
Přemysl Eric Janouch f01d25596e
Fix the man page
> Any reference to the subject of the current manual page
> should be written with the name in bold.
1 year ago
Přemysl Eric Janouch 67596a8153
extfs-pdf: improve the listing format 2 years ago
Přemysl Eric Janouch 8a00d7064b
Update documentation 2 years ago
Přemysl Eric Janouch b358467791
Add an external VFS for Midnight Commander 2 years ago
Přemysl Eric Janouch d0f80aa6ae
Go: enable listing all indirect objects 2 years ago
Přemysl Eric Janouch 97ffe3d46e
Go: implement stream parsing/serialization 2 years ago
Přemysl Eric Janouch 1a3c7a8282
Go: add Updater.Dereference() 2 years ago
Přemysl Eric Janouch d8171b9ac4
Go: improve error handling 2 years ago
Přemysl Eric Janouch bcb24af926
Minor revision 2 years ago
Přemysl Eric Janouch c0927c05dd
Add .gitignore 2 years ago
Přemysl Eric Janouch 5e87223b5d
Add clang-format configuration, clean up 2 years ago
Přemysl Eric Janouch 58a4ba1d05
meson.build: use set_quoted() 2 years ago
Přemysl Eric Janouch 350cf89e51
Bump Go modules to 1.17 2 years ago
Přemysl Eric Janouch d4ff9a6e89
README.adoc: add a PkgGoDev badge 3 years ago
Přemysl Eric Janouch a5176b5bbb
Bump version, update NEWS 3 years ago
Přemysl Eric Janouch af6a937033
Go: avoid non-deterministic output
The code has even turned out simpler.
3 years ago
Přemysl Eric Janouch 8913f8ba9c
Add a test script to verify basic function 3 years ago
Přemysl Eric Janouch 524eea9b2f
Manual: fix the example
Things managed to work once but for rather arbitrary reasons.
3 years ago

@ -0,0 +1,8 @@
BasedOnStyle: Chromium
ColumnLimit: 100
IndentCaseLabels: false
AccessModifierOffset: -2
ContinuationIndentWidth: 2
SpaceAfterTemplateKeyword: false
SpaceAfterCStyleCast: true
SpacesBeforeTrailingComments: 2

8
.gitignore vendored

@ -0,0 +1,8 @@
/builddir
/pdf-simple-sign.cflags
/pdf-simple-sign.config
/pdf-simple-sign.creator
/pdf-simple-sign.creator.user
/pdf-simple-sign.cxxflags
/pdf-simple-sign.files
/pdf-simple-sign.includes

@ -1,4 +1,4 @@
Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
Copyright (c) 2017 - 2021, Přemysl Eric Janouch <p@janouch.name>
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted.

@ -1,3 +1,12 @@
1.1.1 (2020-09-06)
* Fix a dysfunctional example in the manual
* Go: write the xref table in a deterministic order
* Add a trivial test suite, based on pdfsig from poppler-utils
1.1 (2020-09-05)
* Make it possible to change the signature reservation with an option

@ -2,16 +2,26 @@ pdf-simple-sign
===============
'pdf-simple-sign' is a simple PDF signer intended for documents produced by
the Cairo library, GNU troff, ImageMagick, or similar.
the Cairo library (≤ 1.17.4 or using PDF 1.4), GNU troff, ImageMagick,
or similar.
I don't aim to extend the functionality any further. The project is fairly
self-contained and it should be easy to grasp and change to suit your needs.
Packages
--------
Regular releases are sporadic. git master should be stable enough.
You can get a package with the latest development version using Arch Linux's
https://aur.archlinux.org/packages/pdf-simple-sign-git[AUR],
or as a https://git.janouch.name/p/nixexprs[Nix derivation].
Documentation
-------------
See the link:pdf-simple-sign.adoc[man page] for information about usage.
The rest of this README will concern itself with externalities.
image:https://pkg.go.dev/badge/janouch.name/pdf-simple-sign@master/pdf["PkgGoDev", link="https://pkg.go.dev/janouch.name/pdf-simple-sign@master/pdf"]
Building
--------
Build dependencies: Meson, Asciidoctor, a C++11 compiler, pkg-config +
@ -27,6 +37,11 @@ In addition to the C++ version, also included is a native Go port:
$ go get janouch.name/pdf-simple-sign/cmd/pdf-simple-sign
And a crude external VFS for Midnight Commander, that may be used to extract
all streams from a given PDF file:
$ go get janouch.name/pdf-simple-sign/cmd/extfs-pdf
Contributing and Support
------------------------
Use https://git.janouch.name/p/pdf-simple-sign to report bugs, request features,

@ -0,0 +1,139 @@
//
// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// extfs-pdf is an external VFS plugin for Midnight Commander.
// More serious image extractors should rewrite this to use pdfimages(1).
package main
import (
"flag"
"fmt"
"os"
"time"
"janouch.name/pdf-simple-sign/pdf"
)
// die formats a message, writes it to standard error followed by a newline,
// and terminates the process with the given exit status. It never returns.
func die(status int, format string, args ...interface{}) {
	fmt.Fprintf(os.Stderr, format+"\n", args...)
	os.Exit(status)
}
// usage reports the expected command line, naming the program as it was
// invoked, and exits with status 1 by way of die.
func usage() {
	program := os.Args[0]
	die(1, "Usage: %s [-h] COMMAND DOCUMENT [ARG...]", program)
}
// streamSuffix picks a file name suffix for a stream object, based on
// the Filter entry of its dictionary.
//
// When Filter is not a single name (it is missing, or of any other kind),
// the generic "stream" suffix is returned. A few image filters map to
// conventional file extensions; any other filter name is used verbatim.
func streamSuffix(o *pdf.Object) string {
	filter := o.Dict["Filter"]
	if filter.Kind != pdf.Name {
		return "stream"
	}

	switch name := filter.String; name {
	case "DCTDecode":
		return "jpg"
	case "JPXDecode":
		return "jp2"
	case "JBIG2Decode":
		// This is the file extension used by pdfimages(1).
		// The data is not a complete standalone JBIG2 file.
		return "jb2e"
	default:
		return name
	}
}
// list prints one ls-like line per indirect object of the document to
// standard output, named "n<number>g<generation>", and for stream objects
// an additional line for the stream data alone, with a suffix chosen by
// streamSuffix. Every entry carries the same timestamp, derived from mtime.
//
// Objects that fail to load are reported on standard error and still listed,
// with a size of zero.
func list(mtime time.Time, updater *pdf.Updater) {
	// Month-day-year order; presumably what Midnight Commander's extfs
	// listing parser expects -- verify against its documentation.
	stamp := mtime.Local().Format("01-02-2006 15:04:05")

	for _, ref := range updater.ListIndirect() {
		object, err := updater.Get(ref.N, ref.Generation)

		var size int
		if err == nil {
			// The size is that of the re-serialized object, which is not
			// necessarily the original text; retrieving that is more work.
			size = len(object.Serialize())
		} else {
			fmt.Fprintf(os.Stderr, "%s\n", err)
		}

		fmt.Printf("-r--r--r-- 1 0 0 %d %s n%dg%d\n",
			size, stamp, ref.N, ref.Generation)

		if object.Kind != pdf.Stream {
			continue
		}
		fmt.Printf("-r--r--r-- 1 0 0 %d %s n%dg%d.%s\n", len(object.Stream),
			stamp, ref.N, ref.Generation, streamSuffix(&object))
	}
}
// copyout extracts the entry called storedFilename, as named by list,
// into the file extractTo.
//
// The name has the form "n<number>g<generation>" with an optional suffix.
// Without a suffix, the serialized object is written; with any suffix,
// the object's stream data is written instead. All failures terminate
// the process with exit status 3.
func copyout(updater *pdf.Updater, storedFilename, extractTo string) {
	var n, generation uint
	var suffix string

	// The suffix is optional, so only the two numbers need to match.
	matched, err := fmt.Sscanf(storedFilename, "n%dg%d%s", &n, &generation, &suffix)
	if matched < 2 {
		die(3, "%s: %s", storedFilename, err)
	}

	object, err := updater.Get(n, generation)
	if err != nil {
		die(3, "%s: %s", storedFilename, err)
	}

	serialized := object.Serialize()

	var content []byte
	if suffix == "" {
		content = []byte(serialized)
	} else {
		content = object.Stream
	}

	if err := os.WriteFile(extractTo, content, 0666); err != nil {
		die(3, "%s", err)
	}
}
// main parses the command line, loads the whole document into memory,
// and dispatches to the requested command:
//
//	list DOCUMENT
//	copyout DOCUMENT STORED-FILENAME EXTRACT-TO
//
// Exit status 1 signals usage and file reading errors, 2 a document
// that pdf.NewUpdater rejects.
func main() {
	flag.Usage = usage
	flag.Parse()
	if flag.NArg() < 2 {
		usage()
	}

	command, documentPath := flag.Arg(0), flag.Arg(1)
	doc, readErr := os.ReadFile(documentPath)
	if readErr != nil {
		die(1, "%s", readErr)
	}

	// The modification time is only used for listings,
	// so fall back to the Unix epoch when it cannot be determined.
	var mtime time.Time
	if info, statErr := os.Stat(documentPath); statErr != nil {
		mtime = time.UnixMilli(0)
	} else {
		mtime = info.ModTime()
	}

	updater, parseErr := pdf.NewUpdater(doc)
	if parseErr != nil {
		die(2, "%s", parseErr)
	}

	// Each command takes an exact number of positional arguments,
	// counting the command name and the document path.
	switch nargs := flag.NArg(); command {
	case "list":
		if nargs != 2 {
			usage()
			return
		}
		list(mtime, updater)
	case "copyout":
		if nargs != 4 {
			usage()
			return
		}
		copyout(updater, flag.Arg(2), flag.Arg(3))
	default:
		die(1, "unsupported command: %s", command)
	}
}

@ -1,8 +1,8 @@
module janouch.name/pdf-simple-sign
go 1.14
go 1.17
require (
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de
go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352
golang.org/x/crypto v0.10.0
)

@ -1,12 +1,12 @@
go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8 h1:W3oGFPlHBLgXdsbPVixWFMYsuPhm81/Qww3XAgBbn/0=
go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8/go.mod h1:5fWP3IVYEMc04wC+lMJAfkmNmKAl2P1swVv8VS+URZ8=
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1 h1:A/5uWzF44DlIgdm/PQFwfMkW0JX+cIcQi/SwLAmZP5M=
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk=
golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85 h1:et7+NAX3lLIk5qUCTA9QelBjGE/NkhzYw/mhnr0s7nI=
golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352 h1:CCriYyAfq1Br1aIYettdHZTy8mBTIPo7We18TuO/bak=
go.mozilla.org/pkcs7 v0.0.0-20210826202110-33d05740a352/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de h1:ikNHVSjEfnvz6sxdSPCaPt572qowuyMDMJLLm3Db3ig=
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.10.0 h1:LKqV2xt9+kDzSTfOhx4FrkEBcMrAgHSYgzywV9zcGmM=
golang.org/x/crypto v0.10.0/go.mod h1:o4eNf7Ede1fv+hwOwZsTHl9EsPFO6q6ZvYR8vYfY45I=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=

@ -1,9 +1,9 @@
project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'],
version : '1.1')
version : '1.1.1')
conf = configuration_data()
conf.set('PROJECT_NAME', '"' + meson.project_name() + '"')
conf.set('PROJECT_VERSION', '"' + meson.project_version() + '"')
conf.set_quoted('PROJECT_NAME', meson.project_name())
conf.set_quoted('PROJECT_VERSION', meson.project_version())
configure_file(output : 'config.h', configuration : conf)
cryptodep = dependency('libcrypto')

@ -14,7 +14,7 @@ Synopsis
Description
-----------
'pdf-simple-sign' is a simple PDF signer intended for documents produced by
*pdf-simple-sign* is a simple PDF signer intended for documents produced by
the Cairo library, GNU troff, ImageMagick, or similar. As such, it currently
comes with some restrictions:
@ -50,10 +50,11 @@ Examples
Create a self-signed certificate, make a document containing the current date,
sign it and verify the attached signature:
$ openssl req -newkey rsa:2048 -subj "/CN=Test" -nodes
-keyout key.pem -x509 -out cert.pem 2>/dev/null
$ openssl req -newkey rsa:2048 -subj /CN=Test -nodes \
-keyout key.pem -x509 -addext keyUsage=digitalSignature \
-out cert.pem 2>/dev/null
$ openssl pkcs12 -inkey key.pem -in cert.pem \
-export -passout pass:test -out key-cert.p12
-export -passout pass: -out key-pair.p12
$ date | groff -T pdf > test.pdf
$ pdf-simple-sign test.pdf test.signed.pdf key-pair.p12 ""
$ pdfsig test.signed.pdf

@ -16,26 +16,26 @@
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
#include <cstdio>
#include <cmath>
#include <cstdio>
#undef NDEBUG
#include <cassert>
#include <vector>
#include <map>
#include <regex>
#include <memory>
#include <regex>
#include <set>
#include <vector>
#if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
#error Need libstdc++ >= 4.9 for <regex>
#endif
#include <unistd.h>
#include <getopt.h>
#include <openssl/err.h>
#include <openssl/x509v3.h>
#include <openssl/pkcs12.h>
#include <openssl/x509v3.h>
#include <unistd.h>
#include "config.h"
@ -55,7 +55,7 @@ static std::string concatenate(const std::vector<std::string>& v, const std::str
template<typename... Args>
std::string ssprintf(const std::string& format, Args... args) {
size_t size = std::snprintf(nullptr, 0, format.c_str(), args... ) + 1;
size_t size = std::snprintf(nullptr, 0, format.c_str(), args...) + 1;
std::unique_ptr<char[]> buf(new char[size]);
std::snprintf(buf.get(), size, format.c_str(), args...);
return std::string(buf.get(), buf.get() + size - 1);
@ -64,7 +64,7 @@ std::string ssprintf(const std::string& format, Args... args) {
// -------------------------------------------------------------------------------------------------
/// PDF token/object thingy. Objects may be composed either from one or a sequence of tokens.
/// The PDF Reference doesn't actually speak of tokens.
/// The PDF Reference doesn't actually speak of tokens, though ISO 32000-1:2008 does.
struct pdf_object {
enum type {
END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
@ -262,14 +262,12 @@ static std::string pdf_serialize(const pdf_object& o) {
case pdf_object::NL: return "\n";
case pdf_object::NIL: return "null";
case pdf_object::BOOL: return o.number ? "true" : "false";
case pdf_object::NUMERIC:
{
case pdf_object::NUMERIC: {
if (o.is_integer()) return std::to_string((long long) o.number);
return std::to_string(o.number);
}
case pdf_object::KEYWORD: return o.string;
case pdf_object::NAME:
{
case pdf_object::NAME: {
std::string escaped = "/";
for (char c : o.string) {
if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
@ -279,8 +277,7 @@ static std::string pdf_serialize(const pdf_object& o) {
}
return escaped;
}
case pdf_object::STRING:
{
case pdf_object::STRING: {
std::string escaped;
for (char c : o.string) {
if (c == '\\' || c == '(' || c == ')')
@ -293,15 +290,13 @@ static std::string pdf_serialize(const pdf_object& o) {
case pdf_object::E_ARRAY: return "]";
case pdf_object::B_DICT: return "<<";
case pdf_object::E_DICT: return ">>";
case pdf_object::ARRAY:
{
case pdf_object::ARRAY: {
std::vector<std::string> v;
for (const auto& i : o.array)
v.push_back(pdf_serialize(i));
return "[ " + concatenate(v, " ") + " ]";
}
case pdf_object::DICT:
{
case pdf_object::DICT: {
std::string s;
for (const auto i : o.dict)
// FIXME the key is also supposed to be escaped by pdf_serialize()
@ -372,8 +367,8 @@ pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack
auto g = stack.back(); stack.pop_back();
auto n = stack.back(); stack.pop_back();
if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
|| !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX ||
!n.is_integer() || n.number < 0 || n.number > UINT_MAX)
return {pdf_object::END, "invalid object ID pair"};
pdf_object obj{pdf_object::OBJECT};
@ -397,8 +392,8 @@ pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
auto g = stack.back(); stack.pop_back();
auto n = stack.back(); stack.pop_back();
if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
|| !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX ||
!n.is_integer() || n.number < 0 || n.number > UINT_MAX)
return {pdf_object::END, "invalid reference ID pair"};
pdf_object ref{pdf_object::REFERENCE};
@ -415,8 +410,7 @@ pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) co
case pdf_object::COMMENT:
// These are not important to parsing, not even for this procedure's needs
return parse(lex, stack);
case pdf_object::B_ARRAY:
{
case pdf_object::B_ARRAY: {
std::vector<pdf_object> array;
while (1) {
auto object = parse(lex, array);
@ -428,8 +422,7 @@ pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) co
}
return array;
}
case pdf_object::B_DICT:
{
case pdf_object::B_DICT: {
std::vector<pdf_object> array;
while (1) {
auto object = parse(lex, array);
@ -477,8 +470,8 @@ std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entrie
break;
auto second = parse(lex, throwaway_stack);
if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX
|| !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX ||
!second.is_integer() || second.number < 0 || second.number > UINT_MAX)
return "invalid xref section header";
const size_t start = object.number;
@ -487,9 +480,9 @@ std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entrie
auto off = parse(lex, throwaway_stack);
auto gen = parse(lex, throwaway_stack);
auto key = parse(lex, throwaway_stack);
if (!off.is_integer() || off.number < 0 || off.number > document.length()
|| !gen.is_integer() || gen.number < 0 || gen.number > 65535
|| key.type != pdf_object::KEYWORD)
if (!off.is_integer() || off.number < 0 || off.number > document.length() ||
!gen.is_integer() || gen.number < 0 || gen.number > 65535 ||
key.type != pdf_object::KEYWORD)
return "invalid xref entry";
bool free = true;
@ -550,8 +543,8 @@ std::string pdf_updater::initialize() {
const auto prev_offset = trailer.dict.find("Prev");
if (prev_offset == trailer.dict.end())
break;
// FIXME we don't check for size_t over or underflow
if (!prev_offset->second.is_integer())
// FIXME do not read offsets and sizes as floating point numbers
if (!prev_offset->second.is_integer() || prev_offset->second.number < 0)
return "invalid Prev offset";
xref_offset = prev_offset->second.number;
}
@ -657,8 +650,8 @@ void pdf_updater::flush_updates() {
}
trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
document += "trailer\n" + pdf_serialize(trailer)
+ ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
document +=
"trailer\n" + pdf_serialize(trailer) + ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
}
// -------------------------------------------------------------------------------------------------
@ -700,9 +693,9 @@ static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_ge
// XXX technically speaking, this may be an indirect reference. The correct way to solve this
// seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
auto kids = obj.dict.find("Kids");
if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY
|| kids->second.array.empty()
|| kids->second.array.at(0).type != pdf_object::REFERENCE)
if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY ||
kids->second.array.empty() ||
kids->second.array.at(0).type != pdf_object::REFERENCE)
return {pdf_object::NIL};
// XXX nothing prevents us from recursing in an evil circular graph
@ -740,8 +733,8 @@ static std::string pdf_fill_in_signature(std::string& document, size_t sign_off,
// OpenSSL error reasons will usually be of more value than any distinction I can come up with
std::string err = "OpenSSL failure";
if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr))
|| !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr)) ||
!PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
err = pkcs12_path + ": parse failure";
goto error;
}
@ -766,8 +759,8 @@ static std::string pdf_fill_in_signature(std::string& document, size_t sign_off,
#endif
// The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags))
|| !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags)) ||
!PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
goto error;
// For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
// PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
@ -777,10 +770,10 @@ static std::string pdf_fill_in_signature(std::string& document, size_t sign_off,
// Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
// a copy of the whole document. Hopefully this writes directly into a digest BIO.
if (!(p7bio = PKCS7_dataInit(p7, nullptr))
|| (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off)
|| (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len)
|| BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
if (!(p7bio = PKCS7_dataInit(p7, nullptr)) ||
(ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off) ||
(ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len) ||
BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
goto error;
#if 0
@ -850,7 +843,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) {
// 8.7 Digital Signatures - /signature dictionary/
auto sigdict_n = pdf.allocate();
size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
pdf.update(sigdict_n, [&]{
pdf.update(sigdict_n, [&] {
// The timestamp is important for Adobe Acrobat Reader DC. The ideal would be to use RFC 3161.
pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
" /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
@ -883,7 +876,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) {
}}});
auto sigfield_n = pdf.allocate();
pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); });
pdf.update(sigfield_n, [&] { pdf.document += pdf_serialize(sigfield); });
auto pages_ref = root.dict.find("Pages");
if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
@ -901,7 +894,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) {
annots = {pdf_object::ARRAY};
}
annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });
pdf.update(page.n, [&] { pdf.document += pdf_serialize(page); });
// 8.6.1 Interactive Form Dictionary
if (root.dict.count("AcroForm"))
@ -918,7 +911,7 @@ static std::string pdf_sign(std::string& document, ushort reservation) {
if (pdf.version(root) < 16)
root.dict["Version"] = {pdf_object::NAME, "1.6"};
pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
pdf.update(root_ref->second.n, [&] { pdf.document += pdf_serialize(root); });
pdf.flush_updates();
// Now that we know the length of everything, store byte ranges of what we're about to sign,
@ -947,9 +940,9 @@ static void die(int status, const char* format, ...) {
int main(int argc, char* argv[]) {
auto invocation_name = argv[0];
auto usage = [=]{
auto usage = [=] {
die(1, "Usage: %s [-h] [-r RESERVATION] INPUT-FILENAME OUTPUT-FILENAME PKCS12-PATH PKCS12-PASS",
invocation_name);
invocation_name);
};
static struct option opts[] = {
@ -963,8 +956,7 @@ int main(int argc, char* argv[]) {
long reservation = 4096;
while (1) {
int option_index = 0;
auto c = getopt_long(argc, const_cast<char* const*>(argv),
"hVr:", opts, &option_index);
auto c = getopt_long(argc, const_cast<char* const*>(argv), "hVr:", opts, &option_index);
if (c == -1)
break;

@ -1,5 +1,5 @@
//
// Copyright (c) 2018 - 2020, Přemysl Eric Janouch <p@janouch.name>
// Copyright (c) 2018 - 2021, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
@ -59,20 +59,22 @@ const (
// higher-level objects
Array
Dict
Stream
Indirect
Reference
)
// Object is a PDF token/object thingy. Objects may be composed either from
// Object is a PDF token/object thingy. Objects may be composed either from
// one or a sequence of tokens. The PDF Reference doesn't actually speak
// of tokens.
// of tokens, though ISO 32000-1:2008 does.
type Object struct {
Kind ObjectKind
String string // Comment/Keyword/Name/String
Number float64 // Bool, Numeric
Array []Object // Array, Indirect
Dict map[string]Object // Dict, in the future also Stream
Dict map[string]Object // Dict, Stream
Stream []byte // Stream
N, Generation uint // Indirect, Reference
}
@ -458,6 +460,10 @@ func (o *Object) Serialize() string {
fmt.Fprint(b, " /", k, " ", v.Serialize())
}
return "<<" + b.String() + " >>"
case Stream:
d := NewDict(o.Dict)
d.Dict["Length"] = NewNumeric(float64(len(o.Stream)))
return d.Serialize() + "\nstream\n" + string(o.Stream) + "\nendstream"
case Indirect:
return fmt.Sprintf("%d %d obj\n%s\nendobj", o.N, o.Generation,
o.Array[0].Serialize())
@ -497,6 +503,65 @@ type Updater struct {
Trailer map[string]Object
}
// ListIndirect returns the cross-reference table as Reference Objects,
// in ascending order of object number. Only entries that are marked
// as in use (nonfree) are included; the result is never nil.
func (u *Updater) ListIndirect() []Object {
	result := []Object{}
	for n, entry := range u.xref {
		if !entry.nonfree {
			continue
		}
		result = append(result, NewReference(uint(n), entry.generation))
	}
	return result
}
// parseStream reads the data of a stream object, having just consumed
// the "stream" keyword.
//
// The stream dictionary is expected on top of the stack, and is popped from
// it. Its Length entry, which may be given as a reference, determines how
// many bytes are taken from the lexer. The data must be followed by
// the "endstream" keyword.
//
// On success, the returned Object is the dictionary with its Kind changed
// to Stream, and its Stream field set to the data. The data is a subslice
// of the lexer's buffer, not a copy.
func (u *Updater) parseStream(lex *Lexer, stack *[]Object) (Object, error) {
	lenStack := len(*stack)
	if lenStack < 1 {
		return newError("missing stream dictionary")
	}

	dict := (*stack)[lenStack-1]
	if dict.Kind != Dict {
		return newError("stream not preceded by a dictionary")
	}

	// Only pop the dictionary once it is known to be one.
	*stack = (*stack)[:lenStack-1]

	length, ok := dict.Dict["Length"]
	if !ok {
		return newError("missing stream Length")
	}

	// Length may be stored in a separate indirect object.
	length, err := u.Dereference(length)
	if err != nil {
		return length, err
	}

	// The upper bound keeps the conversion to int below from overflowing.
	if !length.IsUint() || length.Number > math.MaxInt {
		return newError("stream Length not an unsigned integer")
	}

	// Expect exactly one newline.
	if nl, err := lex.Next(); err != nil {
		return nl, err
	} else if nl.Kind != NL {
		return newError("stream does not start with a newline")
	}

	// NOTE(review): lex.P looks like the unread remainder of the document;
	// confirm against the Lexer definition.
	size := int(length.Number)
	if len(lex.P) < size {
		return newError("stream is longer than the document")
	}

	// Take the data and advance the lexer past it, bypassing tokenization.
	dict.Kind = Stream
	dict.Stream = lex.P[:size]
	lex.P = lex.P[size:]

	// Skip any number of trailing newlines or comments.
	if end, err := u.parse(lex, stack); err != nil {
		return end, err
	} else if end.Kind != Keyword || end.String != "endstream" {
		return newError("improperly terminated stream")
	}
	return dict, nil
}
func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) {
lenStack := len(*stack)
if lenStack < 2 {
@ -590,15 +655,11 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
}
return NewDict(dict), nil
case Keyword:
// Appears in the document body, typically needs
// to access the cross-reference table.
//
// TODO(p): Use the xref to read /Length etc. once we
// actually need to read such objects; presumably
// streams can use the Object.String member.
switch token.String {
case "stream":
return newError("streams are not supported yet")
// Appears in the document body,
// typically needs to access the cross-reference table.
return u.parseStream(lex, stack)
case "obj":
return u.parseIndirect(lex, stack)
case "R":
@ -722,7 +783,7 @@ func NewUpdater(document []byte) (*Updater, error) {
if !ok {
break
}
// FIXME: We don't check for size_t over or underflow.
// FIXME: Do not read offsets and sizes as floating point numbers.
if !prevOffset.IsInteger() {
return nil, errors.New("invalid Prev offset")
}
@ -766,8 +827,6 @@ func (u *Updater) Version(root *Object) int {
// Get retrieves an object by its number and generation--may return
// Nil or End with an error.
//
// TODO(p): We should fix all uses of this not to eat the error.
func (u *Updater) Get(n, generation uint) (Object, error) {
if n >= u.xrefSize {
return New(Nil), nil
@ -796,6 +855,14 @@ func (u *Updater) Get(n, generation uint) (Object, error) {
}
}
// Dereference resolves Reference objects through Get, using their number
// and generation, and passes objects of any other kind through unchanged.
func (u *Updater) Dereference(o Object) (Object, error) {
	if o.Kind == Reference {
		return u.Get(o.N, o.Generation)
	}
	return o, nil
}
// Allocate allocates a new object number.
func (u *Updater) Allocate() uint {
n := u.xrefSize
@ -853,30 +920,19 @@ func (u *Updater) FlushUpdates() {
return updated[i] < updated[j]
})
groups := make(map[uint]uint)
for i := 0; i < len(updated); {
start, count := updated[i], uint(1)
for i++; i != len(updated) && updated[i] == start+count; i++ {
count++
}
groups[start] = count
}
// Taking literally "Each cross-reference section begins with a line
// containing the keyword xref. Following this line are one or more
// cross-reference subsections." from 3.4.3 in PDF Reference.
if len(groups) == 0 {
groups[0] = 0
}
buf := bytes.NewBuffer(u.Document)
startXref := buf.Len() + 1
buf.WriteString("\nxref\n")
for start, count := range groups {
fmt.Fprintf(buf, "%d %d\n", start, count)
for i := uint(0); i < count; i++ {
ref := u.xref[start+uint(i)]
for i := 0; i < len(updated); {
start, stop := updated[i], updated[i]+1
for i++; i < len(updated) && updated[i] == stop; i++ {
stop++
}
fmt.Fprintf(buf, "%d %d\n", start, stop-start)
for ; start < stop; start++ {
ref := u.xref[start]
if ref.nonfree {
fmt.Fprintf(buf, "%010d %05d n \n", ref.offset, ref.generation)
} else {
@ -885,6 +941,13 @@ func (u *Updater) FlushUpdates() {
}
}
// Taking literally "Each cross-reference section begins with a line
// containing the keyword xref. Following this line are one or more
// cross-reference subsections." from 3.4.3 in PDF Reference.
if len(updated) == 0 {
fmt.Fprintf(buf, "%d %d\n", 0, 0)
}
u.Trailer["Size"] = NewNumeric(float64(u.xrefSize))
trailer := NewDict(u.Trailer)
@ -910,15 +973,15 @@ func NewDate(ts time.Time) Object {
// GetFirstPage retrieves the first page of the given page (sub)tree reference,
// or returns a Nil object if unsuccessful.
func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
obj, _ := u.Get(nodeN, nodeGeneration)
if obj.Kind != Dict {
func (u *Updater) GetFirstPage(node Object) Object {
obj, err := u.Dereference(node)
if err != nil || obj.Kind != Dict {
return New(Nil)
}
// Out of convenience; these aren't filled normally.
obj.N = nodeN
obj.Generation = nodeGeneration
obj.N = node.N
obj.Generation = node.Generation
if typ, ok := obj.Dict["Type"]; !ok || typ.Kind != Name {
return New(Nil)
@ -938,7 +1001,7 @@ func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
}
// XXX: Nothing prevents us from recursing in an evil circular graph.
return u.GetFirstPage(kids.Array[0].N, kids.Array[0].Generation)
return u.GetFirstPage(kids.Array[0])
}
// -----------------------------------------------------------------------------
@ -1132,7 +1195,10 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
if !ok || rootRef.Kind != Reference {
return nil, errors.New("trailer does not contain a reference to Root")
}
root, _ := pdf.Get(rootRef.N, rootRef.Generation)
root, err := pdf.Dereference(rootRef)
if err != nil {
return nil, fmt.Errorf("Root dictionary retrieval failed: %s", err)
}
if root.Kind != Dict {
return nil, errors.New("invalid Root dictionary reference")
}
@ -1186,7 +1252,7 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
if !ok || pagesRef.Kind != Reference {
return nil, errors.New("invalid Pages reference")
}
page := pdf.GetFirstPage(pagesRef.N, pagesRef.Generation)
page := pdf.GetFirstPage(pagesRef)
if page.Kind != Dict {
return nil, errors.New("invalid or unsupported page tree")
}

@ -0,0 +1,85 @@
#!/bin/sh -e
# Test basic functionality of both versions
# Usage: ./test.sh builddir/pdf-simple-sign cmd/pdf-simple-sign/pdf-simple-sign
# log prints a progress message, using the terminal's italics mode.
log() {
	echo "$(tput sitm)-- $1$(tput sgr0)"
}
# die prints an error message in bold, and aborts the script with status 1.
die() {
	echo "$(tput bold)-- $1$(tput sgr0)"
	exit 1
}
# Get rid of old test files
rm -rf tmp
mkdir tmp
# Create documents in various tools
log "Creating source documents"
inkscape --pipe --export-filename=tmp/cairo.pdf --export-pdf-version=1.4 \
<<'EOF' 2>/dev/null || :
<svg xmlns="http://www.w3.org/2000/svg"><text x="5" y="10">Hello</text></svg>
EOF
date > tmp/lowriter.txt
if command -v gropdf >/dev/null
then groff -T pdf < tmp/lowriter.txt > tmp/groff.pdf
fi
lowriter --convert-to pdf tmp/lowriter.txt --outdir tmp >/dev/null || :
convert rose: tmp/imagemagick.pdf || :
# Create a root CA certificate pair
log "Creating certificates"
openssl req -newkey rsa:2048 -subj "/CN=Test CA" -nodes \
-keyout tmp/ca.key.pem -x509 -out tmp/ca.cert.pem 2>/dev/null
# Create a private NSS database and insert our test CA there
rm -rf tmp/nssdir
mkdir tmp/nssdir
certutil -N --empty-password -d sql:tmp/nssdir
certutil -d sql:tmp/nssdir -A -n root -t ,C, -a -i tmp/ca.cert.pem
# Create a leaf certificate pair
cat > tmp/cert.cfg <<'EOF'
[smime]
basicConstraints = CA:FALSE
keyUsage = digitalSignature
extendedKeyUsage = emailProtection
nsCertType = email
EOF
openssl req -newkey rsa:2048 -subj "/CN=Test Leaf" -nodes \
-keyout tmp/key.pem -out tmp/cert.csr 2>/dev/null
openssl x509 -req -in tmp/cert.csr -out tmp/cert.pem \
-CA tmp/ca.cert.pem -CAkey tmp/ca.key.pem -set_serial 1 \
-extensions smime -extfile tmp/cert.cfg 2>/dev/null
openssl verify -CAfile tmp/ca.cert.pem tmp/cert.pem >/dev/null
# The second line accommodates the Go signer,
# which doesn't support SHA-256 within pkcs12 handling
openssl pkcs12 -inkey tmp/key.pem -in tmp/cert.pem \
-certpbe PBE-SHA1-3DES -keypbe PBE-SHA1-3DES -macalg sha1 \
-export -passout pass: -out tmp/key-pair.p12
for tool in "$@"; do
rm -f tmp/*.signed.pdf
for source in tmp/*.pdf; do
log "Testing $tool with $source"
result=${source%.pdf}.signed.pdf
$tool "$source" "$result" tmp/key-pair.p12 ""
pdfsig -nssdir sql:tmp/nssdir "$result" | grep Validation
# Only some of our generators use PDF versions higher than 1.5
log "Testing $tool for version detection"
grep -q "/Version /1.6" "$result" || grep -q "^%PDF-1.6" "$result" \
|| die "Version detection seems to misbehave (no upgrade)"
done
log "Testing $tool for expected failures"
$tool "$result" "$source.fail.pdf" tmp/key-pair.p12 "" \
&& die "Double signing shouldn't succeed"
$tool -r 1 "$source" "$source.fail.pdf" tmp/key-pair.p12 "" \
&& die "Too low reservations shouldn't succeed"
sed '1s/%PDF-1../%PDF-1.7/' "$source" > "$source.alt"
$tool "$source.alt" "$result.alt" tmp/key-pair.p12 ""
grep -q "/Version /1.6" "$result.alt" \
&& die "Version detection seems to misbehave (downgraded)"
done
log "OK"
Loading…
Cancel
Save