Compare commits
11 Commits
43ca0e5035
...
c++-librar
| Author | SHA1 | Date | |
|---|---|---|---|
|
ceea7dca2f
|
|||
|
486cafa6b4
|
|||
|
a0696cdb88
|
|||
|
be8480f8af
|
|||
|
f9f3171c02
|
|||
|
0ea296de67
|
|||
|
9d2412398a
|
|||
|
62206ed344
|
|||
|
9ac8360979
|
|||
|
50578fe99f
|
|||
|
eedd9a550c
|
2
LICENSE
2
LICENSE
@@ -1,4 +1,4 @@
|
|||||||
Copyright (c) 2017, Přemysl Janouch <p@janouch.name>
|
Copyright (c) 2017, Přemysl Eric Janouch <p@janouch.name>
|
||||||
|
|
||||||
Permission to use, copy, modify, and/or distribute this software for any
|
Permission to use, copy, modify, and/or distribute this software for any
|
||||||
purpose with or without fee is hereby granted.
|
purpose with or without fee is hereby granted.
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2018, Přemysl Janouch <p@janouch.name>
|
// Copyright (c) 2018, Přemysl Eric Janouch <p@janouch.name>
|
||||||
//
|
//
|
||||||
// Permission to use, copy, modify, and/or distribute this software for any
|
// Permission to use, copy, modify, and/or distribute this software for any
|
||||||
// purpose with or without fee is hereby granted.
|
// purpose with or without fee is hereby granted.
|
||||||
@@ -64,9 +64,9 @@ func main() {
|
|||||||
die(3, "%s", err)
|
die(3, "%s", err)
|
||||||
}
|
}
|
||||||
if pdfDocument, err = pdf.Sign(pdfDocument, key, certs); err != nil {
|
if pdfDocument, err = pdf.Sign(pdfDocument, key, certs); err != nil {
|
||||||
die(2, "error: %s", err)
|
die(4, "error: %s", err)
|
||||||
}
|
}
|
||||||
if err = ioutil.WriteFile(outputPath, pdfDocument, 0666); err != nil {
|
if err = ioutil.WriteFile(outputPath, pdfDocument, 0666); err != nil {
|
||||||
die(3, "%s", err)
|
die(5, "%s", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
8
go.mod
Normal file
8
go.mod
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
module janouch.name/pdf-simple-sign
|
||||||
|
|
||||||
|
go 1.14
|
||||||
|
|
||||||
|
require (
|
||||||
|
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1
|
||||||
|
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de
|
||||||
|
)
|
||||||
13
go.sum
Normal file
13
go.sum
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8 h1:W3oGFPlHBLgXdsbPVixWFMYsuPhm81/Qww3XAgBbn/0=
|
||||||
|
go.mozilla.org/pkcs7 v0.0.0-20181029144607-24857c352dd8/go.mod h1:5fWP3IVYEMc04wC+lMJAfkmNmKAl2P1swVv8VS+URZ8=
|
||||||
|
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1 h1:A/5uWzF44DlIgdm/PQFwfMkW0JX+cIcQi/SwLAmZP5M=
|
||||||
|
go.mozilla.org/pkcs7 v0.0.0-20200128120323-432b2356ecb1/go.mod h1:SNgMg+EgDFwmvSmLRTNKC5fegJjB7v23qTQ0XLGUNHk=
|
||||||
|
golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85 h1:et7+NAX3lLIk5qUCTA9QelBjGE/NkhzYw/mhnr0s7nI=
|
||||||
|
golang.org/x/crypto v0.0.0-20181127143415-eb0de9b17e85/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||||
|
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||||
|
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de h1:ikNHVSjEfnvz6sxdSPCaPt572qowuyMDMJLLm3Db3ig=
|
||||||
|
golang.org/x/crypto v0.0.0-20200728195943-123391ffb6de/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||||
|
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||||
|
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||||
|
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
|
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||||
@@ -1,5 +1,12 @@
|
|||||||
project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'])
|
project('pdf-simple-sign', 'cpp', default_options : ['cpp_std=c++11'])
|
||||||
cryptodep = dependency('libcrypto')
|
cryptodep = dependency('libcrypto')
|
||||||
executable('pdf-simple-sign', 'pdf-simple-sign.cpp',
|
|
||||||
|
executable('pdf-simple-sign', 'pdf-simple-sign.cpp', 'pdf.cpp',
|
||||||
|
install : true,
|
||||||
|
dependencies : cryptodep)
|
||||||
|
|
||||||
|
install_headers('pdf-simple-sign.h')
|
||||||
|
library('pdf-simple-sign', 'pdf.cpp',
|
||||||
|
soversion : 0,
|
||||||
install : true,
|
install : true,
|
||||||
dependencies : cryptodep)
|
dependencies : cryptodep)
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
//
|
//
|
||||||
// pdf-simple-sign: simple PDF signer
|
// pdf-simple-sign: simple PDF signer
|
||||||
//
|
//
|
||||||
// Copyright (c) 2017, Přemysl Janouch <p@janouch.name>
|
// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
|
||||||
//
|
//
|
||||||
// Permission to use, copy, modify, and/or distribute this software for any
|
// Permission to use, copy, modify, and/or distribute this software for any
|
||||||
// purpose with or without fee is hereby granted.
|
// purpose with or without fee is hereby granted.
|
||||||
@@ -17,899 +17,22 @@
|
|||||||
//
|
//
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <cmath>
|
#include <cstdlib>
|
||||||
#undef NDEBUG
|
#include <cstring>
|
||||||
#include <cassert>
|
#include <cerrno>
|
||||||
|
#include <cstdarg>
|
||||||
#include <vector>
|
|
||||||
#include <map>
|
|
||||||
#include <regex>
|
|
||||||
#include <memory>
|
|
||||||
#include <set>
|
|
||||||
|
|
||||||
#if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
|
|
||||||
#error Need libstdc++ >= 4.9 for <regex>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <getopt.h>
|
#include <getopt.h>
|
||||||
#include <openssl/err.h>
|
|
||||||
#include <openssl/x509v3.h>
|
|
||||||
#include <openssl/pkcs12.h>
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------
|
#include "pdf-simple-sign.h"
|
||||||
|
|
||||||
using uint = unsigned int;
|
|
||||||
|
|
||||||
static std::string concatenate(const std::vector<std::string>& v, const std::string& delim) {
|
|
||||||
std::string res;
|
|
||||||
if (v.empty())
|
|
||||||
return res;
|
|
||||||
for (const auto& s : v)
|
|
||||||
res += s + delim;
|
|
||||||
return res.substr(0, res.length() - delim.length());
|
|
||||||
}
|
|
||||||
|
|
||||||
template<typename... Args>
|
|
||||||
std::string ssprintf(const std::string& format, Args... args) {
|
|
||||||
size_t size = std::snprintf(nullptr, 0, format.c_str(), args... ) + 1;
|
|
||||||
std::unique_ptr<char[]> buf(new char[size]);
|
|
||||||
std::snprintf(buf.get(), size, format.c_str(), args...);
|
|
||||||
return std::string(buf.get(), buf.get() + size - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/// PDF token/object thingy. Objects may be composed either from one or a sequence of tokens.
|
|
||||||
/// The PDF Reference doesn't actually speak of tokens.
|
|
||||||
struct pdf_object {
|
|
||||||
enum type {
|
|
||||||
END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
|
|
||||||
// Simple tokens
|
|
||||||
B_ARRAY, E_ARRAY, B_DICT, E_DICT,
|
|
||||||
// Higher-level objects
|
|
||||||
ARRAY, DICT, OBJECT, REFERENCE,
|
|
||||||
} type = END;
|
|
||||||
|
|
||||||
std::string string; ///< END (error message), COMMENT/KEYWORD/NAME/STRING
|
|
||||||
double number = 0.; ///< BOOL, NUMERIC
|
|
||||||
std::vector<pdf_object> array; ///< ARRAY, OBJECT
|
|
||||||
std::map<std::string, pdf_object> dict; ///< DICT, in the future also STREAM
|
|
||||||
uint n = 0, generation = 0; ///< OBJECT, REFERENCE
|
|
||||||
|
|
||||||
pdf_object(enum type type = END) : type(type) {}
|
|
||||||
pdf_object(enum type type, double v) : type(type), number(v) {}
|
|
||||||
pdf_object(enum type type, const std::string& v) : type(type), string(v) {}
|
|
||||||
pdf_object(enum type type, uint n, uint g) : type(type), n(n), generation(g) {}
|
|
||||||
pdf_object(const std::vector<pdf_object>& array) : type(ARRAY), array(array) {}
|
|
||||||
pdf_object(const std::map<std::string, pdf_object>& dict) : type(DICT), dict(dict) {}
|
|
||||||
|
|
||||||
pdf_object(const pdf_object&) = default;
|
|
||||||
pdf_object(pdf_object&&) = default;
|
|
||||||
pdf_object& operator=(const pdf_object&) = default;
|
|
||||||
pdf_object& operator=(pdf_object&&) = default;
|
|
||||||
|
|
||||||
/// Return whether this is a number without a fractional part
|
|
||||||
bool is_integer() const {
|
|
||||||
double tmp;
|
|
||||||
return type == NUMERIC && std::modf(number, &tmp) == 0.;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
/// Basic lexical analyser for the Portable Document Format, giving limited error information
|
|
||||||
struct pdf_lexer {
|
|
||||||
const unsigned char* p;
|
|
||||||
pdf_lexer(const char* s) : p(reinterpret_cast<const unsigned char*>(s)) {}
|
|
||||||
|
|
||||||
static constexpr const char* oct_alphabet = "01234567";
|
|
||||||
static constexpr const char* dec_alphabet = "0123456789";
|
|
||||||
static constexpr const char* hex_alphabet = "0123456789abcdefABCDEF";
|
|
||||||
static constexpr const char* whitespace = "\t\n\f\r ";
|
|
||||||
static constexpr const char* delimiters = "()<>[]{}/%";
|
|
||||||
|
|
||||||
bool eat_newline(int ch) {
|
|
||||||
if (ch == '\r') {
|
|
||||||
if (*p == '\n') p++;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return ch == '\n';
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_object string() {
|
|
||||||
std::string value;
|
|
||||||
int parens = 1;
|
|
||||||
while (1) {
|
|
||||||
if (!*p) return {pdf_object::END, "unexpected end of string"};
|
|
||||||
auto ch = *p++;
|
|
||||||
if (eat_newline(ch)) ch = '\n';
|
|
||||||
else if (ch == '(') { parens++; }
|
|
||||||
else if (ch == ')') { if (!--parens) break; }
|
|
||||||
else if (ch == '\\') {
|
|
||||||
if (!*p) return {pdf_object::END, "unexpected end of string"};
|
|
||||||
switch ((ch = *p++)) {
|
|
||||||
case 'n': ch = '\n'; break;
|
|
||||||
case 'r': ch = '\r'; break;
|
|
||||||
case 't': ch = '\t'; break;
|
|
||||||
case 'b': ch = '\b'; break;
|
|
||||||
case 'f': ch = '\f'; break;
|
|
||||||
default:
|
|
||||||
if (eat_newline(ch))
|
|
||||||
continue;
|
|
||||||
std::string octal;
|
|
||||||
if (ch && strchr(oct_alphabet, ch)) {
|
|
||||||
octal += ch;
|
|
||||||
if (*p && strchr(oct_alphabet, *p)) octal += *p++;
|
|
||||||
if (*p && strchr(oct_alphabet, *p)) octal += *p++;
|
|
||||||
ch = std::stoi(octal, nullptr, 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
value += ch;
|
|
||||||
}
|
|
||||||
return {pdf_object::STRING, value};
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_object string_hex() {
|
|
||||||
std::string value, buf;
|
|
||||||
while (*p != '>') {
|
|
||||||
if (!*p) return {pdf_object::END, "unexpected end of hex string"};
|
|
||||||
if (!strchr(hex_alphabet, *p))
|
|
||||||
return {pdf_object::END, "invalid hex string"};
|
|
||||||
buf += *p++;
|
|
||||||
if (buf.size() == 2) {
|
|
||||||
value += char(std::stoi(buf, nullptr, 16));
|
|
||||||
buf.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
p++;
|
|
||||||
if (!buf.empty()) value += char(std::stoi(buf + '0', nullptr, 16));
|
|
||||||
return {pdf_object::STRING, value};
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_object name() {
|
|
||||||
std::string value;
|
|
||||||
while (!strchr(whitespace, *p) && !strchr(delimiters, *p)) {
|
|
||||||
auto ch = *p++;
|
|
||||||
if (ch == '#') {
|
|
||||||
std::string hexa;
|
|
||||||
if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
|
|
||||||
if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
|
|
||||||
if (hexa.size() != 2)
|
|
||||||
return {pdf_object::END, "invalid name hexa escape"};
|
|
||||||
ch = char(std::stoi(hexa, nullptr, 16));
|
|
||||||
}
|
|
||||||
value += ch;
|
|
||||||
}
|
|
||||||
if (value.empty()) return {pdf_object::END, "unexpected end of name"};
|
|
||||||
return {pdf_object::NAME, value};
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_object comment() {
|
|
||||||
std::string value;
|
|
||||||
while (*p && *p != '\r' && *p != '\n')
|
|
||||||
value += *p++;
|
|
||||||
return {pdf_object::COMMENT, value};
|
|
||||||
}
|
|
||||||
|
|
||||||
// XXX maybe invalid numbers should rather be interpreted as keywords
|
|
||||||
pdf_object number() {
|
|
||||||
std::string value;
|
|
||||||
if (*p == '-')
|
|
||||||
value += *p++;
|
|
||||||
bool real = false, digits = false;
|
|
||||||
while (*p) {
|
|
||||||
if (strchr(dec_alphabet, *p))
|
|
||||||
digits = true;
|
|
||||||
else if (*p == '.' && !real)
|
|
||||||
real = true;
|
|
||||||
else
|
|
||||||
break;
|
|
||||||
value += *p++;
|
|
||||||
}
|
|
||||||
if (!digits) return {pdf_object::END, "invalid number"};
|
|
||||||
return {pdf_object::NUMERIC, std::stod(value, nullptr)};
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_object next() {
|
|
||||||
if (!*p)
|
|
||||||
return {pdf_object::END};
|
|
||||||
if (strchr("-0123456789.", *p))
|
|
||||||
return number();
|
|
||||||
|
|
||||||
// {} end up being keywords, we might want to error out on those
|
|
||||||
std::string value;
|
|
||||||
while (!strchr(whitespace, *p) && !strchr(delimiters, *p))
|
|
||||||
value += *p++;
|
|
||||||
if (!value.empty()) {
|
|
||||||
if (value == "null") return {pdf_object::NIL};
|
|
||||||
if (value == "true") return {pdf_object::BOOL, 1};
|
|
||||||
if (value == "false") return {pdf_object::BOOL, 0};
|
|
||||||
return {pdf_object::KEYWORD, value};
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (char ch = *p++) {
|
|
||||||
case '/': return name();
|
|
||||||
case '%': return comment();
|
|
||||||
case '(': return string();
|
|
||||||
case '[': return {pdf_object::B_ARRAY};
|
|
||||||
case ']': return {pdf_object::E_ARRAY};
|
|
||||||
case '<':
|
|
||||||
if (*p++ == '<')
|
|
||||||
return {pdf_object::B_DICT};
|
|
||||||
p--;
|
|
||||||
return string_hex();
|
|
||||||
case '>':
|
|
||||||
if (*p++ == '>')
|
|
||||||
return {pdf_object::E_DICT};
|
|
||||||
p--;
|
|
||||||
return {pdf_object::END, "unexpected '>'"};
|
|
||||||
default:
|
|
||||||
if (eat_newline(ch))
|
|
||||||
return {pdf_object::NL};
|
|
||||||
if (strchr(whitespace, ch))
|
|
||||||
return next();
|
|
||||||
return {pdf_object::END, "unexpected input"};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// FIXME lines /should not/ be longer than 255 characters, some wrapping is in order
|
|
||||||
static std::string pdf_serialize(const pdf_object& o) {
|
|
||||||
switch (o.type) {
|
|
||||||
case pdf_object::NL: return "\n";
|
|
||||||
case pdf_object::NIL: return "null";
|
|
||||||
case pdf_object::BOOL: return o.number ? "true" : "false";
|
|
||||||
case pdf_object::NUMERIC:
|
|
||||||
{
|
|
||||||
if (o.is_integer()) return std::to_string((long long) o.number);
|
|
||||||
return std::to_string(o.number);
|
|
||||||
}
|
|
||||||
case pdf_object::KEYWORD: return o.string;
|
|
||||||
case pdf_object::NAME:
|
|
||||||
{
|
|
||||||
std::string escaped = "/";
|
|
||||||
for (char c : o.string) {
|
|
||||||
if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
|
|
||||||
escaped += ssprintf("#%02x", c);
|
|
||||||
else
|
|
||||||
escaped += c;
|
|
||||||
}
|
|
||||||
return escaped;
|
|
||||||
}
|
|
||||||
case pdf_object::STRING:
|
|
||||||
{
|
|
||||||
std::string escaped;
|
|
||||||
for (char c : o.string) {
|
|
||||||
if (c == '\\' || c == '(' || c == ')')
|
|
||||||
escaped += '\\';
|
|
||||||
escaped += c;
|
|
||||||
}
|
|
||||||
return "(" + escaped + ")";
|
|
||||||
}
|
|
||||||
case pdf_object::B_ARRAY: return "[";
|
|
||||||
case pdf_object::E_ARRAY: return "]";
|
|
||||||
case pdf_object::B_DICT: return "<<";
|
|
||||||
case pdf_object::E_DICT: return ">>";
|
|
||||||
case pdf_object::ARRAY:
|
|
||||||
{
|
|
||||||
std::vector<std::string> v;
|
|
||||||
for (const auto& i : o.array)
|
|
||||||
v.push_back(pdf_serialize(i));
|
|
||||||
return "[ " + concatenate(v, " ") + " ]";
|
|
||||||
}
|
|
||||||
case pdf_object::DICT:
|
|
||||||
{
|
|
||||||
std::string s;
|
|
||||||
for (const auto i : o.dict)
|
|
||||||
// FIXME the key is also supposed to be escaped by pdf_serialize()
|
|
||||||
s += " /" + i.first + " " + pdf_serialize(i.second);
|
|
||||||
return "<<" + s + " >>";
|
|
||||||
}
|
|
||||||
case pdf_object::OBJECT:
|
|
||||||
return ssprintf("%u %u obj\n", o.n, o.generation) + pdf_serialize(o.array.at(0)) + "\nendobj";
|
|
||||||
case pdf_object::REFERENCE:
|
|
||||||
return ssprintf("%u %u R", o.n, o.generation);
|
|
||||||
default:
|
|
||||||
assert(!"unsupported token for serialization");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/// Utility class to help read and possibly incrementally update PDF files
|
|
||||||
class pdf_updater {
|
|
||||||
struct ref {
|
|
||||||
size_t offset = 0; ///< File offset or N of the next free entry
|
|
||||||
uint generation = 0; ///< Object generation
|
|
||||||
bool free = true; ///< Whether this N has been deleted
|
|
||||||
};
|
|
||||||
|
|
||||||
std::vector<ref> xref; ///< Cross-reference table
|
|
||||||
size_t xref_size = 0; ///< Current cross-reference table size, correlated to xref.size()
|
|
||||||
std::set<uint> updated; ///< List of updated objects
|
|
||||||
|
|
||||||
pdf_object parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
|
|
||||||
pdf_object parse_R(std::vector<pdf_object>& stack) const;
|
|
||||||
pdf_object parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
|
|
||||||
std::string load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries);
|
|
||||||
|
|
||||||
public:
|
|
||||||
/// The new trailer dictionary to be written, initialized with the old one
|
|
||||||
std::map<std::string, pdf_object> trailer;
|
|
||||||
|
|
||||||
std::string& document;
|
|
||||||
pdf_updater(std::string& document) : document(document) {}
|
|
||||||
|
|
||||||
/// Build the cross-reference table and prepare a new trailer dictionary
|
|
||||||
std::string initialize();
|
|
||||||
/// Retrieve an object by its number and generation -- may return NIL or END with an error
|
|
||||||
pdf_object get(uint n, uint generation) const;
|
|
||||||
/// Allocate a new object number
|
|
||||||
uint allocate();
|
|
||||||
/// Append an updated object to the end of the document
|
|
||||||
void update(uint n, std::function<void()> fill);
|
|
||||||
/// Write an updated cross-reference table and trailer
|
|
||||||
void flush_updates();
|
|
||||||
};
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/// If the object is an error, forward its message, otherwise return err.
|
|
||||||
static std::string pdf_error(const pdf_object& o, const char* err) {
|
|
||||||
if (o.type != pdf_object::END || o.string.empty()) return err;
|
|
||||||
return o.string;
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
|
|
||||||
if (stack.size() < 2)
|
|
||||||
return {pdf_object::END, "missing object ID pair"};
|
|
||||||
|
|
||||||
auto g = stack.back(); stack.pop_back();
|
|
||||||
auto n = stack.back(); stack.pop_back();
|
|
||||||
if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
|
|
||||||
|| !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
|
|
||||||
return {pdf_object::END, "invalid object ID pair"};
|
|
||||||
|
|
||||||
pdf_object obj{pdf_object::OBJECT};
|
|
||||||
obj.n = n.number;
|
|
||||||
obj.generation = g.number;
|
|
||||||
|
|
||||||
while (1) {
|
|
||||||
auto object = parse(lex, obj.array);
|
|
||||||
if (object.type == pdf_object::END)
|
|
||||||
return {pdf_object::END, pdf_error(object, "object doesn't end")};
|
|
||||||
if (object.type == pdf_object::KEYWORD && object.string == "endobj")
|
|
||||||
break;
|
|
||||||
obj.array.push_back(std::move(object));
|
|
||||||
}
|
|
||||||
return obj;
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
|
|
||||||
if (stack.size() < 2)
|
|
||||||
return {pdf_object::END, "missing reference ID pair"};
|
|
||||||
|
|
||||||
auto g = stack.back(); stack.pop_back();
|
|
||||||
auto n = stack.back(); stack.pop_back();
|
|
||||||
if (!g.is_integer() || g.number < 0 || g.number > UINT_MAX
|
|
||||||
|| !n.is_integer() || n.number < 0 || n.number > UINT_MAX)
|
|
||||||
return {pdf_object::END, "invalid reference ID pair"};
|
|
||||||
|
|
||||||
pdf_object ref{pdf_object::REFERENCE};
|
|
||||||
ref.n = n.number;
|
|
||||||
ref.generation = g.number;
|
|
||||||
return ref;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Read an object at the lexer's position. Not a strict parser.
|
|
||||||
pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
|
|
||||||
auto token = lex.next();
|
|
||||||
switch (token.type) {
|
|
||||||
case pdf_object::NL:
|
|
||||||
case pdf_object::COMMENT:
|
|
||||||
// These are not important to parsing, not even for this procedure's needs
|
|
||||||
return parse(lex, stack);
|
|
||||||
case pdf_object::B_ARRAY:
|
|
||||||
{
|
|
||||||
std::vector<pdf_object> array;
|
|
||||||
while (1) {
|
|
||||||
auto object = parse(lex, array);
|
|
||||||
if (object.type == pdf_object::END)
|
|
||||||
return {pdf_object::END, pdf_error(object, "array doesn't end")};
|
|
||||||
if (object.type == pdf_object::E_ARRAY)
|
|
||||||
break;
|
|
||||||
array.push_back(std::move(object));
|
|
||||||
}
|
|
||||||
return array;
|
|
||||||
}
|
|
||||||
case pdf_object::B_DICT:
|
|
||||||
{
|
|
||||||
std::vector<pdf_object> array;
|
|
||||||
while (1) {
|
|
||||||
auto object = parse(lex, array);
|
|
||||||
if (object.type == pdf_object::END)
|
|
||||||
return {pdf_object::END, pdf_error(object, "dictionary doesn't end")};
|
|
||||||
if (object.type == pdf_object::E_DICT)
|
|
||||||
break;
|
|
||||||
array.push_back(std::move(object));
|
|
||||||
}
|
|
||||||
if (array.size() % 2)
|
|
||||||
return {pdf_object::END, "unbalanced dictionary"};
|
|
||||||
std::map<std::string, pdf_object> dict;
|
|
||||||
for (size_t i = 0; i < array.size(); i += 2) {
|
|
||||||
if (array[i].type != pdf_object::NAME)
|
|
||||||
return {pdf_object::END, "invalid dictionary key type"};
|
|
||||||
dict.insert({array[i].string, std::move(array[i + 1])});
|
|
||||||
}
|
|
||||||
return dict;
|
|
||||||
}
|
|
||||||
case pdf_object::KEYWORD:
|
|
||||||
// Appears in the document body, typically needs to access the cross-reference table
|
|
||||||
// TODO use the xref to read /Length etc. once we actually need to read such objects;
|
|
||||||
// presumably streams can use the pdf_object::string member
|
|
||||||
if (token.string == "stream") return {pdf_object::END, "streams are not supported yet"};
|
|
||||||
if (token.string == "obj") return parse_obj(lex, stack);
|
|
||||||
if (token.string == "R") return parse_R(stack);
|
|
||||||
return token;
|
|
||||||
default:
|
|
||||||
return token;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries) {
|
|
||||||
std::vector<pdf_object> throwaway_stack;
|
|
||||||
{
|
|
||||||
auto keyword = parse(lex, throwaway_stack);
|
|
||||||
if (keyword.type != pdf_object::KEYWORD || keyword.string != "xref")
|
|
||||||
return "invalid xref table";
|
|
||||||
}
|
|
||||||
while (1) {
|
|
||||||
auto object = parse(lex, throwaway_stack);
|
|
||||||
if (object.type == pdf_object::END)
|
|
||||||
return pdf_error(object, "unexpected EOF while looking for the trailer");
|
|
||||||
if (object.type == pdf_object::KEYWORD && object.string == "trailer")
|
|
||||||
break;
|
|
||||||
|
|
||||||
auto second = parse(lex, throwaway_stack);
|
|
||||||
if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX
|
|
||||||
|| !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
|
|
||||||
return "invalid xref section header";
|
|
||||||
|
|
||||||
const size_t start = object.number;
|
|
||||||
const size_t count = second.number;
|
|
||||||
for (size_t i = 0; i < count; i++) {
|
|
||||||
auto off = parse(lex, throwaway_stack);
|
|
||||||
auto gen = parse(lex, throwaway_stack);
|
|
||||||
auto key = parse(lex, throwaway_stack);
|
|
||||||
if (!off.is_integer() || off.number < 0 || off.number > document.length()
|
|
||||||
|| !gen.is_integer() || gen.number < 0 || gen.number > 65535
|
|
||||||
|| key.type != pdf_object::KEYWORD)
|
|
||||||
return "invalid xref entry";
|
|
||||||
|
|
||||||
bool free = true;
|
|
||||||
if (key.string == "n")
|
|
||||||
free = false;
|
|
||||||
else if (key.string != "f")
|
|
||||||
return "invalid xref entry";
|
|
||||||
|
|
||||||
auto n = start + i;
|
|
||||||
if (loaded_entries.count(n))
|
|
||||||
continue;
|
|
||||||
if (n >= xref.size())
|
|
||||||
xref.resize(n + 1);
|
|
||||||
loaded_entries.insert(n);
|
|
||||||
|
|
||||||
auto& ref = xref[n];
|
|
||||||
ref.generation = gen.number;
|
|
||||||
ref.offset = off.number;
|
|
||||||
ref.free = free;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
std::string pdf_updater::initialize() {
|
|
||||||
// We only need to look for startxref roughly within the last kibibyte of the document
|
|
||||||
static std::regex haystack_re("[\\s\\S]*\\sstartxref\\s+(\\d+)\\s+%%EOF");
|
|
||||||
std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);
|
|
||||||
|
|
||||||
std::smatch m;
|
|
||||||
if (!std::regex_search(haystack, m, haystack_re, std::regex_constants::match_continuous))
|
|
||||||
return "cannot find startxref";
|
|
||||||
|
|
||||||
size_t xref_offset = std::stoul(m.str(1)), last_xref_offset = xref_offset;
|
|
||||||
std::set<size_t> loaded_xrefs;
|
|
||||||
std::set<uint> loaded_entries;
|
|
||||||
|
|
||||||
std::vector<pdf_object> throwaway_stack;
|
|
||||||
while (1) {
|
|
||||||
if (loaded_xrefs.count(xref_offset))
|
|
||||||
return "circular xref offsets";
|
|
||||||
if (xref_offset >= document.length())
|
|
||||||
return "invalid xref offset";
|
|
||||||
|
|
||||||
pdf_lexer lex(document.c_str() + xref_offset);
|
|
||||||
auto err = load_xref(lex, loaded_entries);
|
|
||||||
if (!err.empty()) return err;
|
|
||||||
|
|
||||||
auto trailer = parse(lex, throwaway_stack);
|
|
||||||
if (trailer.type != pdf_object::DICT)
|
|
||||||
return pdf_error(trailer, "invalid trailer dictionary");
|
|
||||||
if (loaded_xrefs.empty())
|
|
||||||
this->trailer = trailer.dict;
|
|
||||||
loaded_xrefs.insert(xref_offset);
|
|
||||||
|
|
||||||
const auto prev_offset = trailer.dict.find("Prev");
|
|
||||||
if (prev_offset == trailer.dict.end())
|
|
||||||
break;
|
|
||||||
// FIXME we don't check for size_t over or underflow
|
|
||||||
if (!prev_offset->second.is_integer())
|
|
||||||
return "invalid Prev offset";
|
|
||||||
xref_offset = prev_offset->second.number;
|
|
||||||
}
|
|
||||||
|
|
||||||
trailer["Prev"] = {pdf_object::NUMERIC, double(last_xref_offset)};
|
|
||||||
const auto last_size = trailer.find("Size");
|
|
||||||
if (last_size == trailer.end() || !last_size->second.is_integer() ||
|
|
||||||
last_size->second.number <= 0)
|
|
||||||
return "invalid or missing cross-reference table Size";
|
|
||||||
|
|
||||||
xref_size = last_size->second.number;
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
pdf_object pdf_updater::get(uint n, uint generation) const {
|
|
||||||
if (n >= xref_size)
|
|
||||||
return {pdf_object::NIL};
|
|
||||||
|
|
||||||
const auto& ref = xref[n];
|
|
||||||
if (ref.free || ref.generation != generation || ref.offset >= document.length())
|
|
||||||
return {pdf_object::NIL};
|
|
||||||
|
|
||||||
pdf_lexer lex(document.c_str() + ref.offset);
|
|
||||||
std::vector<pdf_object> stack;
|
|
||||||
while (1) {
|
|
||||||
auto object = parse(lex, stack);
|
|
||||||
if (object.type == pdf_object::END)
|
|
||||||
return object;
|
|
||||||
if (object.type != pdf_object::OBJECT)
|
|
||||||
stack.push_back(std::move(object));
|
|
||||||
else if (object.n != n || object.generation != generation)
|
|
||||||
return {pdf_object::END, "object mismatch"};
|
|
||||||
else
|
|
||||||
return std::move(object.array.at(0));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
uint pdf_updater::allocate() {
|
|
||||||
assert(xref_size < UINT_MAX);
|
|
||||||
|
|
||||||
auto n = xref_size++;
|
|
||||||
if (xref.size() < xref_size)
|
|
||||||
xref.resize(xref_size);
|
|
||||||
|
|
||||||
// We don't make sure it gets a subsection in the update yet because we
|
|
||||||
// make no attempts at fixing the linked list of free items either
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
void pdf_updater::update(uint n, std::function<void()> fill) {
|
|
||||||
auto& ref = xref.at(n);
|
|
||||||
ref.offset = document.length() + 1;
|
|
||||||
ref.free = false;
|
|
||||||
updated.insert(n);
|
|
||||||
|
|
||||||
document += ssprintf("\n%u %u obj\n", n, ref.generation);
|
|
||||||
// Separately so that the callback can use document.length() to get the current offset
|
|
||||||
fill();
|
|
||||||
document += "\nendobj";
|
|
||||||
}
|
|
||||||
|
|
||||||
void pdf_updater::flush_updates() {
|
|
||||||
std::map<uint, size_t> groups;
|
|
||||||
for (auto i = updated.cbegin(); i != updated.cend(); ) {
|
|
||||||
size_t start = *i, count = 1;
|
|
||||||
while (++i != updated.cend() && *i == start + count)
|
|
||||||
count++;
|
|
||||||
groups[start] = count;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Taking literally "Each cross-reference section begins with a line containing the keyword xref.
|
|
||||||
// Following this line are one or more cross-reference subsections." from 3.4.3 in PDF Reference
|
|
||||||
if (groups.empty())
|
|
||||||
groups[0] = 0;
|
|
||||||
|
|
||||||
auto startxref = document.length() + 1;
|
|
||||||
document += "\nxref\n";
|
|
||||||
for (const auto& g : groups) {
|
|
||||||
document += ssprintf("%u %zu\n", g.first, g.second);
|
|
||||||
for (size_t i = 0; i < g.second; i++) {
|
|
||||||
auto& ref = xref[g.first + i];
|
|
||||||
document += ssprintf("%010zu %05u %c \n", ref.offset, ref.generation, "nf"[!!ref.free]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
|
|
||||||
document += "trailer\n" + pdf_serialize(trailer)
|
|
||||||
+ ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/// Make a PDF object representing the given point in time
|
|
||||||
static pdf_object pdf_date(time_t timestamp) {
|
|
||||||
struct tm parts;
|
|
||||||
assert(localtime_r(&timestamp, &parts));
|
|
||||||
|
|
||||||
char buf[64];
|
|
||||||
assert(strftime(buf, sizeof buf, "D:%Y%m%d%H%M%S", &parts));
|
|
||||||
|
|
||||||
std::string offset = "Z";
|
|
||||||
auto offset_min = parts.tm_gmtoff / 60;
|
|
||||||
if (parts.tm_gmtoff < 0)
|
|
||||||
offset = ssprintf("-%02ld'%02ld'", -offset_min / 60, -offset_min % 60);
|
|
||||||
if (parts.tm_gmtoff > 0)
|
|
||||||
offset = ssprintf("+%02ld'%02ld'", +offset_min / 60, +offset_min % 60);
|
|
||||||
return {pdf_object::STRING, buf + offset};
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Descend the page tree from the given node, always following the first kid, and return
/// the first Page dictionary found, with its "n" and "generation" fields filled in.
/// A NIL object is returned when the tree is malformed, unsupported, or circular.
static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_generation) {
  // Broken or evil documents may form a circular graph, so remember which nodes were seen
  std::set<std::pair<uint, uint>> visited;
  while (visited.insert({node_n, node_generation}).second) {
    auto obj = pdf.get(node_n, node_generation);
    if (obj.type != pdf_object::DICT)
      return {pdf_object::NIL};

    // Out of convenience; these aren't filled normally
    obj.n = node_n;
    obj.generation = node_generation;

    auto type = obj.dict.find("Type");
    if (type == obj.dict.end() || type->second.type != pdf_object::NAME)
      return {pdf_object::NIL};
    if (type->second.string == "Page")
      return obj;
    if (type->second.string != "Pages")
      return {pdf_object::NIL};

    // XXX technically speaking, this may be an indirect reference. The correct way to solve this
    // seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
    auto kids = obj.dict.find("Kids");
    if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY
      || kids->second.array.empty()
      || kids->second.array.at(0).type != pdf_object::REFERENCE)
      return {pdf_object::NIL};

    // Continue with the first kid; the values are copied out before "obj" goes away
    const auto& first = kids->second.array.at(0);
    node_n = first.n;
    node_generation = first.generation;
  }

  // We have come back to a node that has already been visited
  return {pdf_object::NIL};
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
static std::string pkcs12_path, pkcs12_pass;
|
|
||||||
|
|
||||||
// /All/ bytes are checked, except for the signature hexstring itself
|
|
||||||
static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, size_t sign_len) {
|
|
||||||
size_t tail_off = sign_off + sign_len, tail_len = document.size() - tail_off;
|
|
||||||
if (pkcs12_path.empty())
|
|
||||||
return "undefined path to the signing key";
|
|
||||||
|
|
||||||
auto pkcs12_fp = fopen(pkcs12_path.c_str(), "r");
|
|
||||||
if (!pkcs12_fp)
|
|
||||||
return pkcs12_path + ": " + strerror(errno);
|
|
||||||
|
|
||||||
// Abandon hope, all ye who enter OpenSSL! Half of it is undocumented.
|
|
||||||
OpenSSL_add_all_algorithms();
|
|
||||||
ERR_load_crypto_strings();
|
|
||||||
ERR_clear_error();
|
|
||||||
|
|
||||||
PKCS12* p12 = nullptr;
|
|
||||||
EVP_PKEY* private_key = nullptr;
|
|
||||||
X509* certificate = nullptr;
|
|
||||||
STACK_OF(X509)* chain = nullptr;
|
|
||||||
PKCS7* p7 = nullptr;
|
|
||||||
int len = 0, sign_flags = PKCS7_DETACHED | PKCS7_BINARY | PKCS7_NOSMIMECAP | PKCS7_PARTIAL;
|
|
||||||
BIO* p7bio = nullptr;
|
|
||||||
unsigned char* buf = nullptr;
|
|
||||||
|
|
||||||
// OpenSSL error reasons will usually be of more value than any distinction I can come up with
|
|
||||||
std::string err = "OpenSSL failure";
|
|
||||||
|
|
||||||
if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr))
|
|
||||||
|| !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
|
|
||||||
err = pkcs12_path + ": parse failure";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
if (!private_key || !certificate) {
|
|
||||||
err = pkcs12_path + ": must contain a private key and a valid certificate chain";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
// Prevent useless signatures -- makes pdfsig from poppler happy at least (and NSS by extension)
|
|
||||||
if (!(X509_get_key_usage(certificate) & (KU_DIGITAL_SIGNATURE | KU_NON_REPUDIATION))) {
|
|
||||||
err = "the certificate's key usage must include digital signatures or non-repudiation";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
if (!(X509_get_extended_key_usage(certificate) & (XKU_SMIME | XKU_ANYEKU))) {
|
|
||||||
err = "the certificate's extended key usage must include S/MIME";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
#if 0 // This happily ignores XKU_ANYEKU and I want my tiny world to make a tiny bit more sense
|
|
||||||
if (X509_check_purpose(certificate, X509_PURPOSE_SMIME_SIGN, false /* not a CA certificate */)) {
|
|
||||||
err = "the certificate can't be used for S/MIME digital signatures";
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
|
|
||||||
if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags))
|
|
||||||
|| !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
|
|
||||||
goto error;
|
|
||||||
// For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
|
|
||||||
// PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
|
|
||||||
for (int i = 0; i < sk_X509_num(chain); i++)
|
|
||||||
if (!PKCS7_add_certificate(p7, sk_X509_value(chain, i)))
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
// Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
|
|
||||||
// a copy of the whole document. Hopefully this writes directly into a digest BIO.
|
|
||||||
if (!(p7bio = PKCS7_dataInit(p7, nullptr))
|
|
||||||
|| (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off)
|
|
||||||
|| (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len)
|
|
||||||
|| BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
|
|
||||||
goto error;
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
{
|
|
||||||
// Debugging: openssl cms -inform PEM -in pdf_signature.pem -noout -cmsout -print
|
|
||||||
// Context: https://stackoverflow.com/a/29253469
|
|
||||||
auto fp = fopen("pdf_signature.pem", "wb");
|
|
||||||
assert(PEM_write_PKCS7(fp, p7) && !fclose(fp));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if ((len = i2d_PKCS7(p7, &buf)) < 0)
|
|
||||||
goto error;
|
|
||||||
if (size_t(len) * 2 > sign_len - 2 /* hexstring quotes */) {
|
|
||||||
// The obvious solution is to increase the allocation... or spend a week reading specifications
|
|
||||||
// while losing all faith in humanity as a species, and skip the PKCS7 API entirely
|
|
||||||
err = ssprintf("not enough space reserved for the signature (%zu nibbles vs %zu nibbles)",
|
|
||||||
sign_len - 2, size_t(len) * 2);
|
|
||||||
goto error;
|
|
||||||
}
|
|
||||||
for (int i = 0; i < len; i++) {
|
|
||||||
document[sign_off + 2 * i + 1] = "0123456789abcdef"[buf[i] / 16];
|
|
||||||
document[sign_off + 2 * i + 2] = "0123456789abcdef"[buf[i] % 16];
|
|
||||||
}
|
|
||||||
err.clear();
|
|
||||||
|
|
||||||
error:
|
|
||||||
OPENSSL_free(buf);
|
|
||||||
BIO_free_all(p7bio);
|
|
||||||
PKCS7_free(p7);
|
|
||||||
sk_X509_pop_free(chain, X509_free);
|
|
||||||
X509_free(certificate);
|
|
||||||
EVP_PKEY_free(private_key);
|
|
||||||
PKCS12_free(p12);
|
|
||||||
|
|
||||||
// In any case, clear the error stack (it's a queue, really) to avoid confusion elsewhere
|
|
||||||
while (auto code = ERR_get_error())
|
|
||||||
if (auto reason = ERR_reason_error_string(code))
|
|
||||||
err = err + "; " + reason;
|
|
||||||
|
|
||||||
fclose(pkcs12_fp);
|
|
||||||
return err;
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/// The presumption here is that the document is valid and that it doesn't employ cross-reference
|
|
||||||
/// streams from PDF 1.5, or at least constitutes a hybrid-reference file. The results with
|
|
||||||
/// PDF 2.0 (2017) are currently unknown as the standard costs money.
|
|
||||||
///
|
|
||||||
/// Carelessly assumes that the version of the original document is at most PDF 1.6.
|
|
||||||
///
|
|
||||||
/// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
|
|
||||||
/// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
|
|
||||||
/// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
|
|
||||||
static std::string pdf_sign(std::string& document) {
|
|
||||||
pdf_updater pdf(document);
|
|
||||||
auto err = pdf.initialize();
|
|
||||||
if (!err.empty())
|
|
||||||
return err;
|
|
||||||
|
|
||||||
auto root_ref = pdf.trailer.find("Root");
|
|
||||||
if (root_ref == pdf.trailer.end() || root_ref->second.type != pdf_object::REFERENCE)
|
|
||||||
return "trailer does not contain a reference to Root";
|
|
||||||
auto root = pdf.get(root_ref->second.n, root_ref->second.generation);
|
|
||||||
if (root.type != pdf_object::DICT)
|
|
||||||
return "invalid Root dictionary reference";
|
|
||||||
|
|
||||||
// 8.7 Digital Signatures - /signature dictionary/
|
|
||||||
auto sigdict_n = pdf.allocate();
|
|
||||||
size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
|
|
||||||
pdf.update(sigdict_n, [&]{
|
|
||||||
// The timestamp is important for Adobe Acrobat Reader DC. The ideal would be to use RFC 3161.
|
|
||||||
pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
|
|
||||||
" /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
|
|
||||||
byterange_off = pdf.document.size();
|
|
||||||
pdf.document.append((byterange_len = 32 /* fine for a gigabyte */), ' ');
|
|
||||||
pdf.document.append("\n /Contents <");
|
|
||||||
sign_off = pdf.document.size();
|
|
||||||
pdf.document.append((sign_len = 8192 /* certificate, digest, encrypted digest, ... */), '0');
|
|
||||||
pdf.document.append("> >>");
|
|
||||||
|
|
||||||
// We actually need to exclude the hexstring quotes from signing
|
|
||||||
sign_off -= 1;
|
|
||||||
sign_len += 2;
|
|
||||||
});
|
|
||||||
|
|
||||||
// 8.6.3 Field Types - Signature Fields
|
|
||||||
pdf_object sigfield{pdf_object::DICT};
|
|
||||||
sigfield.dict.insert({"FT", {pdf_object::NAME, "Sig"}});
|
|
||||||
sigfield.dict.insert({"V", {pdf_object::REFERENCE, sigdict_n, 0}});
|
|
||||||
// 8.4.5 Annotations Types - Widget Annotations
|
|
||||||
// We can merge the Signature Annotation and omit Kids here
|
|
||||||
sigfield.dict.insert({"Subtype", {pdf_object::NAME, "Widget"}});
|
|
||||||
sigfield.dict.insert({"F", {pdf_object::NUMERIC, 2 /* Hidden */}});
|
|
||||||
sigfield.dict.insert({"T", {pdf_object::STRING, "Signature1"}});
|
|
||||||
sigfield.dict.insert({"Rect", {std::vector<pdf_object>{
|
|
||||||
{pdf_object::NUMERIC, 0},
|
|
||||||
{pdf_object::NUMERIC, 0},
|
|
||||||
{pdf_object::NUMERIC, 0},
|
|
||||||
{pdf_object::NUMERIC, 0},
|
|
||||||
}}});
|
|
||||||
|
|
||||||
auto sigfield_n = pdf.allocate();
|
|
||||||
pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); });
|
|
||||||
|
|
||||||
auto pages_ref = root.dict.find("Pages");
|
|
||||||
if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
|
|
||||||
return "invalid Pages reference";
|
|
||||||
auto page = pdf_get_first_page(pdf, pages_ref->second.n, pages_ref->second.generation);
|
|
||||||
if (page.type != pdf_object::DICT)
|
|
||||||
return "invalid or unsupported page tree";
|
|
||||||
|
|
||||||
// XXX assuming this won't be an indirectly referenced array
|
|
||||||
auto& annots = page.dict["Annots"];
|
|
||||||
if (annots.type != pdf_object::ARRAY)
|
|
||||||
annots = {pdf_object::ARRAY};
|
|
||||||
annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
|
|
||||||
pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });
|
|
||||||
|
|
||||||
// 8.6.1 Interactive Form Dictionary
|
|
||||||
// XXX assuming there are no forms already, overwriting everything
|
|
||||||
root.dict["AcroForm"] = {std::map<std::string, pdf_object>{
|
|
||||||
{"Fields", {std::vector<pdf_object>{
|
|
||||||
{pdf_object::REFERENCE, sigfield_n, 0}
|
|
||||||
}}},
|
|
||||||
{"SigFlags", {pdf_object::NUMERIC, 3 /* SignaturesExist | AppendOnly */}}
|
|
||||||
}};
|
|
||||||
|
|
||||||
// Upgrade the document version for SHA-256 etc.
|
|
||||||
// XXX assuming that it's not newer than 1.6 already -- while Cairo can't currently use a newer
|
|
||||||
// version that 1.5, it's not a bad idea to use cairo_pdf_surface_restrict_to_version()
|
|
||||||
root.dict["Version"] = {pdf_object::NAME, "1.6"};
|
|
||||||
pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
|
|
||||||
pdf.flush_updates();
|
|
||||||
|
|
||||||
// Now that we know the length of everything, store byte ranges of what we're about to sign,
|
|
||||||
// which must be everything but the resulting signature itself
|
|
||||||
size_t tail_off = sign_off + sign_len, tail_len = pdf.document.size() - tail_off;
|
|
||||||
auto ranges = ssprintf("[0 %zu %zu %zu]", sign_off, tail_off, tail_len);
|
|
||||||
if (ranges.length() > byterange_len)
|
|
||||||
return "not enough space reserved for /ByteRange";
|
|
||||||
pdf.document.replace(byterange_off, std::min(ranges.length(), byterange_len), ranges);
|
|
||||||
return pdf_fill_in_signature(pdf.document, sign_off, sign_len);
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
__attribute__((format(printf, 2, 3)))
|
__attribute__((format(printf, 2, 3)))
|
||||||
static void die(int status, const char* format, ...) {
|
static void die(int status, const char* format, ...) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
va_start(ap, format);
|
va_start(ap, format);
|
||||||
if (isatty(fileno(stderr)))
|
if (isatty(fileno(stderr)))
|
||||||
vfprintf(stderr, ssprintf("\x1b[31m%s\x1b[0m\n", format).c_str(), ap);
|
vfprintf(stderr, ("\x1b[31m" + std::string(format) + "\x1b[0m\n").c_str(), ap);
|
||||||
else
|
else
|
||||||
vfprintf(stderr, format, ap);
|
vfprintf(stderr, format, ap);
|
||||||
va_end(ap);
|
va_end(ap);
|
||||||
@@ -949,8 +72,6 @@ int main(int argc, char* argv[]) {
|
|||||||
|
|
||||||
const char* input_path = argv[0];
|
const char* input_path = argv[0];
|
||||||
const char* output_path = argv[1];
|
const char* output_path = argv[1];
|
||||||
pkcs12_path = argv[2];
|
|
||||||
pkcs12_pass = argv[3];
|
|
||||||
|
|
||||||
std::string pdf_document;
|
std::string pdf_document;
|
||||||
if (auto fp = fopen(input_path, "rb")) {
|
if (auto fp = fopen(input_path, "rb")) {
|
||||||
@@ -964,7 +85,7 @@ int main(int argc, char* argv[]) {
|
|||||||
die(1, "%s: %s", input_path, strerror(errno));
|
die(1, "%s: %s", input_path, strerror(errno));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto err = pdf_sign(pdf_document);
|
auto err = pdf_simple_sign(pdf_document, argv[2], argv[3]);
|
||||||
if (!err.empty()) {
|
if (!err.empty()) {
|
||||||
die(2, "Error: %s", err.c_str());
|
die(2, "Error: %s", err.c_str());
|
||||||
}
|
}
|
||||||
|
|||||||
28
pdf-simple-sign.h
Normal file
28
pdf-simple-sign.h
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
// vim: set sw=2 ts=2 sts=2 et tw=100:
|
||||||
|
//
|
||||||
|
// pdf-simple-sign: simple PDF signer
|
||||||
|
//
|
||||||
|
// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
|
||||||
|
//
|
||||||
|
// Permission to use, copy, modify, and/or distribute this software for any
|
||||||
|
// purpose with or without fee is hereby granted.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||||
|
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||||
|
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||||
|
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
//
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
/// Sign basic PDF documents, as generated by e.g. Cairo, using the key-certificate pair
|
||||||
|
/// stored in the PKCS#12 file named `pkcs12_path`, with password `pkcs12_pass`.
|
||||||
|
/// Returns a non-empty error string on failure.
|
||||||
|
std::string pdf_simple_sign(std::string& document,
|
||||||
|
const std::string& pkcs12_path,
|
||||||
|
const std::string& pkcs12_pass);
|
||||||
906
pdf.cpp
Normal file
906
pdf.cpp
Normal file
@@ -0,0 +1,906 @@
|
|||||||
|
// vim: set sw=2 ts=2 sts=2 et tw=100:
|
||||||
|
//
|
||||||
|
// pdf-simple-sign: simple PDF signer
|
||||||
|
//
|
||||||
|
// Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
|
||||||
|
//
|
||||||
|
// Permission to use, copy, modify, and/or distribute this software for any
|
||||||
|
// purpose with or without fee is hereby granted.
|
||||||
|
//
|
||||||
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
||||||
|
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||||
|
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||||
|
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#undef NDEBUG
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <map>
|
||||||
|
#include <regex>
|
||||||
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
|
|
||||||
|
#if defined __GLIBCXX__ && __GLIBCXX__ < 20140422
|
||||||
|
#error Need libstdc++ >= 4.9 for <regex>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <openssl/err.h>
|
||||||
|
#include <openssl/x509v3.h>
|
||||||
|
#include <openssl/pkcs12.h>
|
||||||
|
|
||||||
|
#include "pdf-simple-sign.h"
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
using uint = unsigned int;
|
||||||
|
|
||||||
|
/// Join all strings in "v" into one, putting "delim" between each neighbouring pair.
/// An empty vector results in an empty string.
static std::string concatenate(const std::vector<std::string>& v, const std::string& delim) {
  std::string joined;
  for (auto it = v.begin(); it != v.end(); ++it) {
    // The delimiter only goes in between items, never in front of the first one
    if (it != v.begin())
      joined += delim;
    joined += *it;
  }
  return joined;
}
|
||||||
|
|
||||||
|
/// Format the arguments in the manner of printf() and return the result as a string.
/// An empty string is returned when the formatting fails.
template<typename... Args>
static std::string ssprintf(const std::string& format, Args... args) {
  // The first pass only measures; snprintf() returns a negative value on output errors,
  // which must not be allowed to end up in any size computation
  int length = std::snprintf(nullptr, 0, format.c_str(), args...);
  if (length < 0)
    return {};

  size_t size = size_t(length) + 1 /* the terminating NUL */;
  std::unique_ptr<char[]> buf(new char[size]);
  std::snprintf(buf.get(), size, format.c_str(), args...);
  return std::string(buf.get(), size_t(length));
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// A lexical token or a parsed object of the PDF syntax, told apart by the "type" field.
/// Objects may be composed of a single token or a sequence of them; the PDF Reference
/// itself doesn't use the notion of tokens.
struct pdf_object {
  enum type {
    END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
    // Simple tokens
    B_ARRAY, E_ARRAY, B_DICT, E_DICT,
    // Higher-level objects
    ARRAY, DICT, OBJECT, REFERENCE,
  } type = END;

  std::string string;                      ///< END (error message), COMMENT/KEYWORD/NAME/STRING
  double number = 0.;                      ///< BOOL, NUMERIC
  std::vector<pdf_object> array;           ///< ARRAY, OBJECT
  std::map<std::string, pdf_object> dict;  ///< DICT, in the future also STREAM
  uint n = 0, generation = 0;              ///< OBJECT, REFERENCE

  // Constructors filling in just the fields relevant for the particular kind of object
  pdf_object(enum type kind = END) : type(kind) {}
  pdf_object(enum type kind, double value) : type(kind), number(value) {}
  pdf_object(enum type kind, const std::string& text) : type(kind), string(text) {}
  pdf_object(enum type kind, uint id, uint gen) : type(kind), n(id), generation(gen) {}
  pdf_object(const std::vector<pdf_object>& items) : type(ARRAY), array(items) {}
  pdf_object(const std::map<std::string, pdf_object>& entries) : type(DICT), dict(entries) {}

  pdf_object(const pdf_object&) = default;
  pdf_object(pdf_object&&) = default;
  pdf_object& operator=(const pdf_object&) = default;
  pdf_object& operator=(pdf_object&&) = default;

  /// Tell whether the object is a NUMERIC whose value has no fractional part
  bool is_integer() const {
    if (type != NUMERIC)
      return false;
    double whole;
    return std::modf(number, &whole) == 0.;
  }
};
|
||||||
|
|
||||||
|
/// Basic lexical analyser for the Portable Document Format, giving limited error information
|
||||||
|
struct pdf_lexer {
|
||||||
|
const unsigned char* p;
|
||||||
|
pdf_lexer(const char* s) : p(reinterpret_cast<const unsigned char*>(s)) {}
|
||||||
|
|
||||||
|
static constexpr const char* oct_alphabet = "01234567";
|
||||||
|
static constexpr const char* dec_alphabet = "0123456789";
|
||||||
|
static constexpr const char* hex_alphabet = "0123456789abcdefABCDEF";
|
||||||
|
static constexpr const char* whitespace = "\t\n\f\r ";
|
||||||
|
static constexpr const char* delimiters = "()<>[]{}/%";
|
||||||
|
|
||||||
|
bool eat_newline(int ch) {
|
||||||
|
if (ch == '\r') {
|
||||||
|
if (*p == '\n') p++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return ch == '\n';
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf_object string() {
|
||||||
|
std::string value;
|
||||||
|
int parens = 1;
|
||||||
|
while (1) {
|
||||||
|
if (!*p) return {pdf_object::END, "unexpected end of string"};
|
||||||
|
auto ch = *p++;
|
||||||
|
if (eat_newline(ch)) ch = '\n';
|
||||||
|
else if (ch == '(') { parens++; }
|
||||||
|
else if (ch == ')') { if (!--parens) break; }
|
||||||
|
else if (ch == '\\') {
|
||||||
|
if (!*p) return {pdf_object::END, "unexpected end of string"};
|
||||||
|
switch ((ch = *p++)) {
|
||||||
|
case 'n': ch = '\n'; break;
|
||||||
|
case 'r': ch = '\r'; break;
|
||||||
|
case 't': ch = '\t'; break;
|
||||||
|
case 'b': ch = '\b'; break;
|
||||||
|
case 'f': ch = '\f'; break;
|
||||||
|
default:
|
||||||
|
if (eat_newline(ch))
|
||||||
|
continue;
|
||||||
|
std::string octal;
|
||||||
|
if (ch && strchr(oct_alphabet, ch)) {
|
||||||
|
octal += ch;
|
||||||
|
if (*p && strchr(oct_alphabet, *p)) octal += *p++;
|
||||||
|
if (*p && strchr(oct_alphabet, *p)) octal += *p++;
|
||||||
|
ch = std::stoi(octal, nullptr, 8);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
value += ch;
|
||||||
|
}
|
||||||
|
return {pdf_object::STRING, value};
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf_object string_hex() {
|
||||||
|
std::string value, buf;
|
||||||
|
while (*p != '>') {
|
||||||
|
if (!*p) return {pdf_object::END, "unexpected end of hex string"};
|
||||||
|
if (!strchr(hex_alphabet, *p))
|
||||||
|
return {pdf_object::END, "invalid hex string"};
|
||||||
|
buf += *p++;
|
||||||
|
if (buf.size() == 2) {
|
||||||
|
value += char(std::stoi(buf, nullptr, 16));
|
||||||
|
buf.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
if (!buf.empty()) value += char(std::stoi(buf + '0', nullptr, 16));
|
||||||
|
return {pdf_object::STRING, value};
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf_object name() {
|
||||||
|
std::string value;
|
||||||
|
while (!strchr(whitespace, *p) && !strchr(delimiters, *p)) {
|
||||||
|
auto ch = *p++;
|
||||||
|
if (ch == '#') {
|
||||||
|
std::string hexa;
|
||||||
|
if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
|
||||||
|
if (*p && strchr(hex_alphabet, *p)) hexa += *p++;
|
||||||
|
if (hexa.size() != 2)
|
||||||
|
return {pdf_object::END, "invalid name hexa escape"};
|
||||||
|
ch = char(std::stoi(hexa, nullptr, 16));
|
||||||
|
}
|
||||||
|
value += ch;
|
||||||
|
}
|
||||||
|
if (value.empty()) return {pdf_object::END, "unexpected end of name"};
|
||||||
|
return {pdf_object::NAME, value};
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf_object comment() {
|
||||||
|
std::string value;
|
||||||
|
while (*p && *p != '\r' && *p != '\n')
|
||||||
|
value += *p++;
|
||||||
|
return {pdf_object::COMMENT, value};
|
||||||
|
}
|
||||||
|
|
||||||
|
// XXX maybe invalid numbers should rather be interpreted as keywords
|
||||||
|
pdf_object number() {
|
||||||
|
std::string value;
|
||||||
|
if (*p == '-')
|
||||||
|
value += *p++;
|
||||||
|
bool real = false, digits = false;
|
||||||
|
while (*p) {
|
||||||
|
if (strchr(dec_alphabet, *p))
|
||||||
|
digits = true;
|
||||||
|
else if (*p == '.' && !real)
|
||||||
|
real = true;
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
value += *p++;
|
||||||
|
}
|
||||||
|
if (!digits) return {pdf_object::END, "invalid number"};
|
||||||
|
return {pdf_object::NUMERIC, std::stod(value, nullptr)};
|
||||||
|
}
|
||||||
|
|
||||||
|
pdf_object next() {
|
||||||
|
if (!*p)
|
||||||
|
return {pdf_object::END};
|
||||||
|
if (strchr("-0123456789.", *p))
|
||||||
|
return number();
|
||||||
|
|
||||||
|
// {} end up being keywords, we might want to error out on those
|
||||||
|
std::string value;
|
||||||
|
while (!strchr(whitespace, *p) && !strchr(delimiters, *p))
|
||||||
|
value += *p++;
|
||||||
|
if (!value.empty()) {
|
||||||
|
if (value == "null") return {pdf_object::NIL};
|
||||||
|
if (value == "true") return {pdf_object::BOOL, 1};
|
||||||
|
if (value == "false") return {pdf_object::BOOL, 0};
|
||||||
|
return {pdf_object::KEYWORD, value};
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (char ch = *p++) {
|
||||||
|
case '/': return name();
|
||||||
|
case '%': return comment();
|
||||||
|
case '(': return string();
|
||||||
|
case '[': return {pdf_object::B_ARRAY};
|
||||||
|
case ']': return {pdf_object::E_ARRAY};
|
||||||
|
case '<':
|
||||||
|
if (*p++ == '<')
|
||||||
|
return {pdf_object::B_DICT};
|
||||||
|
p--;
|
||||||
|
return string_hex();
|
||||||
|
case '>':
|
||||||
|
if (*p++ == '>')
|
||||||
|
return {pdf_object::E_DICT};
|
||||||
|
p--;
|
||||||
|
return {pdf_object::END, "unexpected '>'"};
|
||||||
|
default:
|
||||||
|
if (eat_newline(ch))
|
||||||
|
return {pdf_object::NL};
|
||||||
|
if (strchr(whitespace, ch))
|
||||||
|
return next();
|
||||||
|
return {pdf_object::END, "unexpected input"};
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// FIXME lines /should not/ be longer than 255 characters, some wrapping is in order
|
||||||
|
static std::string pdf_serialize(const pdf_object& o) {
|
||||||
|
switch (o.type) {
|
||||||
|
case pdf_object::NL: return "\n";
|
||||||
|
case pdf_object::NIL: return "null";
|
||||||
|
case pdf_object::BOOL: return o.number ? "true" : "false";
|
||||||
|
case pdf_object::NUMERIC:
|
||||||
|
{
|
||||||
|
if (o.is_integer()) return std::to_string((long long) o.number);
|
||||||
|
return std::to_string(o.number);
|
||||||
|
}
|
||||||
|
case pdf_object::KEYWORD: return o.string;
|
||||||
|
case pdf_object::NAME:
|
||||||
|
{
|
||||||
|
std::string escaped = "/";
|
||||||
|
for (char c : o.string) {
|
||||||
|
if (c == '#' || strchr(pdf_lexer::delimiters, c) || strchr(pdf_lexer::whitespace, c))
|
||||||
|
escaped += ssprintf("#%02x", c);
|
||||||
|
else
|
||||||
|
escaped += c;
|
||||||
|
}
|
||||||
|
return escaped;
|
||||||
|
}
|
||||||
|
case pdf_object::STRING:
|
||||||
|
{
|
||||||
|
std::string escaped;
|
||||||
|
for (char c : o.string) {
|
||||||
|
if (c == '\\' || c == '(' || c == ')')
|
||||||
|
escaped += '\\';
|
||||||
|
escaped += c;
|
||||||
|
}
|
||||||
|
return "(" + escaped + ")";
|
||||||
|
}
|
||||||
|
case pdf_object::B_ARRAY: return "[";
|
||||||
|
case pdf_object::E_ARRAY: return "]";
|
||||||
|
case pdf_object::B_DICT: return "<<";
|
||||||
|
case pdf_object::E_DICT: return ">>";
|
||||||
|
case pdf_object::ARRAY:
|
||||||
|
{
|
||||||
|
std::vector<std::string> v;
|
||||||
|
for (const auto& i : o.array)
|
||||||
|
v.push_back(pdf_serialize(i));
|
||||||
|
return "[ " + concatenate(v, " ") + " ]";
|
||||||
|
}
|
||||||
|
case pdf_object::DICT:
|
||||||
|
{
|
||||||
|
std::string s;
|
||||||
|
for (const auto& i : o.dict)
|
||||||
|
// FIXME the key is also supposed to be escaped by pdf_serialize()
|
||||||
|
s += " /" + i.first + " " + pdf_serialize(i.second);
|
||||||
|
return "<<" + s + " >>";
|
||||||
|
}
|
||||||
|
case pdf_object::OBJECT:
|
||||||
|
return ssprintf("%u %u obj\n", o.n, o.generation) + pdf_serialize(o.array.at(0)) + "\nendobj";
|
||||||
|
case pdf_object::REFERENCE:
|
||||||
|
return ssprintf("%u %u R", o.n, o.generation);
|
||||||
|
default:
|
||||||
|
assert(!"unsupported token for serialization");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Utility class to help read and possibly incrementally update PDF files
|
||||||
|
class pdf_updater {
|
||||||
|
struct ref {
|
||||||
|
size_t offset = 0; ///< File offset or N of the next free entry
|
||||||
|
uint generation = 0; ///< Object generation
|
||||||
|
bool free = true; ///< Whether this N has been deleted
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<ref> xref; ///< Cross-reference table
|
||||||
|
size_t xref_size = 0; ///< Current cross-reference table size, correlated to xref.size()
|
||||||
|
std::set<uint> updated; ///< List of updated objects
|
||||||
|
|
||||||
|
pdf_object parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
|
||||||
|
pdf_object parse_R(std::vector<pdf_object>& stack) const;
|
||||||
|
pdf_object parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const;
|
||||||
|
std::string load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries);
|
||||||
|
|
||||||
|
public:
|
||||||
|
/// The new trailer dictionary to be written, initialized with the old one
|
||||||
|
std::map<std::string, pdf_object> trailer;
|
||||||
|
|
||||||
|
std::string& document;
|
||||||
|
pdf_updater(std::string& document) : document(document) {}
|
||||||
|
|
||||||
|
/// Build the cross-reference table and prepare a new trailer dictionary
|
||||||
|
std::string initialize();
|
||||||
|
/// Retrieve an object by its number and generation -- may return NIL or END with an error
|
||||||
|
pdf_object get(uint n, uint generation) const;
|
||||||
|
/// Allocate a new object number
|
||||||
|
uint allocate();
|
||||||
|
/// Append an updated object to the end of the document
|
||||||
|
void update(uint n, std::function<void()> fill);
|
||||||
|
/// Write an updated cross-reference table and trailer
|
||||||
|
void flush_updates();
|
||||||
|
};
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// If the object is an error, forward its message, otherwise return err.
|
||||||
|
static std::string pdf_error(const pdf_object& o, const char* err) {
|
||||||
|
if (o.type != pdf_object::END || o.string.empty()) return err;
|
||||||
|
return o.string;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Finish parsing an indirect object after its "obj" keyword has been read: the object
/// number and generation are taken off the stack, and everything up to "endobj" is collected.
/// Returns an OBJECT, or an END object with an error message.
pdf_object pdf_updater::parse_obj(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
  if (stack.size() < 2)
    return {pdf_object::END, "missing object ID pair"};

  // The generation is on the top, the object number is right below it
  const pdf_object generation = stack.back();
  stack.pop_back();
  const pdf_object number = stack.back();
  stack.pop_back();

  auto fits_uint = [](const pdf_object& o) {
    return o.is_integer() && o.number >= 0 && o.number <= UINT_MAX;
  };
  if (!fits_uint(generation) || !fits_uint(number))
    return {pdf_object::END, "invalid object ID pair"};

  pdf_object result{pdf_object::OBJECT};
  result.n = number.number;
  result.generation = generation.number;

  // The contents are parsed with the object's own array serving as the stack
  for (;;) {
    auto item = parse(lex, result.array);
    if (item.type == pdf_object::END)
      return {pdf_object::END, pdf_error(item, "object doesn't end")};
    if (item.type == pdf_object::KEYWORD && item.string == "endobj")
      return result;
    result.array.push_back(std::move(item));
  }
}
|
||||||
|
|
||||||
|
/// Build an indirect reference after its "R" keyword has been read, taking the object number
/// and generation off the stack. Returns a REFERENCE, or an END object with an error message.
pdf_object pdf_updater::parse_R(std::vector<pdf_object>& stack) const {
  if (stack.size() < 2)
    return {pdf_object::END, "missing reference ID pair"};

  // The generation is on the top, the object number is right below it
  const pdf_object generation = stack.back();
  stack.pop_back();
  const pdf_object number = stack.back();
  stack.pop_back();

  auto fits_uint = [](const pdf_object& o) {
    return o.is_integer() && o.number >= 0 && o.number <= UINT_MAX;
  };
  if (!fits_uint(generation) || !fits_uint(number))
    return {pdf_object::END, "invalid reference ID pair"};

  pdf_object result{pdf_object::REFERENCE};
  result.n = number.number;
  result.generation = generation.number;
  return result;
}
|
||||||
|
|
||||||
|
/// Read an object at the lexer's position. Not a strict parser.
|
||||||
|
pdf_object pdf_updater::parse(pdf_lexer& lex, std::vector<pdf_object>& stack) const {
|
||||||
|
auto token = lex.next();
|
||||||
|
switch (token.type) {
|
||||||
|
case pdf_object::NL:
|
||||||
|
case pdf_object::COMMENT:
|
||||||
|
// These are not important to parsing, not even for this procedure's needs
|
||||||
|
return parse(lex, stack);
|
||||||
|
case pdf_object::B_ARRAY:
|
||||||
|
{
|
||||||
|
std::vector<pdf_object> array;
|
||||||
|
while (1) {
|
||||||
|
auto object = parse(lex, array);
|
||||||
|
if (object.type == pdf_object::END)
|
||||||
|
return {pdf_object::END, pdf_error(object, "array doesn't end")};
|
||||||
|
if (object.type == pdf_object::E_ARRAY)
|
||||||
|
break;
|
||||||
|
array.push_back(std::move(object));
|
||||||
|
}
|
||||||
|
return array;
|
||||||
|
}
|
||||||
|
case pdf_object::B_DICT:
|
||||||
|
{
|
||||||
|
std::vector<pdf_object> array;
|
||||||
|
while (1) {
|
||||||
|
auto object = parse(lex, array);
|
||||||
|
if (object.type == pdf_object::END)
|
||||||
|
return {pdf_object::END, pdf_error(object, "dictionary doesn't end")};
|
||||||
|
if (object.type == pdf_object::E_DICT)
|
||||||
|
break;
|
||||||
|
array.push_back(std::move(object));
|
||||||
|
}
|
||||||
|
if (array.size() % 2)
|
||||||
|
return {pdf_object::END, "unbalanced dictionary"};
|
||||||
|
std::map<std::string, pdf_object> dict;
|
||||||
|
for (size_t i = 0; i < array.size(); i += 2) {
|
||||||
|
if (array[i].type != pdf_object::NAME)
|
||||||
|
return {pdf_object::END, "invalid dictionary key type"};
|
||||||
|
dict.insert({array[i].string, std::move(array[i + 1])});
|
||||||
|
}
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
case pdf_object::KEYWORD:
|
||||||
|
// Appears in the document body, typically needs to access the cross-reference table
|
||||||
|
// TODO use the xref to read /Length etc. once we actually need to read such objects;
|
||||||
|
// presumably streams can use the pdf_object::string member
|
||||||
|
if (token.string == "stream") return {pdf_object::END, "streams are not supported yet"};
|
||||||
|
if (token.string == "obj") return parse_obj(lex, stack);
|
||||||
|
if (token.string == "R") return parse_R(stack);
|
||||||
|
return token;
|
||||||
|
default:
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Load a classic cross-reference section starting at the lexer's position into "xref".
///
/// Reading stops right after the "trailer" keyword. Object numbers already present in
/// "loaded_entries" are left untouched, and newly loaded ones are added to it.
/// Returns an empty string on success, or an error message.
std::string pdf_updater::load_xref(pdf_lexer& lex, std::set<uint>& loaded_entries) {
	std::vector<pdf_object> throwaway_stack;
	{
		auto keyword = parse(lex, throwaway_stack);
		if (keyword.type != pdf_object::KEYWORD || keyword.string != "xref")
			return "invalid xref table";
	}
	while (1) {
		auto object = parse(lex, throwaway_stack);
		if (object.type == pdf_object::END)
			return pdf_error(object, "unexpected EOF while looking for the trailer");
		if (object.type == pdf_object::KEYWORD && object.string == "trailer")
			break;

		// Subsection header: the first object number and the number of entries
		auto second = parse(lex, throwaway_stack);
		if (!object.is_integer() || object.number < 0 || object.number > UINT_MAX
			|| !second.is_integer() || second.number < 0 || second.number > UINT_MAX)
			return "invalid xref section header";

		const size_t start = object.number;
		const size_t count = second.number;

		// Object numbers are handled as "uint" everywhere else; a subsection running past
		// UINT_MAX would be silently truncated when recorded in loaded_entries
		if (count && count - 1 > UINT_MAX - start)
			return "invalid xref section header";

		for (size_t i = 0; i < count; i++) {
			auto off = parse(lex, throwaway_stack);
			auto gen = parse(lex, throwaway_stack);
			auto key = parse(lex, throwaway_stack);
			if (!off.is_integer() || off.number < 0 || off.number > document.length()
				|| !gen.is_integer() || gen.number < 0 || gen.number > 65535
				|| key.type != pdf_object::KEYWORD)
				return "invalid xref entry";

			bool free = true;
			if (key.string == "n")
				free = false;
			else if (key.string != "f")
				return "invalid xref entry";

			// Whatever has been loaded for this object number before takes precedence
			const size_t n = start + i;
			if (loaded_entries.count(n))
				continue;
			if (n >= xref.size())
				xref.resize(n + 1);
			loaded_entries.insert(n);

			auto& ref = xref[n];
			ref.generation = gen.number;
			ref.offset = off.number;
			ref.free = free;
		}
	}
	return "";
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
std::string pdf_updater::initialize() {
|
||||||
|
// We only need to look for startxref roughly within the last kibibyte of the document
|
||||||
|
static std::regex haystack_re("[\\s\\S]*\\sstartxref\\s+(\\d+)\\s+%%EOF");
|
||||||
|
std::string haystack = document.substr(document.length() < 1024 ? 0 : document.length() - 1024);
|
||||||
|
|
||||||
|
std::smatch m;
|
||||||
|
if (!std::regex_search(haystack, m, haystack_re, std::regex_constants::match_continuous))
|
||||||
|
return "cannot find startxref";
|
||||||
|
|
||||||
|
size_t xref_offset = std::stoul(m.str(1)), last_xref_offset = xref_offset;
|
||||||
|
std::set<size_t> loaded_xrefs;
|
||||||
|
std::set<uint> loaded_entries;
|
||||||
|
|
||||||
|
std::vector<pdf_object> throwaway_stack;
|
||||||
|
while (1) {
|
||||||
|
if (loaded_xrefs.count(xref_offset))
|
||||||
|
return "circular xref offsets";
|
||||||
|
if (xref_offset >= document.length())
|
||||||
|
return "invalid xref offset";
|
||||||
|
|
||||||
|
pdf_lexer lex(document.c_str() + xref_offset);
|
||||||
|
auto err = load_xref(lex, loaded_entries);
|
||||||
|
if (!err.empty()) return err;
|
||||||
|
|
||||||
|
auto trailer = parse(lex, throwaway_stack);
|
||||||
|
if (trailer.type != pdf_object::DICT)
|
||||||
|
return pdf_error(trailer, "invalid trailer dictionary");
|
||||||
|
if (loaded_xrefs.empty())
|
||||||
|
this->trailer = trailer.dict;
|
||||||
|
loaded_xrefs.insert(xref_offset);
|
||||||
|
|
||||||
|
const auto prev_offset = trailer.dict.find("Prev");
|
||||||
|
if (prev_offset == trailer.dict.end())
|
||||||
|
break;
|
||||||
|
// FIXME we don't check for size_t over or underflow
|
||||||
|
if (!prev_offset->second.is_integer())
|
||||||
|
return "invalid Prev offset";
|
||||||
|
xref_offset = prev_offset->second.number;
|
||||||
|
}
|
||||||
|
|
||||||
|
trailer["Prev"] = {pdf_object::NUMERIC, double(last_xref_offset)};
|
||||||
|
const auto last_size = trailer.find("Size");
|
||||||
|
if (last_size == trailer.end() || !last_size->second.is_integer() ||
|
||||||
|
last_size->second.number <= 0)
|
||||||
|
return "invalid or missing cross-reference table Size";
|
||||||
|
|
||||||
|
xref_size = last_size->second.number;
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Retrieve an object by its number and generation.
///
/// Returns a NIL object when there is no such in-use entry in the cross-reference table,
/// an END object carrying an error message when the object cannot be read, and otherwise
/// the first thing found between "obj" and "endobj".
pdf_object pdf_updater::get(uint n, uint generation) const {
	// xref_size comes from the trailer's /Size and may exceed what has actually been loaded,
	// so the table itself needs to be checked as well before indexing it
	if (n >= xref_size || n >= xref.size())
		return {pdf_object::NIL};

	const auto& ref = xref[n];
	if (ref.free || ref.generation != generation || ref.offset >= document.length())
		return {pdf_object::NIL};

	pdf_lexer lex(document.c_str() + ref.offset);
	std::vector<pdf_object> stack;
	while (1) {
		auto object = parse(lex, stack);
		if (object.type == pdf_object::END)
			return object;
		if (object.type != pdf_object::OBJECT) {
			// Presumably the ID pair preceding "obj", which parse_obj() consumes
			stack.push_back(std::move(object));
			continue;
		}
		if (object.n != n || object.generation != generation)
			return {pdf_object::END, "object mismatch"};
		// "N G obj endobj" holds nothing to return; report it rather than throw
		if (object.array.empty())
			return {pdf_object::END, "object is empty"};
		return std::move(object.array.front());
	}
}
|
||||||
|
|
||||||
|
/// Reserve the next unused object number and make sure "xref" has a slot for it.
uint pdf_updater::allocate() {
	assert(xref_size < UINT_MAX);

	const auto allocated = xref_size;
	++xref_size;
	if (xref_size > xref.size())
		xref.resize(xref_size);

	// We don't make sure it gets a subsection in the update yet because we
	// make no attempts at fixing the linked list of free items either
	return allocated;
}
|
||||||
|
|
||||||
|
void pdf_updater::update(uint n, std::function<void()> fill) {
|
||||||
|
auto& ref = xref.at(n);
|
||||||
|
ref.offset = document.length() + 1;
|
||||||
|
ref.free = false;
|
||||||
|
updated.insert(n);
|
||||||
|
|
||||||
|
document += ssprintf("\n%u %u obj\n", n, ref.generation);
|
||||||
|
// Separately so that the callback can use document.length() to get the current offset
|
||||||
|
fill();
|
||||||
|
document += "\nendobj";
|
||||||
|
}
|
||||||
|
|
||||||
|
void pdf_updater::flush_updates() {
|
||||||
|
std::map<uint, size_t> groups;
|
||||||
|
for (auto i = updated.cbegin(); i != updated.cend(); ) {
|
||||||
|
size_t start = *i, count = 1;
|
||||||
|
while (++i != updated.cend() && *i == start + count)
|
||||||
|
count++;
|
||||||
|
groups[start] = count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Taking literally "Each cross-reference section begins with a line containing the keyword xref.
|
||||||
|
// Following this line are one or more cross-reference subsections." from 3.4.3 in PDF Reference
|
||||||
|
if (groups.empty())
|
||||||
|
groups[0] = 0;
|
||||||
|
|
||||||
|
auto startxref = document.length() + 1;
|
||||||
|
document += "\nxref\n";
|
||||||
|
for (const auto& g : groups) {
|
||||||
|
document += ssprintf("%u %zu\n", g.first, g.second);
|
||||||
|
for (size_t i = 0; i < g.second; i++) {
|
||||||
|
auto& ref = xref[g.first + i];
|
||||||
|
document += ssprintf("%010zu %05u %c \n", ref.offset, ref.generation, "nf"[!!ref.free]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
trailer["Size"] = {pdf_object::NUMERIC, double(xref_size)};
|
||||||
|
document += "trailer\n" + pdf_serialize(trailer)
|
||||||
|
+ ssprintf("\nstartxref\n%zu\n%%%%EOF\n", startxref);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Make a PDF object representing the given point in time
|
||||||
|
static pdf_object pdf_date(time_t timestamp) {
|
||||||
|
struct tm parts;
|
||||||
|
assert(localtime_r(×tamp, &parts));
|
||||||
|
|
||||||
|
char buf[64];
|
||||||
|
assert(strftime(buf, sizeof buf, "D:%Y%m%d%H%M%S", &parts));
|
||||||
|
|
||||||
|
std::string offset = "Z";
|
||||||
|
auto offset_min = parts.tm_gmtoff / 60;
|
||||||
|
if (parts.tm_gmtoff < 0)
|
||||||
|
offset = ssprintf("-%02ld'%02ld'", -offset_min / 60, -offset_min % 60);
|
||||||
|
if (parts.tm_gmtoff > 0)
|
||||||
|
offset = ssprintf("+%02ld'%02ld'", +offset_min / 60, +offset_min % 60);
|
||||||
|
return {pdf_object::STRING, buf + offset};
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Descend through the first child of each /Pages node until a /Page dictionary is found.
///
/// Returns the page dictionary with its "n" and "generation" filled in, or a NIL object when
/// the tree cannot be followed, including when it loops back onto a node already visited.
static pdf_object pdf_get_first_page(pdf_updater& pdf, uint node_n, uint node_generation) {
	// Remembering the visited nodes guards against an evil circular graph
	std::set<std::pair<uint, uint>> visited;
	while (visited.insert({node_n, node_generation}).second) {
		auto obj = pdf.get(node_n, node_generation);
		if (obj.type != pdf_object::DICT)
			return {pdf_object::NIL};

		// Out of convenience; these aren't filled normally
		obj.n = node_n;
		obj.generation = node_generation;

		auto type = obj.dict.find("Type");
		if (type == obj.dict.end() || type->second.type != pdf_object::NAME)
			return {pdf_object::NIL};
		if (type->second.string == "Page")
			return obj;
		if (type->second.string != "Pages")
			return {pdf_object::NIL};

		// XXX technically speaking, this may be an indirect reference. The correct way to solve this
		// seems to be having "pdf_updater" include a wrapper around "obj.dict.find"
		auto kids = obj.dict.find("Kids");
		if (kids == obj.dict.end() || kids->second.type != pdf_object::ARRAY
			|| kids->second.array.empty()
			|| kids->second.array.at(0).type != pdf_object::REFERENCE)
			return {pdf_object::NIL};

		// Continue with the first kid
		node_n = kids->second.array.at(0).n;
		node_generation = kids->second.array.at(0).generation;
	}
	return {pdf_object::NIL};
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// /All/ bytes are checked, except for the signature hexstring itself
|
||||||
|
static std::string pdf_fill_in_signature(std::string& document, size_t sign_off, size_t sign_len,
|
||||||
|
const std::string& pkcs12_path,
|
||||||
|
const std::string& pkcs12_pass) {
|
||||||
|
size_t tail_off = sign_off + sign_len, tail_len = document.size() - tail_off;
|
||||||
|
if (pkcs12_path.empty())
|
||||||
|
return "undefined path to the signing key";
|
||||||
|
|
||||||
|
auto pkcs12_fp = fopen(pkcs12_path.c_str(), "r");
|
||||||
|
if (!pkcs12_fp)
|
||||||
|
return pkcs12_path + ": " + strerror(errno);
|
||||||
|
|
||||||
|
// Abandon hope, all ye who enter OpenSSL! Half of it is undocumented.
|
||||||
|
OpenSSL_add_all_algorithms();
|
||||||
|
ERR_load_crypto_strings();
|
||||||
|
ERR_clear_error();
|
||||||
|
|
||||||
|
PKCS12* p12 = nullptr;
|
||||||
|
EVP_PKEY* private_key = nullptr;
|
||||||
|
X509* certificate = nullptr;
|
||||||
|
STACK_OF(X509)* chain = nullptr;
|
||||||
|
PKCS7* p7 = nullptr;
|
||||||
|
int len = 0, sign_flags = PKCS7_DETACHED | PKCS7_BINARY | PKCS7_NOSMIMECAP | PKCS7_PARTIAL;
|
||||||
|
BIO* p7bio = nullptr;
|
||||||
|
unsigned char* buf = nullptr;
|
||||||
|
|
||||||
|
// OpenSSL error reasons will usually be of more value than any distinction I can come up with
|
||||||
|
std::string err = "OpenSSL failure";
|
||||||
|
|
||||||
|
if (!(p12 = d2i_PKCS12_fp(pkcs12_fp, nullptr))
|
||||||
|
|| !PKCS12_parse(p12, pkcs12_pass.c_str(), &private_key, &certificate, &chain)) {
|
||||||
|
err = pkcs12_path + ": parse failure";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
if (!private_key || !certificate) {
|
||||||
|
err = pkcs12_path + ": must contain a private key and a valid certificate chain";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
// Prevent useless signatures -- makes pdfsig from poppler happy at least (and NSS by extension)
|
||||||
|
if (!(X509_get_key_usage(certificate) & (KU_DIGITAL_SIGNATURE | KU_NON_REPUDIATION))) {
|
||||||
|
err = "the certificate's key usage must include digital signatures or non-repudiation";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
if (!(X509_get_extended_key_usage(certificate) & (XKU_SMIME | XKU_ANYEKU))) {
|
||||||
|
err = "the certificate's extended key usage must include S/MIME";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
#if 0 // This happily ignores XKU_ANYEKU and I want my tiny world to make a tiny bit more sense
|
||||||
|
if (X509_check_purpose(certificate, X509_PURPOSE_SMIME_SIGN, false /* not a CA certificate */)) {
|
||||||
|
err = "the certificate can't be used for S/MIME digital signatures";
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// The default digest is SHA1, which is mildly insecure now -- hence using PKCS7_sign_add_signer
|
||||||
|
if (!(p7 = PKCS7_sign(nullptr, nullptr, nullptr, nullptr, sign_flags))
|
||||||
|
|| !PKCS7_sign_add_signer(p7, certificate, private_key, EVP_sha256(), sign_flags))
|
||||||
|
goto error;
|
||||||
|
// For RFC 3161, this is roughly how a timestamp token would be attached (see Appendix A):
|
||||||
|
// PKCS7_add_attribute(signer_info, NID_id_smime_aa_timeStampToken, V_ASN1_SEQUENCE, value)
|
||||||
|
for (int i = 0; i < sk_X509_num(chain); i++)
|
||||||
|
if (!PKCS7_add_certificate(p7, sk_X509_value(chain, i)))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
// Adaptation of the innards of the undocumented PKCS7_final() -- I didn't feel like making
|
||||||
|
// a copy of the whole document. Hopefully this writes directly into a digest BIO.
|
||||||
|
if (!(p7bio = PKCS7_dataInit(p7, nullptr))
|
||||||
|
|| (ssize_t) sign_off != BIO_write(p7bio, document.data(), sign_off)
|
||||||
|
|| (ssize_t) tail_len != BIO_write(p7bio, document.data() + tail_off, tail_len)
|
||||||
|
|| BIO_flush(p7bio) != 1 || !PKCS7_dataFinal(p7, p7bio))
|
||||||
|
goto error;
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
{
|
||||||
|
// Debugging: openssl cms -inform PEM -in pdf_signature.pem -noout -cmsout -print
|
||||||
|
// Context: https://stackoverflow.com/a/29253469
|
||||||
|
auto fp = fopen("pdf_signature.pem", "wb");
|
||||||
|
assert(PEM_write_PKCS7(fp, p7) && !fclose(fp));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ((len = i2d_PKCS7(p7, &buf)) < 0)
|
||||||
|
goto error;
|
||||||
|
if (size_t(len) * 2 > sign_len - 2 /* hexstring quotes */) {
|
||||||
|
// The obvious solution is to increase the allocation... or spend a week reading specifications
|
||||||
|
// while losing all faith in humanity as a species, and skip the PKCS7 API entirely
|
||||||
|
err = ssprintf("not enough space reserved for the signature (%zu nibbles vs %zu nibbles)",
|
||||||
|
sign_len - 2, size_t(len) * 2);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < len; i++) {
|
||||||
|
document[sign_off + 2 * i + 1] = "0123456789abcdef"[buf[i] / 16];
|
||||||
|
document[sign_off + 2 * i + 2] = "0123456789abcdef"[buf[i] % 16];
|
||||||
|
}
|
||||||
|
err.clear();
|
||||||
|
|
||||||
|
error:
|
||||||
|
OPENSSL_free(buf);
|
||||||
|
BIO_free_all(p7bio);
|
||||||
|
PKCS7_free(p7);
|
||||||
|
sk_X509_pop_free(chain, X509_free);
|
||||||
|
X509_free(certificate);
|
||||||
|
EVP_PKEY_free(private_key);
|
||||||
|
PKCS12_free(p12);
|
||||||
|
|
||||||
|
// In any case, clear the error stack (it's a queue, really) to avoid confusion elsewhere
|
||||||
|
while (auto code = ERR_get_error())
|
||||||
|
if (auto reason = ERR_reason_error_string(code))
|
||||||
|
err = err + "; " + reason;
|
||||||
|
|
||||||
|
fclose(pkcs12_fp);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
// The presumption here is that the document is valid and that it doesn't employ cross-reference
|
||||||
|
// streams from PDF 1.5, or at least constitutes a hybrid-reference file. The results with
|
||||||
|
// PDF 2.0 (2017) are currently unknown as the standard costs money.
|
||||||
|
//
|
||||||
|
// Carelessly assumes that the version of the original document is at most PDF 1.6.
|
||||||
|
//
|
||||||
|
// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
|
||||||
|
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
|
||||||
|
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
|
||||||
|
std::string pdf_simple_sign(std::string& document,
|
||||||
|
const std::string& pkcs12_path,
|
||||||
|
const std::string& pkcs12_pass) {
|
||||||
|
pdf_updater pdf(document);
|
||||||
|
auto err = pdf.initialize();
|
||||||
|
if (!err.empty())
|
||||||
|
return err;
|
||||||
|
|
||||||
|
auto root_ref = pdf.trailer.find("Root");
|
||||||
|
if (root_ref == pdf.trailer.end() || root_ref->second.type != pdf_object::REFERENCE)
|
||||||
|
return "trailer does not contain a reference to Root";
|
||||||
|
auto root = pdf.get(root_ref->second.n, root_ref->second.generation);
|
||||||
|
if (root.type != pdf_object::DICT)
|
||||||
|
return "invalid Root dictionary reference";
|
||||||
|
|
||||||
|
// 8.7 Digital Signatures - /signature dictionary/
|
||||||
|
auto sigdict_n = pdf.allocate();
|
||||||
|
size_t byterange_off = 0, byterange_len = 0, sign_off = 0, sign_len = 0;
|
||||||
|
pdf.update(sigdict_n, [&]{
|
||||||
|
// The timestamp is important for Adobe Acrobat Reader DC. The ideal would be to use RFC 3161.
|
||||||
|
pdf.document.append("<< /Type/Sig /Filter/Adobe.PPKLite /SubFilter/adbe.pkcs7.detached\n"
|
||||||
|
" /M" + pdf_serialize(pdf_date(time(nullptr))) + " /ByteRange ");
|
||||||
|
byterange_off = pdf.document.size();
|
||||||
|
pdf.document.append((byterange_len = 32 /* fine for a gigabyte */), ' ');
|
||||||
|
pdf.document.append("\n /Contents <");
|
||||||
|
sign_off = pdf.document.size();
|
||||||
|
pdf.document.append((sign_len = 8192 /* certificate, digest, encrypted digest, ... */), '0');
|
||||||
|
pdf.document.append("> >>");
|
||||||
|
|
||||||
|
// We actually need to exclude the hexstring quotes from signing
|
||||||
|
sign_off -= 1;
|
||||||
|
sign_len += 2;
|
||||||
|
});
|
||||||
|
|
||||||
|
// 8.6.3 Field Types - Signature Fields
|
||||||
|
pdf_object sigfield{pdf_object::DICT};
|
||||||
|
sigfield.dict.insert({"FT", {pdf_object::NAME, "Sig"}});
|
||||||
|
sigfield.dict.insert({"V", {pdf_object::REFERENCE, sigdict_n, 0}});
|
||||||
|
// 8.4.5 Annotations Types - Widget Annotations
|
||||||
|
// We can merge the Signature Annotation and omit Kids here
|
||||||
|
sigfield.dict.insert({"Subtype", {pdf_object::NAME, "Widget"}});
|
||||||
|
sigfield.dict.insert({"F", {pdf_object::NUMERIC, 2 /* Hidden */}});
|
||||||
|
sigfield.dict.insert({"T", {pdf_object::STRING, "Signature1"}});
|
||||||
|
sigfield.dict.insert({"Rect", {std::vector<pdf_object>{
|
||||||
|
{pdf_object::NUMERIC, 0},
|
||||||
|
{pdf_object::NUMERIC, 0},
|
||||||
|
{pdf_object::NUMERIC, 0},
|
||||||
|
{pdf_object::NUMERIC, 0},
|
||||||
|
}}});
|
||||||
|
|
||||||
|
auto sigfield_n = pdf.allocate();
|
||||||
|
pdf.update(sigfield_n, [&]{ pdf.document += pdf_serialize(sigfield); });
|
||||||
|
|
||||||
|
auto pages_ref = root.dict.find("Pages");
|
||||||
|
if (pages_ref == root.dict.end() || pages_ref->second.type != pdf_object::REFERENCE)
|
||||||
|
return "invalid Pages reference";
|
||||||
|
auto page = pdf_get_first_page(pdf, pages_ref->second.n, pages_ref->second.generation);
|
||||||
|
if (page.type != pdf_object::DICT)
|
||||||
|
return "invalid or unsupported page tree";
|
||||||
|
|
||||||
|
// XXX assuming this won't be an indirectly referenced array
|
||||||
|
auto& annots = page.dict["Annots"];
|
||||||
|
if (annots.type != pdf_object::ARRAY)
|
||||||
|
annots = {pdf_object::ARRAY};
|
||||||
|
annots.array.emplace_back(pdf_object::REFERENCE, sigfield_n, 0);
|
||||||
|
pdf.update(page.n, [&]{ pdf.document += pdf_serialize(page); });
|
||||||
|
|
||||||
|
// 8.6.1 Interactive Form Dictionary
|
||||||
|
// XXX assuming there are no forms already, overwriting everything
|
||||||
|
root.dict["AcroForm"] = {std::map<std::string, pdf_object>{
|
||||||
|
{"Fields", {std::vector<pdf_object>{
|
||||||
|
{pdf_object::REFERENCE, sigfield_n, 0}
|
||||||
|
}}},
|
||||||
|
{"SigFlags", {pdf_object::NUMERIC, 3 /* SignaturesExist | AppendOnly */}}
|
||||||
|
}};
|
||||||
|
|
||||||
|
// Upgrade the document version for SHA-256 etc.
|
||||||
|
// XXX assuming that it's not newer than 1.6 already -- while Cairo can't currently use a newer
|
||||||
|
// version that 1.5, it's not a bad idea to use cairo_pdf_surface_restrict_to_version()
|
||||||
|
root.dict["Version"] = {pdf_object::NAME, "1.6"};
|
||||||
|
pdf.update(root_ref->second.n, [&]{ pdf.document += pdf_serialize(root); });
|
||||||
|
pdf.flush_updates();
|
||||||
|
|
||||||
|
// Now that we know the length of everything, store byte ranges of what we're about to sign,
|
||||||
|
// which must be everything but the resulting signature itself
|
||||||
|
size_t tail_off = sign_off + sign_len, tail_len = pdf.document.size() - tail_off;
|
||||||
|
auto ranges = ssprintf("[0 %zu %zu %zu]", sign_off, tail_off, tail_len);
|
||||||
|
if (ranges.length() > byterange_len)
|
||||||
|
return "not enough space reserved for /ByteRange";
|
||||||
|
pdf.document.replace(byterange_off, std::min(ranges.length(), byterange_len), ranges);
|
||||||
|
return pdf_fill_in_signature(pdf.document, sign_off, sign_len, pkcs12_path, pkcs12_pass);
|
||||||
|
}
|
||||||
351
pdf/pdf.go
351
pdf/pdf.go
@@ -1,5 +1,5 @@
|
|||||||
//
|
//
|
||||||
// Copyright (c) 2018, Přemysl Janouch <p@janouch.name>
|
// Copyright (c) 2018, Přemysl Eric Janouch <p@janouch.name>
|
||||||
//
|
//
|
||||||
// Permission to use, copy, modify, and/or distribute this software for any
|
// Permission to use, copy, modify, and/or distribute this software for any
|
||||||
// purpose with or without fee is hereby granted.
|
// purpose with or without fee is hereby granted.
|
||||||
@@ -65,21 +65,14 @@ const (
|
|||||||
// Object is a PDF token/object thingy. Objects may be composed either from
|
// Object is a PDF token/object thingy. Objects may be composed either from
|
||||||
// one or a sequence of tokens. The PDF Reference doesn't actually speak
|
// one or a sequence of tokens. The PDF Reference doesn't actually speak
|
||||||
// of tokens.
|
// of tokens.
|
||||||
//
|
|
||||||
// TODO(p): We probably want constructors like NewString, NewBool, NewArray, ...
|
|
||||||
type Object struct {
|
type Object struct {
|
||||||
Kind ObjectKind
|
Kind ObjectKind
|
||||||
|
|
||||||
// End (error message), Comment/Keyword/Name/String
|
String string // Comment/Keyword/Name/String
|
||||||
String string
|
Number float64 // Bool, Numeric
|
||||||
// Bool, Numeric
|
Array []Object // Array, Indirect
|
||||||
Number float64
|
Dict map[string]Object // Dict, in the future also Stream
|
||||||
// Array, Indirect
|
N, Generation uint // Indirect, Reference
|
||||||
Array []Object
|
|
||||||
// Dict, in the future also Stream
|
|
||||||
Dict map[string]Object
|
|
||||||
// Indirect, Reference
|
|
||||||
N, Generation uint
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsInteger checks if the PDF object is an integer number.
|
// IsInteger checks if the PDF object is an integer number.
|
||||||
@@ -93,6 +86,48 @@ func (o *Object) IsUint() bool {
|
|||||||
return o.IsInteger() && o.Number >= 0 && o.Number <= float64(^uint(0))
|
return o.IsInteger() && o.Number >= 0 && o.Number <= float64(^uint(0))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// A slew of constructors that will hopefully get all inlined.
|
||||||
|
|
||||||
|
// New returns a new Object of the given kind, with default values.
|
||||||
|
func New(kind ObjectKind) Object { return Object{Kind: kind} }
|
||||||
|
|
||||||
|
func NewComment(c string) Object { return Object{Kind: Comment, String: c} }
|
||||||
|
func NewKeyword(k string) Object { return Object{Kind: Keyword, String: k} }
|
||||||
|
|
||||||
|
func NewBool(b bool) Object {
|
||||||
|
var b64 float64
|
||||||
|
if b {
|
||||||
|
b64 = 1
|
||||||
|
}
|
||||||
|
return Object{Kind: Bool, Number: b64}
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewNumeric(n float64) Object { return Object{Kind: Numeric, Number: n} }
|
||||||
|
func NewName(n string) Object { return Object{Kind: Name, String: n} }
|
||||||
|
func NewString(s string) Object { return Object{Kind: String, String: s} }
|
||||||
|
|
||||||
|
func NewArray(a []Object) Object {
|
||||||
|
return Object{Kind: Array, Array: a}
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewDict(d map[string]Object) Object {
|
||||||
|
if d == nil {
|
||||||
|
d = make(map[string]Object)
|
||||||
|
}
|
||||||
|
return Object{Kind: Dict, Dict: d}
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewIndirect(o Object, n, generation uint) Object {
|
||||||
|
return Object{Kind: Indirect, N: n, Generation: generation,
|
||||||
|
Array: []Object{o}}
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewReference(n, generation uint) Object {
|
||||||
|
return Object{Kind: Reference, N: n, Generation: generation}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newError(msg string) (Object, error) { return New(End), errors.New(msg) }
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -106,21 +141,21 @@ const (
|
|||||||
// Lexer is a basic lexical analyser for the Portable Document Format,
|
// Lexer is a basic lexical analyser for the Portable Document Format,
|
||||||
// giving limited error information.
|
// giving limited error information.
|
||||||
type Lexer struct {
|
type Lexer struct {
|
||||||
p []byte // input buffer
|
P []byte // input buffer
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lex *Lexer) read() (byte, bool) {
|
func (lex *Lexer) read() (byte, bool) {
|
||||||
if len(lex.p) > 0 {
|
if len(lex.P) > 0 {
|
||||||
ch := lex.p[0]
|
ch := lex.P[0]
|
||||||
lex.p = lex.p[1:]
|
lex.P = lex.P[1:]
|
||||||
return ch, true
|
return ch, true
|
||||||
}
|
}
|
||||||
return 0, false
|
return 0, false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lex *Lexer) peek() (byte, bool) {
|
func (lex *Lexer) peek() (byte, bool) {
|
||||||
if len(lex.p) > 0 {
|
if len(lex.P) > 0 {
|
||||||
return lex.p[0], true
|
return lex.P[0], true
|
||||||
}
|
}
|
||||||
return 0, false
|
return 0, false
|
||||||
}
|
}
|
||||||
@@ -165,13 +200,13 @@ func (lex *Lexer) unescape(ch byte) byte {
|
|||||||
return ch
|
return ch
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lex *Lexer) string() Object {
|
func (lex *Lexer) string() (Object, error) {
|
||||||
var value []byte
|
var value []byte
|
||||||
parens := 1
|
parens := 1
|
||||||
for {
|
for {
|
||||||
ch, ok := lex.read()
|
ch, ok := lex.read()
|
||||||
if !ok {
|
if !ok {
|
||||||
return Object{Kind: End, String: "unexpected end of string"}
|
return newError("unexpected end of string")
|
||||||
}
|
}
|
||||||
if lex.eatNewline(ch) {
|
if lex.eatNewline(ch) {
|
||||||
ch = '\n'
|
ch = '\n'
|
||||||
@@ -183,7 +218,7 @@ func (lex *Lexer) string() Object {
|
|||||||
}
|
}
|
||||||
} else if ch == '\\' {
|
} else if ch == '\\' {
|
||||||
if ch, ok = lex.read(); !ok {
|
if ch, ok = lex.read(); !ok {
|
||||||
return Object{Kind: End, String: "unexpected end of string"}
|
return newError("unexpected end of string")
|
||||||
} else if lex.eatNewline(ch) {
|
} else if lex.eatNewline(ch) {
|
||||||
continue
|
continue
|
||||||
} else {
|
} else {
|
||||||
@@ -192,19 +227,19 @@ func (lex *Lexer) string() Object {
|
|||||||
}
|
}
|
||||||
value = append(value, ch)
|
value = append(value, ch)
|
||||||
}
|
}
|
||||||
return Object{Kind: String, String: string(value)}
|
return NewString(string(value)), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lex *Lexer) stringHex() Object {
|
func (lex *Lexer) stringHex() (Object, error) {
|
||||||
var value, buf []byte
|
var value, buf []byte
|
||||||
for {
|
for {
|
||||||
ch, ok := lex.read()
|
ch, ok := lex.read()
|
||||||
if !ok {
|
if !ok {
|
||||||
return Object{Kind: End, String: "unexpected end of hex string"}
|
return newError("unexpected end of hex string")
|
||||||
} else if ch == '>' {
|
} else if ch == '>' {
|
||||||
break
|
break
|
||||||
} else if strings.IndexByte(hexAlphabet, ch) < 0 {
|
} else if strings.IndexByte(hexAlphabet, ch) < 0 {
|
||||||
return Object{Kind: End, String: "invalid hex string"}
|
return newError("invalid hex string")
|
||||||
} else if buf = append(buf, ch); len(buf) == 2 {
|
} else if buf = append(buf, ch); len(buf) == 2 {
|
||||||
u, _ := strconv.ParseUint(string(buf), 16, 8)
|
u, _ := strconv.ParseUint(string(buf), 16, 8)
|
||||||
value = append(value, byte(u))
|
value = append(value, byte(u))
|
||||||
@@ -215,10 +250,10 @@ func (lex *Lexer) stringHex() Object {
|
|||||||
u, _ := strconv.ParseUint(string(buf)+"0", 16, 8)
|
u, _ := strconv.ParseUint(string(buf)+"0", 16, 8)
|
||||||
value = append(value, byte(u))
|
value = append(value, byte(u))
|
||||||
}
|
}
|
||||||
return Object{Kind: String, String: string(value)}
|
return NewString(string(value)), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lex *Lexer) name() Object {
|
func (lex *Lexer) name() (Object, error) {
|
||||||
var value []byte
|
var value []byte
|
||||||
for {
|
for {
|
||||||
ch, ok := lex.peek()
|
ch, ok := lex.peek()
|
||||||
@@ -237,7 +272,7 @@ func (lex *Lexer) name() Object {
|
|||||||
lex.read()
|
lex.read()
|
||||||
}
|
}
|
||||||
if len(hexa) != 2 {
|
if len(hexa) != 2 {
|
||||||
return Object{Kind: End, String: "invalid name hexa escape"}
|
return newError("invalid name hexa escape")
|
||||||
}
|
}
|
||||||
u, _ := strconv.ParseUint(string(value), 16, 8)
|
u, _ := strconv.ParseUint(string(value), 16, 8)
|
||||||
ch = byte(u)
|
ch = byte(u)
|
||||||
@@ -245,12 +280,12 @@ func (lex *Lexer) name() Object {
|
|||||||
value = append(value, ch)
|
value = append(value, ch)
|
||||||
}
|
}
|
||||||
if len(value) == 0 {
|
if len(value) == 0 {
|
||||||
return Object{Kind: End, String: "unexpected end of name"}
|
return newError("unexpected end of name")
|
||||||
}
|
}
|
||||||
return Object{Kind: Name, String: string(value)}
|
return NewName(string(value)), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lex *Lexer) comment() Object {
|
func (lex *Lexer) comment() (Object, error) {
|
||||||
var value []byte
|
var value []byte
|
||||||
for {
|
for {
|
||||||
ch, ok := lex.peek()
|
ch, ok := lex.peek()
|
||||||
@@ -260,11 +295,11 @@ func (lex *Lexer) comment() Object {
|
|||||||
value = append(value, ch)
|
value = append(value, ch)
|
||||||
lex.read()
|
lex.read()
|
||||||
}
|
}
|
||||||
return Object{Kind: Comment, String: string(value)}
|
return NewComment(string(value)), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// XXX: Maybe invalid numbers should rather be interpreted as keywords.
|
// XXX: Maybe invalid numbers should rather be interpreted as keywords.
|
||||||
func (lex *Lexer) number() Object {
|
func (lex *Lexer) number() (Object, error) {
|
||||||
var value []byte
|
var value []byte
|
||||||
ch, ok := lex.peek()
|
ch, ok := lex.peek()
|
||||||
if ch == '-' {
|
if ch == '-' {
|
||||||
@@ -287,16 +322,16 @@ func (lex *Lexer) number() Object {
|
|||||||
lex.read()
|
lex.read()
|
||||||
}
|
}
|
||||||
if !digits {
|
if !digits {
|
||||||
return Object{Kind: End, String: "invalid number"}
|
return newError("invalid number")
|
||||||
}
|
}
|
||||||
f, _ := strconv.ParseFloat(string(value), 64)
|
f, _ := strconv.ParseFloat(string(value), 64)
|
||||||
return Object{Kind: Numeric, Number: f}
|
return NewNumeric(f), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lex *Lexer) Next() Object {
|
func (lex *Lexer) Next() (Object, error) {
|
||||||
ch, ok := lex.peek()
|
ch, ok := lex.peek()
|
||||||
if !ok {
|
if !ok {
|
||||||
return Object{Kind: End}
|
return New(End), nil
|
||||||
}
|
}
|
||||||
if strings.IndexByte("-0123456789.", ch) >= 0 {
|
if strings.IndexByte("-0123456789.", ch) >= 0 {
|
||||||
return lex.number()
|
return lex.number()
|
||||||
@@ -315,13 +350,13 @@ func (lex *Lexer) Next() Object {
|
|||||||
switch v := string(value); v {
|
switch v := string(value); v {
|
||||||
case "":
|
case "":
|
||||||
case "null":
|
case "null":
|
||||||
return Object{Kind: Nil}
|
return New(Nil), nil
|
||||||
case "true":
|
case "true":
|
||||||
return Object{Kind: Bool, Number: 1}
|
return NewBool(true), nil
|
||||||
case "false":
|
case "false":
|
||||||
return Object{Kind: Bool, Number: 0}
|
return NewBool(false), nil
|
||||||
default:
|
default:
|
||||||
return Object{Kind: Keyword, String: v}
|
return NewKeyword(v), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
switch ch, _ := lex.read(); ch {
|
switch ch, _ := lex.read(); ch {
|
||||||
@@ -332,29 +367,29 @@ func (lex *Lexer) Next() Object {
|
|||||||
case '(':
|
case '(':
|
||||||
return lex.string()
|
return lex.string()
|
||||||
case '[':
|
case '[':
|
||||||
return Object{Kind: BArray}
|
return New(BArray), nil
|
||||||
case ']':
|
case ']':
|
||||||
return Object{Kind: EArray}
|
return New(EArray), nil
|
||||||
case '<':
|
case '<':
|
||||||
if ch, _ := lex.peek(); ch == '<' {
|
if ch, _ := lex.peek(); ch == '<' {
|
||||||
lex.read()
|
lex.read()
|
||||||
return Object{Kind: BDict}
|
return New(BDict), nil
|
||||||
}
|
}
|
||||||
return lex.stringHex()
|
return lex.stringHex()
|
||||||
case '>':
|
case '>':
|
||||||
if ch, _ := lex.peek(); ch == '>' {
|
if ch, _ := lex.peek(); ch == '>' {
|
||||||
lex.read()
|
lex.read()
|
||||||
return Object{Kind: EDict}
|
return New(EDict), nil
|
||||||
}
|
}
|
||||||
return Object{Kind: End, String: "unexpected '>'"}
|
return newError("unexpected '>'")
|
||||||
default:
|
default:
|
||||||
if lex.eatNewline(ch) {
|
if lex.eatNewline(ch) {
|
||||||
return Object{Kind: NL}
|
return New(NL), nil
|
||||||
}
|
}
|
||||||
if strings.IndexByte(whitespace, ch) >= 0 {
|
if strings.IndexByte(whitespace, ch) >= 0 {
|
||||||
return lex.Next()
|
return lex.Next()
|
||||||
}
|
}
|
||||||
return Object{Kind: End, String: "unexpected input"}
|
return newError("unexpected input")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -461,10 +496,10 @@ type Updater struct {
|
|||||||
Trailer map[string]Object
|
Trailer map[string]Object
|
||||||
}
|
}
|
||||||
|
|
||||||
func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) Object {
|
func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) {
|
||||||
lenStack := len(*stack)
|
lenStack := len(*stack)
|
||||||
if lenStack < 2 {
|
if lenStack < 2 {
|
||||||
return Object{Kind: End, String: "missing object ID pair"}
|
return newError("missing object ID pair")
|
||||||
}
|
}
|
||||||
|
|
||||||
n := (*stack)[lenStack-2]
|
n := (*stack)[lenStack-2]
|
||||||
@@ -472,28 +507,30 @@ func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) Object {
|
|||||||
*stack = (*stack)[:lenStack-2]
|
*stack = (*stack)[:lenStack-2]
|
||||||
|
|
||||||
if !g.IsUint() || !n.IsUint() {
|
if !g.IsUint() || !n.IsUint() {
|
||||||
return Object{Kind: End, String: "invalid object ID pair"}
|
return newError("invalid object ID pair")
|
||||||
}
|
}
|
||||||
|
|
||||||
obj := Object{
|
var inner []Object
|
||||||
Kind: Indirect, N: uint(n.Number), Generation: uint(g.Number)}
|
|
||||||
for {
|
for {
|
||||||
object := u.parse(lex, &obj.Array)
|
object, _ := u.parse(lex, &inner)
|
||||||
if object.Kind == End {
|
if object.Kind == End {
|
||||||
return Object{Kind: End, String: "object doesn't end"}
|
return newError("object doesn't end")
|
||||||
}
|
}
|
||||||
if object.Kind == Keyword && object.String == "endobj" {
|
if object.Kind == Keyword && object.String == "endobj" {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
obj.Array = append(obj.Array, object)
|
inner = append(inner, object)
|
||||||
}
|
}
|
||||||
return obj
|
if len(inner) != 1 {
|
||||||
|
return newError("indirect objects must contain exactly one object")
|
||||||
|
}
|
||||||
|
return NewIndirect(inner[0], uint(n.Number), uint(g.Number)), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (u *Updater) parseR(stack *[]Object) Object {
|
func (u *Updater) parseR(stack *[]Object) (Object, error) {
|
||||||
lenStack := len(*stack)
|
lenStack := len(*stack)
|
||||||
if lenStack < 2 {
|
if lenStack < 2 {
|
||||||
return Object{Kind: End, String: "missing reference ID pair"}
|
return newError("missing reference ID pair")
|
||||||
}
|
}
|
||||||
|
|
||||||
n := (*stack)[lenStack-2]
|
n := (*stack)[lenStack-2]
|
||||||
@@ -501,15 +538,16 @@ func (u *Updater) parseR(stack *[]Object) Object {
|
|||||||
*stack = (*stack)[:lenStack-2]
|
*stack = (*stack)[:lenStack-2]
|
||||||
|
|
||||||
if !g.IsUint() || !n.IsUint() {
|
if !g.IsUint() || !n.IsUint() {
|
||||||
return Object{Kind: End, String: "invalid reference ID pair"}
|
return newError("invalid reference ID pair")
|
||||||
}
|
}
|
||||||
return Object{
|
return NewReference(uint(n.Number), uint(g.Number)), nil
|
||||||
Kind: Reference, N: uint(n.Number), Generation: uint(g.Number)}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// parse reads an object at the lexer's position. Not a strict parser.
|
// parse reads an object at the lexer's position. Not a strict parser.
|
||||||
func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
|
//
|
||||||
switch token := lex.Next(); token.Kind {
|
// TODO(p): We should fix all uses of this not to eat the error.
|
||||||
|
func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
|
||||||
|
switch token, err := lex.Next(); token.Kind {
|
||||||
case NL, Comment:
|
case NL, Comment:
|
||||||
// These are not important to parsing,
|
// These are not important to parsing,
|
||||||
// not even for this procedure's needs.
|
// not even for this procedure's needs.
|
||||||
@@ -517,22 +555,22 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
|
|||||||
case BArray:
|
case BArray:
|
||||||
var array []Object
|
var array []Object
|
||||||
for {
|
for {
|
||||||
object := u.parse(lex, &array)
|
object, _ := u.parse(lex, &array)
|
||||||
if object.Kind == End {
|
if object.Kind == End {
|
||||||
return Object{Kind: End, String: "array doesn't end"}
|
return newError("array doesn't end")
|
||||||
}
|
}
|
||||||
if object.Kind == EArray {
|
if object.Kind == EArray {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
array = append(array, object)
|
array = append(array, object)
|
||||||
}
|
}
|
||||||
return Object{Kind: Array, Array: array}
|
return NewArray(array), nil
|
||||||
case BDict:
|
case BDict:
|
||||||
var array []Object
|
var array []Object
|
||||||
for {
|
for {
|
||||||
object := u.parse(lex, &array)
|
object, _ := u.parse(lex, &array)
|
||||||
if object.Kind == End {
|
if object.Kind == End {
|
||||||
return Object{Kind: End, String: "dictionary doesn't end"}
|
return newError("dictionary doesn't end")
|
||||||
}
|
}
|
||||||
if object.Kind == EDict {
|
if object.Kind == EDict {
|
||||||
break
|
break
|
||||||
@@ -540,17 +578,16 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
|
|||||||
array = append(array, object)
|
array = append(array, object)
|
||||||
}
|
}
|
||||||
if len(array)%2 != 0 {
|
if len(array)%2 != 0 {
|
||||||
return Object{Kind: End, String: "unbalanced dictionary"}
|
return newError("unbalanced dictionary")
|
||||||
}
|
}
|
||||||
dict := make(map[string]Object)
|
dict := make(map[string]Object)
|
||||||
for i := 0; i < len(array); i += 2 {
|
for i := 0; i < len(array); i += 2 {
|
||||||
if array[i].Kind != Name {
|
if array[i].Kind != Name {
|
||||||
return Object{
|
return newError("invalid dictionary key type")
|
||||||
Kind: End, String: "invalid dictionary key type"}
|
|
||||||
}
|
}
|
||||||
dict[array[i].String] = array[i+1]
|
dict[array[i].String] = array[i+1]
|
||||||
}
|
}
|
||||||
return Object{Kind: Dict, Dict: dict}
|
return NewDict(dict), nil
|
||||||
case Keyword:
|
case Keyword:
|
||||||
// Appears in the document body, typically needs
|
// Appears in the document body, typically needs
|
||||||
// to access the cross-reference table.
|
// to access the cross-reference table.
|
||||||
@@ -560,7 +597,7 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
|
|||||||
// streams can use the Object.String member.
|
// streams can use the Object.String member.
|
||||||
switch token.String {
|
switch token.String {
|
||||||
case "stream":
|
case "stream":
|
||||||
return Object{Kind: End, String: "streams are not supported yet"}
|
return newError("streams are not supported yet")
|
||||||
case "obj":
|
case "obj":
|
||||||
return u.parseIndirect(lex, stack)
|
return u.parseIndirect(lex, stack)
|
||||||
case "R":
|
case "R":
|
||||||
@@ -568,18 +605,18 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) Object {
|
|||||||
}
|
}
|
||||||
fallthrough
|
fallthrough
|
||||||
default:
|
default:
|
||||||
return token
|
return token, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
|
func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
|
||||||
var throwawayStack []Object
|
var throwawayStack []Object
|
||||||
if keyword := u.parse(lex,
|
if keyword, _ := u.parse(lex,
|
||||||
&throwawayStack); keyword.Kind != Keyword || keyword.String != "xref" {
|
&throwawayStack); keyword.Kind != Keyword || keyword.String != "xref" {
|
||||||
return errors.New("invalid xref table")
|
return errors.New("invalid xref table")
|
||||||
}
|
}
|
||||||
for {
|
for {
|
||||||
object := u.parse(lex, &throwawayStack)
|
object, _ := u.parse(lex, &throwawayStack)
|
||||||
if object.Kind == End {
|
if object.Kind == End {
|
||||||
return errors.New("unexpected EOF while looking for the trailer")
|
return errors.New("unexpected EOF while looking for the trailer")
|
||||||
}
|
}
|
||||||
@@ -587,16 +624,16 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
|
|||||||
break
|
break
|
||||||
}
|
}
|
||||||
|
|
||||||
second := u.parse(lex, &throwawayStack)
|
second, _ := u.parse(lex, &throwawayStack)
|
||||||
if !object.IsUint() || !second.IsUint() {
|
if !object.IsUint() || !second.IsUint() {
|
||||||
return errors.New("invalid xref section header")
|
return errors.New("invalid xref section header")
|
||||||
}
|
}
|
||||||
|
|
||||||
start, count := uint(object.Number), uint(second.Number)
|
start, count := uint(object.Number), uint(second.Number)
|
||||||
for i := uint(0); i < count; i++ {
|
for i := uint(0); i < count; i++ {
|
||||||
off := u.parse(lex, &throwawayStack)
|
off, _ := u.parse(lex, &throwawayStack)
|
||||||
gen := u.parse(lex, &throwawayStack)
|
gen, _ := u.parse(lex, &throwawayStack)
|
||||||
key := u.parse(lex, &throwawayStack)
|
key, _ := u.parse(lex, &throwawayStack)
|
||||||
if !off.IsInteger() || off.Number < 0 ||
|
if !off.IsInteger() || off.Number < 0 ||
|
||||||
off.Number > float64(len(u.Document)) ||
|
off.Number > float64(len(u.Document)) ||
|
||||||
!gen.IsInteger() || gen.Number < 0 || gen.Number > 65535 ||
|
!gen.IsInteger() || gen.Number < 0 || gen.Number > 65535 ||
|
||||||
@@ -634,9 +671,10 @@ func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) error {
|
|||||||
|
|
||||||
var haystackRE = regexp.MustCompile(`(?s:.*)\sstartxref\s+(\d+)\s+%%EOF`)
|
var haystackRE = regexp.MustCompile(`(?s:.*)\sstartxref\s+(\d+)\s+%%EOF`)
|
||||||
|
|
||||||
// Initialize builds the cross-reference table and prepares
|
// NewUpdater initializes an Updater, building the cross-reference table and
|
||||||
// a new trailer dictionary.
|
// preparing a new trailer dictionary.
|
||||||
func (u *Updater) Initialize() error {
|
func NewUpdater(document []byte) (*Updater, error) {
|
||||||
|
u := &Updater{Document: document}
|
||||||
u.updated = make(map[uint]struct{})
|
u.updated = make(map[uint]struct{})
|
||||||
|
|
||||||
// We only need to look for startxref roughly within
|
// We only need to look for startxref roughly within
|
||||||
@@ -648,31 +686,31 @@ func (u *Updater) Initialize() error {
|
|||||||
|
|
||||||
m := haystackRE.FindSubmatch(haystack)
|
m := haystackRE.FindSubmatch(haystack)
|
||||||
if m == nil {
|
if m == nil {
|
||||||
return errors.New("cannot find startxref")
|
return nil, errors.New("cannot find startxref")
|
||||||
}
|
}
|
||||||
|
|
||||||
xrefOffset, _ := strconv.ParseInt(string(m[1]), 10, 64)
|
xrefOffset, _ := strconv.ParseInt(string(m[1]), 10, 64)
|
||||||
lastXrefOffset := xrefOffset
|
lastXrefOffset := xrefOffset
|
||||||
loadedXrefs := map[int64]struct{}{}
|
loadedXrefs := make(map[int64]struct{})
|
||||||
loadedEntries := map[uint]struct{}{}
|
loadedEntries := make(map[uint]struct{})
|
||||||
|
|
||||||
var throwawayStack []Object
|
var throwawayStack []Object
|
||||||
for {
|
for {
|
||||||
if _, ok := loadedXrefs[xrefOffset]; ok {
|
if _, ok := loadedXrefs[xrefOffset]; ok {
|
||||||
return errors.New("circular xref offsets")
|
return nil, errors.New("circular xref offsets")
|
||||||
}
|
}
|
||||||
if xrefOffset >= int64(len(u.Document)) {
|
if xrefOffset >= int64(len(u.Document)) {
|
||||||
return errors.New("invalid xref offset")
|
return nil, errors.New("invalid xref offset")
|
||||||
}
|
}
|
||||||
|
|
||||||
lex := Lexer{u.Document[xrefOffset:]}
|
lex := Lexer{u.Document[xrefOffset:]}
|
||||||
if err := u.loadXref(&lex, loadedEntries); err != nil {
|
if err := u.loadXref(&lex, loadedEntries); err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
trailer := u.parse(&lex, &throwawayStack)
|
trailer, _ := u.parse(&lex, &throwawayStack)
|
||||||
if trailer.Kind != Dict {
|
if trailer.Kind != Dict {
|
||||||
return errors.New("invalid trailer dictionary")
|
return nil, errors.New("invalid trailer dictionary")
|
||||||
}
|
}
|
||||||
if len(loadedXrefs) == 0 {
|
if len(loadedXrefs) == 0 {
|
||||||
u.Trailer = trailer.Dict
|
u.Trailer = trailer.Dict
|
||||||
@@ -685,48 +723,49 @@ func (u *Updater) Initialize() error {
|
|||||||
}
|
}
|
||||||
// FIXME: We don't check for size_t over or underflow.
|
// FIXME: We don't check for size_t over or underflow.
|
||||||
if !prevOffset.IsInteger() {
|
if !prevOffset.IsInteger() {
|
||||||
return errors.New("invalid Prev offset")
|
return nil, errors.New("invalid Prev offset")
|
||||||
}
|
}
|
||||||
xrefOffset = int64(prevOffset.Number)
|
xrefOffset = int64(prevOffset.Number)
|
||||||
}
|
}
|
||||||
|
|
||||||
u.Trailer["Prev"] = Object{
|
u.Trailer["Prev"] = NewNumeric(float64(lastXrefOffset))
|
||||||
Kind: Numeric, Number: float64(lastXrefOffset)}
|
|
||||||
|
|
||||||
lastSize, ok := u.Trailer["Size"]
|
lastSize, ok := u.Trailer["Size"]
|
||||||
if !ok || !lastSize.IsInteger() || lastSize.Number <= 0 {
|
if !ok || !lastSize.IsInteger() || lastSize.Number <= 0 {
|
||||||
return errors.New("invalid or missing cross-reference table Size")
|
return nil, errors.New("invalid or missing cross-reference table Size")
|
||||||
}
|
}
|
||||||
u.xrefSize = uint(lastSize.Number)
|
u.xrefSize = uint(lastSize.Number)
|
||||||
return nil
|
return u, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get retrieves an object by its number and generation--may return
|
// Get retrieves an object by its number and generation--may return
|
||||||
// Nil or End with an error.
|
// Nil or End with an error.
|
||||||
func (u *Updater) Get(n, generation uint) Object {
|
//
|
||||||
|
// TODO(p): We should fix all uses of this not to eat the error.
|
||||||
|
func (u *Updater) Get(n, generation uint) (Object, error) {
|
||||||
if n >= u.xrefSize {
|
if n >= u.xrefSize {
|
||||||
return Object{Kind: Nil}
|
return New(Nil), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
ref := u.xref[n]
|
ref := u.xref[n]
|
||||||
if !ref.nonfree || ref.generation != generation ||
|
if !ref.nonfree || ref.generation != generation ||
|
||||||
ref.offset >= int64(len(u.Document)) {
|
ref.offset >= int64(len(u.Document)) {
|
||||||
return Object{Kind: Nil}
|
return New(Nil), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
lex := Lexer{u.Document[ref.offset:]}
|
lex := Lexer{u.Document[ref.offset:]}
|
||||||
var stack []Object
|
var stack []Object
|
||||||
for {
|
for {
|
||||||
object := u.parse(&lex, &stack)
|
object, err := u.parse(&lex, &stack)
|
||||||
if object.Kind == End {
|
if object.Kind == End {
|
||||||
return object
|
return object, err
|
||||||
}
|
}
|
||||||
if object.Kind != Indirect {
|
if object.Kind != Indirect {
|
||||||
stack = append(stack, object)
|
stack = append(stack, object)
|
||||||
} else if object.N != n || object.Generation != generation {
|
} else if object.N != n || object.Generation != generation {
|
||||||
return Object{Kind: End, String: "object mismatch"}
|
return newError("object mismatch")
|
||||||
} else {
|
} else {
|
||||||
return object.Array[0]
|
return object.Array[0], nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -757,7 +796,8 @@ type BytesWriter interface {
|
|||||||
WriteString(s string) (n int, err error)
|
WriteString(s string) (n int, err error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update appends an updated object to the end of the document.
|
// Update appends an updated object to the end of the document. The fill
|
||||||
|
// callback must write exactly one PDF object.
|
||||||
func (u *Updater) Update(n uint, fill func(buf BytesWriter)) {
|
func (u *Updater) Update(n uint, fill func(buf BytesWriter)) {
|
||||||
oldRef := u.xref[n]
|
oldRef := u.xref[n]
|
||||||
u.updated[n] = struct{}{}
|
u.updated[n] = struct{}{}
|
||||||
@@ -819,8 +859,8 @@ func (u *Updater) FlushUpdates() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u.Trailer["Size"] = Object{Kind: Numeric, Number: float64(u.xrefSize)}
|
u.Trailer["Size"] = NewNumeric(float64(u.xrefSize))
|
||||||
trailer := Object{Kind: Dict, Dict: u.Trailer}
|
trailer := NewDict(u.Trailer)
|
||||||
|
|
||||||
fmt.Fprintf(buf, "trailer\n%s\nstartxref\n%d\n%%%%EOF\n",
|
fmt.Fprintf(buf, "trailer\n%s\nstartxref\n%d\n%%%%EOF\n",
|
||||||
trailer.Serialize(), startXref)
|
trailer.Serialize(), startXref)
|
||||||
@@ -829,8 +869,8 @@ func (u *Updater) FlushUpdates() {
|
|||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
|
|
||||||
// PdfDate makes a PDF object representing the given point in time.
|
// NewDate makes a PDF object representing the given point in time.
|
||||||
func PdfDate(ts time.Time) Object {
|
func NewDate(ts time.Time) Object {
|
||||||
buf := ts.AppendFormat(nil, "D:20060102150405")
|
buf := ts.AppendFormat(nil, "D:20060102150405")
|
||||||
// "Z07'00'" doesn't work, we need to do some of it manually.
|
// "Z07'00'" doesn't work, we need to do some of it manually.
|
||||||
if _, offset := ts.Zone(); offset != 0 {
|
if _, offset := ts.Zone(); offset != 0 {
|
||||||
@@ -839,14 +879,15 @@ func PdfDate(ts time.Time) Object {
|
|||||||
} else {
|
} else {
|
||||||
buf = append(buf, 'Z')
|
buf = append(buf, 'Z')
|
||||||
}
|
}
|
||||||
return Object{Kind: String, String: string(buf)}
|
return NewString(string(buf))
|
||||||
}
|
}
|
||||||
|
|
||||||
// PdfGetFirstPage retrieves the first page of the document or a Nil object.
|
// GetFirstPage retrieves the first page of the given page (sub)tree reference,
|
||||||
func PdfGetFirstPage(pdf *Updater, nodeN, nodeGeneration uint) Object {
|
// or returns a Nil object if unsuccessful.
|
||||||
obj := pdf.Get(nodeN, nodeGeneration)
|
func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
|
||||||
|
obj, _ := u.Get(nodeN, nodeGeneration)
|
||||||
if obj.Kind != Dict {
|
if obj.Kind != Dict {
|
||||||
return Object{Kind: Nil}
|
return New(Nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Out of convenience; these aren't filled normally.
|
// Out of convenience; these aren't filled normally.
|
||||||
@@ -854,11 +895,11 @@ func PdfGetFirstPage(pdf *Updater, nodeN, nodeGeneration uint) Object {
|
|||||||
obj.Generation = nodeGeneration
|
obj.Generation = nodeGeneration
|
||||||
|
|
||||||
if typ, ok := obj.Dict["Type"]; !ok || typ.Kind != Name {
|
if typ, ok := obj.Dict["Type"]; !ok || typ.Kind != Name {
|
||||||
return Object{Kind: Nil}
|
return New(Nil)
|
||||||
} else if typ.String == "Page" {
|
} else if typ.String == "Page" {
|
||||||
return obj
|
return obj
|
||||||
} else if typ.String != "Pages" {
|
} else if typ.String != "Pages" {
|
||||||
return Object{Kind: Nil}
|
return New(Nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
// XXX: Technically speaking, this may be an indirect reference.
|
// XXX: Technically speaking, this may be an indirect reference.
|
||||||
@@ -867,11 +908,11 @@ func PdfGetFirstPage(pdf *Updater, nodeN, nodeGeneration uint) Object {
|
|||||||
kids, ok := obj.Dict["Kids"]
|
kids, ok := obj.Dict["Kids"]
|
||||||
if !ok || kids.Kind != Array || len(kids.Array) == 0 ||
|
if !ok || kids.Kind != Array || len(kids.Array) == 0 ||
|
||||||
kids.Array[0].Kind != Reference {
|
kids.Array[0].Kind != Reference {
|
||||||
return Object{Kind: Nil}
|
return New(Nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
// XXX: Nothing prevents us from recursing in an evil circular graph.
|
// XXX: Nothing prevents us from recursing in an evil circular graph.
|
||||||
return PdfGetFirstPage(pdf, kids.Array[0].N, kids.Array[0].Generation)
|
return u.GetFirstPage(kids.Array[0].N, kids.Array[0].Generation)
|
||||||
}
|
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------------------
|
// -----------------------------------------------------------------------------
|
||||||
@@ -968,8 +1009,9 @@ func PKCS12Parse(p12 []byte, password string) (
|
|||||||
|
|
||||||
// FillInSignature signs PDF contents and writes the signature into the given
|
// FillInSignature signs PDF contents and writes the signature into the given
|
||||||
// window that has been reserved for this specific purpose.
|
// window that has been reserved for this specific purpose.
|
||||||
|
// This is a very low-level function.
|
||||||
func FillInSignature(document []byte, signOff, signLen int,
|
func FillInSignature(document []byte, signOff, signLen int,
|
||||||
key crypto.PublicKey, certs []*x509.Certificate) error {
|
key crypto.PrivateKey, certs []*x509.Certificate) error {
|
||||||
if signOff < 0 || signOff > len(document) ||
|
if signOff < 0 || signOff > len(document) ||
|
||||||
signLen < 2 || signOff+signLen > len(document) {
|
signLen < 2 || signOff+signLen > len(document) {
|
||||||
return errors.New("invalid signing window")
|
return errors.New("invalid signing window")
|
||||||
@@ -1039,7 +1081,13 @@ func FillInSignature(document []byte, signOff, signLen int,
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
|
||||||
|
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
|
||||||
|
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
|
||||||
|
|
||||||
// Sign signs the given document, growing and returning the passed-in slice.
|
// Sign signs the given document, growing and returning the passed-in slice.
|
||||||
|
// There must be at least one certificate, matching the private key.
|
||||||
|
// The certificates must form a chain.
|
||||||
//
|
//
|
||||||
// The presumption here is that the document is valid and that it doesn't
|
// The presumption here is that the document is valid and that it doesn't
|
||||||
// employ cross-reference streams from PDF 1.5, or at least constitutes
|
// employ cross-reference streams from PDF 1.5, or at least constitutes
|
||||||
@@ -1048,14 +1096,10 @@ func FillInSignature(document []byte, signOff, signLen int,
|
|||||||
//
|
//
|
||||||
// Carelessly assumes that the version of the original document is at most
|
// Carelessly assumes that the version of the original document is at most
|
||||||
// PDF 1.6.
|
// PDF 1.6.
|
||||||
//
|
func Sign(document []byte,
|
||||||
// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
|
key crypto.PrivateKey, certs []*x509.Certificate) ([]byte, error) {
|
||||||
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
|
pdf, err := NewUpdater(document)
|
||||||
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
|
if err != nil {
|
||||||
func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
|
|
||||||
[]byte, error) {
|
|
||||||
pdf := &Updater{Document: document}
|
|
||||||
if err := pdf.Initialize(); err != nil {
|
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1063,7 +1107,7 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
|
|||||||
if !ok || rootRef.Kind != Reference {
|
if !ok || rootRef.Kind != Reference {
|
||||||
return nil, errors.New("trailer does not contain a reference to Root")
|
return nil, errors.New("trailer does not contain a reference to Root")
|
||||||
}
|
}
|
||||||
root := pdf.Get(rootRef.N, rootRef.Generation)
|
root, _ := pdf.Get(rootRef.N, rootRef.Generation)
|
||||||
if root.Kind != Dict {
|
if root.Kind != Dict {
|
||||||
return nil, errors.New("invalid Root dictionary reference")
|
return nil, errors.New("invalid Root dictionary reference")
|
||||||
}
|
}
|
||||||
@@ -1074,7 +1118,7 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
|
|||||||
pdf.Update(sigdictN, func(buf BytesWriter) {
|
pdf.Update(sigdictN, func(buf BytesWriter) {
|
||||||
// The timestamp is important for Adobe Acrobat Reader DC.
|
// The timestamp is important for Adobe Acrobat Reader DC.
|
||||||
// The ideal would be to use RFC 3161.
|
// The ideal would be to use RFC 3161.
|
||||||
now := PdfDate(time.Now())
|
now := NewDate(time.Now())
|
||||||
buf.WriteString("<< /Type/Sig /Filter/Adobe.PPKLite" +
|
buf.WriteString("<< /Type/Sig /Filter/Adobe.PPKLite" +
|
||||||
" /SubFilter/adbe.pkcs7.detached\n" +
|
" /SubFilter/adbe.pkcs7.detached\n" +
|
||||||
" /M" + now.Serialize() + " /ByteRange ")
|
" /M" + now.Serialize() + " /ByteRange ")
|
||||||
@@ -1085,7 +1129,7 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
|
|||||||
buf.WriteString("\n /Contents <")
|
buf.WriteString("\n /Contents <")
|
||||||
|
|
||||||
signOff = buf.Len()
|
signOff = buf.Len()
|
||||||
signLen = 8192 // cert, digest, encripted digest, ...
|
signLen = 8192 // cert, digest, encrypted digest, ...
|
||||||
buf.Write(bytes.Repeat([]byte{'0'}, signLen))
|
buf.Write(bytes.Repeat([]byte{'0'}, signLen))
|
||||||
buf.WriteString("> >>")
|
buf.WriteString("> >>")
|
||||||
|
|
||||||
@@ -1094,22 +1138,19 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
|
|||||||
signLen += 2
|
signLen += 2
|
||||||
})
|
})
|
||||||
|
|
||||||
sigfield := Object{Kind: Dict, Dict: map[string]Object{
|
sigfield := NewDict(map[string]Object{
|
||||||
// 8.6.3 Field Types - Signature Fields
|
// 8.6.3 Field Types - Signature Fields
|
||||||
"FT": {Kind: Name, String: "Sig"},
|
"FT": NewName("Sig"),
|
||||||
"V": {Kind: Reference, N: sigdictN, Generation: 0},
|
"V": NewReference(sigdictN, 0),
|
||||||
// 8.4.5 Annotations Types - Widget Annotations
|
// 8.4.5 Annotations Types - Widget Annotations
|
||||||
// We can merge the Signature Annotation and omit Kids here.
|
// We can merge the Signature Annotation and omit Kids here.
|
||||||
"Subtype": {Kind: Name, String: "Widget"},
|
"Subtype": NewName("Widget"),
|
||||||
"F": {Kind: Numeric, Number: 2 /* Hidden */},
|
"F": NewNumeric(2 /* Hidden */),
|
||||||
"T": {Kind: String, String: "Signature1"},
|
"T": NewString("Signature1"),
|
||||||
"Rect": {Kind: Array, Array: []Object{
|
"Rect": NewArray([]Object{
|
||||||
{Kind: Numeric, Number: 0},
|
NewNumeric(0), NewNumeric(0), NewNumeric(0), NewNumeric(0),
|
||||||
{Kind: Numeric, Number: 0},
|
}),
|
||||||
{Kind: Numeric, Number: 0},
|
})
|
||||||
{Kind: Numeric, Number: 0},
|
|
||||||
}},
|
|
||||||
}}
|
|
||||||
|
|
||||||
sigfieldN := pdf.Allocate()
|
sigfieldN := pdf.Allocate()
|
||||||
pdf.Update(sigfieldN, func(buf BytesWriter) {
|
pdf.Update(sigfieldN, func(buf BytesWriter) {
|
||||||
@@ -1120,7 +1161,7 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
|
|||||||
if !ok || pagesRef.Kind != Reference {
|
if !ok || pagesRef.Kind != Reference {
|
||||||
return nil, errors.New("invalid Pages reference")
|
return nil, errors.New("invalid Pages reference")
|
||||||
}
|
}
|
||||||
page := PdfGetFirstPage(pdf, pagesRef.N, pagesRef.Generation)
|
page := pdf.GetFirstPage(pagesRef.N, pagesRef.Generation)
|
||||||
if page.Kind != Dict {
|
if page.Kind != Dict {
|
||||||
return nil, errors.New("invalid or unsupported page tree")
|
return nil, errors.New("invalid or unsupported page tree")
|
||||||
}
|
}
|
||||||
@@ -1128,10 +1169,9 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
|
|||||||
// XXX: Assuming this won't be an indirectly referenced array.
|
// XXX: Assuming this won't be an indirectly referenced array.
|
||||||
annots := page.Dict["Annots"]
|
annots := page.Dict["Annots"]
|
||||||
if annots.Kind != Array {
|
if annots.Kind != Array {
|
||||||
annots = Object{Kind: Array}
|
annots = NewArray(nil)
|
||||||
}
|
}
|
||||||
annots.Array = append(annots.Array, Object{
|
annots.Array = append(annots.Array, NewReference(sigfieldN, 0))
|
||||||
Kind: Reference, N: sigfieldN, Generation: 0})
|
|
||||||
|
|
||||||
page.Dict["Annots"] = annots
|
page.Dict["Annots"] = annots
|
||||||
pdf.Update(page.N, func(buf BytesWriter) {
|
pdf.Update(page.N, func(buf BytesWriter) {
|
||||||
@@ -1140,19 +1180,16 @@ func Sign(document []byte, key crypto.PublicKey, certs []*x509.Certificate) (
|
|||||||
|
|
||||||
// 8.6.1 Interactive Form Dictionary
|
// 8.6.1 Interactive Form Dictionary
|
||||||
// XXX: Assuming there are no forms already, overwriting everything.
|
// XXX: Assuming there are no forms already, overwriting everything.
|
||||||
root.Dict["AcroForm"] = Object{Kind: Dict, Dict: map[string]Object{
|
root.Dict["AcroForm"] = NewDict(map[string]Object{
|
||||||
"Fields": {Kind: Array, Array: []Object{
|
"Fields": NewArray([]Object{NewReference(sigfieldN, 0)}),
|
||||||
{Kind: Reference, N: sigfieldN, Generation: 0},
|
"SigFlags": NewNumeric(3 /* SignaturesExist | AppendOnly */),
|
||||||
}},
|
})
|
||||||
"SigFlags": {Kind: Numeric,
|
|
||||||
Number: 3 /* SignaturesExist | AppendOnly */},
|
|
||||||
}}
|
|
||||||
|
|
||||||
// Upgrade the document version for SHA-256 etc.
|
// Upgrade the document version for SHA-256 etc.
|
||||||
// XXX: Assuming that it's not newer than 1.6 already--while Cairo can't
|
// XXX: Assuming that it's not newer than 1.6 already--while Cairo can't
|
||||||
// currently use a newer version than 1.5, it's not a bad idea to use
|
// currently use a newer version than 1.5, it's not a bad idea to use
|
||||||
// cairo_pdf_surface_restrict_to_version().
|
// cairo_pdf_surface_restrict_to_version().
|
||||||
root.Dict["Version"] = Object{Kind: Name, String: "1.6"}
|
root.Dict["Version"] = NewName("1.6")
|
||||||
pdf.Update(rootRef.N, func(buf BytesWriter) {
|
pdf.Update(rootRef.N, func(buf BytesWriter) {
|
||||||
buf.WriteString(root.Serialize())
|
buf.WriteString(root.Serialize())
|
||||||
})
|
})
|
||||||
|
|||||||
Reference in New Issue
Block a user