2018-10-02 23:19:38 +02:00
|
|
|
//
|
2023-06-29 05:01:23 +02:00
|
|
|
// Copyright (c) 2018 - 2024, Přemysl Eric Janouch <p@janouch.name>
|
2018-10-02 23:19:38 +02:00
|
|
|
//
|
|
|
|
// Permission to use, copy, modify, and/or distribute this software for any
|
|
|
|
// purpose with or without fee is hereby granted.
|
|
|
|
//
|
|
|
|
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
|
|
|
|
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
|
|
|
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
|
|
|
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
//
|
|
|
|
|
|
|
|
// Package pdf signs PDF documents and provides some processing utilities.
|
|
|
|
package pdf
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2023-06-29 05:01:23 +02:00
|
|
|
"compress/zlib"
|
|
|
|
"encoding/binary"
|
2018-10-02 23:19:38 +02:00
|
|
|
"encoding/hex"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"math"
|
|
|
|
"regexp"
|
|
|
|
"sort"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"crypto"
|
|
|
|
"crypto/ecdsa"
|
|
|
|
"crypto/rsa"
|
|
|
|
"crypto/x509"
|
2020-09-04 15:34:33 +02:00
|
|
|
|
2018-10-02 23:19:38 +02:00
|
|
|
"go.mozilla.org/pkcs7"
|
|
|
|
"golang.org/x/crypto/pkcs12"
|
|
|
|
)
|
|
|
|
|
|
|
|
// ObjectKind tells apart the kinds of tokens and objects
// that an Object may represent.
type ObjectKind int

const (
	// End stands for the end of input, and accompanies returned errors.
	End ObjectKind = iota
	// NL is a newline token.
	NL
	// Comment is a comment token.
	Comment
	// Nil is the null object.
	Nil
	// Bool is a boolean object.
	Bool
	// Numeric is a numeric object.
	Numeric
	// Keyword is a bare word other than null, true, and false.
	Keyword
	// Name is a name object.
	Name
	// String is a string object.
	String

	// simple tokens

	// BArray is the token that begins an array.
	BArray
	// EArray is the token that ends an array.
	EArray
	// BDict is the token that begins a dictionary.
	BDict
	// EDict is the token that ends a dictionary.
	EDict

	// higher-level objects

	// Array is a parsed array.
	Array
	// Dict is a parsed dictionary.
	Dict
	// Stream is a dictionary together with stream data.
	Stream
	// Indirect is an indirect object definition.
	Indirect
	// Reference is a reference to an indirect object.
	Reference
)
|
|
|
|
|
2021-12-08 20:39:02 +01:00
|
|
|
// Object is a PDF token/object thingy. Objects may be composed either from
|
2018-10-02 23:19:38 +02:00
|
|
|
// one or a sequence of tokens. The PDF Reference doesn't actually speak
|
2021-12-08 20:39:02 +01:00
|
|
|
// of tokens, though ISO 32000-1:2008 does.
|
2018-10-02 23:19:38 +02:00
|
|
|
type Object struct {
|
|
|
|
Kind ObjectKind
|
|
|
|
|
2018-10-04 13:11:10 +02:00
|
|
|
String string // Comment/Keyword/Name/String
|
|
|
|
Number float64 // Bool, Numeric
|
|
|
|
Array []Object // Array, Indirect
|
2021-12-09 13:07:02 +01:00
|
|
|
Dict map[string]Object // Dict, Stream
|
|
|
|
Stream []byte // Stream
|
2018-10-04 13:11:10 +02:00
|
|
|
N, Generation uint // Indirect, Reference
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// IsInteger checks if the PDF object is an integer number.
|
|
|
|
func (o *Object) IsInteger() bool {
|
|
|
|
_, f := math.Modf(o.Number)
|
|
|
|
return o.Kind == Numeric && f == 0
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsUint checks if the PDF object is an integer number that fits into a uint.
|
|
|
|
func (o *Object) IsUint() bool {
|
|
|
|
return o.IsInteger() && o.Number >= 0 && o.Number <= float64(^uint(0))
|
|
|
|
}
|
|
|
|
|
2018-10-04 12:51:23 +02:00
|
|
|
// A slew of constructors that will hopefully get all inlined.
|
|
|
|
|
2018-10-04 13:11:10 +02:00
|
|
|
// New returns a new Object of the given kind, with default values.
|
|
|
|
func New(kind ObjectKind) Object { return Object{Kind: kind} }
|
|
|
|
|
2018-10-04 12:51:23 +02:00
|
|
|
// NewComment returns a Comment object carrying the given text.
func NewComment(c string) Object {
	return Object{Kind: Comment, String: c}
}
|
|
|
|
// NewKeyword returns a Keyword object carrying the given word.
func NewKeyword(k string) Object {
	return Object{Kind: Keyword, String: k}
}
|
|
|
|
|
|
|
|
func NewBool(b bool) Object {
|
|
|
|
var b64 float64
|
|
|
|
if b {
|
|
|
|
b64 = 1
|
|
|
|
}
|
|
|
|
return Object{Kind: Bool, Number: b64}
|
|
|
|
}
|
|
|
|
|
|
|
|
// NewNumeric returns a Numeric object with the given value.
func NewNumeric(n float64) Object {
	return Object{Kind: Numeric, Number: n}
}
|
|
|
|
// NewName returns a Name object with the given, unescaped, name.
func NewName(n string) Object {
	return Object{Kind: Name, String: n}
}
|
|
|
|
// NewString returns a String object with the given contents.
func NewString(s string) Object {
	return Object{Kind: String, String: s}
}
|
|
|
|
|
|
|
|
func NewArray(a []Object) Object {
|
|
|
|
return Object{Kind: Array, Array: a}
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewDict(d map[string]Object) Object {
|
|
|
|
if d == nil {
|
|
|
|
d = make(map[string]Object)
|
|
|
|
}
|
|
|
|
return Object{Kind: Dict, Dict: d}
|
|
|
|
}
|
|
|
|
|
2023-06-29 05:01:23 +02:00
|
|
|
func NewStream(d map[string]Object, s []byte) Object {
|
|
|
|
if d == nil {
|
|
|
|
d = make(map[string]Object)
|
|
|
|
}
|
|
|
|
return Object{Kind: Stream, Dict: d, Stream: s}
|
|
|
|
}
|
|
|
|
|
2018-10-04 12:51:23 +02:00
|
|
|
func NewIndirect(o Object, n, generation uint) Object {
|
|
|
|
return Object{Kind: Indirect, N: n, Generation: generation,
|
|
|
|
Array: []Object{o}}
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewReference(n, generation uint) Object {
|
|
|
|
return Object{Kind: Reference, N: n, Generation: generation}
|
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
// newError pairs an End object with a new error made of the given message,
// as a shorthand for returning failures from (Object, error) functions.
func newError(msg string) (Object, error) {
	return New(End), errors.New(msg)
}
|
|
|
|
|
2018-10-02 23:19:38 +02:00
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
// Character classes used by the lexer and the serializer.
const (
	// octAlphabet lists octal digits.
	octAlphabet = "01234567"
	// decAlphabet lists decimal digits.
	decAlphabet = octAlphabet + "89"
	// hexAlphabet lists hexadecimal digits of either case.
	hexAlphabet = decAlphabet + "abcdef" + "ABCDEF"

	// whitespace lists the characters treated as token separators.
	whitespace = "\t\n\f\r "
	// delimiters lists the characters that terminate names and keywords.
	delimiters = "()<>[]{}/%"
)
|
|
|
|
|
|
|
|
// Lexer is a basic lexical analyser for the Portable Document Format,
// giving limited error information.
type Lexer struct {
	// P is the input buffer; it gets resliced past any consumed bytes.
	P []byte
}
|
|
|
|
|
|
|
|
func (lex *Lexer) read() (byte, bool) {
|
2018-10-04 14:46:12 +02:00
|
|
|
if len(lex.P) > 0 {
|
|
|
|
ch := lex.P[0]
|
|
|
|
lex.P = lex.P[1:]
|
2018-10-02 23:19:38 +02:00
|
|
|
return ch, true
|
|
|
|
}
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
|
|
|
|
func (lex *Lexer) peek() (byte, bool) {
|
2018-10-04 14:46:12 +02:00
|
|
|
if len(lex.P) > 0 {
|
|
|
|
return lex.P[0], true
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
return 0, false
|
|
|
|
}
|
|
|
|
|
|
|
|
func (lex *Lexer) eatNewline(ch byte) bool {
|
|
|
|
if ch == '\r' {
|
|
|
|
if ch, _ := lex.peek(); ch == '\n' {
|
|
|
|
lex.read()
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return ch == '\n'
|
|
|
|
}
|
|
|
|
|
|
|
|
func (lex *Lexer) unescape(ch byte) byte {
|
|
|
|
switch ch {
|
|
|
|
case 'n':
|
|
|
|
return '\n'
|
|
|
|
case 'r':
|
|
|
|
return '\r'
|
|
|
|
case 't':
|
|
|
|
return '\t'
|
|
|
|
case 'b':
|
|
|
|
return '\b'
|
|
|
|
case 'f':
|
|
|
|
return '\f'
|
|
|
|
}
|
|
|
|
if strings.IndexByte(octAlphabet, ch) >= 0 {
|
|
|
|
octal := []byte{ch}
|
|
|
|
lex.read()
|
|
|
|
if ch, _ := lex.peek(); strings.IndexByte(octAlphabet, ch) >= 0 {
|
|
|
|
octal = append(octal, ch)
|
|
|
|
lex.read()
|
|
|
|
}
|
|
|
|
if ch, _ := lex.peek(); strings.IndexByte(octAlphabet, ch) >= 0 {
|
|
|
|
octal = append(octal, ch)
|
|
|
|
lex.read()
|
|
|
|
}
|
|
|
|
u, _ := strconv.ParseUint(string(octal), 8, 8)
|
|
|
|
return byte(u)
|
|
|
|
}
|
|
|
|
return ch
|
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
func (lex *Lexer) string() (Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
var value []byte
|
|
|
|
parens := 1
|
|
|
|
for {
|
|
|
|
ch, ok := lex.read()
|
|
|
|
if !ok {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("unexpected end of string")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
if lex.eatNewline(ch) {
|
|
|
|
ch = '\n'
|
|
|
|
} else if ch == '(' {
|
|
|
|
parens++
|
|
|
|
} else if ch == ')' {
|
|
|
|
if parens--; parens == 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
} else if ch == '\\' {
|
|
|
|
if ch, ok = lex.read(); !ok {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("unexpected end of string")
|
2018-10-02 23:19:38 +02:00
|
|
|
} else if lex.eatNewline(ch) {
|
|
|
|
continue
|
|
|
|
} else {
|
|
|
|
ch = lex.unescape(ch)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
value = append(value, ch)
|
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewString(string(value)), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
func (lex *Lexer) stringHex() (Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
var value, buf []byte
|
|
|
|
for {
|
|
|
|
ch, ok := lex.read()
|
|
|
|
if !ok {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("unexpected end of hex string")
|
2018-10-02 23:19:38 +02:00
|
|
|
} else if ch == '>' {
|
|
|
|
break
|
|
|
|
} else if strings.IndexByte(hexAlphabet, ch) < 0 {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("invalid hex string")
|
2018-10-02 23:19:38 +02:00
|
|
|
} else if buf = append(buf, ch); len(buf) == 2 {
|
|
|
|
u, _ := strconv.ParseUint(string(buf), 16, 8)
|
|
|
|
value = append(value, byte(u))
|
|
|
|
buf = nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(buf) > 0 {
|
|
|
|
u, _ := strconv.ParseUint(string(buf)+"0", 16, 8)
|
|
|
|
value = append(value, byte(u))
|
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewString(string(value)), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
func (lex *Lexer) name() (Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
var value []byte
|
|
|
|
for {
|
|
|
|
ch, ok := lex.peek()
|
|
|
|
if !ok || strings.IndexByte(whitespace+delimiters, ch) >= 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
lex.read()
|
|
|
|
if ch == '#' {
|
|
|
|
var hexa []byte
|
|
|
|
if ch, _ := lex.peek(); strings.IndexByte(hexAlphabet, ch) >= 0 {
|
|
|
|
hexa = append(hexa, ch)
|
|
|
|
lex.read()
|
|
|
|
}
|
|
|
|
if ch, _ := lex.peek(); strings.IndexByte(hexAlphabet, ch) >= 0 {
|
|
|
|
hexa = append(hexa, ch)
|
|
|
|
lex.read()
|
|
|
|
}
|
|
|
|
if len(hexa) != 2 {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("invalid name hexa escape")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
u, _ := strconv.ParseUint(string(value), 16, 8)
|
|
|
|
ch = byte(u)
|
|
|
|
}
|
|
|
|
value = append(value, ch)
|
|
|
|
}
|
|
|
|
if len(value) == 0 {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("unexpected end of name")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewName(string(value)), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
func (lex *Lexer) comment() (Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
var value []byte
|
|
|
|
for {
|
|
|
|
ch, ok := lex.peek()
|
|
|
|
if !ok || ch == '\r' || ch == '\n' {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
value = append(value, ch)
|
|
|
|
lex.read()
|
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewComment(string(value)), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// XXX: Maybe invalid numbers should rather be interpreted as keywords.
|
2018-10-04 13:07:47 +02:00
|
|
|
func (lex *Lexer) number() (Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
var value []byte
|
|
|
|
ch, ok := lex.peek()
|
|
|
|
if ch == '-' {
|
|
|
|
value = append(value, ch)
|
|
|
|
lex.read()
|
|
|
|
}
|
|
|
|
real, digits := false, false
|
|
|
|
for {
|
|
|
|
ch, ok = lex.peek()
|
|
|
|
if !ok {
|
|
|
|
break
|
|
|
|
} else if strings.IndexByte(decAlphabet, ch) >= 0 {
|
|
|
|
digits = true
|
|
|
|
} else if ch == '.' && !real {
|
|
|
|
real = true
|
|
|
|
} else {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
value = append(value, ch)
|
|
|
|
lex.read()
|
|
|
|
}
|
|
|
|
if !digits {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("invalid number")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
f, _ := strconv.ParseFloat(string(value), 64)
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewNumeric(f), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
func (lex *Lexer) Next() (Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
ch, ok := lex.peek()
|
|
|
|
if !ok {
|
2018-10-04 13:07:47 +02:00
|
|
|
return New(End), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
if strings.IndexByte("-0123456789.", ch) >= 0 {
|
|
|
|
return lex.number()
|
|
|
|
}
|
|
|
|
|
|
|
|
// {} end up being keywords, we might want to error out on those.
|
|
|
|
var value []byte
|
|
|
|
for {
|
|
|
|
ch, ok := lex.peek()
|
|
|
|
if !ok || strings.IndexByte(whitespace+delimiters, ch) >= 0 {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
value = append(value, ch)
|
|
|
|
lex.read()
|
|
|
|
}
|
|
|
|
switch v := string(value); v {
|
|
|
|
case "":
|
|
|
|
case "null":
|
2018-10-04 13:07:47 +02:00
|
|
|
return New(Nil), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
case "true":
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewBool(true), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
case "false":
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewBool(false), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
default:
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewKeyword(v), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
switch ch, _ := lex.read(); ch {
|
|
|
|
case '/':
|
|
|
|
return lex.name()
|
|
|
|
case '%':
|
|
|
|
return lex.comment()
|
|
|
|
case '(':
|
|
|
|
return lex.string()
|
|
|
|
case '[':
|
2018-10-04 13:07:47 +02:00
|
|
|
return New(BArray), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
case ']':
|
2018-10-04 13:07:47 +02:00
|
|
|
return New(EArray), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
case '<':
|
|
|
|
if ch, _ := lex.peek(); ch == '<' {
|
|
|
|
lex.read()
|
2018-10-04 13:07:47 +02:00
|
|
|
return New(BDict), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
return lex.stringHex()
|
|
|
|
case '>':
|
|
|
|
if ch, _ := lex.peek(); ch == '>' {
|
|
|
|
lex.read()
|
2018-10-04 13:07:47 +02:00
|
|
|
return New(EDict), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("unexpected '>'")
|
2018-10-02 23:19:38 +02:00
|
|
|
default:
|
|
|
|
if lex.eatNewline(ch) {
|
2018-10-04 13:07:47 +02:00
|
|
|
return New(NL), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
if strings.IndexByte(whitespace, ch) >= 0 {
|
|
|
|
return lex.Next()
|
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("unexpected input")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
// FIXME: Lines /should not/ be longer than 255 characters,
|
|
|
|
// some wrapping is in order.
|
|
|
|
func (o *Object) Serialize() string {
|
|
|
|
switch o.Kind {
|
|
|
|
case NL:
|
|
|
|
return "\n"
|
|
|
|
case Nil:
|
|
|
|
return "null"
|
|
|
|
case Bool:
|
|
|
|
if o.Number != 0 {
|
|
|
|
return "true"
|
|
|
|
}
|
|
|
|
return "false"
|
|
|
|
case Numeric:
|
|
|
|
return strconv.FormatFloat(o.Number, 'f', -1, 64)
|
|
|
|
case Keyword:
|
|
|
|
return o.String
|
|
|
|
case Name:
|
|
|
|
escaped := []byte{'/'}
|
|
|
|
for _, ch := range []byte(o.String) {
|
|
|
|
escaped = append(escaped, ch)
|
|
|
|
if ch == '#' || strings.IndexByte(delimiters+whitespace, ch) >= 0 {
|
|
|
|
escaped = append(escaped, fmt.Sprintf("%02x", ch)...)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return string(escaped)
|
|
|
|
case String:
|
|
|
|
escaped := []byte{'('}
|
|
|
|
for _, ch := range []byte(o.String) {
|
|
|
|
if ch == '\\' || ch == '(' || ch == ')' {
|
|
|
|
escaped = append(escaped, '\\')
|
|
|
|
}
|
|
|
|
escaped = append(escaped, ch)
|
|
|
|
}
|
|
|
|
return string(append(escaped, ')'))
|
|
|
|
case BArray:
|
|
|
|
return "["
|
|
|
|
case EArray:
|
|
|
|
return "]"
|
|
|
|
case BDict:
|
|
|
|
return "<<"
|
|
|
|
case EDict:
|
|
|
|
return ">>"
|
|
|
|
case Array:
|
|
|
|
var v []string
|
|
|
|
for _, i := range o.Array {
|
|
|
|
v = append(v, i.Serialize())
|
|
|
|
}
|
|
|
|
return "[ " + strings.Join(v, " ") + " ]"
|
|
|
|
case Dict:
|
|
|
|
b := bytes.NewBuffer(nil)
|
|
|
|
var keys []string
|
|
|
|
for k := range o.Dict {
|
|
|
|
keys = append(keys, k)
|
|
|
|
}
|
|
|
|
sort.Strings(keys)
|
|
|
|
for _, k := range keys {
|
|
|
|
v := o.Dict[k]
|
|
|
|
// FIXME: The key is also supposed to be escaped by Serialize.
|
|
|
|
fmt.Fprint(b, " /", k, " ", v.Serialize())
|
|
|
|
}
|
|
|
|
return "<<" + b.String() + " >>"
|
2021-12-09 13:07:02 +01:00
|
|
|
case Stream:
|
|
|
|
d := NewDict(o.Dict)
|
|
|
|
d.Dict["Length"] = NewNumeric(float64(len(o.Stream)))
|
|
|
|
return d.Serialize() + "\nstream\n" + string(o.Stream) + "\nendstream"
|
2018-10-02 23:19:38 +02:00
|
|
|
case Indirect:
|
|
|
|
return fmt.Sprintf("%d %d obj\n%s\nendobj", o.N, o.Generation,
|
|
|
|
o.Array[0].Serialize())
|
|
|
|
case Reference:
|
|
|
|
return fmt.Sprintf("%d %d R", o.N, o.Generation)
|
|
|
|
default:
|
|
|
|
panic("unsupported token for serialization")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
// ref is an entry of the cross-reference table.
// Its zero value describes a free entry.
type ref struct {
	// offset is the file offset, or N of the next free entry, or index.
	offset int64

	// generation is the object generation.
	generation uint

	// compressed is, since PDF 1.5, N of the containing compressed object.
	compressed *uint

	// nonfree tells whether this N is taken (for a good zero value).
	nonfree bool
}
|
|
|
|
|
|
|
|
// Updater is a utility class to help read and possibly incrementally update
|
|
|
|
// PDF files.
|
|
|
|
type Updater struct {
|
|
|
|
// cross-reference table
|
|
|
|
xref []ref
|
|
|
|
|
|
|
|
// current cross-reference table size, correlated to len(xref)
|
|
|
|
xrefSize uint
|
|
|
|
|
|
|
|
// list of updated objects
|
|
|
|
// TODO(p): A map to bool makes this simpler to work with.
|
|
|
|
// The same with another map to struct{} somewhere in this code.
|
|
|
|
updated map[uint]struct{}
|
|
|
|
|
|
|
|
// PDF document data
|
|
|
|
Document []byte
|
|
|
|
|
|
|
|
// the new trailer dictionary to be written, initialized with the old one
|
|
|
|
Trailer map[string]Object
|
|
|
|
}
|
|
|
|
|
2021-12-09 13:19:41 +01:00
|
|
|
// ListIndirect returns the whole cross-reference table as Reference Objects.
|
|
|
|
func (u *Updater) ListIndirect() []Object {
|
|
|
|
result := []Object{}
|
|
|
|
for i := 0; i < len(u.xref); i++ {
|
|
|
|
if u.xref[i].nonfree {
|
|
|
|
result = append(result, NewReference(uint(i), u.xref[i].generation))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result
|
|
|
|
}
|
|
|
|
|
2021-12-09 13:07:02 +01:00
|
|
|
func (u *Updater) parseStream(lex *Lexer, stack *[]Object) (Object, error) {
|
|
|
|
lenStack := len(*stack)
|
|
|
|
if lenStack < 1 {
|
|
|
|
return newError("missing stream dictionary")
|
|
|
|
}
|
|
|
|
dict := (*stack)[lenStack-1]
|
|
|
|
if dict.Kind != Dict {
|
|
|
|
return newError("stream not preceded by a dictionary")
|
|
|
|
}
|
|
|
|
|
|
|
|
*stack = (*stack)[:lenStack-1]
|
|
|
|
length, ok := dict.Dict["Length"]
|
|
|
|
if !ok {
|
|
|
|
return newError("missing stream Length")
|
|
|
|
}
|
|
|
|
length, err := u.Dereference(length)
|
|
|
|
if err != nil {
|
|
|
|
return length, err
|
|
|
|
}
|
|
|
|
if !length.IsUint() || length.Number > math.MaxInt {
|
|
|
|
return newError("stream Length not an unsigned integer")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Expect exactly one newline.
|
|
|
|
if nl, err := lex.Next(); err != nil {
|
|
|
|
return nl, err
|
|
|
|
} else if nl.Kind != NL {
|
|
|
|
return newError("stream does not start with a newline")
|
|
|
|
}
|
|
|
|
|
|
|
|
size := int(length.Number)
|
|
|
|
if len(lex.P) < size {
|
|
|
|
return newError("stream is longer than the document")
|
|
|
|
}
|
|
|
|
|
|
|
|
dict.Kind = Stream
|
|
|
|
dict.Stream = lex.P[:size]
|
|
|
|
lex.P = lex.P[size:]
|
|
|
|
|
|
|
|
// Skip any number of trailing newlines or comments.
|
|
|
|
if end, err := u.parse(lex, stack); err != nil {
|
|
|
|
return end, err
|
|
|
|
} else if end.Kind != Keyword || end.String != "endstream" {
|
|
|
|
return newError("improperly terminated stream")
|
|
|
|
}
|
|
|
|
return dict, nil
|
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
lenStack := len(*stack)
|
|
|
|
if lenStack < 2 {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("missing object ID pair")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
n := (*stack)[lenStack-2]
|
|
|
|
g := (*stack)[lenStack-1]
|
|
|
|
*stack = (*stack)[:lenStack-2]
|
|
|
|
|
|
|
|
if !g.IsUint() || !n.IsUint() {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("invalid object ID pair")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2018-10-04 12:51:23 +02:00
|
|
|
var inner []Object
|
2018-10-02 23:19:38 +02:00
|
|
|
for {
|
2018-10-04 13:07:47 +02:00
|
|
|
object, _ := u.parse(lex, &inner)
|
2018-10-02 23:19:38 +02:00
|
|
|
if object.Kind == End {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("object doesn't end")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
if object.Kind == Keyword && object.String == "endobj" {
|
|
|
|
break
|
|
|
|
}
|
2018-10-04 12:51:23 +02:00
|
|
|
inner = append(inner, object)
|
|
|
|
}
|
|
|
|
if len(inner) != 1 {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("indirect objects must contain exactly one object")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewIndirect(inner[0], uint(n.Number), uint(g.Number)), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
func (u *Updater) parseR(stack *[]Object) (Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
lenStack := len(*stack)
|
|
|
|
if lenStack < 2 {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("missing reference ID pair")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
n := (*stack)[lenStack-2]
|
|
|
|
g := (*stack)[lenStack-1]
|
|
|
|
*stack = (*stack)[:lenStack-2]
|
|
|
|
|
|
|
|
if !g.IsUint() || !n.IsUint() {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("invalid reference ID pair")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewReference(uint(n.Number), uint(g.Number)), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
// parse reads an object at the lexer's position. Not a strict parser.
|
|
|
|
//
|
|
|
|
// TODO(p): We should fix all uses of this not to eat the error.
|
|
|
|
func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
|
|
|
|
switch token, err := lex.Next(); token.Kind {
|
2018-10-02 23:19:38 +02:00
|
|
|
case NL, Comment:
|
|
|
|
// These are not important to parsing,
|
|
|
|
// not even for this procedure's needs.
|
|
|
|
return u.parse(lex, stack)
|
|
|
|
case BArray:
|
|
|
|
var array []Object
|
|
|
|
for {
|
2018-10-04 13:07:47 +02:00
|
|
|
object, _ := u.parse(lex, &array)
|
2018-10-02 23:19:38 +02:00
|
|
|
if object.Kind == End {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("array doesn't end")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
if object.Kind == EArray {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
array = append(array, object)
|
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewArray(array), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
case BDict:
|
|
|
|
var array []Object
|
|
|
|
for {
|
2018-10-04 13:07:47 +02:00
|
|
|
object, _ := u.parse(lex, &array)
|
2018-10-02 23:19:38 +02:00
|
|
|
if object.Kind == End {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("dictionary doesn't end")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
if object.Kind == EDict {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
array = append(array, object)
|
|
|
|
}
|
|
|
|
if len(array)%2 != 0 {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("unbalanced dictionary")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
dict := make(map[string]Object)
|
|
|
|
for i := 0; i < len(array); i += 2 {
|
|
|
|
if array[i].Kind != Name {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("invalid dictionary key type")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
dict[array[i].String] = array[i+1]
|
|
|
|
}
|
2018-10-04 13:07:47 +02:00
|
|
|
return NewDict(dict), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
case Keyword:
|
|
|
|
switch token.String {
|
|
|
|
case "stream":
|
2021-12-09 13:07:02 +01:00
|
|
|
// Appears in the document body,
|
|
|
|
// typically needs to access the cross-reference table.
|
|
|
|
return u.parseStream(lex, stack)
|
2018-10-02 23:19:38 +02:00
|
|
|
case "obj":
|
|
|
|
return u.parseIndirect(lex, stack)
|
|
|
|
case "R":
|
|
|
|
return u.parseR(stack)
|
|
|
|
}
|
|
|
|
fallthrough
|
|
|
|
default:
|
2018-10-04 13:07:47 +02:00
|
|
|
return token, err
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-06-29 05:01:23 +02:00
|
|
|
func (u *Updater) loadXrefEntry(
|
|
|
|
n uint, r ref, loadedEntries map[uint]struct{}) {
|
|
|
|
if _, ok := loadedEntries[n]; ok {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if lenXref := uint(len(u.xref)); n >= lenXref {
|
|
|
|
u.xref = append(u.xref, make([]ref, n-lenXref+1)...)
|
|
|
|
}
|
|
|
|
loadedEntries[n] = struct{}{}
|
|
|
|
|
|
|
|
u.xref[n] = r
|
|
|
|
}
|
|
|
|
|
|
|
|
func (u *Updater) loadXrefStream(
|
|
|
|
lex *Lexer, stack []Object, loadedEntries map[uint]struct{}) (
|
|
|
|
Object, error) {
|
|
|
|
var object Object
|
|
|
|
for {
|
|
|
|
var err error
|
|
|
|
if object, err = u.parse(lex, &stack); err != nil {
|
|
|
|
return New(End), fmt.Errorf("invalid xref table: %s", err)
|
|
|
|
} else if object.Kind == End {
|
|
|
|
return newError("invalid xref table")
|
|
|
|
}
|
|
|
|
|
|
|
|
// For the sake of simplicity, keep stacking until we find an object.
|
|
|
|
if object.Kind == Indirect {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
stack = append(stack, object)
|
|
|
|
}
|
|
|
|
|
|
|
|
// ISO 32000-2:2020 7.5.8.2 Cross-reference stream dictionary
|
|
|
|
stream := object.Array[0]
|
|
|
|
if stream.Kind != Stream {
|
|
|
|
return newError("invalid xref table")
|
|
|
|
}
|
|
|
|
if typ, ok := stream.Dict["Type"]; !ok ||
|
|
|
|
typ.Kind != Name || typ.String != "XRef" {
|
|
|
|
return newError("invalid xref stream")
|
|
|
|
}
|
|
|
|
|
|
|
|
data, err := u.GetStreamData(stream)
|
|
|
|
if err != nil {
|
|
|
|
return New(End), fmt.Errorf("invalid xref stream: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
size, ok := stream.Dict["Size"]
|
|
|
|
if !ok || !size.IsUint() || size.Number <= 0 {
|
|
|
|
return newError("invalid or missing cross-reference stream Size")
|
|
|
|
}
|
|
|
|
|
|
|
|
type pair struct{ start, count uint }
|
|
|
|
pairs := []pair{}
|
|
|
|
if index, ok := stream.Dict["Index"]; !ok {
|
|
|
|
pairs = append(pairs, pair{0, uint(size.Number)})
|
|
|
|
} else {
|
|
|
|
if index.Kind != Array || len(index.Array)%2 != 0 {
|
|
|
|
return newError("invalid cross-reference stream Index")
|
|
|
|
}
|
|
|
|
|
|
|
|
a := index.Array
|
|
|
|
for i := 0; i < len(a); i += 2 {
|
|
|
|
if !a[i].IsUint() || !a[i+1].IsUint() {
|
|
|
|
return newError("invalid cross-reference stream Index")
|
|
|
|
}
|
|
|
|
pairs = append(pairs, pair{uint(a[i].Number), uint(a[i+1].Number)})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
w, ok := stream.Dict["W"]
|
|
|
|
if !ok || w.Kind != Array || len(w.Array) != 3 ||
|
|
|
|
!w.Array[0].IsUint() || !w.Array[1].IsUint() || !w.Array[2].IsUint() {
|
|
|
|
return newError("invalid or missing cross-reference stream W")
|
|
|
|
}
|
|
|
|
|
|
|
|
w1 := uint(w.Array[0].Number)
|
|
|
|
w2 := uint(w.Array[1].Number)
|
|
|
|
w3 := uint(w.Array[2].Number)
|
|
|
|
if w2 == 0 {
|
|
|
|
return newError("invalid cross-reference stream W")
|
|
|
|
}
|
|
|
|
|
|
|
|
unit := w1 + w2 + w3
|
|
|
|
if uint(len(data))%unit != 0 {
|
|
|
|
return newError("invalid cross-reference stream length")
|
|
|
|
}
|
|
|
|
|
|
|
|
readField := func(data []byte, width uint) (uint, []byte) {
|
|
|
|
var n uint
|
|
|
|
for ; width != 0; width-- {
|
|
|
|
n = n<<8 | uint(data[0])
|
|
|
|
data = data[1:]
|
|
|
|
}
|
|
|
|
return n, data
|
|
|
|
}
|
|
|
|
|
|
|
|
// ISO 32000-2:2020 7.5.8.3 Cross-reference stream data
|
|
|
|
for _, pair := range pairs {
|
|
|
|
for i := uint(0); i < pair.count; i++ {
|
|
|
|
if uint(len(data)) < unit {
|
|
|
|
return newError("premature cross-reference stream EOF")
|
|
|
|
}
|
|
|
|
|
|
|
|
var f1, f2, f3 uint = 1, 0, 0
|
|
|
|
if w1 > 0 {
|
|
|
|
f1, data = readField(data, w1)
|
|
|
|
}
|
|
|
|
f2, data = readField(data, w2)
|
|
|
|
if w3 > 0 {
|
|
|
|
f3, data = readField(data, w3)
|
|
|
|
}
|
|
|
|
|
|
|
|
var r ref
|
|
|
|
switch f1 {
|
|
|
|
case 0:
|
|
|
|
r.offset = int64(f2)
|
|
|
|
r.generation = f3
|
|
|
|
case 1:
|
|
|
|
r.offset = int64(f2)
|
|
|
|
r.generation = f3
|
|
|
|
r.nonfree = true
|
|
|
|
case 2:
|
|
|
|
r.offset = int64(f3)
|
|
|
|
r.compressed = &f2
|
|
|
|
r.nonfree = true
|
|
|
|
default:
|
2024-02-04 05:03:09 +01:00
|
|
|
// TODO(p): It should be treated as a reference to
|
|
|
|
// the null object. We can't currently represent that.
|
2023-06-29 05:01:23 +02:00
|
|
|
return newError("unsupported cross-reference stream contents")
|
|
|
|
}
|
|
|
|
|
|
|
|
u.loadXrefEntry(pair.start+i, r, loadedEntries)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
stream.Kind = Dict
|
|
|
|
stream.Stream = nil
|
|
|
|
return stream, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (u *Updater) loadXref(lex *Lexer, loadedEntries map[uint]struct{}) (
|
|
|
|
Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
var throwawayStack []Object
|
2023-06-29 05:01:23 +02:00
|
|
|
if object, _ := u.parse(lex,
|
|
|
|
&throwawayStack); object.Kind != Keyword || object.String != "xref" {
|
|
|
|
return u.loadXrefStream(lex, []Object{object}, loadedEntries)
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
for {
|
2018-10-04 13:07:47 +02:00
|
|
|
object, _ := u.parse(lex, &throwawayStack)
|
2018-10-02 23:19:38 +02:00
|
|
|
if object.Kind == End {
|
2023-06-29 05:01:23 +02:00
|
|
|
return newError("unexpected EOF while looking for the trailer")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
if object.Kind == Keyword && object.String == "trailer" {
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
2018-10-04 13:07:47 +02:00
|
|
|
second, _ := u.parse(lex, &throwawayStack)
|
2018-10-02 23:19:38 +02:00
|
|
|
if !object.IsUint() || !second.IsUint() {
|
2023-06-29 05:01:23 +02:00
|
|
|
return newError("invalid xref section header")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
start, count := uint(object.Number), uint(second.Number)
|
|
|
|
for i := uint(0); i < count; i++ {
|
2018-10-04 13:07:47 +02:00
|
|
|
off, _ := u.parse(lex, &throwawayStack)
|
|
|
|
gen, _ := u.parse(lex, &throwawayStack)
|
|
|
|
key, _ := u.parse(lex, &throwawayStack)
|
2018-10-02 23:19:38 +02:00
|
|
|
if !off.IsInteger() || off.Number < 0 ||
|
|
|
|
off.Number > float64(len(u.Document)) ||
|
|
|
|
!gen.IsInteger() || gen.Number < 0 || gen.Number > 65535 ||
|
|
|
|
key.Kind != Keyword {
|
2023-06-29 05:01:23 +02:00
|
|
|
return newError("invalid xref entry")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
free := true
|
|
|
|
if key.String == "n" {
|
|
|
|
free = false
|
|
|
|
} else if key.String != "f" {
|
2023-06-29 05:01:23 +02:00
|
|
|
return newError("invalid xref entry")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2023-06-29 05:01:23 +02:00
|
|
|
u.loadXrefEntry(start+i, ref{
|
2018-10-02 23:19:38 +02:00
|
|
|
offset: int64(off.Number),
|
|
|
|
generation: uint(gen.Number),
|
|
|
|
nonfree: !free,
|
2023-06-29 05:01:23 +02:00
|
|
|
}, loadedEntries)
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
}
|
2023-06-29 05:01:23 +02:00
|
|
|
|
|
|
|
trailer, _ := u.parse(lex, &throwawayStack)
|
|
|
|
if trailer.Kind != Dict {
|
|
|
|
return newError("invalid trailer dictionary")
|
|
|
|
}
|
|
|
|
return trailer, nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
|
2020-09-04 17:16:42 +02:00
|
|
|
// trailerRE captures the decimal offset that follows the startxref keyword
// and precedes the %%EOF marker. The greedy prefix, which also spans
// newlines, makes it match the last such occurrence in the searched text.
var trailerRE = regexp.MustCompile(
	`(?s:.*)\sstartxref\s+(\d+)\s+%%EOF`)
|
2018-10-02 23:19:38 +02:00
|
|
|
|
2018-10-04 14:46:12 +02:00
|
|
|
// NewUpdater initializes an Updater, building the cross-reference table and
|
|
|
|
// preparing a new trailer dictionary.
|
|
|
|
func NewUpdater(document []byte) (*Updater, error) {
|
|
|
|
u := &Updater{Document: document}
|
2018-10-02 23:19:38 +02:00
|
|
|
u.updated = make(map[uint]struct{})
|
|
|
|
|
|
|
|
// We only need to look for startxref roughly within
|
|
|
|
// the last kibibyte of the document.
|
|
|
|
haystack := u.Document
|
|
|
|
if len(haystack) > 1024 {
|
|
|
|
haystack = haystack[len(haystack)-1024:]
|
|
|
|
}
|
|
|
|
|
2020-09-04 17:16:42 +02:00
|
|
|
m := trailerRE.FindSubmatch(haystack)
|
2018-10-02 23:19:38 +02:00
|
|
|
if m == nil {
|
2018-10-04 14:46:12 +02:00
|
|
|
return nil, errors.New("cannot find startxref")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
xrefOffset, _ := strconv.ParseInt(string(m[1]), 10, 64)
|
|
|
|
lastXrefOffset := xrefOffset
|
2018-12-14 02:52:05 +01:00
|
|
|
loadedXrefs := make(map[int64]struct{})
|
|
|
|
loadedEntries := make(map[uint]struct{})
|
2018-10-02 23:19:38 +02:00
|
|
|
|
|
|
|
for {
|
|
|
|
if _, ok := loadedXrefs[xrefOffset]; ok {
|
2018-10-04 14:46:12 +02:00
|
|
|
return nil, errors.New("circular xref offsets")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
if xrefOffset >= int64(len(u.Document)) {
|
2018-10-04 14:46:12 +02:00
|
|
|
return nil, errors.New("invalid xref offset")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
lex := Lexer{u.Document[xrefOffset:]}
|
2023-06-29 05:01:23 +02:00
|
|
|
trailer, err := u.loadXref(&lex, loadedEntries)
|
|
|
|
if err != nil {
|
2018-10-04 14:46:12 +02:00
|
|
|
return nil, err
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
if len(loadedXrefs) == 0 {
|
|
|
|
u.Trailer = trailer.Dict
|
|
|
|
}
|
|
|
|
loadedXrefs[xrefOffset] = struct{}{}
|
|
|
|
|
2024-02-04 05:03:09 +01:00
|
|
|
// TODO(p): Descend into XRefStm here first, if present,
|
2023-06-29 05:01:23 +02:00
|
|
|
// which is also a linked list.
|
|
|
|
|
|
|
|
// We allow for mixed cross-reference tables and streams
|
|
|
|
// within a single Prev list, although this should never occur.
|
2018-10-02 23:19:38 +02:00
|
|
|
prevOffset, ok := trailer.Dict["Prev"]
|
|
|
|
if !ok {
|
|
|
|
break
|
|
|
|
}
|
2021-12-08 20:39:02 +01:00
|
|
|
// FIXME: Do not read offsets and sizes as floating point numbers.
|
2018-10-02 23:19:38 +02:00
|
|
|
if !prevOffset.IsInteger() {
|
2018-10-04 14:46:12 +02:00
|
|
|
return nil, errors.New("invalid Prev offset")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
xrefOffset = int64(prevOffset.Number)
|
|
|
|
}
|
|
|
|
|
2018-10-04 12:51:23 +02:00
|
|
|
u.Trailer["Prev"] = NewNumeric(float64(lastXrefOffset))
|
2018-10-02 23:19:38 +02:00
|
|
|
|
|
|
|
lastSize, ok := u.Trailer["Size"]
|
|
|
|
if !ok || !lastSize.IsInteger() || lastSize.Number <= 0 {
|
2018-10-04 14:46:12 +02:00
|
|
|
return nil, errors.New("invalid or missing cross-reference table Size")
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
u.xrefSize = uint(lastSize.Number)
|
2018-10-04 14:46:12 +02:00
|
|
|
return u, nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2020-09-04 17:16:42 +02:00
|
|
|
var versionRE = regexp.MustCompile(
|
|
|
|
`(?:^|[\r\n])%(?:!PS-Adobe-\d\.\d )?PDF-(\d)\.(\d)[\r\n]`)
|
|
|
|
|
|
|
|
// Version extracts the claimed PDF version as a positive decimal number,
|
|
|
|
// e.g. 17 for PDF 1.7. Returns zero on failure.
|
|
|
|
func (u *Updater) Version(root *Object) int {
|
|
|
|
if version, ok := root.Dict["Version"]; ok && version.Kind == Name {
|
|
|
|
if v := version.String; len(v) == 3 && v[1] == '.' &&
|
|
|
|
v[0] >= '0' && v[0] <= '9' && v[2] >= '0' && v[2] <= '9' {
|
|
|
|
return int(v[0]-'0')*10 + int(v[2]-'0')
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We only need to look for the comment roughly within
|
|
|
|
// the first kibibyte of the document.
|
|
|
|
haystack := u.Document
|
|
|
|
if len(haystack) > 1024 {
|
|
|
|
haystack = haystack[:1024]
|
|
|
|
}
|
|
|
|
if m := versionRE.FindSubmatch(haystack); m != nil {
|
|
|
|
return int(m[1][0]-'0')*10 + int(m[2][0]-'0')
|
|
|
|
}
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
2023-06-29 05:01:23 +02:00
|
|
|
func (u *Updater) getFromObjStm(nObjStm, n uint) (Object, error) {
|
|
|
|
if nObjStm == n {
|
|
|
|
return newError("ObjStm recursion")
|
|
|
|
}
|
|
|
|
|
|
|
|
stream, err := u.Get(nObjStm, 0)
|
|
|
|
if err != nil {
|
|
|
|
return stream, err
|
|
|
|
}
|
|
|
|
if stream.Kind != Stream {
|
|
|
|
return newError("invalid ObjStm")
|
|
|
|
}
|
|
|
|
if typ, ok := stream.Dict["Type"]; !ok ||
|
|
|
|
typ.Kind != Name || typ.String != "ObjStm" {
|
|
|
|
return newError("invalid ObjStm")
|
|
|
|
}
|
|
|
|
|
|
|
|
data, err := u.GetStreamData(stream)
|
|
|
|
if err != nil {
|
|
|
|
return New(End), fmt.Errorf("invalid ObjStm: %s", err)
|
|
|
|
}
|
|
|
|
entryN, ok := stream.Dict["N"]
|
|
|
|
if !ok || !entryN.IsUint() || entryN.Number <= 0 {
|
|
|
|
return newError("invalid ObjStm N")
|
|
|
|
}
|
|
|
|
entryFirst, ok := stream.Dict["First"]
|
|
|
|
if !ok || !entryFirst.IsUint() || entryFirst.Number <= 0 {
|
|
|
|
return newError("invalid ObjStm First")
|
|
|
|
}
|
|
|
|
|
|
|
|
// NOTE: This means descending into that stream if n is not found here.
|
|
|
|
// It is meant to be an object reference.
|
|
|
|
if extends, ok := stream.Dict["Extends"]; ok && extends.Kind != Nil {
|
|
|
|
return newError("ObjStm extensions are unsupported")
|
|
|
|
}
|
|
|
|
|
|
|
|
count := uint(entryN.Number)
|
|
|
|
first := uint(entryFirst.Number)
|
|
|
|
if first > uint(len(data)) {
|
|
|
|
return newError("invalid ObjStm First")
|
|
|
|
}
|
|
|
|
|
|
|
|
lex1 := Lexer{data[:first]}
|
|
|
|
data = data[first:]
|
|
|
|
|
|
|
|
type pair struct{ n, offset uint }
|
|
|
|
pairs := []pair{}
|
|
|
|
for i := uint(0); i < count; i++ {
|
|
|
|
var throwawayStack []Object
|
|
|
|
objN, _ := u.parse(&lex1, &throwawayStack)
|
|
|
|
objOffset, _ := u.parse(&lex1, &throwawayStack)
|
|
|
|
if !objN.IsUint() || !objOffset.IsUint() {
|
|
|
|
return newError("invalid ObjStm pairs")
|
|
|
|
}
|
|
|
|
pairs = append(pairs, pair{uint(objN.Number), uint(objOffset.Number)})
|
|
|
|
}
|
|
|
|
for i, pair := range pairs {
|
|
|
|
if pair.offset > uint(len(data)) ||
|
|
|
|
i > 0 && pairs[i-1].offset >= pair.offset {
|
|
|
|
return newError("invalid ObjStm pairs")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for i, pair := range pairs {
|
|
|
|
if pair.n != n {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if i+1 < len(pairs) {
|
|
|
|
data = data[pair.offset:pairs[i+1].offset]
|
|
|
|
} else {
|
|
|
|
data = data[pair.offset:]
|
|
|
|
}
|
|
|
|
|
|
|
|
lex2 := Lexer{data}
|
|
|
|
var stack []Object
|
|
|
|
for {
|
|
|
|
object, err := u.parse(&lex2, &stack)
|
|
|
|
if err != nil {
|
|
|
|
return object, err
|
|
|
|
} else if object.Kind == End {
|
|
|
|
break
|
|
|
|
} else {
|
|
|
|
stack = append(stack, object)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(stack) == 0 {
|
|
|
|
return newError("empty ObjStm object")
|
|
|
|
}
|
|
|
|
return stack[0], nil
|
|
|
|
}
|
|
|
|
return newError("object not found in ObjStm")
|
|
|
|
}
|
|
|
|
|
2018-10-02 23:19:38 +02:00
|
|
|
// Get retrieves an object by its number and generation--may return
|
|
|
|
// Nil or End with an error.
|
2018-10-04 13:07:47 +02:00
|
|
|
func (u *Updater) Get(n, generation uint) (Object, error) {
|
2018-10-02 23:19:38 +02:00
|
|
|
if n >= u.xrefSize {
|
2018-10-04 13:07:47 +02:00
|
|
|
return New(Nil), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
ref := u.xref[n]
|
2023-06-29 05:01:23 +02:00
|
|
|
if !ref.nonfree || ref.generation != generation {
|
|
|
|
return New(Nil), nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if ref.compressed != nil {
|
|
|
|
return u.getFromObjStm(*ref.compressed, n)
|
|
|
|
} else if ref.offset >= int64(len(u.Document)) {
|
2018-10-04 13:07:47 +02:00
|
|
|
return New(Nil), nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
lex := Lexer{u.Document[ref.offset:]}
|
|
|
|
var stack []Object
|
|
|
|
for {
|
2018-10-04 13:07:47 +02:00
|
|
|
object, err := u.parse(&lex, &stack)
|
2018-10-02 23:19:38 +02:00
|
|
|
if object.Kind == End {
|
2018-10-04 13:07:47 +02:00
|
|
|
return object, err
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
if object.Kind != Indirect {
|
|
|
|
stack = append(stack, object)
|
|
|
|
} else if object.N != n || object.Generation != generation {
|
2018-10-04 13:07:47 +02:00
|
|
|
return newError("object mismatch")
|
2018-10-02 23:19:38 +02:00
|
|
|
} else {
|
2018-10-04 13:07:47 +02:00
|
|
|
return object.Array[0], nil
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-12-08 21:33:09 +01:00
|
|
|
// Derefence dereferences Reference objects, and passes the other kinds through.
|
|
|
|
func (u *Updater) Dereference(o Object) (Object, error) {
|
|
|
|
if o.Kind != Reference {
|
|
|
|
return o, nil
|
|
|
|
}
|
|
|
|
return u.Get(o.N, o.Generation)
|
|
|
|
}
|
|
|
|
|
2018-10-02 23:19:38 +02:00
|
|
|
// Allocate allocates a new object number.
|
|
|
|
func (u *Updater) Allocate() uint {
|
|
|
|
n := u.xrefSize
|
|
|
|
u.xrefSize++
|
|
|
|
|
|
|
|
if u.xrefSize == 0 {
|
|
|
|
panic("overflow")
|
|
|
|
} else if lenXref := uint(len(u.xref)); lenXref < u.xrefSize {
|
|
|
|
u.xref = append(u.xref, make([]ref, u.xrefSize-lenXref)...)
|
|
|
|
}
|
|
|
|
|
|
|
|
// We don't make sure it gets a subsection in the update yet because we
|
|
|
|
// make no attempts at fixing the linked list of free items either.
|
|
|
|
return n
|
|
|
|
}
|
|
|
|
|
|
|
|
// BytesWriter is an interface over a subset of bytes.Buffer methods:
// those for inspecting what has been written so far,
// and those for appending more output.
type BytesWriter interface {
	Len() int
	Bytes() []byte

	Write(p []byte) (n int, err error)
	WriteString(s string) (n int, err error)
	WriteByte(c byte) error
	WriteRune(r rune) (n int, err error)
}
|
|
|
|
|
2023-06-29 05:01:23 +02:00
|
|
|
// Update appends an updated object to the end of the document.
|
|
|
|
// The fill callback must write exactly one PDF object.
|
2018-10-02 23:19:38 +02:00
|
|
|
func (u *Updater) Update(n uint, fill func(buf BytesWriter)) {
|
|
|
|
oldRef := u.xref[n]
|
|
|
|
u.updated[n] = struct{}{}
|
|
|
|
u.xref[n] = ref{
|
|
|
|
offset: int64(len(u.Document) + 1),
|
|
|
|
generation: oldRef.generation,
|
|
|
|
nonfree: true,
|
|
|
|
}
|
|
|
|
|
|
|
|
buf := bytes.NewBuffer(u.Document)
|
|
|
|
fmt.Fprintf(buf, "\n%d %d obj\n", n, oldRef.generation)
|
|
|
|
|
|
|
|
// Separately so that the callback can use w.Len() to get current offset.
|
|
|
|
fill(buf)
|
|
|
|
|
|
|
|
buf.WriteString("\nendobj")
|
|
|
|
u.Document = buf.Bytes()
|
|
|
|
}
|
|
|
|
|
2023-06-29 05:01:23 +02:00
|
|
|
func (u *Updater) flushXRefStm(updated []uint, buf *bytes.Buffer) {
|
|
|
|
// The cross-reference stream has to point to itself.
|
|
|
|
// XXX: We only duplicate Update code here due to how we currently buffer.
|
|
|
|
n := u.Allocate()
|
|
|
|
updated = append(updated, n)
|
|
|
|
|
|
|
|
u.updated[n] = struct{}{}
|
|
|
|
u.xref[n] = ref{
|
|
|
|
offset: int64(buf.Len() + 1),
|
|
|
|
generation: 0,
|
|
|
|
nonfree: true,
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
2023-06-29 05:01:23 +02:00
|
|
|
|
|
|
|
index, b := []Object{}, []byte{}
|
|
|
|
write := func(f1 byte, f2, f3 uint64) {
|
|
|
|
b = append(b, f1)
|
|
|
|
b = binary.BigEndian.AppendUint64(b, f2)
|
|
|
|
b = binary.BigEndian.AppendUint64(b, f3)
|
|
|
|
}
|
|
|
|
for i := 0; i < len(updated); {
|
|
|
|
start, stop := updated[i], updated[i]+1
|
|
|
|
for i++; i < len(updated) && updated[i] == stop; i++ {
|
|
|
|
stop++
|
|
|
|
}
|
|
|
|
|
|
|
|
index = append(index,
|
|
|
|
NewNumeric(float64(start)), NewNumeric(float64(stop-start)))
|
|
|
|
for ; start < stop; start++ {
|
|
|
|
ref := u.xref[start]
|
|
|
|
if ref.compressed != nil {
|
|
|
|
write(2, uint64(*ref.compressed), uint64(ref.offset))
|
|
|
|
} else if ref.nonfree {
|
|
|
|
write(1, uint64(ref.offset), uint64(ref.generation))
|
|
|
|
} else {
|
|
|
|
write(0, uint64(ref.offset), uint64(ref.generation))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
u.Trailer["Size"] = NewNumeric(float64(u.xrefSize))
|
|
|
|
u.Trailer["Index"] = NewArray(index)
|
|
|
|
u.Trailer["W"] = NewArray([]Object{
|
|
|
|
NewNumeric(1), NewNumeric(8), NewNumeric(8),
|
2018-10-02 23:19:38 +02:00
|
|
|
})
|
|
|
|
|
2023-06-29 05:01:23 +02:00
|
|
|
for _, key := range []string{
|
|
|
|
"Filter", "DecodeParms", "F", "FFilter", "FDecodeParms", "DL"} {
|
|
|
|
delete(u.Trailer, key)
|
|
|
|
}
|
2018-10-02 23:19:38 +02:00
|
|
|
|
2023-06-29 05:01:23 +02:00
|
|
|
stream := NewStream(u.Trailer, b)
|
|
|
|
fmt.Fprintf(buf, "\n%d 0 obj\n%s\nendobj", n, stream.Serialize())
|
|
|
|
}
|
|
|
|
|
2024-02-04 05:03:09 +01:00
|
|
|
func (u *Updater) flushXRefTable(updated []uint, buf *bytes.Buffer) {
|
2023-06-29 05:01:23 +02:00
|
|
|
buf.WriteString("\nxref\n")
|
2020-09-06 04:38:41 +02:00
|
|
|
for i := 0; i < len(updated); {
|
|
|
|
start, stop := updated[i], updated[i]+1
|
|
|
|
for i++; i < len(updated) && updated[i] == stop; i++ {
|
|
|
|
stop++
|
|
|
|
}
|
|
|
|
|
|
|
|
fmt.Fprintf(buf, "%d %d\n", start, stop-start)
|
|
|
|
for ; start < stop; start++ {
|
2023-06-29 05:01:23 +02:00
|
|
|
// XXX: We should warn about any object streams here.
|
2020-09-06 04:38:41 +02:00
|
|
|
ref := u.xref[start]
|
2023-06-29 05:01:23 +02:00
|
|
|
if ref.nonfree && ref.compressed == nil {
|
2018-10-02 23:19:38 +02:00
|
|
|
fmt.Fprintf(buf, "%010d %05d n \n", ref.offset, ref.generation)
|
|
|
|
} else {
|
|
|
|
fmt.Fprintf(buf, "%010d %05d f \n", ref.offset, ref.generation)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-06 04:38:41 +02:00
|
|
|
// Taking literally "Each cross-reference section begins with a line
|
|
|
|
// containing the keyword xref. Following this line are one or more
|
|
|
|
// cross-reference subsections." from 3.4.3 in PDF Reference.
|
|
|
|
if len(updated) == 0 {
|
|
|
|
fmt.Fprintf(buf, "%d %d\n", 0, 0)
|
|
|
|
}
|
|
|
|
|
2018-10-04 12:51:23 +02:00
|
|
|
u.Trailer["Size"] = NewNumeric(float64(u.xrefSize))
|
|
|
|
trailer := NewDict(u.Trailer)
|
2023-06-29 05:01:23 +02:00
|
|
|
fmt.Fprintf(buf, "trailer\n%s", trailer.Serialize())
|
|
|
|
}
|
2018-10-02 23:19:38 +02:00
|
|
|
|
2024-02-04 05:03:09 +01:00
|
|
|
// FlushUpdates writes an updated cross-reference table and trailer, or stream.
|
2023-06-29 05:01:23 +02:00
|
|
|
func (u *Updater) FlushUpdates() {
|
|
|
|
updated := make([]uint, 0, len(u.updated))
|
|
|
|
for n := range u.updated {
|
|
|
|
updated = append(updated, n)
|
|
|
|
}
|
|
|
|
sort.Slice(updated, func(i, j int) bool {
|
|
|
|
return updated[i] < updated[j]
|
|
|
|
})
|
|
|
|
|
|
|
|
// It does not seem to be possible to upgrade a PDF file
|
|
|
|
// from trailer dictionaries to cross-reference streams,
|
|
|
|
// so keep continuity either way.
|
|
|
|
//
|
|
|
|
// (Downgrading from cross-reference streams using XRefStm would not
|
|
|
|
// create a true hybrid-reference file, although it should work.)
|
|
|
|
buf := bytes.NewBuffer(u.Document)
|
|
|
|
startXref := buf.Len() + 1 /* '\n' */
|
|
|
|
if typ, _ := u.Trailer["Type"]; typ.Kind == Name && typ.String == "XRef" {
|
|
|
|
u.flushXRefStm(updated, buf)
|
|
|
|
} else {
|
2024-02-04 05:03:09 +01:00
|
|
|
u.flushXRefTable(updated, buf)
|
2023-06-29 05:01:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
fmt.Fprintf(buf, "\nstartxref\n%d\n%%%%EOF\n", startXref)
|
2018-10-02 23:19:38 +02:00
|
|
|
u.Document = buf.Bytes()
|
2024-02-04 05:17:26 +01:00
|
|
|
u.updated = make(map[uint]struct{})
|
|
|
|
|
|
|
|
u.Trailer["Prev"] = NewNumeric(float64(startXref))
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
|
2018-10-04 12:11:43 +02:00
|
|
|
// NewDate makes a PDF object representing the given point in time.
|
|
|
|
func NewDate(ts time.Time) Object {
|
2018-10-02 23:19:38 +02:00
|
|
|
buf := ts.AppendFormat(nil, "D:20060102150405")
|
|
|
|
// "Z07'00'" doesn't work, we need to do some of it manually.
|
|
|
|
if _, offset := ts.Zone(); offset != 0 {
|
|
|
|
o := ts.AppendFormat(nil, "-0700")
|
|
|
|
buf = append(buf, o[0], o[1], o[2], '\'', o[3], o[4], '\'')
|
|
|
|
} else {
|
|
|
|
buf = append(buf, 'Z')
|
|
|
|
}
|
2018-10-04 12:51:23 +02:00
|
|
|
return NewString(string(buf))
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
2023-06-29 05:01:23 +02:00
|
|
|
// GetStreamData returns the actual data stored in a stream object,
|
|
|
|
// applying any filters.
|
|
|
|
func (u *Updater) GetStreamData(stream Object) ([]byte, error) {
|
|
|
|
if f, ok := stream.Dict["F"]; ok && f.Kind != Nil {
|
|
|
|
return nil, errors.New("stream data in other files are unsupported")
|
|
|
|
}
|
|
|
|
|
|
|
|
// Support just enough to decode a common cross-reference stream.
|
|
|
|
if filter, ok := stream.Dict["Filter"]; !ok {
|
|
|
|
return stream.Stream, nil
|
|
|
|
} else if filter.Kind != Name || filter.String != "FlateDecode" {
|
|
|
|
return nil, errors.New("unsupported stream Filter")
|
|
|
|
}
|
|
|
|
|
2024-02-04 05:03:09 +01:00
|
|
|
// TODO(p): Support << /Columns N /Predictor 12 >>
|
2023-06-29 05:01:23 +02:00
|
|
|
// which usually appears in files with cross-reference streams.
|
|
|
|
if parms, ok := stream.Dict["DecodeParms"]; ok && parms.Kind != Nil {
|
|
|
|
return nil, errors.New("DecodeParms are not supported")
|
|
|
|
}
|
|
|
|
|
|
|
|
r, err := zlib.NewReader(bytes.NewReader(stream.Stream))
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var b bytes.Buffer
|
|
|
|
_, err = b.ReadFrom(r)
|
|
|
|
return b.Bytes(), err
|
|
|
|
}
|
|
|
|
|
2018-10-04 13:29:22 +02:00
|
|
|
// GetFirstPage retrieves the first page of the given page (sub)tree reference,
|
|
|
|
// or returns a Nil object if unsuccessful.
|
2021-12-08 21:33:09 +01:00
|
|
|
func (u *Updater) GetFirstPage(node Object) Object {
|
|
|
|
obj, err := u.Dereference(node)
|
2021-12-08 20:49:06 +01:00
|
|
|
if err != nil || obj.Kind != Dict {
|
2018-10-04 12:51:23 +02:00
|
|
|
return New(Nil)
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Out of convenience; these aren't filled normally.
|
2021-12-08 21:33:09 +01:00
|
|
|
obj.N = node.N
|
|
|
|
obj.Generation = node.Generation
|
2018-10-02 23:19:38 +02:00
|
|
|
|
|
|
|
if typ, ok := obj.Dict["Type"]; !ok || typ.Kind != Name {
|
2018-10-04 12:51:23 +02:00
|
|
|
return New(Nil)
|
2018-10-02 23:19:38 +02:00
|
|
|
} else if typ.String == "Page" {
|
|
|
|
return obj
|
|
|
|
} else if typ.String != "Pages" {
|
2018-10-04 12:51:23 +02:00
|
|
|
return New(Nil)
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// XXX: Technically speaking, this may be an indirect reference.
|
|
|
|
// The correct way to solve this seems to be having Updater include
|
|
|
|
// a wrapper around "obj.Dict". Though does it still apply in Golang?
|
|
|
|
kids, ok := obj.Dict["Kids"]
|
|
|
|
if !ok || kids.Kind != Array || len(kids.Array) == 0 ||
|
|
|
|
kids.Array[0].Kind != Reference {
|
2018-10-04 12:51:23 +02:00
|
|
|
return New(Nil)
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// XXX: Nothing prevents us from recursing in an evil circular graph.
|
2021-12-08 21:33:09 +01:00
|
|
|
return u.GetFirstPage(kids.Array[0])
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// -----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
// PKCS12Parse parses and verifies PKCS#12 data.
|
|
|
|
func PKCS12Parse(p12 []byte, password string) (
|
|
|
|
crypto.PrivateKey, []*x509.Certificate, error) {
|
|
|
|
// The pkcs12.Decode function doesn't support included intermediate
|
|
|
|
// certificates, we need to do some processing manually.
|
|
|
|
blocks, err := pkcs12.ToPEM(p12, password)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// b.Type is literally CERTIFICATE or PRIVATE KEY, the Headers only contain
|
|
|
|
// a localKeyId field. It seems like the pkey and the cert share the same
|
|
|
|
// localKeyId value. Though the leaf certificate should also be the first
|
|
|
|
// one in the PKCS#12 file, so I probably don't need that value.
|
|
|
|
var allX509Blocks [][]byte
|
|
|
|
var allCertBlocks [][]byte
|
|
|
|
for _, b := range blocks {
|
|
|
|
// CERTIFICATE, PRIVATE KEY constants are defined locally in the pkcs12
|
|
|
|
// package. crypto/tls/tls.go seems to only use literals for these and
|
|
|
|
// also accepts words in front such as RSA PRIVATE KEY.
|
|
|
|
switch b.Type {
|
|
|
|
case "PRIVATE KEY":
|
|
|
|
allX509Blocks = append(allX509Blocks, b.Bytes)
|
|
|
|
case "CERTIFICATE":
|
|
|
|
allCertBlocks = append(allCertBlocks, b.Bytes)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
switch {
|
|
|
|
case len(allX509Blocks) == 0:
|
|
|
|
return nil, nil, errors.New("missing private key")
|
|
|
|
case len(allX509Blocks) > 1:
|
|
|
|
return nil, nil, errors.New("more than one private key")
|
|
|
|
case len(allCertBlocks) == 0:
|
|
|
|
return nil, nil, errors.New("missing certificate")
|
|
|
|
}
|
|
|
|
|
|
|
|
// The PKCS#12 file may only contain PKCS#8-wrapped private keys but the
|
|
|
|
// pkcs12 package unwraps them to simple PKCS#1/EC while converting to PEM.
|
|
|
|
var key crypto.PrivateKey
|
|
|
|
if key, err = x509.ParsePKCS1PrivateKey(allX509Blocks[0]); err != nil {
|
|
|
|
if key, err = x509.ParseECPrivateKey(allX509Blocks[0]); err == nil {
|
|
|
|
return nil, nil, errors.New("failed to parse private key")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
x509Certs, err := x509.ParseCertificates(allCertBlocks[0])
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
if len(x509Certs) != 1 {
|
|
|
|
return nil, nil,
|
|
|
|
errors.New("expected exactly one certificate in the first bag")
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, cert := range allCertBlocks[1:] {
|
|
|
|
toAdd, err := x509.ParseCertificates(cert)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
|
|
|
x509Certs = append(x509Certs, toAdd...)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Copied from crypto/tls/tls.go.
|
|
|
|
switch pub := x509Certs[0].PublicKey.(type) {
|
|
|
|
case *rsa.PublicKey:
|
|
|
|
priv, ok := key.(*rsa.PrivateKey)
|
|
|
|
if !ok {
|
|
|
|
return nil, nil,
|
|
|
|
errors.New("private key type does not match public key type")
|
|
|
|
}
|
|
|
|
if pub.N.Cmp(priv.N) != 0 {
|
|
|
|
return nil, nil,
|
|
|
|
errors.New("private key does not match public key")
|
|
|
|
}
|
|
|
|
case *ecdsa.PublicKey:
|
|
|
|
priv, ok := key.(*ecdsa.PrivateKey)
|
|
|
|
if !ok {
|
|
|
|
return nil, nil,
|
|
|
|
errors.New("private key type does not match public key type")
|
|
|
|
}
|
|
|
|
if pub.X.Cmp(priv.X) != 0 || pub.Y.Cmp(priv.Y) != 0 {
|
|
|
|
return nil, nil,
|
|
|
|
errors.New("private key does not match public key")
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
return nil, nil, errors.New("unknown public key algorithm")
|
|
|
|
}
|
|
|
|
return key, x509Certs, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// FillInSignature signs PDF contents and writes the signature into the given
|
|
|
|
// window that has been reserved for this specific purpose.
|
2018-10-04 13:11:10 +02:00
|
|
|
// This is a very low-level function.
|
2018-10-02 23:19:38 +02:00
|
|
|
func FillInSignature(document []byte, signOff, signLen int,
|
2018-10-04 13:11:10 +02:00
|
|
|
key crypto.PrivateKey, certs []*x509.Certificate) error {
|
2018-10-02 23:19:38 +02:00
|
|
|
if signOff < 0 || signOff > len(document) ||
|
|
|
|
signLen < 2 || signOff+signLen > len(document) {
|
|
|
|
return errors.New("invalid signing window")
|
|
|
|
}
|
|
|
|
|
|
|
|
pkcsError := func(message interface{}) error {
|
|
|
|
return fmt.Errorf("key/cert: %s", message)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Prevent useless signatures--makes pdfsig from poppler happy at least
|
|
|
|
// (and NSS by extension).
|
|
|
|
x509Cert := certs[0]
|
|
|
|
if x509Cert.KeyUsage&(x509.KeyUsageDigitalSignature|
|
|
|
|
x509.KeyUsageContentCommitment /* renamed non-repudiation */) == 0 {
|
|
|
|
return pkcsError("the certificate's key usage must include " +
|
|
|
|
"digital signatures or non-repudiation")
|
|
|
|
}
|
|
|
|
|
|
|
|
extOK := false
|
|
|
|
for _, u := range x509Cert.ExtKeyUsage {
|
|
|
|
if u == x509.ExtKeyUsageAny || u == x509.ExtKeyUsageEmailProtection {
|
|
|
|
extOK = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if len(x509Cert.ExtKeyUsage) > 0 && !extOK {
|
|
|
|
return pkcsError("the certificate's extended key usage " +
|
|
|
|
"must include S/MIME")
|
|
|
|
}
|
|
|
|
|
|
|
|
// XXX: We'd like to stream to the hash manually instead of copying data.
|
|
|
|
data := make([]byte, len(document)-signLen)
|
|
|
|
copy(data, document[:signOff])
|
|
|
|
copy(data[signOff:], document[signOff+signLen:])
|
|
|
|
|
|
|
|
signedData, err := pkcs7.NewSignedData(data)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
// The default digest is SHA1, which is mildly insecure now.
|
|
|
|
signedData.SetDigestAlgorithm(pkcs7.OIDDigestAlgorithmSHA256)
|
|
|
|
if err := signedData.AddSignerChain(
|
|
|
|
x509Cert, key, certs[1:], pkcs7.SignerInfoConfig{}); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
signedData.Detach()
|
|
|
|
sig, err := signedData.Finish()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Debugging: ioutil.WriteFile("pdf_signature.der", sig, 0666)
|
|
|
|
openssl cms -inform PEM -in pdf_signature.pem -noout -cmsout -print
|
|
|
|
Context: https://stackoverflow.com/a/29253469
|
|
|
|
*/
|
|
|
|
|
|
|
|
if len(sig)*2 > signLen-2 /* hexstring quotes */ {
|
|
|
|
// The obvious solution is to increase the allocation... or spend
|
|
|
|
// a week reading specifications while losing all faith in humanity
|
|
|
|
// as a species, and skip the pkcs7 package entirely.
|
|
|
|
return fmt.Errorf("not enough space reserved for the signature "+
|
|
|
|
"(%d nibbles vs %d nibbles)", signLen-2, len(sig)*2)
|
|
|
|
}
|
|
|
|
|
|
|
|
hex.Encode(document[signOff+1:], sig)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2018-10-04 12:11:43 +02:00
|
|
|
// https://www.adobe.com/devnet-docs/acrobatetk/tools/DigSig/Acrobat_DigitalSignatures_in_PDF.pdf
|
|
|
|
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/pdf_reference_1-7.pdf
|
|
|
|
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PPKAppearances.pdf
|
|
|
|
|
2018-10-02 23:19:38 +02:00
|
|
|
// Sign signs the given document, growing and returning the passed-in slice.
|
2018-10-04 12:11:43 +02:00
|
|
|
// There must be at least one certificate, matching the private key.
|
|
|
|
// The certificates must form a chain.
|
2018-10-02 23:19:38 +02:00
|
|
|
//
|
2020-09-04 18:33:12 +02:00
|
|
|
// A good default for the reservation is around 4096 (the value is in bytes).
|
|
|
|
//
|
2018-10-02 23:19:38 +02:00
|
|
|
// The presumption here is that the document is valid and that it doesn't
|
|
|
|
// employ cross-reference streams from PDF 1.5, or at least constitutes
|
|
|
|
// a hybrid-reference file. The results with PDF 2.0 (2017) are currently
|
|
|
|
// unknown as the standard costs money.
|
2020-09-04 18:33:12 +02:00
|
|
|
func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
|
|
|
|
reservation int) ([]byte, error) {
|
2018-10-04 14:46:12 +02:00
|
|
|
pdf, err := NewUpdater(document)
|
|
|
|
if err != nil {
|
2018-10-02 23:19:38 +02:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
rootRef, ok := pdf.Trailer["Root"]
|
|
|
|
if !ok || rootRef.Kind != Reference {
|
|
|
|
return nil, errors.New("trailer does not contain a reference to Root")
|
|
|
|
}
|
2021-12-08 21:33:09 +01:00
|
|
|
root, err := pdf.Dereference(rootRef)
|
2021-12-08 20:49:06 +01:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("Root dictionary retrieval failed: %s", err)
|
|
|
|
}
|
2018-10-02 23:19:38 +02:00
|
|
|
if root.Kind != Dict {
|
|
|
|
return nil, errors.New("invalid Root dictionary reference")
|
|
|
|
}
|
|
|
|
|
|
|
|
// 8.7 Digital Signatures - /signature dictionary/
|
|
|
|
sigdictN := pdf.Allocate()
|
|
|
|
var byterangeOff, byterangeLen, signOff, signLen int
|
|
|
|
pdf.Update(sigdictN, func(buf BytesWriter) {
|
|
|
|
// The timestamp is important for Adobe Acrobat Reader DC.
|
|
|
|
// The ideal would be to use RFC 3161.
|
2018-10-04 12:11:43 +02:00
|
|
|
now := NewDate(time.Now())
|
2018-10-02 23:19:38 +02:00
|
|
|
buf.WriteString("<< /Type/Sig /Filter/Adobe.PPKLite" +
|
|
|
|
" /SubFilter/adbe.pkcs7.detached\n" +
|
|
|
|
" /M" + now.Serialize() + " /ByteRange ")
|
|
|
|
|
|
|
|
byterangeOff = buf.Len()
|
|
|
|
byterangeLen = 32 // fine for a gigabyte
|
|
|
|
buf.Write(bytes.Repeat([]byte{' '}, byterangeLen))
|
|
|
|
buf.WriteString("\n /Contents <")
|
|
|
|
|
|
|
|
signOff = buf.Len()
|
2020-09-04 18:33:12 +02:00
|
|
|
signLen = reservation * 2 // cert, digest, encrypted digest, ...
|
2018-10-02 23:19:38 +02:00
|
|
|
buf.Write(bytes.Repeat([]byte{'0'}, signLen))
|
|
|
|
buf.WriteString("> >>")
|
|
|
|
|
|
|
|
// We actually need to exclude the hexstring quotes from signing.
|
|
|
|
signOff -= 1
|
|
|
|
signLen += 2
|
|
|
|
})
|
|
|
|
|
2018-10-04 12:51:23 +02:00
|
|
|
sigfield := NewDict(map[string]Object{
|
2018-10-02 23:19:38 +02:00
|
|
|
// 8.6.3 Field Types - Signature Fields
|
2018-10-04 12:51:23 +02:00
|
|
|
"FT": NewName("Sig"),
|
|
|
|
"V": NewReference(sigdictN, 0),
|
2018-10-02 23:19:38 +02:00
|
|
|
// 8.4.5 Annotations Types - Widget Annotations
|
|
|
|
// We can merge the Signature Annotation and omit Kids here.
|
2018-10-04 12:51:23 +02:00
|
|
|
"Subtype": NewName("Widget"),
|
|
|
|
"F": NewNumeric(2 /* Hidden */),
|
|
|
|
"T": NewString("Signature1"),
|
|
|
|
"Rect": NewArray([]Object{
|
|
|
|
NewNumeric(0), NewNumeric(0), NewNumeric(0), NewNumeric(0),
|
|
|
|
}),
|
|
|
|
})
|
2018-10-02 23:19:38 +02:00
|
|
|
|
|
|
|
sigfieldN := pdf.Allocate()
|
|
|
|
pdf.Update(sigfieldN, func(buf BytesWriter) {
|
|
|
|
buf.WriteString(sigfield.Serialize())
|
|
|
|
})
|
|
|
|
|
|
|
|
pagesRef, ok := root.Dict["Pages"]
|
|
|
|
if !ok || pagesRef.Kind != Reference {
|
|
|
|
return nil, errors.New("invalid Pages reference")
|
|
|
|
}
|
2021-12-08 21:33:09 +01:00
|
|
|
page := pdf.GetFirstPage(pagesRef)
|
2018-10-02 23:19:38 +02:00
|
|
|
if page.Kind != Dict {
|
|
|
|
return nil, errors.New("invalid or unsupported page tree")
|
|
|
|
}
|
|
|
|
|
|
|
|
annots := page.Dict["Annots"]
|
|
|
|
if annots.Kind != Array {
|
2020-09-04 15:34:33 +02:00
|
|
|
// TODO(p): Indirectly referenced arrays might not be
|
|
|
|
// that hard to support.
|
|
|
|
if annots.Kind != End {
|
|
|
|
return nil, errors.New("unexpected Annots")
|
|
|
|
}
|
2018-10-04 12:51:23 +02:00
|
|
|
annots = NewArray(nil)
|
2018-10-02 23:19:38 +02:00
|
|
|
}
|
2018-10-04 12:51:23 +02:00
|
|
|
annots.Array = append(annots.Array, NewReference(sigfieldN, 0))
|
2018-10-02 23:19:38 +02:00
|
|
|
|
|
|
|
page.Dict["Annots"] = annots
|
|
|
|
pdf.Update(page.N, func(buf BytesWriter) {
|
|
|
|
buf.WriteString(page.Serialize())
|
|
|
|
})
|
|
|
|
|
|
|
|
// 8.6.1 Interactive Form Dictionary
|
2023-06-29 05:01:23 +02:00
|
|
|
if acroform, ok := root.Dict["AcroForm"]; ok && acroform.Kind != Nil {
|
2020-09-04 15:34:33 +02:00
|
|
|
return nil, errors.New("the document already contains forms, " +
|
|
|
|
"they would be overwritten")
|
|
|
|
}
|
|
|
|
|
2018-10-04 12:51:23 +02:00
|
|
|
root.Dict["AcroForm"] = NewDict(map[string]Object{
|
|
|
|
"Fields": NewArray([]Object{NewReference(sigfieldN, 0)}),
|
|
|
|
"SigFlags": NewNumeric(3 /* SignaturesExist | AppendOnly */),
|
|
|
|
})
|
2018-10-02 23:19:38 +02:00
|
|
|
|
|
|
|
// Upgrade the document version for SHA-256 etc.
|
2020-09-04 17:16:42 +02:00
|
|
|
if pdf.Version(&root) < 16 {
|
|
|
|
root.Dict["Version"] = NewName("1.6")
|
|
|
|
}
|
|
|
|
|
2018-10-02 23:19:38 +02:00
|
|
|
pdf.Update(rootRef.N, func(buf BytesWriter) {
|
|
|
|
buf.WriteString(root.Serialize())
|
|
|
|
})
|
|
|
|
pdf.FlushUpdates()
|
|
|
|
|
|
|
|
// Now that we know the length of everything, store byte ranges of
|
|
|
|
// what we're about to sign, which must be everything but the resulting
|
|
|
|
// signature itself.
|
|
|
|
tailOff := signOff + signLen
|
|
|
|
tailLen := len(pdf.Document) - tailOff
|
|
|
|
|
|
|
|
ranges := fmt.Sprintf("[0 %d %d %d]", signOff, tailOff, tailLen)
|
|
|
|
if len(ranges) > byterangeLen {
|
|
|
|
return nil, errors.New("not enough space reserved for /ByteRange")
|
|
|
|
}
|
|
|
|
copy(pdf.Document[byterangeOff:], []byte(ranges))
|
|
|
|
if err := FillInSignature(pdf.Document, signOff, signLen,
|
|
|
|
key, certs); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return pdf.Document, nil
|
|
|
|
}
|