Compare commits
7 Commits: c0927c05dd ... 8a00d7064b

Author | SHA1 | Date
---|---|---
 | 8a00d7064b |
 | b358467791 |
 | d0f80aa6ae |
 | 97ffe3d46e |
 | 1a3c7a8282 |
 | d8171b9ac4 |
 | bcb24af926 |
LICENSE (2 changes)
@@ -1,4 +1,4 @@
-Copyright (c) 2017 - 2020, Přemysl Eric Janouch <p@janouch.name>
+Copyright (c) 2017 - 2021, Přemysl Eric Janouch <p@janouch.name>
 
 Permission to use, copy, modify, and/or distribute this software for any
 purpose with or without fee is hereby granted.
@@ -29,6 +29,11 @@ In addition to the C++ version, also included is a native Go port:
 
  $ go get janouch.name/pdf-simple-sign/cmd/pdf-simple-sign
 
+And a crude external VFS for Midnight Commander, that may be used to extract
+all streams from a given PDF file:
+
+ $ go get janouch.name/pdf-simple-sign/cmd/extfs-pdf
+
 Contributing and Support
 ------------------------
 Use https://git.janouch.name/p/pdf-simple-sign to report bugs, request features,
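As a rough sketch of the remaining setup, not part of this change set: with a 2021-era Go toolchain, go get also installs the resulting binaries into $GOPATH/bin (by default ~/go/bin), and Midnight Commander picks up external VFS helpers from its extfs.d directory, so something like the following should suffice (the extfs.d path and the script name are assumptions; the commands the helper accepts are defined in the new cmd/extfs-pdf/main.go below):

 $ cp ~/go/bin/extfs-pdf ~/.local/share/mc/extfs.d/pdf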
cmd/extfs-pdf/main.go (new file, 132 additions)
@@ -0,0 +1,132 @@
//
// Copyright (c) 2021, Přemysl Eric Janouch <p@janouch.name>
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//

// extfs-pdf is an external VFS plugin for Midnight Commander.
// More serious image extractors should rewrite this to use pdfimages(1).
package main

import (
	"flag"
	"fmt"
	"os"

	"janouch.name/pdf-simple-sign/pdf"
)

func die(status int, format string, args ...interface{}) {
	os.Stderr.WriteString(fmt.Sprintf(format+"\n", args...))
	os.Exit(status)
}

func usage() {
	die(1, "Usage: %s [-h] COMMAND DOCUMENT [ARG...]", os.Args[0])
}

func streamSuffix(o *pdf.Object) string {
	if filter, _ := o.Dict["Filter"]; filter.Kind == pdf.Name {
		switch filter.String {
		case "JBIG2Decode":
			// This is the file extension used by pdfimages(1).
			// This is not a complete JBIG2 standalone file.
			return "jb2e"
		case "JPXDecode":
			return "jp2"
		case "DCTDecode":
			return "jpg"
		default:
			return filter.String
		}
	}
	return "stream"
}

func list(updater *pdf.Updater) {
	for _, o := range updater.ListIndirect() {
		object, err := updater.Get(o.N, o.Generation)
		size := 0
		if err != nil {
			fmt.Fprintf(os.Stderr, "%s\n", err)
		} else {
			// Accidental transformation, retrieving original data is more work.
			size = len(object.Serialize())
		}
		fmt.Printf("-r--r--r-- 1 0 0 %d 01-01-1970 00:00 %d-%d\n",
			size, o.N, o.Generation)
		if object.Kind == pdf.Stream {
			fmt.Printf("-r--r--r-- 1 0 0 %d 01-01-1970 00:00 %d-%d.%s\n",
				len(object.Stream), o.N, o.Generation, streamSuffix(&object))
		}
	}
}

func copyout(updater *pdf.Updater, storedFilename, extractTo string) {
	var (
		n, generation uint
		suffix        string
	)
	m, err := fmt.Sscanf(storedFilename, "%d-%d%s", &n, &generation, &suffix)
	if m < 2 {
		die(3, "%s: %s", storedFilename, err)
	}

	object, err := updater.Get(n, generation)
	if err != nil {
		die(3, "%s: %s", storedFilename, err)
	}

	content := []byte(object.Serialize())
	if suffix != "" {
		content = object.Stream
	}
	if err = os.WriteFile(extractTo, content, 0666); err != nil {
		die(3, "%s", err)
	}
}

func main() {
	flag.Usage = usage
	flag.Parse()
	if flag.NArg() < 2 {
		usage()
	}

	command, documentPath := flag.Arg(0), flag.Arg(1)
	doc, err := os.ReadFile(documentPath)
	if err != nil {
		die(1, "%s", err)
	}

	updater, err := pdf.NewUpdater(doc)
	if err != nil {
		die(2, "%s", err)
	}

	switch command {
	default:
		die(1, "unsupported command: %s", command)
	case "list":
		if flag.NArg() != 2 {
			usage()
		} else {
			list(updater)
		}
	case "copyout":
		if flag.NArg() != 4 {
			usage()
		} else {
			copyout(updater, flag.Arg(2), flag.Arg(3))
		}
	}
}
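To make those commands concrete, here is a sketch of invoking the helper directly from a shell rather than through Midnight Commander; the document name, object numbers and sizes are made-up placeholders, while the listing format is the ls-style output produced by list() above:

 $ extfs-pdf list document.pdf
 -r--r--r-- 1 0 0 1234 01-01-1970 00:00 3-0
 -r--r--r-- 1 0 0 56789 01-01-1970 00:00 3-0.jpg
 $ extfs-pdf copyout document.pdf 3-0.jpg extracted.jpg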
@@ -64,7 +64,7 @@ std::string ssprintf(const std::string& format, Args... args) {
 // -------------------------------------------------------------------------------------------------
 
 /// PDF token/object thingy. Objects may be composed either from one or a sequence of tokens.
-/// The PDF Reference doesn't actually speak of tokens.
+/// The PDF Reference doesn't actually speak of tokens, though ISO 32000-1:2008 does.
 struct pdf_object {
 	enum type {
 		END, NL, COMMENT, NIL, BOOL, NUMERIC, KEYWORD, NAME, STRING,
@@ -543,8 +543,8 @@ std::string pdf_updater::initialize() {
 		const auto prev_offset = trailer.dict.find("Prev");
 		if (prev_offset == trailer.dict.end())
 			break;
-		// FIXME we don't check for size_t over or underflow
-		if (!prev_offset->second.is_integer())
+		// FIXME do not read offsets and sizes as floating point numbers
+		if (!prev_offset->second.is_integer() || prev_offset->second.number < 0)
 			return "invalid Prev offset";
 		xref_offset = prev_offset->second.number;
 	}
pdf/pdf.go (114 changes)
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 2018 - 2020, Přemysl Eric Janouch <p@janouch.name>
+// Copyright (c) 2018 - 2021, Přemysl Eric Janouch <p@janouch.name>
 //
 // Permission to use, copy, modify, and/or distribute this software for any
 // purpose with or without fee is hereby granted.
@@ -59,20 +59,22 @@ const (
 	// higher-level objects
 	Array
 	Dict
+	Stream
 	Indirect
 	Reference
 )
 
 // Object is a PDF token/object thingy. Objects may be composed either from
 // one or a sequence of tokens. The PDF Reference doesn't actually speak
-// of tokens.
+// of tokens, though ISO 32000-1:2008 does.
 type Object struct {
 	Kind ObjectKind
 
 	String        string            // Comment/Keyword/Name/String
 	Number        float64           // Bool, Numeric
 	Array         []Object          // Array, Indirect
-	Dict          map[string]Object // Dict, in the future also Stream
+	Dict          map[string]Object // Dict, Stream
+	Stream        []byte            // Stream
 	N, Generation uint              // Indirect, Reference
 }
 
@@ -458,6 +460,10 @@ func (o *Object) Serialize() string {
 			fmt.Fprint(b, " /", k, " ", v.Serialize())
 		}
 		return "<<" + b.String() + " >>"
+	case Stream:
+		d := NewDict(o.Dict)
+		d.Dict["Length"] = NewNumeric(float64(len(o.Stream)))
+		return d.Serialize() + "\nstream\n" + string(o.Stream) + "\nendstream"
 	case Indirect:
 		return fmt.Sprintf("%d %d obj\n%s\nendobj", o.N, o.Generation,
 			o.Array[0].Serialize())
@@ -497,6 +503,65 @@ type Updater struct {
 	Trailer map[string]Object
 }
 
+// ListIndirect returns the whole cross-reference table as Reference Objects.
+func (u *Updater) ListIndirect() []Object {
+	result := []Object{}
+	for i := 0; i < len(u.xref); i++ {
+		if u.xref[i].nonfree {
+			result = append(result, NewReference(uint(i), u.xref[i].generation))
+		}
+	}
+	return result
+}
+
+func (u *Updater) parseStream(lex *Lexer, stack *[]Object) (Object, error) {
+	lenStack := len(*stack)
+	if lenStack < 1 {
+		return newError("missing stream dictionary")
+	}
+	dict := (*stack)[lenStack-1]
+	if dict.Kind != Dict {
+		return newError("stream not preceded by a dictionary")
+	}
+
+	*stack = (*stack)[:lenStack-1]
+	length, ok := dict.Dict["Length"]
+	if !ok {
+		return newError("missing stream Length")
+	}
+	length, err := u.Dereference(length)
+	if err != nil {
+		return length, err
+	}
+	if !length.IsUint() || length.Number > math.MaxInt {
+		return newError("stream Length not an unsigned integer")
+	}
+
+	// Expect exactly one newline.
+	if nl, err := lex.Next(); err != nil {
+		return nl, err
+	} else if nl.Kind != NL {
+		return newError("stream does not start with a newline")
+	}
+
+	size := int(length.Number)
+	if len(lex.P) < size {
+		return newError("stream is longer than the document")
+	}
+
+	dict.Kind = Stream
+	dict.Stream = lex.P[:size]
+	lex.P = lex.P[size:]
+
+	// Skip any number of trailing newlines or comments.
+	if end, err := u.parse(lex, stack); err != nil {
+		return end, err
+	} else if end.Kind != Keyword || end.String != "endstream" {
+		return newError("improperly terminated stream")
+	}
+	return dict, nil
+}
+
 func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) {
 	lenStack := len(*stack)
 	if lenStack < 2 {
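The two additions above are two halves of the same format: the new Stream case in Serialize emits the stream dictionary with a recomputed /Length, the stream keyword, a newline, the raw bytes, and endstream, which is exactly the byte sequence parseStream expects when reading a document back in. A minimal sketch, not taken from the patch and assuming only the exported pdf package API visible in this diff:

package main

import (
	"fmt"

	"janouch.name/pdf-simple-sign/pdf"
)

func main() {
	// Kind, Dict and Stream are the exported Object fields used above.
	o := pdf.Object{
		Kind:   pdf.Stream,
		Dict:   map[string]pdf.Object{},
		Stream: []byte("BT /F1 12 Tf (Hello) Tj ET"),
	}

	// Prints something like:
	//   << /Length 26 >>
	//   stream
	//   BT /F1 12 Tf (Hello) Tj ET
	//   endstream
	fmt.Println(o.Serialize())
}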
@@ -590,15 +655,11 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
 		}
 		return NewDict(dict), nil
 	case Keyword:
-		// Appears in the document body, typically needs
-		// to access the cross-reference table.
-		//
-		// TODO(p): Use the xref to read /Length etc. once we
-		// actually need to read such objects; presumably
-		// streams can use the Object.String member.
 		switch token.String {
 		case "stream":
-			return newError("streams are not supported yet")
+			// Appears in the document body,
+			// typically needs to access the cross-reference table.
+			return u.parseStream(lex, stack)
 		case "obj":
 			return u.parseIndirect(lex, stack)
 		case "R":
@@ -722,7 +783,7 @@ func NewUpdater(document []byte) (*Updater, error) {
 		if !ok {
 			break
 		}
-		// FIXME: We don't check for size_t over or underflow.
+		// FIXME: Do not read offsets and sizes as floating point numbers.
 		if !prevOffset.IsInteger() {
 			return nil, errors.New("invalid Prev offset")
 		}
@@ -766,8 +827,6 @@ func (u *Updater) Version(root *Object) int {
 
 // Get retrieves an object by its number and generation--may return
 // Nil or End with an error.
-//
-// TODO(p): We should fix all uses of this not to eat the error.
 func (u *Updater) Get(n, generation uint) (Object, error) {
 	if n >= u.xrefSize {
 		return New(Nil), nil
@@ -796,6 +855,14 @@ func (u *Updater) Get(n, generation uint) (Object, error) {
 	}
 }
 
+// Dereference dereferences Reference objects, and passes the other kinds through.
+func (u *Updater) Dereference(o Object) (Object, error) {
+	if o.Kind != Reference {
+		return o, nil
+	}
+	return u.Get(o.N, o.Generation)
+}
+
 // Allocate allocates a new object number.
 func (u *Updater) Allocate() uint {
 	n := u.xrefSize
@@ -906,15 +973,15 @@ func NewDate(ts time.Time) Object {
 
 // GetFirstPage retrieves the first page of the given page (sub)tree reference,
 // or returns a Nil object if unsuccessful.
-func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
-	obj, _ := u.Get(nodeN, nodeGeneration)
-	if obj.Kind != Dict {
+func (u *Updater) GetFirstPage(node Object) Object {
+	obj, err := u.Dereference(node)
+	if err != nil || obj.Kind != Dict {
 		return New(Nil)
 	}
 
 	// Out of convenience; these aren't filled normally.
-	obj.N = nodeN
-	obj.Generation = nodeGeneration
+	obj.N = node.N
+	obj.Generation = node.Generation
 
 	if typ, ok := obj.Dict["Type"]; !ok || typ.Kind != Name {
 		return New(Nil)
@@ -934,7 +1001,7 @@ func (u *Updater) GetFirstPage(nodeN, nodeGeneration uint) Object {
 	}
 
 	// XXX: Nothing prevents us from recursing in an evil circular graph.
-	return u.GetFirstPage(kids.Array[0].N, kids.Array[0].Generation)
+	return u.GetFirstPage(kids.Array[0])
 }
 
 // -----------------------------------------------------------------------------
@@ -1128,7 +1195,10 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
 	if !ok || rootRef.Kind != Reference {
 		return nil, errors.New("trailer does not contain a reference to Root")
 	}
-	root, _ := pdf.Get(rootRef.N, rootRef.Generation)
+	root, err := pdf.Dereference(rootRef)
+	if err != nil {
+		return nil, fmt.Errorf("Root dictionary retrieval failed: %s", err)
+	}
 	if root.Kind != Dict {
 		return nil, errors.New("invalid Root dictionary reference")
 	}
@@ -1182,7 +1252,7 @@ func Sign(document []byte, key crypto.PrivateKey, certs []*x509.Certificate,
 	if !ok || pagesRef.Kind != Reference {
 		return nil, errors.New("invalid Pages reference")
 	}
-	page := pdf.GetFirstPage(pagesRef.N, pagesRef.Generation)
+	page := pdf.GetFirstPage(pagesRef)
 	if page.Kind != Dict {
 		return nil, errors.New("invalid or unsupported page tree")
 	}