Go: implement stream parsing/serialization

This commit is contained in:
Přemysl Eric Janouch 2021-12-09 13:07:02 +01:00
parent 1a3c7a8282
commit 97ffe3d46e
Signed by: p
GPG Key ID: A0420B94F92B9493
1 changed file with 58 additions and 8 deletions

View File

@ -59,6 +59,7 @@ const (
// higher-level objects // higher-level objects
Array Array
Dict Dict
Stream
Indirect Indirect
Reference Reference
) )
@ -72,7 +73,8 @@ type Object struct {
String string // Comment/Keyword/Name/String String string // Comment/Keyword/Name/String
Number float64 // Bool, Numeric Number float64 // Bool, Numeric
Array []Object // Array, Indirect Array []Object // Array, Indirect
Dict map[string]Object // Dict, in the future also Stream Dict map[string]Object // Dict, Stream
Stream []byte // Stream
N, Generation uint // Indirect, Reference N, Generation uint // Indirect, Reference
} }
@ -458,6 +460,10 @@ func (o *Object) Serialize() string {
fmt.Fprint(b, " /", k, " ", v.Serialize()) fmt.Fprint(b, " /", k, " ", v.Serialize())
} }
return "<<" + b.String() + " >>" return "<<" + b.String() + " >>"
case Stream:
d := NewDict(o.Dict)
d.Dict["Length"] = NewNumeric(float64(len(o.Stream)))
return d.Serialize() + "\nstream\n" + string(o.Stream) + "\nendstream"
case Indirect: case Indirect:
return fmt.Sprintf("%d %d obj\n%s\nendobj", o.N, o.Generation, return fmt.Sprintf("%d %d obj\n%s\nendobj", o.N, o.Generation,
o.Array[0].Serialize()) o.Array[0].Serialize())
@ -497,6 +503,54 @@ type Updater struct {
Trailer map[string]Object Trailer map[string]Object
} }
// parseStream reads the body of a stream object. It is invoked once the
// "stream" keyword has been read, and expects the stream dictionary to be
// the topmost entry of the parse stack.
//
// The dictionary is popped from the stack, and its Length entry is passed
// through u.Dereference (presumably to resolve an indirect reference --
// confirm against Dereference). Exactly one NL token is then consumed,
// Length bytes are taken verbatim from the unread lexer input, and parsing
// continues until the closing "endstream" keyword is found.
//
// On success, the returned Object is the dictionary with its Kind changed
// to Stream and its Stream field set to the raw stream bytes. Those bytes
// alias the lexer's input buffer rather than being copied.
func (u *Updater) parseStream(lex *Lexer, stack *[]Object) (Object, error) {
	lenStack := len(*stack)
	if lenStack < 1 {
		return newError("missing stream dictionary")
	}
	dict := (*stack)[lenStack-1]
	if dict.Kind != Dict {
		return newError("stream not preceded by a dictionary")
	}

	// Pop the dictionary: it becomes part of the stream object.
	*stack = (*stack)[:lenStack-1]
	length, ok := dict.Dict["Length"]
	if !ok {
		return newError("missing stream Length")
	}

	length, err := u.Dereference(length)
	if err != nil {
		return length, err
	}

	// float64(math.MaxInt) rounds up to 2^63 on 64-bit platforms, so the
	// comparison must be inclusive: a Length of exactly 2^63 would otherwise
	// pass, overflow in the int conversion below, slip past the size check
	// as a negative number, and panic when slicing.
	if !length.IsUint() || length.Number >= math.MaxInt {
		return newError("stream Length not an unsigned integer")
	}

	// Expect exactly one newline.
	if nl, err := lex.Next(); err != nil {
		return nl, err
	} else if nl.Kind != NL {
		return newError("stream does not start with a newline")
	}

	size := int(length.Number)
	if len(lex.P) < size {
		return newError("stream is longer than the document")
	}

	dict.Kind = Stream
	// Cap the capacity, so that appending to the stream data
	// can never overwrite the document bytes that follow it.
	dict.Stream = lex.P[:size:size]
	lex.P = lex.P[size:]

	// Skip any number of trailing newlines or comments.
	if end, err := u.parse(lex, stack); err != nil {
		return end, err
	} else if end.Kind != Keyword || end.String != "endstream" {
		return newError("improperly terminated stream")
	}
	return dict, nil
}
func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) { func (u *Updater) parseIndirect(lex *Lexer, stack *[]Object) (Object, error) {
lenStack := len(*stack) lenStack := len(*stack)
if lenStack < 2 { if lenStack < 2 {
@ -590,15 +644,11 @@ func (u *Updater) parse(lex *Lexer, stack *[]Object) (Object, error) {
} }
return NewDict(dict), nil return NewDict(dict), nil
case Keyword: case Keyword:
// Appears in the document body, typically needs
// to access the cross-reference table.
//
// TODO(p): Use the xref to read /Length etc. once we
// actually need to read such objects; presumably
// streams can use the Object.String member.
switch token.String { switch token.String {
case "stream": case "stream":
return newError("streams are not supported yet") // Appears in the document body,
// typically needs to access the cross-reference table.
return u.parseStream(lex, stack)
case "obj": case "obj":
return u.parseIndirect(lex, stack) return u.parseIndirect(lex, stack)
case "R": case "R":