Skip to content

Commit 043541b

Browse files
committed
Fix #775, #490
1 parent 04634d3 commit 043541b

File tree

5 files changed

+73
-60
lines changed

5 files changed

+73
-60
lines changed

pkg/pdfcpu/model/parse.go

Lines changed: 57 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -527,78 +527,90 @@ func parseName(line *string) (*types.Name, error) {
527527
return &nameObj, nil
528528
}
529529

530+
func insertKey(d types.Dict, key string, val types.Object, usesHexCodes bool) (bool, error) {
531+
var duplicateKeyErr bool
532+
533+
if !usesHexCodes {
534+
if strings.IndexByte(key, '#') < 0 {
535+
// Avoid expensive "DecodeName".
536+
if _, found := d[key]; !found {
537+
d[key] = val
538+
} else {
539+
duplicateKeyErr = true
540+
}
541+
} else {
542+
duplicateKeyErr = d.Insert(key, val)
543+
usesHexCodes = true
544+
}
545+
} else {
546+
duplicateKeyErr = d.Insert(key, val)
547+
}
548+
549+
if duplicateKeyErr {
550+
// for now we digest duplicate keys.
551+
// TODO
552+
// if !validationRelaxed {
553+
// return false, errDictionaryDuplicateKey
554+
// }
555+
// if log.CLIEnabled() {
556+
// log.CLI.Printf("ParseDict: digesting duplicate key\n")
557+
// }
558+
_ = duplicateKeyErr
559+
}
560+
561+
if log.ParseEnabled() {
562+
log.Parse.Printf("ParseDict: dict[%s]=%v\n", key, val)
563+
}
564+
565+
return usesHexCodes, nil
566+
}
567+
530568
func processDictKeys(line *string, relaxed bool) (types.Dict, error) {
531569
l := *line
532570
var eol bool
533-
var hasNames bool
571+
var usesHexCodes bool
534572
d := types.NewDict()
535573
for !strings.HasPrefix(l, ">>") {
536-
key, err := parseName(&l)
574+
keyName, err := parseName(&l)
537575
if err != nil {
538576
return nil, err
539577
}
540578
if log.ParseEnabled() {
541-
log.Parse.Printf("ParseDict: key = %s\n", key)
579+
log.Parse.Printf("ParseDict: key = %s\n", keyName)
542580
}
543581

544-
// position to first non whitespace after key
582+
// Position to first non whitespace after key.
545583
l, eol = trimLeftSpace(l, relaxed)
546584

547585
if len(l) == 0 {
548586
if log.ParseEnabled() {
549587
log.Parse.Println("ParseDict: only whitespace after key")
550588
}
551-
// only whitespace after key
589+
// Only whitespace after key.
552590
return nil, errDictionaryNotTerminated
553591
}
554592

555-
// Fix for #252:
556-
// For dicts with kv pairs terminated by eol we accept a missing value as an empty string.
593+
var val types.Object
594+
557595
if eol {
558-
obj := types.StringLiteral("")
559-
if log.ParseEnabled() {
560-
log.Parse.Printf("ParseDict: dict[%s]=%v\n", key, obj)
561-
}
562-
stringKey := string(*key)
563-
if !hasNames {
564-
// Avoid expensive "DecodeName" on existing keys in "Insert".
565-
if _, found := d[stringKey]; found {
566-
return nil, errDictionaryDuplicateKey
567-
}
568-
d[stringKey] = obj
569-
hasNames = strings.IndexByte(stringKey, '#') >= 0
570-
} else {
571-
if ok := d.Insert(stringKey, obj); !ok {
572-
return nil, errDictionaryDuplicateKey
573-
}
596+
// #252: For dicts with kv pairs terminated by eol we accept a missing value as an empty string.
597+
val = types.StringLiteral("")
598+
} else {
599+
if val, err = ParseObject(&l); err != nil {
600+
return nil, err
574601
}
575-
continue
576-
}
577-
578-
obj, err := ParseObject(&l)
579-
if err != nil {
580-
return nil, err
581602
}
582603

583604
// Specifying the null object as the value of a dictionary entry (7.3.7, "Dictionary Objects")
584-
// hall be equivalent to omitting the entry entirely.
585-
if obj != nil {
586-
stringKey := string(*key)
587-
if !hasNames {
588-
// Avoid expensive "DecodeName" on existing keys in "Insert".
589-
if _, found := d[stringKey]; !found {
590-
d[stringKey] = obj
591-
hasNames = strings.IndexByte(stringKey, '#') >= 0
592-
}
593-
} else {
594-
d.Insert(stringKey, obj)
605+
// shall be equivalent to omitting the entry entirely.
606+
if val != nil {
607+
detectedHexCodes, err := insertKey(d, string(*keyName), val, usesHexCodes)
608+
if err != nil {
609+
return nil, err
595610
}
596-
if log.ParseEnabled() {
597-
log.Parse.Printf("ParseDict: dict[%s]=%v\n", key, obj)
611+
if !usesHexCodes && detectedHexCodes {
612+
usesHexCodes = true
598613
}
599-
// if ok := d.Insert(string(*key), obj); !ok {
600-
// return nil, errDictionaryDuplicateKey
601-
// }
602614
}
603615

604616
// We are positioned on the char behind the last parsed dict value.

pkg/pdfcpu/model/parse_dict_test.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,6 @@ func doTestParseDictWithComments(t *testing.T) {
148148
}
149149

150150
func doTestLargeDicts(t *testing.T) {
151-
// Make sure parsing large dictionaries is fast. Found a file in the wild
152-
// that has two dictionaries with about 200.000 entries each.
153151
var sb strings.Builder
154152
sb.WriteString("<<")
155153
for i := 0; i < 50000; i++ {

pkg/pdfcpu/read.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1361,7 +1361,10 @@ func showRep() {
13611361
// bypassXrefSection is a fix for digesting corrupt xref sections.
13621362
// It populates the xRefTable by reading in all indirect objects line by line
13631363
// and works on the assumption of a single xref section - meaning no incremental updates.
1364-
func bypassXrefSection(ctx *model.Context, offExtra int64) error {
1364+
func bypassXrefSection(ctx *model.Context, offExtra int64, wasErr error) error {
1365+
if log.ReadEnabled() {
1366+
log.Read.Printf("bypassXRefSection after %v\n", wasErr)
1367+
}
13651368
var z int64
13661369
g := types.FreeHeadGeneration
13671370
ctx.Table[0] = &model.XRefTableEntry{
@@ -1561,11 +1564,10 @@ func buildXRefTableStartingAt(ctx *model.Context, offset *int64) error {
15611564
return err
15621565
}
15631566
if offset, err = parseXRefStream(ctx, rd, offset, offExtra); err != nil {
1564-
if log.ReadEnabled() {
1565-
log.Read.Printf("bypassXRefSection after %v\n", err)
1567+
if ctx.XRefTable.ValidationMode == model.ValidationRelaxed {
1568+
// Try fix for corrupt single xref section.
1569+
return bypassXrefSection(ctx, offExtra, err)
15661570
}
1567-
// Try fix for corrupt single xref section.
1568-
return bypassXrefSection(ctx, offExtra)
15691571
}
15701572

15711573
}

pkg/pdfcpu/types/dict.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,12 @@ func (d Dict) Clone() Object {
5252
}
5353

5454
// Insert adds a new entry to this PDFDict.
55-
func (d Dict) Insert(key string, value Object) (ok bool) {
56-
_, found := d.Find(key)
57-
if !found {
58-
d[key] = value
59-
ok = true
55+
func (d Dict) Insert(k string, v Object) bool {
56+
if _, found := d.Find(k); !found {
57+
d[k] = v
58+
return true
6059
}
61-
return ok
60+
return false
6261
}
6362

6463
// InsertBool adds a new bool entry to this PDFDict.

pkg/pdfcpu/validate/colorspace.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -597,8 +597,10 @@ func validateColorSpaceArray(xRefTable *model.XRefTable, a types.Array, excludeP
597597
case model.DeviceNCS:
598598
err = validateDeviceNColorSpace(xRefTable, a, model.V13)
599599

600-
// Relaxed validation:
601-
case model.DeviceRGBCS:
600+
case model.DeviceGrayCS, model.DeviceRGBCS, model.DeviceCMYKCS:
601+
if xRefTable.ValidationMode != model.ValidationRelaxed {
602+
err = errors.Errorf("pdfcpu: validateColorSpaceArray: undefined color space: %s\n", name)
603+
}
602604

603605
default:
604606
err = errors.Errorf("pdfcpu: validateColorSpaceArray: undefined color space: %s\n", name)

0 commit comments

Comments
 (0)