Skip to content

Commit b9c28ae

Browse files
committed
Fix #930
1 parent 3aff1b0 commit b9c28ae

File tree

1 file changed

+79
-8
lines changed

1 file changed

+79
-8
lines changed

pkg/pdfcpu/optimize.go

Lines changed: 79 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -63,22 +63,93 @@ func optimizeContentStreamUsage(ctx *model.Context, sd *types.StreamDict, objNr
6363
return nil, nil
6464
}
6565

66+
func removeEmptyContentStreams(ctx *model.Context, pageDict types.Dict, obj types.Object, pageObjNumber int) error {
67+
var contentArr types.Array
68+
69+
if ir, ok := obj.(types.IndirectRef); ok {
70+
71+
objNr := ir.ObjectNumber.Value()
72+
entry, found := ctx.FindTableEntry(objNr, ir.GenerationNumber.Value())
73+
if !found {
74+
return errors.Errorf("removeEmptyContentStreams: obj#:%d illegal indRef for Contents\n", pageObjNumber)
75+
}
76+
77+
contentStreamDict, ok := entry.Object.(types.StreamDict)
78+
if ok {
79+
if err := contentStreamDict.Decode(); err != nil {
80+
return err
81+
}
82+
if len(contentStreamDict.Content) == 0 {
83+
pageDict.Delete("Contents")
84+
}
85+
return nil
86+
}
87+
88+
contentArr, ok = entry.Object.(types.Array)
89+
if !ok {
90+
return errors.Errorf("removeEmptyContentStreams: obj#:%d page content entry neither stream dict nor array.\n", pageObjNumber)
91+
}
92+
93+
} else if contentArr, ok = obj.(types.Array); !ok {
94+
return errors.Errorf("removeEmptyContentStreams: obj#:%d corrupt page content array\n", pageObjNumber)
95+
}
96+
97+
var newContentArr types.Array
98+
99+
for _, c := range contentArr {
100+
101+
ir, ok := c.(types.IndirectRef)
102+
if !ok {
103+
return errors.Errorf("removeEmptyContentStreams: obj#:%d corrupt page content array entry\n", pageObjNumber)
104+
}
105+
106+
objNr := ir.ObjectNumber.Value()
107+
entry, found := ctx.FindTableEntry(objNr, ir.GenerationNumber.Value())
108+
if !found {
109+
return errors.Errorf("removeEmptyContentStreams: obj#:%d illegal indRef for Contents\n", pageObjNumber)
110+
}
111+
112+
contentStreamDict, ok := entry.Object.(types.StreamDict)
113+
if !ok {
114+
return errors.Errorf("identifyPageContent: obj#:%d page content entry is no stream dict\n", pageObjNumber)
115+
}
116+
117+
if err := contentStreamDict.Decode(); err != nil {
118+
return err
119+
}
120+
if len(contentStreamDict.Content) > 0 {
121+
newContentArr = append(newContentArr, c)
122+
}
123+
}
124+
125+
pageDict["Contents"] = newContentArr
126+
127+
return nil
128+
}
129+
66130
func optimizePageContent(ctx *model.Context, pageDict types.Dict, pageObjNumber int) error {
67-
if !ctx.OptimizeDuplicateContentStreams {
131+
o, found := pageDict.Find("Contents")
132+
if !found {
68133
return nil
69134
}
70-
if log.OptimizeEnabled() {
71-
log.Optimize.Println("identifyPageContent begin")
135+
136+
if err := removeEmptyContentStreams(ctx, pageDict, o, pageObjNumber); err != nil {
137+
return err
72138
}
73139

74-
o, found := pageDict.Find("Contents")
140+
o, found = pageDict.Find("Contents")
75141
if !found {
76-
if log.OptimizeEnabled() {
77-
log.Optimize.Println("identifyPageContent end: no \"Contents\"")
78-
}
79142
return nil
80143
}
81144

145+
if !ctx.OptimizeDuplicateContentStreams {
146+
return nil
147+
}
148+
149+
if log.OptimizeEnabled() {
150+
log.Optimize.Println("identifyPageContent begin")
151+
}
152+
82153
var contentArr types.Array
83154

84155
if ir, ok := o.(types.IndirectRef); ok {
@@ -685,7 +756,7 @@ func parseResourcesDict(ctx *model.Context, pageDict types.Dict, pageNumber, pag
685756
return nil
686757
}
687758

688-
// Iterate over all pages and optimize resources.
759+
// Iterate over all pages and optimize content & resources.
689760
func parsePagesDict(ctx *model.Context, pagesDict types.Dict, pageNumber int) (int, error) {
690761
// TODO Integrate resource consolidation based on content stream requirements.
691762

0 commit comments

Comments
 (0)